summary refs log tree commit diff
diff options
context:
space:
mode:
author David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-12 23:06:10 -0700
committer David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-12 23:06:10 -0700
commit 651ec37ba8efa8fc1ffe3f490182e68bc468969d (patch)
tree f3ecbe091555be26568dbba36fd5c5878b19ca47
parent 91bd79feefa7b1bcfae79b4efd39955a6e301391 (diff)
First working parallelization test
-rw-r--r-- c_matrix.py 6
-rw-r--r-- do_ml.py 23
-rw-r--r-- lj_matrix.py 6
-rw-r--r-- main.py 69
4 files changed, 63 insertions, 41 deletions
diff --git a/c_matrix.py b/c_matrix.py
index 75cc4d8a2..4de711a1b 100644
--- a/c_matrix.py
+++ b/c_matrix.py
@@ -170,10 +170,10 @@ def c_matrix_multiple(mol_data,
cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
for mol, nc in zip(mol_data, nc_data)])
- if pipe:
- pipe.send(cm_data)
-
toc = time.perf_counter()
printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN')
+ if pipe:
+ pipe.send(cm_data)
+
return cm_data
diff --git a/do_ml.py b/do_ml.py
index 63a6fc671..87e8b72db 100644
--- a/do_ml.py
+++ b/do_ml.py
@@ -30,19 +30,22 @@ from cholesky_solve import cholesky_solve
def do_ml(desc_data,
energy_data,
training_size,
+ desc_type=None,
+ pipe=None,
test_size=None,
sigma=1000.0,
- desc_type=None,
show_msgs=True):
"""
Does the ML methodology.
desc_data: descriptor (or representation) data.
energy_data: energy data associated with desc_data.
training_size: size of the training set to use.
+ desc_type: string with the name of the descriptor used.
+ pipe: for multiprocessing purposes. Sends the data calculated
+ through a pipe.
test_size: size of the test set to use. If no size is given,
the last remaining molecules are used.
sigma: depth of the kernel.
- desc_type: string with the name of the descriptor used.
show_msgs: Show debug messages or not.
NOTE: desc_type is just a string and is only for identification purposes.
Also, training is done with the first part of the data and
@@ -69,10 +72,10 @@ def do_ml(desc_data,
tic = time.perf_counter()
if show_msgs:
- printc('{} ML started, with parameters:'.format(desc_type), 'CYAN')
- printc('\tTraining size: {}'.format(training_size), 'BLUE')
- printc('\tTest size: {}'.format(test_size), 'BLUE')
- printc('\tSigma: {}'.format(sigma), 'BLUE')
+ printc('{} ML started.'.format(desc_type), 'GREEN')
+ printc('\tTraining size: {}'.format(training_size), 'CYAN')
+ printc('\tTest size: {}'.format(test_size), 'CYAN')
+ printc('\tSigma: {}'.format(sigma), 'CYAN')
Xcm_training = desc_data[:training_size]
Ycm_training = energy_data[:training_size]
@@ -86,12 +89,18 @@ def do_ml(desc_data,
mae = np.mean(np.abs(Ycm_predicted - Ycm_test))
if show_msgs:
- print('\tMAE for {}: {:.4f}'.format(desc_type, mae))
+ printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
toc = time.perf_counter()
tictoc = toc - tic
if show_msgs:
printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
'GREEN')
+ printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
+ printc('\t\tTest size: {}'.format(test_size), 'CYAN')
+ printc('\t\tSigma: {}'.format(sigma), 'CYAN')
+
+ if pipe:
+ pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
return mae, tictoc
diff --git a/lj_matrix.py b/lj_matrix.py
index 55e729c56..5cb1b5a8d 100644
--- a/lj_matrix.py
+++ b/lj_matrix.py
@@ -188,10 +188,10 @@ def lj_matrix_multiple(mol_data,
ljm_data = np.array([lj_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
for mol, nc in zip(mol_data, nc_data)])
- if pipe:
- pipe.send(ljm_data)
-
toc = time.perf_counter()
printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN')
+ if pipe:
+ pipe.send(ljm_data)
+
return ljm_data
diff --git a/main.py b/main.py
index c8df47800..f39b9b57b 100644
--- a/main.py
+++ b/main.py
@@ -27,60 +27,73 @@ from misc import printc
from read_qm7_data import read_qm7_data
from c_matrix import c_matrix_multiple
from lj_matrix import lj_matrix_multiple
-# from do_ml import do_ml
+from do_ml import do_ml
# Test
def main():
# Initialization time.
init_time = time.perf_counter()
- procs = []
- pipes = []
# Data reading.
zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\
read_qm7_data()
# Matrices calculation.
- cm_recv, cm_send = Pipe()
- pipes.append(cm_send)
+ procs = []
+ pipes = []
+
+ cm_recv, cm_send = Pipe(False)
p1 = Process(target=c_matrix_multiple,
args=(molecules, nuclear_charge, cm_send))
procs.append(p1)
+ pipes.append(cm_recv)
p1.start()
- ljm_recv, ljm_send = Pipe()
- pipes.append(ljm_send)
+ ljm_recv, ljm_send = Pipe(False)
p2 = Process(target=lj_matrix_multiple,
args=(molecules, nuclear_charge, ljm_send))
procs.append(p2)
+ pipes.append(ljm_recv)
p2.start()
- cm_data = cm_recv.recv()
- ljm_data = ljm_recv.recv()
+ cm_data = pipes[0].recv()
+ ljm_data = pipes[1].recv()
- for pipe, proc in zip(pipes, procs):
- pipe.close()
+ for proc in procs:
proc.join()
- print(type(cm_data), cm_data[0])
- print(type(ljm_data), ljm_data[0])
-
- """
# ML calculation.
- do_ml(cm_data,
- energy_pbe0,
- 1000,
- test_size=100,
- sigma=1000.0,
- desc_type='CM')
- do_ml(ljm_data,
- energy_pbe0,
- 1000,
- test_size=100,
- sigma=1000.0,
- desc_type='L-JM')
- """
+ procs = []
+ cm_pipes = []
+ ljm_pipes = []
+ for i in range(500, 1500 + 1, 500):
+ cm_recv, cm_send = Pipe(False)
+ p1 = Process(target=do_ml,
+ args=(cm_data, energy_pbe0, i, 'CM', cm_send, 500))
+ procs.append(p1)
+ cm_pipes.append(cm_recv)
+ p1.start()
+
+ ljm_recv, ljm_send = Pipe(False)
+ p2 = Process(target=do_ml,
+ args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send, 500))
+ procs.append(p2)
+ ljm_pipes.append(ljm_recv)
+ p2.start()
+
+ for proc in procs:
+ proc.join()
+
+ cm_bench_results = []
+ ljm_bench_results = []
+ for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
+ cm_bench_results.append(cd_pipe.recv())
+ ljm_bench_results.append(ljd_pipe.recv())
+
+ for cm, ljm, in zip(cm_bench_results, ljm_bench_results):
+ print(cm)
+ print(ljm)
# End of program
end_time = time.perf_counter()