From 651ec37ba8efa8fc1ffe3f490182e68bc468969d Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 23:06:10 -0700 Subject: First working parallelization test --- c_matrix.py | 6 +++--- do_ml.py | 23 ++++++++++++++------ lj_matrix.py | 6 +++--- main.py | 69 ++++++++++++++++++++++++++++++++++++------------------------ 4 files changed, 63 insertions(+), 41 deletions(-) diff --git a/c_matrix.py b/c_matrix.py index 75cc4d8a2..4de711a1b 100644 --- a/c_matrix.py +++ b/c_matrix.py @@ -170,10 +170,10 @@ def c_matrix_multiple(mol_data, cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) - if pipe: - pipe.send(cm_data) - toc = time.perf_counter() printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN') + if pipe: + pipe.send(cm_data) + return cm_data diff --git a/do_ml.py b/do_ml.py index 63a6fc671..87e8b72db 100644 --- a/do_ml.py +++ b/do_ml.py @@ -30,19 +30,22 @@ from cholesky_solve import cholesky_solve def do_ml(desc_data, energy_data, training_size, + desc_type=None, + pipe=None, test_size=None, sigma=1000.0, - desc_type=None, show_msgs=True): """ Does the ML methodology. desc_data: descriptor (or representation) data. energy_data: energy data associated with desc_data. training_size: size of the training set to use. + desc_type: string with the name of the descriptor used. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. test_size: size of the test set to use. If no size is given, the last remaining molecules are used. sigma: depth of the kernel. - desc_type: string with the name of the descriptor used. show_msgs: Show debug messages or not. NOTE: desc_type is just a string and is only for identification purposes. Also, training is done with the first part of the data and @@ -69,10 +72,10 @@ def do_ml(desc_data, tic = time.perf_counter() if show_msgs: - printc('{} ML started, with parameters:'.format(desc_type), 'CYAN') - printc('\tTraining size: {}'.format(training_size), 'BLUE') - printc('\tTest size: {}'.format(test_size), 'BLUE') - printc('\tSigma: {}'.format(sigma), 'BLUE') + printc('{} ML started.'.format(desc_type), 'GREEN') + printc('\tTraining size: {}'.format(training_size), 'CYAN') + printc('\tTest size: {}'.format(test_size), 'CYAN') + printc('\tSigma: {}'.format(sigma), 'CYAN') Xcm_training = desc_data[:training_size] Ycm_training = energy_data[:training_size] @@ -86,12 +89,18 @@ def do_ml(desc_data, mae = np.mean(np.abs(Ycm_predicted - Ycm_test)) if show_msgs: - print('\tMAE for {}: {:.4f}'.format(desc_type, mae)) + printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN') toc = time.perf_counter() tictoc = toc - tic if show_msgs: printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc), 'GREEN') + printc('\t\tTraining size: {}'.format(training_size), 'CYAN') + printc('\t\tTest size: {}'.format(test_size), 'CYAN') + printc('\t\tSigma: {}'.format(sigma), 'CYAN') + + if pipe: + pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc]) return mae, tictoc diff --git a/lj_matrix.py b/lj_matrix.py index 55e729c56..5cb1b5a8d 100644 --- a/lj_matrix.py +++ b/lj_matrix.py @@ -188,10 +188,10 @@ def lj_matrix_multiple(mol_data, ljm_data = np.array([lj_matrix(mol, nc, max_len, as_eig, bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) - if pipe: - pipe.send(ljm_data) - toc = time.perf_counter() printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN') + if pipe: + pipe.send(ljm_data) + return ljm_data diff --git a/main.py b/main.py index c8df47800..f39b9b57b 100644 --- a/main.py +++ b/main.py @@ -27,60 +27,73 @@ from misc import printc from read_qm7_data import read_qm7_data from c_matrix import c_matrix_multiple from lj_matrix import lj_matrix_multiple -# from do_ml import do_ml +from do_ml import do_ml # Test def main(): # Initialization time. init_time = time.perf_counter() - procs = [] - pipes = [] # Data reading. zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ read_qm7_data() # Matrices calculation. - cm_recv, cm_send = Pipe() - pipes.append(cm_send) + procs = [] + pipes = [] + + cm_recv, cm_send = Pipe(False) p1 = Process(target=c_matrix_multiple, args=(molecules, nuclear_charge, cm_send)) procs.append(p1) + pipes.append(cm_recv) p1.start() - ljm_recv, ljm_send = Pipe() - pipes.append(ljm_send) + ljm_recv, ljm_send = Pipe(False) p2 = Process(target=lj_matrix_multiple, args=(molecules, nuclear_charge, ljm_send)) procs.append(p2) + pipes.append(ljm_recv) p2.start() - cm_data = cm_recv.recv() - ljm_data = ljm_recv.recv() + cm_data = pipes[0].recv() + ljm_data = pipes[1].recv() - for pipe, proc in zip(pipes, procs): - pipe.close() + for proc in procs: proc.join() - print(type(cm_data), cm_data[0]) - print(type(ljm_data), ljm_data[0]) - - """ # ML calculation. - do_ml(cm_data, - energy_pbe0, - 1000, - test_size=100, - sigma=1000.0, - desc_type='CM') - do_ml(ljm_data, - energy_pbe0, - 1000, - test_size=100, - sigma=1000.0, - desc_type='L-JM') - """ + procs = [] + cm_pipes = [] + ljm_pipes = [] + for i in range(500, 1500 + 1, 500): + cm_recv, cm_send = Pipe(False) + p1 = Process(target=do_ml, + args=(cm_data, energy_pbe0, i, 'CM', cm_send, 500)) + procs.append(p1) + cm_pipes.append(cm_recv) + p1.start() + + ljm_recv, ljm_send = Pipe(False) + p2 = Process(target=do_ml, + args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send, 500)) + procs.append(p2) + ljm_pipes.append(ljm_recv) + p2.start() + + for proc in procs: + proc.join() + + cm_bench_results = [] + ljm_bench_results = [] + for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): + cm_bench_results.append(cd_pipe.recv()) + ljm_bench_results.append(ljd_pipe.recv()) + + for cm, ljm, in zip(cm_bench_results, ljm_bench_results): + print(cm) + print(ljm) # End of program end_time = time.perf_counter() -- cgit v1.2.3-54-g00ecf