From 651ec37ba8efa8fc1ffe3f490182e68bc468969d Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Thu, 12 Dec 2019 23:06:10 -0700
Subject: First working parallelization test

---
 c_matrix.py  |  6 +++---
 do_ml.py     | 23 ++++++++++++++------
 lj_matrix.py |  6 +++---
 main.py      | 69 ++++++++++++++++++++++++++++++++++++------------------------
 4 files changed, 63 insertions(+), 41 deletions(-)

diff --git a/c_matrix.py b/c_matrix.py
index 75cc4d8a2..4de711a1b 100644
--- a/c_matrix.py
+++ b/c_matrix.py
@@ -170,10 +170,10 @@ def c_matrix_multiple(mol_data,
     cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
                        for mol, nc in zip(mol_data, nc_data)])
 
-    if pipe:
-        pipe.send(cm_data)
-
     toc = time.perf_counter()
     printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN')
 
+    if pipe:
+        pipe.send(cm_data)
+
     return cm_data
diff --git a/do_ml.py b/do_ml.py
index 63a6fc671..87e8b72db 100644
--- a/do_ml.py
+++ b/do_ml.py
@@ -30,19 +30,22 @@ from cholesky_solve import cholesky_solve
 def do_ml(desc_data,
           energy_data,
           training_size,
+          desc_type=None,
+          pipe=None,
           test_size=None,
           sigma=1000.0,
-          desc_type=None,
           show_msgs=True):
     """
     Does the ML methodology.
     desc_data: descriptor (or representation) data.
     energy_data: energy data associated with desc_data.
     training_size: size of the training set to use.
+    desc_type: string with the name of the descriptor used.
+    pipe: for multiprocessing purposes. If given, a list with the run
+        parameters, the MAE and the elapsed time is sent through it.
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
     sigma: depth of the kernel.
-    desc_type: string with the name of the descriptor used.
     show_msgs: Show debug messages or not.
     NOTE: desc_type is just a string and is only for identification purposes.
     Also, training is done with the first part of the data and
@@ -69,10 +72,10 @@ def do_ml(desc_data,
 
     tic = time.perf_counter()
     if show_msgs:
-        printc('{} ML started, with parameters:'.format(desc_type), 'CYAN')
-        printc('\tTraining size: {}'.format(training_size), 'BLUE')
-        printc('\tTest size: {}'.format(test_size), 'BLUE')
-        printc('\tSigma: {}'.format(sigma), 'BLUE')
+        printc('{} ML started.'.format(desc_type), 'GREEN')
+        printc('\tTraining size: {}'.format(training_size), 'CYAN')
+        printc('\tTest size: {}'.format(test_size), 'CYAN')
+        printc('\tSigma: {}'.format(sigma), 'CYAN')
 
     Xcm_training = desc_data[:training_size]
     Ycm_training = energy_data[:training_size]
@@ -86,12 +89,18 @@ def do_ml(desc_data,
 
     mae = np.mean(np.abs(Ycm_predicted - Ycm_test))
     if show_msgs:
-        print('\tMAE for {}: {:.4f}'.format(desc_type, mae))
+        printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
 
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
         printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
                'GREEN')
+        printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
+        printc('\t\tTest size: {}'.format(test_size), 'CYAN')
+        printc('\t\tSigma: {}'.format(sigma), 'CYAN')
+
+    if pipe:
+        pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
 
     return mae, tictoc
diff --git a/lj_matrix.py b/lj_matrix.py
index 55e729c56..5cb1b5a8d 100644
--- a/lj_matrix.py
+++ b/lj_matrix.py
@@ -188,10 +188,10 @@ def lj_matrix_multiple(mol_data,
     ljm_data = np.array([lj_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
                         for mol, nc in zip(mol_data, nc_data)])
 
-    if pipe:
-        pipe.send(ljm_data)
-
     toc = time.perf_counter()
     printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN')
 
+    if pipe:
+        pipe.send(ljm_data)
+
     return ljm_data
diff --git a/main.py b/main.py
index c8df47800..f39b9b57b 100644
--- a/main.py
+++ b/main.py
@@ -27,60 +27,73 @@ from misc import printc
 from read_qm7_data import read_qm7_data
 from c_matrix import c_matrix_multiple
 from lj_matrix import lj_matrix_multiple
-# from do_ml import do_ml
+from do_ml import do_ml
 
 
 # Test
 def main():
     # Initialization time.
     init_time = time.perf_counter()
-    procs = []
-    pipes = []
 
     # Data reading.
     zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\
         read_qm7_data()
 
     # Matrices calculation.
-    cm_recv, cm_send = Pipe()
-    pipes.append(cm_send)
+    procs = []
+    pipes = []
+
+    cm_recv, cm_send = Pipe(False)
     p1 = Process(target=c_matrix_multiple,
                  args=(molecules, nuclear_charge, cm_send))
     procs.append(p1)
+    pipes.append(cm_recv)
     p1.start()
 
-    ljm_recv, ljm_send = Pipe()
-    pipes.append(ljm_send)
+    ljm_recv, ljm_send = Pipe(False)
     p2 = Process(target=lj_matrix_multiple,
                  args=(molecules, nuclear_charge, ljm_send))
     procs.append(p2)
+    pipes.append(ljm_recv)
     p2.start()
 
-    cm_data = cm_recv.recv()
-    ljm_data = ljm_recv.recv()
+    cm_data = pipes[0].recv()
+    ljm_data = pipes[1].recv()
 
-    for pipe, proc in zip(pipes, procs):
-        pipe.close()
+    for proc in procs:
         proc.join()
 
-    print(type(cm_data), cm_data[0])
-    print(type(ljm_data), ljm_data[0])
-
-    """
     # ML calculation.
-    do_ml(cm_data,
-        energy_pbe0,
-        1000,
-        test_size=100,
-        sigma=1000.0,
-        desc_type='CM')
-    do_ml(ljm_data,
-        energy_pbe0,
-        1000,
-        test_size=100,
-        sigma=1000.0,
-        desc_type='L-JM')
-    """
+    procs = []
+    cm_pipes = []
+    ljm_pipes = []
+    for i in range(500, 1500 + 1, 500):
+        cm_recv, cm_send = Pipe(False)
+        p1 = Process(target=do_ml,
+                     args=(cm_data, energy_pbe0, i, 'CM', cm_send, 500))
+        procs.append(p1)
+        cm_pipes.append(cm_recv)
+        p1.start()
+
+        ljm_recv, ljm_send = Pipe(False)
+        p2 = Process(target=do_ml,
+                     args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send, 500))
+        procs.append(p2)
+        ljm_pipes.append(ljm_recv)
+        p2.start()
+
+    # Receive before joining so a child blocked in send() cannot hang join().
+    cm_bench_results = []
+    ljm_bench_results = []
+    for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
+        cm_bench_results.append(cd_pipe.recv())
+        ljm_bench_results.append(ljd_pipe.recv())
+    for proc in procs:
+        proc.join()
+
+    for cm, ljm in zip(cm_bench_results, ljm_bench_results):
+        print(cm)
+        print(ljm)
 
     # End of program
     end_time = time.perf_counter()
-- 
cgit v1.2.3-54-g00ecf