From 487bf8840846b5d4d694b38985268c308aadb36e Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Wed, 18 Dec 2019 07:21:35 -0700
Subject: Refactor files

---
 lj_matrix/__init__.py       |  22 ++++
 lj_matrix/__main__.py       | 238 ++++++++++++++++++++++++++++++++++++++++++++
 lj_matrix/c_matrix.py       | 179 +++++++++++++++++++++++++++++++++
 lj_matrix/cholesky_solve.py |  64 ++++++++++++
 lj_matrix/do_ml.py          | 108 ++++++++++++++++++++
 lj_matrix/frob_norm.py      |  51 ++++++++++
 lj_matrix/gauss_kernel.py   |  49 +++++++++
 lj_matrix/lj_matrix.py      | 207 ++++++++++++++++++++++++++++++++++++++
 lj_matrix/misc.py           |  53 ++++++++++
 lj_matrix/read_qm7_data.py  | 144 +++++++++++++++++++++++++++
 10 files changed, 1115 insertions(+)
 create mode 100644 lj_matrix/__init__.py
 create mode 100644 lj_matrix/__main__.py
 create mode 100644 lj_matrix/c_matrix.py
 create mode 100644 lj_matrix/cholesky_solve.py
 create mode 100644 lj_matrix/do_ml.py
 create mode 100644 lj_matrix/frob_norm.py
 create mode 100644 lj_matrix/gauss_kernel.py
 create mode 100644 lj_matrix/lj_matrix.py
 create mode 100644 lj_matrix/misc.py
 create mode 100644 lj_matrix/read_qm7_data.py

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
new file mode 100644
index 000000000..48cd14913
--- /dev/null
+++ b/lj_matrix/__init__.py
@@ -0,0 +1,22 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
new file mode 100644
index 000000000..4e13f4995
--- /dev/null
+++ b/lj_matrix/__main__.py
@@ -0,0 +1,238 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import time
+from multiprocessing import Process, Pipe
+# import matplotlib.pyplot as plt
+import pandas as pd
+from lj_matrix.misc import printc
+from lj_matrix.read_qm7_data import read_qm7_data
+from lj_matrix.c_matrix import c_matrix_multiple
+from lj_matrix.lj_matrix import lj_matrix_multiple
+from lj_matrix.do_ml import do_ml
+
+
+# Test
+def ml():
+    """
+    Main function that does the whole ML process: reads QM7, builds the L-J
+    descriptors, runs do_ml() per training size and appends rows to the CSV.
+    """
+    # Initialization time (start of the wall clock reported at the end).
+    init_time = time.perf_counter()
+    # Data reading (zi_data and energy_delta are not used below).
+    zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\
+        read_qm7_data()
+
+    # Matrices calculation: the child process sends its result via a pipe.
+    procs = []
+    pipes = []
+
+    # cm_recv, cm_send = Pipe(False)
+    # p1 = Process(target=c_matrix_multiple,
+    #              args=(molecules, nuclear_charge, cm_send))
+    # procs.append(p1)
+    # pipes.append(cm_recv)
+    # p1.start()
+
+    ljm_recv, ljm_send = Pipe(False)
+    p2 = Process(target=lj_matrix_multiple,
+                 args=(molecules, nuclear_charge, ljm_send, 1, 0.25))
+    procs.append(p2)
+    pipes.append(ljm_recv)
+    p2.start()
+    # Receive before join(), presumably so the child never blocks on send().
+    # cm_data = pipes[0].recv()
+    ljm_data = pipes[0].recv()
+
+    for proc in procs:
+        proc.join()
+
+    # ML calculation: one do_ml() process per training size (1500 to 6500).
+    procs = []
+    # cm_pipes = []
+    ljm_pipes = []
+    for i in range(1500, 6500 + 1, 500):
+        # cm_recv, cm_send = Pipe(False)
+        # p1 = Process(target=do_ml,
+        #              args=(cm_data, energy_pbe0, i, 'CM', cm_send))
+        # procs.append(p1)
+        # cm_pipes.append(cm_recv)
+        # p1.start()
+
+        ljm_recv, ljm_send = Pipe(False)
+        p2 = Process(target=do_ml,
+                     args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send))
+        procs.append(p2)
+        ljm_pipes.append(ljm_recv)
+        p2.start()
+
+    # cm_bench_results = []
+    ljm_bench_results = []
+    for ljd_pipe in ljm_pipes:  # cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
+        # cm_bench_results.append(cd_pipe.recv())
+        ljm_bench_results.append(ljd_pipe.recv())
+
+    for proc in procs:
+        proc.join()
+    # The hard-coded ',1,0.25,' mirrors the values given to lj_matrix_multiple.
+    with open('data\\benchmarks.csv', 'a') as save_file:
+        # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,',
+        #                          'mae,time,lj_s,lj_e,date_ran\n']))
+        date = '/'.join([str(field) for field in time.localtime()[:3][::-1]])
+        for ljm in ljm_bench_results:  # cm, ljm, in zip(cm_bench_results, ljm_bench_results):
+            # cm_text = ','.join([str(field) for field in cm])\
+            #     + ',' + date + '\n'
+            ljm_text = ','.join([str(field) for field in ljm])\
+                + ',1,0.25,' + date + '\n'
+            # save_file.write(cm_text)
+            save_file.write(ljm_text)
+
+    # End of program: report the total wall clock time.
+    end_time = time.perf_counter()
+    printc('Program took {:.4f} seconds.'.format(end_time - init_time),
+           'CYAN')
+
+
+def pl():
+    """
+    Function for plotting the benchmarks.
+
+    Reads the benchmarks csv, prints the data of each plotted set and saves
+    two figures: mae vs tr_size for the lj_s and for the lj_e parameters.
+    Rows 0-21 are taken as the first CM/L-JM runs, then 11 rows per set.
+    """
+    # Original columns.
+    or_cols = ['ml_type',
+               'tr_size',
+               'te_size',
+               'kernel_s',
+               'mae',
+               'time',
+               'lj_s',
+               'lj_e',
+               'date_ran']
+    # Drop some original columns.
+    dor_cols = ['te_size',
+                'kernel_s',
+                'time',
+                'date_ran']
+
+    # Read benchmarks data and drop some columns.
+    data_temp = pd.read_csv('data\\benchmarks.csv',)
+    data = pd.DataFrame(data_temp, columns=or_cols)
+    data = data.drop(columns=dor_cols)
+
+    # Get the data of the first benchmarks and drop unnecessary columns.
+    first_data = pd.DataFrame(data, index=range(0, 22))
+    first_data = first_data.drop(columns=['lj_s', 'lj_e'])
+
+    # Columns to keep temporarily.
+    fd_columns = ['ml_type',
+                  'tr_size',
+                  'mae']
+
+    # Split the rows per matrix descriptor ('CM' rows and the rest) with a
+    # boolean mask; DataFrame.append no longer exists in pandas >= 2.0.
+    is_cm = first_data['ml_type'] == 'CM'
+    first_data_cm = first_data.loc[is_cm, fd_columns]
+    first_data_ljm = first_data.loc[~is_cm, fd_columns]
+
+    # Drop unnecessary column and rename 'mae' for later use.
+    first_data_cm = first_data_cm.drop(columns=['ml_type'])\
+        .rename(columns={'mae': 'cm_mae'})
+    first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
+        .rename(columns={'mae': 'ljm_mae'})
+    # print(first_data_cm)
+    # print(first_data_ljm)
+
+    # Get the cm data axis so it can be joined with the ljm data axis.
+    cm_axis = first_data_cm.plot(x='tr_size',
+                                 y='cm_mae',
+                                 kind='line')
+    # Get the ljm data axis and join it with the cm one.
+    plot_axis = first_data_ljm.plot(ax=cm_axis,
+                                    x='tr_size',
+                                    y='ljm_mae',
+                                    kind='line')
+    plot_axis.set_xlabel('tr_size')
+    plot_axis.set_ylabel('mae')
+    plot_axis.set_title('mae for different tr_sizes')
+    # Get the figure and save it.
+    # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
+
+    # Get the rest of the benchmark data and drop unnecessary column.
+    new_data = data.drop(index=range(0, 22))
+    new_data = new_data.drop(columns=['ml_type'])
+
+    # Get the first set and rename it.
+    nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
+    ndf_axis = nd_first.plot(x='tr_size',
+                             y='1, 1',
+                             kind='line')
+    last_axis = ndf_axis
+    for i in range(22, 99, 11):
+        lj_s = new_data['lj_s'][i]
+        lj_e = new_data['lj_e'][i]
+        new_mae = '{}, {}'.format(lj_s, lj_e)
+        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+            .drop(columns=['lj_s', 'lj_e'])\
+            .rename(columns={'mae': new_mae})
+        last_axis = nd_temp.plot(ax=last_axis,
+                                 x='tr_size',
+                                 y=new_mae,
+                                 kind='line')
+        print(nd_temp)
+
+    last_axis.set_xlabel('tr_size')
+    last_axis.set_ylabel('mae')
+    last_axis.set_title('mae for different parameters of lj(s)')
+
+    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
+
+    ndf_axis = nd_first.plot(x='tr_size',
+                             y='1, 1',
+                             kind='line')
+    last_axis = ndf_axis
+    for i in range(99, data.shape[0], 11):
+        lj_s = new_data['lj_s'][i]
+        lj_e = new_data['lj_e'][i]
+        new_mae = '{}, {}'.format(lj_s, lj_e)
+        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+            .drop(columns=['lj_s', 'lj_e'])\
+            .rename(columns={'mae': new_mae})
+        last_axis = nd_temp.plot(ax=last_axis,
+                                 x='tr_size',
+                                 y=new_mae,
+                                 kind='line')
+        print(nd_temp)
+
+    last_axis.set_xlabel('tr_size')
+    last_axis.set_ylabel('mae')
+    last_axis.set_title('mae for different parameters of lj(e)')
+
+    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
+
+
+if __name__ == '__main__':
+    # ml()  # Disabled: uncomment to run the benchmarks before plotting.
+    pl()
diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py
new file mode 100644
index 000000000..f40a18c68
--- /dev/null
+++ b/lj_matrix/c_matrix.py
@@ -0,0 +1,179 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import time
+from lj_matrix.misc import printc
+import math
+import numpy as np
+from numpy.linalg import eig
+
+
+def _cm_element(mol_data, nc_data, i, j, conversion_rate):
+    """
+    Calculates the element (i, j) of the Coulomb Matrix.
+    mol_data: molecule data, matrix of atom coordinates.
+    nc_data: nuclear charge data, array of atom data.
+    i: index of the atom of the row.
+    j: index of the atom of the column.
+    conversion_rate: factor applied to the off-diagonal elements.
+
+    Returns the value of the element.
+    """
+    x_i = mol_data[i, 0]
+    y_i = mol_data[i, 1]
+    z_i = mol_data[i, 2]
+    Z_i = nc_data[i]
+
+    x_j = mol_data[j, 0]
+    y_j = mol_data[j, 1]
+    z_j = mol_data[j, 2]
+    Z_j = nc_data[j]
+
+    # The diagonal only depends on the nuclear charge of the atom.
+    if i == j:
+        return 0.5*Z_i**2.4
+
+    # Off-diagonal: product of the charges over the distance of the atoms.
+    x = (x_i-x_j)**2
+    y = (y_i-y_j)**2
+    z = (z_i-z_j)**2
+    return conversion_rate*Z_i*Z_j/math.sqrt(x + y + z)
+
+
+def _zeros_at_end(values):
+    """
+    Moves the zeros of an array to its end, keeping the order of the
+    other values. The array is modified in place and also returned.
+    values: 1D array of values.
+    """
+    # Thanks to SO for the following lines of code.
+    # https://stackoverflow.com/a/43011036
+    nonzero = values != 0.
+
+    # Mask with as many leading True values as non-zero values there are,
+    # which are the positions the non-zero values end up in.
+    leading = nonzero.sum(0, keepdims=1) >\
+        np.arange(values.shape[0]-1, -1, -1)
+    leading = leading[::-1]
+
+    values[leading] = values[nonzero]
+    values[~leading] = 0.
+    return values
+
+
+def c_matrix(mol_data,
+             nc_data,
+             max_len=25,
+             as_eig=True,
+             bohr_radius_units=False):
+    """
+    Creates the Coulomb Matrix from the molecule data given.
+    mol_data: molecule data, matrix of atom coordinates (one row per atom,
+        columns 0, 1 and 2 are used as x, y and z).
+    nc_data: nuclear charge data, array of atom data (one value per atom).
+    max_len: maximum amount of atoms in molecule, which is the size of the
+        zero padded matrix. If the molecule has more atoms an error is
+        printed and the matrix is calculated without padding instead.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+        The eigenvalues are sorted in descending order and, if the matrix
+        is padded, their zeros are moved to the end.
+    bohr_radius_units: if True, the off-diagonal elements are multiplied
+        by 0.52917721067.
+
+    Returns the matrix or its eigenvalues. If mol_data and nc_data have
+    different lengths an error is printed and None is returned.
+    """
+    conversion_rate = 0.52917721067 if bohr_radius_units else 1
+
+    n_atoms = len(mol_data)
+    atoms = range(n_atoms)
+
+    # Both inputs must describe the same amount of atoms.
+    if n_atoms != len(nc_data):
+        print(''.join(['Error. Molecule matrix dimension is different ',
+                       'than the nuclear charge array dimension.']))
+        return None
+
+    # The padding can not be smaller than the molecule itself.
+    if max_len < n_atoms:
+        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+                       'greater than max_len. Using mol_n.']))
+        max_len = None
+
+    if not max_len:
+        # Unpadded matrix, with as many rows and columns as atoms. It is
+        # built row by row and then converted to an array.
+        cm = np.array([np.array([_cm_element(mol_data, nc_data, i, j,
+                                             conversion_rate)
+                                 for j in atoms])
+                       for i in atoms])
+        # Now the value will be returned.
+        if as_eig:
+            return np.sort(eig(cm)[0])[::-1]
+
+        return cm
+
+    # Padded matrix: only the rows and columns of the atoms are filled, the
+    # rest of the elements are left as zeros.
+    cm = np.zeros((max_len, max_len))
+    for i in atoms:
+        for j in atoms:
+            cm[i, j] = _cm_element(mol_data, nc_data, i, j, conversion_rate)
+
+    # Now the value will be returned.
+    if not as_eig:
+        return cm
+
+    # Eigenvalues in descending order, keeping the zeros that come from the
+    # padding at the end.
+    return _zeros_at_end(np.sort(eig(cm)[0])[::-1])
+
+
+def c_matrix_multiple(mol_data,
+                      nc_data,
+                      pipe=None,
+                      max_len=25,
+                      as_eig=True,
+                      bohr_radius_units=False):
+    """
+    Calculates the Coulomb Matrix of multiple molecules.
+    mol_data: molecules data, one matrix of atom coordinates per molecule.
+    nc_data: nuclear charges data, one array of atom data per molecule.
+    pipe: for multiprocessing purposes. If given, the data calculated is
+        also sent through it.
+    max_len: maximum amount of atoms in molecule.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+    bohr_radius_units: if units should be in bohr's radius units.
+    """
+    printc('Coulomb Matrices calculation started.', 'CYAN')
+    start = time.perf_counter()
+    matrices = [c_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
+                for mol, nc in zip(mol_data, nc_data)]
+    cm_data = np.array(matrices)
+
+    elapsed = time.perf_counter() - start
+    printc('\tCM calculation took {:.4f} seconds.'.format(elapsed), 'GREEN')
+
+    if pipe:
+        pipe.send(cm_data)
+
+    return cm_data
diff --git a/lj_matrix/cholesky_solve.py b/lj_matrix/cholesky_solve.py
new file mode 100644
index 000000000..bc6a572a3
--- /dev/null
+++ b/lj_matrix/cholesky_solve.py
@@ -0,0 +1,64 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import numpy as np
+from numpy.linalg import cholesky
+
+
+def cholesky_solve(K, y):
+    """
+    Applies Cholesky decomposition to obtain the 'alpha coefficients',
+    that is, the solution a of (K + lambda I)a = y with lambda = 1e-8.
+    K: kernel, a symmetric positive definite matrix. It is not modified,
+        the lambda value is added to a copy of it.
+    y: known parameters, one value per row of K.
+
+    Returns the array with the alpha coefficients.
+    Raises numpy.linalg.LinAlgError if the decomposition fails.
+    """
+    # The initial mathematical problem is to solve Ka=y.
+
+    # First, add a small lambda value. This is done on a copy because the
+    # kernel belongs to the caller, who may still need it unchanged.
+    K = np.array(K, dtype=float)
+    K[np.diag_indices_from(K)] += 1e-8
+
+    # Get the Cholesky decomposition of the kernel (K = L LT).
+    L = cholesky(K)
+    size = len(L)
+
+    # Solve Lx=y for x (forward substitution). The sum over the values of x
+    # that are already known is the dot product with the row of L.
+    x = np.zeros(size)
+    for i in range(size):
+        x[i] = (y[i] - np.dot(L[i, :i], x[:i])) / L[i, i]
+
+    # Now, solve LTa=x for a (backward substitution). Because of the form
+    # of LT (upper triangular matrix), the rows are visited from the last
+    # one to the first one, so the values of a to the right of the
+    # diagonal are already known when they are needed.
+    L2 = L.T
+    a = np.zeros(size)
+    for i in reversed(range(size)):
+        a[i] = (x[i] - np.dot(L2[i, i + 1:], a[i + 1:])) / L2[i, i]
+
+    return a
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
new file mode 100644
index 000000000..acf5455f4
--- /dev/null
+++ b/lj_matrix/do_ml.py
@@ -0,0 +1,108 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import time
+from misc import printc
+import numpy as np
+from lj_matrix.gauss_kernel import gauss_kernel
+from lj_matrix.cholesky_solve import cholesky_solve
+
+
+def do_ml(desc_data,
+          energy_data,
+          training_size,
+          desc_type=None,
+          pipe=None,
+          test_size=None,
+          sigma=1000.0,
+          show_msgs=True):
+    """
+    Does the ML methodology: trains a kernel model with the first part of
+    the data and tests it with the ending part of the data.
+    desc_data: descriptor (or representation) data.
+    energy_data: energy data associated with desc_data.
+    training_size: size of the training set to use.
+    desc_type: string with the name of the descriptor used. It is only
+        used for identification purposes (messages and results sent).
+    pipe: for multiprocessing purposes. When given, the list [desc_type,
+        training_size, test_size, sigma, mae, time] is sent through it.
+    test_size: size of the test set to use. If no size is given, the
+        remaining molecules are used, up to a maximum of 1500.
+    sigma: width of the kernel, passed as is to gauss_kernel.
+    show_msgs: Show debug messages or not.
+
+    Returns the tuple (mae, time), where mae is the mean absolute error of
+    the predictions and time the seconds the process took. If the sizes
+    are not valid an error is printed, None is returned and nothing is
+    sent through the pipe.
+    """
+    # NOTE(review): this module imports printc from 'misc' while its sibling
+    # modules import it from 'lj_matrix.misc'; confirm that it resolves.
+    n_samples = len(desc_data)
+    desc_type = desc_type or 'NOT SPECIFIED'
+
+    # Guard clauses for inconsistent sizes.
+    if n_samples != len(energy_data):
+        printc(''.join(['ERROR. Descriptor data size different ',
+                        'than energy data size.']), 'RED')
+        return None
+
+    if training_size >= n_samples:
+        printc('ERROR. Training size greater or equal than data size.', 'RED')
+        return None
+
+    if not test_size:
+        test_size = min(n_samples - training_size, 1500)
+
+    tic = time.perf_counter()
+    if show_msgs:
+        printc('{} ML started.'.format(desc_type), 'GREEN')
+        printc('\tTraining size: {}'.format(training_size), 'CYAN')
+        printc('\tTest size: {}'.format(test_size), 'CYAN')
+        printc('\tSigma: {}'.format(sigma), 'CYAN')
+
+    # Training: solve for the alpha coefficients of the training kernel.
+    X_train = desc_data[:training_size]
+    y_train = energy_data[:training_size]
+    alpha = cholesky_solve(gauss_kernel(X_train, X_train, sigma), y_train)
+
+    # Testing: predict the energies of the last test_size elements.
+    X_test = desc_data[-test_size:]
+    y_test = energy_data[-test_size:]
+    y_predicted = np.dot(gauss_kernel(X_test, X_train, sigma), alpha)
+
+    mae = np.mean(np.abs(y_predicted - y_test))
+    if show_msgs:
+        printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
+
+    tictoc = time.perf_counter() - tic
+    if show_msgs:
+        printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
+               'GREEN')
+        printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
+        printc('\t\tTest size: {}'.format(test_size), 'CYAN')
+        printc('\t\tSigma: {}'.format(sigma), 'CYAN')
+
+    if pipe:
+        pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
+
+    return mae, tictoc
diff --git a/lj_matrix/frob_norm.py b/lj_matrix/frob_norm.py
new file mode 100644
index 000000000..4c3a2945d
--- /dev/null
+++ b/lj_matrix/frob_norm.py
@@ -0,0 +1,51 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import math
+
+
+def frob_norm(array):
+    """
+    Calculates the frobenius norm of a given array or matrix, that is, the
+    square root of the sum of the squares of all of its elements.
+    array: array of data (numpy array of up to 2 dimensions). Matrices
+        do not need to be square.
+
+    Returns the norm as a float. If the array has more than 2 dimensions
+    an error is printed and None is returned.
+    """
+
+    arr_sh_len = len(array.shape)
+
+    # Only 'vectors' and matrices are supported.
+    if arr_sh_len > 2:
+        print('Error. Array size greater than 2 ({}).'.format(arr_sh_len))
+        return None
+
+    # The flat iterator visits every element in row-major order whatever
+    # the shape is. Indexing both axes with range(len(array)) would only
+    # be right for square matrices, as len() is the amount of rows.
+    fn = 0.0
+    for value in array.flat:
+        fn += value*value
+
+    return math.sqrt(fn)
diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py
new file mode 100644
index 000000000..5dd8e6406
--- /dev/null
+++ b/lj_matrix/gauss_kernel.py
@@ -0,0 +1,49 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import math
+import numpy as np
+from lj_matrix.frob_norm import frob_norm
+
+
+def gauss_kernel(X_1, X_2, sigma):
+    """
+    Calculates the Gaussian Kernel, K[i, j] = exp(-|X_1[i] - X_2[j]|^2 /
+    (2 sigma^2)), where |.| is the frobenius norm.
+    X_1: first representations (array of vectors or matrices).
+    X_2: second representations, each with the same shape as those of X_1.
+    sigma: kernel width.
+    Returns the kernel, a matrix of shape (len(X_1), len(X_2)).
+    """
+    X_1 = np.asarray(X_1, dtype=float)
+    X_2 = np.asarray(X_2, dtype=float)
+    inv_sigma = -0.5 / (sigma*sigma)
+
+    K = np.zeros((len(X_1), len(X_2)))
+    # Axes of a single representation, summed to get its squared norm.
+    rep_axes = tuple(range(1, X_2.ndim))
+    for i, x_1 in enumerate(X_1):
+        # Squared norms of X_1[i] against every element of X_2 at once.
+        diff = X_2 - x_1
+        K[i] = np.exp(inv_sigma * np.sum(diff*diff, axis=rep_axes))
+
+    return K
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
new file mode 100644
index 000000000..4f63e95ca
--- /dev/null
+++ b/lj_matrix/lj_matrix.py
@@ -0,0 +1,207 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import time
+from lj_matrix.misc import printc
+import math
+import numpy as np
+from numpy.linalg import eig
+
+
+def lj_matrix(mol_data,
+              nc_data,
+              sigma=1.0,
+              epsilon=1.0,
+              max_len=25,
+              as_eig=True,
+              bohr_radius_units=False):
+    """
+    Creates the Lennard-Jones Matrix from the molecule data given:
+    0.5*Z_i**2.4 on the diagonal and, for atoms i != j at distance r,
+    4*epsilon*((sigma/r)**12 - (sigma/r)**6) elsewhere.
+    mol_data: molecule data, matrix of atom coordinates.
+    nc_data: nuclear charge data, array of atom data.
+    sigma: length parameter of the Lennard-Jones term above.
+    epsilon: energy parameter of the Lennard-Jones term above.
+    max_len: maximum amount of atoms in molecule (size of the padded
+        matrix); if exceeded, a message is printed and mol_n is used.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+    bohr_radius_units: if True, r is multiplied by 0.52917721067.
+    If mol_data and nc_data differ in length, only an error is printed.
+    """
+    if bohr_radius_units:
+        conversion_rate = 0.52917721067
+    else:
+        conversion_rate = 1
+
+    mol_n = len(mol_data)
+    mol_nr = range(mol_n)
+
+    if not mol_n == len(nc_data):
+        print(''.join(['Error. Molecule matrix dimension is different ',
+                       'than the nuclear charge array dimension.']))
+    else:
+        if max_len < mol_n:
+            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+                           'greater than max_len. Using mol_n.']))
+            max_len = None
+
+        if max_len:
+            lj = np.zeros((max_len, max_len))
+            ml_r = range(max_len)
+
+            # Actual calculation of the Lennard-Jones matrix.
+            for i in ml_r:
+                if i < mol_n:
+                    x_i = mol_data[i, 0]
+                    y_i = mol_data[i, 1]
+                    z_i = mol_data[i, 2]
+                    Z_i = nc_data[i]
+                else:
+                    break
+
+                for j in ml_r:
+                    if j < mol_n:
+                        x_j = mol_data[j, 0]
+                        y_j = mol_data[j, 1]
+                        z_j = mol_data[j, 2]
+
+                        x = (x_i-x_j)**2
+                        y = (y_i-y_j)**2
+                        z = (z_i-z_j)**2
+
+                        if i == j:
+                            lj[i, j] = (0.5*Z_i**2.4)
+                        else:
+                            # Only reached when i != j, so the zero
+                            # distance of an atom to itself never divides.
+                            # r_2 is (sigma/r)^2: using the squared
+                            # distance avoids a square root, and the
+                            # conversion factor is included in r^2.
+                            r_2 = sigma**2/(conversion_rate**2*(x + y + z))
+
+                            r_6 = math.pow(r_2, 3)
+                            r_12 = math.pow(r_6, 2)
+                            lj[i, j] = (4*epsilon*(r_12 - r_6))
+                    else:
+                        break
+
+            # Now the value will be returned.
+            if as_eig:
+                lj_sorted = np.sort(eig(lj)[0])[::-1]
+                # Mask trick taken from https://stackoverflow.com/a/43011036
+                # f_mask ends up True for the first N positions, N being the
+                # number of nonzero eigenvalues, so they keep their order and
+                # the zeros are kept at the end.
+                mask = lj_sorted != 0.
+                f_mask = mask.sum(0, keepdims=1) >\
+                    np.arange(lj_sorted.shape[0]-1, -1, -1)
+
+                f_mask = f_mask[::-1]
+                lj_sorted[f_mask] = lj_sorted[mask]
+                lj_sorted[~f_mask] = 0.
+
+                return lj_sorted
+
+            else:
+                return lj
+
+        else:
+            lj_temp = []
+            # Actual calculation of the Lennard-Jones matrix (no padding).
+            for i in mol_nr:
+                x_i = mol_data[i, 0]
+                y_i = mol_data[i, 1]
+                z_i = mol_data[i, 2]
+                Z_i = nc_data[i]
+
+                lj_row = []
+                for j in mol_nr:
+                    x_j = mol_data[j, 0]
+                    y_j = mol_data[j, 1]
+                    z_j = mol_data[j, 2]
+
+                    x = (x_i-x_j)**2
+                    y = (y_i-y_j)**2
+                    z = (z_i-z_j)**2
+
+                    if i == j:
+                        lj_row.append(0.5*Z_i**2.4)
+                    else:
+                        # Only reached when i != j, so the zero
+                        # distance of an atom to itself never divides.
+                        # r_2 is (sigma/r)^2: using the squared
+                        # distance avoids a square root, and the
+                        # conversion factor is included in r^2.
+                        r_2 = sigma**2/(conversion_rate**2*(x + y + z))
+
+                        r_6 = math.pow(r_2, 3)
+                        r_12 = math.pow(r_6, 2)
+                        lj_row.append(4*epsilon*(r_12 - r_6))
+
+                lj_temp.append(np.array(lj_row))
+
+            lj = np.array(lj_temp)
+            # Now the value will be returned.
+            if as_eig:
+                return np.sort(eig(lj)[0])[::-1]
+            else:
+                return lj
+
+
+def lj_matrix_multiple(mol_data,
+                       nc_data,
+                       pipe=None,
+                       sigma=1,
+                       epsilon=1,
+                       max_len=25,
+                       as_eig=True,
+                       bohr_radius_units=False):
+    """
+    Computes the Lennard-Jones Matrix of every molecule given.
+    mol_data: molecule data, one matrix of atom coordinates per molecule.
+    nc_data: nuclear charge data, one array per molecule.
+    pipe: for multiprocessing purposes. When given, the calculated
+        data is also sent through it.
+    sigma, epsilon, max_len, as_eig, bohr_radius_units: passed on
+        unchanged to lj_matrix for each molecule.
+    Returns a numpy array with one lj_matrix result per molecule.
+    Start and timing messages are printed with printc.
+    """
+    printc('L-J Matrices calculation started.', 'CYAN')
+    start = time.perf_counter()
+
+    # mol_data and nc_data are walked in lockstep, molecule by molecule.
+    results = []
+    for coordinates, charges in zip(mol_data, nc_data):
+        results.append(lj_matrix(coordinates, charges, sigma, epsilon,
+                                 max_len, as_eig, bohr_radius_units))
+    ljm_data = np.array(results)
+
+    elapsed = time.perf_counter() - start
+    printc('\tL-JM calculation took {:.4f} seconds.'.format(elapsed), 'GREEN')
+
+    if pipe:
+        # Hand the result over to whoever listens on the pipe.
+        pipe.send(ljm_data)
+
+    return ljm_data
diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py
new file mode 100644
index 000000000..c50653a5c
--- /dev/null
+++ b/lj_matrix/misc.py
@@ -0,0 +1,53 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from colorama import init, Fore, Style
+
+init()
+
+
+def printc(text, color):
+    """
+    Prints text normally, but in color, using colorama.
+    text: string with the text to print.
+    color: name of the color to be used; one of 'BLACK', 'RED',
+        'GREEN', 'YELLOW', 'BLUE', 'MAGENTA', 'CYAN', 'WHITE', 'RESET'.
+    If the name is not known, a message saying so is printed in red
+    and the text itself is printed after Fore.RESET instead.
+    """
+    color_names = ('BLACK', 'RED', 'GREEN', 'YELLOW', 'BLUE',
+                   'MAGENTA', 'CYAN', 'WHITE', 'RESET')
+
+    # Every supported name is also the name of a colorama Fore attribute.
+    palette = {name: getattr(Fore, name) for name in color_names}
+
+    if color in palette:
+        prefix = palette[color]
+    else:
+        # Unknown name: say so in red and fall back to the default.
+        warning = '\'{}\' not found, using default color.'.format(color)
+        print(Fore.RED + warning + Style.RESET_ALL)
+        prefix = Fore.RESET
+
+    # Style.RESET_ALL goes last so the color does not leak into
+    # whatever is printed afterwards.
+    print(prefix + text + Style.RESET_ALL)
diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py
new file mode 100644
index 000000000..b54691fb0
--- /dev/null
+++ b/lj_matrix/read_qm7_data.py
@@ -0,0 +1,144 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import os
+import time
+import numpy as np
+import random
+from lj_matrix.misc import printc
+
+
+# 'periodic_table_of_elements.txt' retrieved from
+# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee
+def read_nc_data(data_path):
+    """
+    Reads nuclear charge data from file and returns a dictionary.
+    data_path: path to the data directory, which must contain the
+        comma-separated file 'periodic_table_of_elements.txt'.
+    Returns a dictionary mapping the third column of each row (used
+    as element symbol) to the first column converted to int.
+    """
+    fname = 'periodic_table_of_elements.txt'
+    # os.path.join instead of a hard-coded '\\', so the file is also
+    # found on systems whose path separator is not the backslash.
+    with open(os.path.join(data_path, fname), 'r') as infile:
+        next(infile, None)  # The first line is a header; skip it.
+        # Blank lines (e.g. a trailing one) carry no data.
+        rows = [line.split(sep=',') for line in infile if line.strip()]
+
+    # Dictionary of nuclear charge.
+    return {row[2]: int(row[0]) for row in rows}
+
+
+# 'hof_qm7.txt' retrieved from
+# https://github.com/qmlcode/tutorial
+def reas_db_data(zi_data,
+                 data_path,
+                 r_seed=111):
+    """
+    Reads molecule database and extracts
+    its contents as usable variables.
+    zi_data: dictionary containing nuclear charge data
+        (element symbol -> nuclear charge).
+    data_path: path to the data directory, which must contain
+        'hof_qm7.txt' and the xyz files it lists.
+    r_seed: random seed used to shuffle the order of the molecules.
+    Returns the tuple (molecules, nuclear_charge, energy_pbe0,
+    energy_delta), all of them in the same shuffled order:
+        molecules: one (atoms, 3) coordinate array per molecule.
+        nuclear_charge: one array of nuclear charges per molecule.
+        energy_pbe0: second column of 'hof_qm7.txt'.
+        energy_delta: second column minus third column.
+    The working directory is not changed; every file is opened
+    through its path inside data_path.
+    """
+    def as_array(arrays):
+        # np.array() refuses ragged input on recent NumPy versions, so
+        # arrays of different shapes go into a 1-D object array instead.
+        if len({array.shape for array in arrays}) <= 1:
+            return np.array(arrays)
+        ragged = np.empty(len(arrays), dtype=object)
+        for index, array in enumerate(arrays):
+            ragged[index] = array
+        return ragged
+
+    with open(os.path.join(data_path, 'hof_qm7.txt'), 'r') as infile:
+        lines = infile.readlines()
+
+    # Temporary energy dictionary.
+    energy_temp = dict()
+    for line in lines:
+        xyz_data = line.split()
+        if not xyz_data:
+            continue  # Blank line.
+        hof = float(xyz_data[1])
+        dftb = float(xyz_data[2])
+        energy_temp[xyz_data[0]] = np.array([hof, hof - dftb])
+
+    # Shuffle the molecule names reproducibly.
+    random.seed(r_seed)
+    et_keys = list(energy_temp.keys())
+    random.shuffle(et_keys)
+
+    mol_data = []
+    mol_nc_data = []
+    # Actual reading of the xyz files.
+    for xyz_name in et_keys:
+        with open(os.path.join(data_path, xyz_name), 'r') as xyz_file:
+            # The two leading lines of each xyz file are skipped.
+            atom_lines = xyz_file.readlines()[2:]
+
+        atoms = [line.split() for line in atom_lines if line.strip()]
+        mol_nc_data.append(np.array([float(zi_data[atom[0]])
+                                     for atom in atoms]))
+        mol_data.append(np.array([atom[1:4] for atom in atoms],
+                                 dtype=float))
+
+    # Convert everything to a numpy array.
+    molecules = as_array(mol_data)
+    nuclear_charge = as_array(mol_nc_data)
+    energy_pbe0 = np.array([energy_temp[key][0] for key in et_keys])
+    energy_delta = np.array([energy_temp[key][1] for key in et_keys])
+
+    return molecules, nuclear_charge, energy_pbe0, energy_delta
+
+
+def read_qm7_data():
+    """
+    Reads all the qm7 data from the 'data' directory and returns
+    (zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta).
+    """
+    tic = time.perf_counter()
+    printc('Data reading started.', 'CYAN')
+
+    init_path = os.getcwd()
+    os.chdir('data')
+    try:
+        data_path = os.getcwd()
+        zi_data = read_nc_data(data_path)
+        molecules, nuclear_charge, energy_pbe0, energy_delta = \
+            reas_db_data(zi_data, data_path)
+    finally:
+        os.chdir(init_path)  # Restored even when reading fails.
+    toc = time.perf_counter()
+    printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
+    return zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta
-- 
cgit v1.2.3-70-g09d2


From 124c3c5eb77c807b8a8a78413f3800720914c8e1 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Wed, 18 Dec 2019 08:15:18 -0700
Subject: Fix bugs

---
 lj_matrix/__init__.py      |  23 ++++++++++
 lj_matrix/__main__.py      |  13 +++---
 lj_matrix/c_matrix.py      |   2 +-
 lj_matrix/do_ml.py         |   4 +-
 lj_matrix/gauss_kernel.py  |   2 +-
 lj_matrix/lj_matrix.py     |   2 +-
 lj_matrix/read_qm7_data.py |   2 +-
 lj_matrix/version.py       |  23 ++++++++++
 setup.py                   | 102 +++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 161 insertions(+), 12 deletions(-)
 create mode 100644 lj_matrix/version.py
 create mode 100644 setup.py

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index 48cd14913..47d7e5013 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -20,3 +20,26 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
+from misc import printc
+from read_qm7_data import read_qm7_data, read_nc_data, reas_db_data
+from c_matrix import c_matrix, c_matrix_multiple
+from cholesky_solve import cholesky_solve
+from do_ml import do_ml
+from frob_norm import frob_norm
+from gauss_kernel import gauss_kernel
+from lj_matrix import lj_matrix, lj_matrix_multiple
+
+# If somebody does "from package import *", this is what they will
+# be able to access:
+__all__ = ['printc',
+           'read_qm7_data',
+           'read_nc_data',
+           'reas_db_data',
+           'c_matrix',
+           'c_matrix_multiple',
+           'cholesky_solve',
+           'do_ml',
+           'frob_norm',
+           'gauss_kernel',
+           'lj_matrix',
+           'lj_matrix_multiple']
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index 4e13f4995..5a0e95b94 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -24,11 +24,11 @@ import time
 from multiprocessing import Process, Pipe
 # import matplotlib.pyplot as plt
 import pandas as pd
-from lj_matrix.misc import printc
-from lj_matrix.read_qm7_data import read_qm7_data
-from lj_matrix.c_matrix import c_matrix_multiple
-from lj_matrix.lj_matrix import lj_matrix_multiple
-from lj_matrix.do_ml import do_ml
+from misc import printc
+from read_qm7_data import read_qm7_data
+from c_matrix import c_matrix_multiple
+from lj_matrix import lj_matrix_multiple
+from do_ml import do_ml
 
 
 # Test
@@ -235,4 +235,5 @@ def pl():
 
 if __name__ == '__main__':
     # ml()
-    pl()
+    # pl()
+    print('OK!')
diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py
index f40a18c68..4de711a1b 100644
--- a/lj_matrix/c_matrix.py
+++ b/lj_matrix/c_matrix.py
@@ -21,7 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import time
-from lj_matrix.misc import printc
+from misc import printc
 import math
 import numpy as np
 from numpy.linalg import eig
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index acf5455f4..c88533e68 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -23,8 +23,8 @@ SOFTWARE.
 import time
 from misc import printc
 import numpy as np
-from lj_matrix.gauss_kernel import gauss_kernel
-from lj_matrix.cholesky_solve import cholesky_solve
+from gauss_kernel import gauss_kernel
+from cholesky_solve import cholesky_solve
 
 
 def do_ml(desc_data,
diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py
index 5dd8e6406..0dfc65d59 100644
--- a/lj_matrix/gauss_kernel.py
+++ b/lj_matrix/gauss_kernel.py
@@ -22,7 +22,7 @@ SOFTWARE.
 """
 import math
 import numpy as np
-from lj_matrix.frob_norm import frob_norm
+from frob_norm import frob_norm
 
 
 def gauss_kernel(X_1, X_2, sigma):
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
index 4f63e95ca..2a8e0d956 100644
--- a/lj_matrix/lj_matrix.py
+++ b/lj_matrix/lj_matrix.py
@@ -21,7 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import time
-from lj_matrix.misc import printc
+from misc import printc
 import math
 import numpy as np
 from numpy.linalg import eig
diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py
index b54691fb0..068ea1a42 100644
--- a/lj_matrix/read_qm7_data.py
+++ b/lj_matrix/read_qm7_data.py
@@ -24,7 +24,7 @@ import os
 import time
 import numpy as np
 import random
-from lj_matrix.misc import printc
+from misc import printc
 
 
 # 'periodic_table_of_elements.txt' retrieved from
diff --git a/lj_matrix/version.py b/lj_matrix/version.py
new file mode 100644
index 000000000..fab58433d
--- /dev/null
+++ b/lj_matrix/version.py
@@ -0,0 +1,23 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+__version__ = '0.0.1'
diff --git a/setup.py b/setup.py
new file mode 100644
index 000000000..719ef3ce0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,102 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# This setup.py template was obtained from
+# https://github.com/navdeep-G/setup.py/blob/master/setup.py
+# ----------------------------------------------------------------------
+# Note: To use the 'upload' functionality of this file, you must:
+#   $ pipenv install twine --dev
+
+import io
+import os
+
+from setuptools import find_packages, setup
+
+from lj_matrix.version import __version__
+
+# Package meta-data.
+NAME = 'lj_matrix'
+DESCRIPTION = 'A Lennard Jones matrix exploration.'
+URL = 'https://github.com/luevano/lj_matrix'
+EMAIL = 'a301436@uach.mx'
+AUTHOR = 'David Luevano Alvarado'
+REQUIRES_PYTHON = '>=3.7'
+VERSION = __version__
+# VERSION = '0.0.1'
+
+# Third-party packages the lj_matrix modules import at run time.
+REQUIRED = [
+    'numpy', 'colorama', 'pandas',
+]
+
+# What packages are optional?
+EXTRAS = {
+    # 'fancy feature': ['django'],
+}
+
+# The rest you shouldn't have to touch too much :)
+# ------------------------------------------------
+# Except, perhaps the License and Trove Classifiers!
+# If you do change the License, remember to change
+# the Trove Classifier for that!
+
+here = os.path.abspath(os.path.dirname(__file__))
+readme_path = os.path.join(here, 'README.md')
+
+# The README becomes the long description; without a README the short
+# description is used. Note: this only works if 'README.md' is listed
+# in the MANIFEST.in file!
+try:
+    with io.open(readme_path, encoding='utf-8') as readme:
+        long_description = '\n' + readme.read()
+except FileNotFoundError:
+    long_description = DESCRIPTION
+
+# Where the magic happens. Besides the template's "tests" patterns, the
+# "test" package of this repository is excluded from the distribution.
+setup(
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    author=AUTHOR,
+    author_email=EMAIL,
+    python_requires=REQUIRES_PYTHON,
+    url=URL,
+    packages=find_packages(exclude=["test", "test.*",
+                                    "tests", "*.tests",
+                                    "*.tests.*", "tests.*"]),
+    # If your package is a single module, use this instead of 'packages':
+    # py_modules=['mypackage'],
+    install_requires=REQUIRED,
+    extras_require=EXTRAS,
+    include_package_data=True,
+    license='MIT',
+    classifiers=[
+        # Trove classifiers
+        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.7'
+    ]
+)
-- 
cgit v1.2.3-70-g09d2


From a50d424d0ab7dd4cc6a2d6fc94371fa65a0d89b2 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Wed, 18 Dec 2019 09:53:44 -0700
Subject: Fix test issues

---
 lj_matrix/__init__.py      | 27 ++++++++++++++-------------
 lj_matrix/__main__.py      | 10 +++++-----
 lj_matrix/c_matrix.py      |  2 +-
 lj_matrix/do_ml.py         |  6 +++---
 lj_matrix/gauss_kernel.py  |  2 +-
 lj_matrix/lj_matrix.py     |  2 +-
 lj_matrix/read_qm7_data.py |  2 +-
 test/__init__.py           | 22 ++++++++++++++++++++++
 test/test_c_matrix.py      | 33 +++++++++++++++++++++++++++++++++
 9 files changed, 81 insertions(+), 25 deletions(-)
 create mode 100644 test/__init__.py
 create mode 100644 test/test_c_matrix.py

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index 47d7e5013..5019bd51d 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -20,26 +20,27 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-from misc import printc
-from read_qm7_data import read_qm7_data, read_nc_data, reas_db_data
-from c_matrix import c_matrix, c_matrix_multiple
-from cholesky_solve import cholesky_solve
-from do_ml import do_ml
-from frob_norm import frob_norm
-from gauss_kernel import gauss_kernel
-from lj_matrix import lj_matrix, lj_matrix_multiple
+from lj_matrix.misc import printc
+from lj_matrix.read_qm7_data import read_nc_data, reas_db_data, read_qm7_data
+from lj_matrix.c_matrix import c_matrix, c_matrix_multiple
+from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple
+from lj_matrix.frob_norm import frob_norm
+from lj_matrix.gauss_kernel import gauss_kernel
+from lj_matrix.cholesky_solve import cholesky_solve
+from lj_matrix.do_ml import do_ml
+
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
 __all__ = ['printc',
-           'read_qm7_data',
            'read_nc_data',
            'reas_db_data',
+           'read_qm7_data',
            'c_matrix',
            'c_matrix_multiple',
-           'cholesky_solve',
-           'do_ml',
+           'lj_matrix',
+           'lj_matrix_multiple',
            'frob_norm',
            'gauss_kernel',
-           'lj_matrix',
-           'lj_matrix_multiple']
+           'cholesky_solve',
+           'do_ml']
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index 5a0e95b94..0b2a7c6f8 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -24,11 +24,11 @@ import time
 from multiprocessing import Process, Pipe
 # import matplotlib.pyplot as plt
 import pandas as pd
-from misc import printc
-from read_qm7_data import read_qm7_data
-from c_matrix import c_matrix_multiple
-from lj_matrix import lj_matrix_multiple
-from do_ml import do_ml
+from lj_matrix.misc import printc
+from lj_matrix.read_qm7_data import read_qm7_data
+from lj_matrix.c_matrix import c_matrix_multiple
+from lj_matrix.lj_matrix import lj_matrix_multiple
+from lj_matrix.do_ml import do_ml
 
 
 # Test
diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py
index 4de711a1b..f21ccfd8c 100644
--- a/lj_matrix/c_matrix.py
+++ b/lj_matrix/c_matrix.py
@@ -21,10 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import time
-from misc import printc
 import math
 import numpy as np
 from numpy.linalg import eig
+from lj_matrix.misc import printc
 
 
 def c_matrix(mol_data,
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index c88533e68..ba88a6fd8 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -21,10 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import time
-from misc import printc
 import numpy as np
-from gauss_kernel import gauss_kernel
-from cholesky_solve import cholesky_solve
+from lj_matrix.misc import printc
+from lj_matrix.gauss_kernel import gauss_kernel
+from lj_matrix.cholesky_solve import cholesky_solve
 
 
 def do_ml(desc_data,
diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py
index 0dfc65d59..5dd8e6406 100644
--- a/lj_matrix/gauss_kernel.py
+++ b/lj_matrix/gauss_kernel.py
@@ -22,7 +22,7 @@ SOFTWARE.
 """
 import math
 import numpy as np
-from frob_norm import frob_norm
+from lj_matrix.frob_norm import frob_norm
 
 
 def gauss_kernel(X_1, X_2, sigma):
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
index 2a8e0d956..2a56a3cdf 100644
--- a/lj_matrix/lj_matrix.py
+++ b/lj_matrix/lj_matrix.py
@@ -21,10 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import time
-from misc import printc
 import math
 import numpy as np
 from numpy.linalg import eig
+from lj_matrix.misc import printc
 
 
 def lj_matrix(mol_data,
diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py
index 068ea1a42..b54691fb0 100644
--- a/lj_matrix/read_qm7_data.py
+++ b/lj_matrix/read_qm7_data.py
@@ -24,7 +24,7 @@ import os
 import time
 import numpy as np
 import random
-from misc import printc
+from lj_matrix.misc import printc
 
 
 # 'periodic_table_of_elements.txt' retrieved from
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 000000000..8b866e928
--- /dev/null
+++ b/test/__init__.py
@@ -0,0 +1,22 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
\ No newline at end of file
diff --git a/test/test_c_matrix.py b/test/test_c_matrix.py
new file mode 100644
index 000000000..a8bb5ae34
--- /dev/null
+++ b/test/test_c_matrix.py
@@ -0,0 +1,33 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import unittest
+from lj_matrix.c_matrix import c_matrix
+
+
+class TestCMatrix(unittest.TestCase):
+    def test_c_matrix(self):
+        self.assertAlmostEqual(1, 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
cgit v1.2.3-70-g09d2


From 72be4105825c639cf9dfad6229c7a1d62a16c44d Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Mon, 23 Dec 2019 11:48:32 -0700
Subject: Change name convention

---
 lj_matrix/do_ml.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index ba88a6fd8..bb954a0ae 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -79,17 +79,17 @@ def do_ml(desc_data,
         printc('\tTest size: {}'.format(test_size), 'CYAN')
         printc('\tSigma: {}'.format(sigma), 'CYAN')
 
-    Xcm_training = desc_data[:training_size]
-    Ycm_training = energy_data[:training_size]
-    Kcm_training = gauss_kernel(Xcm_training, Xcm_training, sigma)
-    alpha_cm = cholesky_solve(Kcm_training, Ycm_training)
+    X_training = desc_data[:training_size]
+    Y_training = energy_data[:training_size]
+    K_training = gauss_kernel(X_training, X_training, sigma)
+    alpha_ = cholesky_solve(K_training, Y_training)
 
-    Xcm_test = desc_data[-test_size:]
-    Ycm_test = energy_data[-test_size:]
-    Kcm_test = gauss_kernel(Xcm_test, Xcm_training, sigma)
-    Ycm_predicted = np.dot(Kcm_test, alpha_cm)
+    X_test = desc_data[-test_size:]
+    Y_test = energy_data[-test_size:]
+    K_test = gauss_kernel(X_test, X_training, sigma)
+    Y_predicted = np.dot(K_test, alpha_)
 
-    mae = np.mean(np.abs(Ycm_predicted - Ycm_test))
+    mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
         printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
 
-- 
cgit v1.2.3-70-g09d2


From db64425a5580a49312e313a6e75e7a296eb93b35 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Mon, 23 Dec 2019 12:23:46 -0700
Subject: Restructure code and bug fix

---
 lj_matrix/__init__.py                 |  4 +-
 lj_matrix/__main__.py                 | 31 ++--------------
 lj_matrix/lj_matrix.py                |  6 ++-
 lj_matrix/parallel_create_matrices.py | 70 +++++++++++++++++++++++++++++++++++
 lj_matrix/read_qm7_data.py            |  6 +--
 5 files changed, 83 insertions(+), 34 deletions(-)
 create mode 100644 lj_matrix/parallel_create_matrices.py

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index 5019bd51d..d7794d3be 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -21,7 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from lj_matrix.misc import printc
-from lj_matrix.read_qm7_data import read_nc_data, reas_db_data, read_qm7_data
+from lj_matrix.read_qm7_data import read_nc_data, read_db_data, read_qm7_data
 from lj_matrix.c_matrix import c_matrix, c_matrix_multiple
 from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple
 from lj_matrix.frob_norm import frob_norm
@@ -34,7 +34,7 @@ from lj_matrix.do_ml import do_ml
 # be able to access:
 __all__ = ['printc',
            'read_nc_data',
-           'reas_db_data',
+           'read_db_data',
            'read_qm7_data',
            'c_matrix',
            'c_matrix_multiple',
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index 0b2a7c6f8..8e52031f1 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -26,8 +26,7 @@ from multiprocessing import Process, Pipe
 import pandas as pd
 from lj_matrix.misc import printc
 from lj_matrix.read_qm7_data import read_qm7_data
-from lj_matrix.c_matrix import c_matrix_multiple
-from lj_matrix.lj_matrix import lj_matrix_multiple
+from lj_matrix.parallel_create_matrices import parallel_create_matrices
 from lj_matrix.do_ml import do_ml
 
 
@@ -40,32 +39,10 @@ def ml():
     init_time = time.perf_counter()
 
     # Data reading.
-    zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\
-        read_qm7_data()
+    molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data()
 
     # Matrices calculation.
-    procs = []
-    pipes = []
-
-    # cm_recv, cm_send = Pipe(False)
-    # p1 = Process(target=c_matrix_multiple,
-    #              args=(molecules, nuclear_charge, cm_send))
-    # procs.append(p1)
-    # pipes.append(cm_recv)
-    # p1.start()
-
-    ljm_recv, ljm_send = Pipe(False)
-    p2 = Process(target=lj_matrix_multiple,
-                 args=(molecules, nuclear_charge, ljm_send, 1, 0.25))
-    procs.append(p2)
-    pipes.append(ljm_recv)
-    p2.start()
-
-    # cm_data = pipes[0].recv()
-    ljm_data = pipes[0].recv()
-
-    for proc in procs:
-        proc.join()
+    cm_data, ljm_data = parallel_create_matrices(molecules, nuclear_charge)
 
     # ML calculation.
     procs = []
@@ -234,6 +211,6 @@ def pl():
 
 
 if __name__ == '__main__':
-    # ml()
+    ml()
     # pl()
     print('OK!')
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
index 2a56a3cdf..0c16b5686 100644
--- a/lj_matrix/lj_matrix.py
+++ b/lj_matrix/lj_matrix.py
@@ -38,6 +38,8 @@ def lj_matrix(mol_data,
     Creates the Lennard-Jones Matrix from the molecule data given.
     mol_data: molecule data, matrix of atom coordinates.
     nc_data: nuclear charge data, array of atom data.
+    sigma: sigma value.
+    epsilon: epsilon value.
     max_len: maximum amount of atoms in molecule.
     as_eig: if data should be returned as matrix or array of eigenvalues.
     bohr_radius_units: if units should be in bohr's radius units.
@@ -171,8 +173,8 @@ def lj_matrix(mol_data,
 def lj_matrix_multiple(mol_data,
                        nc_data,
                        pipe=None,
-                       sigma=1,
-                       epsilon=1,
+                       sigma=1.0,
+                       epsilon=1.0,
                        max_len=25,
                        as_eig=True,
                        bohr_radius_units=False):
diff --git a/lj_matrix/parallel_create_matrices.py b/lj_matrix/parallel_create_matrices.py
new file mode 100644
index 000000000..0ab691525
--- /dev/null
+++ b/lj_matrix/parallel_create_matrices.py
@@ -0,0 +1,70 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from multiprocessing import Process, Pipe
+from lj_matrix.c_matrix import c_matrix_multiple
+from lj_matrix.lj_matrix import lj_matrix_multiple
+
+
+def parallel_create_matrices(mol_data,
+                             nc_data,
+                             sigma=1.0,
+                             epsilon=1.0,
+                             max_len=25,
+                             as_eig=True,
+                             bohr_radius_units=False):
+    """
+    Creates the Coulomb and L-J matrices in parallel.
+    mol_data: molecule data, matrix of atom coordinates.
+    nc_data: nuclear charge data, array of atom data.
+    sigma: sigma value for L-J matrix.
+    epsilon: epsilon value for L-J matrix.
+    max_len: maximum amount of atoms in molecule.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+    bohr_radius_units: if units should be in bohr's radius units.
+    """
+
+    # Matrices calculation.
+    procs = []
+    pipes = []
+
+    cm_recv, cm_send = Pipe(False)
+    p1 = Process(target=c_matrix_multiple,
+                 args=(mol_data, nc_data, cm_send))
+    procs.append(p1)
+    pipes.append(cm_recv)
+    p1.start()
+
+    ljm_recv, ljm_send = Pipe(False)
+    p2 = Process(target=lj_matrix_multiple,
+                 args=(mol_data, nc_data, ljm_send, sigma, epsilon))
+    procs.append(p2)
+    pipes.append(ljm_recv)
+    p2.start()
+
+    cm_data = pipes[0].recv()
+    ljm_data = pipes[1].recv()
+
+    for proc in procs:
+        proc.join()
+
+    return cm_data, ljm_data
diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py
index b54691fb0..9bb7629ca 100644
--- a/lj_matrix/read_qm7_data.py
+++ b/lj_matrix/read_qm7_data.py
@@ -51,7 +51,7 @@ def read_nc_data(data_path):
 
 # 'hof_qm7.txt.txt' retrieved from
 # https://github.com/qmlcode/tutorial
-def reas_db_data(zi_data,
+def read_db_data(zi_data,
                  data_path,
                  r_seed=111):
     """
@@ -135,10 +135,10 @@ def read_qm7_data():
 
     zi_data = read_nc_data(data_path)
     molecules, nuclear_charge, energy_pbe0, energy_delta = \
-        reas_db_data(zi_data, data_path)
+        read_db_data(zi_data, data_path)
 
     os.chdir(init_path)
     toc = time.perf_counter()
     printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
 
-    return zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta
+    return molecules, nuclear_charge, energy_pbe0, energy_delta
-- 
cgit v1.2.3-70-g09d2


From f8bd690096e432b313ee17baa93c7422b45ee9b8 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Mon, 23 Dec 2019 12:29:35 -0700
Subject: Fix init

---
 lj_matrix/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index d7794d3be..0c2407a57 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -28,6 +28,7 @@ from lj_matrix.frob_norm import frob_norm
 from lj_matrix.gauss_kernel import gauss_kernel
 from lj_matrix.cholesky_solve import cholesky_solve
 from lj_matrix.do_ml import do_ml
+from lj_matrix.parallel_create_matrices import parallel_create_matrices
 
 
 # If somebody does "from package import *", this is what they will
@@ -43,4 +44,5 @@ __all__ = ['printc',
            'frob_norm',
            'gauss_kernel',
            'cholesky_solve',
-           'do_ml']
+           'do_ml',
+           'parallel_create_matrices']
-- 
cgit v1.2.3-70-g09d2


From f5d72558ed6ec63c7de4940c29d4f6c92605a30d Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Mon, 23 Dec 2019 12:39:28 -0700
Subject: Fix init

---
 lj_matrix/__init__.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index 0c2407a57..d59e3481c 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -20,7 +20,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-from lj_matrix.misc import printc
 from lj_matrix.read_qm7_data import read_nc_data, read_db_data, read_qm7_data
 from lj_matrix.c_matrix import c_matrix, c_matrix_multiple
 from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple
@@ -33,8 +32,7 @@ from lj_matrix.parallel_create_matrices import parallel_create_matrices
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
-__all__ = ['printc',
-           'read_nc_data',
+__all__ = ['read_nc_data',
            'read_db_data',
            'read_qm7_data',
            'c_matrix',
-- 
cgit v1.2.3-70-g09d2


From b14c581ca5fdab47d7e1c0b688331368cb7f29d0 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Mon, 23 Dec 2019 13:11:12 -0700
Subject: Refactor ml code

---
 lj_matrix/do_ml.py | 104 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 93 insertions(+), 11 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index bb954a0ae..ac044cfb3 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -22,19 +22,22 @@ SOFTWARE.
 """
 import time
 import numpy as np
+from multiprocessing import Process, Pipe
 from lj_matrix.misc import printc
 from lj_matrix.gauss_kernel import gauss_kernel
 from lj_matrix.cholesky_solve import cholesky_solve
-
-
-def do_ml(desc_data,
-          energy_data,
-          training_size,
-          desc_type=None,
-          pipe=None,
-          test_size=None,
-          sigma=1000.0,
-          show_msgs=True):
+from lj_matrix.read_qm7_data import read_qm7_data
+from lj_matrix.parallel_create_matrices import parallel_create_matrices
+
+
+def ml(desc_data,
+       energy_data,
+       training_size,
+       desc_type=None,
+       pipe=None,
+       test_size=None,
+       sigma=1000.0,
+       show_msgs=True):
     """
     Does the ML methodology.
     desc_data: descriptor (or representation) data.
@@ -51,6 +54,7 @@ def do_ml(desc_data,
     Also, training is done with the first part of the data and
     testing with the ending part of the data.
     """
+    tic = time.perf_counter()
     # Initial calculations for later use.
     d_len = len(desc_data)
     e_len = len(energy_data)
@@ -72,7 +76,6 @@ def do_ml(desc_data,
         if test_size > 1500:
             test_size = 1500
 
-    tic = time.perf_counter()
     if show_msgs:
         printc('{} ML started.'.format(desc_type), 'GREEN')
         printc('\tTraining size: {}'.format(training_size), 'CYAN')
@@ -106,3 +109,82 @@ def do_ml(desc_data,
         pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
 
     return mae, tictoc
+
+
+# Test
+def do_ml(min_training_size,
+          max_training_size=None,
+          training_increment_size=None,
+          ljm_sigma=1.0,
+          ljm_epsilon=1.0,
+          save_benchmarks=False):
+    """
+    Main function that does the whole ML process.
+    min_training_size: minimum training size.
+    max_training_size: maximum training size.
+    training_increment_size: training increment size.
+    ljm_sigma: sigma value for lj matrix.
+    ljm_epsilon: epsilon value for lj matrix.
+    save_benchmarks: if benchmarks should be saved.
+    """
+    # Initialization time.
+    init_time = time.perf_counter()
+
+    # Data reading.
+    molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data()
+
+    # Matrices calculation.
+    cm_data, ljm_data = parallel_create_matrices(molecules,
+                                                 nuclear_charge,
+                                                 ljm_sigma,
+                                                 ljm_epsilon)
+
+    # ML calculation.
+    procs = []
+    cm_pipes = []
+    ljm_pipes = []
+    for i in range(min_training_size,
+                   max_training_size + 1,
+                   training_increment_size):
+        cm_recv, cm_send = Pipe(False)
+        p1 = Process(target=ml,
+                     args=(cm_data, energy_pbe0, i, 'CM', cm_send))
+        procs.append(p1)
+        cm_pipes.append(cm_recv)
+        p1.start()
+
+        ljm_recv, ljm_send = Pipe(False)
+        p2 = Process(target=ml,
+                     args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send))
+        procs.append(p2)
+        ljm_pipes.append(ljm_recv)
+        p2.start()
+
+    cm_bench_results = []
+    ljm_bench_results = []
+    for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
+        cm_bench_results.append(cd_pipe.recv())
+        ljm_bench_results.append(ljd_pipe.recv())
+
+    for proc in procs:
+        proc.join()
+
+    if save_benchmarks:
+        with open('data\\benchmarks.csv', 'a') as save_file:
+            # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,',
+            #                          'mae,time,lj_s,lj_e,date_ran\n']))
+            ltime = time.localtime()[:3][::-1]
+            ljm_se = ',' + str(ljm_sigma) + ',' + str(ljm_epsilon) + ','
+            date = '/'.join([str(field) for field in ltime])
+            for cm, ljm, in zip(cm_bench_results, ljm_bench_results):
+                cm_text = ','.join([str(field) for field in cm])\
+                    + ',' + date + '\n'
+                ljm_text = ','.join([str(field) for field in ljm])\
+                    + ljm_se + date + '\n'
+                save_file.write(cm_text)
+                save_file.write(ljm_text)
+
+    # End of program
+    end_time = time.perf_counter()
+    printc('Program took {:.4f} seconds.'.format(end_time - init_time),
+           'CYAN')
-- 
cgit v1.2.3-70-g09d2


From b4c2dc01ab17248814988c8e141bf16072c45abd Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 10:37:49 -0700
Subject: Add options to do_ml function

---
 lj_matrix/do_ml.py | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index ac044cfb3..12323780a 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -111,13 +111,17 @@ def ml(desc_data,
     return mae, tictoc
 
 
-# Test
 def do_ml(min_training_size,
           max_training_size=None,
           training_increment_size=None,
           ljm_sigma=1.0,
           ljm_epsilon=1.0,
-          save_benchmarks=False):
+          save_benchmarks=False,
+          max_len=25,
+          as_eig=True,
+          bohr_radius_units=False,
+          sigma=1000.0,
+          show_msgs=True):
     """
     Main function that does the whole ML process.
     min_training_size: minimum training size.
@@ -126,6 +130,11 @@ def do_ml(min_training_size,
     ljm_sigma: sigma value for lj matrix.
     ljm_epsilon: epsilon value for lj matrix.
     save_benchmarks: if benchmarks should be saved.
+    max_len: maximum amount of atoms in molecule.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+    bohr_radius_units: if units should be in bohr's radius units.
+    sigma: depth of the kernel.
+    show_msgs: Show debug messages or not.
     """
     # Initialization time.
     init_time = time.perf_counter()
@@ -137,7 +146,10 @@ def do_ml(min_training_size,
     cm_data, ljm_data = parallel_create_matrices(molecules,
                                                  nuclear_charge,
                                                  ljm_sigma,
-                                                 ljm_epsilon)
+                                                 ljm_epsilon,
+                                                 max_len,
+                                                 as_eig,
+                                                 bohr_radius_units)
 
     # ML calculation.
     procs = []
@@ -148,14 +160,28 @@ def do_ml(min_training_size,
                    training_increment_size):
         cm_recv, cm_send = Pipe(False)
         p1 = Process(target=ml,
-                     args=(cm_data, energy_pbe0, i, 'CM', cm_send))
+                     args=(cm_data,
+                           energy_pbe0,
+                           i,
+                           'CM',
+                           cm_send,
+                           max_training_size,
+                           sigma,
+                           show_msgs))
         procs.append(p1)
         cm_pipes.append(cm_recv)
         p1.start()
 
         ljm_recv, ljm_send = Pipe(False)
         p2 = Process(target=ml,
-                     args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send))
+                     args=(ljm_data,
+                           energy_pbe0,
+                           i,
+                           'L-JM',
+                           ljm_send,
+                           max_training_size,
+                           sigma,
+                           show_msgs))
         procs.append(p2)
         ljm_pipes.append(ljm_recv)
         p2.start()
-- 
cgit v1.2.3-70-g09d2


From cdbb1ac890cb0d062cdb2f216c347f681fbfa7b8 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 10:47:20 -0700
Subject: Fix bug

---
 lj_matrix/__main__.py | 68 +--------------------------------------------------
 lj_matrix/do_ml.py    |  4 ++-
 2 files changed, 4 insertions(+), 68 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index 8e52031f1..f7e4065da 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -20,76 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import time
-from multiprocessing import Process, Pipe
-# import matplotlib.pyplot as plt
 import pandas as pd
-from lj_matrix.misc import printc
-from lj_matrix.read_qm7_data import read_qm7_data
-from lj_matrix.parallel_create_matrices import parallel_create_matrices
 from lj_matrix.do_ml import do_ml
 
 
-# Test
-def ml():
-    """
-    Main function that does the whole ML process.
-    """
-    # Initialization time.
-    init_time = time.perf_counter()
-
-    # Data reading.
-    molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data()
-
-    # Matrices calculation.
-    cm_data, ljm_data = parallel_create_matrices(molecules, nuclear_charge)
-
-    # ML calculation.
-    procs = []
-    # cm_pipes = []
-    ljm_pipes = []
-    for i in range(1500, 6500 + 1, 500):
-        # cm_recv, cm_send = Pipe(False)
-        # p1 = Process(target=do_ml,
-        #              args=(cm_data, energy_pbe0, i, 'CM', cm_send))
-        # procs.append(p1)
-        # cm_pipes.append(cm_recv)
-        # p1.start()
-
-        ljm_recv, ljm_send = Pipe(False)
-        p2 = Process(target=do_ml,
-                     args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send))
-        procs.append(p2)
-        ljm_pipes.append(ljm_recv)
-        p2.start()
-
-    # cm_bench_results = []
-    ljm_bench_results = []
-    for ljd_pipe in ljm_pipes:  # cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
-        # cm_bench_results.append(cd_pipe.recv())
-        ljm_bench_results.append(ljd_pipe.recv())
-
-    for proc in procs:
-        proc.join()
-
-    with open('data\\benchmarks.csv', 'a') as save_file:
-        # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,',
-        #                          'mae,time,lj_s,lj_e,date_ran\n']))
-        date = '/'.join([str(field) for field in time.localtime()[:3][::-1]])
-        for ljm in ljm_bench_results:  # cm, ljm, in zip(cm_bench_results, ljm_bench_results):
-            # cm_text = ','.join([str(field) for field in cm])\
-            #     + ',' + date + '\n'
-            ljm_text = ','.join([str(field) for field in ljm])\
-                + ',1,0.25,' + date + '\n'
-            # save_file.write(cm_text)
-            save_file.write(ljm_text)
-
-    # End of program
-    end_time = time.perf_counter()
-    printc('Program took {:.4f} seconds.'.format(end_time - init_time),
-           'CYAN')
-
-
 def pl():
     """
     Function for plotting the benchmarks.
@@ -211,6 +145,6 @@ def pl():
 
 
 if __name__ == '__main__':
-    ml()
+    do_ml(min_training_size=1500, max_training_size=3000)
     # pl()
     print('OK!')
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index 12323780a..8724e6831 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -113,7 +113,7 @@ def ml(desc_data,
 
 def do_ml(min_training_size,
           max_training_size=None,
-          training_increment_size=None,
+          training_increment_size=500,
           ljm_sigma=1.0,
           ljm_epsilon=1.0,
           save_benchmarks=False,
@@ -138,6 +138,8 @@ def do_ml(min_training_size,
     """
     # Initialization time.
     init_time = time.perf_counter()
+    if not max_training_size:
+        max_training_size = min_training_size + training_increment_size
 
     # Data reading.
     molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data()
-- 
cgit v1.2.3-70-g09d2


From f9cd430d8e66cdac5d78a643f87445e3dd6bdf8e Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 10:54:36 -0700
Subject: Refactor code

---
 lj_matrix/__init__.py |   4 +-
 lj_matrix/__main__.py | 125 +-------------------------------------------------
 lj_matrix/misc.py     | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 124 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index d59e3481c..a430aac68 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -28,6 +28,7 @@ from lj_matrix.gauss_kernel import gauss_kernel
 from lj_matrix.cholesky_solve import cholesky_solve
 from lj_matrix.do_ml import do_ml
 from lj_matrix.parallel_create_matrices import parallel_create_matrices
+from lj_matrix.misc import plot_benchmarks
 
 
 # If somebody does "from package import *", this is what they will
@@ -43,4 +44,5 @@ __all__ = ['read_nc_data',
            'gauss_kernel',
            'cholesky_solve',
            'do_ml',
-           'parallel_create_matrices']
+           'parallel_create_matrices',
+           'plot_benchmarks']
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index f7e4065da..98f341e1e 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -20,131 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import pandas as pd
 from lj_matrix.do_ml import do_ml
-
-
-def pl():
-    """
-    Function for plotting the benchmarks.
-    """
-    # Original columns.
-    or_cols = ['ml_type',
-               'tr_size',
-               'te_size',
-               'kernel_s',
-               'mae',
-               'time',
-               'lj_s',
-               'lj_e',
-               'date_ran']
-    # Drop some original columns.
-    dor_cols = ['te_size',
-                'kernel_s',
-                'time',
-                'date_ran']
-
-    # Read benchmarks data and drop some columns.
-    data_temp = pd.read_csv('data\\benchmarks.csv',)
-    data = pd.DataFrame(data_temp, columns=or_cols)
-    data = data.drop(columns=dor_cols)
-
-    # Get the data of the first benchmarks and drop unnecesary columns.
-    first_data = pd.DataFrame(data, index=range(0, 22))
-    first_data = first_data.drop(columns=['lj_s', 'lj_e'])
-
-    # Columns to keep temporarily.
-    fd_columns = ['ml_type',
-                  'tr_size',
-                  'mae']
-
-    # Create new dataframes for each matrix descriptor and fill them.
-    first_data_cm = pd.DataFrame(columns=fd_columns)
-    first_data_ljm = pd.DataFrame(columns=fd_columns)
-    for i in range(first_data.shape[0]):
-        temp_df = first_data.iloc[[i]]
-        if first_data.at[i, 'ml_type'] == 'CM':
-            first_data_cm = first_data_cm.append(temp_df)
-        else:
-            first_data_ljm = first_data_ljm.append(temp_df)
-
-    # Drop unnecesary column and rename 'mae' for later use.
-    first_data_cm = first_data_cm.drop(columns=['ml_type'])\
-        .rename(columns={'mae': 'cm_mae'})
-    first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
-        .rename(columns={'mae': 'ljm_mae'})
-    # print(first_data_cm)
-    # print(first_data_ljm)
-
-    # Get the cm data axis so it can be joined with the ljm data axis.
-    cm_axis = first_data_cm.plot(x='tr_size',
-                                 y='cm_mae',
-                                 kind='line')
-    # Get the ljm data axis and join it with the cm one.
-    plot_axis = first_data_ljm.plot(ax=cm_axis,
-                                    x='tr_size',
-                                    y='ljm_mae',
-                                    kind='line')
-    plot_axis.set_xlabel('tr_size')
-    plot_axis.set_ylabel('mae')
-    plot_axis.set_title('mae for different tr_sizes')
-    # Get the figure and save it.
-    # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
-
-    # Get the rest of the benchmark data and drop unnecesary column.
-    new_data = data.drop(index=range(0, 22))
-    new_data = new_data.drop(columns=['ml_type'])
-
-    # Get the first set and rename it.
-    nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
-    ndf_axis = nd_first.plot(x='tr_size',
-                             y='1, 1',
-                             kind='line')
-    last_axis = ndf_axis
-    for i in range(22, 99, 11):
-        lj_s = new_data['lj_s'][i]
-        lj_e = new_data['lj_e'][i]
-        new_mae = '{}, {}'.format(lj_s, lj_e)
-        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
-            .drop(columns=['lj_s', 'lj_e'])\
-            .rename(columns={'mae': new_mae})
-        last_axis = nd_temp.plot(ax=last_axis,
-                                 x='tr_size',
-                                 y=new_mae,
-                                 kind='line')
-        print(nd_temp)
-
-    last_axis.set_xlabel('tr_size')
-    last_axis.set_ylabel('mae')
-    last_axis.set_title('mae for different parameters of lj(s)')
-
-    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
-
-    ndf_axis = nd_first.plot(x='tr_size',
-                             y='1, 1',
-                             kind='line')
-    last_axis = ndf_axis
-    for i in range(99, data.shape[0], 11):
-        lj_s = new_data['lj_s'][i]
-        lj_e = new_data['lj_e'][i]
-        new_mae = '{}, {}'.format(lj_s, lj_e)
-        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
-            .drop(columns=['lj_s', 'lj_e'])\
-            .rename(columns={'mae': new_mae})
-        last_axis = nd_temp.plot(ax=last_axis,
-                                 x='tr_size',
-                                 y=new_mae,
-                                 kind='line')
-        print(nd_temp)
-
-    last_axis.set_xlabel('tr_size')
-    last_axis.set_ylabel('mae')
-    last_axis.set_title('mae for different parameters of lj(e)')
-
-    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
-
+# from lj_matrix.misc import plot_benchmarks
 
 if __name__ == '__main__':
     do_ml(min_training_size=1500, max_training_size=3000)
-    # pl()
+    # plot_benchmarks()
     print('OK!')
diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py
index c50653a5c..e9142b05f 100644
--- a/lj_matrix/misc.py
+++ b/lj_matrix/misc.py
@@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from colorama import init, Fore, Style
+import pandas as pd
 
 init()
 
@@ -51,3 +52,123 @@ def printc(text, color):
         actual_color = color_dic[color]
 
     print(actual_color + text + Style.RESET_ALL)
+
+
+def plot_benchmarks():
+    """
+    Plot mae against tr_size from the benchmarks csv and save the lj figures.
+    """
+    # Columns selected from the benchmarks file.
+    or_cols = ['ml_type',
+               'tr_size',
+               'te_size',
+               'kernel_s',
+               'mae',
+               'time',
+               'lj_s',
+               'lj_e',
+               'date_ran']
+    # Columns that are dropped right after reading.
+    dor_cols = ['te_size',
+                'kernel_s',
+                'time',
+                'date_ran']
+
+    # Read benchmarks data and drop some columns.
+    data_temp = pd.read_csv('data\\benchmarks.csv',)
+    data = pd.DataFrame(data_temp, columns=or_cols)
+    data = data.drop(columns=dor_cols)
+
+    # Get the first 22 benchmark rows and drop unnecessary columns.
+    first_data = pd.DataFrame(data, index=range(0, 22))
+    first_data = first_data.drop(columns=['lj_s', 'lj_e'])
+
+    # Columns to keep temporarily.
+    fd_columns = ['ml_type',
+                  'tr_size',
+                  'mae']
+
+    # Split the rows by descriptor: 'CM' rows go to cm, all others to ljm.
+    first_data_cm = pd.DataFrame(columns=fd_columns)
+    first_data_ljm = pd.DataFrame(columns=fd_columns)
+    for i in range(first_data.shape[0]):
+        temp_df = first_data.iloc[[i]]
+        if first_data.at[i, 'ml_type'] == 'CM':
+            first_data_cm = first_data_cm.append(temp_df)
+        else:
+            first_data_ljm = first_data_ljm.append(temp_df)
+
+    # Drop unnecessary column and rename 'mae' for later use.
+    first_data_cm = first_data_cm.drop(columns=['ml_type'])\
+        .rename(columns={'mae': 'cm_mae'})
+    first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
+        .rename(columns={'mae': 'ljm_mae'})
+    # print(first_data_cm)
+    # print(first_data_ljm)
+
+    # Get the cm data axis so it can be joined with the ljm data axis.
+    cm_axis = first_data_cm.plot(x='tr_size',
+                                 y='cm_mae',
+                                 kind='line')
+    # Get the ljm data axis and join it with the cm one.
+    plot_axis = first_data_ljm.plot(ax=cm_axis,
+                                    x='tr_size',
+                                    y='ljm_mae',
+                                    kind='line')
+    plot_axis.set_xlabel('tr_size')
+    plot_axis.set_ylabel('mae')
+    plot_axis.set_title('mae for different tr_sizes')
+    # Get the figure and save it (currently commented out).
+    # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
+
+    # Get the rest of the benchmark data and drop unnecessary column.
+    new_data = data.drop(index=range(0, 22))
+    new_data = new_data.drop(columns=['ml_type'])
+
+    # Get the first ljm set and rename its mae column to '1, 1'.
+    nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
+    ndf_axis = nd_first.plot(x='tr_size',
+                             y='1, 1',
+                             kind='line')
+    last_axis = ndf_axis
+    for i in range(22, 99, 11):
+        lj_s = new_data['lj_s'][i]
+        lj_e = new_data['lj_e'][i]
+        new_mae = '{}, {}'.format(lj_s, lj_e)
+        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+            .drop(columns=['lj_s', 'lj_e'])\
+            .rename(columns={'mae': new_mae})
+        last_axis = nd_temp.plot(ax=last_axis,
+                                 x='tr_size',
+                                 y=new_mae,
+                                 kind='line')
+        print(nd_temp)
+
+    last_axis.set_xlabel('tr_size')
+    last_axis.set_ylabel('mae')
+    last_axis.set_title('mae for different parameters of lj(s)')
+
+    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
+
+    ndf_axis = nd_first.plot(x='tr_size',
+                             y='1, 1',
+                             kind='line')
+    last_axis = ndf_axis
+    for i in range(99, data.shape[0], 11):
+        lj_s = new_data['lj_s'][i]
+        lj_e = new_data['lj_e'][i]
+        new_mae = '{}, {}'.format(lj_s, lj_e)
+        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+            .drop(columns=['lj_s', 'lj_e'])\
+            .rename(columns={'mae': new_mae})
+        last_axis = nd_temp.plot(ax=last_axis,
+                                 x='tr_size',
+                                 y=new_mae,
+                                 kind='line')
+        print(nd_temp)
+
+    last_axis.set_xlabel('tr_size')
+    last_axis.set_ylabel('mae')
+    last_axis.set_title('mae for different parameters of lj(e)')
+
+    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
-- 
cgit v1.2.3-70-g09d2


From c1e7b327655ebaa5c44e4bef5b9b675b23782952 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 11:05:39 -0700
Subject: Refactor code and fix bug

---
 lj_matrix/do_ml.py                    |  3 +++
 lj_matrix/lj_matrix.py                | 17 +++++++++++++++--
 lj_matrix/parallel_create_matrices.py | 27 +++++++++++++++++++++------
 3 files changed, 39 insertions(+), 8 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index 8724e6831..45dc7a5f0 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -114,6 +114,7 @@ def ml(desc_data,
 def do_ml(min_training_size,
           max_training_size=None,
           training_increment_size=500,
+          ljm_diag_value=None,
           ljm_sigma=1.0,
           ljm_epsilon=1.0,
           save_benchmarks=False,
@@ -127,6 +128,7 @@ def do_ml(min_training_size,
     min_training_size: minimum training size.
     max_training_size: maximum training size.
     training_increment_size: training increment size.
+    ljm_diag_value: if a special diagonal value should be used in lj matrix.
     ljm_sigma: sigma value for lj matrix.
     ljm_epsilon: epsilon value for lj matrix.
     save_benchmarks: if benchmarks should be saved.
@@ -147,6 +149,7 @@ def do_ml(min_training_size,
     # Matrices calculation.
     cm_data, ljm_data = parallel_create_matrices(molecules,
                                                  nuclear_charge,
+                                                 ljm_diag_value,
                                                  ljm_sigma,
                                                  ljm_epsilon,
                                                  max_len,
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
index 0c16b5686..c3b61becb 100644
--- a/lj_matrix/lj_matrix.py
+++ b/lj_matrix/lj_matrix.py
@@ -29,6 +29,7 @@ from lj_matrix.misc import printc
 
 def lj_matrix(mol_data,
               nc_data,
+              diag_value=None,
               sigma=1.0,
               epsilon=1.0,
               max_len=25,
@@ -38,6 +39,7 @@ def lj_matrix(mol_data,
     Creates the Lennard-Jones Matrix from the molecule data given.
     mol_data: molecule data, matrix of atom coordinates.
     nc_data: nuclear charge data, array of atom data.
+    diag_value: if special diagonal value is to be used.
     sigma: sigma value.
     epsilon: epsilon value.
     max_len: maximum amount of atoms in molecule.
@@ -86,7 +88,10 @@ def lj_matrix(mol_data,
                         z = (z_i-z_j)**2
 
                         if i == j:
-                            lj[i, j] = (0.5*Z_i**2.4)
+                            if not diag_value:
+                                lj[i, j] = (0.5*Z_i**2.4)
+                            else:
+                                lj[i, j] = diag_value
                         else:
                             # Calculations are done after i==j is checked
                             # so no division by zero is done.
@@ -144,7 +149,10 @@ def lj_matrix(mol_data,
                     z = (z_i-z_j)**2
 
                     if i == j:
-                        lj_row.append(0.5*Z_i**2.4)
+                        if not diag_value:
+                            lj_row.append(0.5*Z_i**2.4)
+                        else:
+                            lj_row.append(diag_value)
                     else:
                         # Calculations are done after i==j is checked
                         # so no division by zero is done.
@@ -173,6 +181,7 @@ def lj_matrix(mol_data,
 def lj_matrix_multiple(mol_data,
                        nc_data,
                        pipe=None,
+                       diag_value=None,
                        sigma=1.0,
                        epsilon=1.0,
                        max_len=25,
@@ -184,6 +193,9 @@ def lj_matrix_multiple(mol_data,
     nc_data: nuclear charge data, array of atom data.
     pipe: for multiprocessing purposes. Sends the data calculated
         through a pipe.
+    diag_value: if special diagonal value is to be used.
+    sigma: sigma value.
+    epsilon: epsilon value.
     max_len: maximum amount of atoms in molecule.
     as_eig: if data should be returned as matrix or array of eigenvalues.
     bohr_radius_units: if units should be in bohr's radius units.
@@ -193,6 +205,7 @@ def lj_matrix_multiple(mol_data,
 
     ljm_data = np.array([lj_matrix(mol,
                                    nc,
+                                   diag_value,
                                    sigma,
                                    epsilon,
                                    max_len,
diff --git a/lj_matrix/parallel_create_matrices.py b/lj_matrix/parallel_create_matrices.py
index 0ab691525..cd5ef5c8e 100644
--- a/lj_matrix/parallel_create_matrices.py
+++ b/lj_matrix/parallel_create_matrices.py
@@ -27,8 +27,9 @@ from lj_matrix.lj_matrix import lj_matrix_multiple
 
 def parallel_create_matrices(mol_data,
                              nc_data,
-                             sigma=1.0,
-                             epsilon=1.0,
+                             ljm_diag_value=None,
+                             ljm_sigma=1.0,
+                             ljm_epsilon=1.0,
                              max_len=25,
                              as_eig=True,
                              bohr_radius_units=False):
@@ -36,8 +37,9 @@ def parallel_create_matrices(mol_data,
     Creates the Coulomb and L-J matrices in parallel.
     mol_data: molecule data, matrix of atom coordinates.
     nc_data: nuclear charge data, array of atom data.
-    sigma: sigma value for L-J matrix.
-    epsilon: epsilon value for L-J matrix.
+    ljm_diag_value: if special diagonal value is to be used for lj matrix.
+    ljm_sigma: sigma value for lj matrix.
+    ljm_epsilon: epsilon value for lj matrix.
     max_len: maximum amount of atoms in molecule.
     as_eig: if data should be returned as matrix or array of eigenvalues.
     bohr_radius_units: if units should be in bohr's radius units.
@@ -49,14 +51,27 @@ def parallel_create_matrices(mol_data,
 
     cm_recv, cm_send = Pipe(False)
     p1 = Process(target=c_matrix_multiple,
-                 args=(mol_data, nc_data, cm_send))
+                 args=(mol_data,
+                       nc_data,
+                       cm_send,
+                       max_len,
+                       as_eig,
+                       bohr_radius_units))
     procs.append(p1)
     pipes.append(cm_recv)
     p1.start()
 
     ljm_recv, ljm_send = Pipe(False)
     p2 = Process(target=lj_matrix_multiple,
-                 args=(mol_data, nc_data, ljm_send, sigma, epsilon))
+                 args=(mol_data,
+                       nc_data,
+                       ljm_send,
+                       ljm_diag_value,
+                       ljm_sigma,
+                       ljm_epsilon,
+                       max_len,
+                       as_eig,
+                       bohr_radius_units))
     procs.append(p2)
     pipes.append(ljm_recv)
     p2.start()
-- 
cgit v1.2.3-70-g09d2


From e4f9e15588ec796f73c000a683cc9152454a913c Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 11:12:36 -0700
Subject: Fix bugs

---
 lj_matrix/__main__.py | 10 +++++++++-
 lj_matrix/do_ml.py    |  7 +++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index 98f341e1e..811024ff0 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -24,6 +24,14 @@ from lj_matrix.do_ml import do_ml
 # from lj_matrix.misc import plot_benchmarks
 
 if __name__ == '__main__':
-    do_ml(min_training_size=1500, max_training_size=3000)
+    do_ml(min_training_size=1500,
+          max_training_size=2000,
+          training_increment_size=500,
+          test_size=None,
+          ljm_diag_value=None,
+          ljm_sigma=1.0,
+          ljm_epsilon=1.0,
+          save_benchmarks=False,
+          show_msgs=True)
     # plot_benchmarks()
     print('OK!')
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index 45dc7a5f0..da9386bf7 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -114,6 +114,7 @@ def ml(desc_data,
 def do_ml(min_training_size,
           max_training_size=None,
           training_increment_size=500,
+          test_size=None,
           ljm_diag_value=None,
           ljm_sigma=1.0,
           ljm_epsilon=1.0,
@@ -128,6 +129,8 @@ def do_ml(min_training_size,
     min_training_size: minimum training size.
     max_training_size: maximum training size.
     training_increment_size: training increment size.
+    test_size: size of the test set to use. If no size is given,
+        the last remaining molecules are used.
     ljm_diag_value: if a special diagonal value should be used in lj matrix.
     ljm_sigma: sigma value for lj matrix.
     ljm_epsilon: epsilon value for lj matrix.
@@ -170,7 +173,7 @@ def do_ml(min_training_size,
                            i,
                            'CM',
                            cm_send,
-                           max_training_size,
+                           test_size,
                            sigma,
                            show_msgs))
         procs.append(p1)
@@ -184,7 +187,7 @@ def do_ml(min_training_size,
                            i,
                            'L-JM',
                            ljm_send,
-                           max_training_size,
+                           test_size,
                            sigma,
                            show_msgs))
         procs.append(p2)
-- 
cgit v1.2.3-70-g09d2


From 4704314c9b4d1066383da5c3d6ca87bba9067c8d Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 11:37:22 -0700
Subject: Refactor code

---
 lj_matrix/__main__.py      | 1 +
 lj_matrix/do_ml.py         | 5 ++++-
 lj_matrix/lj_matrix.py     | 2 +-
 lj_matrix/read_qm7_data.py | 7 ++++---
 4 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index 811024ff0..688e5adcc 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -31,6 +31,7 @@ if __name__ == '__main__':
           ljm_diag_value=None,
           ljm_sigma=1.0,
           ljm_epsilon=1.0,
+          r_seed=111,
           save_benchmarks=False,
           show_msgs=True)
     # plot_benchmarks()
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
index da9386bf7..25a55e823 100644
--- a/lj_matrix/do_ml.py
+++ b/lj_matrix/do_ml.py
@@ -118,6 +118,7 @@ def do_ml(min_training_size,
           ljm_diag_value=None,
           ljm_sigma=1.0,
           ljm_epsilon=1.0,
+          r_seed=111,
           save_benchmarks=False,
           max_len=25,
           as_eig=True,
@@ -134,6 +135,7 @@ def do_ml(min_training_size,
     ljm_diag_value: if a special diagonal value should be used in lj matrix.
     ljm_sigma: sigma value for lj matrix.
     ljm_epsilon: epsilon value for lj matrix.
+    r_seed: random seed to use for the shuffling.
     save_benchmarks: if benchmarks should be saved.
     max_len: maximum amount of atoms in molecule.
     as_eig: if data should be returned as matrix or array of eigenvalues.
@@ -147,7 +149,8 @@ def do_ml(min_training_size,
         max_training_size = min_training_size + training_increment_size
 
     # Data reading.
-    molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data()
+    molecules, nuclear_charge, energy_pbe0, energy_delta =\
+        read_qm7_data(r_seed)
 
     # Matrices calculation.
     cm_data, ljm_data = parallel_create_matrices(molecules,
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
index c3b61becb..6739ae283 100644
--- a/lj_matrix/lj_matrix.py
+++ b/lj_matrix/lj_matrix.py
@@ -88,7 +88,7 @@ def lj_matrix(mol_data,
                         z = (z_i-z_j)**2
 
                         if i == j:
-                            if not diag_value:
+                            if diag_value is None:
                                 lj[i, j] = (0.5*Z_i**2.4)
                             else:
                                 lj[i, j] = diag_value
diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py
index 9bb7629ca..4401ca1c0 100644
--- a/lj_matrix/read_qm7_data.py
+++ b/lj_matrix/read_qm7_data.py
@@ -59,7 +59,7 @@ def read_db_data(zi_data,
     its contents as usable variables.
     zi_data: dictionary containing nuclear charge data.
     data_path: path to the data directory.
-    r_seed: random seed.
+    r_seed: random seed to use for the shuffling.
     """
     os.chdir(data_path)
 
@@ -122,9 +122,10 @@ def read_db_data(zi_data,
     return molecules, nuclear_charge, energy_pbe0, energy_delta
 
 
-def read_qm7_data():
+def read_qm7_data(r_seed=111):
     """
     Reads all the qm7 data.
+    r_seed: random seed to use for the shuffling.
     """
     tic = time.perf_counter()
     printc('Data reading started.', 'CYAN')
@@ -135,7 +136,7 @@ def read_qm7_data():
 
     zi_data = read_nc_data(data_path)
     molecules, nuclear_charge, energy_pbe0, energy_delta = \
-        read_db_data(zi_data, data_path)
+        read_db_data(zi_data, data_path, r_seed)
 
     os.chdir(init_path)
     toc = time.perf_counter()
-- 
cgit v1.2.3-70-g09d2