From 0c91ee8b6c968457cff1d968cf69add73902bd11 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Thu, 20 Feb 2020 19:02:23 -0700
Subject: Clean main

---
 ml_exp/__main__.py | 51 ++++++---------------------------------------------
 1 file changed, 6 insertions(+), 45 deletions(-)

diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index aaeb18714..83abb918e 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -22,50 +22,11 @@ SOFTWARE.
 """
 # from ml_exp.do_ml import do_ml
 # from ml_exp.misc import plot_benchmarks
-from ml_exp.read_qm7_data import read_qm7_data
-from ml_exp.adj_matrix import fneig_matrix, adj_matrix
-from ml_exp.c_matrix import c_matrix
-from ml_exp.bob import bob
-from ml_exp.bostuff import bok_cx
+# from ml_exp.read_qm7_data import read_qm7_data
+# from ml_exp.adj_matrix import fneig_matrix, adj_matrix
+# from ml_exp.c_matrix import c_matrix
+# from ml_exp.bob import bob
+# from ml_exp.bostuff import bok_cx
 
 if __name__ == '__main__':
-    """
-    do_ml(min_training_size=1500,
-          max_training_size=2000,
-          training_increment_size=500,
-          test_size=None,
-          ljm_diag_value=None,
-          ljm_sigma=1.0,
-          ljm_epsilon=1.0,
-          r_seed=111,
-          save_benchmarks=False,
-          show_msgs=True)
-    """
-    # plot_benchmarks()
-    """
-    xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True)
-    for i in range(1):
-        fnm, bonds, forces = fneig_matrix(atoms[i], xyz[i], nc[i], True)
-        am = adj_matrix(bonds, forces)
-
-        print(f'{i} first neighbor matrix\n{fnm}')
-        print(f'{i} bond list\n{bonds}')
-        print(f'{i} force list\n{forces}')
-        print(f'{i} adjacency matrix\n{am}')
-        print('-'*30)
-    print('OK!')
-    """
-    xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True)
-    for i in range(1):
-        cm = c_matrix(xyz[i], nc[i], as_eig=False)
-        bob_i = bob(cm, atoms[i])
-
-        print(f'{i} coulomb matrix\n{cm}')
-        print(f'{i} bag of bonds\n{bob}')
-
-        fnm, bonds = fneig_matrix(atoms[i], xyz[i], nc[i], False)
-        am = adj_matrix(fnm, bonds, None)
-        bok_cx_i = bok_cx(am, atoms[i])
-
-        print(f'{i} adjacency matrix\n{am}')
-        print(f'{i} bag of k_cx\n{bok_cx_i}')
+    print('Ok!')
-- 
cgit v1.2.3-70-g09d2


From 7a83a6e48a7503848bf1a4abd448f7858dca71e0 Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Thu, 20 Feb 2020 19:54:46 -0700
Subject: Add basic compound object

---
 ml_exp/__main__.py |  8 --------
 ml_exp/compound.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 ml_exp/compound.py

diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 83abb918e..3cb515d85 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -20,13 +20,5 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-# from ml_exp.do_ml import do_ml
-# from ml_exp.misc import plot_benchmarks
-# from ml_exp.read_qm7_data import read_qm7_data
-# from ml_exp.adj_matrix import fneig_matrix, adj_matrix
-# from ml_exp.c_matrix import c_matrix
-# from ml_exp.bob import bob
-# from ml_exp.bostuff import bok_cx
-
 if __name__ == '__main__':
     print('Ok!')
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
new file mode 100644
index 000000000..6fd46fc3a
--- /dev/null
+++ b/ml_exp/compound.py
@@ -0,0 +1,51 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import numpy as np
+
+
+class Compound:
+    def __init__(self, xyz=None):
+        self.n = None
+        self.atoms = None
+        self.coordinates = None
+
+        if xyz is not None:
+            self.read_xyz(xyz)
+
+    def read_xyz(self, filename):
+        """
+        Reads an xyz file and adds the corresponding data to the Compound.
+        filename: (path to) the xyz file.
+        """
+        with open(filename, 'r') as f:
+            lines = f.readlines()
+
+        self.n = int(lines[0])
+        self.atoms = []
+        self.coordinates = np.empty((self.n, 3), dtype=float)
+
+        for i, atom in enumerate(lines[2:self.n + 2]):
+            atom_d = atom.split()
+
+            self.atoms.append(atom_d[0])
+            self.coordinates[i] = np.asarray(atom_d[1:4], dtype=float)
-- 
cgit v1.2.3-70-g09d2


From 96eade3b9f72c5d1fa5bae27ba82a2cd23e1e200 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:16:22 -0700
Subject: Add nuclear charge

---
 ml_exp/compound.py      |  11 ++++
 ml_exp/data.py          | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
 ml_exp/read_qm7_data.py |  11 ++++
 3 files changed, 163 insertions(+)
 create mode 100644 ml_exp/data.py

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 6fd46fc3a..cfa3e0322 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -21,13 +21,22 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import numpy as np
+from ml_exp.data import NUCLEAR_CHARGE
 
 
 class Compound:
     def __init__(self, xyz=None):
+        # empty_array = np.asarray([], dtype=float)
+
         self.n = None
         self.atoms = None
+        self.atoms_nc = None
         self.coordinates = None
+        self.energy = None
+
+        self.coulomb_matrix = None
+        self.lennard_jones_matrix = None
+        self.bob = None
 
         if xyz is not None:
             self.read_xyz(xyz)
@@ -42,10 +51,12 @@ class Compound:
 
         self.n = int(lines[0])
         self.atoms = []
+        self.atoms_nc = np.empty(self.n, dtype=int)
         self.coordinates = np.empty((self.n, 3), dtype=float)
 
         for i, atom in enumerate(lines[2:self.n + 2]):
             atom_d = atom.split()
 
             self.atoms.append(atom_d[0])
+            self.atoms_nc[i] = NUCLEAR_CHARGE[atom_d[0]]
             self.coordinates[i] = np.asarray(atom_d[1:4], dtype=float)
diff --git a/ml_exp/data.py b/ml_exp/data.py
new file mode 100644
index 000000000..243aff168
--- /dev/null
+++ b/ml_exp/data.py
@@ -0,0 +1,141 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+NUCLEAR_CHARGE = {
+    'H': 1,
+    'He': 2,
+    'Li': 3,
+    'Be': 4,
+    'B': 5,
+    'C': 6,
+    'N': 7,
+    'O': 8,
+    'F': 9,
+    'Ne': 10,
+    'Na': 11,
+    'Mg': 12,
+    'Al': 13,
+    'Si': 14,
+    'P': 15,
+    'S': 16,
+    'Cl': 17,
+    'Ar': 18,
+    'K': 19,
+    'Ca': 20,
+    'Sc': 21,
+    'Ti': 22,
+    'V': 23,
+    'Cr': 24,
+    'Mn': 25,
+    'Fe': 26,
+    'Co': 27,
+    'Ni': 28,
+    'Cu': 29,
+    'Zn': 30,
+    'Ga': 31,
+    'Ge': 32,
+    'As': 33,
+    'Se': 34,
+    'Br': 35,
+    'Kr': 36,
+    'Rb': 37,
+    'Sr': 38,
+    'Y': 39,
+    'Zr': 40,
+    'Nb': 41,
+    'Mo': 42,
+    'Tc': 43,
+    'Ru': 44,
+    'Rh': 45,
+    'Pd': 46,
+    'Ag': 47,
+    'Cd': 48,
+    'In': 49,
+    'Sn': 50,
+    'Sb': 51,
+    'Te': 52,
+    'I': 53,
+    'Xe': 54,
+    'Cs': 55,
+    'Ba': 56,
+    'La': 57,
+    'Ce': 58,
+    'Pr': 59,
+    'Nd': 60,
+    'Pm': 61,
+    'Sm': 62,
+    'Eu': 63,
+    'Gd': 64,
+    'Tb': 65,
+    'Dy': 66,
+    'Ho': 67,
+    'Er': 68,
+    'Tm': 69,
+    'Yb': 70,
+    'Lu': 71,
+    'Hf': 72,
+    'Ta': 73,
+    'W': 74,
+    'Re': 75,
+    'Os': 76,
+    'Ir': 77,
+    'Pt': 78,
+    'Au': 79,
+    'Hg': 80,
+    'Tl': 81,
+    'Pb': 82,
+    'Bi': 83,
+    'Po': 84,
+    'At': 85,
+    'Rn': 86,
+    'Fr': 87,
+    'Ra': 88,
+    'Ac': 89,
+    'Th': 90,
+    'Pa': 91,
+    'U': 92,
+    'Np': 93,
+    'Pu': 94,
+    'Am': 95,
+    'Cm': 96,
+    'Bk': 97,
+    'Cf': 98,
+    'Es': 99,
+    'Fm': 100,
+    'Md': 101,
+    'No': 102,
+    'Lr': 103,
+    'Rf': 104,
+    'Db': 105,
+    'Sg': 106,
+    'Bh': 107,
+    'Hs': 108,
+    'Mt': 109,
+    'Ds ': 110,
+    'Rg ': 111,
+    'Cn ': 112,
+    'Nh': 113,
+    'Fl': 114,
+    'Mc': 115,
+    'Lv': 116,
+    'Ts': 117,
+    'Og': 118}
diff --git a/ml_exp/read_qm7_data.py b/ml_exp/read_qm7_data.py
index 81be05a17..63c48195c 100644
--- a/ml_exp/read_qm7_data.py
+++ b/ml_exp/read_qm7_data.py
@@ -49,6 +49,17 @@ def read_nc_data(data_path):
     return {line[2]: int(line[0]) for line in lines}
 
 
+# For use in the rewriting of the code (from functional programming to object
+# oriented programming-ish)
+def print_nc(nc_data):
+    """
+    Prints the nuclear charge data to the terminal.
+    nc_data: dict of nc.
+    """
+    for key in nc_data.keys():
+        print(f'\'{key}\': {nc_data[key]},')
+
+
 # 'hof_qm7.txt.txt' retrieved from
 # https://github.com/qmlcode/tutorial
 def read_db_data(zi_data,
-- 
cgit v1.2.3-70-g09d2


From 4ce7823359a144973c667f488e5fa39e5d3f8acc Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:21:21 -0700
Subject: Remove unused function

---
 ml_exp/compound.py      |  3 +++
 ml_exp/read_qm7_data.py | 34 ----------------------------------
 2 files changed, 3 insertions(+), 34 deletions(-)

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index cfa3e0322..9536767a7 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -41,6 +41,9 @@ class Compound:
         if xyz is not None:
             self.read_xyz(xyz)
 
+    def gen_cm(self):
+        pass
+
     def read_xyz(self, filename):
         """
         Reads an xyz file and adds the corresponding data to the Compound.
diff --git a/ml_exp/read_qm7_data.py b/ml_exp/read_qm7_data.py
index 63c48195c..666f9c3fc 100644
--- a/ml_exp/read_qm7_data.py
+++ b/ml_exp/read_qm7_data.py
@@ -21,10 +21,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import os
-import time
 import numpy as np
 import random
-from ml_exp.misc import printc
 
 
 # 'periodic_table_of_elements.txt' retrieved from
@@ -139,35 +137,3 @@ def read_db_data(zi_data,
     if return_atoms:
         return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
     return molecules, nuclear_charge, energy_pbe0, energy_delta
-
-
-def read_qm7_data(data_path='data',
-                  r_seed=111,
-                  return_atoms=False):
-    """
-    Reads all the qm7 data.
-    data_path: path to the data directory.
-    r_seed: random seed to use for the shuffling.
-    return_atoms: if atom list should be returned.
-    """
-    tic = time.perf_counter()
-    printc('Data reading started.', 'CYAN')
-
-    init_path = os.getcwd()
-    os.chdir(data_path)
-    data_path = os.getcwd()
-
-    zi_data = read_nc_data(data_path)
-    if return_atoms:
-        molecules, nuclear_charge, energy_pbe0, energy_delta, atoms = \
-            read_db_data(zi_data, data_path, r_seed, return_atoms)
-    else:
-        molecules, nuclear_charge, energy_pbe0, energy_delta = \
-            read_db_data(zi_data, data_path, r_seed)
-
-    os.chdir(init_path)
-    toc = time.perf_counter()
-    printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
-    if return_atoms:
-        return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
-    return molecules, nuclear_charge, energy_pbe0, energy_delta
-- 
cgit v1.2.3-70-g09d2


From 2dd24b5f7dfa5c43acd8e4281dab22f178a5019e Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:25:25 -0700
Subject: Remove unused function

---
 ml_exp/c_matrix.py | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/ml_exp/c_matrix.py b/ml_exp/c_matrix.py
index 00034a660..ac942d473 100644
--- a/ml_exp/c_matrix.py
+++ b/ml_exp/c_matrix.py
@@ -20,11 +20,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import time
 import math
 import numpy as np
 from numpy.linalg import eig
-from ml_exp.misc import printc
 
 
 def c_matrix(mol_data,
@@ -146,34 +144,3 @@ def c_matrix(mol_data,
                 return np.sort(eig(cm)[0])[::-1]
             else:
                 return cm
-
-
-def c_matrix_multiple(mol_data,
-                      nc_data,
-                      pipe=None,
-                      max_len=25,
-                      as_eig=True,
-                      bohr_radius_units=False):
-    """
-    Calculates the Coulomb Matrix of multiple molecules.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    printc('Coulomb Matrices calculation started.', 'CYAN')
-    tic = time.perf_counter()
-
-    cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
-                       for mol, nc in zip(mol_data, nc_data)])
-
-    toc = time.perf_counter()
-    printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN')
-
-    if pipe:
-        pipe.send(cm_data)
-
-    return cm_data
-- 
cgit v1.2.3-70-g09d2


From f6fd349a822d6dd2f7172a90c6c35d1eb36f5c95 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:26:26 -0700
Subject: Move cmatrix to representations

---
 ml_exp/c_matrix.py | 146 -----------------------------------------------------
 1 file changed, 146 deletions(-)
 delete mode 100644 ml_exp/c_matrix.py

diff --git a/ml_exp/c_matrix.py b/ml_exp/c_matrix.py
deleted file mode 100644
index ac942d473..000000000
--- a/ml_exp/c_matrix.py
+++ /dev/null
@@ -1,146 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import math
-import numpy as np
-from numpy.linalg import eig
-
-
-def c_matrix(mol_data,
-             nc_data,
-             max_len=25,
-             as_eig=True,
-             bohr_radius_units=False):
-    """
-    Creates the Coulomb Matrix from the molecule data given.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    if bohr_radius_units:
-        conversion_rate = 0.52917721067
-    else:
-        conversion_rate = 1
-
-    mol_n = len(mol_data)
-    mol_nr = range(mol_n)
-
-    if not mol_n == len(nc_data):
-        print(''.join(['Error. Molecule matrix dimension is different ',
-                       'than the nuclear charge array dimension.']))
-    else:
-        if max_len < mol_n:
-            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                           'greater than max_len. Using mol_n.']))
-            max_len = None
-
-        if max_len:
-            cm = np.zeros((max_len, max_len))
-            ml_r = range(max_len)
-
-            # Actual calculation of the coulomb matrix.
-            for i in ml_r:
-                if i < mol_n:
-                    x_i = mol_data[i, 0]
-                    y_i = mol_data[i, 1]
-                    z_i = mol_data[i, 2]
-                    Z_i = nc_data[i]
-                else:
-                    break
-
-                for j in ml_r:
-                    if j < mol_n:
-                        x_j = mol_data[j, 0]
-                        y_j = mol_data[j, 1]
-                        z_j = mol_data[j, 2]
-                        Z_j = nc_data[j]
-
-                        x = (x_i-x_j)**2
-                        y = (y_i-y_j)**2
-                        z = (z_i-z_j)**2
-
-                        if i == j:
-                            cm[i, j] = (0.5*Z_i**2.4)
-                        else:
-                            cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                          + y
-                                                                          + z))
-                    else:
-                        break
-
-            # Now the value will be returned.
-            if as_eig:
-                cm_sorted = np.sort(eig(cm)[0])[::-1]
-                # Thanks to SO for the following lines of code.
-                # https://stackoverflow.com/a/43011036
-
-                # Keep zeros at the end.
-                mask = cm_sorted != 0.
-                f_mask = mask.sum(0, keepdims=1) >\
-                    np.arange(cm_sorted.shape[0]-1, -1, -1)
-
-                f_mask = f_mask[::-1]
-                cm_sorted[f_mask] = cm_sorted[mask]
-                cm_sorted[~f_mask] = 0.
-
-                return cm_sorted
-
-            else:
-                return cm
-
-        else:
-            cm_temp = []
-            # Actual calculation of the coulomb matrix.
-            for i in mol_nr:
-                x_i = mol_data[i, 0]
-                y_i = mol_data[i, 1]
-                z_i = mol_data[i, 2]
-                Z_i = nc_data[i]
-
-                cm_row = []
-                for j in mol_nr:
-                    x_j = mol_data[j, 0]
-                    y_j = mol_data[j, 1]
-                    z_j = mol_data[j, 2]
-                    Z_j = nc_data[j]
-
-                    x = (x_i-x_j)**2
-                    y = (y_i-y_j)**2
-                    z = (z_i-z_j)**2
-
-                    if i == j:
-                        cm_row.append(0.5*Z_i**2.4)
-                    else:
-                        cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                        + y
-                                                                        + z))
-
-                cm_temp.append(np.array(cm_row))
-
-            cm = np.array(cm_temp)
-            # Now the value will be returned.
-            if as_eig:
-                return np.sort(eig(cm)[0])[::-1]
-            else:
-                return cm
-- 
cgit v1.2.3-70-g09d2


From d6b381e1ea629879b9855989b0f753632ea9df2a Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:33:41 -0700
Subject: Refactor code, size is 23 not 25

---
 ml_exp/representations.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 ml_exp/representations.py

diff --git a/ml_exp/representations.py b/ml_exp/representations.py
new file mode 100644
index 000000000..830e51707
--- /dev/null
+++ b/ml_exp/representations.py
@@ -0,0 +1,145 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import math
+import numpy as np
+
+
+def coulomb_matrix(coords,
+                   nc,
+                   size=23,
+                   as_eig=True,
+                   bohr_radius_units=False):
+    """
+    Creates the Coulomb Matrix from the molecule data given.
+    coords: compound coordinates.
+    nc: nuclear charge data.
+    size: compound size.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+    bohr_radius_units: if units should be in bohr's radius units.
+    """
+    if bohr_radius_units:
+        conversion_rate = 0.52917721067
+    else:
+        conversion_rate = 1
+
+    mol_n = len(coords)
+    mol_nr = range(mol_n)
+
+    if not mol_n == len(nc):
+        print(''.join(['Error. Molecule matrix dimension is different ',
+                       'than the nuclear charge array dimension.']))
+    else:
+        if size < mol_n:
+            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+                           'greater than size. Using mol_n.']))
+            size = None
+
+        if size:
+            cm = np.zeros((size, size))
+            ml_r = range(size)
+
+            # Actual calculation of the coulomb matrix.
+            for i in ml_r:
+                if i < mol_n:
+                    x_i = coords[i, 0]
+                    y_i = coords[i, 1]
+                    z_i = coords[i, 2]
+                    Z_i = nc[i]
+                else:
+                    break
+
+                for j in ml_r:
+                    if j < mol_n:
+                        x_j = coords[j, 0]
+                        y_j = coords[j, 1]
+                        z_j = coords[j, 2]
+                        Z_j = nc[j]
+
+                        x = (x_i-x_j)**2
+                        y = (y_i-y_j)**2
+                        z = (z_i-z_j)**2
+
+                        if i == j:
+                            cm[i, j] = (0.5*Z_i**2.4)
+                        else:
+                            cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
+                                                                          + y
+                                                                          + z))
+                    else:
+                        break
+
+            # Now the value will be returned.
+            if as_eig:
+                cm_sorted = np.sort(np.linalg.eig(cm)[0])[::-1]
+                # Thanks to SO for the following lines of code.
+                # https://stackoverflow.com/a/43011036
+
+                # Keep zeros at the end.
+                mask = cm_sorted != 0.
+                f_mask = mask.sum(0, keepdims=1) >\
+                    np.arange(cm_sorted.shape[0]-1, -1, -1)
+
+                f_mask = f_mask[::-1]
+                cm_sorted[f_mask] = cm_sorted[mask]
+                cm_sorted[~f_mask] = 0.
+
+                return cm_sorted
+
+            else:
+                return cm
+
+        else:
+            cm_temp = []
+            # Actual calculation of the coulomb matrix.
+            for i in mol_nr:
+                x_i = coords[i, 0]
+                y_i = coords[i, 1]
+                z_i = coords[i, 2]
+                Z_i = nc[i]
+
+                cm_row = []
+                for j in mol_nr:
+                    x_j = coords[j, 0]
+                    y_j = coords[j, 1]
+                    z_j = coords[j, 2]
+                    Z_j = nc[j]
+
+                    x = (x_i-x_j)**2
+                    y = (y_i-y_j)**2
+                    z = (z_i-z_j)**2
+
+                    if i == j:
+                        cm_row.append(0.5*Z_i**2.4)
+                    else:
+                        cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
+                                                                        + y
+                                                                        + z))
+
+                cm_temp.append(np.array(cm_row))
+
+            cm = np.array(cm_temp)
+            # Now the value will be returned.
+            if as_eig:
+                return np.sort(np.linalg.eig(cm)[0])[::-1]
+            else:
+                return cm
-- 
cgit v1.2.3-70-g09d2


From 02cb17d411e9a1bc2fe5c6bcf75695f23f340648 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:44:34 -0700
Subject: Add exception

---
 ml_exp/representations.py | 158 +++++++++++++++++++++++-----------------------
 1 file changed, 79 insertions(+), 79 deletions(-)

diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 830e51707..50b78e548 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -42,83 +42,34 @@ def coulomb_matrix(coords,
     else:
         conversion_rate = 1
 
-    mol_n = len(coords)
-    mol_nr = range(mol_n)
+    n = coords.shape[0]
+    nr = range(n)
 
-    if not mol_n == len(nc):
-        print(''.join(['Error. Molecule matrix dimension is different ',
-                       'than the nuclear charge array dimension.']))
-    else:
-        if size < mol_n:
-            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                           'greater than size. Using mol_n.']))
-            size = None
-
-        if size:
-            cm = np.zeros((size, size))
-            ml_r = range(size)
-
-            # Actual calculation of the coulomb matrix.
-            for i in ml_r:
-                if i < mol_n:
-                    x_i = coords[i, 0]
-                    y_i = coords[i, 1]
-                    z_i = coords[i, 2]
-                    Z_i = nc[i]
-                else:
-                    break
-
-                for j in ml_r:
-                    if j < mol_n:
-                        x_j = coords[j, 0]
-                        y_j = coords[j, 1]
-                        z_j = coords[j, 2]
-                        Z_j = nc[j]
-
-                        x = (x_i-x_j)**2
-                        y = (y_i-y_j)**2
-                        z = (z_i-z_j)**2
-
-                        if i == j:
-                            cm[i, j] = (0.5*Z_i**2.4)
-                        else:
-                            cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                          + y
-                                                                          + z))
-                    else:
-                        break
-
-            # Now the value will be returned.
-            if as_eig:
-                cm_sorted = np.sort(np.linalg.eig(cm)[0])[::-1]
-                # Thanks to SO for the following lines of code.
-                # https://stackoverflow.com/a/43011036
+    if not n == nc.shape[0]:
+        raise ValueError('Compound size is different than the nuclear charge\
+                         size. Arrays are not of the right shape.')
 
-                # Keep zeros at the end.
-                mask = cm_sorted != 0.
-                f_mask = mask.sum(0, keepdims=1) >\
-                    np.arange(cm_sorted.shape[0]-1, -1, -1)
+    if size < n:
+        print(''.join(['Error. Molecule matrix dimension (n) is ',
+                       'greater than size. Using n.']))
+        size = None
 
-                f_mask = f_mask[::-1]
-                cm_sorted[f_mask] = cm_sorted[mask]
-                cm_sorted[~f_mask] = 0.
+    if size:
+        cm = np.zeros((size, size))
+        ml_r = range(size)
 
-                return cm_sorted
-
-            else:
-                return cm
-
-        else:
-            cm_temp = []
-            # Actual calculation of the coulomb matrix.
-            for i in mol_nr:
+        # Actual calculation of the coulomb matrix.
+        for i in ml_r:
+            if i < n:
                 x_i = coords[i, 0]
                 y_i = coords[i, 1]
                 z_i = coords[i, 2]
                 Z_i = nc[i]
+            else:
+                break
 
-                cm_row = []
-                for j in mol_nr:
+            for j in ml_r:
+                if j < n:
                     x_j = coords[j, 0]
                     y_j = coords[j, 1]
                     z_j = coords[j, 2]
@@ -129,17 +80,66 @@ def coulomb_matrix(coords,
                     z = (z_i-z_j)**2
 
                     if i == j:
-                        cm_row.append(0.5*Z_i**2.4)
+                        cm[i, j] = (0.5*Z_i**2.4)
                     else:
-                        cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                        + y
-                                                                        + z))
+                        cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
+                                                                      + y
+                                                                      + z))
+                else:
+                    break
 
-                cm_temp.append(np.array(cm_row))
+        # Now the value will be returned.
+        if as_eig:
+            cm_sorted = np.sort(np.linalg.eig(cm)[0])[::-1]
+            # Thanks to SO for the following lines of code.
+            # https://stackoverflow.com/a/43011036
 
-            cm = np.array(cm_temp)
-            # Now the value will be returned.
-            if as_eig:
-                return np.sort(np.linalg.eig(cm)[0])[::-1]
-            else:
-                return cm
+            # Keep zeros at the end.
+            mask = cm_sorted != 0.
+            f_mask = mask.sum(0, keepdims=1) >\
+                np.arange(cm_sorted.shape[0]-1, -1, -1)
+
+            f_mask = f_mask[::-1]
+            cm_sorted[f_mask] = cm_sorted[mask]
+            cm_sorted[~f_mask] = 0.
+
+            return cm_sorted
+
+        else:
+            return cm
+
+    else:
+        cm_temp = []
+        # Actual calculation of the coulomb matrix.
+        for i in nr:
+            x_i = coords[i, 0]
+            y_i = coords[i, 1]
+            z_i = coords[i, 2]
+            Z_i = nc[i]
+
+            cm_row = []
+            for j in nr:
+                x_j = coords[j, 0]
+                y_j = coords[j, 1]
+                z_j = coords[j, 2]
+                Z_j = nc[j]
+
+                x = (x_i-x_j)**2
+                y = (y_i-y_j)**2
+                z = (z_i-z_j)**2
+
+                if i == j:
+                    cm_row.append(0.5*Z_i**2.4)
+                else:
+                    cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
+                                                                    + y
+                                                                    + z))
+
+            cm_temp.append(np.array(cm_row))
+
+        cm = np.array(cm_temp)
+        # Now the value will be returned.
+        if as_eig:
+            return np.sort(np.linalg.eig(cm)[0])[::-1]
+        else:
+            return cm
-- 
cgit v1.2.3-70-g09d2


From bb5e608f0ca6491e1abfd7fd25d2f28ff6c96e6b Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 17:58:13 -0700
Subject: Rewrite cmatrix

---
 ml_exp/representations.py | 111 +++++++++++++++-------------------------------
 1 file changed, 35 insertions(+), 76 deletions(-)

diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 50b78e548..bbc538ae6 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -38,87 +38,36 @@ def coulomb_matrix(coords,
     bohr_radius_units: if units should be in bohr's radius units.
     """
     if bohr_radius_units:
-        conversion_rate = 0.52917721067
+        cr = 0.52917721067
     else:
-        conversion_rate = 1
+        cr = 1
 
     n = coords.shape[0]
-    nr = range(n)
 
     if not n == nc.shape[0]:
         raise ValueError('Compound size is different than the nuclear charge\
                          size. Arrays are not of the right shape.')
 
     if size < n:
-        print(''.join(['Error. Molecule matrix dimension (n) is ',
-                       'greater than size. Using n.']))
-        size = None
-
-    if size:
-        cm = np.zeros((size, size))
-        ml_r = range(size)
-
-        # Actual calculation of the coulomb matrix.
-        for i in ml_r:
-            if i < n:
-                x_i = coords[i, 0]
-                y_i = coords[i, 1]
-                z_i = coords[i, 2]
-                Z_i = nc[i]
-            else:
-                break
-
-            for j in ml_r:
-                if j < n:
-                    x_j = coords[j, 0]
-                    y_j = coords[j, 1]
-                    z_j = coords[j, 2]
-                    Z_j = nc[j]
-
-                    x = (x_i-x_j)**2
-                    y = (y_i-y_j)**2
-                    z = (z_i-z_j)**2
-
-                    if i == j:
-                        cm[i, j] = (0.5*Z_i**2.4)
-                    else:
-                        cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                      + y
-                                                                      + z))
-                else:
-                    break
-
-        # Now the value will be returned.
-        if as_eig:
-            cm_sorted = np.sort(np.linalg.eig(cm)[0])[::-1]
-            # Thanks to SO for the following lines of code.
-            # https://stackoverflow.com/a/43011036
+        print('Error. Compound size (n) is greater han (size). Using (n)\
+              instead of (size).')
+        size = n
 
-            # Keep zeros at the end.
-            mask = cm_sorted != 0.
-            f_mask = mask.sum(0, keepdims=1) >\
-                np.arange(cm_sorted.shape[0]-1, -1, -1)
-
-            f_mask = f_mask[::-1]
-            cm_sorted[f_mask] = cm_sorted[mask]
-            cm_sorted[~f_mask] = 0.
-
-            return cm_sorted
-
-        else:
-            return cm
+    nr = range(size)
+    cm = np.empty((size, size), dtype=float)
 
-    else:
-        cm_temp = []
-        # Actual calculation of the coulomb matrix.
-        for i in nr:
+    # Actual calculation of the coulomb matrix.
+    for i in nr:
+        if i < n:
             x_i = coords[i, 0]
             y_i = coords[i, 1]
             z_i = coords[i, 2]
             Z_i = nc[i]
+        else:
+            break
 
-            cm_row = []
-            for j in nr:
+        for j in nr:
+            if j < n:
                 x_j = coords[j, 0]
                 y_j = coords[j, 1]
                 z_j = coords[j, 2]
@@ -129,17 +78,27 @@ def coulomb_matrix(coords,
                 z = (z_i-z_j)**2
 
                 if i == j:
-                    cm_row.append(0.5*Z_i**2.4)
+                    cm[i, j] = (0.5*Z_i**2.4)
                 else:
-                    cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                    + y
-                                                                    + z))
+                    cm[i, j] = (cr*Z_i*Z_j/math.sqrt(x + y + z))
+            else:
+                break
 
-            cm_temp.append(np.array(cm_row))
+    # Now the value will be returned.
+    if as_eig:
+        cm_sorted = np.sort(np.linalg.eig(cm)[0])[::-1]
+        # Thanks to SO for the following lines of code.
+        # https://stackoverflow.com/a/43011036
 
-        cm = np.array(cm_temp)
-        # Now the value will be returned.
-        if as_eig:
-            return np.sort(np.linalg.eig(cm)[0])[::-1]
-        else:
-            return cm
+        # Keep zeros at the end.
+        mask = cm_sorted != 0.
+        f_mask = mask.sum(0, keepdims=1) >\
+            np.arange(cm_sorted.shape[0]-1, -1, -1)
+
+        f_mask = f_mask[::-1]
+        cm_sorted[f_mask] = cm_sorted[mask]
+        cm_sorted[~f_mask] = 0.
+
+        return cm_sorted
+    else:
+        return cm
-- 
cgit v1.2.3-70-g09d2


From efaa8eeb58d8e19d32f94ec4e91ec794574dfd32 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 18:13:33 -0700
Subject: Coulomb matrix done, fix init

---
 ml_exp/__init__.py        | 27 ++++-----------------------
 ml_exp/compound.py        | 28 ++++++++++++++++++++++++----
 ml_exp/representations.py | 10 +++++-----
 3 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 06425f6a2..8e803b37e 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -20,29 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-from ml_exp.read_qm7_data import read_nc_data, read_db_data, read_qm7_data
-from ml_exp.c_matrix import c_matrix, c_matrix_multiple
-from ml_exp.lj_matrix import lj_matrix, lj_matrix_multiple
-from ml_exp.frob_norm import frob_norm
-from ml_exp.gauss_kernel import gauss_kernel
-from ml_exp.cholesky_solve import cholesky_solve
-from ml_exp.do_ml import do_ml
-from ml_exp.parallel_create_matrices import parallel_create_matrices
-from ml_exp.misc import plot_benchmarks
-
+from ml_exp.compound import Compound
+from ml_exp.representations import coulomb_matrix
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
-__all__ = ['read_nc_data',
-           'read_db_data',
-           'read_qm7_data',
-           'c_matrix',
-           'c_matrix_multiple',
-           'lj_matrix',
-           'lj_matrix_multiple',
-           'frob_norm',
-           'gauss_kernel',
-           'cholesky_solve',
-           'do_ml',
-           'parallel_create_matrices',
-           'plot_benchmarks']
+__all__ = ['Compound',
+           'coulomb_matrix']
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 9536767a7..3ea377826 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -22,10 +22,16 @@ SOFTWARE.
 """
 import numpy as np
 from ml_exp.data import NUCLEAR_CHARGE
+from ml_exp.representations import coulomb_matrix
 
 
 class Compound:
-    def __init__(self, xyz=None):
+    def __init__(self,
+                 xyz=None):
+        """
+        Initialization of the Compound.
+        xyz: (path to) the xyz file.
+        """
         # empty_array = np.asarray([], dtype=float)
 
         self.n = None
@@ -41,10 +47,24 @@ class Compound:
         if xyz is not None:
             self.read_xyz(xyz)
 
-    def gen_cm(self):
-        pass
+    def gen_cm(self,
+               size=23,
+               as_eig=True,
+               bohr_ru=False):
+        """
+        Generate the Coulomb Matrix for the compund.
+        size: compound size.
+        as_eig: if the representation should be as the eigenvalues.
+        bhor_ru: if radius units should be in bohr's radius units.
+        """
+        self.coulomb_matrix = coulomb_matrix(self.coordinates,
+                                             self.atoms_nc,
+                                             size=size,
+                                             as_eig=as_eig,
+                                             bhor_ru=bohr_ru)
 
-    def read_xyz(self, filename):
+    def read_xyz(self,
+                 filename):
         """
         Reads an xyz file and adds the corresponding data to the Compound.
         filename: (path to) the xyz file.
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index bbc538ae6..c503c310d 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -28,16 +28,16 @@ def coulomb_matrix(coords,
                    nc,
                    size=23,
                    as_eig=True,
-                   bohr_radius_units=False):
+                   bhor_ru=False):
     """
     Creates the Coulomb Matrix from the molecule data given.
     coords: compound coordinates.
     nc: nuclear charge data.
     size: compound size.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
+    as_eig: if the representation should be as the eigenvalues.
+    bhor_ru: if radius units should be in bohr's radius units.
     """
-    if bohr_radius_units:
+    if bhor_ru:
         cr = 0.52917721067
     else:
         cr = 1
@@ -54,7 +54,7 @@ def coulomb_matrix(coords,
         size = n
 
     nr = range(size)
-    cm = np.empty((size, size), dtype=float)
+    cm = np.zeros((size, size), dtype=float)
 
     # Actual calculation of the coulomb matrix.
     for i in nr:
-- 
cgit v1.2.3-70-g09d2


From 8a108f8757027750f825c6ef9dd55adb4c09def0 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 18:20:22 -0700
Subject: Remove unused function

---
 ml_exp/lj_matrix.py | 46 ----------------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/ml_exp/lj_matrix.py b/ml_exp/lj_matrix.py
index 549790755..5e3e89678 100644
--- a/ml_exp/lj_matrix.py
+++ b/ml_exp/lj_matrix.py
@@ -20,11 +20,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import time
 import math
 import numpy as np
 from numpy.linalg import eig
-from ml_exp.misc import printc
 
 
 def lj_matrix(mol_data,
@@ -176,47 +174,3 @@ def lj_matrix(mol_data,
                 return np.sort(eig(lj)[0])[::-1]
             else:
                 return lj
-
-
-def lj_matrix_multiple(mol_data,
-                       nc_data,
-                       pipe=None,
-                       diag_value=None,
-                       sigma=1.0,
-                       epsilon=1.0,
-                       max_len=25,
-                       as_eig=True,
-                       bohr_radius_units=False):
-    """
-    Calculates the Lennard-Jones Matrix of multiple molecules.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
-    diag_value: if special diagonal value is to be used.
-    sigma: sigma value.
-    epsilon: epsilon value.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    printc('L-J Matrices calculation started.', 'CYAN')
-    tic = time.perf_counter()
-
-    ljm_data = np.array([lj_matrix(mol,
-                                   nc,
-                                   diag_value,
-                                   sigma,
-                                   epsilon,
-                                   max_len,
-                                   as_eig,
-                                   bohr_radius_units)
-                        for mol, nc in zip(mol_data, nc_data)])
-
-    toc = time.perf_counter()
-    printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN')
-
-    if pipe:
-        pipe.send(ljm_data)
-
-    return ljm_data
-- 
cgit v1.2.3-70-g09d2


From a2463470b639a743189deece071654ed03791c3c Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 19:00:34 -0700
Subject: Moved ljmatrix to representations, rewrote code

---
 ml_exp/lj_matrix.py       | 176 ----------------------------------------------
 ml_exp/representations.py | 102 +++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 176 deletions(-)
 delete mode 100644 ml_exp/lj_matrix.py

diff --git a/ml_exp/lj_matrix.py b/ml_exp/lj_matrix.py
deleted file mode 100644
index 5e3e89678..000000000
--- a/ml_exp/lj_matrix.py
+++ /dev/null
@@ -1,176 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import math
-import numpy as np
-from numpy.linalg import eig
-
-
-def lj_matrix(mol_data,
-              nc_data,
-              diag_value=None,
-              sigma=1.0,
-              epsilon=1.0,
-              max_len=25,
-              as_eig=True,
-              bohr_radius_units=False):
-    """
-    Creates the Lennard-Jones Matrix from the molecule data given.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    diag_value: if special diagonal value is to be used.
-    sigma: sigma value.
-    epsilon: epsilon value.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    if bohr_radius_units:
-        conversion_rate = 0.52917721067
-    else:
-        conversion_rate = 1
-
-    mol_n = len(mol_data)
-    mol_nr = range(mol_n)
-
-    if not mol_n == len(nc_data):
-        print(''.join(['Error. Molecule matrix dimension is different ',
-                       'than the nuclear charge array dimension.']))
-    else:
-        if max_len < mol_n:
-            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                           'greater than max_len. Using mol_n.']))
-            max_len = None
-
-        if max_len:
-            lj = np.zeros((max_len, max_len))
-            ml_r = range(max_len)
-
-            # Actual calculation of the coulomb matrix.
-            for i in ml_r:
-                if i < mol_n:
-                    x_i = mol_data[i, 0]
-                    y_i = mol_data[i, 1]
-                    z_i = mol_data[i, 2]
-                    Z_i = nc_data[i]
-                else:
-                    break
-
-                for j in ml_r:
-                    if j < mol_n:
-                        x_j = mol_data[j, 0]
-                        y_j = mol_data[j, 1]
-                        z_j = mol_data[j, 2]
-
-                        x = (x_i-x_j)**2
-                        y = (y_i-y_j)**2
-                        z = (z_i-z_j)**2
-
-                        if i == j:
-                            if diag_value is None:
-                                lj[i, j] = (0.5*Z_i**2.4)
-                            else:
-                                lj[i, j] = diag_value
-                        else:
-                            # Calculations are done after i==j is checked
-                            # so no division by zero is done.
-
-                            # A little play with r exponents
-                            # so no square root is calculated.
-                            # Conversion factor is included in r^2.
-
-                            # 1/r^2
-                            r_2 = sigma**2/(conversion_rate**2*(x + y + z))
-
-                            r_6 = math.pow(r_2, 3)
-                            r_12 = math.pow(r_6, 2)
-                            lj[i, j] = (4*epsilon*(r_12 - r_6))
-                    else:
-                        break
-
-            # Now the value will be returned.
-            if as_eig:
-                lj_sorted = np.sort(eig(lj)[0])[::-1]
-                # Thanks to SO for the following lines of code.
-                # https://stackoverflow.com/a/43011036
-
-                # Keep zeros at the end.
-                mask = lj_sorted != 0.
-                f_mask = mask.sum(0, keepdims=1) >\
-                    np.arange(lj_sorted.shape[0]-1, -1, -1)
-
-                f_mask = f_mask[::-1]
-                lj_sorted[f_mask] = lj_sorted[mask]
-                lj_sorted[~f_mask] = 0.
-
-                return lj_sorted
-
-            else:
-                return lj
-
-        else:
-            lj_temp = []
-            # Actual calculation of the coulomb matrix.
-            for i in mol_nr:
-                x_i = mol_data[i, 0]
-                y_i = mol_data[i, 1]
-                z_i = mol_data[i, 2]
-                Z_i = nc_data[i]
-
-                lj_row = []
-                for j in mol_nr:
-                    x_j = mol_data[j, 0]
-                    y_j = mol_data[j, 1]
-                    z_j = mol_data[j, 2]
-
-                    x = (x_i-x_j)**2
-                    y = (y_i-y_j)**2
-                    z = (z_i-z_j)**2
-
-                    if i == j:
-                        if not diag_value:
-                            lj_row.append(0.5*Z_i**2.4)
-                        else:
-                            lj_row.append(diag_value)
-                    else:
-                        # Calculations are done after i==j is checked
-                        # so no division by zero is done.
-
-                        # A little play with r exponents
-                        # so no square root is calculated.
-                        # Conversion factor is included in r^2.
-
-                        # 1/r^2
-                        r_2 = sigma**2/(conversion_rate**2*(x + y + z))
-
-                        r_6 = math.pow(r_2, 3)
-                        r_12 = math.pow(r_6, 2)
-                        lj_row.append(4*epsilon*(r_12 - r_6))
-
-                lj_temp.append(np.array(lj_row))
-
-            lj = np.array(lj_temp)
-            # Now the value will be returned.
-            if as_eig:
-                return np.sort(eig(lj)[0])[::-1]
-            else:
-                return lj
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index c503c310d..b4edef8a5 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -102,3 +102,105 @@ def coulomb_matrix(coords,
         return cm_sorted
     else:
         return cm
+
+
+def lennard_jones_matrix(coords,
+                         nc,
+                         diag_value=None,
+                         sigma=1.0,
+                         epsilon=1.0,
+                         size=23,
+                         as_eig=True,
+                         bhor_ru=False):
+    """
+    Creates the Lennard-Jones Matrix from the molecule data given.
+    coords: compound coordinates.
+    nc: nuclear charge data.
+    diag_value: if special diagonal value is to be used.
+    sigma: sigma value.
+    epsilon: epsilon value.
+    size: compound size.
+    as_eig: if the representation should be as the eigenvalues.
+    bhor_ru: if radius units should be in bohr's radius units.
+    """
+    if bhor_ru:
+        cr = 0.52917721067
+    else:
+        cr = 1
+
+    n = coords.shape[0]
+
+    if not n == nc.shape[0]:
+        raise ValueError('Compound size is different than the nuclear charge\
+                         size. Arrays are not of the right shape.')
+
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)\
+              instead of (size).')
+        size = n
+
+    nr = range(size)
+    lj = np.zeros((size, size), dtype=float)
+
+    # Actual calculation of the lennard-jones matrix.
+    for i in nr:
+        if i < n:
+            x_i = coords[i, 0]
+            y_i = coords[i, 1]
+            z_i = coords[i, 2]
+            Z_i = nc[i]
+        else:
+            break
+
+        for j in nr:
+            if j < n:
+                x_j = coords[j, 0]
+                y_j = coords[j, 1]
+                z_j = coords[j, 2]
+                # Never really used because when needed it is equal to Z_i.
+                # Z_j = nc[j]
+
+                x = (x_i-x_j)**2
+                y = (y_i-y_j)**2
+                z = (z_i-z_j)**2
+
+                if i == j:
+                    if diag_value is None:
+                        lj[i, j] = (0.5*Z_i**2.4)
+                    else:
+                        lj[i, j] = diag_value
+                else:
+                    # Calculations are done after i==j is checked
+                    # so no division by zero is done.
+
+                    # A little play with r exponents
+                    # so no square root is calculated.
+                    # Conversion factor is included in r^2.
+
+                    # 1/r^2
+                    r_2 = sigma**2/(cr**2*(x + y + z))
+
+                    r_6 = math.pow(r_2, 3)
+                    r_12 = math.pow(r_6, 2)
+                    lj[i, j] = (4*epsilon*(r_12 - r_6))
+            else:
+                break
+
+    # Now the value will be returned.
+    if as_eig:
+        lj_sorted = np.sort(np.linalg.eig(lj)[0])[::-1]
+        # Thanks to SO for the following lines of code.
+        # https://stackoverflow.com/a/43011036
+
+        # Keep zeros at the end.
+        mask = lj_sorted != 0.
+        f_mask = mask.sum(0, keepdims=1) >\
+            np.arange(lj_sorted.shape[0]-1, -1, -1)
+
+        f_mask = f_mask[::-1]
+        lj_sorted[f_mask] = lj_sorted[mask]
+        lj_sorted[~f_mask] = 0.
+
+        return lj_sorted
+    else:
+        return lj
-- 
cgit v1.2.3-70-g09d2


From 5ba008a928d34c49e431f32b5161f624d780c254 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 19:06:36 -0700
Subject: Add ljmatrix to compound

---
 ml_exp/compound.py | 41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 3ea377826..d74f1230a 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -22,7 +22,7 @@ SOFTWARE.
 """
 import numpy as np
 from ml_exp.data import NUCLEAR_CHARGE
-from ml_exp.representations import coulomb_matrix
+from ml_exp.representations import coulomb_matrix, lennard_jones_matrix
 
 
 class Compound:
@@ -40,8 +40,8 @@ class Compound:
         self.coordinates = None
         self.energy = None
 
-        self.coulomb_matrix = None
-        self.lennard_jones_matrix = None
+        self.c_matrix = None
+        self.lj_matrix = None
         self.bob = None
 
         if xyz is not None:
@@ -57,11 +57,36 @@ class Compound:
         as_eig: if the representation should be as the eigenvalues.
         bhor_ru: if radius units should be in bohr's radius units.
         """
-        self.coulomb_matrix = coulomb_matrix(self.coordinates,
-                                             self.atoms_nc,
-                                             size=size,
-                                             as_eig=as_eig,
-                                             bhor_ru=bohr_ru)
+        self.c_matrix = coulomb_matrix(self.coordinates,
+                                       self.atoms_nc,
+                                       size=size,
+                                       as_eig=as_eig,
+                                       bhor_ru=bohr_ru)
+
+    def gen_lj(self,
+               diag_value=None,
+               sigma=1.0,
+               epsilon=1.0,
+               size=23,
+               as_eig=True,
+               bohr_ru=False):
+        """
+        Generate the Lennard-Jones Matrix for the compund.
+        diag_value: if special diagonal value is to be used.
+        sigma: sigma value.
+        epsilon: epsilon value.
+        size: compound size.
+        as_eig: if the representation should be as the eigenvalues.
+        bhor_ru: if radius units should be in bohr's radius units.
+        """
+        self.lj_matrix = lennard_jones_matrix(self.coordinates,
+                                              self.atoms_nc,
+                                              diag_value=diag_value,
+                                              sigma=sigma,
+                                              epsilon=epsilon,
+                                              size=size,
+                                              as_eig=as_eig,
+                                              bhor_ru=bohr_ru)
 
     def read_xyz(self,
                  filename):
-- 
cgit v1.2.3-70-g09d2


From 05d97a15d822400f7018a4381593c2638c6ad577 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 19:18:24 -0700
Subject: Refactor code

---
 ml_exp/compound.py | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index d74f1230a..c6d5f8a64 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -40,8 +40,8 @@ class Compound:
         self.coordinates = None
         self.energy = None
 
-        self.c_matrix = None
-        self.lj_matrix = None
+        self.cm = None
+        self.ljm = None
         self.bob = None
 
         if xyz is not None:
@@ -57,19 +57,19 @@ class Compound:
         as_eig: if the representation should be as the eigenvalues.
         bhor_ru: if radius units should be in bohr's radius units.
         """
-        self.c_matrix = coulomb_matrix(self.coordinates,
-                                       self.atoms_nc,
-                                       size=size,
-                                       as_eig=as_eig,
-                                       bhor_ru=bohr_ru)
+        self.cm = coulomb_matrix(self.coordinates,
+                                 self.atoms_nc,
+                                 size=size,
+                                 as_eig=as_eig,
+                                 bhor_ru=bohr_ru)
 
-    def gen_lj(self,
-               diag_value=None,
-               sigma=1.0,
-               epsilon=1.0,
-               size=23,
-               as_eig=True,
-               bohr_ru=False):
+    def gen_ljm(self,
+                diag_value=None,
+                sigma=1.0,
+                epsilon=1.0,
+                size=23,
+                as_eig=True,
+                bohr_ru=False):
         """
         Generate the Lennard-Jones Matrix for the compund.
         diag_value: if special diagonal value is to be used.
@@ -79,14 +79,14 @@ class Compound:
         as_eig: if the representation should be as the eigenvalues.
         bhor_ru: if radius units should be in bohr's radius units.
         """
-        self.lj_matrix = lennard_jones_matrix(self.coordinates,
-                                              self.atoms_nc,
-                                              diag_value=diag_value,
-                                              sigma=sigma,
-                                              epsilon=epsilon,
-                                              size=size,
-                                              as_eig=as_eig,
-                                              bhor_ru=bohr_ru)
+        self.ljm = lennard_jones_matrix(self.coordinates,
+                                        self.atoms_nc,
+                                        diag_value=diag_value,
+                                        sigma=sigma,
+                                        epsilon=epsilon,
+                                        size=size,
+                                        as_eig=as_eig,
+                                        bhor_ru=bohr_ru)
 
     def read_xyz(self,
                  filename):
-- 
cgit v1.2.3-70-g09d2


From a7d605f094e81c5139530560e26eb0ff9be8ec82 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Fri, 21 Feb 2020 19:21:18 -0700
Subject: Add ljm to init

---
 ml_exp/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 8e803b37e..f5ec7068b 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -21,9 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from ml_exp.compound import Compound
-from ml_exp.representations import coulomb_matrix
+from ml_exp.representations import coulomb_matrix, lennard_jones_matrix
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
 __all__ = ['Compound',
-           'coulomb_matrix']
+           'coulomb_matrix',
+           'lennard_jones_matrix']
-- 
cgit v1.2.3-70-g09d2


From 4f95e38f1943856f9a0581c06ea603ffb6283d47 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sat, 22 Feb 2020 16:02:12 -0700
Subject: Move adj matrix to representations

---
 ml_exp/adj_matrix.py      | 131 ----------------------------------------------
 ml_exp/representations.py | 108 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+), 131 deletions(-)
 delete mode 100644 ml_exp/adj_matrix.py

diff --git a/ml_exp/adj_matrix.py b/ml_exp/adj_matrix.py
deleted file mode 100644
index de484b5be..000000000
--- a/ml_exp/adj_matrix.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros, dot
-from numpy.linalg import norm
-from ml_exp.misc import printc
-
-
-def fneig_matrix(atoms,
-                 xyz,
-                 nc=None,
-                 use_forces=False):
-    """
-    Creates the first neighbor matrix of the given molecule data.
-    atoms: list of atoms.
-    xyz: matrix of atomic coords.
-    nc: nuclear charge info.
-    use_forces: if the use of forces instead of k_cx should be used.
-    NOTE: Bond distance of carbon to other elements
-        are (for atoms present in the qm7 dataset):
-            C: 1.20 - 1.53 A
-            H: 1.06 - 1.12 A
-            O: 1.43 - 2.15 A
-            N: 1.47 - 2.10 A
-            S: 1.81 - 2.55 A
-    """
-    if use_forces and nc is None:
-        printc(f'Error. use_forces={use_forces} but nc={nc}', 'RED')
-        return None
-    # Possible bonds.
-    cc_bond = sorted(['C', 'C'])
-    ch_bond = sorted(['C', 'H'])
-    co_bond = sorted(['C', 'O'])
-    cn_bond = sorted(['C', 'N'])
-    cs_bond = sorted(['C', 'S'])
-
-    # Number of atoms, empty matrix and bond list.
-    n = len(atoms)
-    fnm = array(zeros((n, n)), dtype=float)
-    bonds = []
-    forces = []
-    for i, xyz_i in enumerate(xyz):
-        for j, xyz_j in enumerate(xyz):
-            # Ignore the diagonal.
-            if i != j:
-                bond = sorted([atoms[i], atoms[j]])
-                rv = xyz_i - xyz_j
-                r = norm(rv)
-                # Check for each type of bond.
-                if (cc_bond == bond) and (r >= 1.20 and r <= 1.53):
-                    fnm[i, j] = 1.0
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12):
-                    fnm[i, j] = 1.0
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (co_bond == bond) and (r >= 1.43 and r <= 2.15):
-                    fnm[i, j] = 0.8
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (cn_bond == bond) and (r >= 1.47 and r <= 2.10):
-                    fnm[i, j] = 1.0
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (cs_bond == bond) and (r >= 1.81 and r <= 2.55):
-                    fnm[i, j] = 0.7
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-    if use_forces:
-        return fnm, bonds, forces
-    return fnm, bonds
-
-
-def adj_matrix(fneig_matrix,
-               bonds,
-               forces=None,
-               max_len=25):
-    """
-    Calculates the adjacency matrix given the bond list.
-    bonds: list of bonds (tuple of indexes).
-    max_len: maximum amount of atoms in molecule.
-    forces: list of forces.
-    """
-    n = len(bonds)
-
-    if max_len < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_len = n
-
-    am = array(zeros((max_len, max_len)))
-    for i, bond_i in enumerate(bonds):
-        for j, bond_j in enumerate(bonds):
-            # Ignore the diagonal.
-            if i != j:
-                if (bond_i[0] in bond_j) or (bond_i[1] in bond_j):
-                    if forces:
-                        am[i, j] = dot(forces[i], forces[j])
-                    else:
-                        am[i, j] = fneig_matrix[bond_i[0], bond_i[1]]
-    return am
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index b4edef8a5..f503b5987 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -204,3 +204,111 @@ def lennard_jones_matrix(coords,
         return lj_sorted
     else:
         return lj
+
+
+def fneig_matrix(atoms,
+                 xyz,
+                 nc=None,
+                 use_forces=False):
+    """
+    Creates the first neighbor matrix of the given molecule data.
+    atoms: list of atoms.
+    xyz: matrix of atomic coords.
+    nc: nuclear charge info.
+    use_forces: if the use of forces instead of k_cx should be used.
+    NOTE: Bond distance of carbon to other elements
+        are (for atoms present in the qm7 dataset):
+            C: 1.20 - 1.53 A
+            H: 1.06 - 1.12 A
+            O: 1.43 - 2.15 A
+            N: 1.47 - 2.10 A
+            S: 1.81 - 2.55 A
+    """
+    if use_forces and nc is None:
+        # print(f'Error. use_forces={use_forces} but nc={nc}', 'RED')
+        return None
+    # Possible bonds.
+    cc_bond = sorted(['C', 'C'])
+    ch_bond = sorted(['C', 'H'])
+    co_bond = sorted(['C', 'O'])
+    cn_bond = sorted(['C', 'N'])
+    cs_bond = sorted(['C', 'S'])
+
+    # Number of atoms, empty matrix and bond list.
+    n = len(atoms)
+    fnm = np.empty((n, n), dtype=float)
+    bonds = []
+    forces = []
+    for i, xyz_i in enumerate(xyz):
+        for j, xyz_j in enumerate(xyz):
+
+            # Ignore the diagonal.
+            if i != j:
+                bond = sorted([atoms[i], atoms[j]])
+                rv = xyz_i - xyz_j
+                r = np.linalg.norm(rv)
+
+                # Check for each type of bond.
+                if (cc_bond == bond) and (r >= 1.20 and r <= 1.53):
+                    fnm[i, j] = 1.0
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12):
+                    fnm[i, j] = 1.0
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (co_bond == bond) and (r >= 1.43 and r <= 2.15):
+                    fnm[i, j] = 0.8
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (cn_bond == bond) and (r >= 1.47 and r <= 2.10):
+                    fnm[i, j] = 1.0
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (cs_bond == bond) and (r >= 1.81 and r <= 2.55):
+                    fnm[i, j] = 0.7
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+    if use_forces:
+        return fnm, bonds, forces
+    return fnm, bonds
+
+
+def adj_matrix(fneig_matrix,
+               bonds,
+               forces=None,
+               max_len=25):
+    """
+    Calculates the adjacency matrix given the bond list.
+    bonds: list of bonds (tuple of indexes).
+    max_len: maximum amount of atoms in molecule.
+    forces: list of forces.
+    """
+    n = len(bonds)
+
+    if max_len < n:
+        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+                       'greater than max_len. Using mol_n.']))
+        max_len = n
+
+    am = np.empty((max_len, max_len), dtype=float)
+    for i, bond_i in enumerate(bonds):
+        for j, bond_j in enumerate(bonds):
+            # Ignore the diagonal.
+            if i != j:
+                if (bond_i[0] in bond_j) or (bond_i[1] in bond_j):
+                    if forces:
+                        am[i, j] = np.dot(forces[i], forces[j])
+                    else:
+                        am[i, j] = fneig_matrix[bond_i[0], bond_i[1]]
+    return am
-- 
cgit v1.2.3-70-g09d2


From 2572f9581b24547692a5654a1d452690a2cbcad6 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sat, 22 Feb 2020 16:36:40 -0700
Subject: Add adj matrix to compound)

---
 ml_exp/compound.py        | 35 ++++++++++++++++---
 ml_exp/representations.py | 89 ++++++++++++++++++++++++++++-------------------
 2 files changed, 83 insertions(+), 41 deletions(-)

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index c6d5f8a64..d4dc9b7c0 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -22,7 +22,8 @@ SOFTWARE.
 """
 import numpy as np
 from ml_exp.data import NUCLEAR_CHARGE
-from ml_exp.representations import coulomb_matrix, lennard_jones_matrix
+from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
+    first_neighbor_matrix, adjacency_matrix
 
 
 class Compound:
@@ -42,6 +43,7 @@ class Compound:
 
         self.cm = None
         self.ljm = None
+        self.am = None
         self.bob = None
 
         if xyz is not None:
@@ -55,13 +57,13 @@ class Compound:
         Generate the Coulomb Matrix for the compund.
         size: compound size.
         as_eig: if the representation should be as the eigenvalues.
-        bhor_ru: if radius units should be in bohr's radius units.
+        bohr_ru: if radius units should be in bohr's radius units.
         """
         self.cm = coulomb_matrix(self.coordinates,
                                  self.atoms_nc,
                                  size=size,
                                  as_eig=as_eig,
-                                 bhor_ru=bohr_ru)
+                                 bohr_ru=bohr_ru)
 
     def gen_ljm(self,
                 diag_value=None,
@@ -77,7 +79,7 @@ class Compound:
         epsilon: epsilon value.
         size: compound size.
         as_eig: if the representation should be as the eigenvalues.
-        bhor_ru: if radius units should be in bohr's radius units.
+        bohr_ru: if radius units should be in bohr's radius units.
         """
         self.ljm = lennard_jones_matrix(self.coordinates,
                                         self.atoms_nc,
@@ -86,7 +88,30 @@ class Compound:
                                         epsilon=epsilon,
                                         size=size,
                                         as_eig=as_eig,
-                                        bhor_ru=bohr_ru)
+                                        bohr_ru=bohr_ru)
+
+    def gen_am(self,
+               use_forces=False,
+               size=23,
+               bohr_ru=False):
+        """
+        Generate the Adjacency Matrix for the compund.
+        use_forces: if the use of forces instead of k_cx should be used.
+        size: compound size.
+        bohr_ru: if radius units should be in bohr's radius units.
+        """
+        # First, generate the first neighor matrix.
+        fnm, bonds, forces = first_neighbor_matrix(self.coordinates,
+                                                   self.atoms_nc,
+                                                   self.atoms,
+                                                   use_forces=use_forces,
+                                                   bohr_ru=bohr_ru)
+
+        # Now, generate the adjacency matrix.
+        self.am = adjacency_matrix(fnm,
+                                   bonds,
+                                   forces,
+                                   size=size)
 
     def read_xyz(self,
                  filename):
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index f503b5987..483da2898 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -28,16 +28,16 @@ def coulomb_matrix(coords,
                    nc,
                    size=23,
                    as_eig=True,
-                   bhor_ru=False):
+                   bohr_ru=False):
     """
     Creates the Coulomb Matrix from the molecule data given.
     coords: compound coordinates.
     nc: nuclear charge data.
     size: compound size.
     as_eig: if the representation should be as the eigenvalues.
-    bhor_ru: if radius units should be in bohr's radius units.
+    bohr_ru: if radius units should be in bohr's radius units.
     """
-    if bhor_ru:
+    if bohr_ru:
         cr = 0.52917721067
     else:
         cr = 1
@@ -111,7 +111,7 @@ def lennard_jones_matrix(coords,
                          epsilon=1.0,
                          size=23,
                          as_eig=True,
-                         bhor_ru=False):
+                         bohr_ru=False):
     """
     Creates the Lennard-Jones Matrix from the molecule data given.
     coords: compound coordinates.
@@ -121,9 +121,9 @@ def lennard_jones_matrix(coords,
     epsilon: epsilon value.
     size: compound size.
     as_eig: if the representation should be as the eigenvalues.
-    bhor_ru: if radius units should be in bohr's radius units.
+    bohr_ru: if radius units should be in bohr's radius units.
     """
-    if bhor_ru:
+    if bohr_ru:
         cr = 0.52917721067
     else:
         cr = 1
@@ -206,16 +206,18 @@ def lennard_jones_matrix(coords,
         return lj
 
 
-def fneig_matrix(atoms,
-                 xyz,
-                 nc=None,
-                 use_forces=False):
+def first_neighbor_matrix(coords,
+                          nc,
+                          atoms,
+                          use_forces=False,
+                          bohr_ru=False):
     """
-    Creates the first neighbor matrix of the given molecule data.
+    Creates the First Neighbor Matrix from the molecule data given.
+    coords: compound coordinates.
+    nc: nuclear charge data.
     atoms: list of atoms.
-    xyz: matrix of atomic coords.
-    nc: nuclear charge info.
     use_forces: if the use of forces instead of k_cx should be used.
+    bohr_ru: if radius units should be in bohr's radius units.
     NOTE: Bond distance of carbon to other elements
         are (for atoms present in the qm7 dataset):
             C: 1.20 - 1.53 A
@@ -224,9 +226,17 @@ def fneig_matrix(atoms,
             N: 1.47 - 2.10 A
             S: 1.81 - 2.55 A
     """
-    if use_forces and nc is None:
-        # print(f'Error. use_forces={use_forces} but nc={nc}', 'RED')
-        return None
+    if bohr_ru:
+        cr = 0.52917721067
+    else:
+        cr = 1
+
+    n = coords.shape[0]
+
+    if not n == nc.shape[0]:
+        raise ValueError('Compound size is different than the nuclear charge\
+                         size. Arrays are not of the right shape.')
+
     # Possible bonds.
     cc_bond = sorted(['C', 'C'])
     ch_bond = sorted(['C', 'H'])
@@ -234,19 +244,21 @@ def fneig_matrix(atoms,
     cn_bond = sorted(['C', 'N'])
     cs_bond = sorted(['C', 'S'])
 
-    # Number of atoms, empty matrix and bond list.
-    n = len(atoms)
-    fnm = np.empty((n, n), dtype=float)
+    if use_forces:
+        fnm = np.empty((n, n), dtype=float)
+    else:
+        fnm = np.empty((n, n), dtype=int)
+
     bonds = []
     forces = []
-    for i, xyz_i in enumerate(xyz):
-        for j, xyz_j in enumerate(xyz):
+    for i, xyz_i in enumerate(coords):
+        for j, xyz_j in enumerate(coords):
 
             # Ignore the diagonal.
             if i != j:
                 bond = sorted([atoms[i], atoms[j]])
                 rv = xyz_i - xyz_j
-                r = np.linalg.norm(rv)
+                r = np.linalg.norm(rv)/cr
 
                 # Check for each type of bond.
                 if (cc_bond == bond) and (r >= 1.20 and r <= 1.53):
@@ -279,29 +291,33 @@ def fneig_matrix(atoms,
                         bonds.append((i, j))
                         if use_forces:
                             forces.append(rv*nc[i]*nc[j]/r**3)
-    if use_forces:
-        return fnm, bonds, forces
-    return fnm, bonds
+
+    return fnm, bonds, forces
 
 
-def adj_matrix(fneig_matrix,
-               bonds,
-               forces=None,
-               max_len=25):
+def adjacency_matrix(fnm,
+                     bonds,
+                     forces,
+                     size=23):
     """
     Calculates the adjacency matrix given the bond list.
+    fnm: first neighbour matrix.
     bonds: list of bonds (tuple of indexes).
-    max_len: maximum amount of atoms in molecule.
     forces: list of forces.
+    size: compund size.
     """
     n = len(bonds)
 
-    if max_len < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_len = n
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)\
+              instead of (size).')
+        size = n
+
+    if forces:
+        am = np.empty((size, size), dtype=float)
+    else:
+        am = np.empty((size, size), dtype=int)
 
-    am = np.empty((max_len, max_len), dtype=float)
     for i, bond_i in enumerate(bonds):
         for j, bond_j in enumerate(bonds):
             # Ignore the diagonal.
@@ -310,5 +326,6 @@ def adj_matrix(fneig_matrix,
                     if forces:
                         am[i, j] = np.dot(forces[i], forces[j])
                     else:
-                        am[i, j] = fneig_matrix[bond_i[0], bond_i[1]]
+                        am[i, j] = fnm[bond_i[0], bond_i[1]]
+
     return am
-- 
cgit v1.2.3-70-g09d2


From fb4307513ddda06742bbe4d542c2716c0d67c06d Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sat, 22 Feb 2020 16:56:05 -0700
Subject: Refactor code

---
 ml_exp/representations.py | 93 +++++++++++++----------------------------------
 1 file changed, 26 insertions(+), 67 deletions(-)

diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 483da2898..354611190 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -20,7 +20,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import math
 import numpy as np
 
 
@@ -53,36 +52,17 @@ def coulomb_matrix(coords,
               instead of (size).')
         size = n
 
-    nr = range(size)
     cm = np.zeros((size, size), dtype=float)
 
     # Actual calculation of the coulomb matrix.
-    for i in nr:
-        if i < n:
-            x_i = coords[i, 0]
-            y_i = coords[i, 1]
-            z_i = coords[i, 2]
-            Z_i = nc[i]
-        else:
-            break
-
-        for j in nr:
-            if j < n:
-                x_j = coords[j, 0]
-                y_j = coords[j, 1]
-                z_j = coords[j, 2]
-                Z_j = nc[j]
-
-                x = (x_i-x_j)**2
-                y = (y_i-y_j)**2
-                z = (z_i-z_j)**2
-
-                if i == j:
-                    cm[i, j] = (0.5*Z_i**2.4)
-                else:
-                    cm[i, j] = (cr*Z_i*Z_j/math.sqrt(x + y + z))
+    for i, xyz_i in enumerate(coords):
+        for j, xyz_j in enumerate(coords):
+            rv = xyz_i - xyz_j
+            r = np.linalg.norm(rv)/cr
+            if i == j:
+                cm[i, j] = (0.5*nc[i]**2.4)
             else:
-                break
+                cm[i, j] = nc[i]*nc[j]/r
 
     # Now the value will be returned.
     if as_eig:
@@ -139,52 +119,31 @@ def lennard_jones_matrix(coords,
               instead of (size).')
         size = n
 
-    nr = range(size)
     lj = np.zeros((size, size), dtype=float)
 
     # Actual calculation of the lennard-jones matrix.
-    for i in nr:
-        if i < n:
-            x_i = coords[i, 0]
-            y_i = coords[i, 1]
-            z_i = coords[i, 2]
-            Z_i = nc[i]
-        else:
-            break
-
-        for j in nr:
-            if j < n:
-                x_j = coords[j, 0]
-                y_j = coords[j, 1]
-                z_j = coords[j, 2]
-                # Never really used because when needed it is equal to Z_i.
-                # Z_j = nc[j]
-
-                x = (x_i-x_j)**2
-                y = (y_i-y_j)**2
-                z = (z_i-z_j)**2
-
-                if i == j:
-                    if diag_value is None:
-                        lj[i, j] = (0.5*Z_i**2.4)
-                    else:
-                        lj[i, j] = diag_value
+    for i, xyz_i in enumerate(coords):
+        for j, xyz_j in enumerate(coords):
+            if i == j:
+                if diag_value is None:
+                    lj[i, j] = (0.5*nc[i]**2.4)
                 else:
-                    # Calculations are done after i==j is checked
-                    # so no division by zero is done.
-
-                    # A little play with r exponents
-                    # so no square root is calculated.
-                    # Conversion factor is included in r^2.
+                    lj[i, j] = diag_value
+            else:
+                # Calculations are done after i==j is checked
+                # so no division by zero is done.
 
-                    # 1/r^2
-                    r_2 = sigma**2/(cr**2*(x + y + z))
+                # A little play with r exponents
+                # so no square root is calculated.
+                # Conversion factor is included in r^2.
+                rv = xyz_i - xyz_j
+                r = sigma*np.linalg.norm(rv)/cr
 
-                    r_6 = math.pow(r_2, 3)
-                    r_12 = math.pow(r_6, 2)
-                    lj[i, j] = (4*epsilon*(r_12 - r_6))
-            else:
-                break
+                # 1/r^n
+                r_2 = 1/r**2
+                r_6 = r_2**3
+                r_12 = r_6**2
+                lj[i, j] = (4*epsilon*(r_12 - r_6))
 
     # Now the value will be returned.
     if as_eig:
-- 
cgit v1.2.3-70-g09d2


From 8d0a22b9fdef41ece399468423cb533768d0e631 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sat, 22 Feb 2020 17:12:32 -0700
Subject: Add tests

---
 ml_exp/__main__.py        | 11 ++++++++++-
 ml_exp/representations.py |  4 ++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 3cb515d85..cb8a9a3f1 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -20,5 +20,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
+from ml_exp.compound import Compound
+
+
 if __name__ == '__main__':
-    print('Ok!')
+    print('Initialize test')
+    test = []
+    for i in range(1, 10):
+        test.append(Compound(f'/home/luevano/py/ml_exp/data/qm7/000{i}.xyz'))
+        test[i-1].gen_cm(size=1, as_eig=False)
+        test[i-1].gen_ljm(size=1, as_eig=False)
+        test[i-1].gen_am(size=1)
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 354611190..ae870f3b0 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -273,9 +273,9 @@ def adjacency_matrix(fnm,
         size = n
 
     if forces:
-        am = np.empty((size, size), dtype=float)
+        am = np.zeros((size, size), dtype=float)
     else:
-        am = np.empty((size, size), dtype=int)
+        am = np.zeros((size, size), dtype=int)
 
     for i, bond_i in enumerate(bonds):
         for j, bond_j in enumerate(bonds):
-- 
cgit v1.2.3-70-g09d2


From 85198178482767d2831ddf3762e5e6eeafa990bd Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sat, 22 Feb 2020 18:37:50 -0700
Subject: Fix bug, current errors present on adj matrix

---
 ml_exp/__main__.py        |  3 ---
 ml_exp/representations.py | 16 ++++++++--------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index cb8a9a3f1..17119623e 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -28,6 +28,3 @@ if __name__ == '__main__':
     test = []
     for i in range(1, 10):
         test.append(Compound(f'/home/luevano/py/ml_exp/data/qm7/000{i}.xyz'))
-        test[i-1].gen_cm(size=1, as_eig=False)
-        test[i-1].gen_ljm(size=1, as_eig=False)
-        test[i-1].gen_am(size=1)
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index ae870f3b0..796d15f98 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -48,8 +48,8 @@ def coulomb_matrix(coords,
                          size. Arrays are not of the right shape.')
 
     if size < n:
-        print('Error. Compound size (n) is greater han (size). Using (n)\
-              instead of (size).')
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
         size = n
 
     cm = np.zeros((size, size), dtype=float)
@@ -115,8 +115,8 @@ def lennard_jones_matrix(coords,
                          size. Arrays are not of the right shape.')
 
     if size < n:
-        print('Error. Compound size (n) is greater han (size). Using (n)\
-              instead of (size).')
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
         size = n
 
     lj = np.zeros((size, size), dtype=float)
@@ -137,10 +137,10 @@ def lennard_jones_matrix(coords,
                 # so no square root is calculated.
                 # Conversion factor is included in r^2.
                 rv = xyz_i - xyz_j
-                r = sigma*np.linalg.norm(rv)/cr
+                r = np.linalg.norm(rv)/cr
 
                 # 1/r^n
-                r_2 = 1/r**2
+                r_2 = sigma**2/r**2
                 r_6 = r_2**3
                 r_12 = r_6**2
                 lj[i, j] = (4*epsilon*(r_12 - r_6))
@@ -204,9 +204,9 @@ def first_neighbor_matrix(coords,
     cs_bond = sorted(['C', 'S'])
 
     if use_forces:
-        fnm = np.empty((n, n), dtype=float)
+        fnm = np.zeros((n, n), dtype=float)
     else:
-        fnm = np.empty((n, n), dtype=int)
+        fnm = np.zeros((n, n), dtype=int)
 
     bonds = []
     forces = []
-- 
cgit v1.2.3-70-g09d2


From db47ff8606b14e4cdddc0e317972c3515b9c788b Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 21:03:39 -0700
Subject: Fix adj matrix issue with cc bonds)

---
 ml_exp/__main__.py        | 11 ++++++++---
 ml_exp/compound.py        |  6 ++++--
 ml_exp/representations.py | 29 ++++++++++++++---------------
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 17119623e..124ce8cd9 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -25,6 +25,11 @@ from ml_exp.compound import Compound
 
 if __name__ == '__main__':
     print('Initialize test')
-    test = []
-    for i in range(1, 10):
-        test.append(Compound(f'/home/luevano/py/ml_exp/data/qm7/000{i}.xyz'))
+    mol_name = '/home/luevano/py/ml_exp/data/qm7/0004.xyz'
+    mol = Compound(mol_name)
+    mol.gen_cm(size=1, as_eig=False)
+    mol.gen_ljm(size=1, as_eig=False)
+    mol.gen_am(size=1)
+
+    print(mol.n, mol.atoms, mol.atoms_nc)
+    print(mol.am)
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index d4dc9b7c0..d499e6b83 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -39,7 +39,8 @@ class Compound:
         self.atoms = None
         self.atoms_nc = None
         self.coordinates = None
-        self.energy = None
+        self.pbe0 = None
+        self.delta = None
 
         self.cm = None
         self.ljm = None
@@ -91,8 +92,8 @@ class Compound:
                                         bohr_ru=bohr_ru)
 
     def gen_am(self,
-               use_forces=False,
                size=23,
+               use_forces=False,
                bohr_ru=False):
         """
         Generate the Adjacency Matrix for the compund.
@@ -104,6 +105,7 @@ class Compound:
         fnm, bonds, forces = first_neighbor_matrix(self.coordinates,
                                                    self.atoms_nc,
                                                    self.atoms,
+                                                   size=size,
                                                    use_forces=use_forces,
                                                    bohr_ru=bohr_ru)
 
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 796d15f98..11c30dfd2 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -39,7 +39,7 @@ def coulomb_matrix(coords,
     if bohr_ru:
         cr = 0.52917721067
     else:
-        cr = 1
+        cr = 1.0
 
     n = coords.shape[0]
 
@@ -106,7 +106,7 @@ def lennard_jones_matrix(coords,
     if bohr_ru:
         cr = 0.52917721067
     else:
-        cr = 1
+        cr = 1.0
 
     n = coords.shape[0]
 
@@ -168,6 +168,7 @@ def lennard_jones_matrix(coords,
 def first_neighbor_matrix(coords,
                           nc,
                           atoms,
+                          size=23,
                           use_forces=False,
                           bohr_ru=False):
     """
@@ -179,7 +180,7 @@ def first_neighbor_matrix(coords,
     bohr_ru: if radius units should be in bohr's radius units.
     NOTE: Bond distance of carbon to other elements
         are (for atoms present in the qm7 dataset):
-            C: 1.20 - 1.53 A
+            C: 1.20 - 1.54 A (Edited to 1.19 - 1.54 A)
             H: 1.06 - 1.12 A
             O: 1.43 - 2.15 A
             N: 1.47 - 2.10 A
@@ -188,7 +189,7 @@ def first_neighbor_matrix(coords,
     if bohr_ru:
         cr = 0.52917721067
     else:
-        cr = 1
+        cr = 1.0
 
     n = coords.shape[0]
 
@@ -196,6 +197,11 @@ def first_neighbor_matrix(coords,
         raise ValueError('Compound size is different than the nuclear charge\
                          size. Arrays are not of the right shape.')
 
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
+        size = n
+
     # Possible bonds.
     cc_bond = sorted(['C', 'C'])
     ch_bond = sorted(['C', 'H'])
@@ -203,16 +209,12 @@ def first_neighbor_matrix(coords,
     cn_bond = sorted(['C', 'N'])
     cs_bond = sorted(['C', 'S'])
 
-    if use_forces:
-        fnm = np.zeros((n, n), dtype=float)
-    else:
-        fnm = np.zeros((n, n), dtype=int)
+    fnm = np.zeros((size, size), dtype=float)
 
     bonds = []
     forces = []
     for i, xyz_i in enumerate(coords):
         for j, xyz_j in enumerate(coords):
-
             # Ignore the diagonal.
             if i != j:
                 bond = sorted([atoms[i], atoms[j]])
@@ -220,7 +222,7 @@ def first_neighbor_matrix(coords,
                 r = np.linalg.norm(rv)/cr
 
                 # Check for each type of bond.
-                if (cc_bond == bond) and (r >= 1.20 and r <= 1.53):
+                if (cc_bond == bond) and (r >= 1.19 and r <= 1.54):
                     fnm[i, j] = 1.0
                     if j > i:
                         bonds.append((i, j))
@@ -257,7 +259,7 @@ def first_neighbor_matrix(coords,
 def adjacency_matrix(fnm,
                      bonds,
                      forces,
-                     size=23):
+                     size=22):
     """
     Calculates the adjacency matrix given the bond list.
     fnm: first neighbour matrix.
@@ -272,10 +274,7 @@ def adjacency_matrix(fnm,
               instead of (size).')
         size = n
 
-    if forces:
-        am = np.zeros((size, size), dtype=float)
-    else:
-        am = np.zeros((size, size), dtype=int)
+    am = np.zeros((size, size), dtype=float)
 
     for i, bond_i in enumerate(bonds):
         for j, bond_j in enumerate(bonds):
-- 
cgit v1.2.3-70-g09d2


From 54e69de6126afb321c0ea575ede595347ffc121c Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 21:07:54 -0700
Subject: Add fnm and am to init

---
 ml_exp/__init__.py |  7 +++++--
 ml_exp/__main__.py | 12 ++----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index f5ec7068b..d81f0999a 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -21,10 +21,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from ml_exp.compound import Compound
-from ml_exp.representations import coulomb_matrix, lennard_jones_matrix
+from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
+    first_neighbor_matrix, adjacency_matrix
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
 __all__ = ['Compound',
            'coulomb_matrix',
-           'lennard_jones_matrix']
+           'lennard_jones_matrix',
+           'first_neighbor_matrix',
+           'adjacency_matrix']
diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 124ce8cd9..44b492103 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -20,16 +20,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-from ml_exp.compound import Compound
+# from ml_exp.compound import Compound
 
 
 if __name__ == '__main__':
-    print('Initialize test')
-    mol_name = '/home/luevano/py/ml_exp/data/qm7/0004.xyz'
-    mol = Compound(mol_name)
-    mol.gen_cm(size=1, as_eig=False)
-    mol.gen_ljm(size=1, as_eig=False)
-    mol.gen_am(size=1)
-
-    print(mol.n, mol.atoms, mol.atoms_nc)
-    print(mol.am)
+    print('OK!')
-- 
cgit v1.2.3-70-g09d2


From 336654366a0e0a15263bd0d93bcb81ef48fcb040 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 21:12:00 -0700
Subject: Remove bag of stuff

---
 ml_exp/bostuff.py | 99 -------------------------------------------------------
 1 file changed, 99 deletions(-)
 delete mode 100644 ml_exp/bostuff.py

diff --git a/ml_exp/bostuff.py b/ml_exp/bostuff.py
deleted file mode 100644
index 1741e26cd..000000000
--- a/ml_exp/bostuff.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros
-from collections import Counter
-from ml_exp.bob import check_bond
-
-
-def bok_cx(adj_matrix,
-           atoms,
-           max_n=25,
-           max_bond_len=325):
-    """
-    Creates the bag of k_cx using the adjacency matrix data.
-    adj_matrix: adjacency matrix.
-    max_n: maximum amount of atoms.
-    max_bond_len: maximum amount of bonds in molecule.
-    """
-    n = len(atoms)
-    bond_n = (n * n - n) / 2 + n
-    n_r = range(n)
-
-    if max_n < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_n = n
-
-    if max_bond_len < bond_n:
-        print(''.join(['Error. Molecule bond length (bond_n) is ',
-                       'greater than max_bond_len. Using bond_n.']))
-        max_bond_len = bond_n
-
-    # List where each bag data is stored.
-    bags = []
-    for i in n_r:
-        for j in n_r:
-            # Work only in the upper triangle of the coulomb matrix.
-            if j >= i:
-                # Get the string of the current bond.
-                if i == j:
-                    current_bond = atoms[i]
-                else:
-                    current_bond = ''.join(sorted([atoms[i], atoms[j]]))
-
-                # Check if that bond is already in a bag.
-                checker = check_bond(bags, current_bond)
-                # Either create a new bag or add values to an existing one.
-                if not checker[0]:
-                    bags.append([current_bond, adj_matrix[i, j]])
-                else:
-                    bags[checker[1]].append(adj_matrix[i, j])
-
-    # Create the actual bond list ordered.
-    atom_counter = Counter(atoms)
-    atom_list = sorted(list(set(atoms)))
-    bonds = []
-    for i, a_i in enumerate(atom_list):
-        if atom_counter[a_i] > 1:
-            for a_j in atom_list[i:]:
-                bonds.append(''.join(sorted([a_i, a_j])))
-    bonds = atom_list + bonds
-
-    # Create the final vector for the bok_cx.
-    bok_cx = array(zeros(max_bond_len), dtype=float)
-    c_i = 0
-    for i, bond in enumerate(bonds):
-        checker = check_bond(bags, bond)
-        if checker[0]:
-            for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
-                # Use c_i as the index for bok_cx if the zero padding should
-                # be at the end of the vector instead of between each bond.
-                bok_cx[i*max_n + j] = num
-                c_i += 1
-        # This is set to false because this was a debugging measure.
-        else:
-            print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
-                           ' be found in the bags list. This could be',
-                           ' a case where the atom is only present once',
-                           ' in the molecule.']))
-    return bok_cx
-- 
cgit v1.2.3-70-g09d2


From 321681e542509869568e9ed610d821c1d9d9d5e6 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 21:16:16 -0700
Subject: Move bob to representations

---
 ml_exp/bob.py             | 117 ----------------------------------------------
 ml_exp/representations.py |  94 +++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 117 deletions(-)
 delete mode 100644 ml_exp/bob.py

diff --git a/ml_exp/bob.py b/ml_exp/bob.py
deleted file mode 100644
index 86efecdb4..000000000
--- a/ml_exp/bob.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros
-from collections import Counter
-
-
-def check_bond(bags,
-               bond):
-    """
-    Checks if a bond is in a bag.
-    bags: list of bags, containing a bag per entry, which in turn
-    contains a list of bond-values.
-    bond: bond to check.
-    """
-    if bags == []:
-        return False, None
-
-    for i, bag in enumerate(bags):
-        if bag[0] == bond:
-            return True, i
-
-    return False, None
-
-
-def bob(c_matrix,
-        atoms,
-        max_n=25,
-        max_bond_len=325):
-    """
-    Creates the bag of bond using the coulomb matrix data.
-    c_matrix: coulomb matrix.
-    atoms: list of atoms.
-    max_n: maximum amount of atoms.
-    max_bond_len: maximum amount of bonds in molecule.
-    """
-    n = len(atoms)
-    bond_n = (n * n - n) / 2 + n
-    n_r = range(n)
-
-    if max_n < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_n = n
-
-    if max_bond_len < bond_n:
-        print(''.join(['Error. Molecule bond lenght (bond_n) is ',
-                       'greater than max_bond_len. Using bond_n.']))
-        max_bond_len = bond_n
-
-    # List where each bag data is stored.
-    bags = []
-    for i in n_r:
-        for j in n_r:
-            # Work only in the upper triangle of the coulomb matrix.
-            if j >= i:
-                # Get the string of the current bond.
-                if i == j:
-                    current_bond = atoms[i]
-                else:
-                    current_bond = ''.join(sorted([atoms[i], atoms[j]]))
-
-                # Check if that bond is already in a bag.
-                checker = check_bond(bags, current_bond)
-                # Either create a new bag or add values to an existing one.
-                if not checker[0]:
-                    bags.append([current_bond, c_matrix[i, j]])
-                else:
-                    bags[checker[1]].append(c_matrix[i, j])
-
-    # Create the actual bond list ordered.
-    atom_counter = Counter(atoms)
-    atom_list = sorted(list(set(atoms)))
-    bonds = []
-    for i, a_i in enumerate(atom_list):
-        if atom_counter[a_i] > 1:
-            for a_j in atom_list[i:]:
-                bonds.append(''.join(sorted([a_i, a_j])))
-    bonds = atom_list + bonds
-
-    # Create the final vector for the bob.
-    bob = array(zeros(max_bond_len), dtype=float)
-    c_i = 0
-    for i, bond in enumerate(bonds):
-        checker = check_bond(bags, bond)
-        if checker[0]:
-            for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
-                # Use c_i as the index for bob if the zero padding should
-                # be at the end of the vector instead of between each bond.
-                bob[i*max_n + j] = num
-                c_i += 1
-        # This is set to false because this was a debugging measure.
-        else:
-            print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
-                           ' be found in the bags list. This could be',
-                           ' a case where the atom is only present once',
-                           ' in the molecule.']))
-    return bob
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 11c30dfd2..3119a4a88 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import numpy as np
+from collections import Counter
 
 
 def coulomb_matrix(coords,
@@ -287,3 +288,96 @@ def adjacency_matrix(fnm,
                         am[i, j] = fnm[bond_i[0], bond_i[1]]
 
     return am
+
+
+def check_bond(bags,
+               bond):
+    """
+    Checks if a bond is in a bag.
+    bags: list of bags, containing a bag per entry, which in turn
+    contains a list of bond-values.
+    bond: bond to check.
+    """
+    if bags == []:
+        return False, None
+
+    for i, bag in enumerate(bags):
+        if bag[0] == bond:
+            return True, i
+
+    return False, None
+
+
+def bob(c_matrix,
+        atoms,
+        max_n=25,
+        max_bond_len=325):
+    """
+    Creates the bag of bond using the coulomb matrix data.
+    c_matrix: coulomb matrix.
+    atoms: list of atoms.
+    max_n: maximum amount of atoms.
+    max_bond_len: maximum amount of bonds in molecule.
+    """
+    n = len(atoms)
+    bond_n = (n * n - n) / 2 + n
+    n_r = range(n)
+
+    if max_n < n:
+        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+                       'greater than max_len. Using mol_n.']))
+        max_n = n
+
+    if max_bond_len < bond_n:
+        print(''.join(['Error. Molecule bond lenght (bond_n) is ',
+                       'greater than max_bond_len. Using bond_n.']))
+        max_bond_len = bond_n
+
+    # List where each bag data is stored.
+    bags = []
+    for i in n_r:
+        for j in n_r:
+            # Work only in the upper triangle of the coulomb matrix.
+            if j >= i:
+                # Get the string of the current bond.
+                if i == j:
+                    current_bond = atoms[i]
+                else:
+                    current_bond = ''.join(sorted([atoms[i], atoms[j]]))
+
+                # Check if that bond is already in a bag.
+                checker = check_bond(bags, current_bond)
+                # Either create a new bag or add values to an existing one.
+                if not checker[0]:
+                    bags.append([current_bond, c_matrix[i, j]])
+                else:
+                    bags[checker[1]].append(c_matrix[i, j])
+
+    # Create the actual bond list ordered.
+    atom_counter = Counter(atoms)
+    atom_list = sorted(list(set(atoms)))
+    bonds = []
+    for i, a_i in enumerate(atom_list):
+        if atom_counter[a_i] > 1:
+            for a_j in atom_list[i:]:
+                bonds.append(''.join(sorted([a_i, a_j])))
+    bonds = atom_list + bonds
+
+    # Create the final vector for the bob.
+    bob = array(zeros(max_bond_len), dtype=float)
+    c_i = 0
+    for i, bond in enumerate(bonds):
+        checker = check_bond(bags, bond)
+        if checker[0]:
+            for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
+                # Use c_i as the index for bob if the zero padding should
+                # be at the end of the vector instead of between each bond.
+                bob[i*max_n + j] = num
+                c_i += 1
+        # This is set to false because this was a debugging measure.
+        else:
+            print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
+                           ' be found in the bags list. This could be',
+                           ' a case where the atom is only present once',
+                           ' in the molecule.']))
+    return bob
-- 
cgit v1.2.3-70-g09d2


From 2c8a05c20fbfec9d6c34a83958b694a7611b6cf1 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 21:45:06 -0700
Subject: Refactor bag of sutff in representations

---
 ml_exp/compound.py        |  2 +-
 ml_exp/representations.py | 73 +++++++++++++++++++++++------------------------
 2 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index d499e6b83..595078d55 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -45,7 +45,7 @@ class Compound:
         self.cm = None
         self.ljm = None
         self.am = None
-        self.bob = None
+        self.bos = None
 
         if xyz is not None:
             self.read_xyz(xyz)
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 3119a4a88..a85fee11f 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -45,8 +45,8 @@ def coulomb_matrix(coords,
     n = coords.shape[0]
 
     if not n == nc.shape[0]:
-        raise ValueError('Compound size is different than the nuclear charge\
-                         size. Arrays are not of the right shape.')
+        raise ValueError('Compound size is different than the nuclear charge \
+size. Arrays are not of the right shape.')
 
     if size < n:
         print('Error. Compound size (n) is greater han (size). Using (n)',
@@ -112,8 +112,8 @@ def lennard_jones_matrix(coords,
     n = coords.shape[0]
 
     if not n == nc.shape[0]:
-        raise ValueError('Compound size is different than the nuclear charge\
-                         size. Arrays are not of the right shape.')
+        raise ValueError('Compound size is different than the nuclear charge \
+size. Arrays are not of the right shape.')
 
     if size < n:
         print('Error. Compound size (n) is greater han (size). Using (n)',
@@ -195,8 +195,8 @@ def first_neighbor_matrix(coords,
     n = coords.shape[0]
 
     if not n == nc.shape[0]:
-        raise ValueError('Compound size is different than the nuclear charge\
-                         size. Arrays are not of the right shape.')
+        raise ValueError('Compound size is different than the nuclear charge \
+size. Arrays are not of the right shape.')
 
     if size < n:
         print('Error. Compound size (n) is greater han (size). Using (n)',
@@ -295,7 +295,7 @@ def check_bond(bags,
     """
     Checks if a bond is in a bag.
     bags: list of bags, containing a bag per entry, which in turn
-    contains a list of bond-values.
+        contains a list of bond-values.
     bond: bond to check.
     """
     if bags == []:
@@ -308,50 +308,49 @@ def check_bond(bags,
     return False, None
 
 
-def bob(c_matrix,
-        atoms,
-        max_n=25,
-        max_bond_len=325):
+def bag_of(cm,
+           atoms,
+           stuff='bonds',
+           size=23):
     """
-    Creates the bag of bond using the coulomb matrix data.
-    c_matrix: coulomb matrix.
+    Creates the Bag of Bonds using the Coulomb Matrix.
+    cm: coulomb matrix.
     atoms: list of atoms.
-    max_n: maximum amount of atoms.
-    max_bond_len: maximum amount of bonds in molecule.
+    size: maximum amount of atoms.
     """
+    if cm is None:
+        raise ValueError('Coulomb Matrix hasn\'t been initialized for the \
+current compound.')
+
     n = len(atoms)
-    bond_n = (n * n - n) / 2 + n
-    n_r = range(n)
 
-    if max_n < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_n = n
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
+        size = n
 
-    if max_bond_len < bond_n:
-        print(''.join(['Error. Molecule bond lenght (bond_n) is ',
-                       'greater than max_bond_len. Using bond_n.']))
-        max_bond_len = bond_n
+    # Bond max length, calculated using only the upper triangular matrix.
+    bond_size = (size * size - size)/2 + size
 
     # List where each bag data is stored.
     bags = []
-    for i in n_r:
-        for j in n_r:
+    for i, atom_i in enumerate(atoms):
+        for j, atom_j in enumerate(atoms):
             # Work only in the upper triangle of the coulomb matrix.
             if j >= i:
                 # Get the string of the current bond.
                 if i == j:
-                    current_bond = atoms[i]
+                    current_bond = atom_i
                 else:
-                    current_bond = ''.join(sorted([atoms[i], atoms[j]]))
+                    current_bond = ''.join(sorted([atom_i, atom_j]))
 
                 # Check if that bond is already in a bag.
                 checker = check_bond(bags, current_bond)
                 # Either create a new bag or add values to an existing one.
                 if not checker[0]:
-                    bags.append([current_bond, c_matrix[i, j]])
+                    bags.append([current_bond, cm[i, j]])
                 else:
-                    bags[checker[1]].append(c_matrix[i, j])
+                    bags[checker[1]].append(cm[i, j])
 
     # Create the actual bond list ordered.
     atom_counter = Counter(atoms)
@@ -364,7 +363,7 @@ def bob(c_matrix,
     bonds = atom_list + bonds
 
     # Create the final vector for the bob.
-    bob = array(zeros(max_bond_len), dtype=float)
+    bob = np.zeros(bond_size, dtype=float)
     c_i = 0
     for i, bond in enumerate(bonds):
         checker = check_bond(bags, bond)
@@ -372,12 +371,10 @@ def bob(c_matrix,
             for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
                 # Use c_i as the index for bob if the zero padding should
                 # be at the end of the vector instead of between each bond.
-                bob[i*max_n + j] = num
+                bob[i*size + j] = num
                 c_i += 1
-        # This is set to false because this was a debugging measure.
         else:
-            print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
-                           ' be found in the bags list. This could be',
-                           ' a case where the atom is only present once',
-                           ' in the molecule.']))
+            print(f'Error. Bond {bond} from bond list coudn\'t be found',
+                  'in the bags list. This could be a case where the atom',
+                  'is only present oncce in the molecule.')
     return bob
-- 
cgit v1.2.3-70-g09d2


From b82b90ec609ee80629f1e7f4b8a03cbbb53a0f21 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 21:45:45 -0700
Subject: Edit bag of stuff function name

---
 ml_exp/representations.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index a85fee11f..1fe55aa5f 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -308,15 +308,15 @@ def check_bond(bags,
     return False, None
 
 
-def bag_of(cm,
-           atoms,
-           stuff='bonds',
-           size=23):
+def bag_of_stuff(cm,
+                 atoms,
+                 stuff='bonds',
+                 size=23):
     """
     Creates the Bag of Bonds using the Coulomb Matrix.
     cm: coulomb matrix.
     atoms: list of atoms.
-    size: maximum amount of atoms.
+    size: compound size.
     """
     if cm is None:
         raise ValueError('Coulomb Matrix hasn\'t been initialized for the \
-- 
cgit v1.2.3-70-g09d2


From b300e365d0886695b0d0220aad1be6cb38cf3c36 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sun, 23 Feb 2020 22:06:30 -0700
Subject: Add bos to compound, and bugfix to bos

---
 ml_exp/__init__.py        |  6 ++++--
 ml_exp/compound.py        | 15 ++++++++++++++-
 ml_exp/representations.py | 20 ++++++++++++--------
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index d81f0999a..dcb10a1df 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -22,7 +22,7 @@ SOFTWARE.
 """
 from ml_exp.compound import Compound
 from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
-    first_neighbor_matrix, adjacency_matrix
+    first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
@@ -30,4 +30,6 @@ __all__ = ['Compound',
            'coulomb_matrix',
            'lennard_jones_matrix',
            'first_neighbor_matrix',
-           'adjacency_matrix']
+           'adjacency_matrix',
+           'check_bond',
+           'bag_of_stuff']
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 595078d55..8b6af0ae9 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -23,7 +23,7 @@ SOFTWARE.
 import numpy as np
 from ml_exp.data import NUCLEAR_CHARGE
 from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
-    first_neighbor_matrix, adjacency_matrix
+    first_neighbor_matrix, adjacency_matrix, bag_of_stuff
 
 
 class Compound:
@@ -115,6 +115,19 @@ class Compound:
                                    forces,
                                    size=size)
 
+    def gen_bos(self,
+                size=23,
+                stuff='bonds'):
+        """
+        Generate the Bag of Stuff for the compound.
+        size: compound size.
+        stuff: elements of the bag, by default the known bag of bonds.
+        """
+        self.bos = bag_of_stuff(self.cm,
+                                self.atoms,
+                                size=size,
+                                stuff=stuff)
+
     def read_xyz(self,
                  filename):
         """
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 1fe55aa5f..ea079595e 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -310,8 +310,8 @@ def check_bond(bags,
 
 def bag_of_stuff(cm,
                  atoms,
-                 stuff='bonds',
-                 size=23):
+                 size=23,
+                 stuff='bonds'):
     """
     Creates the Bag of Bonds using the Coulomb Matrix.
     cm: coulomb matrix.
@@ -322,6 +322,10 @@ def bag_of_stuff(cm,
         raise ValueError('Coulomb Matrix hasn\'t been initialized for the \
 current compound.')
 
+    if cm.ndim == 1:
+        raise ValueError('Coulomb Matrix (CM) dimension is 1. Maybe it was \
+generated as the vector of eigenvalues, try (re-)generating the CM.')
+
     n = len(atoms)
 
     if size < n:
@@ -330,7 +334,7 @@ current compound.')
         size = n
 
     # Bond max length, calculated using only the upper triangular matrix.
-    bond_size = (size * size - size)/2 + size
+    bond_size = int((size * size - size)/2 + size)
 
     # List where each bag data is stored.
     bags = []
@@ -362,19 +366,19 @@ current compound.')
                 bonds.append(''.join(sorted([a_i, a_j])))
     bonds = atom_list + bonds
 
-    # Create the final vector for the bob.
-    bob = np.zeros(bond_size, dtype=float)
+    # Create the final vector for the bos.
+    bos = np.zeros(bond_size, dtype=float)
     c_i = 0
     for i, bond in enumerate(bonds):
         checker = check_bond(bags, bond)
         if checker[0]:
             for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
-                # Use c_i as the index for bob if the zero padding should
+                # Use c_i as the index for bos if the zero padding should
                 # be at the end of the vector instead of between each bond.
-                bob[i*size + j] = num
+                bos[i*size + j] = num
                 c_i += 1
         else:
             print(f'Error. Bond {bond} from bond list coudn\'t be found',
                   'in the bags list. This could be a case where the atom',
                   'is only present oncce in the molecule.')
-    return bob
+    return bos
-- 
cgit v1.2.3-70-g09d2


From adbc889949b8399353ab166b5d9c15734f1f0bb8 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 20:41:09 -0700
Subject: Move frob norm and cholesky solve to math

---
 ml_exp/cholesky_solve.py | 64 ---------------------------------
 ml_exp/frob_norm.py      | 51 ---------------------------
 ml_exp/gauss_kernel.py   | 49 --------------------------
 ml_exp/kernels.py        | 49 ++++++++++++++++++++++++++
 ml_exp/math.py           | 92 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 141 insertions(+), 164 deletions(-)
 delete mode 100644 ml_exp/cholesky_solve.py
 delete mode 100644 ml_exp/frob_norm.py
 delete mode 100644 ml_exp/gauss_kernel.py
 create mode 100644 ml_exp/kernels.py
 create mode 100644 ml_exp/math.py

diff --git a/ml_exp/cholesky_solve.py b/ml_exp/cholesky_solve.py
deleted file mode 100644
index bc6a572a3..000000000
--- a/ml_exp/cholesky_solve.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import numpy as np
-from numpy.linalg import cholesky
-
-
-def cholesky_solve(K, y):
-    """
-    Applies Cholesky decomposition to obtain the 'alpha coeficients'.
-    K: kernel.
-    y: known parameters.
-    """
-    # The initial mathematical problem is to solve Ka=y.
-
-    # First, add a small lambda value.
-    K[np.diag_indices_from(K)] += 1e-8
-
-    # Get the Cholesky decomposition of the kernel.
-    L = cholesky(K)
-    size = len(L)
-
-    # Solve Lx=y for x.
-    x = np.zeros(size)
-    x[0] = y[0] / L[0, 0]
-    for i in range(1, size):
-        temp_sum = 0.0
-        for j in range(i):
-            temp_sum += L[i, j] * x[j]
-        x[i] = (y[i] - temp_sum) / L[i, i]
-
-    # Now, solve LTa=x for a.
-    L2 = L.T
-    a = np.zeros(size)
-    a_ms = size - 1
-    a[a_ms] = x[a_ms] / L2[a_ms, a_ms]
-    # Because of the form of L2 (upper triangular matriz), an inversion of
-    # range() needs to be done.
-    for i in range(0, a_ms)[::-1]:
-        temp_sum = 0.0
-        for j in range(i, size)[::-1]:
-            temp_sum += L2[i, j] * a[j]
-        a[i] = (x[i] - temp_sum) / L2[i, i]
-
-    return a
diff --git a/ml_exp/frob_norm.py b/ml_exp/frob_norm.py
deleted file mode 100644
index 4c3a2945d..000000000
--- a/ml_exp/frob_norm.py
+++ /dev/null
@@ -1,51 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import math
-
-
-def frob_norm(array):
-    """
-    Calculates the frobenius norm of a given array or matrix.
-    array: array of data.
-    """
-
-    arr_sh_len = len(array.shape)
-    arr_range = range(len(array))
-    fn = 0.0
-
-    # If it is a 'vector'.
-    if arr_sh_len == 1:
-        for i in arr_range:
-            fn += array[i]*array[i]
-
-        return math.sqrt(fn)
-
-    # If it is a matrix.
-    elif arr_sh_len == 2:
-        for i in arr_range:
-            for j in arr_range:
-                fn += array[i, j]*array[i, j]
-
-        return math.sqrt(fn)
-    else:
-        print('Error. Array size greater than 2 ({}).'.format(arr_sh_len))
diff --git a/ml_exp/gauss_kernel.py b/ml_exp/gauss_kernel.py
deleted file mode 100644
index 834d62408..000000000
--- a/ml_exp/gauss_kernel.py
+++ /dev/null
@@ -1,49 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import math
-import numpy as np
-from ml_exp.frob_norm import frob_norm
-
-
-def gauss_kernel(X_1, X_2, sigma):
-    """
-    Calculates the Gaussian Kernel.
-    X_1: first representations.
-    X_2: second representations.
-    sigma: kernel width.
-    """
-    x1_l = len(X_1)
-    x1_range = range(x1_l)
-    x2_l = len(X_2)
-    x2_range = range(x2_l)
-
-    inv_sigma = -0.5 / (sigma*sigma)
-
-    K = np.zeros((x1_l, x2_l))
-    for i in x1_range:
-        for j in x2_range:
-            f_norm = frob_norm(X_1[i] - X_2[j])
-            # print(f_norm)
-            K[i, j] = math.exp(inv_sigma * f_norm)
-
-    return K
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
new file mode 100644
index 000000000..834d62408
--- /dev/null
+++ b/ml_exp/kernels.py
@@ -0,0 +1,49 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import math
+import numpy as np
+from ml_exp.frob_norm import frob_norm
+
+
+def gauss_kernel(X_1, X_2, sigma):
+    """
+    Calculates the Gaussian Kernel.
+    X_1: first representations.
+    X_2: second representations.
+    sigma: kernel width.
+    """
+    x1_l = len(X_1)
+    x1_range = range(x1_l)
+    x2_l = len(X_2)
+    x2_range = range(x2_l)
+
+    inv_sigma = -0.5 / (sigma*sigma)
+
+    K = np.zeros((x1_l, x2_l))
+    for i in x1_range:
+        for j in x2_range:
+            f_norm = frob_norm(X_1[i] - X_2[j])
+            # print(f_norm)
+            K[i, j] = math.exp(inv_sigma * f_norm)
+
+    return K
diff --git a/ml_exp/math.py b/ml_exp/math.py
new file mode 100644
index 000000000..781985118
--- /dev/null
+++ b/ml_exp/math.py
@@ -0,0 +1,92 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import numpy as np
+import math
+
+
+def frob_norm(array):
+    """
+    Calculates the frobenius norm of a given array or matrix.
+    array: array of data.
+    """
+
+    arr_sh_len = len(array.shape)
+    arr_range = range(len(array))
+    fn = 0.0
+
+    # If it is a 'vector'.
+    if arr_sh_len == 1:
+        for i in arr_range:
+            fn += array[i]*array[i]
+
+        return math.sqrt(fn)
+
+    # If it is a matrix.
+    elif arr_sh_len == 2:
+        for i in arr_range:
+            for j in arr_range:
+                fn += array[i, j]*array[i, j]
+
+        return math.sqrt(fn)
+    else:
+        print('Error. Array size greater than 2 ({}).'.format(arr_sh_len))
+
+
+def cholesky_solve(K, y):
+    """
+    Applies Cholesky decomposition to obtain the 'alpha coeficients'.
+    K: kernel.
+    y: known parameters.
+    """
+    # The initial mathematical problem is to solve Ka=y.
+
+    # First, add a small lambda value.
+    K[np.diag_indices_from(K)] += 1e-8
+
+    # Get the Cholesky decomposition of the kernel.
+    L = np.linalg.cholesky(K)
+    size = len(L)
+
+    # Solve Lx=y for x.
+    x = np.zeros(size)
+    x[0] = y[0] / L[0, 0]
+    for i in range(1, size):
+        temp_sum = 0.0
+        for j in range(i):
+            temp_sum += L[i, j] * x[j]
+        x[i] = (y[i] - temp_sum) / L[i, i]
+
+    # Now, solve LTa=x for a.
+    L2 = L.T
+    a = np.zeros(size)
+    a_ms = size - 1
+    a[a_ms] = x[a_ms] / L2[a_ms, a_ms]
+    # Because of the form of L2 (upper triangular matriz), an inversion of
+    # range() needs to be done.
+    for i in range(0, a_ms)[::-1]:
+        temp_sum = 0.0
+        for j in range(i, size)[::-1]:
+            temp_sum += L2[i, j] * a[j]
+        a[i] = (x[i] - temp_sum) / L2[i, i]
+
+    return a
-- 
cgit v1.2.3-70-g09d2


From 2bdd175a6eb1f6edd8689987fef1a29fc1043da1 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 20:42:15 -0700
Subject: Remove frobnorm, already in numpy

---
 ml_exp/math.py | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/ml_exp/math.py b/ml_exp/math.py
index 781985118..e7c8dcabc 100644
--- a/ml_exp/math.py
+++ b/ml_exp/math.py
@@ -21,35 +21,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import numpy as np
-import math
-
-
-def frob_norm(array):
-    """
-    Calculates the frobenius norm of a given array or matrix.
-    array: array of data.
-    """
-
-    arr_sh_len = len(array.shape)
-    arr_range = range(len(array))
-    fn = 0.0
-
-    # If it is a 'vector'.
-    if arr_sh_len == 1:
-        for i in arr_range:
-            fn += array[i]*array[i]
-
-        return math.sqrt(fn)
-
-    # If it is a matrix.
-    elif arr_sh_len == 2:
-        for i in arr_range:
-            for j in arr_range:
-                fn += array[i, j]*array[i, j]
-
-        return math.sqrt(fn)
-    else:
-        print('Error. Array size greater than 2 ({}).'.format(arr_sh_len))
 
 
 def cholesky_solve(K, y):
-- 
cgit v1.2.3-70-g09d2


From d1b84b91f4b94b717de477e47b22946fd42be0db Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 20:52:08 -0700
Subject: Refactor gauss kernel, more natural syntax

---
 ml_exp/__init__.py |  4 +++-
 ml_exp/kernels.py  | 22 +++++++++-------------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index dcb10a1df..e9dd83eae 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -23,6 +23,7 @@ SOFTWARE.
 from ml_exp.compound import Compound
 from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
     first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
+from ml_exp.math import cholesky_solve
 
 # If somebody does "from package import *", this is what they will
 # be able to access:
@@ -32,4 +33,5 @@ __all__ = ['Compound',
            'first_neighbor_matrix',
            'adjacency_matrix',
            'check_bond',
-           'bag_of_stuff']
+           'bag_of_stuff',
+           'cholesky_solve']
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index 834d62408..7a61a1e1e 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -22,27 +22,23 @@ SOFTWARE.
 """
 import math
 import numpy as np
-from ml_exp.frob_norm import frob_norm
 
 
-def gauss_kernel(X_1, X_2, sigma):
+def gaussian_kernel(X1,
+                    X2,
+                    sigma):
     """
     Calculates the Gaussian Kernel.
-    X_1: first representations.
-    X_2: second representations.
+    X1: first representations.
+    X2: second representations.
     sigma: kernel width.
     """
-    x1_l = len(X_1)
-    x1_range = range(x1_l)
-    x2_l = len(X_2)
-    x2_range = range(x2_l)
-
     inv_sigma = -0.5 / (sigma*sigma)
 
-    K = np.zeros((x1_l, x2_l))
-    for i in x1_range:
-        for j in x2_range:
-            f_norm = frob_norm(X_1[i] - X_2[j])
+    K = np.zeros((X1.shape[0], X2.shape[0]), dtype=float)
+    for i in X1:
+        for j in X2:
+            f_norm = np.linalg.norm(i - j)
             # print(f_norm)
             K[i, j] = math.exp(inv_sigma * f_norm)
 
-- 
cgit v1.2.3-70-g09d2


From 0b35ed647ec4d786b488082dd98bb91ca0f5cc05 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 20:54:01 -0700
Subject: Placeholder

---
 ml_exp/__main__.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 44b492103..0d9b87bad 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -20,8 +20,5 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-# from ml_exp.compound import Compound
-
-
 if __name__ == '__main__':
-    print('OK!')
+    print('Everything OK!\n\tThis is a placeholder.')
-- 
cgit v1.2.3-70-g09d2


From 339954ffc95c29327d33e3e2697fea1cf8e9427d Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 21:03:31 -0700
Subject: Remove unused functions

---
 ml_exp/read_qm7_data.py | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/ml_exp/read_qm7_data.py b/ml_exp/read_qm7_data.py
index 666f9c3fc..3017ac429 100644
--- a/ml_exp/read_qm7_data.py
+++ b/ml_exp/read_qm7_data.py
@@ -25,39 +25,6 @@ import numpy as np
 import random
 
 
-# 'periodic_table_of_elements.txt' retrieved from
-# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee
-def read_nc_data(data_path):
-    """
-    Reads nuclear charge data from file and returns a dictionary.
-    data_path: path to the data directory.
-    """
-    fname = 'periodic_table_of_elements.txt'
-    with open(''.join([data_path, '/', fname]), 'r') as infile:
-        temp_lines = infile.readlines()
-
-    del temp_lines[0]
-
-    lines = []
-    for temp_line in temp_lines:
-        new_line = temp_line.split(sep=',')
-        lines.append(new_line)
-
-    # Dictionary of nuclear charge.
-    return {line[2]: int(line[0]) for line in lines}
-
-
-# For use in the rewriting of the code (from functional programming to object
-# oriented programming-ish)
-def print_nc(nc_data):
-    """
-    Prints the nuclear charge data to the terminal.
-    nc_data: dict of nc.
-    """
-    for key in nc_data.keys():
-        print(f'\'{key}\': {nc_data[key]},')
-
-
 # 'hof_qm7.txt.txt' retrieved from
 # https://github.com/qmlcode/tutorial
 def read_db_data(zi_data,
-- 
cgit v1.2.3-70-g09d2


From 3c8a53f42a4a90d3cc235fc4e4132556b9cd74fa Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 21:06:10 -0700
Subject: Rename function

---
 ml_exp/qm7db.py         | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
 ml_exp/read_qm7_data.py | 106 ------------------------------------------------
 2 files changed, 106 insertions(+), 106 deletions(-)
 create mode 100644 ml_exp/qm7db.py
 delete mode 100644 ml_exp/read_qm7_data.py

diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
new file mode 100644
index 000000000..e584bc123
--- /dev/null
+++ b/ml_exp/qm7db.py
@@ -0,0 +1,106 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import os
+import numpy as np
+import random
+
+
+# 'hof_qm7.txt.txt' retrieved from
+# https://github.com/qmlcode/tutorial
+def qm7db(zi_data,
+          data_path,
+          r_seed=111,
+          return_atoms=False):
+    """
+    Reads molecule database and extracts
+    its contents as usable variables.
+    zi_data: dictionary containing nuclear charge data.
+    data_path: path to the data directory.
+    r_seed: random seed to use for the shuffling.
+    return_atoms: if atom list should be returned.
+    """
+    os.chdir(data_path)
+
+    fname = 'hof_qm7.txt'
+    with open(fname, 'r') as infile:
+        lines = infile.readlines()
+
+    # Temporary energy dictionary.
+    energy_temp = dict()
+
+    for line in lines:
+        xyz_data = line.split()
+
+        xyz_name = xyz_data[0]
+        hof = float(xyz_data[1])
+        dftb = float(xyz_data[2])
+        # print(xyz_name, hof, dftb)
+
+        energy_temp[xyz_name] = np.array([hof, hof - dftb])
+
+    # Use a random seed.
+    random.seed(r_seed)
+
+    et_keys = list(energy_temp.keys())
+    random.shuffle(et_keys)
+
+    # Temporary energy dictionary, shuffled.
+    energy_temp_shuffled = dict()
+    for key in et_keys:
+        energy_temp_shuffled.update({key: energy_temp[key]})
+
+    mol_data = []
+    mol_nc_data = []
+    atoms = []
+    # Actual reading of the xyz files.
+    for i, k in enumerate(energy_temp_shuffled.keys()):
+        with open(k, 'r') as xyz_file:
+            lines = xyz_file.readlines()
+
+        len_lines = len(lines)
+        mol_temp_data = []
+        mol_nc_temp_data = np.array(np.zeros(len_lines-2))
+        atoms_temp = []
+        for j, line in enumerate(lines[2:len_lines]):
+            line_list = line.split()
+
+            atoms_temp.append(line_list[0])
+            mol_nc_temp_data[j] = float(zi_data[line_list[0]])
+            line_data = np.array(np.asarray(line_list[1:4], dtype=float))
+            mol_temp_data.append(line_data)
+
+        mol_data.append(mol_temp_data)
+        mol_nc_data.append(mol_nc_temp_data)
+        atoms.append(atoms_temp)
+
+    # Convert everything to a numpy array.
+    molecules = np.array([np.array(mol) for mol in mol_data])
+    nuclear_charge = np.array([nc_d for nc_d in mol_nc_data])
+    energy_pbe0 = np.array([energy_temp_shuffled[k][0]
+                            for k in energy_temp_shuffled.keys()])
+    energy_delta = np.array([energy_temp_shuffled[k][1]
+                             for k in energy_temp_shuffled.keys()])
+
+    if return_atoms:
+        return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
+    return molecules, nuclear_charge, energy_pbe0, energy_delta
diff --git a/ml_exp/read_qm7_data.py b/ml_exp/read_qm7_data.py
deleted file mode 100644
index 3017ac429..000000000
--- a/ml_exp/read_qm7_data.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import os
-import numpy as np
-import random
-
-
-# 'hof_qm7.txt.txt' retrieved from
-# https://github.com/qmlcode/tutorial
-def read_db_data(zi_data,
-                 data_path,
-                 r_seed=111,
-                 return_atoms=False):
-    """
-    Reads molecule database and extracts
-    its contents as usable variables.
-    zi_data: dictionary containing nuclear charge data.
-    data_path: path to the data directory.
-    r_seed: random seed to use for the shuffling.
-    return_atoms: if atom list should be returned.
-    """
-    os.chdir(data_path)
-
-    fname = 'hof_qm7.txt'
-    with open(fname, 'r') as infile:
-        lines = infile.readlines()
-
-    # Temporary energy dictionary.
-    energy_temp = dict()
-
-    for line in lines:
-        xyz_data = line.split()
-
-        xyz_name = xyz_data[0]
-        hof = float(xyz_data[1])
-        dftb = float(xyz_data[2])
-        # print(xyz_name, hof, dftb)
-
-        energy_temp[xyz_name] = np.array([hof, hof - dftb])
-
-    # Use a random seed.
-    random.seed(r_seed)
-
-    et_keys = list(energy_temp.keys())
-    random.shuffle(et_keys)
-
-    # Temporary energy dictionary, shuffled.
-    energy_temp_shuffled = dict()
-    for key in et_keys:
-        energy_temp_shuffled.update({key: energy_temp[key]})
-
-    mol_data = []
-    mol_nc_data = []
-    atoms = []
-    # Actual reading of the xyz files.
-    for i, k in enumerate(energy_temp_shuffled.keys()):
-        with open(k, 'r') as xyz_file:
-            lines = xyz_file.readlines()
-
-        len_lines = len(lines)
-        mol_temp_data = []
-        mol_nc_temp_data = np.array(np.zeros(len_lines-2))
-        atoms_temp = []
-        for j, line in enumerate(lines[2:len_lines]):
-            line_list = line.split()
-
-            atoms_temp.append(line_list[0])
-            mol_nc_temp_data[j] = float(zi_data[line_list[0]])
-            line_data = np.array(np.asarray(line_list[1:4], dtype=float))
-            mol_temp_data.append(line_data)
-
-        mol_data.append(mol_temp_data)
-        mol_nc_data.append(mol_nc_temp_data)
-        atoms.append(atoms_temp)
-
-    # Convert everything to a numpy array.
-    molecules = np.array([np.array(mol) for mol in mol_data])
-    nuclear_charge = np.array([nc_d for nc_d in mol_nc_data])
-    energy_pbe0 = np.array([energy_temp_shuffled[k][0]
-                            for k in energy_temp_shuffled.keys()])
-    energy_delta = np.array([energy_temp_shuffled[k][1]
-                             for k in energy_temp_shuffled.keys()])
-
-    if return_atoms:
-        return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
-    return molecules, nuclear_charge, energy_pbe0, energy_delta
-- 
cgit v1.2.3-70-g09d2


From 2185e43ffdbf70a06cb894549fc7ff09f9c90cc3 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 21:09:49 -0700
Subject: Refactor code

---
 ml_exp/qm7db.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index e584bc123..4a7beaed6 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -27,17 +27,14 @@ import random
 
 # 'hof_qm7.txt.txt' retrieved from
 # https://github.com/qmlcode/tutorial
-def qm7db(zi_data,
+def qm7db(nc,
           data_path,
-          r_seed=111,
-          return_atoms=False):
+          r_seed=111):
     """
-    Reads molecule database and extracts
-    its contents as usable variables.
-    zi_data: dictionary containing nuclear charge data.
+    Creates a list of compounds with the qm7 database.
+    nc: dictionary containing nuclear charge data.
     data_path: path to the data directory.
     r_seed: random seed to use for the shuffling.
-    return_atoms: if atom list should be returned.
     """
     os.chdir(data_path)
 
@@ -85,7 +82,7 @@ def qm7db(zi_data,
             line_list = line.split()
 
             atoms_temp.append(line_list[0])
-            mol_nc_temp_data[j] = float(zi_data[line_list[0]])
+            mol_nc_temp_data[j] = float(nc[line_list[0]])
             line_data = np.array(np.asarray(line_list[1:4], dtype=float))
             mol_temp_data.append(line_data)
 
@@ -101,6 +98,4 @@ def qm7db(zi_data,
     energy_delta = np.array([energy_temp_shuffled[k][1]
                              for k in energy_temp_shuffled.keys()])
 
-    if return_atoms:
-        return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
-    return molecules, nuclear_charge, energy_pbe0, energy_delta
+    return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
-- 
cgit v1.2.3-70-g09d2


From 39d7b86cfb578b491c8cbf99905e67f7f7937dc9 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 25 Feb 2020 21:13:33 -0700
Subject: Refactor code

---
 ml_exp/qm7db.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 4a7beaed6..1f1115ba0 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -20,7 +20,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import os
 import numpy as np
 import random
 
@@ -28,19 +27,18 @@ import random
 # 'hof_qm7.txt.txt' retrieved from
 # https://github.com/qmlcode/tutorial
 def qm7db(nc,
-          data_path,
+          db_path='data',
           r_seed=111):
     """
     Creates a list of compounds with the qm7 database.
     nc: dictionary containing nuclear charge data.
-    data_path: path to the data directory.
+    db_path: path to the database directory.
     r_seed: random seed to use for the shuffling.
     """
-    os.chdir(data_path)
 
-    fname = 'hof_qm7.txt'
-    with open(fname, 'r') as infile:
-        lines = infile.readlines()
+    fname = f'{db_path}/hof_qm7.txt'
+    with open(fname, 'r') as f:
+        lines = f.readlines()
 
     # Temporary energy dictionary.
     energy_temp = dict()
-- 
cgit v1.2.3-70-g09d2


From d064df5d045a5502f273814252debdb564df3ffc Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 26 Feb 2020 04:47:44 -0700
Subject: Refactor code

---
 ml_exp/math.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/ml_exp/math.py b/ml_exp/math.py
index e7c8dcabc..da1fdf08f 100644
--- a/ml_exp/math.py
+++ b/ml_exp/math.py
@@ -23,23 +23,23 @@ SOFTWARE.
 import numpy as np
 
 
-def cholesky_solve(K, y):
+def cholesky_solve(K,
+                   y):
     """
-    Applies Cholesky decomposition to obtain the 'alpha coeficients'.
+    Applies Cholesky decomposition to solve Ka=y. Where a are the alpha
+        coeficients.
     K: kernel.
     y: known parameters.
     """
-    # The initial mathematical problem is to solve Ka=y.
-
-    # First, add a small lambda value.
+    # Add a small lambda value.
     K[np.diag_indices_from(K)] += 1e-8
 
     # Get the Cholesky decomposition of the kernel.
     L = np.linalg.cholesky(K)
-    size = len(L)
+    size = K.shape[0]
 
     # Solve Lx=y for x.
-    x = np.zeros(size)
+    x = np.zeros(size, dtype=float)
     x[0] = y[0] / L[0, 0]
     for i in range(1, size):
         temp_sum = 0.0
@@ -49,12 +49,9 @@ def cholesky_solve(K, y):
 
     # Now, solve LTa=x for a.
     L2 = L.T
-    a = np.zeros(size)
-    a_ms = size - 1
-    a[a_ms] = x[a_ms] / L2[a_ms, a_ms]
-    # Because of the form of L2 (upper triangular matriz), an inversion of
-    # range() needs to be done.
-    for i in range(0, a_ms)[::-1]:
+    a = np.zeros(size, dtype=float)
+    a[size - 1] = x[size - 1] / L2[size - 1, size - 1]
+    for i in range(0, size - 1)[::-1]:
         temp_sum = 0.0
         for j in range(i, size)[::-1]:
             temp_sum += L2[i, j] * a[j]
-- 
cgit v1.2.3-70-g09d2


From 5a85580b5f99484b957b65326ec5a19ab627d801 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 26 Feb 2020 05:26:21 -0700
Subject: Optimize qm7db reading for the rewrite

---
 ml_exp/__init__.py |  2 --
 ml_exp/compound.py |  3 ++-
 ml_exp/qm7db.py    | 68 +++++++++++-------------------------------------------
 3 files changed, 16 insertions(+), 57 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index e9dd83eae..d685e7ccc 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -25,8 +25,6 @@ from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
     first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
 from ml_exp.math import cholesky_solve
 
-# If somebody does "from package import *", this is what they will
-# be able to access:
 __all__ = ['Compound',
            'coulomb_matrix',
            'lennard_jones_matrix',
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 8b6af0ae9..0a8b89610 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -33,7 +33,7 @@ class Compound:
         Initialization of the Compound.
         xyz: (path to) the xyz file.
         """
-        # empty_array = np.asarray([], dtype=float)
+        self.name = None
 
         self.n = None
         self.atoms = None
@@ -137,6 +137,7 @@ class Compound:
         with open(filename, 'r') as f:
             lines = f.readlines()
 
+        self.name = filename.split('/')[-1]
         self.n = int(lines[0])
         self.atoms = []
         self.atoms_nc = np.empty(self.n, dtype=int)
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 1f1115ba0..f9950c317 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -20,6 +20,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
+from ml_exp.compound import Compound
 import numpy as np
 import random
 
@@ -28,72 +29,31 @@ import random
 # https://github.com/qmlcode/tutorial
 def qm7db(nc,
           db_path='data',
+          is_shuffled=True,
           r_seed=111):
     """
     Creates a list of compounds with the qm7 database.
     nc: dictionary containing nuclear charge data.
     db_path: path to the database directory.
+    is_shuffled: if the resulting list of compounds should be shuffled.
     r_seed: random seed to use for the shuffling.
     """
-
     fname = f'{db_path}/hof_qm7.txt'
     with open(fname, 'r') as f:
         lines = f.readlines()
 
-    # Temporary energy dictionary.
-    energy_temp = dict()
-
-    for line in lines:
-        xyz_data = line.split()
-
-        xyz_name = xyz_data[0]
-        hof = float(xyz_data[1])
-        dftb = float(xyz_data[2])
-        # print(xyz_name, hof, dftb)
+    compounds = []
+    for i, line in enumerate(lines):
+        line = line.split()
+        compounds.append(Compound(f'{db_path}/{line[0]}'))
+        compounds[i].pbe0 = float(line[1])
+        compounds[i].delta = float(line[1]) - float(line[2])
 
-        energy_temp[xyz_name] = np.array([hof, hof - dftb])
-
-    # Use a random seed.
+    # Shuffle the compounds list
     random.seed(r_seed)
+    random.shuffle(compounds)
 
-    et_keys = list(energy_temp.keys())
-    random.shuffle(et_keys)
-
-    # Temporary energy dictionary, shuffled.
-    energy_temp_shuffled = dict()
-    for key in et_keys:
-        energy_temp_shuffled.update({key: energy_temp[key]})
-
-    mol_data = []
-    mol_nc_data = []
-    atoms = []
-    # Actual reading of the xyz files.
-    for i, k in enumerate(energy_temp_shuffled.keys()):
-        with open(k, 'r') as xyz_file:
-            lines = xyz_file.readlines()
-
-        len_lines = len(lines)
-        mol_temp_data = []
-        mol_nc_temp_data = np.array(np.zeros(len_lines-2))
-        atoms_temp = []
-        for j, line in enumerate(lines[2:len_lines]):
-            line_list = line.split()
-
-            atoms_temp.append(line_list[0])
-            mol_nc_temp_data[j] = float(nc[line_list[0]])
-            line_data = np.array(np.asarray(line_list[1:4], dtype=float))
-            mol_temp_data.append(line_data)
-
-        mol_data.append(mol_temp_data)
-        mol_nc_data.append(mol_nc_temp_data)
-        atoms.append(atoms_temp)
-
-    # Convert everything to a numpy array.
-    molecules = np.array([np.array(mol) for mol in mol_data])
-    nuclear_charge = np.array([nc_d for nc_d in mol_nc_data])
-    energy_pbe0 = np.array([energy_temp_shuffled[k][0]
-                            for k in energy_temp_shuffled.keys()])
-    energy_delta = np.array([energy_temp_shuffled[k][1]
-                             for k in energy_temp_shuffled.keys()])
+    e_pbe0 = np.array([compound.pbe0 for compound in compounds], dtype=float)
+    e_delta = np.array([compound.delta for compound in compounds], dtype=float)
 
-    return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
+    return compounds, e_pbe0, e_delta
-- 
cgit v1.2.3-70-g09d2


From a0e73407bfef3569a994c07d5bb9c8df97518e30 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 26 Feb 2020 05:32:35 -0700
Subject: Add feature

---
 ml_exp/compound.py | 2 ++
 ml_exp/qm7db.py    | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 0a8b89610..9c7102860 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -36,6 +36,7 @@ class Compound:
         self.name = None
 
         self.n = None
+        self.extra = None  # In case a comment is in the compound file.
         self.atoms = None
         self.atoms_nc = None
         self.coordinates = None
@@ -139,6 +140,7 @@ class Compound:
 
         self.name = filename.split('/')[-1]
         self.n = int(lines[0])
+        self.extra = lines[1]
         self.atoms = []
         self.atoms_nc = np.empty(self.n, dtype=int)
         self.coordinates = np.empty((self.n, 3), dtype=float)
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index f9950c317..8d2453f45 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -49,9 +49,9 @@ def qm7db(nc,
         compounds[i].pbe0 = float(line[1])
         compounds[i].delta = float(line[1]) - float(line[2])
 
-    # Shuffle the compounds list
-    random.seed(r_seed)
-    random.shuffle(compounds)
+    if is_shuffled:
+        random.seed(r_seed)
+        random.shuffle(compounds)
 
     e_pbe0 = np.array([compound.pbe0 for compound in compounds], dtype=float)
     e_delta = np.array([compound.delta for compound in compounds], dtype=float)
-- 
cgit v1.2.3-70-g09d2


From b209f5e79ce45e6a4d5b442f10217806d078d5a6 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 26 Feb 2020 05:40:44 -0700
Subject: Update requirements

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 437e0428a..4b242770c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 colorama==0.4.3
 numpy==1.18.1
-pandas==0.25.3
\ No newline at end of file
+pandas==1.0.1
-- 
cgit v1.2.3-70-g09d2


From e18a909d18481e5e292882b3b1dd2ad8d693d097 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 26 Feb 2020 05:53:54 -0700
Subject: Revise printc

---
 ml_exp/__init__.py |  4 +++-
 ml_exp/misc.py     | 38 +++++++++++++++++---------------------
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index d685e7ccc..c62141a58 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -24,6 +24,7 @@ from ml_exp.compound import Compound
 from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
     first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
 from ml_exp.math import cholesky_solve
+from ml_exp.qm7db import qm7db
 
 __all__ = ['Compound',
            'coulomb_matrix',
@@ -32,4 +33,5 @@ __all__ = ['Compound',
            'adjacency_matrix',
            'check_bond',
            'bag_of_stuff',
-           'cholesky_solve']
+           'cholesky_solve',
+           'qm7db']
diff --git a/ml_exp/misc.py b/ml_exp/misc.py
index e9142b05f..4bd68eefc 100644
--- a/ml_exp/misc.py
+++ b/ml_exp/misc.py
@@ -29,29 +29,25 @@ init()
 def printc(text, color):
     """
     Prints texts normaly, but in color. Using colorama.
-    text: string with the text to print.
+    text: text to print.
     color: color to be used, same as available in colorama.
     """
-    color_dic = {'BLACK': Fore.BLACK,
-                 'RED': Fore.RED,
-                 'GREEN': Fore.GREEN,
-                 'YELLOW': Fore.YELLOW,
-                 'BLUE': Fore.BLUE,
-                 'MAGENTA': Fore.MAGENTA,
-                 'CYAN': Fore.CYAN,
-                 'WHITE': Fore.WHITE,
-                 'RESET': Fore.RESET}
-
-    color_dic_keys = color_dic.keys()
-    if color not in color_dic_keys:
-        print(Fore.RED
-              + '\'{}\' not found, using default color.'.format(color)
-              + Style.RESET_ALL)
-        actual_color = Fore.RESET
-    else:
-        actual_color = color_dic[color]
-
-    print(actual_color + text + Style.RESET_ALL)
+    color = color.upper()
+    colors = {'BLACK': Fore.BLACK,
+              'RED': Fore.RED,
+              'GREEN': Fore.GREEN,
+              'YELLOW': Fore.YELLOW,
+              'BLUE': Fore.BLUE,
+              'MAGENTA': Fore.MAGENTA,
+              'CYAN': Fore.CYAN,
+              'WHITE': Fore.WHITE,
+              'RESET': Fore.RESET}
+
+    av_colors = colors.keys()
+    if color not in av_colors:
+        raise KeyError(f'{color} not found.')
+
+    print(colors[color] + text + Style.RESET_ALL)
 
 
 def plot_benchmarks():
-- 
cgit v1.2.3-70-g09d2


From eeea8ff03aeae2367f8db4e4f0d65cccf708d434 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 26 Feb 2020 21:20:10 -0700
Subject: Remove unused function

---
 ml_exp/parallel_create_matrices.py | 85 --------------------------------------
 1 file changed, 85 deletions(-)
 delete mode 100644 ml_exp/parallel_create_matrices.py

diff --git a/ml_exp/parallel_create_matrices.py b/ml_exp/parallel_create_matrices.py
deleted file mode 100644
index a68122d48..000000000
--- a/ml_exp/parallel_create_matrices.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from multiprocessing import Process, Pipe
-from ml_exp.c_matrix import c_matrix_multiple
-from ml_exp.lj_matrix import lj_matrix_multiple
-
-
-def parallel_create_matrices(mol_data,
-                             nc_data,
-                             ljm_diag_value=None,
-                             ljm_sigma=1.0,
-                             ljm_epsilon=1.0,
-                             max_len=25,
-                             as_eig=True,
-                             bohr_radius_units=False):
-    """
-    Creates the Coulomb and L-J matrices in parallel.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    ljm_diag_value: if special diagonal value is to be used for lj matrix.
-    ljm_sigma: sigma value for lj matrix.
-    ljm_epsilon: psilon value for lj matrix.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-
-    # Matrices calculation.
-    procs = []
-    pipes = []
-
-    cm_recv, cm_send = Pipe(False)
-    p1 = Process(target=c_matrix_multiple,
-                 args=(mol_data,
-                       nc_data,
-                       cm_send,
-                       max_len,
-                       as_eig,
-                       bohr_radius_units))
-    procs.append(p1)
-    pipes.append(cm_recv)
-    p1.start()
-
-    ljm_recv, ljm_send = Pipe(False)
-    p2 = Process(target=lj_matrix_multiple,
-                 args=(mol_data,
-                       nc_data,
-                       ljm_send,
-                       ljm_diag_value,
-                       ljm_sigma,
-                       ljm_epsilon,
-                       max_len,
-                       as_eig,
-                       bohr_radius_units))
-    procs.append(p2)
-    pipes.append(ljm_recv)
-    p2.start()
-
-    cm_data = pipes[0].recv()
-    ljm_data = pipes[1].recv()
-
-    for proc in procs:
-        proc.join()
-
-    return cm_data, ljm_data
-- 
cgit v1.2.3-70-g09d2


From 4d759471c3e92381bb6e8b91083b8dea5ec49b02 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Thu, 27 Feb 2020 15:31:23 -0700
Subject: Rewrite basic ml function

---
 ml_exp/do_ml.py | 218 ++++++++++++--------------------------------------------
 1 file changed, 45 insertions(+), 173 deletions(-)

diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index 110242a1d..de4d4061c 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -22,206 +22,78 @@ SOFTWARE.
 """
 import time
 import numpy as np
-from multiprocessing import Process, Pipe
 from ml_exp.misc import printc
-from ml_exp.gauss_kernel import gauss_kernel
-from ml_exp.cholesky_solve import cholesky_solve
-from ml_exp.read_qm7_data import read_qm7_data
-from ml_exp.parallel_create_matrices import parallel_create_matrices
-
-
-def ml(desc_data,
-       energy_data,
-       training_size,
-       desc_type=None,
-       pipe=None,
-       test_size=None,
-       sigma=1000.0,
-       show_msgs=True):
+from ml_exp.kernels import gaussian_kernel
+from ml_exp.math import cholesky_solve
+# from ml_exp.qm7db import qm7db
+
+
+def simple_ml(descriptors,
+              energies,
+              training_size,
+              identifier=None,
+              test_size=None,
+              sigma=1000.0,
+              show_msgs=True):
     """
-    Does the ML methodology.
-    desc_data: descriptor (or representation) data.
-    energy_data: energy data associated with desc_data.
+    Basic ML methodology for a single descriptor type.
+    descriptors: array of descriptors.
+    energies: array of energies.
     training_size: size of the training set to use.
-    desc_type: string with the name of the descriptor used.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
+    identifier: string with the name of the descriptor used.
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
     sigma: depth of the kernel.
-    show_msgs: Show debug messages or not.
-    NOTE: desc_type is just a string and is only for identification purposes.
+    show_msgs: if debu messages should be shown.
+    NOTE: identifier is just a string and is only for identification purposes.
     Also, training is done with the first part of the data and
-    testing with the ending part of the data.
+        testing with the ending part of the data.
     """
     tic = time.perf_counter()
     # Initial calculations for later use.
-    d_len = len(desc_data)
-    e_len = len(energy_data)
+    data_size = descriptors.shape[0]
 
-    if not desc_type:
-        desc_type = 'NOT SPECIFIED'
+    if not identifier:
+        identifier = 'NOT SPECIFIED'
 
-    if d_len != e_len:
-        printc(''.join(['ERROR. Descriptor data size different ',
-                        'than energy data size.']), 'RED')
-        return None
+    if not data_size == energies.shape[0]:
+        raise ValueError('Energies size is different than descriptors size.')
 
-    if training_size >= d_len:
-        printc('ERROR. Training size greater or equal than data size.', 'RED')
-        return None
+    if training_size >= data_size:
+        raise ValueError('Training size is greater or equal to the data size.')
 
+    # If test_size is not set, it is set to a maximum size of 1500.
     if not test_size:
-        test_size = d_len - training_size
+        test_size = data_size - training_size
         if test_size > 1500:
             test_size = 1500
 
     if show_msgs:
-        printc('{} ML started.'.format(desc_type), 'GREEN')
-        printc('\tTraining size: {}'.format(training_size), 'CYAN')
-        printc('\tTest size: {}'.format(test_size), 'CYAN')
-        printc('\tSigma: {}'.format(sigma), 'CYAN')
+        printc(f'{identifier} ML started.', 'GREEN')
+        printc(f'\tTraining size: {training_size}', 'CYAN')
+        printc(f'\tTest size: {test_size}', 'CYAN')
+        printc(f'\tSigma: {test_size}', 'CYAN')
 
-    X_training = desc_data[:training_size]
-    Y_training = energy_data[:training_size]
-    K_training = gauss_kernel(X_training, X_training, sigma)
-    alpha_ = cholesky_solve(K_training, Y_training)
+    X_training = descriptors[:training_size]
+    Y_training = energies[:training_size]
+    K_training = gaussian_kernel(X_training, X_training, sigma)
+    alpha = cholesky_solve(K_training, Y_training)
 
-    X_test = desc_data[-test_size:]
-    Y_test = energy_data[-test_size:]
-    K_test = gauss_kernel(X_test, X_training, sigma)
-    Y_predicted = np.dot(K_test, alpha_)
+    X_test = descriptors[-test_size:]
+    Y_test = energies[-test_size:]
+    K_test = gaussian_kernel(X_test, X_training, sigma)
+    Y_predicted = np.dot(K_test, alpha)
 
     mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
-        printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
+        printc('\tMAE for {identifier}: {mae:.4f}', 'GREEN')
 
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
-        printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
-               'GREEN')
-        printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
-        printc('\t\tTest size: {}'.format(test_size), 'CYAN')
-        printc('\t\tSigma: {}'.format(sigma), 'CYAN')
-
-    if pipe:
-        pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
+        printc(f'\t{identifier} ML took {tictoc:.4f} seconds.', 'GREEN')
+        printc(f'\t\tTraining size: {training_size}', 'CYAN')
+        printc(f'\t\tTest size: {test_size}', 'CYAN')
+        printc(f'\t\tSigma: {sigma}', 'CYAN')
 
     return mae, tictoc
-
-
-def do_ml(min_training_size,
-          max_training_size=None,
-          training_increment_size=500,
-          test_size=None,
-          ljm_diag_value=None,
-          ljm_sigma=1.0,
-          ljm_epsilon=1.0,
-          r_seed=111,
-          save_benchmarks=False,
-          max_len=25,
-          as_eig=True,
-          bohr_radius_units=False,
-          sigma=1000.0,
-          show_msgs=True):
-    """
-    Main function that does the whole ML process.
-    min_training_size: minimum training size.
-    max_training_size: maximum training size.
-    training_increment_size: training increment size.
-    test_size: size of the test set to use. If no size is given,
-        the last remaining molecules are used.
-    ljm_diag_value: if a special diagonal value should be used in lj matrix.
-    ljm_sigma: sigma value for lj matrix.
-    ljm_epsilon: epsilon value for lj matrix.
-    r_seed: random seed to use for the shuffling.
-    save_benchmarks: if benchmarks should be saved.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    sigma: depth of the kernel.
-    show_msgs: Show debug messages or not.
-    """
-    # Initialization time.
-    init_time = time.perf_counter()
-    if not max_training_size:
-        max_training_size = min_training_size + training_increment_size
-
-    # Data reading.
-    molecules, nuclear_charge, energy_pbe0, energy_delta =\
-        read_qm7_data(r_seed=r_seed)
-
-    # Matrices calculation.
-    cm_data, ljm_data = parallel_create_matrices(molecules,
-                                                 nuclear_charge,
-                                                 ljm_diag_value,
-                                                 ljm_sigma,
-                                                 ljm_epsilon,
-                                                 max_len,
-                                                 as_eig,
-                                                 bohr_radius_units)
-
-    # ML calculation.
-    procs = []
-    cm_pipes = []
-    ljm_pipes = []
-    for i in range(min_training_size,
-                   max_training_size + 1,
-                   training_increment_size):
-        cm_recv, cm_send = Pipe(False)
-        p1 = Process(target=ml,
-                     args=(cm_data,
-                           energy_pbe0,
-                           i,
-                           'CM',
-                           cm_send,
-                           test_size,
-                           sigma,
-                           show_msgs))
-        procs.append(p1)
-        cm_pipes.append(cm_recv)
-        p1.start()
-
-        ljm_recv, ljm_send = Pipe(False)
-        p2 = Process(target=ml,
-                     args=(ljm_data,
-                           energy_pbe0,
-                           i,
-                           'L-JM',
-                           ljm_send,
-                           test_size,
-                           sigma,
-                           show_msgs))
-        procs.append(p2)
-        ljm_pipes.append(ljm_recv)
-        p2.start()
-
-    cm_bench_results = []
-    ljm_bench_results = []
-    for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
-        cm_bench_results.append(cd_pipe.recv())
-        ljm_bench_results.append(ljd_pipe.recv())
-
-    for proc in procs:
-        proc.join()
-
-    if save_benchmarks:
-        with open('data\\benchmarks.csv', 'a') as save_file:
-            # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,',
-            #                          'mae,time,lj_s,lj_e,date_ran\n']))
-            ltime = time.localtime()[:3][::-1]
-            ljm_se = ',' + str(ljm_sigma) + ',' + str(ljm_epsilon) + ','
-            date = '/'.join([str(field) for field in ltime])
-            for cm, ljm, in zip(cm_bench_results, ljm_bench_results):
-                cm_text = ','.join([str(field) for field in cm])\
-                    + ',' + date + '\n'
-                ljm_text = ','.join([str(field) for field in ljm])\
-                    + ljm_se + date + '\n'
-                save_file.write(cm_text)
-                save_file.write(ljm_text)
-
-    # End of program
-    end_time = time.perf_counter()
-    printc('Program took {:.4f} seconds.'.format(end_time - init_time),
-           'CYAN')
-- 
cgit v1.2.3-70-g09d2


From a8dc304d0c85b3db76986f109bdcd7a530bd3045 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Thu, 27 Feb 2020 15:42:02 -0700
Subject: Fix bug

---
 ml_exp/qm7db.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 8d2453f45..3fd46b6bc 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -25,15 +25,11 @@ import numpy as np
 import random
 
 
-# 'hof_qm7.txt.txt' retrieved from
-# https://github.com/qmlcode/tutorial
-def qm7db(nc,
-          db_path='data',
+def qm7db(db_path='data',
           is_shuffled=True,
           r_seed=111):
     """
     Creates a list of compounds with the qm7 database.
-    nc: dictionary containing nuclear charge data.
     db_path: path to the database directory.
     is_shuffled: if the resulting list of compounds should be shuffled.
     r_seed: random seed to use for the shuffling.
-- 
cgit v1.2.3-70-g09d2


From b937df41c1c5e996be94a3a690908ea989e281dc Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Thu, 27 Feb 2020 15:59:38 -0700
Subject: Placeholder function, test

---
 ml_exp/do_ml.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index de4d4061c..f32480367 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -25,7 +25,7 @@ import numpy as np
 from ml_exp.misc import printc
 from ml_exp.kernels import gaussian_kernel
 from ml_exp.math import cholesky_solve
-# from ml_exp.qm7db import qm7db
+from ml_exp.qm7db import qm7db
 
 
 def simple_ml(descriptors,
@@ -44,7 +44,7 @@ def simple_ml(descriptors,
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
     sigma: depth of the kernel.
-    show_msgs: if debu messages should be shown.
+    show_msgs: if debug messages should be shown.
     NOTE: identifier is just a string and is only for identification purposes.
     Also, training is done with the first part of the data and
         testing with the ending part of the data.
@@ -97,3 +97,55 @@ def simple_ml(descriptors,
         printc(f'\t\tSigma: {sigma}', 'CYAN')
 
     return mae, tictoc
+
+
+def do_ml(db_path='data',
+          is_shuffled=True,
+          r_seed=111,
+          show_msgs=True):
+    """
+    Main function that does the whole ML process.
+    training_size: minimum training size.
+    test_size: size of the test set to use. If no size is given,
+        the last remaining molecules are used.
+    ljm_diag_value: if a special diagonal value should be used in lj matrix.
+    ljm_sigma: sigma value for lj matrix.
+    ljm_epsilon: epsilon value for lj matrix.
+    r_seed: random seed to use for the shuffling.
+    save_benchmarks: if benchmarks should be saved.
+    size: maximum amount of atoms in molecule.
+    as_eig: if data should be returned as matrix or array of eigenvalues.
+    bohr_radius_units: if units should be in bohr's radius units.
+    sigma: depth of the kernel.
+    show_msgs: if debug messages should be shown.
+    """
+    init_time = time.perf_counter()
+
+    # Data reading.
+    tic = time.perf_counter()
+    compounds, energy_pbe0, energy_delta = qm7db(db_path=db_path,
+                                                 is_shuffled=is_shuffled,
+                                                 r_seed=r_seed)
+    toc = time.perf_counter()
+    tictoc = toc - tic
+    if show_msgs:
+        printc(f'Data reading took {tictoc:.4f} seconds.', 'CYAN')
+
+    # Matrices calculation.
+    tic = time.perf_counter()
+    for compound in compounds:
+        compound.gen_cm()
+        compound.gen_ljm()
+        compound.gen_am()
+    toc = time.perf_counter()
+    tictoc = toc - tic
+    if show_msgs:
+        printc(f'Matrices calculation took {tictoc:.4f} seconds.', 'CYAN')
+
+    # ML calculation.
+    # PLHLDR
+
+    # End of program
+    end_time = time.perf_counter()
+    totaltime = end_time - init_time
+    printc(f'Program took {totaltime:.4f} seconds.', 'CYAN')
-- 
cgit v1.2.3-70-g09d2


From c4d30a346d114185ab93eb5e88e241b922b5a538 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Thu, 27 Feb 2020 16:50:57 -0700
Subject: Add simple ml methodology, fix bugs

---
 ml_exp/__init__.py |  5 ++++-
 ml_exp/compound.py |  2 +-
 ml_exp/do_ml.py    | 65 ++++++++++++++++++++++++++++++++++++++++--------------
 ml_exp/kernels.py  |  6 ++---
 4 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index c62141a58..48ece56c8 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -25,6 +25,7 @@ from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
     first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
 from ml_exp.math import cholesky_solve
 from ml_exp.qm7db import qm7db
+from ml_exp.do_ml import simple_ml, do_ml
 
 __all__ = ['Compound',
            'coulomb_matrix',
@@ -34,4 +35,6 @@ __all__ = ['Compound',
            'check_bond',
            'bag_of_stuff',
            'cholesky_solve',
-           'qm7db']
+           'qm7db',
+           'simple_ml',
+           'do_ml']
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 9c7102860..1598fa482 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -93,8 +93,8 @@ class Compound:
                                         bohr_ru=bohr_ru)
 
     def gen_am(self,
-               size=23,
                use_forces=False,
+               size=23,
                bohr_ru=False):
         """
         Generate the Adjacency Matrix for the compund.
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index f32480367..40243d413 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -31,19 +31,19 @@ from ml_exp.qm7db import qm7db
 def simple_ml(descriptors,
               energies,
               training_size,
-              identifier=None,
               test_size=None,
               sigma=1000.0,
+              identifier=None,
               show_msgs=True):
     """
     Basic ML methodology for a single descriptor type.
     descriptors: array of descriptors.
     energies: array of energies.
     training_size: size of the training set to use.
-    identifier: string with the name of the descriptor used.
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
     sigma: depth of the kernel.
+    identifier: string with the name of the descriptor used.
     show_msgs: if debug messages should be shown.
     NOTE: identifier is just a string and is only for identification purposes.
     Also, training is done with the first part of the data and
@@ -86,7 +86,7 @@ def simple_ml(descriptors,
 
     mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
-        printc('\tMAE for {identifier}: {mae:.4f}', 'GREEN')
+        printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
 
     toc = time.perf_counter()
     tictoc = toc - tic
@@ -102,20 +102,27 @@ def simple_ml(descriptors,
 def do_ml(db_path='data',
           is_shuffled=True,
           r_seed=111,
+          diag_value=None,
+          lj_sigma=1.0,
+          lj_epsilon=1.0,
+          use_forces=False,
+          size=23,
+          as_eig=True,
+          bohr_ru=False,
+          sigma=1000.0,
           show_msgs=True):
     """
     Main function that does the whole ML process.
-    training_size: minimum training size.
-    test_size: size of the test set to use. If no size is given,
-        the last remaining molecules are used.
-    ljm_diag_value: if a special diagonal value should be used in lj matrix.
-    ljm_sigma: sigma value for lj matrix.
-    ljm_epsilon: epsilon value for lj matrix.
+    db_path: path to the database directory.
+    is_shuffled: if the resulting list of compounds should be shuffled.
     r_seed: random seed to use for the shuffling.
-    save_benchmarks: if benchmarks should be saved.
-    size: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
+    diag_value: if special diagonal value is to be used.
+    lj_sigma: sigma value.
+    lj_epsilon: epsilon value.
+    use_forces: if the use of forces instead of k_cx should be used.
+    size: compound size.
+    as_eig: if the representation should be as the eigenvalues.
+    bohr_ru: if radius units should be in bohr's radius units.
     sigma: depth of the kernel.
     show_msgs: if debug messages should be shown.
     """
@@ -134,16 +141,40 @@ def do_ml(db_path='data',
     # Matrices calculation.
     tic = time.perf_counter()
     for compound in compounds:
-        compound.gen_cm()
-        compound.gen_ljm()
-        compound.gen_am()
+        compound.gen_cm(size=size,
+                        as_eig=as_eig,
+                        bohr_ru=bohr_ru)
+        compound.gen_ljm(diag_value=diag_value,
+                         sigma=lj_sigma,
+                         epsilon=lj_epsilon,
+                         size=size,
+                         as_eig=as_eig,
+                         bohr_ru=bohr_ru)
+        compound.gen_am(use_forces=use_forces,
+                        size=size,
+                        bohr_ru=bohr_ru)
+
+    # Create a numpy array for the descriptors.
+    cm_data = np.array([compound.cm for compound in compounds], dtype=float)
+    ljm_data = np.array([compound.ljm for compound in compounds], dtype=float)
+    am_data = np.array([compound.cm for compound in compounds], dtype=float)
+    print(cm_data.shape, ljm_data.shape, am_data.shape)
+
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
         printc(f'Matrices calculation took {tictoc:.4f} seconds.', 'CYAN')
 
     # ML calculation.
-    # PLHLDR
+    # CM
+    cm_mae, cm_tictoc = simple_ml(cm_data,
+                                  energy_pbe0,
+                                  training_size=5000,
+                                  test_size=1500,
+                                  sigma=1000.0,
+                                  identifier='CM',
+                                  show_msgs=show_msgs)
+    print(cm_mae, cm_tictoc)
 
     # End of program
     end_time = time.perf_counter()
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index 7a61a1e1e..3914ffc20 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -36,9 +36,9 @@ def gaussian_kernel(X1,
     inv_sigma = -0.5 / (sigma*sigma)
 
     K = np.zeros((X1.shape[0], X2.shape[0]), dtype=float)
-    for i in X1:
-        for j in X2:
-            f_norm = np.linalg.norm(i - j)
+    for i, x1 in enumerate(X1):
+        for j, x2 in enumerate(X2):
+            f_norm = np.linalg.norm(x1 - x2)
             # print(f_norm)
             K[i, j] = math.exp(inv_sigma * f_norm)
 
-- 
cgit v1.2.3-70-g09d2


From 6ad25bb4cd5822fa84d1c4cec54952c6fd8df2f6 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Thu, 27 Feb 2020 21:41:41 -0700
Subject: Add rest of the descriptors

---
 ml_exp/do_ml.py | 95 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 25 deletions(-)

diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index 40243d413..d8ee415bf 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -106,10 +106,14 @@ def do_ml(db_path='data',
           lj_sigma=1.0,
           lj_epsilon=1.0,
           use_forces=False,
+          stuff='bonds',
           size=23,
           as_eig=True,
           bohr_ru=False,
+          training_size=1500,
+          test_size=None,
           sigma=1000.0,
+          identifiers=["CM"],
           show_msgs=True):
     """
     Main function that does the whole ML process.
@@ -120,12 +124,20 @@ def do_ml(db_path='data',
     lj_sigma: sigma value.
     lj_epsilon: epsilon value.
     use_forces: if the use of forces instead of k_cx should be used.
+    stuff: elements of the bag, by default the known bag of bonds.
     size: compound size.
     as_eig: if the representation should be as the eigenvalues.
     bohr_ru: if radius units should be in bohr's radius units.
+    training_size: size of the training set to use.
+    test_size: size of the test set to use. If no size is given,
+        the last remaining molecules are used.
     sigma: depth of the kernel.
+    identifiers: list of names (strings) of descriptors to use.
     show_msgs: if debug messages should be shown.
     """
+    if type(identifiers) != list:
+        raise TypeError('\'identifiers\' is not a list.')
+
     init_time = time.perf_counter()
 
     # Data reading.
@@ -141,24 +153,34 @@ def do_ml(db_path='data',
     # Matrices calculation.
     tic = time.perf_counter()
     for compound in compounds:
-        compound.gen_cm(size=size,
-                        as_eig=as_eig,
-                        bohr_ru=bohr_ru)
-        compound.gen_ljm(diag_value=diag_value,
-                         sigma=lj_sigma,
-                         epsilon=lj_epsilon,
-                         size=size,
-                         as_eig=as_eig,
-                         bohr_ru=bohr_ru)
-        compound.gen_am(use_forces=use_forces,
-                        size=size,
-                        bohr_ru=bohr_ru)
+        if 'CM' in identifiers:
+            compound.gen_cm(size=size,
+                            as_eig=as_eig,
+                            bohr_ru=bohr_ru)
+        if 'LJM' in identifiers:
+            compound.gen_ljm(diag_value=diag_value,
+                             sigma=lj_sigma,
+                             epsilon=lj_epsilon,
+                             size=size,
+                             as_eig=as_eig,
+                             bohr_ru=bohr_ru)
+        if 'AM' in identifiers:
+            compound.gen_am(use_forces=use_forces,
+                            size=size,
+                            bohr_ru=bohr_ru)
+        if 'BOS' in identifiers:
+            compound.gen_bos(size=size,
+                             stuff=stuff)
 
     # Create a numpy array for the descriptors.
-    cm_data = np.array([compound.cm for compound in compounds], dtype=float)
-    ljm_data = np.array([compound.ljm for compound in compounds], dtype=float)
-    am_data = np.array([compound.cm for compound in compounds], dtype=float)
-    print(cm_data.shape, ljm_data.shape, am_data.shape)
+    if 'CM' in identifiers:
+        cm_data = np.array([comp.cm for comp in compounds], dtype=float)
+    if 'LJM' in identifiers:
+        ljm_data = np.array([comp.ljm for comp in compounds], dtype=float)
+    if 'AM' in identifiers:
+        am_data = np.array([comp.cm for comp in compounds], dtype=float)
+    if 'BOS' in identifiers:
+        bos_data = np.array([comp.bos for comp in compounds], dtype=float)
 
     toc = time.perf_counter()
     tictoc = toc - tic
@@ -166,15 +188,38 @@ def do_ml(db_path='data',
         printc(f'Matrices calculation took {tictoc:.4f} seconds.', 'CYAN')
 
     # ML calculation.
-    # CM
-    cm_mae, cm_tictoc = simple_ml(cm_data,
-                                  energy_pbe0,
-                                  training_size=5000,
-                                  test_size=1500,
-                                  sigma=1000.0,
-                                  identifier='CM',
-                                  show_msgs=show_msgs)
-    print(cm_mae, cm_tictoc)
+    if 'CM' in identifiers:
+        cm_mae, cm_tictoc = simple_ml(cm_data,
+                                      energy_pbe0,
+                                      training_size=training_size,
+                                      test_size=test_size,
+                                      sigma=sigma,
+                                      identifier='CM',
+                                      show_msgs=show_msgs)
+    if 'LJM' in identifiers:
+        ljm_mae, ljm_tictoc = simple_ml(ljm_data,
+                                        energy_pbe0,
+                                        training_size=training_size,
+                                        test_size=test_size,
+                                        sigma=sigma,
+                                        identifier='LJM',
+                                        show_msgs=show_msgs)
+    if 'AM' in identifiers:
+        am_mae, am_tictoc = simple_ml(am_data,
+                                      energy_pbe0,
+                                      training_size=training_size,
+                                      test_size=test_size,
+                                      sigma=sigma,
+                                      identifier='CM',
+                                      show_msgs=show_msgs)
+    if 'BOS' in identifiers:
+        bos_mae, bos_tictoc = simple_ml(bos_data,
+                                        energy_pbe0,
+                                        training_size=training_size,
+                                        test_size=test_size,
+                                        sigma=sigma,
+                                        identifier='CM',
+                                        show_msgs=show_msgs)
 
     # End of program
     end_time = time.perf_counter()
-- 
cgit v1.2.3-70-g09d2