author    David Luevano Alvarado <55825613+luevano@users.noreply.github.com>  2020-03-02 12:02:52 -0700
committer GitHub <noreply@github.com>  2020-03-02 12:02:52 -0700
commit    1628e8c079c7c6c68c47a29347d2cfabd5abaa9f (patch)
tree      dd42fddfb03f1ff201d6f770bb92c3f32cae9329
parent    86faabeac5aed75c1dfc883fe757c21eba94b645 (diff)
parent    62cef5f1782caf4ff379cd021744b98be4be6c8b (diff)

Merge pull request #3 from luevano/addbags

Addbags
-rw-r--r--  ml_exp/__init__.py         |   6
-rw-r--r--  ml_exp/compound.py         |  82
-rw-r--r--  ml_exp/do_ml.py            |  60
-rw-r--r--  ml_exp/kernels.py          |  28
-rw-r--r--  ml_exp/math.py             |   4
-rw-r--r--  ml_exp/qm7db.py            |   8
-rw-r--r--  ml_exp/representations.py  | 163
7 files changed, 193 insertions, 158 deletions
diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 48ece56c8..4d672efd7 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -22,7 +22,7 @@ SOFTWARE.
"""
from ml_exp.compound import Compound
from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
- first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
+ get_helping_data, adjacency_matrix, check_bond, bag_of_bonds
from ml_exp.math import cholesky_solve
from ml_exp.qm7db import qm7db
from ml_exp.do_ml import simple_ml, do_ml
@@ -30,10 +30,10 @@ from ml_exp.do_ml import simple_ml, do_ml
__all__ = ['Compound',
'coulomb_matrix',
'lennard_jones_matrix',
- 'first_neighbor_matrix',
+ 'get_helping_data',
'adjacency_matrix',
'check_bond',
- 'bag_of_stuff',
+ 'bag_of_bonds',
'cholesky_solve',
'qm7db',
'simple_ml',
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 1598fa482..6c8525195 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -23,7 +23,7 @@ SOFTWARE.
import numpy as np
from ml_exp.data import NUCLEAR_CHARGE
from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
- first_neighbor_matrix, adjacency_matrix, bag_of_stuff
+ get_helping_data, adjacency_matrix, bag_of_bonds
class Compound:
@@ -33,20 +33,31 @@ class Compound:
Initialization of the Compound.
xyz: (path to) the xyz file.
"""
+ # General compound data.
self.name = None
-
self.n = None
- self.extra = None # In case a comment is in the compound file.
+ self.extra = None
self.atoms = None
- self.atoms_nc = None
+ self.nc = None
self.coordinates = None
self.pbe0 = None
self.delta = None
+ # Computed data.
self.cm = None
self.ljm = None
self.am = None
- self.bos = None
+ self.bob = None
+ self.bo_atoms = None
+ self.bok_cx = None
+ self.bof = None
+
+ # Helping data.
+ self.fnm = None
+ self.bonds = None
+ self.bonds_i = None
+ self.bonds_k = None
+ self.bonds_f = None
if xyz is not None:
self.read_xyz(xyz)
@@ -62,7 +73,7 @@ class Compound:
bohr_ru: if radius units should be in bohr's radius units.
"""
self.cm = coulomb_matrix(self.coordinates,
- self.atoms_nc,
+ self.nc,
size=size,
as_eig=as_eig,
bohr_ru=bohr_ru)
@@ -84,7 +95,7 @@ class Compound:
bohr_ru: if radius units should be in bohr's radius units.
"""
self.ljm = lennard_jones_matrix(self.coordinates,
- self.atoms_nc,
+ self.nc,
diag_value=diag_value,
sigma=sigma,
epsilon=epsilon,
@@ -92,42 +103,45 @@ class Compound:
as_eig=as_eig,
bohr_ru=bohr_ru)
- def gen_am(self,
- use_forces=False,
+ def gen_hd(self,
size=23,
bohr_ru=False):
"""
+ Generate the helping data for use in Adjacency Matrix, for example.
+ size: compound size.
+ bohr_ru: if radius units should be in bohr's radius units.
+ """
+ hd = get_helping_data(self.coordinates,
+ self.atoms,
+ self.nc,
+ size=size,
+ bohr_ru=bohr_ru)
+
+ self.fnm, self.bonds, self.bonds_i, self.bonds_k, self.bonds_f = hd
+
+ def gen_am(self,
+ use_forces=False,
+ size=23):
+ """
Generate the Adjacency Matrix for the compound.
use_forces: if the use of forces instead of k_cx should be used.
size: compound size.
- bohr_ru: if radius units should be in bohr's radius units.
"""
- # First, generate the first neighor matrix.
- fnm, bonds, forces = first_neighbor_matrix(self.coordinates,
- self.atoms_nc,
- self.atoms,
- size=size,
- use_forces=use_forces,
- bohr_ru=bohr_ru)
-
- # Now, generate the adjacency matrix.
- self.am = adjacency_matrix(fnm,
- bonds,
- forces,
+ self.am = adjacency_matrix(self.bonds_i,
+ self.bonds_k,
+ self.bonds_f,
+ use_forces=use_forces,
size=size)
- def gen_bos(self,
- size=23,
- stuff='bonds'):
+ def gen_bob(self,
+ size=23):
"""
Generate the Bag of Stuff for the compound.
size: compound size.
- stuff: elements of the bag, by default the known bag of bonds.
"""
- self.bos = bag_of_stuff(self.cm,
+ self.bob = bag_of_bonds(self.cm,
self.atoms,
- size=size,
- stuff=stuff)
+ size=size)
def read_xyz(self,
filename):
@@ -139,15 +153,15 @@ class Compound:
lines = f.readlines()
self.name = filename.split('/')[-1]
- self.n = int(lines[0])
+ self.n = np.int32(lines[0])
self.extra = lines[1]
self.atoms = []
- self.atoms_nc = np.empty(self.n, dtype=int)
- self.coordinates = np.empty((self.n, 3), dtype=float)
+ self.nc = np.empty(self.n, dtype=np.int64)
+ self.coordinates = np.empty((self.n, 3), dtype=np.float64)
for i, atom in enumerate(lines[2:self.n + 2]):
atom_d = atom.split()
self.atoms.append(atom_d[0])
- self.atoms_nc[i] = NUCLEAR_CHARGE[atom_d[0]]
- self.coordinates[i] = np.asarray(atom_d[1:4], dtype=float)
+ self.nc[i] = NUCLEAR_CHARGE[atom_d[0]]
+ self.coordinates[i] = np.asarray(atom_d[1:4], dtype=np.float64)
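
For reference, a minimal usage sketch of the reworked Compound flow after this change. The xyz path and the gen_cm method name are assumptions (gen_cm is inferred from the gen_hd/gen_am/gen_bob naming and the coulomb_matrix call above); note that gen_hd must now be called before gen_am, since the helping data is no longer computed inside gen_am.

from ml_exp.compound import Compound

# Hypothetical xyz path; any file in the QM7 xyz layout should work.
mol = Compound('data/molecule.xyz')

# Coulomb matrix first; bag_of_bonds expects the full matrix rather than
# the eigenvalue vector, so as_eig is left off here.
mol.gen_cm(size=23, as_eig=False)

# Helping data (fnm, bonds, bonds_i, bonds_k, bonds_f), then the
# adjacency matrix built from it.
mol.gen_hd(size=23, bohr_ru=False)
mol.gen_am(use_forces=False, size=23)

# Bag of Bonds from the Coulomb matrix and the atom list.
mol.gen_bob(size=23)
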
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index d8ee415bf..5efd13690 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -33,6 +33,7 @@ def simple_ml(descriptors,
training_size,
test_size=None,
sigma=1000.0,
+ opt=True,
identifier=None,
show_msgs=True):
"""
@@ -43,6 +44,7 @@ def simple_ml(descriptors,
test_size: size of the test set to use. If no size is given,
the last remaining molecules are used.
sigma: depth of the kernel.
+ opt: if the optimized algorithm should be used. For benchmarking purposes.
identifier: string with the name of the descriptor used.
show_msgs: if debug messages should be shown.
NOTE: identifier is just a string and is only for identification purposes.
@@ -76,13 +78,21 @@ def simple_ml(descriptors,
X_training = descriptors[:training_size]
Y_training = energies[:training_size]
- K_training = gaussian_kernel(X_training, X_training, sigma)
- alpha = cholesky_solve(K_training, Y_training)
+ K_training = gaussian_kernel(X_training,
+ X_training,
+ sigma,
+ opt=opt)
+ alpha = cholesky_solve(K_training,
+ Y_training)
X_test = descriptors[-test_size:]
Y_test = energies[-test_size:]
- K_test = gaussian_kernel(X_test, X_training, sigma)
- Y_predicted = np.dot(K_test, alpha)
+ K_test = gaussian_kernel(X_test,
+ X_training,
+ sigma,
+ opt=opt)
+ Y_predicted = np.dot(K_test,
+ alpha)
mae = np.mean(np.abs(Y_predicted - Y_test))
if show_msgs:
@@ -113,7 +123,8 @@ def do_ml(db_path='data',
training_size=1500,
test_size=None,
sigma=1000.0,
- identifiers=["CM"],
+ opt=True,
+ identifiers=['CM'],
show_msgs=True):
"""
Main function that does the whole ML process.
@@ -132,6 +143,7 @@ def do_ml(db_path='data',
test_size: size of the test set to use. If no size is given,
the last remaining molecules are used.
sigma: depth of the kernel.
+ opt: if the optimized algorithm should be used. For benchmarking purposes.
identifiers: list of names (strings) of descriptors to use.
show_msgs: if debug messages should be shown.
"""
@@ -164,23 +176,27 @@ def do_ml(db_path='data',
size=size,
as_eig=as_eig,
bohr_ru=bohr_ru)
+ """
if 'AM' in identifiers:
- compound.gen_am(use_forces=use_forces,
- size=size,
+ compound.gen_hd(size=size,
bohr_ru=bohr_ru)
- if 'BOS' in identifiers:
- compound.gen_bos(size=size,
- stuff=stuff)
+ compound.gen_am(use_forces=use_forces,
+ size=size)
+ """
+ if 'BOB' in identifiers:
+ compound.gen_bob(size=size)
# Create a numpy array for the descriptors.
if 'CM' in identifiers:
- cm_data = np.array([comp.cm for comp in compounds], dtype=float)
+ cm_data = np.array([comp.cm for comp in compounds], dtype=np.float64)
if 'LJM' in identifiers:
- ljm_data = np.array([comp.ljm for comp in compounds], dtype=float)
+ ljm_data = np.array([comp.ljm for comp in compounds], dtype=np.float64)
+ """
if 'AM' in identifiers:
- am_data = np.array([comp.cm for comp in compounds], dtype=float)
- if 'BOS' in identifiers:
- bos_data = np.array([comp.bos for comp in compounds], dtype=float)
+ am_data = np.array([comp.cm for comp in compounds], dtype=np.float64)
+ """
+ if 'BOB' in identifiers:
+ bob_data = np.array([comp.bob for comp in compounds], dtype=np.float64)
toc = time.perf_counter()
tictoc = toc - tic
@@ -194,6 +210,7 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
+ opt=opt,
identifier='CM',
show_msgs=show_msgs)
if 'LJM' in identifiers:
@@ -202,23 +219,28 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
+ opt=opt,
identifier='LJM',
show_msgs=show_msgs)
+ """
if 'AM' in identifiers:
am_mae, am_tictoc = simple_ml(am_data,
energy_pbe0,
training_size=training_size,
test_size=test_size,
sigma=sigma,
- identifier='CM',
+ opt=opt,
+ identifier='AM',
show_msgs=show_msgs)
- if 'BOS' in identifiers:
- bos_mae, bos_tictoc = simple_ml(bos_data,
+ """
+ if 'BOB' in identifiers:
+ bob_mae, bob_tictoc = simple_ml(bob_data,
energy_pbe0,
training_size=training_size,
test_size=test_size,
sigma=sigma,
- identifier='CM',
+ opt=opt,
+ identifier='BOB',
show_msgs=show_msgs)
# End of program
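
A minimal sketch of calling the updated pipeline with the new opt flag and the renamed BOB identifier (AM stays commented out in this commit); db_path='data' and the other values are simply the defaults visible in the signature above.

from ml_exp.do_ml import do_ml

# Runs CM, LJM and BOB end to end: load qm7db, build descriptors,
# train with the Gaussian kernel + Cholesky solve, report MAE.
do_ml(db_path='data',
      training_size=1500,
      test_size=None,
      sigma=1000.0,
      opt=True,
      identifiers=['CM', 'LJM', 'BOB'],
      show_msgs=True)
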
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index 3914ffc20..feaf9a990 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -26,20 +26,30 @@ import numpy as np
def gaussian_kernel(X1,
X2,
- sigma):
+ sigma,
+ opt=True):
"""
Calculates the Gaussian Kernel.
X1: first representations.
X2: second representations.
sigma: kernel width.
+ opt: if the optimized algorithm should be used. For benchmarking purposes.
"""
- inv_sigma = -0.5 / (sigma*sigma)
-
- K = np.zeros((X1.shape[0], X2.shape[0]), dtype=float)
- for i, x1 in enumerate(X1):
- for j, x2 in enumerate(X2):
- f_norm = np.linalg.norm(x1 - x2)
- # print(f_norm)
- K[i, j] = math.exp(inv_sigma * f_norm)
+ i_sigma = -0.5 / (sigma*sigma)
+
+ K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
+ if opt:
+ # Faster way of calculating the kernel (no numba support).
+ for i, x1 in enumerate(X1):
+ if X2.ndim == 3:
+ norm = np.linalg.norm(X2 - x1, axis=(1, 2))
+ else:
+ norm = np.linalg.norm(X2 - x1, axis=-1)
+ K[i, :] = np.exp(i_sigma * np.square(norm))
+ else:
+ for i, x1 in enumerate(X1):
+ for j, x2 in enumerate(X2):
+ f_norm = np.linalg.norm(x2 - x1)
+ K[i, j] = math.exp(i_sigma * f_norm**2)
return K
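
The opt=True branch replaces the inner loop with one vectorized norm per row of the kernel. A standalone sketch of that trick (not the module's exact code), checked against the naive double loop:

import numpy as np

def gaussian_kernel_rowwise(X1, X2, sigma):
    # For each x1, compute the norms against the whole X2 block at once;
    # axis=(1, 2) handles matrix descriptors, axis=-1 handles vectors.
    i_sigma = -0.5 / (sigma*sigma)
    K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
    axes = (1, 2) if X2.ndim == 3 else -1
    for i, x1 in enumerate(X1):
        norm = np.linalg.norm(X2 - x1, axis=axes)
        K[i, :] = np.exp(i_sigma * np.square(norm))
    return K

# Quick equivalence check against the slow path.
rng = np.random.default_rng(0)
X = rng.normal(size=(5, 23, 23))
K_fast = gaussian_kernel_rowwise(X, X, sigma=1000.0)
K_slow = np.array([[np.exp(-0.5/1000.0**2 * np.linalg.norm(xj - xi)**2)
                    for xj in X] for xi in X])
assert np.allclose(K_fast, K_slow)
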
diff --git a/ml_exp/math.py b/ml_exp/math.py
index da1fdf08f..253abe5d2 100644
--- a/ml_exp/math.py
+++ b/ml_exp/math.py
@@ -39,7 +39,7 @@ def cholesky_solve(K,
size = K.shape[0]
# Solve Lx=y for x.
- x = np.zeros(size, dtype=float)
+ x = np.zeros(size, dtype=np.float64)
x[0] = y[0] / L[0, 0]
for i in range(1, size):
temp_sum = 0.0
@@ -49,7 +49,7 @@ def cholesky_solve(K,
# Now, solve LTa=x for a.
L2 = L.T
- a = np.zeros(size, dtype=float)
+ a = np.zeros(size, dtype=np.float64)
a[size - 1] = x[size - 1] / L2[size - 1, size - 1]
for i in range(0, size - 1)[::-1]:
temp_sum = 0.0
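
Only the float64 change is visible in these hunks, but as a sanity check, a small sketch of how cholesky_solve is expected to behave (assuming it factorizes the symmetric positive-definite K internally, as the substitution steps above suggest):

import numpy as np
from ml_exp.math import cholesky_solve

# Small symmetric positive-definite test system (random data, not QM7).
rng = np.random.default_rng(1)
A = rng.normal(size=(50, 50))
K = A @ A.T + 50.0*np.eye(50)
y = rng.normal(size=50)

alpha = cholesky_solve(K, y)
# Should agree with a generic dense solver up to roundoff.
assert np.allclose(alpha, np.linalg.solve(K, y))
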
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 3fd46b6bc..3ba2c5814 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -42,14 +42,14 @@ def qm7db(db_path='data',
for i, line in enumerate(lines):
line = line.split()
compounds.append(Compound(f'{db_path}/{line[0]}'))
- compounds[i].pbe0 = float(line[1])
- compounds[i].delta = float(line[1]) - float(line[2])
+ compounds[i].pbe0 = np.float64(line[1])
+ compounds[i].delta = np.float64(line[1]) - np.float64(line[2])
if is_shuffled:
random.seed(r_seed)
random.shuffle(compounds)
- e_pbe0 = np.array([compound.pbe0 for compound in compounds], dtype=float)
- e_delta = np.array([compound.delta for compound in compounds], dtype=float)
+ e_pbe0 = np.array([comp.pbe0 for comp in compounds], dtype=np.float64)
+ e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64)
return compounds, e_pbe0, e_delta
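
A minimal sketch of loading the database with the updated qm7db (the 'data' layout, an index file of energies plus the xyz files it references, is assumed to match what the loop above parses):

from ml_exp.qm7db import qm7db

compounds, e_pbe0, e_delta = qm7db(db_path='data')
# Both energy arrays are now np.float64 after this change.
print(len(compounds), e_pbe0.dtype, e_delta.dtype)
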
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index ea079595e..be567d67a 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -53,7 +53,7 @@ size. Arrays are not of the right shape.')
'instead of (size).')
size = n
- cm = np.zeros((size, size), dtype=float)
+ cm = np.zeros((size, size), dtype=np.float64)
# Actual calculation of the coulomb matrix.
for i, xyz_i in enumerate(coords):
@@ -120,7 +120,7 @@ size. Arrays are not of the right shape.')
'instead of (size).')
size = n
- lj = np.zeros((size, size), dtype=float)
+ lj = np.zeros((size, size), dtype=np.float64)
# Actual calculation of the lennard-jones matrix.
for i, xyz_i in enumerate(coords):
@@ -166,26 +166,25 @@ size. Arrays are not of the right shape.')
return lj
-def first_neighbor_matrix(coords,
- nc,
- atoms,
- size=23,
- use_forces=False,
- bohr_ru=False):
+def get_helping_data(coords,
+ atoms,
+ nc,
+ size=23,
+ bohr_ru=False):
"""
- Creates the First Neighbor Matrix from the molecule data given.
+ Creates helping data such as the First Neighbor Matrix for the compound.
coords: compound coordinates.
- nc: nuclear charge data.
atoms: list of atoms.
- use_forces: if the use of forces instead of k_cx should be used.
+ nc: nuclear charge data.
+ size: compound size.
bohr_ru: if radius units should be in bohr's radius units.
NOTE: Bond distance of carbon to other elements
are (for atoms present in the qm7 dataset):
- C: 1.20 - 1.54 A (Edited to 1.19 - 1.54 A)
- H: 1.06 - 1.12 A
- O: 1.43 - 2.15 A
- N: 1.47 - 2.10 A
- S: 1.81 - 2.55 A
+ C: 1.19 - 1.54 A, 1.0
+ H: 1.06 - 1.12 A, 1.0
+ O: 1.43 - 2.15 A, 0.8
+ N: 1.47 - 2.10 A, 1.0
+ S: 1.81 - 2.55 A, 0.7
"""
if bohr_ru:
cr = 0.52917721067
@@ -204,88 +203,77 @@ size. Arrays are not of the right shape.')
size = n
# Possible bonds.
- cc_bond = sorted(['C', 'C'])
- ch_bond = sorted(['C', 'H'])
- co_bond = sorted(['C', 'O'])
- cn_bond = sorted(['C', 'N'])
- cs_bond = sorted(['C', 'S'])
-
- fnm = np.zeros((size, size), dtype=float)
-
+ cc_bond = ''.join(sorted(['C', 'C']))
+ ch_bond = ''.join(sorted(['C', 'H']))
+ co_bond = ''.join(sorted(['C', 'O']))
+ cn_bond = ''.join(sorted(['C', 'N']))
+ cs_bond = ''.join(sorted(['C', 'S']))
+ pos_bonds = {cc_bond: (1.19, 1.54, 1.0), ch_bond: (1.06, 1.12, 1.0),
+ co_bond: (1.43, 2.15, 0.8), cn_bond: (1.47, 2.10, 1.0),
+ cs_bond: (1.81, 2.55, 0.7)}
+
+ fnm = np.zeros((size, size), dtype=bool)
bonds = []
- forces = []
+ bonds_i = []
+ bonds_k = []
+ bonds_f = []
for i, xyz_i in enumerate(coords):
for j, xyz_j in enumerate(coords):
# Ignore the diagonal.
if i != j:
- bond = sorted([atoms[i], atoms[j]])
- rv = xyz_i - xyz_j
- r = np.linalg.norm(rv)/cr
-
- # Check for each type of bond.
- if (cc_bond == bond) and (r >= 1.19 and r <= 1.54):
- fnm[i, j] = 1.0
- if j > i:
- bonds.append((i, j))
- if use_forces:
- forces.append(rv*nc[i]*nc[j]/r**3)
- elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12):
- fnm[i, j] = 1.0
- if j > i:
- bonds.append((i, j))
- if use_forces:
- forces.append(rv*nc[i]*nc[j]/r**3)
- elif (co_bond == bond) and (r >= 1.43 and r <= 2.15):
- fnm[i, j] = 0.8
- if j > i:
- bonds.append((i, j))
- if use_forces:
- forces.append(rv*nc[i]*nc[j]/r**3)
- elif (cn_bond == bond) and (r >= 1.47 and r <= 2.10):
- fnm[i, j] = 1.0
- if j > i:
- bonds.append((i, j))
- if use_forces:
- forces.append(rv*nc[i]*nc[j]/r**3)
- elif (cs_bond == bond) and (r >= 1.81 and r <= 2.55):
- fnm[i, j] = 0.7
- if j > i:
- bonds.append((i, j))
- if use_forces:
- forces.append(rv*nc[i]*nc[j]/r**3)
-
- return fnm, bonds, forces
-
-
-def adjacency_matrix(fnm,
- bonds,
- forces,
- size=22):
+ bond = ''.join(sorted([atoms[i], atoms[j]]))
+ if bond in pos_bonds.keys():
+ r_min = pos_bonds[bond][0]
+ r_max = pos_bonds[bond][1]
+ rv = xyz_i - xyz_j
+ r = np.linalg.norm(rv)/cr
+ if r >= r_min and r <= r_max:
+ fnm[i, j] = True
+ # Only add to the list if in the upper triangle.
+ if j > i:
+ bonds.append(bond)
+ bonds_i.append((i, j))
+ bonds_k.append(pos_bonds[bond][2])
+ bonds_f.append(rv*nc[i]*nc[j]/r**3)
+
+ return fnm, bonds, bonds_i, bonds_k, bonds_f
+
+
+def adjacency_matrix(bonds_i,
+ bonds_k,
+ bonds_f,
+ use_forces=False,
+ size=23):
"""
Calculates the adjacency matrix given the bond list.
- fnm: first neighbour matrix.
- bonds: list of bonds (tuple of indexes).
- forces: list of forces.
+ bonds: list of bond names.
+ bonds_i: list of bond indexes (tuple of indexes).
+ bonds_k: list of k_cx values.
+ bonds_f: list of force values.
+ use_forces: if the use of forces instead of k_cx should be used.
size: compound size.
"""
- n = len(bonds)
+ if bonds_i is None:
+ raise ValueError('The helping data hasn\'t been initialized for\
+the current compound.')
+ n = len(bonds_i)
if size < n:
print('Error. Compound size (n) is greater than (size). Using (n)\
instead of (size).')
size = n
- am = np.zeros((size, size), dtype=float)
+ am = np.zeros((size, size), dtype=np.float64)
- for i, bond_i in enumerate(bonds):
- for j, bond_j in enumerate(bonds):
+ for i, bond_i in enumerate(bonds_i):
+ for j, bond_j in enumerate(bonds_i):
# Ignore the diagonal.
if i != j:
if (bond_i[0] in bond_j) or (bond_i[1] in bond_j):
- if forces:
- am[i, j] = np.dot(forces[i], forces[j])
+ if use_forces:
+ am[i, j] = np.dot(bonds_f[i], bonds_f[j])
else:
- am[i, j] = fnm[bond_i[0], bond_i[1]]
+ am[i, j] = bonds_k[i]
return am
@@ -308,10 +296,9 @@ def check_bond(bags,
return False, None
-def bag_of_stuff(cm,
+def bag_of_bonds(cm,
atoms,
- size=23,
- stuff='bonds'):
+ size=23):
"""
Creates the Bag of Bonds using the Coulomb Matrix.
cm: coulomb matrix.
@@ -334,7 +321,7 @@ generated as the vector of eigenvalues, try (re-)generating the CM.')
size = n
# Bond max length, calculated using only the upper triangular matrix.
- bond_size = int((size * size - size)/2 + size)
+ bond_size = np.int32((size * size - size)/2 + size)
# List where each bag data is stored.
bags = []
@@ -366,19 +353,21 @@ generated as the vector of eigenvalues, try (re-)generating the CM.')
bonds.append(''.join(sorted([a_i, a_j])))
bonds = atom_list + bonds
- # Create the final vector for the bos.
- bos = np.zeros(bond_size, dtype=float)
+ # Create the final vector for the bob.
+ bob = np.zeros(bond_size, dtype=np.float64)
c_i = 0
for i, bond in enumerate(bonds):
checker = check_bond(bags, bond)
if checker[0]:
for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
- # Use c_i as the index for bos if the zero padding should
+ # Use c_i as the index for bob if the zero padding should
# be at the end of the vector instead of between each bond.
- bos[i*size + j] = num
+ # bob[i*size + j] = num
+ bob[c_i] = num
c_i += 1
else:
print(f'Error. Bond {bond} from bond list couldn\'t be found',
'in the bags list. This could be a case where the atom',
'is only present once in the molecule.')
- return bos
+
+ return bob
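
A minimal sketch of the bond-key scheme that get_helping_data and bag_of_bonds now share: the two atom symbols are sorted and joined, so ('H', 'C') and ('C', 'H') map to the same key, and each pos_bonds entry carries (r_min, r_max, k_cx) as listed in the docstring above.

# Standalone illustration; values mirror the pos_bonds dict above.
pos_bonds = {'CC': (1.19, 1.54, 1.0), 'CH': (1.06, 1.12, 1.0),
             'CO': (1.43, 2.15, 0.8), 'CN': (1.47, 2.10, 1.0),
             'CS': (1.81, 2.55, 0.7)}

def bond_key(a_i, a_j):
    return ''.join(sorted([a_i, a_j]))

key = bond_key('H', 'C')                  # 'CH'
if key in pos_bonds:
    r_min, r_max, k_cx = pos_bonds[key]   # (1.06, 1.12, 1.0)
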