18 files changed, 904 insertions, 1284 deletions
diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 06425f6a2..48ece56c8 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -20,29 +20,21 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-from ml_exp.read_qm7_data import read_nc_data, read_db_data, read_qm7_data
-from ml_exp.c_matrix import c_matrix, c_matrix_multiple
-from ml_exp.lj_matrix import lj_matrix, lj_matrix_multiple
-from ml_exp.frob_norm import frob_norm
-from ml_exp.gauss_kernel import gauss_kernel
-from ml_exp.cholesky_solve import cholesky_solve
-from ml_exp.do_ml import do_ml
-from ml_exp.parallel_create_matrices import parallel_create_matrices
-from ml_exp.misc import plot_benchmarks
+from ml_exp.compound import Compound
+from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
+    first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
+from ml_exp.math import cholesky_solve
+from ml_exp.qm7db import qm7db
+from ml_exp.do_ml import simple_ml, do_ml
 
-
-# If somebody does "from package import *", this is what they will
-# be able to access:
-__all__ = ['read_nc_data',
-           'read_db_data',
-           'read_qm7_data',
-           'c_matrix',
-           'c_matrix_multiple',
-           'lj_matrix',
-           'lj_matrix_multiple',
-           'frob_norm',
-           'gauss_kernel',
+__all__ = ['Compound',
+           'coulomb_matrix',
+           'lennard_jones_matrix',
+           'first_neighbor_matrix',
+           'adjacency_matrix',
+           'check_bond',
+           'bag_of_stuff',
            'cholesky_solve',
-           'do_ml',
-           'parallel_create_matrices',
-           'plot_benchmarks']
+           'qm7db',
+           'simple_ml',
+           'do_ml']
diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index aaeb18714..0d9b87bad 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -20,52 +20,5 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-# from ml_exp.do_ml import do_ml
-# from ml_exp.misc import plot_benchmarks
-from ml_exp.read_qm7_data import read_qm7_data
-from ml_exp.adj_matrix import fneig_matrix, adj_matrix
-from ml_exp.c_matrix import c_matrix
-from ml_exp.bob import bob
-from ml_exp.bostuff import bok_cx
-
 if __name__ == '__main__':
-    """
-    do_ml(min_training_size=1500,
-          max_training_size=2000,
-          training_increment_size=500,
-          test_size=None,
-          ljm_diag_value=None,
-          ljm_sigma=1.0,
-          ljm_epsilon=1.0,
-          r_seed=111,
-          save_benchmarks=False,
-          show_msgs=True)
-    """
-    # plot_benchmarks()
-    """
-    xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True)
-    for i in range(1):
-        fnm, bonds, forces = fneig_matrix(atoms[i], xyz[i], nc[i], True)
-        am = adj_matrix(bonds, forces)
-
-        print(f'{i} first neighbor matrix\n{fnm}')
-        print(f'{i} bond list\n{bonds}')
-        print(f'{i} force list\n{forces}')
-        print(f'{i} adjacency matrix\n{am}')
-        print('-'*30)
-    print('OK!')
-    """
-    xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True)
-    for i in range(1):
-        cm = c_matrix(xyz[i], nc[i], as_eig=False)
-        bob_i = bob(cm, atoms[i])
-
-        print(f'{i} coulomb matrix\n{cm}')
-        print(f'{i} bag of bonds\n{bob}')
-
-        fnm, bonds = fneig_matrix(atoms[i], xyz[i], nc[i], False)
-        am = adj_matrix(fnm, bonds, None)
-        bok_cx_i = bok_cx(am, atoms[i])
-
-        print(f'{i} adjacency matrix\n{am}')
-        print(f'{i} bag of k_cx\n{bok_cx_i}')
+    print('Everything OK!\n\tThis is a placeholder.')
diff --git a/ml_exp/adj_matrix.py b/ml_exp/adj_matrix.py
deleted file mode 100644
index de484b5be..000000000
--- a/ml_exp/adj_matrix.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros, dot
-from numpy.linalg import norm
-from ml_exp.misc import printc
-
-
-def fneig_matrix(atoms,
-                 xyz,
-                 nc=None,
-                 use_forces=False):
-    """
-    Creates the first neighbor matrix of the given molecule data.
-    atoms: list of atoms.
-    xyz: matrix of atomic coords.
-    nc: nuclear charge info.
-    use_forces: if the use of forces instead of k_cx should be used.
-    NOTE: Bond distance of carbon to other elements
-        are (for atoms present in the qm7 dataset):
-            C: 1.20 - 1.53 A
-            H: 1.06 - 1.12 A
-            O: 1.43 - 2.15 A
-            N: 1.47 - 2.10 A
-            S: 1.81 - 2.55 A
-    """
-    if use_forces and nc is None:
-        printc(f'Error. use_forces={use_forces} but nc={nc}', 'RED')
-        return None
-    # Possible bonds.
-    cc_bond = sorted(['C', 'C'])
-    ch_bond = sorted(['C', 'H'])
-    co_bond = sorted(['C', 'O'])
-    cn_bond = sorted(['C', 'N'])
-    cs_bond = sorted(['C', 'S'])
-
-    # Number of atoms, empty matrix and bond list.
-    n = len(atoms)
-    fnm = array(zeros((n, n)), dtype=float)
-    bonds = []
-    forces = []
-    for i, xyz_i in enumerate(xyz):
-        for j, xyz_j in enumerate(xyz):
-            # Ignore the diagonal.
-            if i != j:
-                bond = sorted([atoms[i], atoms[j]])
-                rv = xyz_i - xyz_j
-                r = norm(rv)
-                # Check for each type of bond.
-                if (cc_bond == bond) and (r >= 1.20 and r <= 1.53):
-                    fnm[i, j] = 1.0
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12):
-                    fnm[i, j] = 1.0
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (co_bond == bond) and (r >= 1.43 and r <= 2.15):
-                    fnm[i, j] = 0.8
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (cn_bond == bond) and (r >= 1.47 and r <= 2.10):
-                    fnm[i, j] = 1.0
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-                elif (cs_bond == bond) and (r >= 1.81 and r <= 2.55):
-                    fnm[i, j] = 0.7
-                    if j > i:
-                        bonds.append((i, j))
-                        if use_forces:
-                            forces.append(rv*nc[i]*nc[j]/r**3)
-    if use_forces:
-        return fnm, bonds, forces
-    return fnm, bonds
-
-
-def adj_matrix(fneig_matrix,
-               bonds,
-               forces=None,
-               max_len=25):
-    """
-    Calculates the adjacency matrix given the bond list.
-    bonds: list of bonds (tuple of indexes).
-    max_len: maximum amount of atoms in molecule.
-    forces: list of forces.
-    """
-    n = len(bonds)
-
-    if max_len < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_len = n
-
-    am = array(zeros((max_len, max_len)))
-    for i, bond_i in enumerate(bonds):
-        for j, bond_j in enumerate(bonds):
-            # Ignore the diagonal.
-            if i != j:
-                if (bond_i[0] in bond_j) or (bond_i[1] in bond_j):
-                    if forces:
-                        am[i, j] = dot(forces[i], forces[j])
-                    else:
-                        am[i, j] = fneig_matrix[bond_i[0], bond_i[1]]
-    return am
diff --git a/ml_exp/bob.py b/ml_exp/bob.py
deleted file mode 100644
index 86efecdb4..000000000
--- a/ml_exp/bob.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros
-from collections import Counter
-
-
-def check_bond(bags,
-               bond):
-    """
-    Checks if a bond is in a bag.
-    bags: list of bags, containing a bag per entry, which in turn
-    contains a list of bond-values.
-    bond: bond to check.
-    """
-    if bags == []:
-        return False, None
-
-    for i, bag in enumerate(bags):
-        if bag[0] == bond:
-            return True, i
-
-    return False, None
-
-
-def bob(c_matrix,
-        atoms,
-        max_n=25,
-        max_bond_len=325):
-    """
-    Creates the bag of bond using the coulomb matrix data.
-    c_matrix: coulomb matrix.
-    atoms: list of atoms.
-    max_n: maximum amount of atoms.
-    max_bond_len: maximum amount of bonds in molecule.
-    """
-    n = len(atoms)
-    bond_n = (n * n - n) / 2 + n
-    n_r = range(n)
-
-    if max_n < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_n = n
-
-    if max_bond_len < bond_n:
-        print(''.join(['Error. Molecule bond lenght (bond_n) is ',
-                       'greater than max_bond_len. Using bond_n.']))
-        max_bond_len = bond_n
-
-    # List where each bag data is stored.
-    bags = []
-    for i in n_r:
-        for j in n_r:
-            # Work only in the upper triangle of the coulomb matrix.
-            if j >= i:
-                # Get the string of the current bond.
-                if i == j:
-                    current_bond = atoms[i]
-                else:
-                    current_bond = ''.join(sorted([atoms[i], atoms[j]]))
-
-                # Check if that bond is already in a bag.
-                checker = check_bond(bags, current_bond)
-                # Either create a new bag or add values to an existing one.
-                if not checker[0]:
-                    bags.append([current_bond, c_matrix[i, j]])
-                else:
-                    bags[checker[1]].append(c_matrix[i, j])
-
-    # Create the actual bond list ordered.
-    atom_counter = Counter(atoms)
-    atom_list = sorted(list(set(atoms)))
-    bonds = []
-    for i, a_i in enumerate(atom_list):
-        if atom_counter[a_i] > 1:
-            for a_j in atom_list[i:]:
-                bonds.append(''.join(sorted([a_i, a_j])))
-    bonds = atom_list + bonds
-
-    # Create the final vector for the bob.
-    bob = array(zeros(max_bond_len), dtype=float)
-    c_i = 0
-    for i, bond in enumerate(bonds):
-        checker = check_bond(bags, bond)
-        if checker[0]:
-            for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
-                # Use c_i as the index for bob if the zero padding should
-                # be at the end of the vector instead of between each bond.
-                bob[i*max_n + j] = num
-                c_i += 1
-        # This is set to false because this was a debugging measure.
-        else:
-            print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
-                           ' be found in the bags list. This could be',
-                           ' a case where the atom is only present once',
-                           ' in the molecule.']))
-    return bob
diff --git a/ml_exp/bostuff.py b/ml_exp/bostuff.py
deleted file mode 100644
index 1741e26cd..000000000
--- a/ml_exp/bostuff.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros
-from collections import Counter
-from ml_exp.bob import check_bond
-
-
-def bok_cx(adj_matrix,
-           atoms,
-           max_n=25,
-           max_bond_len=325):
-    """
-    Creates the bag of k_cx using the adjacency matrix data.
-    adj_matrix: adjacency matrix.
-    max_n: maximum amount of atoms.
-    max_bond_len: maximum amount of bonds in molecule.
-    """
-    n = len(atoms)
-    bond_n = (n * n - n) / 2 + n
-    n_r = range(n)
-
-    if max_n < n:
-        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                       'greater than max_len. Using mol_n.']))
-        max_n = n
-
-    if max_bond_len < bond_n:
-        print(''.join(['Error. Molecule bond length (bond_n) is ',
-                       'greater than max_bond_len. Using bond_n.']))
-        max_bond_len = bond_n
-
-    # List where each bag data is stored.
-    bags = []
-    for i in n_r:
-        for j in n_r:
-            # Work only in the upper triangle of the coulomb matrix.
-            if j >= i:
-                # Get the string of the current bond.
-                if i == j:
-                    current_bond = atoms[i]
-                else:
-                    current_bond = ''.join(sorted([atoms[i], atoms[j]]))
-
-                # Check if that bond is already in a bag.
-                checker = check_bond(bags, current_bond)
-                # Either create a new bag or add values to an existing one.
-                if not checker[0]:
-                    bags.append([current_bond, adj_matrix[i, j]])
-                else:
-                    bags[checker[1]].append(adj_matrix[i, j])
-
-    # Create the actual bond list ordered.
-    atom_counter = Counter(atoms)
-    atom_list = sorted(list(set(atoms)))
-    bonds = []
-    for i, a_i in enumerate(atom_list):
-        if atom_counter[a_i] > 1:
-            for a_j in atom_list[i:]:
-                bonds.append(''.join(sorted([a_i, a_j])))
-    bonds = atom_list + bonds
-
-    # Create the final vector for the bok_cx.
-    bok_cx = array(zeros(max_bond_len), dtype=float)
-    c_i = 0
-    for i, bond in enumerate(bonds):
-        checker = check_bond(bags, bond)
-        if checker[0]:
-            for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
-                # Use c_i as the index for bok_cx if the zero padding should
-                # be at the end of the vector instead of between each bond.
-                bok_cx[i*max_n + j] = num
-                c_i += 1
-        # This is set to false because this was a debugging measure.
-        else:
-            print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
-                           ' be found in the bags list. This could be',
-                           ' a case where the atom is only present once',
-                           ' in the molecule.']))
-    return bok_cx
diff --git a/ml_exp/c_matrix.py b/ml_exp/c_matrix.py
deleted file mode 100644
index 00034a660..000000000
--- a/ml_exp/c_matrix.py
+++ /dev/null
@@ -1,179 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import time
-import math
-import numpy as np
-from numpy.linalg import eig
-from ml_exp.misc import printc
-
-
-def c_matrix(mol_data,
-             nc_data,
-             max_len=25,
-             as_eig=True,
-             bohr_radius_units=False):
-    """
-    Creates the Coulomb Matrix from the molecule data given.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    if bohr_radius_units:
-        conversion_rate = 0.52917721067
-    else:
-        conversion_rate = 1
-
-    mol_n = len(mol_data)
-    mol_nr = range(mol_n)
-
-    if not mol_n == len(nc_data):
-        print(''.join(['Error. Molecule matrix dimension is different ',
-                       'than the nuclear charge array dimension.']))
-    else:
-        if max_len < mol_n:
-            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                           'greater than max_len. Using mol_n.']))
-            max_len = None
-
-        if max_len:
-            cm = np.zeros((max_len, max_len))
-            ml_r = range(max_len)
-
-            # Actual calculation of the coulomb matrix.
-            for i in ml_r:
-                if i < mol_n:
-                    x_i = mol_data[i, 0]
-                    y_i = mol_data[i, 1]
-                    z_i = mol_data[i, 2]
-                    Z_i = nc_data[i]
-                else:
-                    break
-
-                for j in ml_r:
-                    if j < mol_n:
-                        x_j = mol_data[j, 0]
-                        y_j = mol_data[j, 1]
-                        z_j = mol_data[j, 2]
-                        Z_j = nc_data[j]
-
-                        x = (x_i-x_j)**2
-                        y = (y_i-y_j)**2
-                        z = (z_i-z_j)**2
-
-                        if i == j:
-                            cm[i, j] = (0.5*Z_i**2.4)
-                        else:
-                            cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                          + y
-                                                                          + z))
-                    else:
-                        break
-
-            # Now the value will be returned.
-            if as_eig:
-                cm_sorted = np.sort(eig(cm)[0])[::-1]
-                # Thanks to SO for the following lines of code.
-                # https://stackoverflow.com/a/43011036
-
-                # Keep zeros at the end.
-                mask = cm_sorted != 0.
-                f_mask = mask.sum(0, keepdims=1) >\
-                    np.arange(cm_sorted.shape[0]-1, -1, -1)
-
-                f_mask = f_mask[::-1]
-                cm_sorted[f_mask] = cm_sorted[mask]
-                cm_sorted[~f_mask] = 0.
-
-                return cm_sorted
-
-            else:
-                return cm
-
-        else:
-            cm_temp = []
-            # Actual calculation of the coulomb matrix.
-            for i in mol_nr:
-                x_i = mol_data[i, 0]
-                y_i = mol_data[i, 1]
-                z_i = mol_data[i, 2]
-                Z_i = nc_data[i]
-
-                cm_row = []
-                for j in mol_nr:
-                    x_j = mol_data[j, 0]
-                    y_j = mol_data[j, 1]
-                    z_j = mol_data[j, 2]
-                    Z_j = nc_data[j]
-
-                    x = (x_i-x_j)**2
-                    y = (y_i-y_j)**2
-                    z = (z_i-z_j)**2
-
-                    if i == j:
-                        cm_row.append(0.5*Z_i**2.4)
-                    else:
-                        cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
-                                                                        + y
-                                                                        + z))
-
-                cm_temp.append(np.array(cm_row))
-
-            cm = np.array(cm_temp)
-            # Now the value will be returned.
-            if as_eig:
-                return np.sort(eig(cm)[0])[::-1]
-            else:
-                return cm
-
-
-def c_matrix_multiple(mol_data,
-                      nc_data,
-                      pipe=None,
-                      max_len=25,
-                      as_eig=True,
-                      bohr_radius_units=False):
-    """
-    Calculates the Coulomb Matrix of multiple molecules.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    printc('Coulomb Matrices calculation started.', 'CYAN')
-    tic = time.perf_counter()
-
-    cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
-                       for mol, nc in zip(mol_data, nc_data)])
-
-    toc = time.perf_counter()
-    printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN')
-
-    if pipe:
-        pipe.send(cm_data)
-
-    return cm_data
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
new file mode 100644
index 000000000..1598fa482
--- /dev/null
+++ b/ml_exp/compound.py
@@ -0,0 +1,153 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import numpy as np
+from ml_exp.data import NUCLEAR_CHARGE
+from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
+    first_neighbor_matrix, adjacency_matrix, bag_of_stuff
+
+
+class Compound:
+    def __init__(self,
+                 xyz=None):
+        """
+        Initialization of the Compound.
+        xyz: (path to) the xyz file.
+        """
+        self.name = None
+
+        self.n = None
+        self.extra = None  # In case a comment is in the compound file.
+        self.atoms = None
+        self.atoms_nc = None
+        self.coordinates = None
+        self.pbe0 = None
+        self.delta = None
+
+        self.cm = None
+        self.ljm = None
+        self.am = None
+        self.bos = None
+
+        if xyz is not None:
+            self.read_xyz(xyz)
+
+    def gen_cm(self,
+               size=23,
+               as_eig=True,
+               bohr_ru=False):
+        """
+        Generate the Coulomb Matrix for the compund.
+        size: compound size.
+        as_eig: if the representation should be as the eigenvalues.
+        bohr_ru: if radius units should be in bohr's radius units.
+        """
+        self.cm = coulomb_matrix(self.coordinates,
+                                 self.atoms_nc,
+                                 size=size,
+                                 as_eig=as_eig,
+                                 bohr_ru=bohr_ru)
+
+    def gen_ljm(self,
+                diag_value=None,
+                sigma=1.0,
+                epsilon=1.0,
+                size=23,
+                as_eig=True,
+                bohr_ru=False):
+        """
+        Generate the Lennard-Jones Matrix for the compund.
+        diag_value: if special diagonal value is to be used.
+        sigma: sigma value.
+        epsilon: epsilon value.
+        size: compound size.
+        as_eig: if the representation should be as the eigenvalues.
+        bohr_ru: if radius units should be in bohr's radius units.
+        """
+        self.ljm = lennard_jones_matrix(self.coordinates,
+                                        self.atoms_nc,
+                                        diag_value=diag_value,
+                                        sigma=sigma,
+                                        epsilon=epsilon,
+                                        size=size,
+                                        as_eig=as_eig,
+                                        bohr_ru=bohr_ru)
+
+    def gen_am(self,
+               use_forces=False,
+               size=23,
+               bohr_ru=False):
+        """
+        Generate the Adjacency Matrix for the compund.
+        use_forces: if the use of forces instead of k_cx should be used.
+        size: compound size.
+        bohr_ru: if radius units should be in bohr's radius units.
+        """
+        # First, generate the first neighor matrix.
+        fnm, bonds, forces = first_neighbor_matrix(self.coordinates,
+                                                   self.atoms_nc,
+                                                   self.atoms,
+                                                   size=size,
+                                                   use_forces=use_forces,
+                                                   bohr_ru=bohr_ru)
+
+        # Now, generate the adjacency matrix.
+        self.am = adjacency_matrix(fnm,
+                                   bonds,
+                                   forces,
+                                   size=size)
+
+    def gen_bos(self,
+                size=23,
+                stuff='bonds'):
+        """
+        Generate the Bag of Stuff for the compound.
+        size: compound size.
+        stuff: elements of the bag, by default the known bag of bonds.
+        """
+        self.bos = bag_of_stuff(self.cm,
+                                self.atoms,
+                                size=size,
+                                stuff=stuff)
+
+    def read_xyz(self,
+                 filename):
+        """
+        Reads an xyz file and adds the corresponding data to the Compound.
+        filename: (path to) the xyz file.
+        """
+        with open(filename, 'r') as f:
+            lines = f.readlines()
+
+        self.name = filename.split('/')[-1]
+        self.n = int(lines[0])
+        self.extra = lines[1]
+        self.atoms = []
+        self.atoms_nc = np.empty(self.n, dtype=int)
+        self.coordinates = np.empty((self.n, 3), dtype=float)
+
+        for i, atom in enumerate(lines[2:self.n + 2]):
+            atom_d = atom.split()
+
+            self.atoms.append(atom_d[0])
+            self.atoms_nc[i] = NUCLEAR_CHARGE[atom_d[0]]
+            self.coordinates[i] = np.asarray(atom_d[1:4], dtype=float)
diff --git a/ml_exp/data.py b/ml_exp/data.py
new file mode 100644
index 000000000..243aff168
--- /dev/null
+++ b/ml_exp/data.py
@@ -0,0 +1,141 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+NUCLEAR_CHARGE = {
+    'H': 1,
+    'He': 2,
+    'Li': 3,
+    'Be': 4,
+    'B': 5,
+    'C': 6,
+    'N': 7,
+    'O': 8,
+    'F': 9,
+    'Ne': 10,
+    'Na': 11,
+    'Mg': 12,
+    'Al': 13,
+    'Si': 14,
+    'P': 15,
+    'S': 16,
+    'Cl': 17,
+    'Ar': 18,
+    'K': 19,
+    'Ca': 20,
+    'Sc': 21,
+    'Ti': 22,
+    'V': 23,
+    'Cr': 24,
+    'Mn': 25,
+    'Fe': 26,
+    'Co': 27,
+    'Ni': 28,
+    'Cu': 29,
+    'Zn': 30,
+    'Ga': 31,
+    'Ge': 32,
+    'As': 33,
+    'Se': 34,
+    'Br': 35,
+    'Kr': 36,
+    'Rb': 37,
+    'Sr': 38,
+    'Y': 39,
+    'Zr': 40,
+    'Nb': 41,
+    'Mo': 42,
+    'Tc': 43,
+    'Ru': 44,
+    'Rh': 45,
+    'Pd': 46,
+    'Ag': 47,
+    'Cd': 48,
+    'In': 49,
+    'Sn': 50,
+    'Sb': 51,
+    'Te': 52,
+    'I': 53,
+    'Xe': 54,
+    'Cs': 55,
+    'Ba': 56,
+    'La': 57,
+    'Ce': 58,
+    'Pr': 59,
+    'Nd': 60,
+    'Pm': 61,
+    'Sm': 62,
+    'Eu': 63,
+    'Gd': 64,
+    'Tb': 65,
+    'Dy': 66,
+    'Ho': 67,
+    'Er': 68,
+    'Tm': 69,
+    'Yb': 70,
+    'Lu': 71,
+    'Hf': 72,
+    'Ta': 73,
+    'W': 74,
+    'Re': 75,
+    'Os': 76,
+    'Ir': 77,
+    'Pt': 78,
+    'Au': 79,
+    'Hg': 80,
+    'Tl': 81,
+    'Pb': 82,
+    'Bi': 83,
+    'Po': 84,
+    'At': 85,
+    'Rn': 86,
+    'Fr': 87,
+    'Ra': 88,
+    'Ac': 89,
+    'Th': 90,
+    'Pa': 91,
+    'U': 92,
+    'Np': 93,
+    'Pu': 94,
+    'Am': 95,
+    'Cm': 96,
+    'Bk': 97,
+    'Cf': 98,
+    'Es': 99,
+    'Fm': 100,
+    'Md': 101,
+    'No': 102,
+    'Lr': 103,
+    'Rf': 104,
+    'Db': 105,
+    'Sg': 106,
+    'Bh': 107,
+    'Hs': 108,
+    'Mt': 109,
+    'Ds ': 110,
+    'Rg ': 111,
+    'Cn ': 112,
+    'Nh': 113,
+    'Fl': 114,
+    'Mc': 115,
+    'Lv': 116,
+    'Ts': 117,
+    'Og': 118}
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index 110242a1d..d8ee415bf 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -22,206 +22,206 @@ SOFTWARE.
 """
 import time
 import numpy as np
-from multiprocessing import Process, Pipe
 from ml_exp.misc import printc
-from ml_exp.gauss_kernel import gauss_kernel
-from ml_exp.cholesky_solve import cholesky_solve
-from ml_exp.read_qm7_data import read_qm7_data
-from ml_exp.parallel_create_matrices import parallel_create_matrices
+from ml_exp.kernels import gaussian_kernel
+from ml_exp.math import cholesky_solve
+from ml_exp.qm7db import qm7db
 
 
-def ml(desc_data,
-       energy_data,
-       training_size,
-       desc_type=None,
-       pipe=None,
-       test_size=None,
-       sigma=1000.0,
-       show_msgs=True):
+def simple_ml(descriptors,
+              energies,
+              training_size,
+              test_size=None,
+              sigma=1000.0,
+              identifier=None,
+              show_msgs=True):
     """
-    Does the ML methodology.
-    desc_data: descriptor (or representation) data.
-    energy_data: energy data associated with desc_data.
+    Basic ML methodology for a single descriptor type.
+    descriptors: array of descriptors.
+    energies: array of energies.
     training_size: size of the training set to use.
-    desc_type: string with the name of the descriptor used.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
     sigma: depth of the kernel.
-    show_msgs: Show debug messages or not.
-    NOTE: desc_type is just a string and is only for identification purposes.
+    identifier: string with the name of the descriptor used.
+    show_msgs: if debug messages should be shown.
+    NOTE: identifier is just a string and is only for identification purposes.
     Also, training is done with the first part of the data and
-    testing with the ending part of the data.
+        testing with the ending part of the data.
     """
     tic = time.perf_counter()
     # Initial calculations for later use.
-    d_len = len(desc_data)
-    e_len = len(energy_data)
+    data_size = descriptors.shape[0]
 
-    if not desc_type:
-        desc_type = 'NOT SPECIFIED'
+    if not identifier:
+        identifier = 'NOT SPECIFIED'
 
-    if d_len != e_len:
-        printc(''.join(['ERROR. Descriptor data size different ',
-                        'than energy data size.']), 'RED')
-        return None
+    if not data_size == energies.shape[0]:
+        raise ValueError('Energies size is different than descriptors size.')
 
-    if training_size >= d_len:
-        printc('ERROR. Training size greater or equal than data size.', 'RED')
-        return None
+    if training_size >= data_size:
+        raise ValueError('Training size is greater or equal to the data size.')
 
+    # If test_size is not set, it is set to a maximum size of 1500.
     if not test_size:
-        test_size = d_len - training_size
+        test_size = data_size - training_size
         if test_size > 1500:
             test_size = 1500
 
     if show_msgs:
-        printc('{} ML started.'.format(desc_type), 'GREEN')
-        printc('\tTraining size: {}'.format(training_size), 'CYAN')
-        printc('\tTest size: {}'.format(test_size), 'CYAN')
-        printc('\tSigma: {}'.format(sigma), 'CYAN')
+        printc(f'{identifier} ML started.', 'GREEN')
+        printc(f'\tTraining size: {training_size}', 'CYAN')
+        printc(f'\tTest size: {test_size}', 'CYAN')
+        printc(f'\tSigma: {test_size}', 'CYAN')
 
-    X_training = desc_data[:training_size]
-    Y_training = energy_data[:training_size]
-    K_training = gauss_kernel(X_training, X_training, sigma)
-    alpha_ = cholesky_solve(K_training, Y_training)
+    X_training = descriptors[:training_size]
+    Y_training = energies[:training_size]
+    K_training = gaussian_kernel(X_training, X_training, sigma)
+    alpha = cholesky_solve(K_training, Y_training)
 
-    X_test = desc_data[-test_size:]
-    Y_test = energy_data[-test_size:]
-    K_test = gauss_kernel(X_test, X_training, sigma)
-    Y_predicted = np.dot(K_test, alpha_)
+    X_test = descriptors[-test_size:]
+    Y_test = energies[-test_size:]
+    K_test = gaussian_kernel(X_test, X_training, sigma)
+    Y_predicted = np.dot(K_test, alpha)
 
     mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
-        printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
+        printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
 
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
-        printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
-               'GREEN')
-        printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
-        printc('\t\tTest size: {}'.format(test_size), 'CYAN')
-        printc('\t\tSigma: {}'.format(sigma), 'CYAN')
-
-    if pipe:
-        pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
+        printc(f'\t{identifier} ML took {tictoc:.4f} seconds.', 'GREEN')
+        printc(f'\t\tTraining size: {training_size}', 'CYAN')
+        printc(f'\t\tTest size: {test_size}', 'CYAN')
+        printc(f'\t\tSigma: {sigma}', 'CYAN')
 
     return mae, tictoc
 
 
-def do_ml(min_training_size,
-          max_training_size=None,
-          training_increment_size=500,
-          test_size=None,
-          ljm_diag_value=None,
-          ljm_sigma=1.0,
-          ljm_epsilon=1.0,
+def do_ml(db_path='data',
+          is_shuffled=True,
           r_seed=111,
-          save_benchmarks=False,
-          max_len=25,
+          diag_value=None,
+          lj_sigma=1.0,
+          lj_epsilon=1.0,
+          use_forces=False,
+          stuff='bonds',
+          size=23,
           as_eig=True,
-          bohr_radius_units=False,
+          bohr_ru=False,
+          training_size=1500,
+          test_size=None,
           sigma=1000.0,
+          identifiers=["CM"],
           show_msgs=True):
     """
     Main function that does the whole ML process.
-    min_training_size: minimum training size.
-    max_training_size: maximum training size.
-    training_increment_size: training increment size.
+    db_path: path to the database directory.
+    is_shuffled: if the resulting list of compounds should be shuffled.
+    r_seed: random seed to use for the shuffling.
+    diag_value: if special diagonal value is to be used.
+    lj_sigma: sigma value.
+    lj_epsilon: epsilon value.
+    use_forces: if the use of forces instead of k_cx should be used.
+    stuff: elements of the bag, by default the known bag of bonds.
+    size: compound size.
+    as_eig: if the representation should be as the eigenvalues.
+    bohr_ru: if radius units should be in bohr's radius units.
+    training_size: size of the training set to use.
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
-    ljm_diag_value: if a special diagonal value should be used in lj matrix.
-    ljm_sigma: sigma value for lj matrix.
-    ljm_epsilon: epsilon value for lj matrix.
-    r_seed: random seed to use for the shuffling.
-    save_benchmarks: if benchmarks should be saved.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
     sigma: depth of the kernel.
-    show_msgs: Show debug messages or not.
+    identifiers: list of names (strings) of descriptors to use.
+    show_msgs: if debug messages should be shown.
     """
-    # Initialization time.
+    if type(identifiers) != list:
+        raise TypeError('\'identifiers\' is not a list.')
+
     init_time = time.perf_counter()
-    if not max_training_size:
-        max_training_size = min_training_size + training_increment_size
 
     # Data reading.
-    molecules, nuclear_charge, energy_pbe0, energy_delta =\
-        read_qm7_data(r_seed=r_seed)
+    tic = time.perf_counter()
+    compounds, energy_pbe0, energy_delta = qm7db(db_path=db_path,
+                                                 is_shuffled=is_shuffled,
+                                                 r_seed=r_seed)
+    toc = time.perf_counter()
+    tictoc = toc - tic
+    if show_msgs:
+        printc(f'Data reading took {tictoc:.4f} seconds.', 'CYAN')
 
     # Matrices calculation.
-    cm_data, ljm_data = parallel_create_matrices(molecules,
-                                                 nuclear_charge,
-                                                 ljm_diag_value,
-                                                 ljm_sigma,
-                                                 ljm_epsilon,
-                                                 max_len,
-                                                 as_eig,
-                                                 bohr_radius_units)
-
-    # ML calculation.
-    procs = []
-    cm_pipes = []
-    ljm_pipes = []
-    for i in range(min_training_size,
-                   max_training_size + 1,
-                   training_increment_size):
-        cm_recv, cm_send = Pipe(False)
-        p1 = Process(target=ml,
-                     args=(cm_data,
-                           energy_pbe0,
-                           i,
-                           'CM',
-                           cm_send,
-                           test_size,
-                           sigma,
-                           show_msgs))
-        procs.append(p1)
-        cm_pipes.append(cm_recv)
-        p1.start()
-
-        ljm_recv, ljm_send = Pipe(False)
-        p2 = Process(target=ml,
-                     args=(ljm_data,
-                           energy_pbe0,
-                           i,
-                           'L-JM',
-                           ljm_send,
-                           test_size,
-                           sigma,
-                           show_msgs))
-        procs.append(p2)
-        ljm_pipes.append(ljm_recv)
-        p2.start()
+    tic = time.perf_counter()
+    for compound in compounds:
+        if 'CM' in identifiers:
+            compound.gen_cm(size=size,
+                            as_eig=as_eig,
+                            bohr_ru=bohr_ru)
+        if 'LJM' in identifiers:
+            compound.gen_ljm(diag_value=diag_value,
+                             sigma=lj_sigma,
+                             epsilon=lj_epsilon,
+                             size=size,
+                             as_eig=as_eig,
+                             bohr_ru=bohr_ru)
+        if 'AM' in identifiers:
+            compound.gen_am(use_forces=use_forces,
+                            size=size,
+                            bohr_ru=bohr_ru)
+        if 'BOS' in identifiers:
+            compound.gen_bos(size=size,
+                             stuff=stuff)
 
-    cm_bench_results = []
-    ljm_bench_results = []
-    for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
-        cm_bench_results.append(cd_pipe.recv())
-        ljm_bench_results.append(ljd_pipe.recv())
+    # Create a numpy array for the descriptors.
+    if 'CM' in identifiers:
+        cm_data = np.array([comp.cm for comp in compounds], dtype=float)
+    if 'LJM' in identifiers:
+        ljm_data = np.array([comp.ljm for comp in compounds], dtype=float)
+    if 'AM' in identifiers:
+        am_data = np.array([comp.cm for comp in compounds], dtype=float)
+    if 'BOS' in identifiers:
+        bos_data = np.array([comp.bos for comp in compounds], dtype=float)
 
-    for proc in procs:
-        proc.join()
+    toc = time.perf_counter()
+    tictoc = toc - tic
+    if show_msgs:
+        printc(f'Matrices calculation took {tictoc:.4f} seconds.', 'CYAN')
 
-    if save_benchmarks:
-        with open('data\\benchmarks.csv', 'a') as save_file:
-            # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,',
-            #                          'mae,time,lj_s,lj_e,date_ran\n']))
-            ltime = time.localtime()[:3][::-1]
-            ljm_se = ',' + str(ljm_sigma) + ',' + str(ljm_epsilon) + ','
-            date = '/'.join([str(field) for field in ltime])
-            for cm, ljm, in zip(cm_bench_results, ljm_bench_results):
-                cm_text = ','.join([str(field) for field in cm])\
-                    + ',' + date + '\n'
-                ljm_text = ','.join([str(field) for field in ljm])\
-                    + ljm_se + date + '\n'
-                save_file.write(cm_text)
-                save_file.write(ljm_text)
+    # ML calculation.
+    if 'CM' in identifiers:
+        cm_mae, cm_tictoc = simple_ml(cm_data,
+                                      energy_pbe0,
+                                      training_size=training_size,
+                                      test_size=test_size,
+                                      sigma=sigma,
+                                      identifier='CM',
+                                      show_msgs=show_msgs)
+    if 'LJM' in identifiers:
+        ljm_mae, ljm_tictoc = simple_ml(ljm_data,
+                                        energy_pbe0,
+                                        training_size=training_size,
+                                        test_size=test_size,
+                                        sigma=sigma,
+                                        identifier='LJM',
+                                        show_msgs=show_msgs)
+    if 'AM' in identifiers:
+        am_mae, am_tictoc = simple_ml(am_data,
+                                      energy_pbe0,
+                                      training_size=training_size,
+                                      test_size=test_size,
+                                      sigma=sigma,
+                                      identifier='CM',
+                                      show_msgs=show_msgs)
+    if 'BOS' in identifiers:
+        bos_mae, bos_tictoc = simple_ml(bos_data,
+                                        energy_pbe0,
+                                        training_size=training_size,
+                                        test_size=test_size,
+                                        sigma=sigma,
+                                        identifier='CM',
+                                        show_msgs=show_msgs)
 
     # End of program
     end_time = time.perf_counter()
-    printc('Program took {:.4f} seconds.'.format(end_time - init_time),
-           'CYAN')
+    totaltime = end_time - init_time
+    printc(f'Program took {totaltime:.4f} seconds.', 'CYAN')
diff --git a/ml_exp/gauss_kernel.py b/ml_exp/kernels.py
index 834d62408..3914ffc20 100644
--- a/ml_exp/gauss_kernel.py
+++ b/ml_exp/kernels.py
@@ -22,27 +22,23 @@ SOFTWARE.
 """
 import math
 import numpy as np
-from ml_exp.frob_norm import frob_norm
 
 
-def gauss_kernel(X_1, X_2, sigma):
+def gaussian_kernel(X1,
+                    X2,
+                    sigma):
     """
     Calculates the Gaussian Kernel.
-    X_1: first representations.
-    X_2: second representations.
+    X1: first representations.
+    X2: second representations.
     sigma: kernel width.
     """
-    x1_l = len(X_1)
-    x1_range = range(x1_l)
-    x2_l = len(X_2)
-    x2_range = range(x2_l)
-
     inv_sigma = -0.5 / (sigma*sigma)
 
-    K = np.zeros((x1_l, x2_l))
-    for i in x1_range:
-        for j in x2_range:
-            f_norm = frob_norm(X_1[i] - X_2[j])
+    K = np.zeros((X1.shape[0], X2.shape[0]), dtype=float)
+    for i, x1 in enumerate(X1):
+        for j, x2 in enumerate(X2):
+            f_norm = np.linalg.norm(x1 - x2)
             # print(f_norm)
             K[i, j] = math.exp(inv_sigma * f_norm)
 
diff --git a/ml_exp/lj_matrix.py b/ml_exp/lj_matrix.py
deleted file mode 100644
index 549790755..000000000
--- a/ml_exp/lj_matrix.py
+++ /dev/null
@@ -1,222 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import time
-import math
-import numpy as np
-from numpy.linalg import eig
-from ml_exp.misc import printc
-
-
-def lj_matrix(mol_data,
-              nc_data,
-              diag_value=None,
-              sigma=1.0,
-              epsilon=1.0,
-              max_len=25,
-              as_eig=True,
-              bohr_radius_units=False):
-    """
-    Creates the Lennard-Jones Matrix from the molecule data given.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    diag_value: if special diagonal value is to be used.
-    sigma: sigma value.
-    epsilon: epsilon value.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    if bohr_radius_units:
-        conversion_rate = 0.52917721067
-    else:
-        conversion_rate = 1
-
-    mol_n = len(mol_data)
-    mol_nr = range(mol_n)
-
-    if not mol_n == len(nc_data):
-        print(''.join(['Error. Molecule matrix dimension is different ',
-                       'than the nuclear charge array dimension.']))
-    else:
-        if max_len < mol_n:
-            print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
-                           'greater than max_len. Using mol_n.']))
-            max_len = None
-
-        if max_len:
-            lj = np.zeros((max_len, max_len))
-            ml_r = range(max_len)
-
-            # Actual calculation of the coulomb matrix.
-            for i in ml_r:
-                if i < mol_n:
-                    x_i = mol_data[i, 0]
-                    y_i = mol_data[i, 1]
-                    z_i = mol_data[i, 2]
-                    Z_i = nc_data[i]
-                else:
-                    break
-
-                for j in ml_r:
-                    if j < mol_n:
-                        x_j = mol_data[j, 0]
-                        y_j = mol_data[j, 1]
-                        z_j = mol_data[j, 2]
-
-                        x = (x_i-x_j)**2
-                        y = (y_i-y_j)**2
-                        z = (z_i-z_j)**2
-
-                        if i == j:
-                            if diag_value is None:
-                                lj[i, j] = (0.5*Z_i**2.4)
-                            else:
-                                lj[i, j] = diag_value
-                        else:
-                            # Calculations are done after i==j is checked
-                            # so no division by zero is done.
-
-                            # A little play with r exponents
-                            # so no square root is calculated.
-                            # Conversion factor is included in r^2.
-
-                            # 1/r^2
-                            r_2 = sigma**2/(conversion_rate**2*(x + y + z))
-
-                            r_6 = math.pow(r_2, 3)
-                            r_12 = math.pow(r_6, 2)
-                            lj[i, j] = (4*epsilon*(r_12 - r_6))
-                    else:
-                        break
-
-            # Now the value will be returned.
-            if as_eig:
-                lj_sorted = np.sort(eig(lj)[0])[::-1]
-                # Thanks to SO for the following lines of code.
-                # https://stackoverflow.com/a/43011036
-
-                # Keep zeros at the end.
-                mask = lj_sorted != 0.
-                f_mask = mask.sum(0, keepdims=1) >\
-                    np.arange(lj_sorted.shape[0]-1, -1, -1)
-
-                f_mask = f_mask[::-1]
-                lj_sorted[f_mask] = lj_sorted[mask]
-                lj_sorted[~f_mask] = 0.
-
-                return lj_sorted
-
-            else:
-                return lj
-
-        else:
-            lj_temp = []
-            # Actual calculation of the coulomb matrix.
-            for i in mol_nr:
-                x_i = mol_data[i, 0]
-                y_i = mol_data[i, 1]
-                z_i = mol_data[i, 2]
-                Z_i = nc_data[i]
-
-                lj_row = []
-                for j in mol_nr:
-                    x_j = mol_data[j, 0]
-                    y_j = mol_data[j, 1]
-                    z_j = mol_data[j, 2]
-
-                    x = (x_i-x_j)**2
-                    y = (y_i-y_j)**2
-                    z = (z_i-z_j)**2
-
-                    if i == j:
-                        if not diag_value:
-                            lj_row.append(0.5*Z_i**2.4)
-                        else:
-                            lj_row.append(diag_value)
-                    else:
-                        # Calculations are done after i==j is checked
-                        # so no division by zero is done.
-
-                        # A little play with r exponents
-                        # so no square root is calculated.
-                        # Conversion factor is included in r^2.
-
-                        # 1/r^2
-                        r_2 = sigma**2/(conversion_rate**2*(x + y + z))
-
-                        r_6 = math.pow(r_2, 3)
-                        r_12 = math.pow(r_6, 2)
-                        lj_row.append(4*epsilon*(r_12 - r_6))
-
-                lj_temp.append(np.array(lj_row))
-
-            lj = np.array(lj_temp)
-            # Now the value will be returned.
-            if as_eig:
-                return np.sort(eig(lj)[0])[::-1]
-            else:
-                return lj
-
-
-def lj_matrix_multiple(mol_data,
-                       nc_data,
-                       pipe=None,
-                       diag_value=None,
-                       sigma=1.0,
-                       epsilon=1.0,
-                       max_len=25,
-                       as_eig=True,
-                       bohr_radius_units=False):
-    """
-    Calculates the Lennard-Jones Matrix of multiple molecules.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
-    diag_value: if special diagonal value is to be used.
-    sigma: sigma value.
-    epsilon: epsilon value.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-    printc('L-J Matrices calculation started.', 'CYAN')
-    tic = time.perf_counter()
-
-    ljm_data = np.array([lj_matrix(mol,
-                                   nc,
-                                   diag_value,
-                                   sigma,
-                                   epsilon,
-                                   max_len,
-                                   as_eig,
-                                   bohr_radius_units)
-                        for mol, nc in zip(mol_data, nc_data)])
-
-    toc = time.perf_counter()
-    printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN')
-
-    if pipe:
-        pipe.send(ljm_data)
-
-    return ljm_data
diff --git a/ml_exp/cholesky_solve.py b/ml_exp/math.py
index bc6a572a3..da1fdf08f 100644
--- a/ml_exp/cholesky_solve.py
+++ b/ml_exp/math.py
@@ -21,26 +21,25 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 import numpy as np
-from numpy.linalg import cholesky
 
 
-def cholesky_solve(K, y):
+def cholesky_solve(K,
+                   y):
     """
-    Applies Cholesky decomposition to obtain the 'alpha coeficients'.
+    Applies Cholesky decomposition to solve Ka=y. Where a are the alpha
+        coeficients.
     K: kernel.
     y: known parameters.
     """
-    # The initial mathematical problem is to solve Ka=y.
-
-    # First, add a small lambda value.
+    # Add a small lambda value.
     K[np.diag_indices_from(K)] += 1e-8
 
     # Get the Cholesky decomposition of the kernel.
-    L = cholesky(K)
-    size = len(L)
+    L = np.linalg.cholesky(K)
+    size = K.shape[0]
 
     # Solve Lx=y for x.
-    x = np.zeros(size)
+    x = np.zeros(size, dtype=float)
     x[0] = y[0] / L[0, 0]
     for i in range(1, size):
         temp_sum = 0.0
@@ -50,12 +49,9 @@ def cholesky_solve(K, y):
 
     # Now, solve LTa=x for a.
     L2 = L.T
-    a = np.zeros(size)
-    a_ms = size - 1
-    a[a_ms] = x[a_ms] / L2[a_ms, a_ms]
-    # Because of the form of L2 (upper triangular matriz), an inversion of
-    # range() needs to be done.
-    for i in range(0, a_ms)[::-1]:
+    a = np.zeros(size, dtype=float)
+    a[size - 1] = x[size - 1] / L2[size - 1, size - 1]
+    for i in range(0, size - 1)[::-1]:
         temp_sum = 0.0
         for j in range(i, size)[::-1]:
             temp_sum += L2[i, j] * a[j]
diff --git a/ml_exp/misc.py b/ml_exp/misc.py
index e9142b05f..4bd68eefc 100644
--- a/ml_exp/misc.py
+++ b/ml_exp/misc.py
@@ -29,29 +29,25 @@ init()
 def printc(text, color):
     """
     Prints texts normaly, but in color. Using colorama.
-    text: string with the text to print.
+    text: text to print.
     color: color to be used, same as available in colorama.
     """
-    color_dic = {'BLACK': Fore.BLACK,
-                 'RED': Fore.RED,
-                 'GREEN': Fore.GREEN,
-                 'YELLOW': Fore.YELLOW,
-                 'BLUE': Fore.BLUE,
-                 'MAGENTA': Fore.MAGENTA,
-                 'CYAN': Fore.CYAN,
-                 'WHITE': Fore.WHITE,
-                 'RESET': Fore.RESET}
+    color = color.upper()
+    colors = {'BLACK': Fore.BLACK,
+              'RED': Fore.RED,
+              'GREEN': Fore.GREEN,
+              'YELLOW': Fore.YELLOW,
+              'BLUE': Fore.BLUE,
+              'MAGENTA': Fore.MAGENTA,
+              'CYAN': Fore.CYAN,
+              'WHITE': Fore.WHITE,
+              'RESET': Fore.RESET}
 
-    color_dic_keys = color_dic.keys()
-    if color not in color_dic_keys:
-        print(Fore.RED
-              + '\'{}\' not found, using default color.'.format(color)
-              + Style.RESET_ALL)
-        actual_color = Fore.RESET
-    else:
-        actual_color = color_dic[color]
+    av_colors = colors.keys()
+    if color not in av_colors:
+        raise KeyError(f'{color} not found.')
 
-    print(actual_color + text + Style.RESET_ALL)
+    print(colors[color] + text + Style.RESET_ALL)
 
 
 def plot_benchmarks():
diff --git a/ml_exp/parallel_create_matrices.py b/ml_exp/parallel_create_matrices.py
deleted file mode 100644
index a68122d48..000000000
--- a/ml_exp/parallel_create_matrices.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from multiprocessing import Process, Pipe
-from ml_exp.c_matrix import c_matrix_multiple
-from ml_exp.lj_matrix import lj_matrix_multiple
-
-
-def parallel_create_matrices(mol_data,
-                             nc_data,
-                             ljm_diag_value=None,
-                             ljm_sigma=1.0,
-                             ljm_epsilon=1.0,
-                             max_len=25,
-                             as_eig=True,
-                             bohr_radius_units=False):
-    """
-    Creates the Coulomb and L-J matrices in parallel.
-    mol_data: molecule data, matrix of atom coordinates.
-    nc_data: nuclear charge data, array of atom data.
-    ljm_diag_value: if special diagonal value is to be used for lj matrix.
-    ljm_sigma: sigma value for lj matrix.
-    ljm_epsilon: psilon value for lj matrix.
-    max_len: maximum amount of atoms in molecule.
-    as_eig: if data should be returned as matrix or array of eigenvalues.
-    bohr_radius_units: if units should be in bohr's radius units.
-    """
-
-    # Matrices calculation.
-    procs = []
-    pipes = []
-
-    cm_recv, cm_send = Pipe(False)
-    p1 = Process(target=c_matrix_multiple,
-                 args=(mol_data,
-                       nc_data,
-                       cm_send,
-                       max_len,
-                       as_eig,
-                       bohr_radius_units))
-    procs.append(p1)
-    pipes.append(cm_recv)
-    p1.start()
-
-    ljm_recv, ljm_send = Pipe(False)
-    p2 = Process(target=lj_matrix_multiple,
-                 args=(mol_data,
-                       nc_data,
-                       ljm_send,
-                       ljm_diag_value,
-                       ljm_sigma,
-                       ljm_epsilon,
-                       max_len,
-                       as_eig,
-                       bohr_radius_units))
-    procs.append(p2)
-    pipes.append(ljm_recv)
-    p2.start()
-
-    cm_data = pipes[0].recv()
-    ljm_data = pipes[1].recv()
-
-    for proc in procs:
-        proc.join()
-
-    return cm_data, ljm_data
diff --git a/ml_exp/frob_norm.py b/ml_exp/qm7db.py
index 4c3a2945d..3fd46b6bc 100644
--- a/ml_exp/frob_norm.py
+++ b/ml_exp/qm7db.py
@@ -20,32 +20,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import math
+from ml_exp.compound import Compound
+import numpy as np
+import random
 
 
-def frob_norm(array):
+def qm7db(db_path='data',
+          is_shuffled=True,
+          r_seed=111):
     """
-    Calculates the frobenius norm of a given array or matrix.
-    array: array of data.
+    Creates a list of compounds with the qm7 database.
+    db_path: path to the database directory.
+    is_shuffled: if the resulting list of compounds should be shuffled.
+    r_seed: random seed to use for the shuffling.
     """
+    fname = f'{db_path}/hof_qm7.txt'
+    with open(fname, 'r') as f:
+        lines = f.readlines()
 
-    arr_sh_len = len(array.shape)
-    arr_range = range(len(array))
-    fn = 0.0
+    compounds = []
+    for i, line in enumerate(lines):
+        line = line.split()
+        compounds.append(Compound(f'{db_path}/{line[0]}'))
+        compounds[i].pbe0 = float(line[1])
+        compounds[i].delta = float(line[1]) - float(line[2])
 
-    # If it is a 'vector'.
-    if arr_sh_len == 1:
-        for i in arr_range:
-            fn += array[i]*array[i]
+    if is_shuffled:
+        random.seed(r_seed)
+        random.shuffle(compounds)
 
-        return math.sqrt(fn)
+    e_pbe0 = np.array([compound.pbe0 for compound in compounds], dtype=float)
+    e_delta = np.array([compound.delta for compound in compounds], dtype=float)
 
-    # If it is a matrix.
-    elif arr_sh_len == 2:
-        for i in arr_range:
-            for j in arr_range:
-                fn += array[i, j]*array[i, j]
-
-        return math.sqrt(fn)
-    else:
-        print('Error. Array size greater than 2 ({}).'.format(arr_sh_len))
+    return compounds, e_pbe0, e_delta
diff --git a/ml_exp/read_qm7_data.py b/ml_exp/read_qm7_data.py
deleted file mode 100644
index 81be05a17..000000000
--- a/ml_exp/read_qm7_data.py
+++ /dev/null
@@ -1,162 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import os
-import time
-import numpy as np
-import random
-from ml_exp.misc import printc
-
-
-# 'periodic_table_of_elements.txt' retrieved from
-# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee
-def read_nc_data(data_path):
-    """
-    Reads nuclear charge data from file and returns a dictionary.
-    data_path: path to the data directory.
-    """
-    fname = 'periodic_table_of_elements.txt'
-    with open(''.join([data_path, '/', fname]), 'r') as infile:
-        temp_lines = infile.readlines()
-
-    del temp_lines[0]
-
-    lines = []
-    for temp_line in temp_lines:
-        new_line = temp_line.split(sep=',')
-        lines.append(new_line)
-
-    # Dictionary of nuclear charge.
-    return {line[2]: int(line[0]) for line in lines}
-
-
-# 'hof_qm7.txt.txt' retrieved from
-# https://github.com/qmlcode/tutorial
-def read_db_data(zi_data,
-                 data_path,
-                 r_seed=111,
-                 return_atoms=False):
-    """
-    Reads molecule database and extracts
-    its contents as usable variables.
-    zi_data: dictionary containing nuclear charge data.
-    data_path: path to the data directory.
-    r_seed: random seed to use for the shuffling.
-    return_atoms: if atom list should be returned.
-    """
-    os.chdir(data_path)
-
-    fname = 'hof_qm7.txt'
-    with open(fname, 'r') as infile:
-        lines = infile.readlines()
-
-    # Temporary energy dictionary.
-    energy_temp = dict()
-
-    for line in lines:
-        xyz_data = line.split()
-
-        xyz_name = xyz_data[0]
-        hof = float(xyz_data[1])
-        dftb = float(xyz_data[2])
-        # print(xyz_name, hof, dftb)
-
-        energy_temp[xyz_name] = np.array([hof, hof - dftb])
-
-    # Use a random seed.
-    random.seed(r_seed)
-
-    et_keys = list(energy_temp.keys())
-    random.shuffle(et_keys)
-
-    # Temporary energy dictionary, shuffled.
-    energy_temp_shuffled = dict()
-    for key in et_keys:
-        energy_temp_shuffled.update({key: energy_temp[key]})
-
-    mol_data = []
-    mol_nc_data = []
-    atoms = []
-    # Actual reading of the xyz files.
-    for i, k in enumerate(energy_temp_shuffled.keys()):
-        with open(k, 'r') as xyz_file:
-            lines = xyz_file.readlines()
-
-        len_lines = len(lines)
-        mol_temp_data = []
-        mol_nc_temp_data = np.array(np.zeros(len_lines-2))
-        atoms_temp = []
-        for j, line in enumerate(lines[2:len_lines]):
-            line_list = line.split()
-
-            atoms_temp.append(line_list[0])
-            mol_nc_temp_data[j] = float(zi_data[line_list[0]])
-            line_data = np.array(np.asarray(line_list[1:4], dtype=float))
-            mol_temp_data.append(line_data)
-
-        mol_data.append(mol_temp_data)
-        mol_nc_data.append(mol_nc_temp_data)
-        atoms.append(atoms_temp)
-
-    # Convert everything to a numpy array.
-    molecules = np.array([np.array(mol) for mol in mol_data])
-    nuclear_charge = np.array([nc_d for nc_d in mol_nc_data])
-    energy_pbe0 = np.array([energy_temp_shuffled[k][0]
-                            for k in energy_temp_shuffled.keys()])
-    energy_delta = np.array([energy_temp_shuffled[k][1]
-                             for k in energy_temp_shuffled.keys()])
-
-    if return_atoms:
-        return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
-    return molecules, nuclear_charge, energy_pbe0, energy_delta
-
-
-def read_qm7_data(data_path='data',
-                  r_seed=111,
-                  return_atoms=False):
-    """
-    Reads all the qm7 data.
-    data_path: path to the data directory.
-    r_seed: random seed to use for the shuffling.
-    return_atoms: if atom list should be returned.
-    """
-    tic = time.perf_counter()
-    printc('Data reading started.', 'CYAN')
-
-    init_path = os.getcwd()
-    os.chdir(data_path)
-    data_path = os.getcwd()
-
-    zi_data = read_nc_data(data_path)
-    if return_atoms:
-        molecules, nuclear_charge, energy_pbe0, energy_delta, atoms = \
-            read_db_data(zi_data, data_path, r_seed, return_atoms)
-    else:
-        molecules, nuclear_charge, energy_pbe0, energy_delta = \
-            read_db_data(zi_data, data_path, r_seed)
-
-    os.chdir(init_path)
-    toc = time.perf_counter()
-    printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
-    if return_atoms:
-        return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms
-    return molecules, nuclear_charge, energy_pbe0, energy_delta
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
new file mode 100644
index 000000000..ea079595e
--- /dev/null
+++ b/ml_exp/representations.py
@@ -0,0 +1,384 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import numpy as np
+from collections import Counter
+
+
+def coulomb_matrix(coords,
+                   nc,
+                   size=23,
+                   as_eig=True,
+                   bohr_ru=False):
+    """
+    Creates the Coulomb Matrix from the molecule data given.
+    coords: compound coordinates.
+    nc: nuclear charge data.
+    size: compound size.
+    as_eig: if the representation should be as the eigenvalues.
+    bohr_ru: if radius units should be in bohr's radius units.
+    """
+    if bohr_ru:
+        cr = 0.52917721067
+    else:
+        cr = 1.0
+
+    n = coords.shape[0]
+
+    if not n == nc.shape[0]:
+        raise ValueError('Compound size is different than the nuclear charge \
+size. Arrays are not of the right shape.')
+
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
+        size = n
+
+    cm = np.zeros((size, size), dtype=float)
+
+    # Actual calculation of the coulomb matrix.
+    for i, xyz_i in enumerate(coords):
+        for j, xyz_j in enumerate(coords):
+            rv = xyz_i - xyz_j
+            r = np.linalg.norm(rv)/cr
+            if i == j:
+                cm[i, j] = (0.5*nc[i]**2.4)
+            else:
+                cm[i, j] = nc[i]*nc[j]/r
+
+    # Now the value will be returned.
+    if as_eig:
+        cm_sorted = np.sort(np.linalg.eig(cm)[0])[::-1]
+        # Thanks to SO for the following lines of code.
+        # https://stackoverflow.com/a/43011036
+
+        # Keep zeros at the end.
+        mask = cm_sorted != 0.
+        f_mask = mask.sum(0, keepdims=1) >\
+            np.arange(cm_sorted.shape[0]-1, -1, -1)
+
+        f_mask = f_mask[::-1]
+        cm_sorted[f_mask] = cm_sorted[mask]
+        cm_sorted[~f_mask] = 0.
+
+        return cm_sorted
+    else:
+        return cm
+
+
+def lennard_jones_matrix(coords,
+                         nc,
+                         diag_value=None,
+                         sigma=1.0,
+                         epsilon=1.0,
+                         size=23,
+                         as_eig=True,
+                         bohr_ru=False):
+    """
+    Creates the Lennard-Jones Matrix from the molecule data given.
+    coords: compound coordinates.
+    nc: nuclear charge data.
+    diag_value: if special diagonal value is to be used.
+    sigma: sigma value.
+    epsilon: epsilon value.
+    size: compound size.
+    as_eig: if the representation should be as the eigenvalues.
+    bohr_ru: if radius units should be in bohr's radius units.
+    """
+    if bohr_ru:
+        cr = 0.52917721067
+    else:
+        cr = 1.0
+
+    n = coords.shape[0]
+
+    if not n == nc.shape[0]:
+        raise ValueError('Compound size is different than the nuclear charge \
+size. Arrays are not of the right shape.')
+
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
+        size = n
+
+    lj = np.zeros((size, size), dtype=float)
+
+    # Actual calculation of the lennard-jones matrix.
+    for i, xyz_i in enumerate(coords):
+        for j, xyz_j in enumerate(coords):
+            if i == j:
+                if diag_value is None:
+                    lj[i, j] = (0.5*nc[i]**2.4)
+                else:
+                    lj[i, j] = diag_value
+            else:
+                # Calculations are done after i==j is checked
+                # so no division by zero is done.
+
+                # A little play with r exponents
+                # so no square root is calculated.
+                # Conversion factor is included in r^2.
+                rv = xyz_i - xyz_j
+                r = np.linalg.norm(rv)/cr
+
+                # 1/r^n
+                r_2 = sigma**2/r**2
+                r_6 = r_2**3
+                r_12 = r_6**2
+                lj[i, j] = (4*epsilon*(r_12 - r_6))
+
+    # Now the value will be returned.
+    if as_eig:
+        lj_sorted = np.sort(np.linalg.eig(lj)[0])[::-1]
+        # Thanks to SO for the following lines of code.
+        # https://stackoverflow.com/a/43011036
+
+        # Keep zeros at the end.
+        mask = lj_sorted != 0.
+        f_mask = mask.sum(0, keepdims=1) >\
+            np.arange(lj_sorted.shape[0]-1, -1, -1)
+
+        f_mask = f_mask[::-1]
+        lj_sorted[f_mask] = lj_sorted[mask]
+        lj_sorted[~f_mask] = 0.
+
+        return lj_sorted
+    else:
+        return lj
+
+
+def first_neighbor_matrix(coords,
+                          nc,
+                          atoms,
+                          size=23,
+                          use_forces=False,
+                          bohr_ru=False):
+    """
+    Creates the First Neighbor Matrix from the molecule data given.
+    coords: compound coordinates.
+    nc: nuclear charge data.
+    atoms: list of atoms.
+    use_forces: if the use of forces instead of k_cx should be used.
+    bohr_ru: if radius units should be in bohr's radius units.
+    NOTE: Bond distance of carbon to other elements
+        are (for atoms present in the qm7 dataset):
+            C: 1.20 - 1.54 A (Edited to 1.19 - 1.54 A)
+            H: 1.06 - 1.12 A
+            O: 1.43 - 2.15 A
+            N: 1.47 - 2.10 A
+            S: 1.81 - 2.55 A
+    """
+    if bohr_ru:
+        cr = 0.52917721067
+    else:
+        cr = 1.0
+
+    n = coords.shape[0]
+
+    if not n == nc.shape[0]:
+        raise ValueError('Compound size is different than the nuclear charge \
+size. Arrays are not of the right shape.')
+
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
+        size = n
+
+    # Possible bonds.
+    cc_bond = sorted(['C', 'C'])
+    ch_bond = sorted(['C', 'H'])
+    co_bond = sorted(['C', 'O'])
+    cn_bond = sorted(['C', 'N'])
+    cs_bond = sorted(['C', 'S'])
+
+    fnm = np.zeros((size, size), dtype=float)
+
+    bonds = []
+    forces = []
+    for i, xyz_i in enumerate(coords):
+        for j, xyz_j in enumerate(coords):
+            # Ignore the diagonal.
+            if i != j:
+                bond = sorted([atoms[i], atoms[j]])
+                rv = xyz_i - xyz_j
+                r = np.linalg.norm(rv)/cr
+
+                # Check for each type of bond.
+                if (cc_bond == bond) and (r >= 1.19 and r <= 1.54):
+                    fnm[i, j] = 1.0
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12):
+                    fnm[i, j] = 1.0
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (co_bond == bond) and (r >= 1.43 and r <= 2.15):
+                    fnm[i, j] = 0.8
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (cn_bond == bond) and (r >= 1.47 and r <= 2.10):
+                    fnm[i, j] = 1.0
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+                elif (cs_bond == bond) and (r >= 1.81 and r <= 2.55):
+                    fnm[i, j] = 0.7
+                    if j > i:
+                        bonds.append((i, j))
+                        if use_forces:
+                            forces.append(rv*nc[i]*nc[j]/r**3)
+
+    return fnm, bonds, forces
+
+
+def adjacency_matrix(fnm,
+                     bonds,
+                     forces,
+                     size=22):
+    """
+    Calculates the adjacency matrix given the bond list.
+    fnm: first neighbour matrix.
+    bonds: list of bonds (tuple of indexes).
+    forces: list of forces.
+    size: compund size.
+    """
+    n = len(bonds)
+
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)\
+              instead of (size).')
+        size = n
+
+    am = np.zeros((size, size), dtype=float)
+
+    for i, bond_i in enumerate(bonds):
+        for j, bond_j in enumerate(bonds):
+            # Ignore the diagonal.
+            if i != j:
+                if (bond_i[0] in bond_j) or (bond_i[1] in bond_j):
+                    if forces:
+                        am[i, j] = np.dot(forces[i], forces[j])
+                    else:
+                        am[i, j] = fnm[bond_i[0], bond_i[1]]
+
+    return am
+
+
+def check_bond(bags,
+               bond):
+    """
+    Checks if a bond is in a bag.
+    bags: list of bags, containing a bag per entry, which in turn
+        contains a list of bond-values.
+    bond: bond to check.
+    """
+    if bags == []:
+        return False, None
+
+    for i, bag in enumerate(bags):
+        if bag[0] == bond:
+            return True, i
+
+    return False, None
+
+
+def bag_of_stuff(cm,
+                 atoms,
+                 size=23,
+                 stuff='bonds'):
+    """
+    Creates the Bag of Bonds using the Coulomb Matrix.
+    cm: coulomb matrix.
+    atoms: list of atoms.
+    size: compound size.
+    """
+    if cm is None:
+        raise ValueError('Coulomb Matrix hasn\'t been initialized for the \
+current compound.')
+
+    if cm.ndim == 1:
+        raise ValueError('Coulomb Matrix (CM) dimension is 1. Maybe it was \
+generated as the vector of eigenvalues, try (re-)generating the CM.')
+
+    n = len(atoms)
+
+    if size < n:
+        print('Error. Compound size (n) is greater han (size). Using (n)',
+              'instead of (size).')
+        size = n
+
+    # Bond max length, calculated using only the upper triangular matrix.
+    bond_size = int((size * size - size)/2 + size)
+
+    # List where each bag data is stored.
+    bags = []
+    for i, atom_i in enumerate(atoms):
+        for j, atom_j in enumerate(atoms):
+            # Work only in the upper triangle of the coulomb matrix.
+            if j >= i:
+                # Get the string of the current bond.
+                if i == j:
+                    current_bond = atom_i
+                else:
+                    current_bond = ''.join(sorted([atom_i, atom_j]))
+
+                # Check if that bond is already in a bag.
+                checker = check_bond(bags, current_bond)
+                # Either create a new bag or add values to an existing one.
+                if not checker[0]:
+                    bags.append([current_bond, cm[i, j]])
+                else:
+                    bags[checker[1]].append(cm[i, j])
+
+    # Create the actual bond list ordered.
+    atom_counter = Counter(atoms)
+    atom_list = sorted(list(set(atoms)))
+    bonds = []
+    for i, a_i in enumerate(atom_list):
+        if atom_counter[a_i] > 1:
+            for a_j in atom_list[i:]:
+                bonds.append(''.join(sorted([a_i, a_j])))
+    bonds = atom_list + bonds
+
+    # Create the final vector for the bos.
+    bos = np.zeros(bond_size, dtype=float)
+    c_i = 0
+    for i, bond in enumerate(bonds):
+        checker = check_bond(bags, bond)
+        if checker[0]:
+            for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
+                # Use c_i as the index for bos if the zero padding should
+                # be at the end of the vector instead of between each bond.
+                bos[i*size + j] = num
+                c_i += 1
+        else:
+            print(f'Error. Bond {bond} from bond list coudn\'t be found',
+                  'in the bags list. This could be a case where the atom',
+                  'is only present oncce in the molecule.')
+    return bos
diff --git a/requirements.txt b/requirements.txt
index 437e0428a..4b242770c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 colorama==0.4.3
 numpy==1.18.1
-pandas==0.25.3
-\ No newline at end of file
+pandas==1.0.1