summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Luevano <55825613+luevano@users.noreply.github.com>2020-01-27 00:16:44 -0700
committerDavid Luevano <55825613+luevano@users.noreply.github.com>2020-01-27 00:16:44 -0700
commitc245130b09450c3b37ec150a0e58d0d42ac2d754 (patch)
treeed510e09d909441efd1bacd42c8a6e26d85b4ded
parent202d1b595344e46bf29fc12a3cddf5183a1941c3 (diff)
Add bag of bonds
-rw-r--r--ml_exp/__main__.py12
-rw-r--r--ml_exp/bob.py105
2 files changed, 116 insertions, 1 deletions
diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 6d62333e2..9b4a18cc3 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -23,7 +23,9 @@ SOFTWARE.
# from ml_exp.do_ml import do_ml
# from ml_exp.misc import plot_benchmarks
from ml_exp.read_qm7_data import read_qm7_data
-from ml_exp.adj_matrix import fneig_matrix, adj_matrix
+# from ml_exp.adj_matrix import fneig_matrix, adj_matrix
+from ml_exp.c_matrix import c_matrix
+from ml_exp.bob import bob
if __name__ == '__main__':
"""
@@ -39,6 +41,7 @@ if __name__ == '__main__':
show_msgs=True)
"""
# plot_benchmarks()
+ """
xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True)
for i in range(1):
fnm, bonds, forces = fneig_matrix(atoms[i], xyz[i], nc[i], True)
@@ -50,3 +53,10 @@ if __name__ == '__main__':
print(f'{i} adjacency matrix\n{am}')
print('-'*30)
print('OK!')
+ """
+ xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True)
+ for i in range(1):
+ cm = c_matrix(xyz[i], nc[i], as_eig=False)
+ bob = bob(cm, atoms[i])
+ print(f'{i} coulomb matrix\n{cm}')
+ print(f'{i} bag of bonds\n{bob}')
diff --git a/ml_exp/bob.py b/ml_exp/bob.py
new file mode 100644
index 000000000..0d31face0
--- /dev/null
+++ b/ml_exp/bob.py
@@ -0,0 +1,105 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from numpy import array, zeros
+
+
+def check_bond(bags,
+ bond):
+ """
+ Checks if a bond is in a bag.
+ bags: list of bags, containing a bag per entry, which in turn
+ contains a list of bond-values.
+ bond: bond to check.
+ """
+ if bags == []:
+ return False, None
+
+ for i, bag in enumerate(bags):
+ if bag[0] == bond:
+ return True, i
+
+ return False, None
+
+
+def bob(c_matrix,
+ atoms,
+ max_bond_len=325):
+ """
+ Creates the bag of bond using the coulomb matrix data.
+ c_matrix: coulomb matrix.
+ atoms: list of atoms.
+ max_len: maximum amount of bonds in molecule.
+ """
+ n = len(atoms)
+ bond_n = (n*n-n)/2+n
+ n_r = range(n)
+
+ if max_bond_len < bond_n:
+ print(''.join(['Error. Molecule bond lenght (bond_n) is ',
+ 'greater than max_bond_len. Using bond_n.']))
+ max_bond_len = None
+
+ # List where each bag data is stored.
+ bags = []
+ for i in n_r:
+ for j in n_r:
+ # Work only in the upper triangle of the coulomb matrix.
+ if j >= i:
+ # Get the string of the current bond.
+ if i == j:
+ current_bond = atoms[i]
+ else:
+ current_bond = ''.join(sorted([atoms[i], atoms[j]]))
+
+ # Check if that bond is already in a bag.
+ checker = check_bond(bags, current_bond)
+ # Either create a new bag or add values to an existing one.
+ if not checker[0]:
+ bags.append([current_bond, c_matrix[i, j]])
+ else:
+ bags[checker[1]].append(c_matrix[i, j])
+
+ # Create the actual bond list ordered.
+ atom_list = sorted(list(set(atoms)))
+ bonds = []
+ for i, a_i in enumerate(atom_list):
+ for a_j in atom_list[i:]:
+ bonds.append(''.join(sorted([a_i, a_j])))
+ bonds = atom_list + bonds
+
+ # Create the final vector for the bob.
+ bob = array(zeros(max_bond_len), dtype=float)
+ c_i = 0
+ for bond in bonds:
+ checker = check_bond(bags, bond)
+ if checker[0]:
+ for num in bags[checker[1]][1:]:
+ bob[c_i] = num
+ c_i += 1
+ # This is set to false because this was a debugging measure.
+ elif False:
+ print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
+ ' be found in the bags list. This could be',
+ ' a case where the atom is only present once',
+ ' in the molecule.']))
+ return bob