From c245130b09450c3b37ec150a0e58d0d42ac2d754 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Mon, 27 Jan 2020 00:16:44 -0700 Subject: Add bag of bonds --- ml_exp/__main__.py | 12 +++++- ml_exp/bob.py | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 ml_exp/bob.py diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py index 6d62333e2..9b4a18cc3 100644 --- a/ml_exp/__main__.py +++ b/ml_exp/__main__.py @@ -23,7 +23,9 @@ SOFTWARE. # from ml_exp.do_ml import do_ml # from ml_exp.misc import plot_benchmarks from ml_exp.read_qm7_data import read_qm7_data -from ml_exp.adj_matrix import fneig_matrix, adj_matrix +# from ml_exp.adj_matrix import fneig_matrix, adj_matrix +from ml_exp.c_matrix import c_matrix +from ml_exp.bob import bob if __name__ == '__main__': """ @@ -39,6 +41,7 @@ if __name__ == '__main__': show_msgs=True) """ # plot_benchmarks() + """ xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True) for i in range(1): fnm, bonds, forces = fneig_matrix(atoms[i], xyz[i], nc[i], True) @@ -50,3 +53,10 @@ if __name__ == '__main__': print(f'{i} adjacency matrix\n{am}') print('-'*30) print('OK!') + """ + xyz, nc, pbe0, delta, atoms = read_qm7_data(return_atoms=True) + for i in range(1): + cm = c_matrix(xyz[i], nc[i], as_eig=False) + bob = bob(cm, atoms[i]) + print(f'{i} coulomb matrix\n{cm}') + print(f'{i} bag of bonds\n{bob}') diff --git a/ml_exp/bob.py b/ml_exp/bob.py new file mode 100644 index 000000000..0d31face0 --- /dev/null +++ b/ml_exp/bob.py @@ -0,0 +1,105 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from numpy import array, zeros + + +def check_bond(bags, + bond): + """ + Checks if a bond is in a bag. + bags: list of bags, containing a bag per entry, which in turn + contains a list of bond-values. + bond: bond to check. + """ + if bags == []: + return False, None + + for i, bag in enumerate(bags): + if bag[0] == bond: + return True, i + + return False, None + + +def bob(c_matrix, + atoms, + max_bond_len=325): + """ + Creates the bag of bond using the coulomb matrix data. + c_matrix: coulomb matrix. + atoms: list of atoms. + max_len: maximum amount of bonds in molecule. + """ + n = len(atoms) + bond_n = (n*n-n)/2+n + n_r = range(n) + + if max_bond_len < bond_n: + print(''.join(['Error. Molecule bond lenght (bond_n) is ', + 'greater than max_bond_len. Using bond_n.'])) + max_bond_len = None + + # List where each bag data is stored. + bags = [] + for i in n_r: + for j in n_r: + # Work only in the upper triangle of the coulomb matrix. + if j >= i: + # Get the string of the current bond. + if i == j: + current_bond = atoms[i] + else: + current_bond = ''.join(sorted([atoms[i], atoms[j]])) + + # Check if that bond is already in a bag. + checker = check_bond(bags, current_bond) + # Either create a new bag or add values to an existing one. + if not checker[0]: + bags.append([current_bond, c_matrix[i, j]]) + else: + bags[checker[1]].append(c_matrix[i, j]) + + # Create the actual bond list ordered. + atom_list = sorted(list(set(atoms))) + bonds = [] + for i, a_i in enumerate(atom_list): + for a_j in atom_list[i:]: + bonds.append(''.join(sorted([a_i, a_j]))) + bonds = atom_list + bonds + + # Create the final vector for the bob. + bob = array(zeros(max_bond_len), dtype=float) + c_i = 0 + for bond in bonds: + checker = check_bond(bags, bond) + if checker[0]: + for num in bags[checker[1]][1:]: + bob[c_i] = num + c_i += 1 + # This is set to false because this was a debugging measure. + elif False: + print(''.join([f'Error. Bond {bond} from bond list coudn\'t', + ' be found in the bags list. This could be', + ' a case where the atom is only present once', + ' in the molecule.'])) + return bob -- cgit v1.2.3-54-g00ecf