From cf88764c4571cdb6dec2c69fb14288adf377c0e3 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 1 Feb 2020 13:55:06 -0700 Subject: Add bag of k_cx and fix bug on adj_matrix.py --- ml_exp/__main__.py | 11 +++++++- ml_exp/adj_matrix.py | 21 ++++++++++---- ml_exp/bostuff.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 6 deletions(-) diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py index 9b4a18cc3..a1f2f0030 100644 --- a/ml_exp/__main__.py +++ b/ml_exp/__main__.py @@ -23,9 +23,10 @@ SOFTWARE. # from ml_exp.do_ml import do_ml # from ml_exp.misc import plot_benchmarks from ml_exp.read_qm7_data import read_qm7_data -# from ml_exp.adj_matrix import fneig_matrix, adj_matrix +from ml_exp.adj_matrix import fneig_matrix, adj_matrix from ml_exp.c_matrix import c_matrix from ml_exp.bob import bob +from ml_exp.bostuff import bok_cx if __name__ == '__main__': """ @@ -58,5 +59,13 @@ if __name__ == '__main__': for i in range(1): cm = c_matrix(xyz[i], nc[i], as_eig=False) bob = bob(cm, atoms[i]) + print(f'{i} coulomb matrix\n{cm}') print(f'{i} bag of bonds\n{bob}') + + fnm, bonds = fneig_matrix(atoms[i], xyz[i], nc[i], False) + am = adj_matrix(fnm, bonds, None) + bok_cx = bok_cx(am, atoms[i]) + + print(f'{i} adjacency matrix\n{am}') + print(f'{i} bag of k_cx\n{bok_cx}') diff --git a/ml_exp/adj_matrix.py b/ml_exp/adj_matrix.py index 67eab79cd..de484b5be 100644 --- a/ml_exp/adj_matrix.py +++ b/ml_exp/adj_matrix.py @@ -33,6 +33,8 @@ def fneig_matrix(atoms, Creates the first neighbor matrix of the given molecule data. atoms: list of atoms. xyz: matrix of atomic coords. + nc: nuclear charge info. + use_forces: if the use of forces instead of k_cx should be used. NOTE: Bond distance of carbon to other elements are (for atoms present in the qm7 dataset): C: 1.20 - 1.53 A @@ -71,7 +73,7 @@ def fneig_matrix(atoms, if use_forces: forces.append(rv*nc[i]*nc[j]/r**3) elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12): - fnm[i, j] = 0.5 + fnm[i, j] = 1.0 if j > i: bonds.append((i, j)) if use_forces: @@ -99,15 +101,24 @@ def fneig_matrix(atoms, return fnm, bonds -def adj_matrix(bonds, - forces=None): +def adj_matrix(fneig_matrix, + bonds, + forces=None, + max_len=25): """ Calculates the adjacency matrix given the bond list. bonds: list of bonds (tuple of indexes). + max_len: maximum amount of atoms in molecule. forces: list of forces. """ n = len(bonds) - am = array(zeros((n, n))) + + if max_len < n: + print(''.join(['Error. Molecule matrix dimension (mol_n) is ', + 'greater than max_len. Using mol_n.'])) + max_len = n + + am = array(zeros((max_len, max_len))) for i, bond_i in enumerate(bonds): for j, bond_j in enumerate(bonds): # Ignore the diagonal. @@ -116,5 +127,5 @@ def adj_matrix(bonds, if forces: am[i, j] = dot(forces[i], forces[j]) else: - am[i, j] = 1 + am[i, j] = fneig_matrix[bond_i[0], bond_i[1]] return am diff --git a/ml_exp/bostuff.py b/ml_exp/bostuff.py index 48cd14913..3e3f65e66 100644 --- a/ml_exp/bostuff.py +++ b/ml_exp/bostuff.py @@ -20,3 +20,80 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +from numpy import array, zeros +from collections import Counter +from ml_exp.bob import check_bond + + +def bok_cx(adj_matrix, + atoms, + max_n=25, + max_bond_len=325): + """ + Creates the bag of k_cx using the adjacency matrix data. + adj_matrix: adjacency matrix. + max_n: maximum amount of atoms. + max_bond_len: maximum amount of bonds in molecule. + """ + n = len(atoms) + bond_n = (n * n - n) / 2 + n + n_r = range(n) + + if max_n < n: + print(''.join(['Error. Molecule matrix dimension (mol_n) is ', + 'greater than max_len. Using mol_n.'])) + max_n = n + + if max_bond_len < bond_n: + print(''.join(['Error. Molecule bond lenght (bond_n) is ', + 'greater than max_bond_len. Using bond_n.'])) + max_bond_len = bond_n + + # List where each bag data is stored. + bags = [] + for i in n_r: + for j in n_r: + # Work only in the upper triangle of the coulomb matrix. + if j >= i: + # Get the string of the current bond. + if i == j: + current_bond = atoms[i] + else: + current_bond = ''.join(sorted([atoms[i], atoms[j]])) + + # Check if that bond is already in a bag. + checker = check_bond(bags, current_bond) + # Either create a new bag or add values to an existing one. + if not checker[0]: + bags.append([current_bond, adj_matrix[i, j]]) + else: + bags[checker[1]].append(adj_matrix[i, j]) + + # Create the actual bond list ordered. + atom_counter = Counter(atoms) + atom_list = sorted(list(set(atoms))) + bonds = [] + for i, a_i in enumerate(atom_list): + if atom_counter[a_i] > 1: + for a_j in atom_list[i:]: + bonds.append(''.join(sorted([a_i, a_j]))) + bonds = atom_list + bonds + + # Create the final vector for the bok_cx. + bok_cx = array(zeros(max_bond_len), dtype=float) + c_i = 0 + for i, bond in enumerate(bonds): + checker = check_bond(bags, bond) + if checker[0]: + for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]): + # Use c_i as the index for bok_cx if the zero padding should + # be at the end of the vector instead of between each bond. + bok_cx[i*max_n + j] = num + c_i += 1 + # This is set to false because this was a debugging measure. + else: + print(''.join([f'Error. Bond {bond} from bond list coudn\'t', + ' be found in the bags list. This could be', + ' a case where the atom is only present once', + ' in the molecule.'])) + return bok_cx -- cgit v1.2.3-54-g00ecf