From b300e365d0886695b0d0220aad1be6cb38cf3c36 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> Date: Sun, 23 Feb 2020 22:06:30 -0700 Subject: Add bos to compound, and bugfix to bos --- ml_exp/__init__.py | 6 ++++-- ml_exp/compound.py | 15 ++++++++++++++- ml_exp/representations.py | 20 ++++++++++++-------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py index d81f0999a..dcb10a1df 100644 --- a/ml_exp/__init__.py +++ b/ml_exp/__init__.py @@ -22,7 +22,7 @@ SOFTWARE. """ from ml_exp.compound import Compound from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ - first_neighbor_matrix, adjacency_matrix + first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff # If somebody does "from package import *", this is what they will # be able to access: @@ -30,4 +30,6 @@ __all__ = ['Compound', 'coulomb_matrix', 'lennard_jones_matrix', 'first_neighbor_matrix', - 'adjacency_matrix'] + 'adjacency_matrix', + 'check_bond', + 'bag_of_stuff'] diff --git a/ml_exp/compound.py b/ml_exp/compound.py index 595078d55..8b6af0ae9 100644 --- a/ml_exp/compound.py +++ b/ml_exp/compound.py @@ -23,7 +23,7 @@ SOFTWARE. import numpy as np from ml_exp.data import NUCLEAR_CHARGE from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ - first_neighbor_matrix, adjacency_matrix + first_neighbor_matrix, adjacency_matrix, bag_of_stuff class Compound: @@ -115,6 +115,19 @@ class Compound: forces, size=size) + def gen_bos(self, + size=23, + stuff='bonds'): + """ + Generate the Bag of Stuff for the compound. + size: compound size. + stuff: elements of the bag, by default the known bag of bonds. + """ + self.bos = bag_of_stuff(self.cm, + self.atoms, + size=size, + stuff=stuff) + def read_xyz(self, filename): """ diff --git a/ml_exp/representations.py b/ml_exp/representations.py index 1fe55aa5f..ea079595e 100644 --- a/ml_exp/representations.py +++ b/ml_exp/representations.py @@ -310,8 +310,8 @@ def check_bond(bags, def bag_of_stuff(cm, atoms, - stuff='bonds', - size=23): + size=23, + stuff='bonds'): """ Creates the Bag of Bonds using the Coulomb Matrix. cm: coulomb matrix. @@ -322,6 +322,10 @@ def bag_of_stuff(cm, raise ValueError('Coulomb Matrix hasn\'t been initialized for the \ current compound.') + if cm.ndim == 1: + raise ValueError('Coulomb Matrix (CM) dimension is 1. Maybe it was \ +generated as the vector of eigenvalues, try (re-)generating the CM.') + n = len(atoms) if size < n: @@ -330,7 +334,7 @@ current compound.') size = n # Bond max length, calculated using only the upper triangular matrix. - bond_size = (size * size - size)/2 + size + bond_size = int((size * size - size)/2 + size) # List where each bag data is stored. bags = [] @@ -362,19 +366,19 @@ current compound.') bonds.append(''.join(sorted([a_i, a_j]))) bonds = atom_list + bonds - # Create the final vector for the bob. - bob = np.zeros(bond_size, dtype=float) + # Create the final vector for the bos. + bos = np.zeros(bond_size, dtype=float) c_i = 0 for i, bond in enumerate(bonds): checker = check_bond(bags, bond) if checker[0]: for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]): - # Use c_i as the index for bob if the zero padding should + # Use c_i as the index for bos if the zero padding should # be at the end of the vector instead of between each bond. - bob[i*size + j] = num + bos[i*size + j] = num c_i += 1 else: print(f'Error. Bond {bond} from bond list coudn\'t be found', 'in the bags list. This could be a case where the atom', 'is only present oncce in the molecule.') - return bob + return bos -- cgit v1.2.3-70-g09d2