summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-02-23 22:06:30 -0700
committer David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-02-23 22:06:30 -0700
commit b300e365d0886695b0d0220aad1be6cb38cf3c36 (patch)
tree 38440ade31f5412b238ef5a6809d4f5ff01ff72e
parent b82b90ec609ee80629f1e7f4b8a03cbbb53a0f21 (diff)
Add bos to compound, and bugfix to bos
-rw-r--r-- ml_exp/__init__.py 6
-rw-r--r-- ml_exp/compound.py 15
-rw-r--r-- ml_exp/representations.py 20
3 files changed, 30 insertions, 11 deletions
diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index d81f0999a..dcb10a1df 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -22,7 +22,7 @@ SOFTWARE.
"""
from ml_exp.compound import Compound
from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
- first_neighbor_matrix, adjacency_matrix
+ first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff
# If somebody does "from package import *", this is what they will
# be able to access:
@@ -30,4 +30,6 @@ __all__ = ['Compound',
'coulomb_matrix',
'lennard_jones_matrix',
'first_neighbor_matrix',
- 'adjacency_matrix']
+ 'adjacency_matrix',
+ 'check_bond',
+ 'bag_of_stuff']
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index 595078d55..8b6af0ae9 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -23,7 +23,7 @@ SOFTWARE.
import numpy as np
from ml_exp.data import NUCLEAR_CHARGE
from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
- first_neighbor_matrix, adjacency_matrix
+ first_neighbor_matrix, adjacency_matrix, bag_of_stuff
class Compound:
@@ -115,6 +115,19 @@ class Compound:
forces,
size=size)
+ def gen_bos(self,
+ size=23,
+ stuff='bonds'):
+ """
+ Generate the Bag of Stuff for the compound.
+ size: compound size.
+ stuff: elements of the bag, by default the known bag of bonds.
+ """
+ self.bos = bag_of_stuff(self.cm,
+ self.atoms,
+ size=size,
+ stuff=stuff)
+
def read_xyz(self,
filename):
"""
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 1fe55aa5f..ea079595e 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -310,8 +310,8 @@ def check_bond(bags,
def bag_of_stuff(cm,
atoms,
- stuff='bonds',
- size=23):
+ size=23,
+ stuff='bonds'):
"""
Creates the Bag of Bonds using the Coulomb Matrix.
cm: coulomb matrix.
@@ -322,6 +322,10 @@ def bag_of_stuff(cm,
raise ValueError('Coulomb Matrix hasn\'t been initialized for the \
current compound.')
+ if cm.ndim == 1:
+ raise ValueError('Coulomb Matrix (CM) dimension is 1. Maybe it was \
+generated as the vector of eigenvalues, try (re-)generating the CM.')
+
n = len(atoms)
if size < n:
@@ -330,7 +334,7 @@ current compound.')
size = n
# Bond max length, calculated using only the upper triangular matrix.
- bond_size = (size * size - size)/2 + size
+ bond_size = int((size * size - size)/2 + size)
# List where each bag data is stored.
bags = []
@@ -362,19 +366,19 @@ current compound.')
bonds.append(''.join(sorted([a_i, a_j])))
bonds = atom_list + bonds
- # Create the final vector for the bob.
- bob = np.zeros(bond_size, dtype=float)
+ # Create the final vector for the bos.
+ bos = np.zeros(bond_size, dtype=float)
c_i = 0
for i, bond in enumerate(bonds):
checker = check_bond(bags, bond)
if checker[0]:
for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
- # Use c_i as the index for bob if the zero padding should
+ # Use c_i as the index for bos if the zero padding should
# be at the end of the vector instead of between each bond.
- bob[i*size + j] = num
+ bos[i*size + j] = num
c_i += 1
else:
print(f'Error. Bond {bond} from bond list coudn\'t be found',
'in the bags list. This could be a case where the atom',
'is only present oncce in the molecule.')
- return bob
+ return bos