summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Luevano <55825613+luevano@users.noreply.github.com>2020-02-01 13:55:06 -0700
committerDavid Luevano <55825613+luevano@users.noreply.github.com>2020-02-01 13:55:06 -0700
commitcf88764c4571cdb6dec2c69fb14288adf377c0e3 (patch)
tree1ef9c85dd2518c2235fcfd59b594b4c6b7be7b7a
parent07673b8a28674a5d670f9da4b5c3014bc7855cb9 (diff)
Add bag of k_cx and fix bug on adj_matrix.py
-rw-r--r--ml_exp/__main__.py11
-rw-r--r--ml_exp/adj_matrix.py21
-rw-r--r--ml_exp/bostuff.py77
3 files changed, 103 insertions, 6 deletions
diff --git a/ml_exp/__main__.py b/ml_exp/__main__.py
index 9b4a18cc3..a1f2f0030 100644
--- a/ml_exp/__main__.py
+++ b/ml_exp/__main__.py
@@ -23,9 +23,10 @@ SOFTWARE.
# from ml_exp.do_ml import do_ml
# from ml_exp.misc import plot_benchmarks
from ml_exp.read_qm7_data import read_qm7_data
-# from ml_exp.adj_matrix import fneig_matrix, adj_matrix
+from ml_exp.adj_matrix import fneig_matrix, adj_matrix
from ml_exp.c_matrix import c_matrix
from ml_exp.bob import bob
+from ml_exp.bostuff import bok_cx
if __name__ == '__main__':
"""
@@ -58,5 +59,13 @@ if __name__ == '__main__':
for i in range(1):
cm = c_matrix(xyz[i], nc[i], as_eig=False)
bob = bob(cm, atoms[i])
+
print(f'{i} coulomb matrix\n{cm}')
print(f'{i} bag of bonds\n{bob}')
+
+ fnm, bonds = fneig_matrix(atoms[i], xyz[i], nc[i], False)
+ am = adj_matrix(fnm, bonds, None)
+ bok_cx = bok_cx(am, atoms[i])
+
+ print(f'{i} adjacency matrix\n{am}')
+ print(f'{i} bag of k_cx\n{bok_cx}')
diff --git a/ml_exp/adj_matrix.py b/ml_exp/adj_matrix.py
index 67eab79cd..de484b5be 100644
--- a/ml_exp/adj_matrix.py
+++ b/ml_exp/adj_matrix.py
@@ -33,6 +33,8 @@ def fneig_matrix(atoms,
Creates the first neighbor matrix of the given molecule data.
atoms: list of atoms.
xyz: matrix of atomic coords.
+ nc: nuclear charge info.
+ use_forces: if the use of forces instead of k_cx should be used.
NOTE: Bond distance of carbon to other elements
are (for atoms present in the qm7 dataset):
C: 1.20 - 1.53 A
@@ -71,7 +73,7 @@ def fneig_matrix(atoms,
if use_forces:
forces.append(rv*nc[i]*nc[j]/r**3)
elif (ch_bond == bond) and (r >= 1.06 and r <= 1.12):
- fnm[i, j] = 0.5
+ fnm[i, j] = 1.0
if j > i:
bonds.append((i, j))
if use_forces:
@@ -99,15 +101,24 @@ def fneig_matrix(atoms,
return fnm, bonds
-def adj_matrix(bonds,
- forces=None):
+def adj_matrix(fneig_matrix,
+ bonds,
+ forces=None,
+ max_len=25):
"""
Calculates the adjacency matrix given the bond list.
bonds: list of bonds (tuple of indexes).
+ max_len: maximum amount of atoms in molecule.
forces: list of forces.
"""
n = len(bonds)
- am = array(zeros((n, n)))
+
+ if max_len < n:
+ print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+ 'greater than max_len. Using mol_n.']))
+ max_len = n
+
+ am = array(zeros((max_len, max_len)))
for i, bond_i in enumerate(bonds):
for j, bond_j in enumerate(bonds):
# Ignore the diagonal.
@@ -116,5 +127,5 @@ def adj_matrix(bonds,
if forces:
am[i, j] = dot(forces[i], forces[j])
else:
- am[i, j] = 1
+ am[i, j] = fneig_matrix[bond_i[0], bond_i[1]]
return am
diff --git a/ml_exp/bostuff.py b/ml_exp/bostuff.py
index 48cd14913..3e3f65e66 100644
--- a/ml_exp/bostuff.py
+++ b/ml_exp/bostuff.py
@@ -20,3 +20,80 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
+from numpy import array, zeros
+from collections import Counter
+from ml_exp.bob import check_bond
+
+
+def bok_cx(adj_matrix,
+ atoms,
+ max_n=25,
+ max_bond_len=325):
+ """
+ Creates the bag of k_cx using the adjacency matrix data.
+ adj_matrix: adjacency matrix.
+ max_n: maximum amount of atoms.
+ max_bond_len: maximum amount of bonds in molecule.
+ """
+ n = len(atoms)
+ bond_n = (n * n - n) / 2 + n
+ n_r = range(n)
+
+ if max_n < n:
+ print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+ 'greater than max_len. Using mol_n.']))
+ max_n = n
+
+ if max_bond_len < bond_n:
+ print(''.join(['Error. Molecule bond lenght (bond_n) is ',
+ 'greater than max_bond_len. Using bond_n.']))
+ max_bond_len = bond_n
+
+ # List where each bag data is stored.
+ bags = []
+ for i in n_r:
+ for j in n_r:
+ # Work only in the upper triangle of the coulomb matrix.
+ if j >= i:
+ # Get the string of the current bond.
+ if i == j:
+ current_bond = atoms[i]
+ else:
+ current_bond = ''.join(sorted([atoms[i], atoms[j]]))
+
+ # Check if that bond is already in a bag.
+ checker = check_bond(bags, current_bond)
+ # Either create a new bag or add values to an existing one.
+ if not checker[0]:
+ bags.append([current_bond, adj_matrix[i, j]])
+ else:
+ bags[checker[1]].append(adj_matrix[i, j])
+
+ # Create the actual bond list ordered.
+ atom_counter = Counter(atoms)
+ atom_list = sorted(list(set(atoms)))
+ bonds = []
+ for i, a_i in enumerate(atom_list):
+ if atom_counter[a_i] > 1:
+ for a_j in atom_list[i:]:
+ bonds.append(''.join(sorted([a_i, a_j])))
+ bonds = atom_list + bonds
+
+ # Create the final vector for the bok_cx.
+ bok_cx = array(zeros(max_bond_len), dtype=float)
+ c_i = 0
+ for i, bond in enumerate(bonds):
+ checker = check_bond(bags, bond)
+ if checker[0]:
+ for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
+ # Use c_i as the index for bok_cx if the zero padding should
+ # be at the end of the vector instead of between each bond.
+ bok_cx[i*max_n + j] = num
+ c_i += 1
+ # This is set to false because this was a debugging measure.
+ else:
+ print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
+ ' be found in the bags list. This could be',
+ ' a case where the atom is only present once',
+ ' in the molecule.']))
+ return bok_cx