summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Luevano Alvarado <55825613+luevano@users.noreply.github.com>2020-02-23 21:16:16 -0700
committerDavid Luevano Alvarado <55825613+luevano@users.noreply.github.com>2020-02-23 21:16:16 -0700
commit321681e542509869568e9ed610d821c1d9d9d5e6 (patch)
treec51b5179ef73bc4a86171e4665e1d42e5bcd320b
parent336654366a0e0a15263bd0d93bcb81ef48fcb040 (diff)
Move bob to representations
-rw-r--r--ml_exp/bob.py117
-rw-r--r--ml_exp/representations.py94
2 files changed, 94 insertions, 117 deletions
diff --git a/ml_exp/bob.py b/ml_exp/bob.py
deleted file mode 100644
index 86efecdb4..000000000
--- a/ml_exp/bob.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from numpy import array, zeros
-from collections import Counter
-
-
-def check_bond(bags,
- bond):
- """
- Checks if a bond is in a bag.
- bags: list of bags, containing a bag per entry, which in turn
- contains a list of bond-values.
- bond: bond to check.
- """
- if bags == []:
- return False, None
-
- for i, bag in enumerate(bags):
- if bag[0] == bond:
- return True, i
-
- return False, None
-
-
-def bob(c_matrix,
- atoms,
- max_n=25,
- max_bond_len=325):
- """
- Creates the bag of bond using the coulomb matrix data.
- c_matrix: coulomb matrix.
- atoms: list of atoms.
- max_n: maximum amount of atoms.
- max_bond_len: maximum amount of bonds in molecule.
- """
- n = len(atoms)
- bond_n = (n * n - n) / 2 + n
- n_r = range(n)
-
- if max_n < n:
- print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
- 'greater than max_len. Using mol_n.']))
- max_n = n
-
- if max_bond_len < bond_n:
- print(''.join(['Error. Molecule bond lenght (bond_n) is ',
- 'greater than max_bond_len. Using bond_n.']))
- max_bond_len = bond_n
-
- # List where each bag data is stored.
- bags = []
- for i in n_r:
- for j in n_r:
- # Work only in the upper triangle of the coulomb matrix.
- if j >= i:
- # Get the string of the current bond.
- if i == j:
- current_bond = atoms[i]
- else:
- current_bond = ''.join(sorted([atoms[i], atoms[j]]))
-
- # Check if that bond is already in a bag.
- checker = check_bond(bags, current_bond)
- # Either create a new bag or add values to an existing one.
- if not checker[0]:
- bags.append([current_bond, c_matrix[i, j]])
- else:
- bags[checker[1]].append(c_matrix[i, j])
-
- # Create the actual bond list ordered.
- atom_counter = Counter(atoms)
- atom_list = sorted(list(set(atoms)))
- bonds = []
- for i, a_i in enumerate(atom_list):
- if atom_counter[a_i] > 1:
- for a_j in atom_list[i:]:
- bonds.append(''.join(sorted([a_i, a_j])))
- bonds = atom_list + bonds
-
- # Create the final vector for the bob.
- bob = array(zeros(max_bond_len), dtype=float)
- c_i = 0
- for i, bond in enumerate(bonds):
- checker = check_bond(bags, bond)
- if checker[0]:
- for j, num in enumerate(sorted(bags[checker[1]][1:])[::-1]):
- # Use c_i as the index for bob if the zero padding should
- # be at the end of the vector instead of between each bond.
- bob[i*max_n + j] = num
- c_i += 1
- # This is set to false because this was a debugging measure.
- else:
- print(''.join([f'Error. Bond {bond} from bond list coudn\'t',
- ' be found in the bags list. This could be',
- ' a case where the atom is only present once',
- ' in the molecule.']))
- return bob
diff --git a/ml_exp/representations.py b/ml_exp/representations.py
index 11c30dfd2..3119a4a88 100644
--- a/ml_exp/representations.py
+++ b/ml_exp/representations.py
@@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
import numpy as np
+from collections import Counter
def coulomb_matrix(coords,
@@ -287,3 +288,96 @@ def adjacency_matrix(fnm,
am[i, j] = fnm[bond_i[0], bond_i[1]]
return am
+
+
+def check_bond(bags,
+               bond):
+    """
+    Finds the bag that holds the values of the given bond.
+    bags: list of bags; each bag is a list whose first entry is the
+        bond label, followed by the values stored for that bond.
+    bond: bond label to search for.
+    Returns (True, index) for the first bag whose label equals bond,
+        or (False, None) when there is no such bag.
+    """
+    # An empty bags list simply yields no candidates.
+    matches = (k for k, entry in enumerate(bags) if entry[0] == bond)
+    found = next(matches, None)
+    if found is None:
+        return False, None
+    return True, found
+
+
+def bob(c_matrix,
+        atoms,
+        max_n=25,
+        max_bond_len=325):
+    """
+    Creates the bag of bonds vector using the coulomb matrix data.
+    c_matrix: coulomb matrix, read as c_matrix[i, j].
+    atoms: list of atom labels, one per row of the coulomb matrix.
+    max_n: maximum amount of atoms; also the offset between the
+        start of consecutive bags in the output vector.
+    max_bond_len: maximum amount of bonds in molecule; length of the
+        output vector.
+    Both limits are raised (with a printed message) when the molecule
+    is larger than them.
+    Returns a float vector in which the k-th bag starts at index
+    k*max_n, holds its values in descending order and is zero padded.
+    Bags are ordered as: sorted atoms first, then sorted atom pairs.
+    """
+    n = len(atoms)
+    # Integer arithmetic: bond_n may end up being used as an array size,
+    # and numpy rejects float sizes.
+    bond_n = n * (n + 1) // 2
+
+    if max_n < n:
+        print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
+                       'greater than max_len. Using mol_n.']))
+        max_n = n
+
+    if max_bond_len < bond_n:
+        print(''.join(['Error. Molecule bond lenght (bond_n) is ',
+                       'greater than max_bond_len. Using bond_n.']))
+        max_bond_len = bond_n
+
+    # Group the upper triangle of the coulomb matrix by bond label.
+    # A dict gives direct access to the bag of a given label.
+    bags = {}
+    for i in range(n):
+        for j in range(i, n):
+            # Diagonal entries are bagged under the atom label itself.
+            if i == j:
+                current_bond = atoms[i]
+            else:
+                current_bond = ''.join(sorted([atoms[i], atoms[j]]))
+            bags.setdefault(current_bond, []).append(c_matrix[i, j])
+
+    # Create the actual bond list ordered.
+    atom_counter = Counter(atoms)
+    atom_list = sorted(atom_counter)
+    bonds = list(atom_list)
+    for i, a_i in enumerate(atom_list):
+        for a_j in atom_list[i:]:
+            # A same-atom pair only exists when that atom appears more
+            # than once; pairs of different atoms always exist, so
+            # they must not be dropped for atoms present only once.
+            if a_i != a_j or atom_counter[a_i] > 1:
+                bonds.append(a_i + a_j)
+
+    # Create the final vector for the bob.
+    # np.zeros: this module imports numpy as np, not array/zeros.
+    bob = np.zeros(max_bond_len, dtype=float)
+    for i, bond in enumerate(bonds):
+        # Every label in bonds has a bag, as both come from atoms.
+        values = sorted(bags[bond], reverse=True)
+        for j, num in enumerate(values):
+            # NOTE(review): bag i is written at offset i*max_n, so a
+            # bag with more than max_n values runs into the next bag,
+            # and any index >= max_bond_len raises IndexError.
+            # Use a running index instead of i*max_n if the zero
+            # padding should be at the end of the vector instead of
+            # between each bond.
+            bob[i*max_n + j] = num
+
+    return bob