From 5bf923679707fd1603a6f73ca4d0ae8ec39e7858 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> Date: Sat, 29 Feb 2020 07:40:33 -0700 Subject: Almost completely rewrite fnm to hd --- ml_exp/__init__.py | 4 ++-- ml_exp/compound.py | 38 +++++++++++++++++++++++--------------- ml_exp/representations.py | 36 +++++++++++++++++++----------------- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py index 48ece56c8..9e64d7754 100644 --- a/ml_exp/__init__.py +++ b/ml_exp/__init__.py @@ -22,7 +22,7 @@ SOFTWARE. """ from ml_exp.compound import Compound from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ - first_neighbor_matrix, adjacency_matrix, check_bond, bag_of_stuff + get_helping_data, adjacency_matrix, check_bond, bag_of_stuff from ml_exp.math import cholesky_solve from ml_exp.qm7db import qm7db from ml_exp.do_ml import simple_ml, do_ml @@ -30,7 +30,7 @@ from ml_exp.do_ml import simple_ml, do_ml __all__ = ['Compound', 'coulomb_matrix', 'lennard_jones_matrix', - 'first_neighbor_matrix', + 'get_helping_data', 'adjacency_matrix', 'check_bond', 'bag_of_stuff', diff --git a/ml_exp/compound.py b/ml_exp/compound.py index ceace6984..26e393510 100644 --- a/ml_exp/compound.py +++ b/ml_exp/compound.py @@ -23,7 +23,7 @@ SOFTWARE. import numpy as np from ml_exp.data import NUCLEAR_CHARGE from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ - first_neighbor_matrix, adjacency_matrix, bag_of_stuff + get_helping_data, adjacency_matrix, bag_of_stuff class Compound: @@ -46,7 +46,6 @@ class Compound: # Computed data. self.cm = None self.ljm = None - self.fnm = None self.am = None self.bob = None self.bo_atoms = None @@ -54,8 +53,11 @@ class Compound: self.bof = None # Helping data. + self.fnm = None self.bonds = None - self.forces = None + self.bonds_i = None + self.bonds_k = None + self.bonds_f = None if xyz is not None: self.read_xyz(xyz) @@ -101,28 +103,34 @@ class Compound: as_eig=as_eig, bohr_ru=bohr_ru) - def gen_am(self, - use_forces=False, + def gen_hd(self, size=23, bohr_ru=False): """ + Generate the helping data for use in Adjacency Matrix, for example. + size: compund size. + bohr_ru: if radius units should be in bohr's radius units. + """ + hp = get_helping_data(self.coordinates, + self.nc, + self.atoms, + size=size, + bohr_ru=bohr_ru) + + self.fnm, self.bonds, self.bonds_i, self.bonds_k, self.bonds_f = hp + + def gen_am(self, + use_forces=False, + size=23): + """ Generate the Adjacency Matrix for the compund. use_forces: if the use of forces instead of k_cx should be used. size: compound size. bohr_ru: if radius units should be in bohr's radius units. """ - # First, generate the first neighor matrix. - fnm_data = first_neighbor_matrix(self.coordinates, - self.atoms_nc, - self.atoms, - size=size, - use_forces=use_forces, - bohr_ru=bohr_ru) - self.fnm, self.bonds, self.forces = fnm_data - # Now, generate the adjacency matrix. self.am = adjacency_matrix(self.fnm, self.bonds, - self.forces, + self.bonds_f, size=size) def gen_bob(self, diff --git a/ml_exp/representations.py b/ml_exp/representations.py index 515821290..5e684314b 100644 --- a/ml_exp/representations.py +++ b/ml_exp/representations.py @@ -166,18 +166,17 @@ size. Arrays are not of the right shape.') return lj -def first_neighbor_matrix(coords, - nc, - atoms, - size=23, - use_forces=False, - bohr_ru=False): +def get_helping_data(coords, + nc, + atoms, + size=23, + bohr_ru=False): """ - Creates the First Neighbor Matrix from the molecule data given. + Creates helping data such as the First Neighbor Matrix for the compound. coords: compound coordinates. nc: nuclear charge data. atoms: list of atoms. - use_forces: if the use of forces instead of k_cx should be used. + size: compund size. bohr_ru: if radius units should be in bohr's radius units. NOTE: Bond distance of carbon to other elements are (for atoms present in the qm7 dataset): @@ -209,15 +208,15 @@ size. Arrays are not of the right shape.') co_bond = sorted(['C', 'O']) cn_bond = sorted(['C', 'N']) cs_bond = sorted(['C', 'S']) - - pos_bonds = {cc_bond: (1.19, 1.54), ch_bond: (1.06, 1.12), - co_bond: (1.43, 2.15), cn_bond: (1.47, 2.19), - cs_bond: (1.81, 2.55)} + pos_bonds = {cc_bond: (1.19, 1.54, 1.0), ch_bond: (1.06, 1.12, 1.0), + co_bond: (1.43, 2.15, 0.8), cn_bond: (1.47, 2.19, 1.0), + cs_bond: (1.81, 2.55, 0.7)} fnm = np.zeros((size, size), dtype=bool) - bonds = [] - forces = [] + bonds_i = [] + bonds_k = [] + bonds_f = [] for i, xyz_i in enumerate(coords): for j, xyz_j in enumerate(coords): # Ignore the diagonal. @@ -230,11 +229,14 @@ size. Arrays are not of the right shape.') r = np.linalg.norm(rv)/cr if r >= r_min and r <= r_max: fnm[i, j] = True + # Only add to the list if in the upper triangle. if j > i: - bonds.append((i, j)) - forces.append(rv*nc[i]*nc[j]/r**3) + bonds.append(bond) + bonds_i.append((i, j)) + bonds_k.append(pos_bonds[bond][2]) + bonds_f.append(rv*nc[i]*nc[j]/r**3) - return fnm, bonds, forces + return fnm, bonds, bonds_i, bonds_k, bonds_f def adjacency_matrix(fnm, -- cgit v1.2.3-54-g00ecf