From 2572f9581b24547692a5654a1d452690a2cbcad6 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> Date: Sat, 22 Feb 2020 16:36:40 -0700 Subject: Add adj matrix to compound) --- ml_exp/compound.py | 35 ++++++++++++++++--- ml_exp/representations.py | 89 ++++++++++++++++++++++++++++------------------- 2 files changed, 83 insertions(+), 41 deletions(-) diff --git a/ml_exp/compound.py b/ml_exp/compound.py index c6d5f8a64..d4dc9b7c0 100644 --- a/ml_exp/compound.py +++ b/ml_exp/compound.py @@ -22,7 +22,8 @@ SOFTWARE. """ import numpy as np from ml_exp.data import NUCLEAR_CHARGE -from ml_exp.representations import coulomb_matrix, lennard_jones_matrix +from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ + first_neighbor_matrix, adjacency_matrix class Compound: @@ -42,6 +43,7 @@ class Compound: self.cm = None self.ljm = None + self.am = None self.bob = None if xyz is not None: @@ -55,13 +57,13 @@ class Compound: Generate the Coulomb Matrix for the compund. size: compound size. as_eig: if the representation should be as the eigenvalues. - bhor_ru: if radius units should be in bohr's radius units. + bohr_ru: if radius units should be in bohr's radius units. """ self.cm = coulomb_matrix(self.coordinates, self.atoms_nc, size=size, as_eig=as_eig, - bhor_ru=bohr_ru) + bohr_ru=bohr_ru) def gen_ljm(self, diag_value=None, @@ -77,7 +79,7 @@ class Compound: epsilon: epsilon value. size: compound size. as_eig: if the representation should be as the eigenvalues. - bhor_ru: if radius units should be in bohr's radius units. + bohr_ru: if radius units should be in bohr's radius units. """ self.ljm = lennard_jones_matrix(self.coordinates, self.atoms_nc, @@ -86,7 +88,30 @@ class Compound: epsilon=epsilon, size=size, as_eig=as_eig, - bhor_ru=bohr_ru) + bohr_ru=bohr_ru) + + def gen_am(self, + use_forces=False, + size=23, + bohr_ru=False): + """ + Generate the Adjacency Matrix for the compund. + use_forces: if the use of forces instead of k_cx should be used. + size: compound size. + bohr_ru: if radius units should be in bohr's radius units. + """ + # First, generate the first neighor matrix. + fnm, bonds, forces = first_neighbor_matrix(self.coordinates, + self.atoms_nc, + self.atoms, + use_forces=use_forces, + bohr_ru=bohr_ru) + + # Now, generate the adjacency matrix. + self.am = adjacency_matrix(fnm, + bonds, + forces, + size=size) def read_xyz(self, filename): diff --git a/ml_exp/representations.py b/ml_exp/representations.py index f503b5987..483da2898 100644 --- a/ml_exp/representations.py +++ b/ml_exp/representations.py @@ -28,16 +28,16 @@ def coulomb_matrix(coords, nc, size=23, as_eig=True, - bhor_ru=False): + bohr_ru=False): """ Creates the Coulomb Matrix from the molecule data given. coords: compound coordinates. nc: nuclear charge data. size: compound size. as_eig: if the representation should be as the eigenvalues. - bhor_ru: if radius units should be in bohr's radius units. + bohr_ru: if radius units should be in bohr's radius units. """ - if bhor_ru: + if bohr_ru: cr = 0.52917721067 else: cr = 1 @@ -111,7 +111,7 @@ def lennard_jones_matrix(coords, epsilon=1.0, size=23, as_eig=True, - bhor_ru=False): + bohr_ru=False): """ Creates the Lennard-Jones Matrix from the molecule data given. coords: compound coordinates. @@ -121,9 +121,9 @@ def lennard_jones_matrix(coords, epsilon: epsilon value. size: compound size. as_eig: if the representation should be as the eigenvalues. - bhor_ru: if radius units should be in bohr's radius units. + bohr_ru: if radius units should be in bohr's radius units. """ - if bhor_ru: + if bohr_ru: cr = 0.52917721067 else: cr = 1 @@ -206,16 +206,18 @@ def lennard_jones_matrix(coords, return lj -def fneig_matrix(atoms, - xyz, - nc=None, - use_forces=False): +def first_neighbor_matrix(coords, + nc, + atoms, + use_forces=False, + bohr_ru=False): """ - Creates the first neighbor matrix of the given molecule data. + Creates the First Neighbor Matrix from the molecule data given. + coords: compound coordinates. + nc: nuclear charge data. atoms: list of atoms. - xyz: matrix of atomic coords. - nc: nuclear charge info. use_forces: if the use of forces instead of k_cx should be used. + bohr_ru: if radius units should be in bohr's radius units. NOTE: Bond distance of carbon to other elements are (for atoms present in the qm7 dataset): C: 1.20 - 1.53 A @@ -224,9 +226,17 @@ def fneig_matrix(atoms, N: 1.47 - 2.10 A S: 1.81 - 2.55 A """ - if use_forces and nc is None: - # print(f'Error. use_forces={use_forces} but nc={nc}', 'RED') - return None + if bohr_ru: + cr = 0.52917721067 + else: + cr = 1 + + n = coords.shape[0] + + if not n == nc.shape[0]: + raise ValueError('Compound size is different than the nuclear charge\ + size. Arrays are not of the right shape.') + # Possible bonds. cc_bond = sorted(['C', 'C']) ch_bond = sorted(['C', 'H']) @@ -234,19 +244,21 @@ def fneig_matrix(atoms, cn_bond = sorted(['C', 'N']) cs_bond = sorted(['C', 'S']) - # Number of atoms, empty matrix and bond list. - n = len(atoms) - fnm = np.empty((n, n), dtype=float) + if use_forces: + fnm = np.empty((n, n), dtype=float) + else: + fnm = np.empty((n, n), dtype=int) + bonds = [] forces = [] - for i, xyz_i in enumerate(xyz): - for j, xyz_j in enumerate(xyz): + for i, xyz_i in enumerate(coords): + for j, xyz_j in enumerate(coords): # Ignore the diagonal. if i != j: bond = sorted([atoms[i], atoms[j]]) rv = xyz_i - xyz_j - r = np.linalg.norm(rv) + r = np.linalg.norm(rv)/cr # Check for each type of bond. if (cc_bond == bond) and (r >= 1.20 and r <= 1.53): @@ -279,29 +291,33 @@ def fneig_matrix(atoms, bonds.append((i, j)) if use_forces: forces.append(rv*nc[i]*nc[j]/r**3) - if use_forces: - return fnm, bonds, forces - return fnm, bonds + + return fnm, bonds, forces -def adj_matrix(fneig_matrix, - bonds, - forces=None, - max_len=25): +def adjacency_matrix(fnm, + bonds, + forces, + size=23): """ Calculates the adjacency matrix given the bond list. + fnm: first neighbour matrix. bonds: list of bonds (tuple of indexes). - max_len: maximum amount of atoms in molecule. forces: list of forces. + size: compund size. """ n = len(bonds) - if max_len < n: - print(''.join(['Error. Molecule matrix dimension (mol_n) is ', - 'greater than max_len. Using mol_n.'])) - max_len = n + if size < n: + print('Error. Compound size (n) is greater han (size). Using (n)\ + instead of (size).') + size = n + + if forces: + am = np.empty((size, size), dtype=float) + else: + am = np.empty((size, size), dtype=int) - am = np.empty((max_len, max_len), dtype=float) for i, bond_i in enumerate(bonds): for j, bond_j in enumerate(bonds): # Ignore the diagonal. @@ -310,5 +326,6 @@ def adj_matrix(fneig_matrix, if forces: am[i, j] = np.dot(forces[i], forces[j]) else: - am[i, j] = fneig_matrix[bond_i[0], bond_i[1]] + am[i, j] = fnm[bond_i[0], bond_i[1]] + return am -- cgit v1.2.3-54-g00ecf