summaryrefslogtreecommitdiff
path: root/lj_matrix
diff options
context:
space:
mode:
authorDavid Luevano <55825613+luevano@users.noreply.github.com>2020-01-23 18:59:34 -0700
committerDavid Luevano <55825613+luevano@users.noreply.github.com>2020-01-23 18:59:34 -0700
commit6e9b439d4b3d303b246d9e66b3ed3852e3fad9a5 (patch)
treee9c04b1af5552fc149f26553e7e53735f57cf435 /lj_matrix
parent5a48f2e69e301875c7d86f40ae1dab5d27f7fd0f (diff)
Rename lj_matrix to ml_exp
Diffstat (limited to 'lj_matrix')
-rw-r--r--lj_matrix/__init__.py48
-rw-r--r--lj_matrix/__main__.py38
-rw-r--r--lj_matrix/c_matrix.py179
-rw-r--r--lj_matrix/cholesky_solve.py64
-rw-r--r--lj_matrix/do_ml.py227
-rw-r--r--lj_matrix/frob_norm.py51
-rw-r--r--lj_matrix/gauss_kernel.py49
-rw-r--r--lj_matrix/lj_matrix.py222
-rw-r--r--lj_matrix/misc.py174
-rw-r--r--lj_matrix/parallel_create_matrices.py85
-rw-r--r--lj_matrix/read_qm7_data.py145
-rw-r--r--lj_matrix/version.py23
12 files changed, 0 insertions, 1305 deletions
diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
deleted file mode 100644
index a430aac68..000000000
--- a/lj_matrix/__init__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from lj_matrix.read_qm7_data import read_nc_data, read_db_data, read_qm7_data
-from lj_matrix.c_matrix import c_matrix, c_matrix_multiple
-from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple
-from lj_matrix.frob_norm import frob_norm
-from lj_matrix.gauss_kernel import gauss_kernel
-from lj_matrix.cholesky_solve import cholesky_solve
-from lj_matrix.do_ml import do_ml
-from lj_matrix.parallel_create_matrices import parallel_create_matrices
-from lj_matrix.misc import plot_benchmarks
-
-
-# If somebody does "from package import *", this is what they will
-# be able to access:
-__all__ = ['read_nc_data',
- 'read_db_data',
- 'read_qm7_data',
- 'c_matrix',
- 'c_matrix_multiple',
- 'lj_matrix',
- 'lj_matrix_multiple',
- 'frob_norm',
- 'gauss_kernel',
- 'cholesky_solve',
- 'do_ml',
- 'parallel_create_matrices',
- 'plot_benchmarks']
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
deleted file mode 100644
index 688e5adcc..000000000
--- a/lj_matrix/__main__.py
+++ /dev/null
@@ -1,38 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from lj_matrix.do_ml import do_ml
-# from lj_matrix.misc import plot_benchmarks
-
-if __name__ == '__main__':
- do_ml(min_training_size=1500,
- max_training_size=2000,
- training_increment_size=500,
- test_size=None,
- ljm_diag_value=None,
- ljm_sigma=1.0,
- ljm_epsilon=1.0,
- r_seed=111,
- save_benchmarks=False,
- show_msgs=True)
- # plot_benchmarks()
- print('OK!')
diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py
deleted file mode 100644
index f21ccfd8c..000000000
--- a/lj_matrix/c_matrix.py
+++ /dev/null
@@ -1,179 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import time
-import math
-import numpy as np
-from numpy.linalg import eig
-from lj_matrix.misc import printc
-
-
-def c_matrix(mol_data,
- nc_data,
- max_len=25,
- as_eig=True,
- bohr_radius_units=False):
- """
- Creates the Coulomb Matrix from the molecule data given.
- mol_data: molecule data, matrix of atom coordinates.
- nc_data: nuclear charge data, array of atom data.
- max_len: maximum amount of atoms in molecule.
- as_eig: if data should be returned as matrix or array of eigenvalues.
- bohr_radius_units: if units should be in bohr's radius units.
- """
- if bohr_radius_units:
- conversion_rate = 0.52917721067
- else:
- conversion_rate = 1
-
- mol_n = len(mol_data)
- mol_nr = range(mol_n)
-
- if not mol_n == len(nc_data):
- print(''.join(['Error. Molecule matrix dimension is different ',
- 'than the nuclear charge array dimension.']))
- else:
- if max_len < mol_n:
- print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
- 'greater than max_len. Using mol_n.']))
- max_len = None
-
- if max_len:
- cm = np.zeros((max_len, max_len))
- ml_r = range(max_len)
-
- # Actual calculation of the coulomb matrix.
- for i in ml_r:
- if i < mol_n:
- x_i = mol_data[i, 0]
- y_i = mol_data[i, 1]
- z_i = mol_data[i, 2]
- Z_i = nc_data[i]
- else:
- break
-
- for j in ml_r:
- if j < mol_n:
- x_j = mol_data[j, 0]
- y_j = mol_data[j, 1]
- z_j = mol_data[j, 2]
- Z_j = nc_data[j]
-
- x = (x_i-x_j)**2
- y = (y_i-y_j)**2
- z = (z_i-z_j)**2
-
- if i == j:
- cm[i, j] = (0.5*Z_i**2.4)
- else:
- cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x
- + y
- + z))
- else:
- break
-
- # Now the value will be returned.
- if as_eig:
- cm_sorted = np.sort(eig(cm)[0])[::-1]
- # Thanks to SO for the following lines of code.
- # https://stackoverflow.com/a/43011036
-
- # Keep zeros at the end.
- mask = cm_sorted != 0.
- f_mask = mask.sum(0, keepdims=1) >\
- np.arange(cm_sorted.shape[0]-1, -1, -1)
-
- f_mask = f_mask[::-1]
- cm_sorted[f_mask] = cm_sorted[mask]
- cm_sorted[~f_mask] = 0.
-
- return cm_sorted
-
- else:
- return cm
-
- else:
- cm_temp = []
- # Actual calculation of the coulomb matrix.
- for i in mol_nr:
- x_i = mol_data[i, 0]
- y_i = mol_data[i, 1]
- z_i = mol_data[i, 2]
- Z_i = nc_data[i]
-
- cm_row = []
- for j in mol_nr:
- x_j = mol_data[j, 0]
- y_j = mol_data[j, 1]
- z_j = mol_data[j, 2]
- Z_j = nc_data[j]
-
- x = (x_i-x_j)**2
- y = (y_i-y_j)**2
- z = (z_i-z_j)**2
-
- if i == j:
- cm_row.append(0.5*Z_i**2.4)
- else:
- cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x
- + y
- + z))
-
- cm_temp.append(np.array(cm_row))
-
- cm = np.array(cm_temp)
- # Now the value will be returned.
- if as_eig:
- return np.sort(eig(cm)[0])[::-1]
- else:
- return cm
-
-
-def c_matrix_multiple(mol_data,
- nc_data,
- pipe=None,
- max_len=25,
- as_eig=True,
- bohr_radius_units=False):
- """
- Calculates the Coulomb Matrix of multiple molecules.
- mol_data: molecule data, matrix of atom coordinates.
- nc_data: nuclear charge data, array of atom data.
- pipe: for multiprocessing purposes. Sends the data calculated
- through a pipe.
- max_len: maximum amount of atoms in molecule.
- as_eig: if data should be returned as matrix or array of eigenvalues.
- bohr_radius_units: if units should be in bohr's radius units.
- """
- printc('Coulomb Matrices calculation started.', 'CYAN')
- tic = time.perf_counter()
-
- cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units)
- for mol, nc in zip(mol_data, nc_data)])
-
- toc = time.perf_counter()
- printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN')
-
- if pipe:
- pipe.send(cm_data)
-
- return cm_data
diff --git a/lj_matrix/cholesky_solve.py b/lj_matrix/cholesky_solve.py
deleted file mode 100644
index bc6a572a3..000000000
--- a/lj_matrix/cholesky_solve.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import numpy as np
-from numpy.linalg import cholesky
-
-
-def cholesky_solve(K, y):
- """
- Applies Cholesky decomposition to obtain the 'alpha coeficients'.
- K: kernel.
- y: known parameters.
- """
- # The initial mathematical problem is to solve Ka=y.
-
- # First, add a small lambda value.
- K[np.diag_indices_from(K)] += 1e-8
-
- # Get the Cholesky decomposition of the kernel.
- L = cholesky(K)
- size = len(L)
-
- # Solve Lx=y for x.
- x = np.zeros(size)
- x[0] = y[0] / L[0, 0]
- for i in range(1, size):
- temp_sum = 0.0
- for j in range(i):
- temp_sum += L[i, j] * x[j]
- x[i] = (y[i] - temp_sum) / L[i, i]
-
- # Now, solve LTa=x for a.
- L2 = L.T
- a = np.zeros(size)
- a_ms = size - 1
- a[a_ms] = x[a_ms] / L2[a_ms, a_ms]
- # Because of the form of L2 (upper triangular matriz), an inversion of
- # range() needs to be done.
- for i in range(0, a_ms)[::-1]:
- temp_sum = 0.0
- for j in range(i, size)[::-1]:
- temp_sum += L2[i, j] * a[j]
- a[i] = (x[i] - temp_sum) / L2[i, i]
-
- return a
diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py
deleted file mode 100644
index 25a55e823..000000000
--- a/lj_matrix/do_ml.py
+++ /dev/null
@@ -1,227 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import time
-import numpy as np
-from multiprocessing import Process, Pipe
-from lj_matrix.misc import printc
-from lj_matrix.gauss_kernel import gauss_kernel
-from lj_matrix.cholesky_solve import cholesky_solve
-from lj_matrix.read_qm7_data import read_qm7_data
-from lj_matrix.parallel_create_matrices import parallel_create_matrices
-
-
-def ml(desc_data,
- energy_data,
- training_size,
- desc_type=None,
- pipe=None,
- test_size=None,
- sigma=1000.0,
- show_msgs=True):
- """
- Does the ML methodology.
- desc_data: descriptor (or representation) data.
- energy_data: energy data associated with desc_data.
- training_size: size of the training set to use.
- desc_type: string with the name of the descriptor used.
- pipe: for multiprocessing purposes. Sends the data calculated
- through a pipe.
- test_size: size of the test set to use. If no size is given,
- the last remaining molecules are used.
- sigma: depth of the kernel.
- show_msgs: Show debug messages or not.
- NOTE: desc_type is just a string and is only for identification purposes.
- Also, training is done with the first part of the data and
- testing with the ending part of the data.
- """
- tic = time.perf_counter()
- # Initial calculations for later use.
- d_len = len(desc_data)
- e_len = len(energy_data)
-
- if not desc_type:
- desc_type = 'NOT SPECIFIED'
-
- if d_len != e_len:
- printc(''.join(['ERROR. Descriptor data size different ',
- 'than energy data size.']), 'RED')
- return None
-
- if training_size >= d_len:
- printc('ERROR. Training size greater or equal than data size.', 'RED')
- return None
-
- if not test_size:
- test_size = d_len - training_size
- if test_size > 1500:
- test_size = 1500
-
- if show_msgs:
- printc('{} ML started.'.format(desc_type), 'GREEN')
- printc('\tTraining size: {}'.format(training_size), 'CYAN')
- printc('\tTest size: {}'.format(test_size), 'CYAN')
- printc('\tSigma: {}'.format(sigma), 'CYAN')
-
- X_training = desc_data[:training_size]
- Y_training = energy_data[:training_size]
- K_training = gauss_kernel(X_training, X_training, sigma)
- alpha_ = cholesky_solve(K_training, Y_training)
-
- X_test = desc_data[-test_size:]
- Y_test = energy_data[-test_size:]
- K_test = gauss_kernel(X_test, X_training, sigma)
- Y_predicted = np.dot(K_test, alpha_)
-
- mae = np.mean(np.abs(Y_predicted - Y_test))
- if show_msgs:
- printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
-
- toc = time.perf_counter()
- tictoc = toc - tic
- if show_msgs:
- printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
- 'GREEN')
- printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
- printc('\t\tTest size: {}'.format(test_size), 'CYAN')
- printc('\t\tSigma: {}'.format(sigma), 'CYAN')
-
- if pipe:
- pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
-
- return mae, tictoc
-
-
-def do_ml(min_training_size,
- max_training_size=None,
- training_increment_size=500,
- test_size=None,
- ljm_diag_value=None,
- ljm_sigma=1.0,
- ljm_epsilon=1.0,
- r_seed=111,
- save_benchmarks=False,
- max_len=25,
- as_eig=True,
- bohr_radius_units=False,
- sigma=1000.0,
- show_msgs=True):
- """
- Main function that does the whole ML process.
- min_training_size: minimum training size.
- max_training_size: maximum training size.
- training_increment_size: training increment size.
- test_size: size of the test set to use. If no size is given,
- the last remaining molecules are used.
- ljm_diag_value: if a special diagonal value should be used in lj matrix.
- ljm_sigma: sigma value for lj matrix.
- ljm_epsilon: epsilon value for lj matrix.
- r_seed: random seed to use for the shuffling.
- save_benchmarks: if benchmarks should be saved.
- max_len: maximum amount of atoms in molecule.
- as_eig: if data should be returned as matrix or array of eigenvalues.
- bohr_radius_units: if units should be in bohr's radius units.
- sigma: depth of the kernel.
- show_msgs: Show debug messages or not.
- """
- # Initialization time.
- init_time = time.perf_counter()
- if not max_training_size:
- max_training_size = min_training_size + training_increment_size
-
- # Data reading.
- molecules, nuclear_charge, energy_pbe0, energy_delta =\
- read_qm7_data(r_seed)
-
- # Matrices calculation.
- cm_data, ljm_data = parallel_create_matrices(molecules,
- nuclear_charge,
- ljm_diag_value,
- ljm_sigma,
- ljm_epsilon,
- max_len,
- as_eig,
- bohr_radius_units)
-
- # ML calculation.
- procs = []
- cm_pipes = []
- ljm_pipes = []
- for i in range(min_training_size,
- max_training_size + 1,
- training_increment_size):
- cm_recv, cm_send = Pipe(False)
- p1 = Process(target=ml,
- args=(cm_data,
- energy_pbe0,
- i,
- 'CM',
- cm_send,
- test_size,
- sigma,
- show_msgs))
- procs.append(p1)
- cm_pipes.append(cm_recv)
- p1.start()
-
- ljm_recv, ljm_send = Pipe(False)
- p2 = Process(target=ml,
- args=(ljm_data,
- energy_pbe0,
- i,
- 'L-JM',
- ljm_send,
- test_size,
- sigma,
- show_msgs))
- procs.append(p2)
- ljm_pipes.append(ljm_recv)
- p2.start()
-
- cm_bench_results = []
- ljm_bench_results = []
- for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes):
- cm_bench_results.append(cd_pipe.recv())
- ljm_bench_results.append(ljd_pipe.recv())
-
- for proc in procs:
- proc.join()
-
- if save_benchmarks:
- with open('data\\benchmarks.csv', 'a') as save_file:
- # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,',
- # 'mae,time,lj_s,lj_e,date_ran\n']))
- ltime = time.localtime()[:3][::-1]
- ljm_se = ',' + str(ljm_sigma) + ',' + str(ljm_epsilon) + ','
- date = '/'.join([str(field) for field in ltime])
- for cm, ljm, in zip(cm_bench_results, ljm_bench_results):
- cm_text = ','.join([str(field) for field in cm])\
- + ',' + date + '\n'
- ljm_text = ','.join([str(field) for field in ljm])\
- + ljm_se + date + '\n'
- save_file.write(cm_text)
- save_file.write(ljm_text)
-
- # End of program
- end_time = time.perf_counter()
- printc('Program took {:.4f} seconds.'.format(end_time - init_time),
- 'CYAN')
diff --git a/lj_matrix/frob_norm.py b/lj_matrix/frob_norm.py
deleted file mode 100644
index 4c3a2945d..000000000
--- a/lj_matrix/frob_norm.py
+++ /dev/null
@@ -1,51 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import math
-
-
-def frob_norm(array):
- """
- Calculates the frobenius norm of a given array or matrix.
- array: array of data.
- """
-
- arr_sh_len = len(array.shape)
- arr_range = range(len(array))
- fn = 0.0
-
- # If it is a 'vector'.
- if arr_sh_len == 1:
- for i in arr_range:
- fn += array[i]*array[i]
-
- return math.sqrt(fn)
-
- # If it is a matrix.
- elif arr_sh_len == 2:
- for i in arr_range:
- for j in arr_range:
- fn += array[i, j]*array[i, j]
-
- return math.sqrt(fn)
- else:
- print('Error. Array size greater than 2 ({}).'.format(arr_sh_len))
diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py
deleted file mode 100644
index 5dd8e6406..000000000
--- a/lj_matrix/gauss_kernel.py
+++ /dev/null
@@ -1,49 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import math
-import numpy as np
-from lj_matrix.frob_norm import frob_norm
-
-
-def gauss_kernel(X_1, X_2, sigma):
- """
- Calculates the Gaussian Kernel.
- X_1: first representations.
- X_2: second representations.
- sigma: kernel width.
- """
- x1_l = len(X_1)
- x1_range = range(x1_l)
- x2_l = len(X_2)
- x2_range = range(x2_l)
-
- inv_sigma = -0.5 / (sigma*sigma)
-
- K = np.zeros((x1_l, x2_l))
- for i in x1_range:
- for j in x2_range:
- f_norm = frob_norm(X_1[i] - X_2[j])
- # print(f_norm)
- K[i, j] = math.exp(inv_sigma * f_norm)
-
- return K
diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py
deleted file mode 100644
index 6739ae283..000000000
--- a/lj_matrix/lj_matrix.py
+++ /dev/null
@@ -1,222 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import time
-import math
-import numpy as np
-from numpy.linalg import eig
-from lj_matrix.misc import printc
-
-
-def lj_matrix(mol_data,
- nc_data,
- diag_value=None,
- sigma=1.0,
- epsilon=1.0,
- max_len=25,
- as_eig=True,
- bohr_radius_units=False):
- """
- Creates the Lennard-Jones Matrix from the molecule data given.
- mol_data: molecule data, matrix of atom coordinates.
- nc_data: nuclear charge data, array of atom data.
- diag_value: if special diagonal value is to be used.
- sigma: sigma value.
- epsilon: epsilon value.
- max_len: maximum amount of atoms in molecule.
- as_eig: if data should be returned as matrix or array of eigenvalues.
- bohr_radius_units: if units should be in bohr's radius units.
- """
- if bohr_radius_units:
- conversion_rate = 0.52917721067
- else:
- conversion_rate = 1
-
- mol_n = len(mol_data)
- mol_nr = range(mol_n)
-
- if not mol_n == len(nc_data):
- print(''.join(['Error. Molecule matrix dimension is different ',
- 'than the nuclear charge array dimension.']))
- else:
- if max_len < mol_n:
- print(''.join(['Error. Molecule matrix dimension (mol_n) is ',
- 'greater than max_len. Using mol_n.']))
- max_len = None
-
- if max_len:
- lj = np.zeros((max_len, max_len))
- ml_r = range(max_len)
-
- # Actual calculation of the coulomb matrix.
- for i in ml_r:
- if i < mol_n:
- x_i = mol_data[i, 0]
- y_i = mol_data[i, 1]
- z_i = mol_data[i, 2]
- Z_i = nc_data[i]
- else:
- break
-
- for j in ml_r:
- if j < mol_n:
- x_j = mol_data[j, 0]
- y_j = mol_data[j, 1]
- z_j = mol_data[j, 2]
-
- x = (x_i-x_j)**2
- y = (y_i-y_j)**2
- z = (z_i-z_j)**2
-
- if i == j:
- if diag_value is None:
- lj[i, j] = (0.5*Z_i**2.4)
- else:
- lj[i, j] = diag_value
- else:
- # Calculations are done after i==j is checked
- # so no division by zero is done.
-
- # A little play with r exponents
- # so no square root is calculated.
- # Conversion factor is included in r^2.
-
- # 1/r^2
- r_2 = sigma**2/(conversion_rate**2*(x + y + z))
-
- r_6 = math.pow(r_2, 3)
- r_12 = math.pow(r_6, 2)
- lj[i, j] = (4*epsilon*(r_12 - r_6))
- else:
- break
-
- # Now the value will be returned.
- if as_eig:
- lj_sorted = np.sort(eig(lj)[0])[::-1]
- # Thanks to SO for the following lines of code.
- # https://stackoverflow.com/a/43011036
-
- # Keep zeros at the end.
- mask = lj_sorted != 0.
- f_mask = mask.sum(0, keepdims=1) >\
- np.arange(lj_sorted.shape[0]-1, -1, -1)
-
- f_mask = f_mask[::-1]
- lj_sorted[f_mask] = lj_sorted[mask]
- lj_sorted[~f_mask] = 0.
-
- return lj_sorted
-
- else:
- return lj
-
- else:
- lj_temp = []
- # Actual calculation of the coulomb matrix.
- for i in mol_nr:
- x_i = mol_data[i, 0]
- y_i = mol_data[i, 1]
- z_i = mol_data[i, 2]
- Z_i = nc_data[i]
-
- lj_row = []
- for j in mol_nr:
- x_j = mol_data[j, 0]
- y_j = mol_data[j, 1]
- z_j = mol_data[j, 2]
-
- x = (x_i-x_j)**2
- y = (y_i-y_j)**2
- z = (z_i-z_j)**2
-
- if i == j:
- if not diag_value:
- lj_row.append(0.5*Z_i**2.4)
- else:
- lj_row.append(diag_value)
- else:
- # Calculations are done after i==j is checked
- # so no division by zero is done.
-
- # A little play with r exponents
- # so no square root is calculated.
- # Conversion factor is included in r^2.
-
- # 1/r^2
- r_2 = sigma**2/(conversion_rate**2*(x + y + z))
-
- r_6 = math.pow(r_2, 3)
- r_12 = math.pow(r_6, 2)
- lj_row.append(4*epsilon*(r_12 - r_6))
-
- lj_temp.append(np.array(lj_row))
-
- lj = np.array(lj_temp)
- # Now the value will be returned.
- if as_eig:
- return np.sort(eig(lj)[0])[::-1]
- else:
- return lj
-
-
-def lj_matrix_multiple(mol_data,
- nc_data,
- pipe=None,
- diag_value=None,
- sigma=1.0,
- epsilon=1.0,
- max_len=25,
- as_eig=True,
- bohr_radius_units=False):
- """
- Calculates the Lennard-Jones Matrix of multiple molecules.
- mol_data: molecule data, matrix of atom coordinates.
- nc_data: nuclear charge data, array of atom data.
- pipe: for multiprocessing purposes. Sends the data calculated
- through a pipe.
- diag_value: if special diagonal value is to be used.
- sigma: sigma value.
- epsilon: epsilon value.
- max_len: maximum amount of atoms in molecule.
- as_eig: if data should be returned as matrix or array of eigenvalues.
- bohr_radius_units: if units should be in bohr's radius units.
- """
- printc('L-J Matrices calculation started.', 'CYAN')
- tic = time.perf_counter()
-
- ljm_data = np.array([lj_matrix(mol,
- nc,
- diag_value,
- sigma,
- epsilon,
- max_len,
- as_eig,
- bohr_radius_units)
- for mol, nc in zip(mol_data, nc_data)])
-
- toc = time.perf_counter()
- printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN')
-
- if pipe:
- pipe.send(ljm_data)
-
- return ljm_data
diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py
deleted file mode 100644
index e9142b05f..000000000
--- a/lj_matrix/misc.py
+++ /dev/null
@@ -1,174 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from colorama import init, Fore, Style
-import pandas as pd
-
-init()
-
-
-def printc(text, color):
- """
- Prints texts normaly, but in color. Using colorama.
- text: string with the text to print.
- color: color to be used, same as available in colorama.
- """
- color_dic = {'BLACK': Fore.BLACK,
- 'RED': Fore.RED,
- 'GREEN': Fore.GREEN,
- 'YELLOW': Fore.YELLOW,
- 'BLUE': Fore.BLUE,
- 'MAGENTA': Fore.MAGENTA,
- 'CYAN': Fore.CYAN,
- 'WHITE': Fore.WHITE,
- 'RESET': Fore.RESET}
-
- color_dic_keys = color_dic.keys()
- if color not in color_dic_keys:
- print(Fore.RED
- + '\'{}\' not found, using default color.'.format(color)
- + Style.RESET_ALL)
- actual_color = Fore.RESET
- else:
- actual_color = color_dic[color]
-
- print(actual_color + text + Style.RESET_ALL)
-
-
-def plot_benchmarks():
- """
- For plotting the benchmarks.
- """
- # Original columns.
- or_cols = ['ml_type',
- 'tr_size',
- 'te_size',
- 'kernel_s',
- 'mae',
- 'time',
- 'lj_s',
- 'lj_e',
- 'date_ran']
- # Drop some original columns.
- dor_cols = ['te_size',
- 'kernel_s',
- 'time',
- 'date_ran']
-
- # Read benchmarks data and drop some columns.
- data_temp = pd.read_csv('data\\benchmarks.csv',)
- data = pd.DataFrame(data_temp, columns=or_cols)
- data = data.drop(columns=dor_cols)
-
- # Get the data of the first benchmarks and drop unnecesary columns.
- first_data = pd.DataFrame(data, index=range(0, 22))
- first_data = first_data.drop(columns=['lj_s', 'lj_e'])
-
- # Columns to keep temporarily.
- fd_columns = ['ml_type',
- 'tr_size',
- 'mae']
-
- # Create new dataframes for each matrix descriptor and fill them.
- first_data_cm = pd.DataFrame(columns=fd_columns)
- first_data_ljm = pd.DataFrame(columns=fd_columns)
- for i in range(first_data.shape[0]):
- temp_df = first_data.iloc[[i]]
- if first_data.at[i, 'ml_type'] == 'CM':
- first_data_cm = first_data_cm.append(temp_df)
- else:
- first_data_ljm = first_data_ljm.append(temp_df)
-
- # Drop unnecesary column and rename 'mae' for later use.
- first_data_cm = first_data_cm.drop(columns=['ml_type'])\
- .rename(columns={'mae': 'cm_mae'})
- first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
- .rename(columns={'mae': 'ljm_mae'})
- # print(first_data_cm)
- # print(first_data_ljm)
-
- # Get the cm data axis so it can be joined with the ljm data axis.
- cm_axis = first_data_cm.plot(x='tr_size',
- y='cm_mae',
- kind='line')
- # Get the ljm data axis and join it with the cm one.
- plot_axis = first_data_ljm.plot(ax=cm_axis,
- x='tr_size',
- y='ljm_mae',
- kind='line')
- plot_axis.set_xlabel('tr_size')
- plot_axis.set_ylabel('mae')
- plot_axis.set_title('mae for different tr_sizes')
- # Get the figure and save it.
- # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
-
- # Get the rest of the benchmark data and drop unnecesary column.
- new_data = data.drop(index=range(0, 22))
- new_data = new_data.drop(columns=['ml_type'])
-
- # Get the first set and rename it.
- nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
- ndf_axis = nd_first.plot(x='tr_size',
- y='1, 1',
- kind='line')
- last_axis = ndf_axis
- for i in range(22, 99, 11):
- lj_s = new_data['lj_s'][i]
- lj_e = new_data['lj_e'][i]
- new_mae = '{}, {}'.format(lj_s, lj_e)
- nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
- .drop(columns=['lj_s', 'lj_e'])\
- .rename(columns={'mae': new_mae})
- last_axis = nd_temp.plot(ax=last_axis,
- x='tr_size',
- y=new_mae,
- kind='line')
- print(nd_temp)
-
- last_axis.set_xlabel('tr_size')
- last_axis.set_ylabel('mae')
- last_axis.set_title('mae for different parameters of lj(s)')
-
- last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
-
- ndf_axis = nd_first.plot(x='tr_size',
- y='1, 1',
- kind='line')
- last_axis = ndf_axis
- for i in range(99, data.shape[0], 11):
- lj_s = new_data['lj_s'][i]
- lj_e = new_data['lj_e'][i]
- new_mae = '{}, {}'.format(lj_s, lj_e)
- nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
- .drop(columns=['lj_s', 'lj_e'])\
- .rename(columns={'mae': new_mae})
- last_axis = nd_temp.plot(ax=last_axis,
- x='tr_size',
- y=new_mae,
- kind='line')
- print(nd_temp)
-
- last_axis.set_xlabel('tr_size')
- last_axis.set_ylabel('mae')
- last_axis.set_title('mae for different parameters of lj(e)')
-
- last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
diff --git a/lj_matrix/parallel_create_matrices.py b/lj_matrix/parallel_create_matrices.py
deleted file mode 100644
index cd5ef5c8e..000000000
--- a/lj_matrix/parallel_create_matrices.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from multiprocessing import Process, Pipe
-from lj_matrix.c_matrix import c_matrix_multiple
-from lj_matrix.lj_matrix import lj_matrix_multiple
-
-
-def parallel_create_matrices(mol_data,
- nc_data,
- ljm_diag_value=None,
- ljm_sigma=1.0,
- ljm_epsilon=1.0,
- max_len=25,
- as_eig=True,
- bohr_radius_units=False):
- """
- Creates the Coulomb and L-J matrices in parallel.
- mol_data: molecule data, matrix of atom coordinates.
- nc_data: nuclear charge data, array of atom data.
- ljm_diag_value: if special diagonal value is to be used for lj matrix.
- ljm_sigma: sigma value for lj matrix.
- ljm_epsilon: psilon value for lj matrix.
- max_len: maximum amount of atoms in molecule.
- as_eig: if data should be returned as matrix or array of eigenvalues.
- bohr_radius_units: if units should be in bohr's radius units.
- """
-
- # Matrices calculation.
- procs = []
- pipes = []
-
- cm_recv, cm_send = Pipe(False)
- p1 = Process(target=c_matrix_multiple,
- args=(mol_data,
- nc_data,
- cm_send,
- max_len,
- as_eig,
- bohr_radius_units))
- procs.append(p1)
- pipes.append(cm_recv)
- p1.start()
-
- ljm_recv, ljm_send = Pipe(False)
- p2 = Process(target=lj_matrix_multiple,
- args=(mol_data,
- nc_data,
- ljm_send,
- ljm_diag_value,
- ljm_sigma,
- ljm_epsilon,
- max_len,
- as_eig,
- bohr_radius_units))
- procs.append(p2)
- pipes.append(ljm_recv)
- p2.start()
-
- cm_data = pipes[0].recv()
- ljm_data = pipes[1].recv()
-
- for proc in procs:
- proc.join()
-
- return cm_data, ljm_data
diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py
deleted file mode 100644
index 4401ca1c0..000000000
--- a/lj_matrix/read_qm7_data.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import os
-import time
-import numpy as np
-import random
-from lj_matrix.misc import printc
-
-
-# 'periodic_table_of_elements.txt' retrieved from
-# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee
-def read_nc_data(data_path):
- """
- Reads nuclear charge data from file and returns a dictionary.
- data_path: path to the data directory.
- """
- fname = 'periodic_table_of_elements.txt'
- with open(''.join([data_path, '\\', fname]), 'r') as infile:
- temp_lines = infile.readlines()
-
- del temp_lines[0]
-
- lines = []
- for temp_line in temp_lines:
- new_line = temp_line.split(sep=',')
- lines.append(new_line)
-
- # Dictionary of nuclear charge.
- return {line[2]: int(line[0]) for line in lines}
-
-
-# 'hof_qm7.txt.txt' retrieved from
-# https://github.com/qmlcode/tutorial
-def read_db_data(zi_data,
- data_path,
- r_seed=111):
- """
- Reads molecule database and extracts
- its contents as usable variables.
- zi_data: dictionary containing nuclear charge data.
- data_path: path to the data directory.
- r_seed: random seed to use for the shuffling.
- """
- os.chdir(data_path)
-
- fname = 'hof_qm7.txt'
- with open(fname, 'r') as infile:
- lines = infile.readlines()
-
- # Temporary energy dictionary.
- energy_temp = dict()
-
- for line in lines:
- xyz_data = line.split()
-
- xyz_name = xyz_data[0]
- hof = float(xyz_data[1])
- dftb = float(xyz_data[2])
- # print(xyz_name, hof, dftb)
-
- energy_temp[xyz_name] = np.array([hof, hof - dftb])
-
- # Use a random seed.
- random.seed(r_seed)
-
- et_keys = list(energy_temp.keys())
- random.shuffle(et_keys)
-
- # Temporary energy dictionary, shuffled.
- energy_temp_shuffled = dict()
- for key in et_keys:
- energy_temp_shuffled.update({key: energy_temp[key]})
-
- mol_data = []
- mol_nc_data = []
- # Actual reading of the xyz files.
- for i, k in enumerate(energy_temp_shuffled.keys()):
- with open(k, 'r') as xyz_file:
- lines = xyz_file.readlines()
-
- len_lines = len(lines)
- mol_temp_data = []
- mol_nc_temp_data = np.array(np.zeros(len_lines-2))
- for j, line in enumerate(lines[2:len_lines]):
- line_list = line.split()
-
- mol_nc_temp_data[j] = float(zi_data[line_list[0]])
- line_data = np.array(np.asarray(line_list[1:4], dtype=float))
- mol_temp_data.append(line_data)
-
- mol_data.append(mol_temp_data)
- mol_nc_data.append(mol_nc_temp_data)
-
- # Convert everything to a numpy array.
- molecules = np.array([np.array(mol) for mol in mol_data])
- nuclear_charge = np.array([nc_d for nc_d in mol_nc_data])
- energy_pbe0 = np.array([energy_temp_shuffled[k][0]
- for k in energy_temp_shuffled.keys()])
- energy_delta = np.array([energy_temp_shuffled[k][1]
- for k in energy_temp_shuffled.keys()])
-
- return molecules, nuclear_charge, energy_pbe0, energy_delta
-
-
-def read_qm7_data(r_seed=111):
- """
- Reads all the qm7 data.
- r_seed: random seed to use for the shuffling.
- """
- tic = time.perf_counter()
- printc('Data reading started.', 'CYAN')
-
- init_path = os.getcwd()
- os.chdir('data')
- data_path = os.getcwd()
-
- zi_data = read_nc_data(data_path)
- molecules, nuclear_charge, energy_pbe0, energy_delta = \
- read_db_data(zi_data, data_path, r_seed)
-
- os.chdir(init_path)
- toc = time.perf_counter()
- printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
-
- return molecules, nuclear_charge, energy_pbe0, energy_delta
diff --git a/lj_matrix/version.py b/lj_matrix/version.py
deleted file mode 100644
index fab58433d..000000000
--- a/lj_matrix/version.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-__version__ = '0.0.1'