diff options
author | David Luevano Alvarado <55825613+luevano@users.noreply.github.com> | 2020-02-29 09:06:54 -0700 |
---|---|---|
committer | David Luevano Alvarado <55825613+luevano@users.noreply.github.com> | 2020-02-29 09:06:54 -0700 |
commit | da6b1428cda40534b820642d8e9be398d7fee8be (patch) | |
tree | 394de3268190dce5cff9367c2355c64101164863 | |
parent | 759b48a965470ac18910951b2cb0599d371d1808 (diff) |
Rewrite kernel, gotta fix adj mat
-rw-r--r-- | ml_exp/compound.py | 1 | ||||
-rw-r--r-- | ml_exp/do_ml.py | 29 | ||||
-rw-r--r-- | ml_exp/kernels.py | 25 |
3 files changed, 41 insertions, 14 deletions
diff --git a/ml_exp/compound.py b/ml_exp/compound.py index ec5085797..f603ddd4e 100644 --- a/ml_exp/compound.py +++ b/ml_exp/compound.py @@ -126,7 +126,6 @@ class Compound: Generate the Adjacency Matrix for the compound. use_forces: if the use of forces instead of k_cx should be used. size: compound size. - bohr_ru: if radius units should be in bohr's radius units. """ self.am = adjacency_matrix(self.fnm, self.bonds, diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py index 3cb67675f..758bf01ae 100644 --- a/ml_exp/do_ml.py +++ b/ml_exp/do_ml.py @@ -33,6 +33,7 @@ def simple_ml(descriptors, training_size, test_size=None, sigma=1000.0, + opt=True, identifier=None, show_msgs=True): """ @@ -43,6 +44,7 @@ def simple_ml(descriptors, test_size: size of the test set to use. If no size is given, the last remaining molecules are used. sigma: depth of the kernel. + opt: if the optimized algorithm should be used. For benchmarking purposes. identifier: string with the name of the descriptor used. show_msgs: if debug messages should be shown. NOTE: identifier is just a string and is only for identification purposes. 
@@ -76,13 +78,21 @@ def simple_ml(descriptors, X_training = descriptors[:training_size] Y_training = energies[:training_size] - K_training = gaussian_kernel(X_training, X_training, sigma) - alpha = cholesky_solve(K_training, Y_training) + K_training = gaussian_kernel(X_training, + X_training, + sigma, + opt=opt) + alpha = cholesky_solve(K_training, + Y_training) X_test = descriptors[-test_size:] Y_test = energies[-test_size:] - K_test = gaussian_kernel(X_test, X_training, sigma) - Y_predicted = np.dot(K_test, alpha) + K_test = gaussian_kernel(X_test, + X_training, + sigma, + opt=opt) + Y_predicted = np.dot(K_test, + alpha) mae = np.mean(np.abs(Y_predicted - Y_test)) if show_msgs: @@ -113,6 +123,7 @@ def do_ml(db_path='data', training_size=1500, test_size=None, sigma=1000.0, + opt=True, identifiers=["CM"], show_msgs=True): """ @@ -132,6 +143,7 @@ def do_ml(db_path='data', test_size: size of the test set to use. If no size is given, the last remaining molecules are used. sigma: depth of the kernel. + opt: if the optimized algorithm should be used. For benchmarking purposes. identifiers: list of names (strings) of descriptors to use. show_msgs: if debug messages should be shown. 
""" @@ -165,9 +177,10 @@ def do_ml(db_path='data', as_eig=as_eig, bohr_ru=bohr_ru) if 'AM' in identifiers: - compound.gen_am(use_forces=use_forces, - size=size, + compound.gen_hd(size=size, bohr_ru=bohr_ru) + compound.gen_am(use_forces=use_forces, + size=size) if 'BOB' in identifiers: compound.gen_bob(size=size) @@ -193,6 +206,7 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, + opt=opt, identifier='CM', show_msgs=show_msgs) if 'LJM' in identifiers: @@ -201,6 +215,7 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, + opt=opt, identifier='LJM', show_msgs=show_msgs) if 'AM' in identifiers: @@ -209,6 +224,7 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, + opt=opt, identifier='CM', show_msgs=show_msgs) if 'BOB' in identifiers: @@ -217,6 +233,7 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, + opt=opt, identifier='CM', show_msgs=show_msgs) diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py index 3914ffc20..e6855f97e 100644 --- a/ml_exp/kernels.py +++ b/ml_exp/kernels.py @@ -26,20 +26,31 @@ import numpy as np def gaussian_kernel(X1, X2, - sigma): + sigma, + opt=True): """ Calculates the Gaussian Kernel. X1: first representations. X2: second representations. sigma: kernel width. + opt: if the optimized algorithm should be used. For benchmarking purposes. """ - inv_sigma = -0.5 / (sigma*sigma) + i_sigma = -0.5 / (sigma*sigma) K = np.zeros((X1.shape[0], X2.shape[0]), dtype=float) - for i, x1 in enumerate(X1): - for j, x2 in enumerate(X2): - f_norm = np.linalg.norm(x1 - x2) - # print(f_norm) - K[i, j] = math.exp(inv_sigma * f_norm) + if opt: + # Faster way of calculating the kernel. 
+ for i, x1 in enumerate(X1): + if X2.ndim == 3: + norm = np.linalg.norm(X2 - x1, axis=(1, 2)) + else: + norm = np.linalg.norm(X2 - x1, axis=-1) + K[i, :] = np.exp(i_sigma * np.square(norm)) + else: + for i, x1 in enumerate(X1): + for j, x2 in enumerate(X2): + f_norm = np.linalg.norm(x2 - x1) + # print(f_norm) + K[i, j] = math.exp(i_sigma * f_norm**2) return K |