summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Luevano Alvarado <55825613+luevano@users.noreply.github.com>2020-02-29 09:06:54 -0700
committerDavid Luevano Alvarado <55825613+luevano@users.noreply.github.com>2020-02-29 09:06:54 -0700
commitda6b1428cda40534b820642d8e9be398d7fee8be (patch)
tree394de3268190dce5cff9367c2355c64101164863
parent759b48a965470ac18910951b2cb0599d371d1808 (diff)
Rewrite kernel, gotta fix adj mat
-rw-r--r--ml_exp/compound.py1
-rw-r--r--ml_exp/do_ml.py29
-rw-r--r--ml_exp/kernels.py25
3 files changed, 41 insertions, 14 deletions
diff --git a/ml_exp/compound.py b/ml_exp/compound.py
index ec5085797..f603ddd4e 100644
--- a/ml_exp/compound.py
+++ b/ml_exp/compound.py
@@ -126,7 +126,6 @@ class Compound:
Generate the Adjacency Matrix for the compund.
use_forces: if the use of forces instead of k_cx should be used.
size: compound size.
- bohr_ru: if radius units should be in bohr's radius units.
"""
self.am = adjacency_matrix(self.fnm,
self.bonds,
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index 3cb67675f..758bf01ae 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -33,6 +33,7 @@ def simple_ml(descriptors,
training_size,
test_size=None,
sigma=1000.0,
+ opt=True,
identifier=None,
show_msgs=True):
"""
@@ -43,6 +44,7 @@ def simple_ml(descriptors,
test_size: size of the test set to use. If no size is given,
the last remaining molecules are used.
sigma: depth of the kernel.
+ opt: if the optimized algorithm should be used. For benchmarking purposes.
identifier: string with the name of the descriptor used.
show_msgs: if debug messages should be shown.
NOTE: identifier is just a string and is only for identification purposes.
@@ -76,13 +78,21 @@ def simple_ml(descriptors,
X_training = descriptors[:training_size]
Y_training = energies[:training_size]
- K_training = gaussian_kernel(X_training, X_training, sigma)
- alpha = cholesky_solve(K_training, Y_training)
+ K_training = gaussian_kernel(X_training,
+ X_training,
+ sigma,
+ opt=opt)
+ alpha = cholesky_solve(K_training,
+ Y_training)
X_test = descriptors[-test_size:]
Y_test = energies[-test_size:]
- K_test = gaussian_kernel(X_test, X_training, sigma)
- Y_predicted = np.dot(K_test, alpha)
+ K_test = gaussian_kernel(X_test,
+ X_training,
+ sigma,
+ opt=opt)
+ Y_predicted = np.dot(K_test,
+ alpha)
mae = np.mean(np.abs(Y_predicted - Y_test))
if show_msgs:
@@ -113,6 +123,7 @@ def do_ml(db_path='data',
training_size=1500,
test_size=None,
sigma=1000.0,
+ opt=True,
identifiers=["CM"],
show_msgs=True):
"""
@@ -132,6 +143,7 @@ def do_ml(db_path='data',
test_size: size of the test set to use. If no size is given,
the last remaining molecules are used.
sigma: depth of the kernel.
+ opt: if the optimized algorithm should be used. For benchmarking purposes.
identifiers: list of names (strings) of descriptors to use.
show_msgs: if debug messages should be shown.
"""
@@ -165,9 +177,10 @@ def do_ml(db_path='data',
as_eig=as_eig,
bohr_ru=bohr_ru)
if 'AM' in identifiers:
- compound.gen_am(use_forces=use_forces,
- size=size,
+ compound.gen_hd(size=size,
bohr_ru=bohr_ru)
+ compound.gen_am(use_forces=use_forces,
+ size=size)
if 'BOB' in identifiers:
compound.gen_bob(size=size)
@@ -193,6 +206,7 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
+ opt=opt,
identifier='CM',
show_msgs=show_msgs)
if 'LJM' in identifiers:
@@ -201,6 +215,7 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
+ opt=opt,
identifier='LJM',
show_msgs=show_msgs)
if 'AM' in identifiers:
@@ -209,6 +224,7 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
+ opt=opt,
identifier='CM',
show_msgs=show_msgs)
if 'BOB' in identifiers:
@@ -217,6 +233,7 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
+ opt=opt,
identifier='CM',
show_msgs=show_msgs)
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index 3914ffc20..e6855f97e 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -26,20 +26,31 @@ import numpy as np
def gaussian_kernel(X1,
X2,
- sigma):
+ sigma,
+ opt=True):
"""
Calculates the Gaussian Kernel.
X1: first representations.
X2: second representations.
sigma: kernel width.
+ opt: if the optimized algorithm should be used. For benchmarking purposes.
"""
- inv_sigma = -0.5 / (sigma*sigma)
+ i_sigma = -0.5 / (sigma*sigma)
K = np.zeros((X1.shape[0], X2.shape[0]), dtype=float)
- for i, x1 in enumerate(X1):
- for j, x2 in enumerate(X2):
- f_norm = np.linalg.norm(x1 - x2)
- # print(f_norm)
- K[i, j] = math.exp(inv_sigma * f_norm)
+ if opt:
+ # Faster way of calculating the kernel.
+ for i, x1 in enumerate(X1):
+ if X2.ndim == 3:
+ norm = np.linalg.norm(X2 - x1, axis=(1, 2))
+ else:
+ norm = np.linalg.norm(X2 - x1, axis=-1)
+ K[i, :] = np.exp(i_sigma * np.square(norm))
+ else:
+ for i, x1 in enumerate(X1):
+ for j, x2 in enumerate(X2):
+ f_norm = np.linalg.norm(x2 - x1)
+ # print(f_norm)
+ K[i, j] = math.exp(i_sigma * f_norm**2)
return K