summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-03-03 22:49:31 -0700
committer: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-03-03 22:49:31 -0700
commit: 52383ddeb87312708eeb1da765b175fb603f2802 (patch)
tree: 8ecb2ae66eecfa6ab4fa361167bc013c4e0b0521
parent: 1647f76052b016e4102a3af234ac47401e04819d (diff)
Possible tf addition, needs bugfixing
-rw-r--r--  ml_exp/do_ml.py    | 55
-rw-r--r--  ml_exp/kernels.py  | 62
-rw-r--r--  ml_exp/qm7db.py    |  9
3 files changed, 95 insertions, 31 deletions
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index d22074952..379d0efd0 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -31,11 +31,12 @@ from ml_exp.qm7db import qm7db
def simple_ml(descriptors,
energies,
- training_size,
+ training_size=1500,
test_size=None,
sigma=1000.0,
opt=True,
identifier=None,
+ use_tf=True,
show_msgs=True):
"""
Basic ML methodology for a single descriptor type.
@@ -47,6 +48,7 @@ def simple_ml(descriptors,
sigma: depth of the kernel.
opt: if the optimized algorithm should be used. For benchmarking purposes.
identifier: string with the name of the descriptor used.
+ use_tf: if tensorflow should be used.
show_msgs: if debug messages should be shown.
NOTE: identifier is just a string and is only for identification purposes.
Also, training is done with the first part of the data and
@@ -82,19 +84,33 @@ def simple_ml(descriptors,
K_training = gaussian_kernel(X_training,
X_training,
sigma,
- opt=opt)
- alpha = LA.cho_solve(LA.cho_factor(K_training), Y_training)
+ use_tf=use_tf)
+ if use_tf:
+ # Y_training = tf.expand_dims(Y_training, 1)
+ alpha = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_training),
+ Y_training)
+ else:
+ alpha = LA.cho_solve(LA.cho_factor(K_training),
+ Y_training)
X_test = descriptors[-test_size:]
Y_test = energies[-test_size:]
K_test = gaussian_kernel(X_test,
X_training,
sigma,
- opt=opt)
- Y_predicted = np.dot(K_test,
- alpha)
+ use_tf=use_tf)
+ if use_tf:
+ # Y_test = tf.expand_dims(Y_test, 1)
+ Y_predicted = tf.tensordot(K_test, alpha, 1)
+ else:
+ Y_predicted = np.dot(K_test, alpha)
+
+ print('Ducky')
+ if use_tf:
+ mae = tf.reduce_mean(tf.abs(Y_predicted - Y_test))
+ else:
+ mae = np.mean(np.abs(Y_predicted - Y_test))
- mae = np.mean(np.abs(Y_predicted - Y_test))
if show_msgs:
printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
@@ -158,11 +174,6 @@ def do_ml(db_path='data',
is_shuffled=is_shuffled,
r_seed=r_seed,
use_tf=use_tf)
- print('test')
- print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'),
- type(energy_delta), energy_delta.device.endswith('GPU:0'))
- print(tf.config.experimental.list_physical_devices('GPU'))
- raise TypeError('test')
toc = time.perf_counter()
tictoc = toc - tic
if show_msgs:
@@ -192,7 +203,7 @@ def do_ml(db_path='data',
if 'BOB' in identifiers:
compound.gen_bob(size=size)
- # Create a numpy array for the descriptors.
+ # Create a numpy array (or tensorflow tensor) for the descriptors.
if 'CM' in identifiers:
cm_data = np.array([comp.cm for comp in compounds], dtype=np.float64)
if 'LJM' in identifiers:
@@ -204,6 +215,20 @@ def do_ml(db_path='data',
if 'BOB' in identifiers:
bob_data = np.array([comp.bob for comp in compounds], dtype=np.float64)
+ if use_tf:
+ if tf.config.experimental.list_physical_devices('GPU'):
+ with tf.device('GPU:0'):
+ if 'CM' in identifiers:
+ cm_data = tf.convert_to_tensor(cm_data)
+ if 'LJM' in identifiers:
+ ljm_data = tf.convert_to_tensor(ljm_data)
+ # if 'AM' in identifiers:
+ # am_data = tf.convert_to_tensor(am_data)
+ if 'BOB' in identifiers:
+ bob_data = tf.convert_to_tensor(bob_data)
+ else:
+ raise TypeError('No GPU found, could not create Tensor objects.')
+
toc = time.perf_counter()
tictoc = toc - tic
if show_msgs:
@@ -217,6 +242,7 @@ def do_ml(db_path='data',
test_size=test_size,
sigma=sigma,
identifier='CM',
+ use_tf=use_tf,
show_msgs=show_msgs)
if 'LJM' in identifiers:
ljm_mae, ljm_tictoc = simple_ml(ljm_data,
@@ -225,6 +251,7 @@ def do_ml(db_path='data',
test_size=test_size,
sigma=sigma,
identifier='LJM',
+ use_tf=use_tf,
show_msgs=show_msgs)
"""
if 'AM' in identifiers:
@@ -234,6 +261,7 @@ def do_ml(db_path='data',
test_size=test_size,
sigma=sigma,
identifier='AM',
+ use_tf=use_tf,
show_msgs=show_msgs)
"""
if 'BOB' in identifiers:
@@ -243,6 +271,7 @@ def do_ml(db_path='data',
test_size=test_size,
sigma=sigma,
identifier='BOB',
+ use_tf=use_tf,
show_msgs=show_msgs)
# End of program
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index c79f93efa..26ff0d77b 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -22,34 +22,64 @@ SOFTWARE.
"""
# import math
import numpy as np
+import tensorflow as tf
def gaussian_kernel(X1,
X2,
- sigma):
+ sigma,
+ use_tf=True):
"""
Calculates the Gaussian Kernel.
X1: first representations.
X2: second representations.
sigma: kernel width.
+ use_tf: if tensorflow should be used.
"""
+ X1_size = X1.shape[0]
+ X2_size = X2.shape[0]
i_sigma = -0.5 / (sigma*sigma)
- K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
- # Faster way of calculating the kernel (no numba support).
- for i, x1 in enumerate(X1):
- if X2.ndim == 3:
- norm = np.linalg.norm(X2 - x1, axis=(1, 2))
- else:
- norm = np.linalg.norm(X2 - x1, axis=-1)
- K[i, :] = np.exp(i_sigma * np.square(norm))
+ if use_tf:
+ if tf.config.experimental.list_physical_devices('GPU'):
+ with tf.device('GPU:0'):
+ X1 = tf.convert_to_tensor(X1)
+ X2 = tf.convert_to_tensor(X2)
+ X2r = tf.rank(X2)
- # Old way of calculating the kernel (numba support).
- """
- for i, x1 in enumerate(X1):
- for j, x2 in enumerate(X2):
- f_norm = np.linalg.norm(x2 - x1)
- K[i, j] = math.exp(i_sigma * f_norm**2)
- """
+ def cond(i, _):
+ return tf.less(i, X1_size)
+
+ def body(i, K):
+ if X2r == 3:
+ norm = tf.norm(X2 - X1[i], axis=(1, 2))
+ else:
+ norm = tf.norm(X2 - X1[i], axis=-1)
+
+ return (i + 1,
+ K.write(i, tf.exp(i_sigma * tf.square(norm))))
+
+ K = tf.TensorArray(dtype=tf.float64,
+ size=X1_size)
+ i_state = (0, K)
+ n, K = tf.while_loop(cond, body, i_state)
+ K = K.stack()
+ else:
+ K = np.zeros((X1_size, X2_size), dtype=np.float64)
+ # Faster way of calculating the kernel (no numba support).
+ for i in range(X1_size):
+ if X2.ndim == 3:
+ norm = np.linalg.norm(X2 - X1[i], axis=(1, 2))
+ else:
+ norm = np.linalg.norm(X2 - X1[i], axis=-1)
+ K[i, :] = np.exp(i_sigma * np.square(norm))
+
+ # Old way of calculating the kernel (numba support).
+ """
+ for i, x1 in enumerate(X1):
+ for j, x2 in enumerate(X2):
+ f_norm = np.linalg.norm(x2 - x1)
+ K[i, j] = math.exp(i_sigma * f_norm**2)
+ """
return K
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 29bda6a59..c20df018e 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -56,7 +56,12 @@ def qm7db(db_path='data',
e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64)
if use_tf:
- e_pbe0 = tf.convert_to_tensor(e_pbe0)
- e_delta = tf.convert_to_tensor(e_delta)
+ # Check if there's a gpu available and use the first one.
+ if tf.config.experimental.list_physical_devices('GPU'):
+ with tf.device('GPU:0'):
+ e_pbe0 = tf.convert_to_tensor(e_pbe0)
+ e_delta = tf.convert_to_tensor(e_delta)
+ else:
+ raise TypeError('No GPU found, could not create Tensor objects.')
return compounds, e_pbe0, e_delta