From 52383ddeb87312708eeb1da765b175fb603f2802 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 3 Mar 2020 22:49:31 -0700
Subject: Possible tf addition, needs bugfixing

---
 ml_exp/do_ml.py   | 54 ++++++++++++++++++++++++++++++++++++------------
 ml_exp/kernels.py | 64 +++++++++++++++++++++++++++++++++++++++++----------------
 ml_exp/qm7db.py   |  9 ++++++--
 3 files changed, 96 insertions(+), 31 deletions(-)

diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index d22074952..379d0efd0 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -31,11 +31,12 @@ from ml_exp.qm7db import qm7db
 
 def simple_ml(descriptors,
               energies,
-              training_size,
+              training_size=1500,
               test_size=None,
               sigma=1000.0,
               opt=True,
               identifier=None,
+              use_tf=True,
               show_msgs=True):
     """
     Basic ML methodology for a single descriptor type.
@@ -47,6 +48,7 @@
     sigma: depth of the kernel.
     opt: if the optimized algorithm should be used. For benchmarking purposes.
     identifier: string with the name of the descriptor used.
+    use_tf: if TensorFlow should be used.
     show_msgs: if debug messages should be shown.
     NOTE: identifier is just a string and is only for identification
         purposes. Also, training is done with the first part of the data and
@@ -82,19 +84,32 @@
     K_training = gaussian_kernel(X_training,
                                  X_training,
                                  sigma,
-                                 opt=opt)
-    alpha = LA.cho_solve(LA.cho_factor(K_training), Y_training)
+                                 use_tf=use_tf)
+    if use_tf:
+        Y_training = tf.expand_dims(Y_training, 1)
+        alpha = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_training),
+                                         Y_training)
+    else:
+        alpha = LA.cho_solve(LA.cho_factor(K_training),
+                             Y_training)
 
     X_test = descriptors[-test_size:]
     Y_test = energies[-test_size:]
     K_test = gaussian_kernel(X_test,
                              X_training,
                              sigma,
-                             opt=opt)
-    Y_predicted = np.dot(K_test,
-                         alpha)
+                             use_tf=use_tf)
+    if use_tf:
+        Y_test = tf.expand_dims(Y_test, 1)
+        Y_predicted = tf.tensordot(K_test, alpha, 1)
+    else:
+        Y_predicted = np.dot(K_test, alpha)
+
+    if use_tf:
+        mae = tf.reduce_mean(tf.abs(Y_predicted - Y_test))
+    else:
+        mae = np.mean(np.abs(Y_predicted - Y_test))
 
-    mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
         printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
 
@@ -158,11 +173,6 @@
                                                  is_shuffled=is_shuffled,
                                                  r_seed=r_seed,
                                                  use_tf=use_tf)
-    print('test')
-    print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'),
-          type(energy_delta), energy_delta.device.endswith('GPU:0'))
-    print(tf.config.experimental.list_physical_devices('GPU'))
-    raise TypeError('test')
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
@@ -192,7 +202,7 @@
         if 'BOB' in identifiers:
             compound.gen_bob(size=size)
 
-    # Create a numpy array for the descriptors.
+    # Create a numpy array (or TensorFlow tensor) for the descriptors.
     if 'CM' in identifiers:
         cm_data = np.array([comp.cm for comp in compounds], dtype=np.float64)
     if 'LJM' in identifiers:
@@ -204,6 +214,20 @@
     if 'BOB' in identifiers:
         bob_data = np.array([comp.bob for comp in compounds],
                             dtype=np.float64)
+    if use_tf:
+        if tf.config.experimental.list_physical_devices('GPU'):
+            with tf.device('GPU:0'):
+                if 'CM' in identifiers:
+                    cm_data = tf.convert_to_tensor(cm_data)
+                if 'LJM' in identifiers:
+                    ljm_data = tf.convert_to_tensor(ljm_data)
+                # if 'AM' in identifiers:
+                #     am_data = tf.convert_to_tensor(am_data)
+                if 'BOB' in identifiers:
+                    bob_data = tf.convert_to_tensor(bob_data)
+        else:
+            raise TypeError('No GPU found, could not create Tensor objects.')
+
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
@@ -217,6 +241,7 @@
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='CM',
+                                      use_tf=use_tf,
                                       show_msgs=show_msgs)
     if 'LJM' in identifiers:
         ljm_mae, ljm_tictoc = simple_ml(ljm_data,
@@ -225,6 +250,7 @@
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='LJM',
+                                        use_tf=use_tf,
                                         show_msgs=show_msgs)
     """
     if 'AM' in identifiers:
@@ -234,6 +260,7 @@
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='AM',
+                                      use_tf=use_tf,
                                       show_msgs=show_msgs)
     """
     if 'BOB' in identifiers:
@@ -243,6 +270,7 @@
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='BOB',
+                                        use_tf=use_tf,
                                         show_msgs=show_msgs)
 
     # End of program

diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index c79f93efa..26ff0d77b 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -22,34 +22,66 @@ SOFTWARE.
 """
 # import math
 import numpy as np
+import tensorflow as tf
 
 
 def gaussian_kernel(X1,
                     X2,
-                    sigma):
+                    sigma,
+                    use_tf=True):
     """
     Calculates the Gaussian Kernel.
     X1: first representations.
     X2: second representations.
     sigma: kernel width.
+    use_tf: if TensorFlow should be used.
     """
+    X1_size = X1.shape[0]
+    X2_size = X2.shape[0]
     i_sigma = -0.5 / (sigma*sigma)
 
-    K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
-    # Faster way of calculating the kernel (no numba support).
-    for i, x1 in enumerate(X1):
-        if X2.ndim == 3:
-            norm = np.linalg.norm(X2 - x1, axis=(1, 2))
-        else:
-            norm = np.linalg.norm(X2 - x1, axis=-1)
-        K[i, :] = np.exp(i_sigma * np.square(norm))
+    if use_tf:
+        if tf.config.experimental.list_physical_devices('GPU'):
+            with tf.device('GPU:0'):
+                X1 = tf.convert_to_tensor(X1)
+                X2 = tf.convert_to_tensor(X2)
+                X2r = tf.rank(X2)
 
-    # Old way of calculating the kernel (numba support).
-    """
-    for i, x1 in enumerate(X1):
-        for j, x2 in enumerate(X2):
-            f_norm = np.linalg.norm(x2 - x1)
-            K[i, j] = math.exp(i_sigma * f_norm**2)
-    """
+                def cond(i, _):
+                    return tf.less(i, X1_size)
+
+                def body(i, K):
+                    if X2r == 3:
+                        norm = tf.norm(X2 - X1[i], axis=(1, 2))
+                    else:
+                        norm = tf.norm(X2 - X1[i], axis=-1)
+
+                    return (i + 1,
+                            K.write(i, tf.exp(i_sigma * tf.square(norm))))
+
+                K = tf.TensorArray(dtype=tf.float64,
+                                   size=X1_size)
+                i_state = (0, K)
+                n, K = tf.while_loop(cond, body, i_state)
+                K = K.stack()
+        else:
+            raise TypeError('No GPU found, could not create Tensor objects.')
+    else:
+        K = np.zeros((X1_size, X2_size), dtype=np.float64)
+        # Faster way of calculating the kernel (no numba support).
+        for i in range(X1_size):
+            if X2.ndim == 3:
+                norm = np.linalg.norm(X2 - X1[i], axis=(1, 2))
+            else:
+                norm = np.linalg.norm(X2 - X1[i], axis=-1)
+            K[i, :] = np.exp(i_sigma * np.square(norm))
+
+        # Old way of calculating the kernel (numba support).
+ """ + for i, x1 in enumerate(X1): + for j, x2 in enumerate(X2): + f_norm = np.linalg.norm(x2 - x1) + K[i, j] = math.exp(i_sigma * f_norm**2) + """ return K diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py index 29bda6a59..c20df018e 100644 --- a/ml_exp/qm7db.py +++ b/ml_exp/qm7db.py @@ -56,7 +56,12 @@ def qm7db(db_path='data', e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64) if use_tf: - e_pbe0 = tf.convert_to_tensor(e_pbe0) - e_delta = tf.convert_to_tensor(e_delta) + # Check if there's a gpu available and use the first one. + if tf.config.experimental.list_physical_devices('GPU'): + with tf.device('GPU:0'): + e_pbe0 = tf.convert_to_tensor(e_pbe0) + e_delta = tf.convert_to_tensor(e_delta) + else: + raise TypeError('No GPU found, could not create Tensor objects.') return compounds, e_pbe0, e_delta -- cgit v1.2.3-54-g00ecf