From f57f38c2886052bef92b3885acc2790ac5bc340d Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> Date: Sat, 7 Mar 2020 10:11:53 -0700 Subject: Add laplacian kernel --- ml_exp/__init__.py | 3 +++ ml_exp/do_ml.py | 61 ++++++++++++++++++++++++++++++++++++++---------------- ml_exp/kernels.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 18 deletions(-) diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py index bc5afe03a..e49104e12 100644 --- a/ml_exp/__init__.py +++ b/ml_exp/__init__.py @@ -24,6 +24,7 @@ from ml_exp.compound import Compound from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ get_helping_data, adjacency_matrix, check_bond, bag_of_bonds from ml_exp.qm7db import qm7db +from ml_exp.kernels import gaussian_kernel, laplacian_kernel from ml_exp.do_ml import simple_ml, do_ml __all__ = ['Compound', @@ -34,5 +35,7 @@ __all__ = ['Compound', 'check_bond', 'bag_of_bonds', 'qm7db', + 'gaussian_kernel', + 'laplacian_kernel', 'simple_ml', 'do_ml'] diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py index f4742b66d..b30d73811 100644 --- a/ml_exp/do_ml.py +++ b/ml_exp/do_ml.py @@ -30,7 +30,7 @@ except ImportError: print('Tensorflow couldn\'t be imported. Maybe it is not installed.') TF_AV = False from ml_exp.misc import printc -from ml_exp.kernels import gaussian_kernel +from ml_exp.kernels import gaussian_kernel, laplacian_kernel from ml_exp.qm7db import qm7db @@ -93,11 +93,18 @@ def simple_ml(descriptors, with tf.device('GPU:0'): X_tr = descriptors[:training_size] Y_tr = energies[:training_size] - K_tr = gaussian_kernel(X_tr, - X_tr, - sigma, - use_tf=use_tf) - + if identifier == 'BOB': + K_tr = laplacian_kernel(X_tr, + X_tr, + sigma, + use_tf=use_tf) + else: + K_tr = gaussian_kernel(X_tr, + X_tr, + sigma, + use_tf=use_tf) + + # Adding a small value on the diagonal for cho_solve. dv = tf.linalg.tensor_diag(tf.constant(1e-8, shape=(training_size), dtype=tf.float64)) @@ -108,10 +115,16 @@ def simple_ml(descriptors, X_te = descriptors[-test_size:] Y_te = energies[-test_size:] - K_te = gaussian_kernel(X_te, - X_tr, - sigma, - use_tf=use_tf) + if identifier == 'BOB': + K_te = laplacian_kernel(X_te, + X_tr, + sigma, + use_tf=use_tf) + else: + K_te = gaussian_kernel(X_te, + X_tr, + sigma, + use_tf=use_tf) Y_te = tf.expand_dims(Y_te, 1) Y_pr = tf.tensordot(K_te, alpha, 1) @@ -120,10 +133,16 @@ def simple_ml(descriptors, else: X_tr = descriptors[:training_size] Y_tr = energies[:training_size] - K_tr = gaussian_kernel(X_tr, - X_tr, - sigma, - use_tf=use_tf) + if identifier == 'BOB': + K_tr = laplacian_kernel(X_tr, + X_tr, + sigma, + use_tf=use_tf) + else: + K_tr = gaussian_kernel(X_tr, + X_tr, + sigma, + use_tf=use_tf) # Adding a small value on the diagonal for cho_solve. K_tr[np.diag_indices_from(K_tr)] += 1e-8 @@ -132,10 +151,16 @@ def simple_ml(descriptors, X_te = descriptors[-test_size:] Y_te = energies[-test_size:] - K_te = gaussian_kernel(X_te, - X_tr, - sigma, - use_tf=use_tf) + if identifier == 'BOB': + K_te = laplacian_kernel(X_te, + X_tr, + sigma, + use_tf=use_tf) + else: + K_te = gaussian_kernel(X_te, + X_tr, + sigma, + use_tf=use_tf) Y_pr = np.dot(K_te, alpha) mae = np.mean(np.abs(Y_pr - Y_te)) diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py index abc71f7af..488232392 100644 --- a/ml_exp/kernels.py +++ b/ml_exp/kernels.py @@ -83,3 +83,58 @@ def gaussian_kernel(X1, norm = np.linalg.norm(X2 - X1[i], axis=-1) K[i, :] = np.exp(i_sigma * np.square(norm)) return K + + +def laplacian_kernel(X1, + X2, + sigma, + use_tf=True): + """ + Calculates the Laplacian Kernel. + X1: first representations. + X2: second representations. + sigma: kernel width. + use_tf: if tensorflow should be used. + """ + # If tf is to be used but couldn't be imported, don't try to use it. + if use_tf and not TF_AV: + use_tf = False + + X1_size = X1.shape[0] + X2_size = X2.shape[0] + i_sigma = -0.5 / sigma + + if use_tf: + if tf.config.experimental.list_physical_devices('GPU'): + with tf.device('GPU:0'): + X1 = tf.convert_to_tensor(X1) + X2 = tf.convert_to_tensor(X2) + X2r = tf.rank(X2) + + def cond(i, _): + return tf.less(i, X1_size) + + def body(i, K): + if X2r == 3: + norm = tf.norm(X2 - X1[i], axis=(1, 2)) + else: + norm = tf.norm(X2 - X1[i], axis=-1) + + return (i + 1, + K.write(i, tf.exp(i_sigma * norm))) + + K = tf.TensorArray(dtype=tf.float64, + size=X1_size) + i_state = (0, K) + n, K = tf.while_loop(cond, body, i_state) + K = K.stack() + else: + K = np.zeros((X1_size, X2_size), dtype=np.float64) + # Faster way of calculating the kernel (no numba support). + for i in range(X1_size): + if X2.ndim == 3: + norm = np.linalg.norm(X2 - X1[i], axis=(1, 2)) + else: + norm = np.linalg.norm(X2 - X1[i], axis=-1) + K[i, :] = np.exp(i_sigma * norm) + return K -- cgit v1.2.3-70-g09d2