From 52383ddeb87312708eeb1da765b175fb603f2802 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Tue, 3 Mar 2020 22:49:31 -0700
Subject: Possible tf addition, needs bugfixing

---
 ml_exp/do_ml.py   | 54 ++++++++++++++++++++++++++++++++++++------------
 ml_exp/kernels.py | 64 +++++++++++++++++++++++++++++++++++++++++----------------
 ml_exp/qm7db.py   |  9 ++++++--
 3 files changed, 96 insertions(+), 31 deletions(-)

diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index d22074952..379d0efd0 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -31,11 +31,12 @@ from ml_exp.qm7db import qm7db
 
 def simple_ml(descriptors,
               energies,
-              training_size,
+              training_size=1500,
               test_size=None,
               sigma=1000.0,
               opt=True,
               identifier=None,
+              use_tf=True,
               show_msgs=True):
     """
     Basic ML methodology for a single descriptor type.
@@ -47,6 +48,7 @@
     sigma: depth of the kernel.
     opt: if the optimized algorithm should be used. For benchmarking purposes.
     identifier: string with the name of the descriptor used.
+    use_tf: if TensorFlow should be used.
     show_msgs: if debug messages should be shown.
     NOTE: identifier is just a string and is only for identification
         purposes. Also, training is done with the first part of the data and
@@ -82,19 +84,32 @@
     K_training = gaussian_kernel(X_training,
                                  X_training,
                                  sigma,
-                                 opt=opt)
-    alpha = LA.cho_solve(LA.cho_factor(K_training), Y_training)
+                                 use_tf=use_tf)
+    if use_tf:
+        Y_training = tf.expand_dims(Y_training, 1)
+        alpha = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_training),
+                                         Y_training)
+    else:
+        alpha = LA.cho_solve(LA.cho_factor(K_training),
+                             Y_training)
 
     X_test = descriptors[-test_size:]
     Y_test = energies[-test_size:]
     K_test = gaussian_kernel(X_test,
                              X_training,
                              sigma,
-                             opt=opt)
-    Y_predicted = np.dot(K_test,
-                         alpha)
+                             use_tf=use_tf)
+    if use_tf:
+        Y_test = tf.expand_dims(Y_test, 1)
+        Y_predicted = tf.tensordot(K_test, alpha, 1)
+    else:
+        Y_predicted = np.dot(K_test, alpha)
+
+    if use_tf:
+        mae = tf.reduce_mean(tf.abs(Y_predicted - Y_test))
+    else:
+        mae = np.mean(np.abs(Y_predicted - Y_test))
 
-    mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
         printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
 
@@ -158,11 +173,6 @@
                                                  is_shuffled=is_shuffled,
                                                  r_seed=r_seed,
                                                  use_tf=use_tf)
-    print('test')
-    print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'),
-          type(energy_delta), energy_delta.device.endswith('GPU:0'))
-    print(tf.config.experimental.list_physical_devices('GPU'))
-    raise TypeError('test')
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
@@ -192,7 +202,7 @@
         if 'BOB' in identifiers:
             compound.gen_bob(size=size)
 
-    # Create a numpy array for the descriptors.
+    # Create a numpy array (or TensorFlow tensor) for the descriptors.
     if 'CM' in identifiers:
         cm_data = np.array([comp.cm for comp in compounds], dtype=np.float64)
     if 'LJM' in identifiers:
@@ -204,6 +214,20 @@
     if 'BOB' in identifiers:
         bob_data = np.array([comp.bob for comp in compounds],
                             dtype=np.float64)
+    if use_tf:
+        if tf.config.experimental.list_physical_devices('GPU'):
+            with tf.device('GPU:0'):
+                if 'CM' in identifiers:
+                    cm_data = tf.convert_to_tensor(cm_data)
+                if 'LJM' in identifiers:
+                    ljm_data = tf.convert_to_tensor(ljm_data)
+                # if 'AM' in identifiers:
+                #     am_data = tf.convert_to_tensor(am_data)
+                if 'BOB' in identifiers:
+                    bob_data = tf.convert_to_tensor(bob_data)
+        else:
+            raise TypeError('No GPU found, could not create Tensor objects.')
+
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
@@ -217,6 +241,7 @@
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='CM',
+                                      use_tf=use_tf,
                                       show_msgs=show_msgs)
     if 'LJM' in identifiers:
         ljm_mae, ljm_tictoc = simple_ml(ljm_data,
@@ -225,6 +250,7 @@
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='LJM',
+                                        use_tf=use_tf,
                                         show_msgs=show_msgs)
     """
     if 'AM' in identifiers:
@@ -234,6 +260,7 @@
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='AM',
+                                      use_tf=use_tf,
                                       show_msgs=show_msgs)
     """
     if 'BOB' in identifiers:
@@ -243,6 +270,7 @@
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='BOB',
+                                        use_tf=use_tf,
                                         show_msgs=show_msgs)
 
     # End of program

diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index c79f93efa..26ff0d77b 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -22,34 +22,66 @@ SOFTWARE.
 """
 # import math
 import numpy as np
+import tensorflow as tf
 
 
 def gaussian_kernel(X1,
                     X2,
-                    sigma):
+                    sigma,
+                    use_tf=True):
     """
     Calculates the Gaussian Kernel.
     X1: first representations.
     X2: second representations.
     sigma: kernel width.
+    use_tf: if TensorFlow should be used.
     """
+    X1_size = X1.shape[0]
+    X2_size = X2.shape[0]
     i_sigma = -0.5 / (sigma*sigma)
 
-    K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
-    # Faster way of calculating the kernel (no numba support).
-    for i, x1 in enumerate(X1):
-        if X2.ndim == 3:
-            norm = np.linalg.norm(X2 - x1, axis=(1, 2))
-        else:
-            norm = np.linalg.norm(X2 - x1, axis=-1)
-        K[i, :] = np.exp(i_sigma * np.square(norm))
+    if use_tf:
+        if tf.config.experimental.list_physical_devices('GPU'):
+            with tf.device('GPU:0'):
+                X1 = tf.convert_to_tensor(X1)
+                X2 = tf.convert_to_tensor(X2)
+                X2r = tf.rank(X2)
 
-    # Old way of calculating the kernel (numba support).
-    """
-    for i, x1 in enumerate(X1):
-        for j, x2 in enumerate(X2):
-            f_norm = np.linalg.norm(x2 - x1)
-            K[i, j] = math.exp(i_sigma * f_norm**2)
-    """
+                def cond(i, _):
+                    return tf.less(i, X1_size)
+
+                def body(i, K):
+                    if X2r == 3:
+                        norm = tf.norm(X2 - X1[i], axis=(1, 2))
+                    else:
+                        norm = tf.norm(X2 - X1[i], axis=-1)
+
+                    return (i + 1,
+                            K.write(i, tf.exp(i_sigma * tf.square(norm))))
+
+                K = tf.TensorArray(dtype=tf.float64,
+                                   size=X1_size)
+                i_state = (0, K)
+                n, K = tf.while_loop(cond, body, i_state)
+                K = K.stack()
+        else:
+            raise TypeError('No GPU found, could not create Tensor objects.')
+    else:
+        K = np.zeros((X1_size, X2_size), dtype=np.float64)
+        # Faster way of calculating the kernel (no numba support).
+        for i in range(X1_size):
+            if X2.ndim == 3:
+                norm = np.linalg.norm(X2 - X1[i], axis=(1, 2))
+            else:
+                norm = np.linalg.norm(X2 - X1[i], axis=-1)
+            K[i, :] = np.exp(i_sigma * np.square(norm))
+
+        # Old way of calculating the kernel (numba support).
+ """ + for i, x1 in enumerate(X1): + for j, x2 in enumerate(X2): + f_norm = np.linalg.norm(x2 - x1) + K[i, j] = math.exp(i_sigma * f_norm**2) + """ return K diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py index 29bda6a59..c20df018e 100644 --- a/ml_exp/qm7db.py +++ b/ml_exp/qm7db.py @@ -56,7 +56,12 @@ def qm7db(db_path='data', e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64) if use_tf: - e_pbe0 = tf.convert_to_tensor(e_pbe0) - e_delta = tf.convert_to_tensor(e_delta) + # Check if there's a gpu available and use the first one. + if tf.config.experimental.list_physical_devices('GPU'): + with tf.device('GPU:0'): + e_pbe0 = tf.convert_to_tensor(e_pbe0) + e_delta = tf.convert_to_tensor(e_delta) + else: + raise TypeError('No GPU found, could not create Tensor objects.') return compounds, e_pbe0, e_delta -- cgit v1.2.3-54-g00ecf