From 1de0cb1f33ac3a72a286f8f67187162440001a4b Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Sat, 7 Mar 2020 10:55:01 -0700
Subject: Rewrite kernel usage

---
 ml_exp/do_ml.py   | 71 ++++++++++++++++++----------------------------
 ml_exp/kernels.py | 85 ++++++++++++++++---------------------------------------
 2 files changed, 53 insertions(+), 103 deletions(-)

diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index b30d73811..e1aece986 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -30,7 +30,7 @@ except ImportError:
     print('Tensorflow couldn\'t be imported. Maybe it is not installed.')
     TF_AV = False
 from ml_exp.misc import printc
-from ml_exp.kernels import gaussian_kernel, laplacian_kernel
+from ml_exp.kernels import laplauss_kernel
 from ml_exp.qm7db import qm7db
 
 
@@ -41,6 +41,7 @@ def simple_ml(descriptors,
               sigma=1000.0,
               opt=True,
               identifier=None,
+              laplauss='gauss',
               use_tf=True,
               show_msgs=True):
     """
@@ -93,16 +94,11 @@ def simple_ml(descriptors,
             with tf.device('GPU:0'):
                 X_tr = descriptors[:training_size]
                 Y_tr = energies[:training_size]
-                if identifier == 'BOB':
-                    K_tr = laplacian_kernel(X_tr,
-                                            X_tr,
-                                            sigma,
-                                            use_tf=use_tf)
-                else:
-                    K_tr = gaussian_kernel(X_tr,
-                                           X_tr,
-                                           sigma,
-                                           use_tf=use_tf)
+                K_tr = laplauss_kernel(X_tr,
+                                       X_tr,
+                                       sigma,
+                                       laplauss=laplauss,
+                                       use_tf=use_tf)
 
                 # Adding a small value on the diagonal for cho_solve.
                 dv = tf.linalg.tensor_diag(tf.constant(1e-8,
@@ -115,16 +111,11 @@ def simple_ml(descriptors,
 
                 X_te = descriptors[-test_size:]
                 Y_te = energies[-test_size:]
-                if identifier == 'BOB':
-                    K_te = laplacian_kernel(X_te,
-                                            X_tr,
-                                            sigma,
-                                            use_tf=use_tf)
-                else:
-                    K_te = gaussian_kernel(X_te,
-                                           X_tr,
-                                           sigma,
-                                           use_tf=use_tf)
+                K_te = laplauss_kernel(X_te,
+                                       X_tr,
+                                       sigma,
+                                       laplauss=laplauss,
+                                       use_tf=use_tf)
 
                 Y_te = tf.expand_dims(Y_te, 1)
                 Y_pr = tf.tensordot(K_te, alpha, 1)
@@ -133,16 +124,11 @@ def simple_ml(descriptors,
     else:
         X_tr = descriptors[:training_size]
         Y_tr = energies[:training_size]
-        if identifier == 'BOB':
-            K_tr = laplacian_kernel(X_tr,
-                                    X_tr,
-                                    sigma,
-                                    use_tf=use_tf)
-        else:
-            K_tr = gaussian_kernel(X_tr,
-                                   X_tr,
-                                   sigma,
-                                   use_tf=use_tf)
+        K_tr = laplauss_kernel(X_tr,
+                               X_tr,
+                               sigma,
+                               laplauss=laplauss,
+                               use_tf=use_tf)
 
         # Adding a small value on the diagonal for cho_solve.
         K_tr[np.diag_indices_from(K_tr)] += 1e-8
@@ -151,16 +137,11 @@ def simple_ml(descriptors,
 
         X_te = descriptors[-test_size:]
         Y_te = energies[-test_size:]
-        if identifier == 'BOB':
-            K_te = laplacian_kernel(X_te,
-                                    X_tr,
-                                    sigma,
-                                    use_tf=use_tf)
-        else:
-            K_te = gaussian_kernel(X_te,
-                                   X_tr,
-                                   sigma,
-                                   use_tf=use_tf)
+        K_te = laplauss_kernel(X_te,
+                               X_tr,
+                               sigma,
+                               laplauss=laplauss,
+                               use_tf=use_tf)
         Y_pr = np.dot(K_te, alpha)
 
         mae = np.mean(np.abs(Y_pr - Y_te))
@@ -276,8 +257,8 @@ def do_ml(db_path='data',
                     cm_data = tf.convert_to_tensor(cm_data)
                 if 'LJM' in identifiers:
                     ljm_data = tf.convert_to_tensor(ljm_data)
-                # if 'AM' in identifiers:
-                #     am_data = tf.convert_to_tensor(am_data)
+                if 'AM' in identifiers:
+                    am_data = tf.convert_to_tensor(am_data)
                 if 'BOB' in identifiers:
                     bob_data = tf.convert_to_tensor(bob_data)
         else:
@@ -296,6 +277,7 @@ def do_ml(db_path='data',
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='CM',
+                                      laplauss='gauss',
                                       use_tf=use_tf,
                                       show_msgs=show_msgs)
     if 'LJM' in identifiers:
@@ -305,6 +287,7 @@ def do_ml(db_path='data',
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='LJM',
+                                        laplauss='gauss',
                                         use_tf=use_tf,
                                         show_msgs=show_msgs)
     if 'AM' in identifiers:
@@ -314,6 +297,7 @@ def do_ml(db_path='data',
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='AM',
+                                      laplauss='gauss',
                                       use_tf=use_tf,
                                       show_msgs=show_msgs)
     if 'BOB' in identifiers:
@@ -323,6 +307,7 @@ def do_ml(db_path='data',
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='BOB',
+                                        laplauss='laplace',
                                         use_tf=use_tf,
                                         show_msgs=show_msgs)
 
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index 488232392..11679460b 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -30,15 +30,17 @@ except ImportError:
     TF_AV = False
 
 
-def gaussian_kernel(X1,
+def laplauss_kernel(X1,
                     X2,
                     sigma,
+                    laplauss='gauss',
                     use_tf=True):
     """
-    Calculates the Gaussian Kernel.
+    Calculates the Lpalacian or Gaussian Kernel.
     X1: first representations.
     X2: second representations.
     sigma: kernel width.
+    laplauss: which kernel to calculate.
     use_tf: if tensorflow should be used.
     """
     # If tf is to be used but couldn't be imported, don't try to use it.
@@ -47,62 +49,12 @@ def gaussian_kernel(X1,
 
     X1_size = X1.shape[0]
     X2_size = X2.shape[0]
-    i_sigma = -0.5 / (sigma*sigma)
-
-    if use_tf:
-        if tf.config.experimental.list_physical_devices('GPU'):
-            with tf.device('GPU:0'):
-                X1 = tf.convert_to_tensor(X1)
-                X2 = tf.convert_to_tensor(X2)
-                X2r = tf.rank(X2)
-
-                def cond(i, _):
-                    return tf.less(i, X1_size)
-
-                def body(i, K):
-                    if X2r == 3:
-                        norm = tf.norm(X2 - X1[i], axis=(1, 2))
-                    else:
-                        norm = tf.norm(X2 - X1[i], axis=-1)
-
-                    return (i + 1,
-                            K.write(i, tf.exp(i_sigma * tf.square(norm))))
-
-                K = tf.TensorArray(dtype=tf.float64,
-                                   size=X1_size)
-                i_state = (0, K)
-                n, K = tf.while_loop(cond, body, i_state)
-                K = K.stack()
+    if laplauss == 'gauss':
+        i_sigma = -0.5 / (sigma*sigma)
+    elif laplauss == 'laplace':
+        i_sigma = -0.5 / sigma
     else:
-        K = np.zeros((X1_size, X2_size), dtype=np.float64)
-        # Faster way of calculating the kernel (no numba support).
-        for i in range(X1_size):
-            if X2.ndim == 3:
-                norm = np.linalg.norm(X2 - X1[i], axis=(1, 2))
-            else:
-                norm = np.linalg.norm(X2 - X1[i], axis=-1)
-            K[i, :] = np.exp(i_sigma * np.square(norm))
-    return K
-
-
-def laplacian_kernel(X1,
-                     X2,
-                     sigma,
-                     use_tf=True):
-    """
-    Calculates the Laplacian Kernel.
-    X1: first representations.
-    X2: second representations.
-    sigma: kernel width.
-    use_tf: if tensorflow should be used.
-    """
-    # If tf is to be used but couldn't be imported, don't try to use it.
-    if use_tf and not TF_AV:
-        use_tf = False
-
-    X1_size = X1.shape[0]
-    X2_size = X2.shape[0]
-    i_sigma = -0.5 / sigma
+        i_sigma = -0.5 / (sigma**2)
 
     if use_tf:
         if tf.config.experimental.list_physical_devices('GPU'):
@@ -120,8 +72,15 @@ def laplacian_kernel(X1,
                     else:
                         norm = tf.norm(X2 - X1[i], axis=-1)
 
-                    return (i + 1,
-                            K.write(i, tf.exp(i_sigma * norm)))
+                    if laplauss == 'gauss':
+                        return (i + 1,
+                                K.write(i, tf.exp(i_sigma * tf.square(norm))))
+                    elif laplauss == 'laplace':
+                        return (i + 1,
+                                K.write(i, tf.exp(i_sigma * norm)))
+                    else:
+                        return (i + 1,
+                                K.write(i, tf.exp(i_sigma * tf.square(norm))))
 
                 K = tf.TensorArray(dtype=tf.float64,
                                    size=X1_size)
@@ -136,5 +95,11 @@ def laplacian_kernel(X1,
                 norm = np.linalg.norm(X2 - X1[i], axis=(1, 2))
             else:
                 norm = np.linalg.norm(X2 - X1[i], axis=-1)
-            K[i, :] = np.exp(i_sigma * norm)
+            if laplauuss == 'gauss':
+                K[i, :] = np.exp(i_sigma * np.square(norm))
+            elif laplauce == 'laplace':
+                K[i, :] = np.exp(i_sigma * norm)
+            else:
+                K[i, :] = np.exp(i_sigma * np.square(norm))
+
     return K
-- 
cgit v1.2.3-70-g09d2