From 1647f76052b016e4102a3af234ac47401e04819d Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Mon, 2 Mar 2020 14:33:19 -0700
Subject: Start to add tf support

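Thread a use_tf flag through qm7db() and do_ml() so the qm7 energies can
be handed to TensorFlow as tensors, and drop the opt benchmarking flag
now that gaussian_kernel() only keeps the vectorized path (the old
numba-friendly loop is left behind as an inert triple-quoted block).
cholesky_solve is no longer re-exported from ml_exp. For now do_ml()
stops at a temporary debug check that prints the tensor types and GPU
placement.

A rough usage sketch, assuming a GPU-enabled TensorFlow build and the
qm7 files under 'data' (the default db_path):

    import ml_exp

    # With use_tf=True the energies come back as tf tensors.
    compounds, e_pbe0, e_delta = ml_exp.qm7db(db_path='data', use_tf=True)

    # The same flag goes through the full pipeline (which currently
    # halts at the debug check):
    # ml_exp.do_ml(db_path='data', identifiers=['CM'], use_tf=True)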
---
 ml_exp/__init__.py |  2 --
 ml_exp/do_ml.py    | 17 ++++++++++-------
 ml_exp/kernels.py  | 34 +++++++++++++++++-----------------
 ml_exp/qm7db.py    |  9 ++++++++-
 4 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 4d672efd7..bc5afe03a 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -23,7 +23,6 @@ SOFTWARE.
 from ml_exp.compound import Compound
 from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
         get_helping_data, adjacency_matrix, check_bond, bag_of_bonds
-from ml_exp.math import cholesky_solve
 from ml_exp.qm7db import qm7db
 from ml_exp.do_ml import simple_ml, do_ml
 
@@ -34,7 +33,6 @@ __all__ = ['Compound',
            'adjacency_matrix',
            'check_bond',
            'bag_of_bonds',
-           'cholesky_solve',
            'qm7db',
            'simple_ml',
            'do_ml']
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index 480ca9b8f..d22074952 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -23,6 +23,7 @@ SOFTWARE.
 import time
 import numpy as np
 from scipy import linalg as LA
+import tensorflow as tf
 from ml_exp.misc import printc
 from ml_exp.kernels import gaussian_kernel
 from ml_exp.qm7db import qm7db
@@ -122,8 +123,8 @@ def do_ml(db_path='data',
           training_size=1500,
           test_size=None,
           sigma=1000.0,
-          opt=True,
           identifiers=['CM'],
+          use_tf=True,
           show_msgs=True):
     """
     Main function that does the whole ML process.
@@ -142,8 +143,8 @@ def do_ml(db_path='data',
     test_size: size of the test set to use. If no size is given,
         the last remaining molecules are used.
     sigma: depth of the kernel.
-    opt: if the optimized algorithm should be used. For benchmarking purposes.
     identifiers: list of names (strings) of descriptors to use.
+    use_tf: if tensorflow should be used.
     show_msgs: if debug messages should be shown.
     """
     if type(identifiers) != list:
@@ -155,7 +156,13 @@ def do_ml(db_path='data',
     tic = time.perf_counter()
     compounds, energy_pbe0, energy_delta = qm7db(db_path=db_path,
                                                  is_shuffled=is_shuffled,
-                                                 r_seed=r_seed)
+                                                 r_seed=r_seed,
+                                                 use_tf=use_tf)
+    print('Debug: checking tf device placement for the qm7 energies.')
+    print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'),
+          type(energy_delta), energy_delta.device.endswith('GPU:0'))
+    print(tf.config.experimental.list_physical_devices('GPU'))
+    raise TypeError('Debug stop: remove once tf support is verified.')
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
@@ -209,7 +216,6 @@ def do_ml(db_path='data',
                                       training_size=training_size,
                                       test_size=test_size,
                                       sigma=sigma,
-                                      opt=opt,
                                       identifier='CM',
                                       show_msgs=show_msgs)
     if 'LJM' in identifiers:
@@ -218,7 +224,6 @@ def do_ml(db_path='data',
                                         training_size=training_size,
                                         test_size=test_size,
                                         sigma=sigma,
-                                        opt=opt,
                                         identifier='LJM',
                                         show_msgs=show_msgs)
     """
@@ -228,7 +233,6 @@ def do_ml(db_path='data',
                                       training_size=training_size,
                                       test_size=test_size,
                                       sigma=sigma,
-                                      opt=opt,
                                       identifier='AM',
                                       show_msgs=show_msgs)
     """
@@ -238,7 +242,6 @@ def do_ml(db_path='data',
                                         training_size=training_size,
                                         test_size=test_size,
                                         sigma=sigma,
-                                        opt=opt,
                                         identifier='BOB',
                                         show_msgs=show_msgs)
 
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index feaf9a990..c79f93efa 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -20,36 +20,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import math
+# import math
 import numpy as np
 
 
 def gaussian_kernel(X1,
                     X2,
-                    sigma,
-                    opt=True):
+                    sigma):
     """
     Calculates the Gaussian Kernel.
     X1: first representations.
     X2: second representations.
     sigma: kernel width.
-    opt: if the optimized algorithm should be used. For benchmarking purposes.
     """
     i_sigma = -0.5 / (sigma*sigma)
 
     K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
-    if opt:
-        # Faster way of calculating the kernel (no numba support).
-        for i, x1 in enumerate(X1):
-            if X2.ndim == 3:
-                norm = np.linalg.norm(X2 - x1, axis=(1, 2))
-            else:
-                norm = np.linalg.norm(X2 - x1, axis=-1)
-            K[i, :] = np.exp(i_sigma * np.square(norm))
-    else:
-        for i, x1 in enumerate(X1):
-            for j, x2 in enumerate(X2):
-                f_norm = np.linalg.norm(x2 - x1)
-                K[i, j] = math.exp(i_sigma * f_norm**2)
+    # Faster way of calculating the kernel (no numba support).
+    for i, x1 in enumerate(X1):
+        if X2.ndim == 3:
+            norm = np.linalg.norm(X2 - x1, axis=(1, 2))
+        else:
+            norm = np.linalg.norm(X2 - x1, axis=-1)
+        K[i, :] = np.exp(i_sigma * np.square(norm))
+
+    # Old way of calculating the kernel (numba support).
+    """
+    for i, x1 in enumerate(X1):
+        for j, x2 in enumerate(X2):
+            f_norm = np.linalg.norm(x2 - x1)
+            K[i, j] = math.exp(i_sigma * f_norm**2)
+    """
 
     return K
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 3ba2c5814..29bda6a59 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -22,17 +22,20 @@ SOFTWARE.
 """
 from ml_exp.compound import Compound
 import numpy as np
+import tensorflow as tf
 import random
 
 
 def qm7db(db_path='data',
           is_shuffled=True,
-          r_seed=111):
+          r_seed=111,
+          use_tf=True):
     """
     Creates a list of compounds with the qm7 database.
     db_path: path to the database directory.
     is_shuffled: if the resulting list of compounds should be shuffled.
     r_seed: random seed to use for the shuffling.
+    use_tf: if tensorflow should be used.
     """
     fname = f'{db_path}/hof_qm7.txt'
     with open(fname, 'r') as f:
@@ -52,4 +55,8 @@ def qm7db(db_path='data',
     e_pbe0 = np.array([comp.pbe0 for comp in compounds], dtype=np.float64)
     e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64)
 
+    if use_tf:
+        e_pbe0 = tf.convert_to_tensor(e_pbe0)
+        e_delta = tf.convert_to_tensor(e_delta)
+
     return compounds, e_pbe0, e_delta
-- 
cgit v1.2.3-70-g09d2