diff options
author | David Luevano Alvarado <55825613+luevano@users.noreply.github.com> | 2020-03-02 14:33:19 -0700 |
---|---|---|
committer | David Luevano Alvarado <55825613+luevano@users.noreply.github.com> | 2020-03-02 14:33:19 -0700 |
commit | 1647f76052b016e4102a3af234ac47401e04819d (patch) | |
tree | 224c292377839449b00d99f5dafabf2b756b1fb4 | |
parent | 01aba134690889e05e02529ea861442f3fed3832 (diff) |
Start to add tf support
-rw-r--r-- | ml_exp/__init__.py | 2 |
-rw-r--r-- | ml_exp/do_ml.py | 17 |
-rw-r--r-- | ml_exp/kernels.py | 34 |
-rw-r--r-- | ml_exp/qm7db.py | 9 |
4 files changed, 35 insertions, 27 deletions
diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py index 4d672efd7..bc5afe03a 100644 --- a/ml_exp/__init__.py +++ b/ml_exp/__init__.py @@ -23,7 +23,6 @@ SOFTWARE. from ml_exp.compound import Compound from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ get_helping_data, adjacency_matrix, check_bond, bag_of_bonds -from ml_exp.math import cholesky_solve from ml_exp.qm7db import qm7db from ml_exp.do_ml import simple_ml, do_ml @@ -34,7 +33,6 @@ __all__ = ['Compound', 'adjacency_matrix', 'check_bond', 'bag_of_bonds', - 'cholesky_solve', 'qm7db', 'simple_ml', 'do_ml'] diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py index 480ca9b8f..d22074952 100644 --- a/ml_exp/do_ml.py +++ b/ml_exp/do_ml.py @@ -23,6 +23,7 @@ SOFTWARE. import time import numpy as np from scipy import linalg as LA +import tensorflow as tf from ml_exp.misc import printc from ml_exp.kernels import gaussian_kernel from ml_exp.qm7db import qm7db @@ -122,8 +123,8 @@ def do_ml(db_path='data', training_size=1500, test_size=None, sigma=1000.0, - opt=True, identifiers=['CM'], + use_tf=True, show_msgs=True): """ Main function that does the whole ML process. @@ -142,8 +143,8 @@ def do_ml(db_path='data', test_size: size of the test set to use. If no size is given, the last remaining molecules are used. sigma: depth of the kernel. - opt: if the optimized algorithm should be used. For benchmarking purposes. identifiers: list of names (strings) of descriptors to use. + use_tf: if tensorflow should be used. show_msgs: if debug messages should be shown. 
""" if type(identifiers) != list: @@ -155,7 +156,13 @@ def do_ml(db_path='data', tic = time.perf_counter() compounds, energy_pbe0, energy_delta = qm7db(db_path=db_path, is_shuffled=is_shuffled, - r_seed=r_seed) + r_seed=r_seed, + use_tf=use_tf) + print('test') + print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'), + type(energy_delta), energy_delta.device.endswith('GPU:0')) + print(tf.config.experimental.list_physical_devices('GPU')) + raise TypeError('test') toc = time.perf_counter() tictoc = toc - tic if show_msgs: @@ -209,7 +216,6 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, - opt=opt, identifier='CM', show_msgs=show_msgs) if 'LJM' in identifiers: @@ -218,7 +224,6 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, - opt=opt, identifier='LJM', show_msgs=show_msgs) """ @@ -228,7 +233,6 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, - opt=opt, identifier='AM', show_msgs=show_msgs) """ @@ -238,7 +242,6 @@ def do_ml(db_path='data', training_size=training_size, test_size=test_size, sigma=sigma, - opt=opt, identifier='BOB', show_msgs=show_msgs) diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py index feaf9a990..c79f93efa 100644 --- a/ml_exp/kernels.py +++ b/ml_exp/kernels.py @@ -20,36 +20,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import math +# import math import numpy as np def gaussian_kernel(X1, X2, - sigma, - opt=True): + sigma): """ Calculates the Gaussian Kernel. X1: first representations. X2: second representations. sigma: kernel width. - opt: if the optimized algorithm should be used. For benchmarking purposes. """ i_sigma = -0.5 / (sigma*sigma) K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64) - if opt: - # Faster way of calculating the kernel (no numba support). 
- for i, x1 in enumerate(X1): - if X2.ndim == 3: - norm = np.linalg.norm(X2 - x1, axis=(1, 2)) - else: - norm = np.linalg.norm(X2 - x1, axis=-1) - K[i, :] = np.exp(i_sigma * np.square(norm)) - else: - for i, x1 in enumerate(X1): - for j, x2 in enumerate(X2): - f_norm = np.linalg.norm(x2 - x1) - K[i, j] = math.exp(i_sigma * f_norm**2) + # Faster way of calculating the kernel (no numba support). + for i, x1 in enumerate(X1): + if X2.ndim == 3: + norm = np.linalg.norm(X2 - x1, axis=(1, 2)) + else: + norm = np.linalg.norm(X2 - x1, axis=-1) + K[i, :] = np.exp(i_sigma * np.square(norm)) + + # Old way of calculating the kernel (numba support). + """ + for i, x1 in enumerate(X1): + for j, x2 in enumerate(X2): + f_norm = np.linalg.norm(x2 - x1) + K[i, j] = math.exp(i_sigma * f_norm**2) + """ return K diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py index 3ba2c5814..29bda6a59 100644 --- a/ml_exp/qm7db.py +++ b/ml_exp/qm7db.py @@ -22,17 +22,20 @@ SOFTWARE. """ from ml_exp.compound import Compound import numpy as np +import tensorflow as tf import random def qm7db(db_path='data', is_shuffled=True, - r_seed=111): + r_seed=111, + use_tf=True): """ Creates a list of compounds with the qm7 database. db_path: path to the database directory. is_shuffled: if the resulting list of compounds should be shuffled. r_seed: random seed to use for the shuffling. + use_tf: if tensorflow should be used. """ fname = f'{db_path}/hof_qm7.txt' with open(fname, 'r') as f: @@ -52,4 +55,8 @@ def qm7db(db_path='data', e_pbe0 = np.array([comp.pbe0 for comp in compounds], dtype=np.float64) e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64) + if use_tf: + e_pbe0 = tf.convert_to_tensor(e_pbe0) + e_delta = tf.convert_to_tensor(e_delta) + return compounds, e_pbe0, e_delta |