summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-03-02 14:33:19 -0700
committer: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-03-02 14:33:19 -0700
commit: 1647f76052b016e4102a3af234ac47401e04819d (patch)
tree: 224c292377839449b00d99f5dafabf2b756b1fb4
parent: 01aba134690889e05e02529ea861442f3fed3832 (diff)
Start to add tf support
-rw-r--r-- ml_exp/__init__.py 2
-rw-r--r-- ml_exp/do_ml.py 17
-rw-r--r-- ml_exp/kernels.py 34
-rw-r--r-- ml_exp/qm7db.py 9
4 files changed, 35 insertions, 27 deletions
diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 4d672efd7..bc5afe03a 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -23,7 +23,6 @@ SOFTWARE.
from ml_exp.compound import Compound
from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
get_helping_data, adjacency_matrix, check_bond, bag_of_bonds
-from ml_exp.math import cholesky_solve
from ml_exp.qm7db import qm7db
from ml_exp.do_ml import simple_ml, do_ml
@@ -34,7 +33,6 @@ __all__ = ['Compound',
'adjacency_matrix',
'check_bond',
'bag_of_bonds',
- 'cholesky_solve',
'qm7db',
'simple_ml',
'do_ml']
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index 480ca9b8f..d22074952 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -23,6 +23,7 @@ SOFTWARE.
import time
import numpy as np
from scipy import linalg as LA
+import tensorflow as tf
from ml_exp.misc import printc
from ml_exp.kernels import gaussian_kernel
from ml_exp.qm7db import qm7db
@@ -122,8 +123,8 @@ def do_ml(db_path='data',
training_size=1500,
test_size=None,
sigma=1000.0,
- opt=True,
identifiers=['CM'],
+ use_tf=True,
show_msgs=True):
"""
Main function that does the whole ML process.
@@ -142,8 +143,8 @@ def do_ml(db_path='data',
test_size: size of the test set to use. If no size is given,
the last remaining molecules are used.
sigma: depth of the kernel.
- opt: if the optimized algorithm should be used. For benchmarking purposes.
identifiers: list of names (strings) of descriptors to use.
+ use_tf: if tensorflow should be used.
show_msgs: if debug messages should be shown.
"""
if type(identifiers) != list:
@@ -155,7 +156,13 @@ def do_ml(db_path='data',
tic = time.perf_counter()
compounds, energy_pbe0, energy_delta = qm7db(db_path=db_path,
is_shuffled=is_shuffled,
- r_seed=r_seed)
+ r_seed=r_seed,
+ use_tf=use_tf)
+ print('test')
+ print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'),
+ type(energy_delta), energy_delta.device.endswith('GPU:0'))
+ print(tf.config.experimental.list_physical_devices('GPU'))
+ raise TypeError('test')
toc = time.perf_counter()
tictoc = toc - tic
if show_msgs:
@@ -209,7 +216,6 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
- opt=opt,
identifier='CM',
show_msgs=show_msgs)
if 'LJM' in identifiers:
@@ -218,7 +224,6 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
- opt=opt,
identifier='LJM',
show_msgs=show_msgs)
"""
@@ -228,7 +233,6 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
- opt=opt,
identifier='AM',
show_msgs=show_msgs)
"""
@@ -238,7 +242,6 @@ def do_ml(db_path='data',
training_size=training_size,
test_size=test_size,
sigma=sigma,
- opt=opt,
identifier='BOB',
show_msgs=show_msgs)
diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py
index feaf9a990..c79f93efa 100644
--- a/ml_exp/kernels.py
+++ b/ml_exp/kernels.py
@@ -20,36 +20,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
-import math
+# import math
import numpy as np
def gaussian_kernel(X1,
X2,
- sigma,
- opt=True):
+ sigma):
"""
Calculates the Gaussian Kernel.
X1: first representations.
X2: second representations.
sigma: kernel width.
- opt: if the optimized algorithm should be used. For benchmarking purposes.
"""
i_sigma = -0.5 / (sigma*sigma)
K = np.zeros((X1.shape[0], X2.shape[0]), dtype=np.float64)
- if opt:
- # Faster way of calculating the kernel (no numba support).
- for i, x1 in enumerate(X1):
- if X2.ndim == 3:
- norm = np.linalg.norm(X2 - x1, axis=(1, 2))
- else:
- norm = np.linalg.norm(X2 - x1, axis=-1)
- K[i, :] = np.exp(i_sigma * np.square(norm))
- else:
- for i, x1 in enumerate(X1):
- for j, x2 in enumerate(X2):
- f_norm = np.linalg.norm(x2 - x1)
- K[i, j] = math.exp(i_sigma * f_norm**2)
+ # Faster way of calculating the kernel (no numba support).
+ for i, x1 in enumerate(X1):
+ if X2.ndim == 3:
+ norm = np.linalg.norm(X2 - x1, axis=(1, 2))
+ else:
+ norm = np.linalg.norm(X2 - x1, axis=-1)
+ K[i, :] = np.exp(i_sigma * np.square(norm))
+
+ # Old way of calculating the kernel (numba support).
+ """
+ for i, x1 in enumerate(X1):
+ for j, x2 in enumerate(X2):
+ f_norm = np.linalg.norm(x2 - x1)
+ K[i, j] = math.exp(i_sigma * f_norm**2)
+ """
return K
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
index 3ba2c5814..29bda6a59 100644
--- a/ml_exp/qm7db.py
+++ b/ml_exp/qm7db.py
@@ -22,17 +22,20 @@ SOFTWARE.
"""
from ml_exp.compound import Compound
import numpy as np
+import tensorflow as tf
import random
def qm7db(db_path='data',
is_shuffled=True,
- r_seed=111):
+ r_seed=111,
+ use_tf=True):
"""
Creates a list of compounds with the qm7 database.
db_path: path to the database directory.
is_shuffled: if the resulting list of compounds should be shuffled.
r_seed: random seed to use for the shuffling.
+ use_tf: if tensorflow should be used.
"""
fname = f'{db_path}/hof_qm7.txt'
with open(fname, 'r') as f:
@@ -52,4 +55,8 @@ def qm7db(db_path='data',
e_pbe0 = np.array([comp.pbe0 for comp in compounds], dtype=np.float64)
e_delta = np.array([comp.delta for comp in compounds], dtype=np.float64)
+ if use_tf:
+ e_pbe0 = tf.convert_to_tensor(e_pbe0)
+ e_delta = tf.convert_to_tensor(e_delta)
+
return compounds, e_pbe0, e_delta