Diffstat (limited to 'ml_exp/do_ml.py')
-rw-r--r--   ml_exp/do_ml.py   55
1 file changed, 42 insertions(+), 13 deletions(-)
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py
index d22074952..379d0efd0 100644
--- a/ml_exp/do_ml.py
+++ b/ml_exp/do_ml.py
@@ -31,11 +31,12 @@ from ml_exp.qm7db import qm7db
 def simple_ml(descriptors,
               energies,
-              training_size,
+              training_size=1500,
               test_size=None,
               sigma=1000.0,
               opt=True,
               identifier=None,
+              use_tf=True,
               show_msgs=True):
     """
     Basic ML methodology for a single descriptor type.
@@ -47,6 +48,7 @@ def simple_ml(descriptors,
     sigma: depth of the kernel.
     opt: if the optimized algorithm should be used. For benchmarking purposes.
     identifier: string with the name of the descriptor used.
+    use_tf: if tensorflow should be used.
     show_msgs: if debug messages should be shown.
     NOTE: identifier is just a string and is only for identification purposes.
     Also, training is done with the first part of the data and
@@ -82,19 +84,33 @@ def simple_ml(descriptors,
     K_training = gaussian_kernel(X_training,
                                  X_training,
                                  sigma,
-                                 opt=opt)
-    alpha = LA.cho_solve(LA.cho_factor(K_training), Y_training)
+                                 use_tf=use_tf)
+    if use_tf:
+        # Y_training = tf.expand_dims(Y_training, 1)
+        alpha = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_training),
+                                         Y_training)
+    else:
+        alpha = LA.cho_solve(LA.cho_factor(K_training),
+                             Y_training)
     X_test = descriptors[-test_size:]
     Y_test = energies[-test_size:]
     K_test = gaussian_kernel(X_test,
                              X_training,
                              sigma,
-                             opt=opt)
-    Y_predicted = np.dot(K_test,
-                         alpha)
+                             use_tf=use_tf)
+    if use_tf:
+        # Y_test = tf.expand_dims(Y_test, 1)
+        Y_predicted = tf.tensordot(K_test, alpha, 1)
+    else:
+        Y_predicted = np.dot(K_test, alpha)
+
+    print('Ducky')
+    if use_tf:
+        mae = tf.reduce_mean(tf.abs(Y_predicted - Y_test))
+    else:
+        mae = np.mean(np.abs(Y_predicted - Y_test))
-    mae = np.mean(np.abs(Y_predicted - Y_test))
     if show_msgs:
         printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
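Note on the hunk above: the TensorFlow branch mirrors the existing NumPy/SciPy path (Cholesky-factor the training kernel, solve for the regression coefficients, predict with a matrix product). The following standalone sketch checks that the two paths agree; it uses toy random data rather than the module's gaussian_kernel, assumes LA stands for scipy.linalg (where cho_factor/cho_solve live), and uses an illustrative sigma. tf.linalg.cholesky_solve expects a 2-D right-hand side, which is presumably what the commented-out tf.expand_dims calls are for.

import numpy as np
import scipy.linalg as LA
import tensorflow as tf

# Toy Gaussian kernel between random descriptors (illustrative only).
rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))
Y = rng.normal(size=(50, 1))          # 2-D so both solvers accept it as-is
sigma = 5.0                           # illustrative; the module defaults to 1000.0
sq_dists = np.sum((X[:, None, :] - X[None, :, :])**2, axis=-1)
K = np.exp(-sq_dists / (2 * sigma**2)) + 1e-8 * np.eye(50)  # jitter added here for stability

# NumPy/SciPy path, as in the removed lines.
alpha_np = LA.cho_solve(LA.cho_factor(K), Y)
pred_np = np.dot(K, alpha_np)

# TensorFlow path, as in the added lines.
K_tf = tf.convert_to_tensor(K)
Y_tf = tf.convert_to_tensor(Y)
alpha_tf = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_tf), Y_tf)
pred_tf = tf.tensordot(K_tf, alpha_tf, 1)

print(np.allclose(alpha_np, alpha_tf.numpy()))  # expected: True
print(np.allclose(pred_np, pred_tf.numpy()))    # expected: True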
@@ -158,11 +174,6 @@ def do_ml(db_path='data',
                                                  is_shuffled=is_shuffled,
                                                  r_seed=r_seed,
                                                  use_tf=use_tf)
-    print('test')
-    print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'),
-          type(energy_delta), energy_delta.device.endswith('GPU:0'))
-    print(tf.config.experimental.list_physical_devices('GPU'))
-    raise TypeError('test')
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
@@ -192,7 +203,7 @@ def do_ml(db_path='data',
         if 'BOB' in identifiers:
             compound.gen_bob(size=size)
-    # Create a numpy array for the descriptors.
+    # Create a numpy array (or tensorflow tensor) for the descriptors.
     if 'CM' in identifiers:
         cm_data = np.array([comp.cm for comp in compounds], dtype=np.float64)
     if 'LJM' in identifiers:
@@ -204,6 +215,20 @@ def do_ml(db_path='data',
     if 'BOB' in identifiers:
         bob_data = np.array([comp.bob for comp in compounds], dtype=np.float64)
+    if use_tf:
+        if tf.config.experimental.list_physical_devices('GPU'):
+            with tf.device('GPU:0'):
+                if 'CM' in identifiers:
+                    cm_data = tf.convert_to_tensor(cm_data)
+                if 'LJM' in identifiers:
+                    ljm_data = tf.convert_to_tensor(ljm_data)
+                # if 'AM' in identifiers:
+                #     am_data = tf.convert_to_tensor(am_data)
+                if 'BOB' in identifiers:
+                    bob_data = tf.convert_to_tensor(bob_data)
+        else:
+            raise TypeError('No GPU found, could not create Tensor objects.')
+
     toc = time.perf_counter()
     tictoc = toc - tic
     if show_msgs:
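Note on the hunk above: it converts the NumPy descriptor arrays into TensorFlow tensors pinned to the first GPU, and raises if no GPU is present. A minimal sketch of that check-and-place pattern (the array below is a random placeholder, not one of the module's descriptor matrices):

import numpy as np
import tensorflow as tf

data = np.random.rand(100, 23 * 23)  # placeholder descriptor array

# Same GPU check the hunk uses before creating tensors.
if tf.config.experimental.list_physical_devices('GPU'):
    with tf.device('GPU:0'):
        data_tensor = tf.convert_to_tensor(data)
    print(data_tensor.device)  # should end with 'GPU:0'
else:
    # The commit raises TypeError here; a CPU fallback would simply
    # call tf.convert_to_tensor(data) without the device context.
    raise TypeError('No GPU found, could not create Tensor objects.')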
@@ -217,6 +242,7 @@ def do_ml(db_path='data',
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='CM',
+                                      use_tf=use_tf,
                                       show_msgs=show_msgs)
     if 'LJM' in identifiers:
         ljm_mae, ljm_tictoc = simple_ml(ljm_data,
@@ -225,6 +251,7 @@ def do_ml(db_path='data',
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='LJM',
+                                        use_tf=use_tf,
                                         show_msgs=show_msgs)
     """
     if 'AM' in identifiers:
@@ -234,6 +261,7 @@ def do_ml(db_path='data',
                                       test_size=test_size,
                                       sigma=sigma,
                                       identifier='AM',
+                                      use_tf=use_tf,
                                       show_msgs=show_msgs)
     """
     if 'BOB' in identifiers:
@@ -243,6 +271,7 @@ def do_ml(db_path='data',
                                         test_size=test_size,
                                         sigma=sigma,
                                         identifier='BOB',
+                                        use_tf=use_tf,
                                         show_msgs=show_msgs)
     # End of program
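For context, a hedged usage sketch of the updated simple_ml signature as it appears in this diff. The arrays are random placeholders instead of the qm7db data, the shapes are illustrative, and the NumPy path (use_tf=False) is used; with use_tf=True the do_ml hunks above suggest the inputs would already be GPU tensors.

import numpy as np
from ml_exp.do_ml import simple_ml

# Placeholder descriptors and energies; do_ml builds these from qm7db.
descriptors = np.random.rand(2000, 23 * 23)
energies = np.random.rand(2000)

# Returns the MAE and the elapsed time, matching the
# `cm_mae, cm_tictoc = simple_ml(...)` pattern in the hunks above.
mae, tictoc = simple_ml(descriptors,
                        energies,
                        training_size=1500,
                        test_size=500,
                        sigma=1000.0,
                        identifier='CM',
                        use_tf=False,
                        show_msgs=True)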