diff options
Diffstat (limited to 'ml_exp/do_ml.py')
-rw-r--r-- | ml_exp/do_ml.py | 55 |
1 files changed, 42 insertions, 13 deletions
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py index d22074952..379d0efd0 100644 --- a/ml_exp/do_ml.py +++ b/ml_exp/do_ml.py @@ -31,11 +31,12 @@ from ml_exp.qm7db import qm7db def simple_ml(descriptors, energies, - training_size, + training_size=1500, test_size=None, sigma=1000.0, opt=True, identifier=None, + use_tf=True, show_msgs=True): """ Basic ML methodology for a single descriptor type. @@ -47,6 +48,7 @@ def simple_ml(descriptors, sigma: depth of the kernel. opt: if the optimized algorithm should be used. For benchmarking purposes. identifier: string with the name of the descriptor used. + use_tf: if tensorflow should be used. show_msgs: if debug messages should be shown. NOTE: identifier is just a string and is only for identification purposes. Also, training is done with the first part of the data and @@ -82,19 +84,33 @@ def simple_ml(descriptors, K_training = gaussian_kernel(X_training, X_training, sigma, - opt=opt) - alpha = LA.cho_solve(LA.cho_factor(K_training), Y_training) + use_tf=use_tf) + if use_tf: + # Y_training = tf.expand_dims(Y_training, 1) + alpha = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_training), + Y_training) + else: + alpha = LA.cho_solve(LA.cho_factor(K_training), + Y_training) X_test = descriptors[-test_size:] Y_test = energies[-test_size:] K_test = gaussian_kernel(X_test, X_training, sigma, - opt=opt) - Y_predicted = np.dot(K_test, - alpha) + use_tf=use_tf) + if use_tf: + # Y_test = tf.expand_dims(Y_test, 1) + Y_predicted = tf.tensordot(K_test, alpha, 1) + else: + Y_predicted = np.dot(K_test, alpha) + + print('Ducky') + if use_tf: + mae = tf.reduce_mean(tf.abs(Y_predicted - Y_test)) + else: + mae = np.mean(np.abs(Y_predicted - Y_test)) - mae = np.mean(np.abs(Y_predicted - Y_test)) if show_msgs: printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN') @@ -158,11 +174,6 @@ def do_ml(db_path='data', is_shuffled=is_shuffled, r_seed=r_seed, use_tf=use_tf) - print('test') - print(type(energy_pbe0), energy_pbe0.device.endswith('GPU:0'), - type(energy_delta), energy_delta.device.endswith('GPU:0')) - print(tf.config.experimental.list_physical_devices('GPU')) - raise TypeError('test') toc = time.perf_counter() tictoc = toc - tic if show_msgs: @@ -192,7 +203,7 @@ def do_ml(db_path='data', if 'BOB' in identifiers: compound.gen_bob(size=size) - # Create a numpy array for the descriptors. + # Create a numpy array (or tensorflow tensor) for the descriptors. if 'CM' in identifiers: cm_data = np.array([comp.cm for comp in compounds], dtype=np.float64) if 'LJM' in identifiers: @@ -204,6 +215,20 @@ def do_ml(db_path='data', if 'BOB' in identifiers: bob_data = np.array([comp.bob for comp in compounds], dtype=np.float64) + if use_tf: + if tf.config.experimental.list_physical_devices('GPU'): + with tf.device('GPU:0'): + if 'CM' in identifiers: + cm_data = tf.convert_to_tensor(cm_data) + if 'LJM' in identifiers: + ljm_data = tf.convert_to_tensor(ljm_data) + # if 'AM' in identifiers: + # am_data = tf.convert_to_tensor(am_data) + if 'BOB' in identifiers: + bob_data = tf.convert_to_tensor(bob_data) + else: + raise TypeError('No GPU found, could not create Tensor objects.') + toc = time.perf_counter() tictoc = toc - tic if show_msgs: @@ -217,6 +242,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='CM', + use_tf=use_tf, show_msgs=show_msgs) if 'LJM' in identifiers: ljm_mae, ljm_tictoc = simple_ml(ljm_data, @@ -225,6 +251,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='LJM', + use_tf=use_tf, show_msgs=show_msgs) """ if 'AM' in identifiers: @@ -234,6 +261,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='AM', + use_tf=use_tf, show_msgs=show_msgs) """ if 'BOB' in identifiers: @@ -243,6 +271,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='BOB', + use_tf=use_tf, show_msgs=show_msgs) # End of program |