diff options
-rw-r--r-- | ml_exp/do_ml.py | 71 | ||||
-rw-r--r-- | ml_exp/kernels.py | 85 |
2 files changed, 53 insertions, 103 deletions
diff --git a/ml_exp/do_ml.py b/ml_exp/do_ml.py index b30d73811..e1aece986 100644 --- a/ml_exp/do_ml.py +++ b/ml_exp/do_ml.py @@ -30,7 +30,7 @@ except ImportError: print('Tensorflow couldn\'t be imported. Maybe it is not installed.') TF_AV = False from ml_exp.misc import printc -from ml_exp.kernels import gaussian_kernel, laplacian_kernel +from ml_exp.kernels import laplauss_kernel from ml_exp.qm7db import qm7db @@ -41,6 +41,7 @@ def simple_ml(descriptors, sigma=1000.0, opt=True, identifier=None, + laplauss='gauss', use_tf=True, show_msgs=True): """ @@ -93,16 +94,11 @@ def simple_ml(descriptors, with tf.device('GPU:0'): X_tr = descriptors[:training_size] Y_tr = energies[:training_size] - if identifier == 'BOB': - K_tr = laplacian_kernel(X_tr, - X_tr, - sigma, - use_tf=use_tf) - else: - K_tr = gaussian_kernel(X_tr, - X_tr, - sigma, - use_tf=use_tf) + K_tr = laplauss_kernel(X_tr, + X_tr, + sigma, + laplauss=laplauss, + use_tf=use_tf) # Adding a small value on the diagonal for cho_solve. dv = tf.linalg.tensor_diag(tf.constant(1e-8, @@ -115,16 +111,11 @@ def simple_ml(descriptors, X_te = descriptors[-test_size:] Y_te = energies[-test_size:] - if identifier == 'BOB': - K_te = laplacian_kernel(X_te, - X_tr, - sigma, - use_tf=use_tf) - else: - K_te = gaussian_kernel(X_te, - X_tr, - sigma, - use_tf=use_tf) + K_te = laplauss_kernel(X_te, + X_tr, + sigma, + laplauss=laplauss, + use_tf=use_tf) Y_te = tf.expand_dims(Y_te, 1) Y_pr = tf.tensordot(K_te, alpha, 1) @@ -133,16 +124,11 @@ def simple_ml(descriptors, else: X_tr = descriptors[:training_size] Y_tr = energies[:training_size] - if identifier == 'BOB': - K_tr = laplacian_kernel(X_tr, - X_tr, - sigma, - use_tf=use_tf) - else: - K_tr = gaussian_kernel(X_tr, - X_tr, - sigma, - use_tf=use_tf) + K_tr = laplauss_kernel(X_tr, + X_tr, + sigma, + laplauss=laplauss, + use_tf=use_tf) # Adding a small value on the diagonal for cho_solve. K_tr[np.diag_indices_from(K_tr)] += 1e-8 @@ -151,16 +137,11 @@ def simple_ml(descriptors, X_te = descriptors[-test_size:] Y_te = energies[-test_size:] - if identifier == 'BOB': - K_te = laplacian_kernel(X_te, - X_tr, - sigma, - use_tf=use_tf) - else: - K_te = gaussian_kernel(X_te, - X_tr, - sigma, - use_tf=use_tf) + K_te = laplauss_kernel(X_te, + X_tr, + sigma, + laplauss=laplauss, + use_tf=use_tf) Y_pr = np.dot(K_te, alpha) mae = np.mean(np.abs(Y_pr - Y_te)) @@ -276,8 +257,8 @@ def do_ml(db_path='data', cm_data = tf.convert_to_tensor(cm_data) if 'LJM' in identifiers: ljm_data = tf.convert_to_tensor(ljm_data) - # if 'AM' in identifiers: - # am_data = tf.convert_to_tensor(am_data) + if 'AM' in identifiers: + am_data = tf.convert_to_tensor(am_data) if 'BOB' in identifiers: bob_data = tf.convert_to_tensor(bob_data) else: @@ -296,6 +277,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='CM', + laplauss='gauss', use_tf=use_tf, show_msgs=show_msgs) if 'LJM' in identifiers: @@ -305,6 +287,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='LJM', + laplauss='gauss', use_tf=use_tf, show_msgs=show_msgs) if 'AM' in identifiers: @@ -314,6 +297,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='AM', + laplauss='gauss', use_tf=use_tf, show_msgs=show_msgs) if 'BOB' in identifiers: @@ -323,6 +307,7 @@ def do_ml(db_path='data', test_size=test_size, sigma=sigma, identifier='BOB', + laplauss='laplace', use_tf=use_tf, show_msgs=show_msgs) diff --git a/ml_exp/kernels.py b/ml_exp/kernels.py index 488232392..11679460b 100644 --- a/ml_exp/kernels.py +++ b/ml_exp/kernels.py @@ -30,15 +30,17 @@ except ImportError: TF_AV = False -def gaussian_kernel(X1, +def laplauss_kernel(X1, X2, sigma, + laplauss='gauss', use_tf=True): """ - Calculates the Gaussian Kernel. + Calculates the Lpalacian or Gaussian Kernel. X1: first representations. X2: second representations. sigma: kernel width. + laplauss: which kernel to calculate. use_tf: if tensorflow should be used. """ # If tf is to be used but couldn't be imported, don't try to use it. @@ -47,62 +49,12 @@ def gaussian_kernel(X1, X1_size = X1.shape[0] X2_size = X2.shape[0] - i_sigma = -0.5 / (sigma*sigma) - - if use_tf: - if tf.config.experimental.list_physical_devices('GPU'): - with tf.device('GPU:0'): - X1 = tf.convert_to_tensor(X1) - X2 = tf.convert_to_tensor(X2) - X2r = tf.rank(X2) - - def cond(i, _): - return tf.less(i, X1_size) - - def body(i, K): - if X2r == 3: - norm = tf.norm(X2 - X1[i], axis=(1, 2)) - else: - norm = tf.norm(X2 - X1[i], axis=-1) - - return (i + 1, - K.write(i, tf.exp(i_sigma * tf.square(norm)))) - - K = tf.TensorArray(dtype=tf.float64, - size=X1_size) - i_state = (0, K) - n, K = tf.while_loop(cond, body, i_state) - K = K.stack() + if laplauss == 'gauss': + i_sigma = -0.5 / (sigma*sigma) + elif laplauss == 'laplace': + i_sigma = -0.5 / sigma else: - K = np.zeros((X1_size, X2_size), dtype=np.float64) - # Faster way of calculating the kernel (no numba support). - for i in range(X1_size): - if X2.ndim == 3: - norm = np.linalg.norm(X2 - X1[i], axis=(1, 2)) - else: - norm = np.linalg.norm(X2 - X1[i], axis=-1) - K[i, :] = np.exp(i_sigma * np.square(norm)) - return K - - -def laplacian_kernel(X1, - X2, - sigma, - use_tf=True): - """ - Calculates the Laplacian Kernel. - X1: first representations. - X2: second representations. - sigma: kernel width. - use_tf: if tensorflow should be used. - """ - # If tf is to be used but couldn't be imported, don't try to use it. - if use_tf and not TF_AV: - use_tf = False - - X1_size = X1.shape[0] - X2_size = X2.shape[0] - i_sigma = -0.5 / sigma + i_sigma = -0.5 / (sigma**2) if use_tf: if tf.config.experimental.list_physical_devices('GPU'): @@ -120,8 +72,15 @@ def laplacian_kernel(X1, else: norm = tf.norm(X2 - X1[i], axis=-1) - return (i + 1, - K.write(i, tf.exp(i_sigma * norm))) + if laplauss == 'gauss': + return (i + 1, + K.write(i, tf.exp(i_sigma * tf.square(norm)))) + elif laplauss == 'laplace': + return (i + 1, + K.write(i, tf.exp(i_sigma * norm))) + else: + return (i + 1, + K.write(i, tf.exp(i_sigma * tf.square(norm)))) K = tf.TensorArray(dtype=tf.float64, size=X1_size) @@ -136,5 +95,11 @@ def laplacian_kernel(X1, norm = np.linalg.norm(X2 - X1[i], axis=(1, 2)) else: norm = np.linalg.norm(X2 - X1[i], axis=-1) - K[i, :] = np.exp(i_sigma * norm) + if laplauuss == 'gauss': + K[i, :] = np.exp(i_sigma * np.square(norm)) + elif laplauce == 'laplace': + K[i, :] = np.exp(i_sigma * norm) + else: + K[i, :] = np.exp(i_sigma * np.square(norm)) + return K |