Add methodology for ML

author: David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-12 03:14:29 -0700
committer: David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-12 03:14:29 -0700
commit: 1ca1c071f511f6e8d933b0eb021a3da05462c22c (patch)
tree: c955355576d06aef52120f91bdad0539bbbaee49
parent: a805f125b5e0158b4887bf913a4853cccde358bc (diff)
1 files changed, 50 insertions, 2 deletions
diff --git a/do_ml.py b/do_ml.py
index 740f8257d..b92b15a99 100644
--- a/do_ml.py
+++ b/do_ml.py
@@ -20,7 +20,55 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
+import time
+from colorama import Fore, Style
+import numpy as np
+from gauss_kernel import gauss_kernel
+from cholesky_solve import cholesky_solve
 
 
-def do_ml():
-    pass
+def printc(text, color):
+    """
+    Prints text normally, but in color, using colorama.
+    text: string with the text to print.
+    """
+    print(color + text + Style.RESET_ALL)
+
+
+def do_ml(desc_data,
+          desc_type,
+          energy_data,
+          training_size,
+          test_size,
+          sigma=1000.0):
+    """
+    Fits a gaussian-kernel model on the training set and prints the test MAE.
+    desc_data: descriptor (or representation) data.
+    desc_type: string with the name of the descriptor used.
+    energy_data: energy data associated with desc_data.
+    training_size: size of the training set to use.
+    test_size: size of the test set to use.
+    sigma: width parameter passed to the gaussian kernel.
+    NOTE: desc_type is just a string and is only for identification purposes.
+    Also, training is done with the first training_size entries of the data
+    and testing with the last test_size entries of the data.
+    """
+    tic = time.perf_counter()
+    printc('{} ML started.'.format(desc_type), Fore.CYAN)
+
+    Xcm_training = desc_data[:training_size]
+    Ycm_training = energy_data[:training_size]
+    Kcm_training = gauss_kernel(Xcm_training, Xcm_training, sigma)
+    alpha_cm = cholesky_solve(Kcm_training, Ycm_training)
+
+    Xcm_test = desc_data[-test_size:]
+    Ycm_test = energy_data[-test_size:]
+    Kcm_test = gauss_kernel(Xcm_test, Xcm_training, sigma)
+    Ycm_predicted = np.dot(Kcm_test, alpha_cm)
+
+    print('\tMAE for {}: {}'.format(desc_type,
+                                    np.mean(np.abs(Ycm_predicted - Ycm_test))))
+
+    toc = time.perf_counter()
+    printc('\t{} ML took {:.4f} seconds.'.format(desc_type, toc-tic),
+           Fore.GREEN)
author	David Luevano <55825613+luevano@users.noreply.github.com>	2019-12-12 03:14:29 -0700
committer	David Luevano <55825613+luevano@users.noreply.github.com>	2019-12-12 03:14:29 -0700
commit	1ca1c071f511f6e8d933b0eb021a3da05462c22c (patch)
tree	c955355576d06aef52120f91bdad0539bbbaee49
parent	a805f125b5e0158b4887bf913a4853cccde358bc (diff)