From 487bf8840846b5d4d694b38985268c308aadb36e Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Wed, 18 Dec 2019 07:21:35 -0700
Subject: Refactor files

---
 do_ml.py | 108 ---------------------------------------------------------------
 1 file changed, 108 deletions(-)
 delete mode 100644 do_ml.py

(limited to 'do_ml.py')

diff --git a/do_ml.py b/do_ml.py
deleted file mode 100644
index c88533e68..000000000
--- a/do_ml.py
+++ /dev/null
@@ -1,108 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import time
-from misc import printc
-import numpy as np
-from gauss_kernel import gauss_kernel
-from cholesky_solve import cholesky_solve
-
-
-def do_ml(desc_data,
-          energy_data,
-          training_size,
-          desc_type=None,
-          pipe=None,
-          test_size=None,
-          sigma=1000.0,
-          show_msgs=True):
-    """
-    Does the ML methodology.
-    desc_data: descriptor (or representation) data.
-    energy_data: energy data associated with desc_data.
-    training_size: size of the training set to use.
-    desc_type: string with the name of the descriptor used.
-    pipe: for multiprocessing purposes. Sends the data calculated
-        through a pipe.
-    test_size: size of the test set to use. If no size is given,
-        the last remaining molecules are used.
-    sigma: depth of the kernel.
-    show_msgs: Show debug messages or not.
-    NOTE: desc_type is just a string and is only for identification purposes.
-    Also, training is done with the first part of the data and
-    testing with the ending part of the data.
-    """
-    # Initial calculations for later use.
-    d_len = len(desc_data)
-    e_len = len(energy_data)
-
-    if not desc_type:
-        desc_type = 'NOT SPECIFIED'
-
-    if d_len != e_len:
-        printc(''.join(['ERROR. Descriptor data size different ',
-                        'than energy data size.']), 'RED')
-        return None
-
-    if training_size >= d_len:
-        printc('ERROR. Training size greater or equal than data size.', 'RED')
-        return None
-
-    if not test_size:
-        test_size = d_len - training_size
-        if test_size > 1500:
-            test_size = 1500
-
-    tic = time.perf_counter()
-    if show_msgs:
-        printc('{} ML started.'.format(desc_type), 'GREEN')
-        printc('\tTraining size: {}'.format(training_size), 'CYAN')
-        printc('\tTest size: {}'.format(test_size), 'CYAN')
-        printc('\tSigma: {}'.format(sigma), 'CYAN')
-
-    Xcm_training = desc_data[:training_size]
-    Ycm_training = energy_data[:training_size]
-    Kcm_training = gauss_kernel(Xcm_training, Xcm_training, sigma)
-    alpha_cm = cholesky_solve(Kcm_training, Ycm_training)
-
-    Xcm_test = desc_data[-test_size:]
-    Ycm_test = energy_data[-test_size:]
-    Kcm_test = gauss_kernel(Xcm_test, Xcm_training, sigma)
-    Ycm_predicted = np.dot(Kcm_test, alpha_cm)
-
-    mae = np.mean(np.abs(Ycm_predicted - Ycm_test))
-    if show_msgs:
-        printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN')
-
-    toc = time.perf_counter()
-    tictoc = toc - tic
-    if show_msgs:
-        printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc),
-               'GREEN')
-        printc('\t\tTraining size: {}'.format(training_size), 'CYAN')
-        printc('\t\tTest size: {}'.format(test_size), 'CYAN')
-        printc('\t\tSigma: {}'.format(sigma), 'CYAN')
-
-    if pipe:
-        pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc])
-
-    return mae, tictoc
-- 
cgit v1.2.3-70-g09d2