From f9cd430d8e66cdac5d78a643f87445e3dd6bdf8e Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Sat, 28 Dec 2019 10:54:36 -0700
Subject: Refactor code

---
 lj_matrix/__init__.py |   4 +-
 lj_matrix/__main__.py | 125 +-------------------------------------------------
 lj_matrix/misc.py     | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 124 deletions(-)

(limited to 'lj_matrix')

diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index d59e3481c..a430aac68 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -28,6 +28,7 @@ from lj_matrix.gauss_kernel import gauss_kernel
 from lj_matrix.cholesky_solve import cholesky_solve
 from lj_matrix.do_ml import do_ml
 from lj_matrix.parallel_create_matrices import parallel_create_matrices
+from lj_matrix.misc import plot_benchmarks
 
 
 # If somebody does "from package import *", this is what they will
@@ -43,4 +44,5 @@ __all__ = ['read_nc_data',
            'gauss_kernel',
            'cholesky_solve',
            'do_ml',
-           'parallel_create_matrices']
+           'parallel_create_matrices',
+           'plot_benchmarks']
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index f7e4065da..98f341e1e 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -20,131 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-import pandas as pd
 from lj_matrix.do_ml import do_ml
-
-
-def pl():
-    """
-    Function for plotting the benchmarks.
-    """
-    # Original columns.
-    or_cols = ['ml_type',
-               'tr_size',
-               'te_size',
-               'kernel_s',
-               'mae',
-               'time',
-               'lj_s',
-               'lj_e',
-               'date_ran']
-    # Drop some original columns.
-    dor_cols = ['te_size',
-                'kernel_s',
-                'time',
-                'date_ran']
-
-    # Read benchmarks data and drop some columns.
-    data_temp = pd.read_csv('data\\benchmarks.csv',)
-    data = pd.DataFrame(data_temp, columns=or_cols)
-    data = data.drop(columns=dor_cols)
-
-    # Get the data of the first benchmarks and drop unnecesary columns.
-    first_data = pd.DataFrame(data, index=range(0, 22))
-    first_data = first_data.drop(columns=['lj_s', 'lj_e'])
-
-    # Columns to keep temporarily.
-    fd_columns = ['ml_type',
-                  'tr_size',
-                  'mae']
-
-    # Create new dataframes for each matrix descriptor and fill them.
-    first_data_cm = pd.DataFrame(columns=fd_columns)
-    first_data_ljm = pd.DataFrame(columns=fd_columns)
-    for i in range(first_data.shape[0]):
-        temp_df = first_data.iloc[[i]]
-        if first_data.at[i, 'ml_type'] == 'CM':
-            first_data_cm = first_data_cm.append(temp_df)
-        else:
-            first_data_ljm = first_data_ljm.append(temp_df)
-
-    # Drop unnecesary column and rename 'mae' for later use.
-    first_data_cm = first_data_cm.drop(columns=['ml_type'])\
-        .rename(columns={'mae': 'cm_mae'})
-    first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
-        .rename(columns={'mae': 'ljm_mae'})
-    # print(first_data_cm)
-    # print(first_data_ljm)
-
-    # Get the cm data axis so it can be joined with the ljm data axis.
-    cm_axis = first_data_cm.plot(x='tr_size',
-                                 y='cm_mae',
-                                 kind='line')
-    # Get the ljm data axis and join it with the cm one.
-    plot_axis = first_data_ljm.plot(ax=cm_axis,
-                                    x='tr_size',
-                                    y='ljm_mae',
-                                    kind='line')
-    plot_axis.set_xlabel('tr_size')
-    plot_axis.set_ylabel('mae')
-    plot_axis.set_title('mae for different tr_sizes')
-    # Get the figure and save it.
-    # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
-
-    # Get the rest of the benchmark data and drop unnecesary column.
-    new_data = data.drop(index=range(0, 22))
-    new_data = new_data.drop(columns=['ml_type'])
-
-    # Get the first set and rename it.
-    nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
-    ndf_axis = nd_first.plot(x='tr_size',
-                             y='1, 1',
-                             kind='line')
-    last_axis = ndf_axis
-    for i in range(22, 99, 11):
-        lj_s = new_data['lj_s'][i]
-        lj_e = new_data['lj_e'][i]
-        new_mae = '{}, {}'.format(lj_s, lj_e)
-        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
-            .drop(columns=['lj_s', 'lj_e'])\
-            .rename(columns={'mae': new_mae})
-        last_axis = nd_temp.plot(ax=last_axis,
-                                 x='tr_size',
-                                 y=new_mae,
-                                 kind='line')
-        print(nd_temp)
-
-    last_axis.set_xlabel('tr_size')
-    last_axis.set_ylabel('mae')
-    last_axis.set_title('mae for different parameters of lj(s)')
-
-    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
-
-    ndf_axis = nd_first.plot(x='tr_size',
-                             y='1, 1',
-                             kind='line')
-    last_axis = ndf_axis
-    for i in range(99, data.shape[0], 11):
-        lj_s = new_data['lj_s'][i]
-        lj_e = new_data['lj_e'][i]
-        new_mae = '{}, {}'.format(lj_s, lj_e)
-        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
-            .drop(columns=['lj_s', 'lj_e'])\
-            .rename(columns={'mae': new_mae})
-        last_axis = nd_temp.plot(ax=last_axis,
-                                 x='tr_size',
-                                 y=new_mae,
-                                 kind='line')
-        print(nd_temp)
-
-    last_axis.set_xlabel('tr_size')
-    last_axis.set_ylabel('mae')
-    last_axis.set_title('mae for different parameters of lj(e)')
-
-    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
-
+# from lj_matrix.misc import plot_benchmarks
 
 if __name__ == '__main__':
     do_ml(min_training_size=1500, max_training_size=3000)
-    # pl()
+    # plot_benchmarks()
     print('OK!')
diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py
index c50653a5c..e9142b05f 100644
--- a/lj_matrix/misc.py
+++ b/lj_matrix/misc.py
@@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from colorama import init, Fore, Style
+import pandas as pd
 
 init()
 
@@ -51,3 +52,123 @@ def printc(text, color):
         actual_color = color_dic[color]
 
     print(actual_color + text + Style.RESET_ALL)
+
+
+def plot_benchmarks():
+    """
+    Plot the benchmark mae against tr_size and save two PDF figures.
+    """
+    # Columns selected from the benchmarks CSV.
+    or_cols = ['ml_type',
+               'tr_size',
+               'te_size',
+               'kernel_s',
+               'mae',
+               'time',
+               'lj_s',
+               'lj_e',
+               'date_ran']
+    # Columns that are dropped because the plots do not use them.
+    dor_cols = ['te_size',
+                'kernel_s',
+                'time',
+                'date_ran']
+
+    # Read benchmarks data and drop some columns.
+    data_temp = pd.read_csv('data\\benchmarks.csv',)
+    data = pd.DataFrame(data_temp, columns=or_cols)
+    data = data.drop(columns=dor_cols)
+
+    # Get the first 22 benchmark rows and drop unnecessary columns.
+    first_data = pd.DataFrame(data, index=range(0, 22))
+    first_data = first_data.drop(columns=['lj_s', 'lj_e'])
+
+    # Columns to keep temporarily.
+    fd_columns = ['ml_type',
+                  'tr_size',
+                  'mae']
+
+    # Split rows by ml_type: 'CM' rows into one frame, the rest into another.
+    first_data_cm = pd.DataFrame(columns=fd_columns)
+    first_data_ljm = pd.DataFrame(columns=fd_columns)
+    for i in range(first_data.shape[0]):
+        temp_df = first_data.iloc[[i]]
+        if first_data.at[i, 'ml_type'] == 'CM':
+            first_data_cm = first_data_cm.append(temp_df)
+        else:
+            first_data_ljm = first_data_ljm.append(temp_df)
+
+    # Drop unnecessary column and rename 'mae' for later use.
+    first_data_cm = first_data_cm.drop(columns=['ml_type'])\
+        .rename(columns={'mae': 'cm_mae'})
+    first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
+        .rename(columns={'mae': 'ljm_mae'})
+    # print(first_data_cm)
+    # print(first_data_ljm)
+
+    # Get the cm data axis so it can be joined with the ljm data axis.
+    cm_axis = first_data_cm.plot(x='tr_size',
+                                 y='cm_mae',
+                                 kind='line')
+    # Get the ljm data axis and join it with the cm one.
+    plot_axis = first_data_ljm.plot(ax=cm_axis,
+                                    x='tr_size',
+                                    y='ljm_mae',
+                                    kind='line')
+    plot_axis.set_xlabel('tr_size')
+    plot_axis.set_ylabel('mae')
+    plot_axis.set_title('mae for different tr_sizes')
+    # Saving this first figure is currently disabled.
+    # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
+
+    # Get the remaining rows (22 onward) and drop the unnecessary column.
+    new_data = data.drop(index=range(0, 22))
+    new_data = new_data.drop(columns=['ml_type'])
+
+    # Rename the ljm data column to '1, 1' so it is plotted with the rest.
+    nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
+    ndf_axis = nd_first.plot(x='tr_size',
+                             y='1, 1',
+                             kind='line')
+    last_axis = ndf_axis
+    for i in range(22, 99, 11):
+        lj_s = new_data['lj_s'][i]
+        lj_e = new_data['lj_e'][i]
+        new_mae = '{}, {}'.format(lj_s, lj_e)
+        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+            .drop(columns=['lj_s', 'lj_e'])\
+            .rename(columns={'mae': new_mae})
+        last_axis = nd_temp.plot(ax=last_axis,
+                                 x='tr_size',
+                                 y=new_mae,
+                                 kind='line')
+        print(nd_temp)
+
+    last_axis.set_xlabel('tr_size')
+    last_axis.set_ylabel('mae')
+    last_axis.set_title('mae for different parameters of lj(s)')
+
+    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
+
+    ndf_axis = nd_first.plot(x='tr_size',
+                             y='1, 1',
+                             kind='line')
+    last_axis = ndf_axis
+    for i in range(99, data.shape[0], 11):
+        lj_s = new_data['lj_s'][i]
+        lj_e = new_data['lj_e'][i]
+        new_mae = '{}, {}'.format(lj_s, lj_e)
+        nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+            .drop(columns=['lj_s', 'lj_e'])\
+            .rename(columns={'mae': new_mae})
+        last_axis = nd_temp.plot(ax=last_axis,
+                                 x='tr_size',
+                                 y=new_mae,
+                                 kind='line')
+        print(nd_temp)
+
+    last_axis.set_xlabel('tr_size')
+    last_axis.set_ylabel('mae')
+    last_axis.set_title('mae for different parameters of lj(e)')
+
+    last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
-- 
cgit v1.2.3-70-g09d2