summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-28 10:54:36 -0700
committer David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-28 10:54:36 -0700
commit f9cd430d8e66cdac5d78a643f87445e3dd6bdf8e (patch)
tree 7b5484bef3fdd3868b2175ff7f0b354d014e2f77
parent cdbb1ac890cb0d062cdb2f216c347f681fbfa7b8 (diff)
Refactor code
-rw-r--r-- lj_matrix/__init__.py 4
-rw-r--r-- lj_matrix/__main__.py 125
-rw-r--r-- lj_matrix/misc.py 121
3 files changed, 126 insertions, 124 deletions
diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py
index d59e3481c..a430aac68 100644
--- a/lj_matrix/__init__.py
+++ b/lj_matrix/__init__.py
@@ -28,6 +28,7 @@ from lj_matrix.gauss_kernel import gauss_kernel
from lj_matrix.cholesky_solve import cholesky_solve
from lj_matrix.do_ml import do_ml
from lj_matrix.parallel_create_matrices import parallel_create_matrices
+from lj_matrix.misc import plot_benchmarks
# If somebody does "from package import *", this is what they will
@@ -43,4 +44,5 @@ __all__ = ['read_nc_data',
'gauss_kernel',
'cholesky_solve',
'do_ml',
- 'parallel_create_matrices']
+ 'parallel_create_matrices',
+ 'plot_benchmarks']
diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py
index f7e4065da..98f341e1e 100644
--- a/lj_matrix/__main__.py
+++ b/lj_matrix/__main__.py
@@ -20,131 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
-import pandas as pd
from lj_matrix.do_ml import do_ml
-
-
-def pl():
- """
- Function for plotting the benchmarks.
- """
- # Original columns.
- or_cols = ['ml_type',
- 'tr_size',
- 'te_size',
- 'kernel_s',
- 'mae',
- 'time',
- 'lj_s',
- 'lj_e',
- 'date_ran']
- # Drop some original columns.
- dor_cols = ['te_size',
- 'kernel_s',
- 'time',
- 'date_ran']
-
- # Read benchmarks data and drop some columns.
- data_temp = pd.read_csv('data\\benchmarks.csv',)
- data = pd.DataFrame(data_temp, columns=or_cols)
- data = data.drop(columns=dor_cols)
-
- # Get the data of the first benchmarks and drop unnecesary columns.
- first_data = pd.DataFrame(data, index=range(0, 22))
- first_data = first_data.drop(columns=['lj_s', 'lj_e'])
-
- # Columns to keep temporarily.
- fd_columns = ['ml_type',
- 'tr_size',
- 'mae']
-
- # Create new dataframes for each matrix descriptor and fill them.
- first_data_cm = pd.DataFrame(columns=fd_columns)
- first_data_ljm = pd.DataFrame(columns=fd_columns)
- for i in range(first_data.shape[0]):
- temp_df = first_data.iloc[[i]]
- if first_data.at[i, 'ml_type'] == 'CM':
- first_data_cm = first_data_cm.append(temp_df)
- else:
- first_data_ljm = first_data_ljm.append(temp_df)
-
- # Drop unnecesary column and rename 'mae' for later use.
- first_data_cm = first_data_cm.drop(columns=['ml_type'])\
- .rename(columns={'mae': 'cm_mae'})
- first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
- .rename(columns={'mae': 'ljm_mae'})
- # print(first_data_cm)
- # print(first_data_ljm)
-
- # Get the cm data axis so it can be joined with the ljm data axis.
- cm_axis = first_data_cm.plot(x='tr_size',
- y='cm_mae',
- kind='line')
- # Get the ljm data axis and join it with the cm one.
- plot_axis = first_data_ljm.plot(ax=cm_axis,
- x='tr_size',
- y='ljm_mae',
- kind='line')
- plot_axis.set_xlabel('tr_size')
- plot_axis.set_ylabel('mae')
- plot_axis.set_title('mae for different tr_sizes')
- # Get the figure and save it.
- # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
-
- # Get the rest of the benchmark data and drop unnecesary column.
- new_data = data.drop(index=range(0, 22))
- new_data = new_data.drop(columns=['ml_type'])
-
- # Get the first set and rename it.
- nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
- ndf_axis = nd_first.plot(x='tr_size',
- y='1, 1',
- kind='line')
- last_axis = ndf_axis
- for i in range(22, 99, 11):
- lj_s = new_data['lj_s'][i]
- lj_e = new_data['lj_e'][i]
- new_mae = '{}, {}'.format(lj_s, lj_e)
- nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
- .drop(columns=['lj_s', 'lj_e'])\
- .rename(columns={'mae': new_mae})
- last_axis = nd_temp.plot(ax=last_axis,
- x='tr_size',
- y=new_mae,
- kind='line')
- print(nd_temp)
-
- last_axis.set_xlabel('tr_size')
- last_axis.set_ylabel('mae')
- last_axis.set_title('mae for different parameters of lj(s)')
-
- last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
-
- ndf_axis = nd_first.plot(x='tr_size',
- y='1, 1',
- kind='line')
- last_axis = ndf_axis
- for i in range(99, data.shape[0], 11):
- lj_s = new_data['lj_s'][i]
- lj_e = new_data['lj_e'][i]
- new_mae = '{}, {}'.format(lj_s, lj_e)
- nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
- .drop(columns=['lj_s', 'lj_e'])\
- .rename(columns={'mae': new_mae})
- last_axis = nd_temp.plot(ax=last_axis,
- x='tr_size',
- y=new_mae,
- kind='line')
- print(nd_temp)
-
- last_axis.set_xlabel('tr_size')
- last_axis.set_ylabel('mae')
- last_axis.set_title('mae for different parameters of lj(e)')
-
- last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')
-
+# from lj_matrix.misc import plot_benchmarks
if __name__ == '__main__':
do_ml(min_training_size=1500, max_training_size=3000)
- # pl()
+ # plot_benchmarks()
print('OK!')
diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py
index c50653a5c..e9142b05f 100644
--- a/lj_matrix/misc.py
+++ b/lj_matrix/misc.py
@@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from colorama import init, Fore, Style
+import pandas as pd
init()
@@ -51,3 +52,123 @@ def printc(text, color):
actual_color = color_dic[color]
print(actual_color + text + Style.RESET_ALL)
+
+
+def plot_benchmarks():
+ """
+ For plotting the benchmarks.
+ """
+ # Original columns.
+ or_cols = ['ml_type',
+ 'tr_size',
+ 'te_size',
+ 'kernel_s',
+ 'mae',
+ 'time',
+ 'lj_s',
+ 'lj_e',
+ 'date_ran']
+ # Drop some original columns.
+ dor_cols = ['te_size',
+ 'kernel_s',
+ 'time',
+ 'date_ran']
+
+ # Read benchmarks data and drop some columns.
+ data_temp = pd.read_csv('data\\benchmarks.csv',)
+ data = pd.DataFrame(data_temp, columns=or_cols)
+ data = data.drop(columns=dor_cols)
+
+ # Get the data of the first benchmarks and drop unnecessary columns.
+ first_data = pd.DataFrame(data, index=range(0, 22))
+ first_data = first_data.drop(columns=['lj_s', 'lj_e'])
+
+ # Columns to keep temporarily.
+ fd_columns = ['ml_type',
+ 'tr_size',
+ 'mae']
+
+ # Create new dataframes for each matrix descriptor and fill them.
+ first_data_cm = pd.DataFrame(columns=fd_columns)
+ first_data_ljm = pd.DataFrame(columns=fd_columns)
+ for i in range(first_data.shape[0]):
+ temp_df = first_data.iloc[[i]]
+ if first_data.at[i, 'ml_type'] == 'CM':
+ first_data_cm = first_data_cm.append(temp_df)
+ else:
+ first_data_ljm = first_data_ljm.append(temp_df)
+
+ # Drop unnecessary column and rename 'mae' for later use.
+ first_data_cm = first_data_cm.drop(columns=['ml_type'])\
+ .rename(columns={'mae': 'cm_mae'})
+ first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\
+ .rename(columns={'mae': 'ljm_mae'})
+ # print(first_data_cm)
+ # print(first_data_ljm)
+
+ # Get the cm data axis so it can be joined with the ljm data axis.
+ cm_axis = first_data_cm.plot(x='tr_size',
+ y='cm_mae',
+ kind='line')
+ # Get the ljm data axis and join it with the cm one.
+ plot_axis = first_data_ljm.plot(ax=cm_axis,
+ x='tr_size',
+ y='ljm_mae',
+ kind='line')
+ plot_axis.set_xlabel('tr_size')
+ plot_axis.set_ylabel('mae')
+ plot_axis.set_title('mae for different tr_sizes')
+ # Get the figure and save it.
+ # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf')
+
+ # Get the rest of the benchmark data and drop unnecessary column.
+ new_data = data.drop(index=range(0, 22))
+ new_data = new_data.drop(columns=['ml_type'])
+
+ # Get the first set and rename it.
+ nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'})
+ ndf_axis = nd_first.plot(x='tr_size',
+ y='1, 1',
+ kind='line')
+ last_axis = ndf_axis
+ for i in range(22, 99, 11):
+ lj_s = new_data['lj_s'][i]
+ lj_e = new_data['lj_e'][i]
+ new_mae = '{}, {}'.format(lj_s, lj_e)
+ nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+ .drop(columns=['lj_s', 'lj_e'])\
+ .rename(columns={'mae': new_mae})
+ last_axis = nd_temp.plot(ax=last_axis,
+ x='tr_size',
+ y=new_mae,
+ kind='line')
+ print(nd_temp)
+
+ last_axis.set_xlabel('tr_size')
+ last_axis.set_ylabel('mae')
+ last_axis.set_title('mae for different parameters of lj(s)')
+
+ last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf')
+
+ ndf_axis = nd_first.plot(x='tr_size',
+ y='1, 1',
+ kind='line')
+ last_axis = ndf_axis
+ for i in range(99, data.shape[0], 11):
+ lj_s = new_data['lj_s'][i]
+ lj_e = new_data['lj_e'][i]
+ new_mae = '{}, {}'.format(lj_s, lj_e)
+ nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\
+ .drop(columns=['lj_s', 'lj_e'])\
+ .rename(columns={'mae': new_mae})
+ last_axis = nd_temp.plot(ax=last_axis,
+ x='tr_size',
+ y=new_mae,
+ kind='line')
+ print(nd_temp)
+
+ last_axis.set_xlabel('tr_size')
+ last_axis.set_ylabel('mae')
+ last_axis.set_title('mae for different parameters of lj(e)')
+
+ last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf')