From f9cd430d8e66cdac5d78a643f87445e3dd6bdf8e Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 10:54:36 -0700 Subject: Refactor code --- lj_matrix/__init__.py | 4 +- lj_matrix/__main__.py | 125 +------------------------------------------------- lj_matrix/misc.py | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 124 deletions(-) (limited to 'lj_matrix') diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index d59e3481c..a430aac68 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -28,6 +28,7 @@ from lj_matrix.gauss_kernel import gauss_kernel from lj_matrix.cholesky_solve import cholesky_solve from lj_matrix.do_ml import do_ml from lj_matrix.parallel_create_matrices import parallel_create_matrices +from lj_matrix.misc import plot_benchmarks # If somebody does "from package import *", this is what they will @@ -43,4 +44,5 @@ __all__ = ['read_nc_data', 'gauss_kernel', 'cholesky_solve', 'do_ml', - 'parallel_create_matrices'] + 'parallel_create_matrices', + 'plot_benchmarks'] diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index f7e4065da..98f341e1e 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -20,131 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import pandas as pd from lj_matrix.do_ml import do_ml - - -def pl(): - """ - Function for plotting the benchmarks. - """ - # Original columns. - or_cols = ['ml_type', - 'tr_size', - 'te_size', - 'kernel_s', - 'mae', - 'time', - 'lj_s', - 'lj_e', - 'date_ran'] - # Drop some original columns. - dor_cols = ['te_size', - 'kernel_s', - 'time', - 'date_ran'] - - # Read benchmarks data and drop some columns. - data_temp = pd.read_csv('data\\benchmarks.csv',) - data = pd.DataFrame(data_temp, columns=or_cols) - data = data.drop(columns=dor_cols) - - # Get the data of the first benchmarks and drop unnecesary columns. - first_data = pd.DataFrame(data, index=range(0, 22)) - first_data = first_data.drop(columns=['lj_s', 'lj_e']) - - # Columns to keep temporarily. - fd_columns = ['ml_type', - 'tr_size', - 'mae'] - - # Create new dataframes for each matrix descriptor and fill them. - first_data_cm = pd.DataFrame(columns=fd_columns) - first_data_ljm = pd.DataFrame(columns=fd_columns) - for i in range(first_data.shape[0]): - temp_df = first_data.iloc[[i]] - if first_data.at[i, 'ml_type'] == 'CM': - first_data_cm = first_data_cm.append(temp_df) - else: - first_data_ljm = first_data_ljm.append(temp_df) - - # Drop unnecesary column and rename 'mae' for later use. - first_data_cm = first_data_cm.drop(columns=['ml_type'])\ - .rename(columns={'mae': 'cm_mae'}) - first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ - .rename(columns={'mae': 'ljm_mae'}) - # print(first_data_cm) - # print(first_data_ljm) - - # Get the cm data axis so it can be joined with the ljm data axis. - cm_axis = first_data_cm.plot(x='tr_size', - y='cm_mae', - kind='line') - # Get the ljm data axis and join it with the cm one. - plot_axis = first_data_ljm.plot(ax=cm_axis, - x='tr_size', - y='ljm_mae', - kind='line') - plot_axis.set_xlabel('tr_size') - plot_axis.set_ylabel('mae') - plot_axis.set_title('mae for different tr_sizes') - # Get the figure and save it. - # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') - - # Get the rest of the benchmark data and drop unnecesary column. - new_data = data.drop(index=range(0, 22)) - new_data = new_data.drop(columns=['ml_type']) - - # Get the first set and rename it. - nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) - ndf_axis = nd_first.plot(x='tr_size', - y='1, 1', - kind='line') - last_axis = ndf_axis - for i in range(22, 99, 11): - lj_s = new_data['lj_s'][i] - lj_e = new_data['lj_e'][i] - new_mae = '{}, {}'.format(lj_s, lj_e) - nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ - .drop(columns=['lj_s', 'lj_e'])\ - .rename(columns={'mae': new_mae}) - last_axis = nd_temp.plot(ax=last_axis, - x='tr_size', - y=new_mae, - kind='line') - print(nd_temp) - - last_axis.set_xlabel('tr_size') - last_axis.set_ylabel('mae') - last_axis.set_title('mae for different parameters of lj(s)') - - last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') - - ndf_axis = nd_first.plot(x='tr_size', - y='1, 1', - kind='line') - last_axis = ndf_axis - for i in range(99, data.shape[0], 11): - lj_s = new_data['lj_s'][i] - lj_e = new_data['lj_e'][i] - new_mae = '{}, {}'.format(lj_s, lj_e) - nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ - .drop(columns=['lj_s', 'lj_e'])\ - .rename(columns={'mae': new_mae}) - last_axis = nd_temp.plot(ax=last_axis, - x='tr_size', - y=new_mae, - kind='line') - print(nd_temp) - - last_axis.set_xlabel('tr_size') - last_axis.set_ylabel('mae') - last_axis.set_title('mae for different parameters of lj(e)') - - last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') - +# from lj_matrix.misc import plot_benchmarks if __name__ == '__main__': do_ml(min_training_size=1500, max_training_size=3000) - # pl() + # plot_benchmarks() print('OK!') diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py index c50653a5c..e9142b05f 100644 --- a/lj_matrix/misc.py +++ b/lj_matrix/misc.py @@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ from colorama import init, Fore, Style +import pandas as pd init() @@ -51,3 +52,123 @@ def printc(text, color): actual_color = color_dic[color] print(actual_color + text + Style.RESET_ALL) + + +def plot_benchmarks(): + """ + For plotting the benchmarks. + """ + # Original columns. + or_cols = ['ml_type', + 'tr_size', + 'te_size', + 'kernel_s', + 'mae', + 'time', + 'lj_s', + 'lj_e', + 'date_ran'] + # Drop some original columns. + dor_cols = ['te_size', + 'kernel_s', + 'time', + 'date_ran'] + + # Read benchmarks data and drop some columns. + data_temp = pd.read_csv('data\\benchmarks.csv',) + data = pd.DataFrame(data_temp, columns=or_cols) + data = data.drop(columns=dor_cols) + + # Get the data of the first benchmarks and drop unnecesary columns. + first_data = pd.DataFrame(data, index=range(0, 22)) + first_data = first_data.drop(columns=['lj_s', 'lj_e']) + + # Columns to keep temporarily. + fd_columns = ['ml_type', + 'tr_size', + 'mae'] + + # Create new dataframes for each matrix descriptor and fill them. + first_data_cm = pd.DataFrame(columns=fd_columns) + first_data_ljm = pd.DataFrame(columns=fd_columns) + for i in range(first_data.shape[0]): + temp_df = first_data.iloc[[i]] + if first_data.at[i, 'ml_type'] == 'CM': + first_data_cm = first_data_cm.append(temp_df) + else: + first_data_ljm = first_data_ljm.append(temp_df) + + # Drop unnecesary column and rename 'mae' for later use. + first_data_cm = first_data_cm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'cm_mae'}) + first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'ljm_mae'}) + # print(first_data_cm) + # print(first_data_ljm) + + # Get the cm data axis so it can be joined with the ljm data axis. + cm_axis = first_data_cm.plot(x='tr_size', + y='cm_mae', + kind='line') + # Get the ljm data axis and join it with the cm one. + plot_axis = first_data_ljm.plot(ax=cm_axis, + x='tr_size', + y='ljm_mae', + kind='line') + plot_axis.set_xlabel('tr_size') + plot_axis.set_ylabel('mae') + plot_axis.set_title('mae for different tr_sizes') + # Get the figure and save it. + # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') + + # Get the rest of the benchmark data and drop unnecesary column. + new_data = data.drop(index=range(0, 22)) + new_data = new_data.drop(columns=['ml_type']) + + # Get the first set and rename it. + nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(22, 99, 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(s)') + + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') + + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(99, data.shape[0], 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(e)') + + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') -- cgit v1.2.3-54-g00ecf