From ada04a42528c29ea86e48f0a19cb8723c8bc0a66 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 20:49:41 -0700 Subject: First working parallelism --- c_matrix.py | 10 +++++-- lj_matrix.py | 10 +++++-- main.py | 97 +++++++++++++++++++++++++++++++++++++++--------------------- 3 files changed, 80 insertions(+), 37 deletions(-) diff --git a/c_matrix.py b/c_matrix.py index 2bc4d4c0c..75cc4d8a2 100644 --- a/c_matrix.py +++ b/c_matrix.py @@ -30,7 +30,7 @@ from numpy.linalg import eig def c_matrix(mol_data, nc_data, max_len=25, - as_eig=False, + as_eig=True, bohr_radius_units=False): """ Creates the Coulomb Matrix from the molecule data given. @@ -150,13 +150,16 @@ def c_matrix(mol_data, def c_matrix_multiple(mol_data, nc_data, + pipe=None, max_len=25, - as_eig=False, + as_eig=True, bohr_radius_units=False): """ Calculates the Coulomb Matrix of multiple molecules. mol_data: molecule data, matrix of atom coordinates. nc_data: nuclear charge data, array of atom data. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. max_len: maximum amount of atoms in molecule. as_eig: if data should be returned as matrix or array of eigenvalues. bohr_radius_units: if units should be in bohr's radius units. @@ -167,6 +170,9 @@ def c_matrix_multiple(mol_data, cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) + if pipe: + pipe.send(cm_data) + toc = time.perf_counter() printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN') diff --git a/lj_matrix.py b/lj_matrix.py index 6769bc0c3..55e729c56 100644 --- a/lj_matrix.py +++ b/lj_matrix.py @@ -30,7 +30,7 @@ from numpy.linalg import eig def lj_matrix(mol_data, nc_data, max_len=25, - as_eig=False, + as_eig=True, bohr_radius_units=False): """ Creates the Lennard-Jones Matrix from the molecule data given. @@ -168,13 +168,16 @@ def lj_matrix(mol_data, def lj_matrix_multiple(mol_data, nc_data, + pipe=None, max_len=25, - as_eig=False, + as_eig=True, bohr_radius_units=False): """ Calculates the Lennard-Jones Matrix of multiple molecules. mol_data: molecule data, matrix of atom coordinates. nc_data: nuclear charge data, array of atom data. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. max_len: maximum amount of atoms in molecule. as_eig: if data should be returned as matrix or array of eigenvalues. bohr_radius_units: if units should be in bohr's radius units. @@ -185,6 +188,9 @@ def lj_matrix_multiple(mol_data, ljm_data = np.array([lj_matrix(mol, nc, max_len, as_eig, bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) + if pipe: + pipe.send(ljm_data) + toc = time.perf_counter() printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN') diff --git a/main.py b/main.py index 734069920..9d7d3a645 100644 --- a/main.py +++ b/main.py @@ -21,40 +21,71 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import time -from misc import printc +from multiprocessing import Process, Pipe # import matplotlib.pyplot as plt +from misc import printc from read_qm7_data import read_qm7_data from c_matrix import c_matrix_multiple from lj_matrix import lj_matrix_multiple -from do_ml import do_ml - - -# Initialization time. -init_time = time.perf_counter() - -# Data reading. -zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ - read_qm7_data() - -# Matrices calculation. -cm_data = c_matrix_multiple(molecules, nuclear_charge, as_eig=True) -ljm_data = lj_matrix_multiple(molecules, nuclear_charge, as_eig=True) - -# ML calculation. -do_ml(cm_data, - energy_pbe0, - 1000, - test_size=100, - sigma=1000.0, - desc_type='CM') -do_ml(ljm_data, - energy_pbe0, - 1000, - test_size=100, - sigma=1000.0, - desc_type='L-JM') - -# End of program -end_time = time.perf_counter() -printc('Program took {:.4f} seconds of runtime.'.format(end_time - init_time), - 'CYAN') +# from do_ml import do_ml + + +def main(): + # Initialization time. + init_time = time.perf_counter() + procs = [] + pipes = [] + + # Data reading. + zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ + read_qm7_data() + + # Matrices calculation. + cm_recv, cm_send = Pipe() + pipes.append(cm_send) + p1 = Process(target=c_matrix_multiple, + args=(molecules, nuclear_charge, cm_send)) + procs.append(p1) + p1.start() + + ljm_recv, ljm_send = Pipe() + pipes.append(ljm_send) + p2 = Process(target=lj_matrix_multiple, + args=(molecules, nuclear_charge, ljm_send)) + procs.append(p2) + p2.start() + + cm_data = cm_recv.recv() + ljm_data = ljm_recv.recv() + + for pipe, proc in zip(pipes, procs): + pipe.close() + proc.join() + + print(type(cm_data), cm_data[0]) + print(type(ljm_data), ljm_data[0]) + + """ + # ML calculation. + do_ml(cm_data, + energy_pbe0, + 1000, + test_size=100, + sigma=1000.0, + desc_type='CM') + do_ml(ljm_data, + energy_pbe0, + 1000, + test_size=100, + sigma=1000.0, + desc_type='L-JM') + """ + + # End of program + end_time = time.perf_counter() + printc('Program took {:.4f} seconds.'.format(end_time - init_time), + 'CYAN') + + +if __name__ == '__main__': + main() -- cgit v1.2.3-54-g00ecf From 91bd79feefa7b1bcfae79b4efd39955a6e301391 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 21:11:04 -0700 Subject: Test --- main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main.py b/main.py index 9d7d3a645..c8df47800 100644 --- a/main.py +++ b/main.py @@ -30,6 +30,7 @@ from lj_matrix import lj_matrix_multiple # from do_ml import do_ml +# Test def main(): # Initialization time. init_time = time.perf_counter() -- cgit v1.2.3-54-g00ecf From 651ec37ba8efa8fc1ffe3f490182e68bc468969d Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 23:06:10 -0700 Subject: First working parallelization test --- c_matrix.py | 6 +++--- do_ml.py | 23 ++++++++++++++------ lj_matrix.py | 6 +++--- main.py | 69 ++++++++++++++++++++++++++++++++++++------------------------ 4 files changed, 63 insertions(+), 41 deletions(-) diff --git a/c_matrix.py b/c_matrix.py index 75cc4d8a2..4de711a1b 100644 --- a/c_matrix.py +++ b/c_matrix.py @@ -170,10 +170,10 @@ def c_matrix_multiple(mol_data, cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) - if pipe: - pipe.send(cm_data) - toc = time.perf_counter() printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN') + if pipe: + pipe.send(cm_data) + return cm_data diff --git a/do_ml.py b/do_ml.py index 63a6fc671..87e8b72db 100644 --- a/do_ml.py +++ b/do_ml.py @@ -30,19 +30,22 @@ from cholesky_solve import cholesky_solve def do_ml(desc_data, energy_data, training_size, + desc_type=None, + pipe=None, test_size=None, sigma=1000.0, - desc_type=None, show_msgs=True): """ Does the ML methodology. desc_data: descriptor (or representation) data. energy_data: energy data associated with desc_data. training_size: size of the training set to use. + desc_type: string with the name of the descriptor used. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. test_size: size of the test set to use. If no size is given, the last remaining molecules are used. sigma: depth of the kernel. - desc_type: string with the name of the descriptor used. show_msgs: Show debug messages or not. NOTE: desc_type is just a string and is only for identification purposes. Also, training is done with the first part of the data and @@ -69,10 +72,10 @@ def do_ml(desc_data, tic = time.perf_counter() if show_msgs: - printc('{} ML started, with parameters:'.format(desc_type), 'CYAN') - printc('\tTraining size: {}'.format(training_size), 'BLUE') - printc('\tTest size: {}'.format(test_size), 'BLUE') - printc('\tSigma: {}'.format(sigma), 'BLUE') + printc('{} ML started.'.format(desc_type), 'GREEN') + printc('\tTraining size: {}'.format(training_size), 'CYAN') + printc('\tTest size: {}'.format(test_size), 'CYAN') + printc('\tSigma: {}'.format(sigma), 'CYAN') Xcm_training = desc_data[:training_size] Ycm_training = energy_data[:training_size] @@ -86,12 +89,18 @@ def do_ml(desc_data, mae = np.mean(np.abs(Ycm_predicted - Ycm_test)) if show_msgs: - print('\tMAE for {}: {:.4f}'.format(desc_type, mae)) + printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN') toc = time.perf_counter() tictoc = toc - tic if show_msgs: printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc), 'GREEN') + printc('\t\tTraining size: {}'.format(training_size), 'CYAN') + printc('\t\tTest size: {}'.format(test_size), 'CYAN') + printc('\t\tSigma: {}'.format(sigma), 'CYAN') + + if pipe: + pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc]) return mae, tictoc diff --git a/lj_matrix.py b/lj_matrix.py index 55e729c56..5cb1b5a8d 100644 --- a/lj_matrix.py +++ b/lj_matrix.py @@ -188,10 +188,10 @@ def lj_matrix_multiple(mol_data, ljm_data = np.array([lj_matrix(mol, nc, max_len, as_eig, bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) - if pipe: - pipe.send(ljm_data) - toc = time.perf_counter() printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN') + if pipe: + pipe.send(ljm_data) + return ljm_data diff --git a/main.py b/main.py index c8df47800..f39b9b57b 100644 --- a/main.py +++ b/main.py @@ -27,60 +27,73 @@ from misc import printc from read_qm7_data import read_qm7_data from c_matrix import c_matrix_multiple from lj_matrix import lj_matrix_multiple -# from do_ml import do_ml +from do_ml import do_ml # Test def main(): # Initialization time. init_time = time.perf_counter() - procs = [] - pipes = [] # Data reading. zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ read_qm7_data() # Matrices calculation. - cm_recv, cm_send = Pipe() - pipes.append(cm_send) + procs = [] + pipes = [] + + cm_recv, cm_send = Pipe(False) p1 = Process(target=c_matrix_multiple, args=(molecules, nuclear_charge, cm_send)) procs.append(p1) + pipes.append(cm_recv) p1.start() - ljm_recv, ljm_send = Pipe() - pipes.append(ljm_send) + ljm_recv, ljm_send = Pipe(False) p2 = Process(target=lj_matrix_multiple, args=(molecules, nuclear_charge, ljm_send)) procs.append(p2) + pipes.append(ljm_recv) p2.start() - cm_data = cm_recv.recv() - ljm_data = ljm_recv.recv() + cm_data = pipes[0].recv() + ljm_data = pipes[1].recv() - for pipe, proc in zip(pipes, procs): - pipe.close() + for proc in procs: proc.join() - print(type(cm_data), cm_data[0]) - print(type(ljm_data), ljm_data[0]) - - """ # ML calculation. - do_ml(cm_data, - energy_pbe0, - 1000, - test_size=100, - sigma=1000.0, - desc_type='CM') - do_ml(ljm_data, - energy_pbe0, - 1000, - test_size=100, - sigma=1000.0, - desc_type='L-JM') - """ + procs = [] + cm_pipes = [] + ljm_pipes = [] + for i in range(500, 1500 + 1, 500): + cm_recv, cm_send = Pipe(False) + p1 = Process(target=do_ml, + args=(cm_data, energy_pbe0, i, 'CM', cm_send, 500)) + procs.append(p1) + cm_pipes.append(cm_recv) + p1.start() + + ljm_recv, ljm_send = Pipe(False) + p2 = Process(target=do_ml, + args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send, 500)) + procs.append(p2) + ljm_pipes.append(ljm_recv) + p2.start() + + for proc in procs: + proc.join() + + cm_bench_results = [] + ljm_bench_results = [] + for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): + cm_bench_results.append(cd_pipe.recv()) + ljm_bench_results.append(ljd_pipe.recv()) + + for cm, ljm, in zip(cm_bench_results, ljm_bench_results): + print(cm) + print(ljm) # End of program end_time = time.perf_counter() -- cgit v1.2.3-54-g00ecf From a4c78116861f6d21185d6cc0db777edb3569970f Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 23:19:13 -0700 Subject: Add benchmarking and fix bug --- benchmarks.txt | 7 +++++++ main.py | 16 ++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 benchmarks.txt diff --git a/benchmarks.txt b/benchmarks.txt new file mode 100644 index 000000000..45ff5ddaf --- /dev/null +++ b/benchmarks.txt @@ -0,0 +1,7 @@ +ml_type tr_size te_size sigma mae time +CM 500 500 1000.0 43.85581118438912 10.458203600000001 +L-JM 500 500 1000.0 23.306555762464523 10.1301297 +CM 1000 500 1000.0 35.99848579852107 30.1420201 +L-JM 1000 500 1000.0 16.234721245433807 29.1719274 +CM 1500 500 1000.0 31.535353227520467 57.1196739 +L-JM 1500 500 1000.0 14.212853687139276 57.751048 diff --git a/main.py b/main.py index f39b9b57b..059fa2213 100644 --- a/main.py +++ b/main.py @@ -82,18 +82,22 @@ def main(): ljm_pipes.append(ljm_recv) p2.start() - for proc in procs: - proc.join() - cm_bench_results = [] ljm_bench_results = [] for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): cm_bench_results.append(cd_pipe.recv()) ljm_bench_results.append(ljd_pipe.recv()) - for cm, ljm, in zip(cm_bench_results, ljm_bench_results): - print(cm) - print(ljm) + for proc in procs: + proc.join() + + with open('benchmarks.csv', 'w') as save_file: + save_file.write('ml_type,tr_size,te_size,sigma,mae,time\n') + for cm, ljm, in zip(cm_bench_results, ljm_bench_results): + cm_text = ','.join([str(field) for field in cm]) + '\n' + ljm_text = ','.join([str(field) for field in ljm]) + '\n' + save_file.write(cm_text) + save_file.write(ljm_text) # End of program end_time = time.perf_counter() -- cgit v1.2.3-54-g00ecf From 065ac868da9d5ae9cb1da628a953a96a6282b610 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 23:22:35 -0700 Subject: Change benchmarks extension --- benchmarks.csv | 7 +++++++ benchmarks.txt | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 benchmarks.csv delete mode 100644 benchmarks.txt diff --git a/benchmarks.csv b/benchmarks.csv new file mode 100644 index 000000000..d1806270b --- /dev/null +++ b/benchmarks.csv @@ -0,0 +1,7 @@ +ml_type,tr_size,te_size,sigma,mae,time +CM,500,500,1000.0,43.85581118438912,10.36867 +L-JM,500,500,1000.0,23.306555762464523,10.6035533 +CM,1000,500,1000.0,35.99848579852107,29.508385399999998 +L-JM,1000,500,1000.0,16.234721245433807,29.9137466 +CM,1500,500,1000.0,31.535353227520467,57.493393 +L-JM,1500,500,1000.0,14.212853687139276,58.378428799999995 diff --git a/benchmarks.txt b/benchmarks.txt deleted file mode 100644 index 45ff5ddaf..000000000 --- a/benchmarks.txt +++ /dev/null @@ -1,7 +0,0 @@ -ml_type tr_size te_size sigma mae time -CM 500 500 1000.0 43.85581118438912 10.458203600000001 -L-JM 500 500 1000.0 23.306555762464523 10.1301297 -CM 1000 500 1000.0 35.99848579852107 30.1420201 -L-JM 1000 500 1000.0 16.234721245433807 29.1719274 -CM 1500 500 1000.0 31.535353227520467 57.1196739 -L-JM 1500 500 1000.0 14.212853687139276 57.751048 -- cgit v1.2.3-54-g00ecf From 36a50c3cbb6920b055962e10d75e8fd0939e7f82 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 23:37:02 -0700 Subject: Setup for new benchmark and bugfix --- do_ml.py | 5 ++++- main.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/do_ml.py b/do_ml.py index 87e8b72db..a424a610a 100644 --- a/do_ml.py +++ b/do_ml.py @@ -68,7 +68,10 @@ def do_ml(desc_data, return None if not test_size: - test_size = d_len - training_size + if d_len - training_size > 1500: + test_size = 1500 + else: + test_size = d_len - training_size tic = time.perf_counter() if show_msgs: diff --git a/main.py b/main.py index 059fa2213..9144099d5 100644 --- a/main.py +++ b/main.py @@ -67,17 +67,17 @@ def main(): procs = [] cm_pipes = [] ljm_pipes = [] - for i in range(500, 1500 + 1, 500): + for i in range(2500, 6000 + 1, 500): cm_recv, cm_send = Pipe(False) p1 = Process(target=do_ml, - args=(cm_data, energy_pbe0, i, 'CM', cm_send, 500)) + args=(cm_data, energy_pbe0, i, 'CM', cm_send)) procs.append(p1) cm_pipes.append(cm_recv) p1.start() ljm_recv, ljm_send = Pipe(False) p2 = Process(target=do_ml, - args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send, 500)) + args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send)) procs.append(p2) ljm_pipes.append(ljm_recv) p2.start() -- cgit v1.2.3-54-g00ecf From 56b64719ebec9c24d4c421f0b059640308e49ed8 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Thu, 12 Dec 2019 23:38:35 -0700 Subject: Fix bug --- do_ml.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/do_ml.py b/do_ml.py index a424a610a..c88533e68 100644 --- a/do_ml.py +++ b/do_ml.py @@ -68,10 +68,9 @@ def do_ml(desc_data, return None if not test_size: - if d_len - training_size > 1500: + test_size = d_len - training_size + if test_size > 1500: test_size = 1500 - else: - test_size = d_len - training_size tic = time.perf_counter() if show_msgs: -- cgit v1.2.3-54-g00ecf From 1911a22d0dbc2568296c1c65f418184a36878c1a Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Fri, 13 Dec 2019 00:09:12 -0700 Subject: Add new benchmark --- benchmarks.csv | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/benchmarks.csv b/benchmarks.csv index d1806270b..181da4e77 100644 --- a/benchmarks.csv +++ b/benchmarks.csv @@ -1,7 +1,17 @@ ml_type,tr_size,te_size,sigma,mae,time -CM,500,500,1000.0,43.85581118438912,10.36867 -L-JM,500,500,1000.0,23.306555762464523,10.6035533 -CM,1000,500,1000.0,35.99848579852107,29.508385399999998 -L-JM,1000,500,1000.0,16.234721245433807,29.9137466 -CM,1500,500,1000.0,31.535353227520467,57.493393 -L-JM,1500,500,1000.0,14.212853687139276,58.378428799999995 +CM,2500,1500,1000.0,26.97361909951274,282.1443384 +L-JM,2500,1500,1000.0,11.785532138202667,277.3047686 +CM,3000,1500,1000.0,24.641045831013365,368.9830951 +L-JM,3000,1500,1000.0,11.08713556535848,370.8802103 +CM,3500,1500,1000.0,23.152765029544195,467.1278212 +L-JM,3500,1500,1000.0,10.430573100461325,464.2503922 +CM,4000,1500,1000.0,22.15854674774742,562.7823053 +L-JM,4000,1500,1000.0,10.117842321543852,564.9598646000001 +CM,4500,1500,1000.0,21.180935112846054,655.8574701 +L-JM,4500,1500,1000.0,9.677330400431318,652.4534939 +CM,5000,1500,1000.0,20.30113013118839,755.6121498 +L-JM,5000,1500,1000.0,9.143465812164308,759.3658756 +CM,5500,1500,1000.0,19.749349939851125,867.1811531999999 +L-JM,5500,1500,1000.0,8.902193965087893,863.8858544000001 +CM,6000,1101,1000.0,18.989775833821966,934.9760266999999 +L-JM,6000,1101,1000.0,8.595942764490113,932.6673659 -- cgit v1.2.3-54-g00ecf From 785bca571df73b641a9d722f694a6b13f6274482 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Fri, 13 Dec 2019 03:52:53 -0700 Subject: Add more notes to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f24ac463..78d3225a5 100644 --- a/README.md +++ b/README.md @@ -10,4 +10,4 @@ An implementation of existing representations (for now only Coulomb matrix), thi * The *QM9* dataset is obtained from the [quantum-machine webpage](http://www.quantum-machine.org/datasets/), but it's slightly modified for its use with python. * On the other hand, the *periodic table of elements* data was retrieved from [this handy Gist](https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee). -*NOTE*: This is not supposed to be a python package (for now). +*NOTE*: This is not supposed to be a python package (for now), but rather, the basis for future work. Please give credit if you use(d) the code presented here (contact me for more). -- cgit v1.2.3-54-g00ecf From 5547a83e628d462b7e006e477f4b1b0d112f18a6 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 14 Dec 2019 23:15:43 -0700 Subject: Fix benchmark results and prepare for new ones --- benchmarks.csv | 40 +++++++++++++++++++++++----------------- lj_matrix.py | 20 +++++++++++++++----- main.py | 56 ++++++++++++++++++++++++++++++-------------------------- 3 files changed, 68 insertions(+), 48 deletions(-) diff --git a/benchmarks.csv b/benchmarks.csv index 181da4e77..eb0ff0333 100644 --- a/benchmarks.csv +++ b/benchmarks.csv @@ -1,17 +1,23 @@ -ml_type,tr_size,te_size,sigma,mae,time -CM,2500,1500,1000.0,26.97361909951274,282.1443384 -L-JM,2500,1500,1000.0,11.785532138202667,277.3047686 -CM,3000,1500,1000.0,24.641045831013365,368.9830951 -L-JM,3000,1500,1000.0,11.08713556535848,370.8802103 -CM,3500,1500,1000.0,23.152765029544195,467.1278212 -L-JM,3500,1500,1000.0,10.430573100461325,464.2503922 -CM,4000,1500,1000.0,22.15854674774742,562.7823053 -L-JM,4000,1500,1000.0,10.117842321543852,564.9598646000001 -CM,4500,1500,1000.0,21.180935112846054,655.8574701 -L-JM,4500,1500,1000.0,9.677330400431318,652.4534939 -CM,5000,1500,1000.0,20.30113013118839,755.6121498 -L-JM,5000,1500,1000.0,9.143465812164308,759.3658756 -CM,5500,1500,1000.0,19.749349939851125,867.1811531999999 -L-JM,5500,1500,1000.0,8.902193965087893,863.8858544000001 -CM,6000,1101,1000.0,18.989775833821966,934.9760266999999 -L-JM,6000,1101,1000.0,8.595942764490113,932.6673659 +ml_type,tr_size,te_size,kernel_s,mae,time,lj_s,lj_e,date_ran +CM,1500,1500,1000.0,31.512240475332977,100.3640273,na,na,14/12/2019 +L-JM,1500,1500,1000.0,14.071581760048153,101.6336081,1,1,14/12/2019 +CM,2000,1500,1000.0,28.245321081132253,152.88077109999998,na,na,14/12/2019 +L-JM,2000,1500,1000.0,12.641839980374655,155.5011396,1,1,14/12/2019 +CM,2500,1500,1000.0,26.97361909951274,215.9536518,na,na,14/12/2019 +L-JM,2500,1500,1000.0,11.785532138202667,211.8681615,1,1,14/12/2019 +CM,3000,1500,1000.0,24.641045831013365,278.5150912,na,na,14/12/2019 +L-JM,3000,1500,1000.0,11.08713556535848,278.5971606,1,1,14/12/2019 +CM,3500,1500,1000.0,23.152765029544195,401.9234568,na,na,14/12/2019 +L-JM,3500,1500,1000.0,10.430573100461325,408.3161812,1,1,14/12/2019 +CM,4000,1500,1000.0,22.15854674774742,499.55023489999996,na,na,14/12/2019 +L-JM,4000,1500,1000.0,10.117842321543852,503.7487102,1,1,14/12/2019 +CM,4500,1500,1000.0,21.180935112846054,605.4631191999999,na,na,14/12/2019 +L-JM,4500,1500,1000.0,9.677330400431318,604.4122219,1,1,14/12/2019 +CM,5000,1500,1000.0,20.30113013118839,699.0641616,na,na,14/12/2019 +L-JM,5000,1500,1000.0,9.143465812164308,701.9964291,1,1,14/12/2019 +CM,5500,1500,1000.0,19.749349939851125,816.2225933,na,na,14/12/2019 +L-JM,5500,1500,1000.0,8.902193965087893,800.8945613,1,1,14/12/2019 +CM,6000,1101,1000.0,18.989775833821966,894.5558989,na,na,14/12/2019 +L-JM,6000,1101,1000.0,8.595942764490113,885.4234397,1,1,14/12/2019 +CM,6500,601,1000.0,19.424238707695146,956.2706457,na,na,14/12/2019 +L-JM,6500,601,1000.0,8.636351001125403,950.2348745,1,1,14/12/2019 diff --git a/lj_matrix.py b/lj_matrix.py index 5cb1b5a8d..2a8e0d956 100644 --- a/lj_matrix.py +++ b/lj_matrix.py @@ -29,6 +29,8 @@ from numpy.linalg import eig def lj_matrix(mol_data, nc_data, + sigma=1.0, + epsilon=1.0, max_len=25, as_eig=True, bohr_radius_units=False): @@ -92,11 +94,11 @@ def lj_matrix(mol_data, # Conversion factor is included in r^2. # 1/r^2 - r_2 = 1/(conversion_rate**2*(x + y + z)) + r_2 = sigma**2/(conversion_rate**2*(x + y + z)) r_6 = math.pow(r_2, 3) r_12 = math.pow(r_6, 2) - lj[i, j] = (4*(r_12 - r_6)) + lj[i, j] = (4*epsilon*(r_12 - r_6)) else: break @@ -150,11 +152,11 @@ def lj_matrix(mol_data, # Conversion factor is included in r^2. # 1/r^2 - r_2 = 1/(conversion_rate**2*(x + y + z)) + r_2 = sigma**2/(conversion_rate**2*(x + y + z)) r_6 = math.pow(r_2, 3) r_12 = math.pow(r_6, 2) - lj_row.append(4*(r_12 - r_6)) + lj_row.append(4*epsilon*(r_12 - r_6)) lj_temp.append(np.array(lj_row)) @@ -169,6 +171,8 @@ def lj_matrix(mol_data, def lj_matrix_multiple(mol_data, nc_data, pipe=None, + sigma=1, + epsilon=1, max_len=25, as_eig=True, bohr_radius_units=False): @@ -185,7 +189,13 @@ def lj_matrix_multiple(mol_data, printc('L-J Matrices calculation started.', 'CYAN') tic = time.perf_counter() - ljm_data = np.array([lj_matrix(mol, nc, max_len, as_eig, bohr_radius_units) + ljm_data = np.array([lj_matrix(mol, + nc, + sigma, + epsilon, + max_len, + as_eig, + bohr_radius_units) for mol, nc in zip(mol_data, nc_data)]) toc = time.perf_counter() diff --git a/main.py b/main.py index 9144099d5..88f0a26f5 100644 --- a/main.py +++ b/main.py @@ -43,37 +43,37 @@ def main(): procs = [] pipes = [] - cm_recv, cm_send = Pipe(False) - p1 = Process(target=c_matrix_multiple, - args=(molecules, nuclear_charge, cm_send)) - procs.append(p1) - pipes.append(cm_recv) - p1.start() + # cm_recv, cm_send = Pipe(False) + # p1 = Process(target=c_matrix_multiple, + # args=(molecules, nuclear_charge, cm_send)) + # procs.append(p1) + # pipes.append(cm_recv) + # p1.start() ljm_recv, ljm_send = Pipe(False) p2 = Process(target=lj_matrix_multiple, - args=(molecules, nuclear_charge, ljm_send)) + args=(molecules, nuclear_charge, ljm_send, 1.5)) procs.append(p2) pipes.append(ljm_recv) p2.start() - cm_data = pipes[0].recv() - ljm_data = pipes[1].recv() + # cm_data = pipes[0].recv() + ljm_data = pipes[0].recv() for proc in procs: proc.join() # ML calculation. procs = [] - cm_pipes = [] + # cm_pipes = [] ljm_pipes = [] - for i in range(2500, 6000 + 1, 500): - cm_recv, cm_send = Pipe(False) - p1 = Process(target=do_ml, - args=(cm_data, energy_pbe0, i, 'CM', cm_send)) - procs.append(p1) - cm_pipes.append(cm_recv) - p1.start() + for i in range(1500, 6500 + 1, 500): + # cm_recv, cm_send = Pipe(False) + # p1 = Process(target=do_ml, + # args=(cm_data, energy_pbe0, i, 'CM', cm_send)) + # procs.append(p1) + # cm_pipes.append(cm_recv) + # p1.start() ljm_recv, ljm_send = Pipe(False) p2 = Process(target=do_ml, @@ -82,21 +82,25 @@ def main(): ljm_pipes.append(ljm_recv) p2.start() - cm_bench_results = [] + # cm_bench_results = [] ljm_bench_results = [] - for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): - cm_bench_results.append(cd_pipe.recv()) + for ljd_pipe in ljm_pipes: # cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): + # cm_bench_results.append(cd_pipe.recv()) ljm_bench_results.append(ljd_pipe.recv()) for proc in procs: proc.join() - with open('benchmarks.csv', 'w') as save_file: - save_file.write('ml_type,tr_size,te_size,sigma,mae,time\n') - for cm, ljm, in zip(cm_bench_results, ljm_bench_results): - cm_text = ','.join([str(field) for field in cm]) + '\n' - ljm_text = ','.join([str(field) for field in ljm]) + '\n' - save_file.write(cm_text) + with open('benchmarks.csv', 'a') as save_file: + # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,', + # 'mae,time,lj_s,lj_e,date_ran\n'])) + date = '/'.join([str(field) for field in time.localtime()[:3][::-1]]) + for ljm in ljm_bench_results: # cm, ljm, in zip(cm_bench_results, ljm_bench_results): + # cm_text = ','.join([str(field) for field in cm])\ + # + ',' + date + '\n' + ljm_text = ','.join([str(field) for field in ljm])\ + + ',1.5,1,' + date + '\n' + # save_file.write(cm_text) save_file.write(ljm_text) # End of program -- cgit v1.2.3-54-g00ecf From 3c282cdee55b63504e31b6ce1ac32261a95d5c52 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sun, 15 Dec 2019 01:18:58 -0700 Subject: Add more benchmarks --- benchmarks.csv | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.py | 4 +-- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/benchmarks.csv b/benchmarks.csv index eb0ff0333..55ed72635 100644 --- a/benchmarks.csv +++ b/benchmarks.csv @@ -21,3 +21,80 @@ CM,6000,1101,1000.0,18.989775833821966,894.5558989,na,na,14/12/2019 L-JM,6000,1101,1000.0,8.595942764490113,885.4234397,1,1,14/12/2019 CM,6500,601,1000.0,19.424238707695146,956.2706457,na,na,14/12/2019 L-JM,6500,601,1000.0,8.636351001125403,950.2348745,1,1,14/12/2019 +L-JM,1500,1500,1000.0,25.971068591988388,113.6740279,1.5,1,14/12/2019 +L-JM,2000,1500,1000.0,23.332141255879883,176.44235379999998,1.5,1,14/12/2019 +L-JM,2500,1500,1000.0,21.692867970005768,245.19673870000003,1.5,1,14/12/2019 +L-JM,3000,1500,1000.0,20.59071302639572,328.621371,1.5,1,14/12/2019 +L-JM,3500,1500,1000.0,19.735266507048408,415.9199613,1.5,1,14/12/2019 +L-JM,4000,1500,1000.0,18.834051404314636,519.1557511,1.5,1,14/12/2019 +L-JM,4500,1500,1000.0,17.957638371625624,617.7238379,1.5,1,14/12/2019 +L-JM,5000,1500,1000.0,16.972922026017606,730.4018292,1.5,1,14/12/2019 +L-JM,5500,1500,1000.0,16.395492789509397,846.8666075,1.5,1,14/12/2019 +L-JM,6000,1101,1000.0,15.777389296181523,927.473866,1.5,1,14/12/2019 +L-JM,6500,601,1000.0,14.209911716905589,1009.5176146,1.5,1,14/12/2019 +L-JM,1500,1500,1000.0,28.83987694576565,113.8754708,2,1,14/12/2019 +L-JM,2000,1500,1000.0,26.596450258715805,175.00251580000003,2,1,14/12/2019 +L-JM,2500,1500,1000.0,25.24225833702623,250.13395500000001,2,1,14/12/2019 +L-JM,3000,1500,1000.0,24.218064992425827,331.4902702,2,1,14/12/2019 +L-JM,3500,1500,1000.0,23.753584334828926,418.1716937,2,1,14/12/2019 +L-JM,4000,1500,1000.0,22.796982394926633,508.1620121,2,1,14/12/2019 +L-JM,4500,1500,1000.0,22.094972526780207,620.0088331000001,2,1,14/12/2019 +L-JM,5000,1500,1000.0,21.32928554974122,719.5075616,2,1,14/12/2019 +L-JM,5500,1500,1000.0,20.839213236756795,853.7258727,2,1,14/12/2019 +L-JM,6000,1101,1000.0,20.469720091240188,919.2635181,2,1,14/12/2019 +L-JM,6500,601,1000.0,18.97332428701078,991.9277882,2,1,14/12/2019 +L-JM,1500,1500,1000.0,28.834687237181544,112.75118160000001,2.5,1,15/12/2019 +L-JM,2000,1500,1000.0,26.524798614302835,174.36785940000001,2.5,1,15/12/2019 +L-JM,2500,1500,1000.0,25.21431444031365,247.27555790000002,2.5,1,15/12/2019 +L-JM,3000,1500,1000.0,24.1868798509265,327.2753287,2.5,1,15/12/2019 +L-JM,3500,1500,1000.0,23.70991305338383,414.2050979,2.5,1,15/12/2019 +L-JM,4000,1500,1000.0,22.76540519550355,511.5177748,2.5,1,15/12/2019 +L-JM,4500,1500,1000.0,22.114765667859746,609.2158499,2.5,1,15/12/2019 +L-JM,5000,1500,1000.0,21.358232378530058,717.5175853000001,2.5,1,15/12/2019 +L-JM,5500,1500,1000.0,20.85827359348054,833.1339069,2.5,1,15/12/2019 +L-JM,6000,1101,1000.0,20.47802544079168,913.8767508999999,2.5,1,15/12/2019 +L-JM,6500,601,1000.0,18.974561750612214,983.2796894999999,2.5,1,15/12/2019 +L-JM,1500,1500,1000.0,32.66494718071635,115.4841954,3,1,15/12/2019 +L-JM,2000,1500,1000.0,28.7706412893922,177.2390435,3,1,15/12/2019 +L-JM,2500,1500,1000.0,26.93394969655416,249.6755776,3,1,15/12/2019 +L-JM,3000,1500,1000.0,25.592547303146002,331.0301091,3,1,15/12/2019 +L-JM,3500,1500,1000.0,24.823386571052417,424.3857741,3,1,15/12/2019 +L-JM,4000,1500,1000.0,23.723362785716677,516.4350475,3,1,15/12/2019 +L-JM,4500,1500,1000.0,22.96364383080498,615.7131781,3,1,15/12/2019 +L-JM,5000,1500,1000.0,22.097596606763833,732.2697349,3,1,15/12/2019 +L-JM,5500,1500,1000.0,21.463723095992123,850.7737075,3,1,15/12/2019 +L-JM,6000,1101,1000.0,21.098415947303895,909.3228279,3,1,15/12/2019 +L-JM,6500,601,1000.0,19.604458837712464,985.2199813000001,3,1,15/12/2019 +L-JM,1500,1500,1000.0,13.25081292192078,113.2111716,0.75,1,15/12/2019 +L-JM,2000,1500,1000.0,11.898306360562803,176.1226983,0.75,1,15/12/2019 +L-JM,2500,1500,1000.0,11.331479928387962,245.3395849,0.75,1,15/12/2019 +L-JM,3000,1500,1000.0,10.822743636769616,328.4253929,0.75,1,15/12/2019 +L-JM,3500,1500,1000.0,10.341161537587485,416.6295876,0.75,1,15/12/2019 +L-JM,4000,1500,1000.0,10.05422347022756,512.5809536,0.75,1,15/12/2019 +L-JM,4500,1500,1000.0,9.678447398895264,616.4628192,0.75,1,15/12/2019 +L-JM,5000,1500,1000.0,9.348294603434246,722.3488736,0.75,1,15/12/2019 +L-JM,5500,1500,1000.0,9.158434114440919,839.2501411999999,0.75,1,15/12/2019 +L-JM,6000,1101,1000.0,8.85969851676255,918.4742979,0.75,1,15/12/2019 +L-JM,6500,601,1000.0,8.978569760943016,997.4762726,0.75,1,15/12/2019 +L-JM,1500,1500,1000.0,14.31648555157471,112.708166,0.5,1,15/12/2019 +L-JM,2000,1500,1000.0,13.100616600819906,173.49587910000002,0.5,1,15/12/2019 +L-JM,2500,1500,1000.0,12.631016832621258,244.59541330000002,0.5,1,15/12/2019 +L-JM,3000,1500,1000.0,12.372139003845216,325.2085343,0.5,1,15/12/2019 +L-JM,3500,1500,1000.0,12.052149805135091,412.00539760000004,0.5,1,15/12/2019 +L-JM,4000,1500,1000.0,11.926307619750977,506.9327447,0.5,1,15/12/2019 +L-JM,4500,1500,1000.0,11.605206087137859,603.1267876000001,0.5,1,15/12/2019 +L-JM,5000,1500,1000.0,11.364976841634116,709.4308111,0.5,1,15/12/2019 +L-JM,5500,1500,1000.0,11.303823874104818,825.9382915000001,0.5,1,15/12/2019 +L-JM,6000,1101,1000.0,11.054803368952145,915.2881399,0.5,1,15/12/2019 +L-JM,6500,601,1000.0,11.402072457096146,984.6962941,0.5,1,15/12/2019 +L-JM,1500,1500,1000.0,16.515109934611004,117.3274013,0.25,1,15/12/2019 +L-JM,2000,1500,1000.0,15.618779733296712,177.3680046,0.25,1,15/12/2019 +L-JM,2500,1500,1000.0,15.289220743123371,247.515579,0.25,1,15/12/2019 +L-JM,3000,1500,1000.0,15.11142683183797,327.60220580000004,0.25,1,15/12/2019 +L-JM,3500,1500,1000.0,14.843900964528402,411.6829181,0.25,1,15/12/2019 +L-JM,4000,1500,1000.0,14.726752273071288,513.3648273,0.25,1,15/12/2019 +L-JM,4500,1500,1000.0,14.419325491251628,613.8661824000001,0.25,1,15/12/2019 +L-JM,5000,1500,1000.0,14.256678564778646,711.8209704,0.25,1,15/12/2019 +L-JM,5500,1500,1000.0,14.237603645751953,825.8206864,0.25,1,15/12/2019 +L-JM,6000,1101,1000.0,14.04869658132167,909.8488734,0.25,1,15/12/2019 +L-JM,6500,601,1000.0,14.660941845581258,994.2622263000001,0.25,1,15/12/2019 diff --git a/main.py b/main.py index 88f0a26f5..013ae50fc 100644 --- a/main.py +++ b/main.py @@ -52,7 +52,7 @@ def main(): ljm_recv, ljm_send = Pipe(False) p2 = Process(target=lj_matrix_multiple, - args=(molecules, nuclear_charge, ljm_send, 1.5)) + args=(molecules, nuclear_charge, ljm_send)) procs.append(p2) pipes.append(ljm_recv) p2.start() @@ -99,7 +99,7 @@ def main(): # cm_text = ','.join([str(field) for field in cm])\ # + ',' + date + '\n' ljm_text = ','.join([str(field) for field in ljm])\ - + ',1.5,1,' + date + '\n' + + ',0.25,1,' + date + '\n' # save_file.write(cm_text) save_file.write(ljm_text) -- cgit v1.2.3-54-g00ecf From c7fccbab8ebf35cdbb007f71bcfc609c0c10d8ce Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sun, 15 Dec 2019 19:09:03 -0700 Subject: Add more benchmarks --- benchmarks.csv | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.py | 4 +-- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/benchmarks.csv b/benchmarks.csv index 55ed72635..f81c902e1 100644 --- a/benchmarks.csv +++ b/benchmarks.csv @@ -98,3 +98,80 @@ L-JM,5000,1500,1000.0,14.256678564778646,711.8209704,0.25,1,15/12/2019 L-JM,5500,1500,1000.0,14.237603645751953,825.8206864,0.25,1,15/12/2019 L-JM,6000,1101,1000.0,14.04869658132167,909.8488734,0.25,1,15/12/2019 L-JM,6500,601,1000.0,14.660941845581258,994.2622263000001,0.25,1,15/12/2019 +L-JM,1500,1500,1000.0,13.917265603817388,111.8063593,1,1.5,15/12/2019 +L-JM,2000,1500,1000.0,12.393746784561161,174.0463799,1,1.5,15/12/2019 +L-JM,2500,1500,1000.0,11.310800296285631,242.5622126,1,1.5,15/12/2019 +L-JM,3000,1500,1000.0,10.573622634081524,323.0661685,1,1.5,15/12/2019 +L-JM,3500,1500,1000.0,9.875938797266643,406.5680506,1,1.5,15/12/2019 +L-JM,4000,1500,1000.0,9.510982350137393,503.7767193,1,1.5,15/12/2019 +L-JM,4500,1500,1000.0,9.119124757146201,596.6460913,1,1.5,15/12/2019 +L-JM,5000,1500,1000.0,8.599406629679363,706.859301,1,1.5,15/12/2019 +L-JM,5500,1500,1000.0,8.368502939493816,818.7217491,1,1.5,15/12/2019 +L-JM,6000,1101,1000.0,8.063868108397717,897.7200943,1,1.5,15/12/2019 +L-JM,6500,601,1000.0,8.136913764498198,974.7133005,1,1.5,15/12/2019 +L-JM,1500,1500,1000.0,14.09852163185247,111.0557463,1,2,15/12/2019 +L-JM,2000,1500,1000.0,12.44589603171794,172.8484317,1,2,15/12/2019 +L-JM,2500,1500,1000.0,11.252518187896092,245.6580888,1,2,15/12/2019 +L-JM,3000,1500,1000.0,10.49970513110606,319.36993149999995,1,2,15/12/2019 +L-JM,3500,1500,1000.0,9.76052582794444,409.32987959999997,1,2,15/12/2019 +L-JM,4000,1500,1000.0,9.367906394826731,505.0419491,1,2,15/12/2019 +L-JM,4500,1500,1000.0,8.953196634539921,613.9279513,1,2,15/12/2019 +L-JM,5000,1500,1000.0,8.49635838229116,708.8533507000001,1,2,15/12/2019 +L-JM,5500,1500,1000.0,8.250896130411785,824.6985384,1,2,15/12/2019 +L-JM,6000,1101,1000.0,7.941628870483316,896.7534047,1,2,15/12/2019 +L-JM,6500,601,1000.0,7.984307747999562,967.4740421,1,2,15/12/2019 +L-JM,1500,1500,1000.0,14.530905711745902,113.61682520000001,1,2.5,15/12/2019 +L-JM,2000,1500,1000.0,12.780882489283243,178.76914259999998,1,2.5,15/12/2019 +L-JM,2500,1500,1000.0,11.50745587341436,255.22816189999998,1,2.5,15/12/2019 +L-JM,3000,1500,1000.0,10.72320711825816,329.5140962,1,2.5,15/12/2019 +L-JM,3500,1500,1000.0,9.959901313243869,418.59089370000004,1,2.5,15/12/2019 +L-JM,4000,1500,1000.0,9.540940698993367,514.2713362,1,2.5,15/12/2019 +L-JM,4500,1500,1000.0,9.075100561608632,622.6434419,1,2.5,15/12/2019 +L-JM,5000,1500,1000.0,8.632106214462283,715.8135177,1,2.5,15/12/2019 +L-JM,5500,1500,1000.0,8.353497416613262,834.1856596,1,2.5,15/12/2019 +L-JM,6000,1101,1000.0,7.997364105073893,919.106896,1,2.5,15/12/2019 +L-JM,6500,601,1000.0,8.032743632894189,1003.1057952,1,2.5,15/12/2019 +L-JM,1500,1500,1000.0,14.880831269627889,112.38430980000001,1,3,15/12/2019 +L-JM,2000,1500,1000.0,13.064014818353021,174.3569823,1,3,15/12/2019 +L-JM,2500,1500,1000.0,11.762643989761354,243.2211045,1,3,15/12/2019 +L-JM,3000,1500,1000.0,10.968917869575504,324.5920986,1,3,15/12/2019 +L-JM,3500,1500,1000.0,10.200866189011894,410.9065092,1,3,15/12/2019 +L-JM,4000,1500,1000.0,9.746818656939825,498.6132275,1,3,15/12/2019 +L-JM,4500,1500,1000.0,9.23931041631953,606.2137346000001,1,3,15/12/2019 +L-JM,5000,1500,1000.0,8.780373388102213,705.5607821000001,1,3,15/12/2019 +L-JM,5500,1500,1000.0,8.485089657131196,820.0846994,1,3,15/12/2019 +L-JM,6000,1101,1000.0,8.135665582644734,895.5069686,1,3,15/12/2019 +L-JM,6500,601,1000.0,8.187855192343129,958.1212525999999,1,3,15/12/2019 +L-JM,1500,1500,1000.0,14.046177239428841,114.7886533,1,0.75,15/12/2019 +L-JM,2000,1500,1000.0,12.700155820409142,177.8000633,1,0.75,15/12/2019 +L-JM,2500,1500,1000.0,11.999409877098083,249.8877674,1,0.75,15/12/2019 +L-JM,3000,1500,1000.0,11.349961215230307,331.60657760000004,1,0.75,15/12/2019 +L-JM,3500,1500,1000.0,10.72300334940847,421.4194003,1,0.75,15/12/2019 +L-JM,4000,1500,1000.0,10.448034243490858,514.5383338,1,0.75,15/12/2019 +L-JM,4500,1500,1000.0,10.017663810821535,617.0386821,1,0.75,15/12/2019 +L-JM,5000,1500,1000.0,9.517057082204184,716.4373566,1,0.75,15/12/2019 +L-JM,5500,1500,1000.0,9.273669176727296,832.6763739,1,0.75,15/12/2019 +L-JM,6000,1101,1000.0,8.995569295995784,911.0825957,1,0.75,15/12/2019 +L-JM,6500,601,1000.0,9.010769006846546,979.3535823999999,1,0.75,15/12/2019 +L-JM,1500,1500,1000.0,13.967123082429293,111.65990760000001,1,0.5,15/12/2019 +L-JM,2000,1500,1000.0,12.697881535001121,172.2946484,1,0.5,15/12/2019 +L-JM,2500,1500,1000.0,12.145461895324708,241.1312698,1,0.5,15/12/2019 +L-JM,3000,1500,1000.0,11.588876514109295,323.0631537,1,0.5,15/12/2019 +L-JM,3500,1500,1000.0,11.044433803100585,415.137312,1,0.5,15/12/2019 +L-JM,4000,1500,1000.0,10.80793491441091,504.8102572,1,0.5,15/12/2019 +L-JM,4500,1500,1000.0,10.405684053039552,602.5219337,1,0.5,15/12/2019 +L-JM,5000,1500,1000.0,9.965184934692385,711.8197224,1,0.5,15/12/2019 +L-JM,5500,1500,1000.0,9.748910655822755,825.7830675,1,0.5,15/12/2019 +L-JM,6000,1101,1000.0,9.48353206175095,908.3770869,1,0.5,15/12/2019 +L-JM,6500,601,1000.0,9.485751465046862,971.9107026,1,0.5,15/12/2019 +L-JM,1500,1500,1000.0,14.028847422150934,112.4397257,1,0.25,15/12/2019 +L-JM,2000,1500,1000.0,12.743222420338949,174.4159427,1,0.25,15/12/2019 +L-JM,2500,1500,1000.0,12.272876975964866,249.5782289,1,0.25,15/12/2019 +L-JM,3000,1500,1000.0,11.804296119425455,328.94519329999997,1,0.25,15/12/2019 +L-JM,3500,1500,1000.0,11.369661207082114,412.7472373,1,0.25,15/12/2019 +L-JM,4000,1500,1000.0,11.183031419891359,511.8257998,1,0.25,15/12/2019 +L-JM,4500,1500,1000.0,10.825626690989177,609.6951702,1,0.25,15/12/2019 +L-JM,5000,1500,1000.0,10.460318843851725,721.2592548,1,0.25,15/12/2019 +L-JM,5500,1500,1000.0,10.294279163879395,825.8027392,1,0.25,15/12/2019 +L-JM,6000,1101,1000.0,10.018516495014302,921.2132997,1,0.25,15/12/2019 +L-JM,6500,601,1000.0,10.097973922018602,980.0041144,1,0.25,15/12/2019 diff --git a/main.py b/main.py index 013ae50fc..1e9a09631 100644 --- a/main.py +++ b/main.py @@ -52,7 +52,7 @@ def main(): ljm_recv, ljm_send = Pipe(False) p2 = Process(target=lj_matrix_multiple, - args=(molecules, nuclear_charge, ljm_send)) + args=(molecules, nuclear_charge, ljm_send, 1, 0.25)) procs.append(p2) pipes.append(ljm_recv) p2.start() @@ -99,7 +99,7 @@ def main(): # cm_text = ','.join([str(field) for field in cm])\ # + ',' + date + '\n' ljm_text = ','.join([str(field) for field in ljm])\ - + ',0.25,1,' + date + '\n' + + ',1,0.25,' + date + '\n' # save_file.write(cm_text) save_file.write(ljm_text) -- cgit v1.2.3-54-g00ecf From 1bfd89c336cba5f53f8a80f6dcb2aab36ec3e115 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sun, 15 Dec 2019 21:31:28 -0700 Subject: Add first figure --- data/figs/mae_diff_tr_sizes.pdf | Bin 0 -> 10839 bytes main.py | 66 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 data/figs/mae_diff_tr_sizes.pdf diff --git a/data/figs/mae_diff_tr_sizes.pdf b/data/figs/mae_diff_tr_sizes.pdf new file mode 100644 index 000000000..e49e70411 Binary files /dev/null and b/data/figs/mae_diff_tr_sizes.pdf differ diff --git a/main.py b/main.py index 1e9a09631..d2466c9d6 100644 --- a/main.py +++ b/main.py @@ -23,6 +23,7 @@ SOFTWARE. import time from multiprocessing import Process, Pipe # import matplotlib.pyplot as plt +import pandas as pd from misc import printc from read_qm7_data import read_qm7_data from c_matrix import c_matrix_multiple @@ -31,7 +32,10 @@ from do_ml import do_ml # Test -def main(): +def ml(): + """ + Main function that does the whole ML process. + """ # Initialization time. init_time = time.perf_counter() @@ -109,5 +113,63 @@ def main(): 'CYAN') +def pl(): + """ + Function for plotting the benchmarks. + """ + or_cols = ['ml_type', + 'tr_size', + 'te_size', + 'kernel_s', + 'mae', + 'time', + 'lj_s', + 'lj_e', + 'date_ran'] + dor_cols = ['te_size', + 'kernel_s', + 'time', + 'date_ran'] + + data_temp = pd.read_csv('benchmarks.csv',) + data = pd.DataFrame(data_temp, columns=or_cols) + data = data.drop(columns=dor_cols) + # print(data) + + first_data = pd.DataFrame(data, index=range(0, 22)) + first_data = first_data.drop(columns=['lj_s', 'lj_e']) + + fd_columns = ['ml_type', 'tr_size', 'mae'] + first_data_cm = pd.DataFrame(columns=fd_columns) + first_data_ljm = pd.DataFrame(columns=fd_columns) + for i in range(first_data.shape[0]): + temp_df = first_data.iloc[[i]] + if first_data.at[i, 'ml_type'] == 'CM': + first_data_cm = first_data_cm.append(temp_df) + else: + first_data_ljm = first_data_ljm.append(temp_df) + first_data_cm = first_data_cm.drop(columns=['ml_type']).rename(columns={'mae': 'cm_mae'}) + first_data_ljm = first_data_ljm.drop(columns=['ml_type']).rename(columns={'mae': 'ljm_mae'}) + print(first_data_cm) + print(first_data_ljm) + + cm_axis = first_data_cm.plot(x='tr_size', + y='cm_mae', + kind='line') + plot_axis = first_data_ljm.plot(ax=cm_axis, + x='tr_size', + y='ljm_mae', + kind='line') + plot_axis.set_xlabel('tr_size') + plot_axis.set_ylabel('mae') + plot_axis.set_title('mae for different tr_sizes') + plot_axis.get_figure().savefig('data\\figs\\mae_diff_tr_sizes.pdf') + + new_data = data.drop(index=range(0, 22)) + new_data = new_data.drop(columns=['ml_type']) + # print(new_data) + + if __name__ == '__main__': - main() + # ml() + pl() -- cgit v1.2.3-54-g00ecf From 776a74ac34c556e1d421c7b1babf1282c2590851 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sun, 15 Dec 2019 21:32:02 -0700 Subject: Move benchmarks file --- benchmarks.csv | 177 ---------------------------------------------------- data/benchmarks.csv | 177 ++++++++++++++++++++++++++++++++++++++++++++++++++++ main.py | 2 +- 3 files changed, 178 insertions(+), 178 deletions(-) delete mode 100644 benchmarks.csv create mode 100644 data/benchmarks.csv diff --git a/benchmarks.csv b/benchmarks.csv deleted file mode 100644 index f81c902e1..000000000 --- a/benchmarks.csv +++ /dev/null @@ -1,177 +0,0 @@ -ml_type,tr_size,te_size,kernel_s,mae,time,lj_s,lj_e,date_ran -CM,1500,1500,1000.0,31.512240475332977,100.3640273,na,na,14/12/2019 -L-JM,1500,1500,1000.0,14.071581760048153,101.6336081,1,1,14/12/2019 -CM,2000,1500,1000.0,28.245321081132253,152.88077109999998,na,na,14/12/2019 -L-JM,2000,1500,1000.0,12.641839980374655,155.5011396,1,1,14/12/2019 -CM,2500,1500,1000.0,26.97361909951274,215.9536518,na,na,14/12/2019 -L-JM,2500,1500,1000.0,11.785532138202667,211.8681615,1,1,14/12/2019 -CM,3000,1500,1000.0,24.641045831013365,278.5150912,na,na,14/12/2019 -L-JM,3000,1500,1000.0,11.08713556535848,278.5971606,1,1,14/12/2019 -CM,3500,1500,1000.0,23.152765029544195,401.9234568,na,na,14/12/2019 -L-JM,3500,1500,1000.0,10.430573100461325,408.3161812,1,1,14/12/2019 -CM,4000,1500,1000.0,22.15854674774742,499.55023489999996,na,na,14/12/2019 -L-JM,4000,1500,1000.0,10.117842321543852,503.7487102,1,1,14/12/2019 -CM,4500,1500,1000.0,21.180935112846054,605.4631191999999,na,na,14/12/2019 -L-JM,4500,1500,1000.0,9.677330400431318,604.4122219,1,1,14/12/2019 -CM,5000,1500,1000.0,20.30113013118839,699.0641616,na,na,14/12/2019 -L-JM,5000,1500,1000.0,9.143465812164308,701.9964291,1,1,14/12/2019 -CM,5500,1500,1000.0,19.749349939851125,816.2225933,na,na,14/12/2019 -L-JM,5500,1500,1000.0,8.902193965087893,800.8945613,1,1,14/12/2019 -CM,6000,1101,1000.0,18.989775833821966,894.5558989,na,na,14/12/2019 -L-JM,6000,1101,1000.0,8.595942764490113,885.4234397,1,1,14/12/2019 -CM,6500,601,1000.0,19.424238707695146,956.2706457,na,na,14/12/2019 -L-JM,6500,601,1000.0,8.636351001125403,950.2348745,1,1,14/12/2019 -L-JM,1500,1500,1000.0,25.971068591988388,113.6740279,1.5,1,14/12/2019 -L-JM,2000,1500,1000.0,23.332141255879883,176.44235379999998,1.5,1,14/12/2019 -L-JM,2500,1500,1000.0,21.692867970005768,245.19673870000003,1.5,1,14/12/2019 -L-JM,3000,1500,1000.0,20.59071302639572,328.621371,1.5,1,14/12/2019 -L-JM,3500,1500,1000.0,19.735266507048408,415.9199613,1.5,1,14/12/2019 -L-JM,4000,1500,1000.0,18.834051404314636,519.1557511,1.5,1,14/12/2019 -L-JM,4500,1500,1000.0,17.957638371625624,617.7238379,1.5,1,14/12/2019 -L-JM,5000,1500,1000.0,16.972922026017606,730.4018292,1.5,1,14/12/2019 -L-JM,5500,1500,1000.0,16.395492789509397,846.8666075,1.5,1,14/12/2019 -L-JM,6000,1101,1000.0,15.777389296181523,927.473866,1.5,1,14/12/2019 -L-JM,6500,601,1000.0,14.209911716905589,1009.5176146,1.5,1,14/12/2019 -L-JM,1500,1500,1000.0,28.83987694576565,113.8754708,2,1,14/12/2019 -L-JM,2000,1500,1000.0,26.596450258715805,175.00251580000003,2,1,14/12/2019 -L-JM,2500,1500,1000.0,25.24225833702623,250.13395500000001,2,1,14/12/2019 -L-JM,3000,1500,1000.0,24.218064992425827,331.4902702,2,1,14/12/2019 -L-JM,3500,1500,1000.0,23.753584334828926,418.1716937,2,1,14/12/2019 -L-JM,4000,1500,1000.0,22.796982394926633,508.1620121,2,1,14/12/2019 -L-JM,4500,1500,1000.0,22.094972526780207,620.0088331000001,2,1,14/12/2019 -L-JM,5000,1500,1000.0,21.32928554974122,719.5075616,2,1,14/12/2019 -L-JM,5500,1500,1000.0,20.839213236756795,853.7258727,2,1,14/12/2019 -L-JM,6000,1101,1000.0,20.469720091240188,919.2635181,2,1,14/12/2019 -L-JM,6500,601,1000.0,18.97332428701078,991.9277882,2,1,14/12/2019 -L-JM,1500,1500,1000.0,28.834687237181544,112.75118160000001,2.5,1,15/12/2019 -L-JM,2000,1500,1000.0,26.524798614302835,174.36785940000001,2.5,1,15/12/2019 -L-JM,2500,1500,1000.0,25.21431444031365,247.27555790000002,2.5,1,15/12/2019 -L-JM,3000,1500,1000.0,24.1868798509265,327.2753287,2.5,1,15/12/2019 -L-JM,3500,1500,1000.0,23.70991305338383,414.2050979,2.5,1,15/12/2019 -L-JM,4000,1500,1000.0,22.76540519550355,511.5177748,2.5,1,15/12/2019 -L-JM,4500,1500,1000.0,22.114765667859746,609.2158499,2.5,1,15/12/2019 -L-JM,5000,1500,1000.0,21.358232378530058,717.5175853000001,2.5,1,15/12/2019 -L-JM,5500,1500,1000.0,20.85827359348054,833.1339069,2.5,1,15/12/2019 -L-JM,6000,1101,1000.0,20.47802544079168,913.8767508999999,2.5,1,15/12/2019 -L-JM,6500,601,1000.0,18.974561750612214,983.2796894999999,2.5,1,15/12/2019 -L-JM,1500,1500,1000.0,32.66494718071635,115.4841954,3,1,15/12/2019 -L-JM,2000,1500,1000.0,28.7706412893922,177.2390435,3,1,15/12/2019 -L-JM,2500,1500,1000.0,26.93394969655416,249.6755776,3,1,15/12/2019 -L-JM,3000,1500,1000.0,25.592547303146002,331.0301091,3,1,15/12/2019 -L-JM,3500,1500,1000.0,24.823386571052417,424.3857741,3,1,15/12/2019 -L-JM,4000,1500,1000.0,23.723362785716677,516.4350475,3,1,15/12/2019 -L-JM,4500,1500,1000.0,22.96364383080498,615.7131781,3,1,15/12/2019 -L-JM,5000,1500,1000.0,22.097596606763833,732.2697349,3,1,15/12/2019 -L-JM,5500,1500,1000.0,21.463723095992123,850.7737075,3,1,15/12/2019 -L-JM,6000,1101,1000.0,21.098415947303895,909.3228279,3,1,15/12/2019 -L-JM,6500,601,1000.0,19.604458837712464,985.2199813000001,3,1,15/12/2019 -L-JM,1500,1500,1000.0,13.25081292192078,113.2111716,0.75,1,15/12/2019 -L-JM,2000,1500,1000.0,11.898306360562803,176.1226983,0.75,1,15/12/2019 -L-JM,2500,1500,1000.0,11.331479928387962,245.3395849,0.75,1,15/12/2019 -L-JM,3000,1500,1000.0,10.822743636769616,328.4253929,0.75,1,15/12/2019 -L-JM,3500,1500,1000.0,10.341161537587485,416.6295876,0.75,1,15/12/2019 -L-JM,4000,1500,1000.0,10.05422347022756,512.5809536,0.75,1,15/12/2019 -L-JM,4500,1500,1000.0,9.678447398895264,616.4628192,0.75,1,15/12/2019 -L-JM,5000,1500,1000.0,9.348294603434246,722.3488736,0.75,1,15/12/2019 -L-JM,5500,1500,1000.0,9.158434114440919,839.2501411999999,0.75,1,15/12/2019 -L-JM,6000,1101,1000.0,8.85969851676255,918.4742979,0.75,1,15/12/2019 -L-JM,6500,601,1000.0,8.978569760943016,997.4762726,0.75,1,15/12/2019 -L-JM,1500,1500,1000.0,14.31648555157471,112.708166,0.5,1,15/12/2019 -L-JM,2000,1500,1000.0,13.100616600819906,173.49587910000002,0.5,1,15/12/2019 -L-JM,2500,1500,1000.0,12.631016832621258,244.59541330000002,0.5,1,15/12/2019 -L-JM,3000,1500,1000.0,12.372139003845216,325.2085343,0.5,1,15/12/2019 -L-JM,3500,1500,1000.0,12.052149805135091,412.00539760000004,0.5,1,15/12/2019 -L-JM,4000,1500,1000.0,11.926307619750977,506.9327447,0.5,1,15/12/2019 -L-JM,4500,1500,1000.0,11.605206087137859,603.1267876000001,0.5,1,15/12/2019 -L-JM,5000,1500,1000.0,11.364976841634116,709.4308111,0.5,1,15/12/2019 -L-JM,5500,1500,1000.0,11.303823874104818,825.9382915000001,0.5,1,15/12/2019 -L-JM,6000,1101,1000.0,11.054803368952145,915.2881399,0.5,1,15/12/2019 -L-JM,6500,601,1000.0,11.402072457096146,984.6962941,0.5,1,15/12/2019 -L-JM,1500,1500,1000.0,16.515109934611004,117.3274013,0.25,1,15/12/2019 -L-JM,2000,1500,1000.0,15.618779733296712,177.3680046,0.25,1,15/12/2019 -L-JM,2500,1500,1000.0,15.289220743123371,247.515579,0.25,1,15/12/2019 -L-JM,3000,1500,1000.0,15.11142683183797,327.60220580000004,0.25,1,15/12/2019 -L-JM,3500,1500,1000.0,14.843900964528402,411.6829181,0.25,1,15/12/2019 -L-JM,4000,1500,1000.0,14.726752273071288,513.3648273,0.25,1,15/12/2019 -L-JM,4500,1500,1000.0,14.419325491251628,613.8661824000001,0.25,1,15/12/2019 -L-JM,5000,1500,1000.0,14.256678564778646,711.8209704,0.25,1,15/12/2019 -L-JM,5500,1500,1000.0,14.237603645751953,825.8206864,0.25,1,15/12/2019 -L-JM,6000,1101,1000.0,14.04869658132167,909.8488734,0.25,1,15/12/2019 -L-JM,6500,601,1000.0,14.660941845581258,994.2622263000001,0.25,1,15/12/2019 -L-JM,1500,1500,1000.0,13.917265603817388,111.8063593,1,1.5,15/12/2019 -L-JM,2000,1500,1000.0,12.393746784561161,174.0463799,1,1.5,15/12/2019 -L-JM,2500,1500,1000.0,11.310800296285631,242.5622126,1,1.5,15/12/2019 -L-JM,3000,1500,1000.0,10.573622634081524,323.0661685,1,1.5,15/12/2019 -L-JM,3500,1500,1000.0,9.875938797266643,406.5680506,1,1.5,15/12/2019 -L-JM,4000,1500,1000.0,9.510982350137393,503.7767193,1,1.5,15/12/2019 -L-JM,4500,1500,1000.0,9.119124757146201,596.6460913,1,1.5,15/12/2019 -L-JM,5000,1500,1000.0,8.599406629679363,706.859301,1,1.5,15/12/2019 -L-JM,5500,1500,1000.0,8.368502939493816,818.7217491,1,1.5,15/12/2019 -L-JM,6000,1101,1000.0,8.063868108397717,897.7200943,1,1.5,15/12/2019 -L-JM,6500,601,1000.0,8.136913764498198,974.7133005,1,1.5,15/12/2019 -L-JM,1500,1500,1000.0,14.09852163185247,111.0557463,1,2,15/12/2019 -L-JM,2000,1500,1000.0,12.44589603171794,172.8484317,1,2,15/12/2019 -L-JM,2500,1500,1000.0,11.252518187896092,245.6580888,1,2,15/12/2019 -L-JM,3000,1500,1000.0,10.49970513110606,319.36993149999995,1,2,15/12/2019 -L-JM,3500,1500,1000.0,9.76052582794444,409.32987959999997,1,2,15/12/2019 -L-JM,4000,1500,1000.0,9.367906394826731,505.0419491,1,2,15/12/2019 -L-JM,4500,1500,1000.0,8.953196634539921,613.9279513,1,2,15/12/2019 -L-JM,5000,1500,1000.0,8.49635838229116,708.8533507000001,1,2,15/12/2019 -L-JM,5500,1500,1000.0,8.250896130411785,824.6985384,1,2,15/12/2019 -L-JM,6000,1101,1000.0,7.941628870483316,896.7534047,1,2,15/12/2019 -L-JM,6500,601,1000.0,7.984307747999562,967.4740421,1,2,15/12/2019 -L-JM,1500,1500,1000.0,14.530905711745902,113.61682520000001,1,2.5,15/12/2019 -L-JM,2000,1500,1000.0,12.780882489283243,178.76914259999998,1,2.5,15/12/2019 -L-JM,2500,1500,1000.0,11.50745587341436,255.22816189999998,1,2.5,15/12/2019 -L-JM,3000,1500,1000.0,10.72320711825816,329.5140962,1,2.5,15/12/2019 -L-JM,3500,1500,1000.0,9.959901313243869,418.59089370000004,1,2.5,15/12/2019 -L-JM,4000,1500,1000.0,9.540940698993367,514.2713362,1,2.5,15/12/2019 -L-JM,4500,1500,1000.0,9.075100561608632,622.6434419,1,2.5,15/12/2019 -L-JM,5000,1500,1000.0,8.632106214462283,715.8135177,1,2.5,15/12/2019 -L-JM,5500,1500,1000.0,8.353497416613262,834.1856596,1,2.5,15/12/2019 -L-JM,6000,1101,1000.0,7.997364105073893,919.106896,1,2.5,15/12/2019 -L-JM,6500,601,1000.0,8.032743632894189,1003.1057952,1,2.5,15/12/2019 -L-JM,1500,1500,1000.0,14.880831269627889,112.38430980000001,1,3,15/12/2019 -L-JM,2000,1500,1000.0,13.064014818353021,174.3569823,1,3,15/12/2019 -L-JM,2500,1500,1000.0,11.762643989761354,243.2211045,1,3,15/12/2019 -L-JM,3000,1500,1000.0,10.968917869575504,324.5920986,1,3,15/12/2019 -L-JM,3500,1500,1000.0,10.200866189011894,410.9065092,1,3,15/12/2019 -L-JM,4000,1500,1000.0,9.746818656939825,498.6132275,1,3,15/12/2019 -L-JM,4500,1500,1000.0,9.23931041631953,606.2137346000001,1,3,15/12/2019 -L-JM,5000,1500,1000.0,8.780373388102213,705.5607821000001,1,3,15/12/2019 -L-JM,5500,1500,1000.0,8.485089657131196,820.0846994,1,3,15/12/2019 -L-JM,6000,1101,1000.0,8.135665582644734,895.5069686,1,3,15/12/2019 -L-JM,6500,601,1000.0,8.187855192343129,958.1212525999999,1,3,15/12/2019 -L-JM,1500,1500,1000.0,14.046177239428841,114.7886533,1,0.75,15/12/2019 -L-JM,2000,1500,1000.0,12.700155820409142,177.8000633,1,0.75,15/12/2019 -L-JM,2500,1500,1000.0,11.999409877098083,249.8877674,1,0.75,15/12/2019 -L-JM,3000,1500,1000.0,11.349961215230307,331.60657760000004,1,0.75,15/12/2019 -L-JM,3500,1500,1000.0,10.72300334940847,421.4194003,1,0.75,15/12/2019 -L-JM,4000,1500,1000.0,10.448034243490858,514.5383338,1,0.75,15/12/2019 -L-JM,4500,1500,1000.0,10.017663810821535,617.0386821,1,0.75,15/12/2019 -L-JM,5000,1500,1000.0,9.517057082204184,716.4373566,1,0.75,15/12/2019 -L-JM,5500,1500,1000.0,9.273669176727296,832.6763739,1,0.75,15/12/2019 -L-JM,6000,1101,1000.0,8.995569295995784,911.0825957,1,0.75,15/12/2019 -L-JM,6500,601,1000.0,9.010769006846546,979.3535823999999,1,0.75,15/12/2019 -L-JM,1500,1500,1000.0,13.967123082429293,111.65990760000001,1,0.5,15/12/2019 -L-JM,2000,1500,1000.0,12.697881535001121,172.2946484,1,0.5,15/12/2019 -L-JM,2500,1500,1000.0,12.145461895324708,241.1312698,1,0.5,15/12/2019 -L-JM,3000,1500,1000.0,11.588876514109295,323.0631537,1,0.5,15/12/2019 -L-JM,3500,1500,1000.0,11.044433803100585,415.137312,1,0.5,15/12/2019 -L-JM,4000,1500,1000.0,10.80793491441091,504.8102572,1,0.5,15/12/2019 -L-JM,4500,1500,1000.0,10.405684053039552,602.5219337,1,0.5,15/12/2019 -L-JM,5000,1500,1000.0,9.965184934692385,711.8197224,1,0.5,15/12/2019 -L-JM,5500,1500,1000.0,9.748910655822755,825.7830675,1,0.5,15/12/2019 -L-JM,6000,1101,1000.0,9.48353206175095,908.3770869,1,0.5,15/12/2019 -L-JM,6500,601,1000.0,9.485751465046862,971.9107026,1,0.5,15/12/2019 -L-JM,1500,1500,1000.0,14.028847422150934,112.4397257,1,0.25,15/12/2019 -L-JM,2000,1500,1000.0,12.743222420338949,174.4159427,1,0.25,15/12/2019 -L-JM,2500,1500,1000.0,12.272876975964866,249.5782289,1,0.25,15/12/2019 -L-JM,3000,1500,1000.0,11.804296119425455,328.94519329999997,1,0.25,15/12/2019 -L-JM,3500,1500,1000.0,11.369661207082114,412.7472373,1,0.25,15/12/2019 -L-JM,4000,1500,1000.0,11.183031419891359,511.8257998,1,0.25,15/12/2019 -L-JM,4500,1500,1000.0,10.825626690989177,609.6951702,1,0.25,15/12/2019 -L-JM,5000,1500,1000.0,10.460318843851725,721.2592548,1,0.25,15/12/2019 -L-JM,5500,1500,1000.0,10.294279163879395,825.8027392,1,0.25,15/12/2019 -L-JM,6000,1101,1000.0,10.018516495014302,921.2132997,1,0.25,15/12/2019 -L-JM,6500,601,1000.0,10.097973922018602,980.0041144,1,0.25,15/12/2019 diff --git a/data/benchmarks.csv b/data/benchmarks.csv new file mode 100644 index 000000000..f81c902e1 --- /dev/null +++ b/data/benchmarks.csv @@ -0,0 +1,177 @@ +ml_type,tr_size,te_size,kernel_s,mae,time,lj_s,lj_e,date_ran +CM,1500,1500,1000.0,31.512240475332977,100.3640273,na,na,14/12/2019 +L-JM,1500,1500,1000.0,14.071581760048153,101.6336081,1,1,14/12/2019 +CM,2000,1500,1000.0,28.245321081132253,152.88077109999998,na,na,14/12/2019 +L-JM,2000,1500,1000.0,12.641839980374655,155.5011396,1,1,14/12/2019 +CM,2500,1500,1000.0,26.97361909951274,215.9536518,na,na,14/12/2019 +L-JM,2500,1500,1000.0,11.785532138202667,211.8681615,1,1,14/12/2019 +CM,3000,1500,1000.0,24.641045831013365,278.5150912,na,na,14/12/2019 +L-JM,3000,1500,1000.0,11.08713556535848,278.5971606,1,1,14/12/2019 +CM,3500,1500,1000.0,23.152765029544195,401.9234568,na,na,14/12/2019 +L-JM,3500,1500,1000.0,10.430573100461325,408.3161812,1,1,14/12/2019 +CM,4000,1500,1000.0,22.15854674774742,499.55023489999996,na,na,14/12/2019 +L-JM,4000,1500,1000.0,10.117842321543852,503.7487102,1,1,14/12/2019 +CM,4500,1500,1000.0,21.180935112846054,605.4631191999999,na,na,14/12/2019 +L-JM,4500,1500,1000.0,9.677330400431318,604.4122219,1,1,14/12/2019 +CM,5000,1500,1000.0,20.30113013118839,699.0641616,na,na,14/12/2019 +L-JM,5000,1500,1000.0,9.143465812164308,701.9964291,1,1,14/12/2019 +CM,5500,1500,1000.0,19.749349939851125,816.2225933,na,na,14/12/2019 +L-JM,5500,1500,1000.0,8.902193965087893,800.8945613,1,1,14/12/2019 +CM,6000,1101,1000.0,18.989775833821966,894.5558989,na,na,14/12/2019 +L-JM,6000,1101,1000.0,8.595942764490113,885.4234397,1,1,14/12/2019 +CM,6500,601,1000.0,19.424238707695146,956.2706457,na,na,14/12/2019 +L-JM,6500,601,1000.0,8.636351001125403,950.2348745,1,1,14/12/2019 +L-JM,1500,1500,1000.0,25.971068591988388,113.6740279,1.5,1,14/12/2019 +L-JM,2000,1500,1000.0,23.332141255879883,176.44235379999998,1.5,1,14/12/2019 +L-JM,2500,1500,1000.0,21.692867970005768,245.19673870000003,1.5,1,14/12/2019 +L-JM,3000,1500,1000.0,20.59071302639572,328.621371,1.5,1,14/12/2019 +L-JM,3500,1500,1000.0,19.735266507048408,415.9199613,1.5,1,14/12/2019 +L-JM,4000,1500,1000.0,18.834051404314636,519.1557511,1.5,1,14/12/2019 +L-JM,4500,1500,1000.0,17.957638371625624,617.7238379,1.5,1,14/12/2019 +L-JM,5000,1500,1000.0,16.972922026017606,730.4018292,1.5,1,14/12/2019 +L-JM,5500,1500,1000.0,16.395492789509397,846.8666075,1.5,1,14/12/2019 +L-JM,6000,1101,1000.0,15.777389296181523,927.473866,1.5,1,14/12/2019 +L-JM,6500,601,1000.0,14.209911716905589,1009.5176146,1.5,1,14/12/2019 +L-JM,1500,1500,1000.0,28.83987694576565,113.8754708,2,1,14/12/2019 +L-JM,2000,1500,1000.0,26.596450258715805,175.00251580000003,2,1,14/12/2019 +L-JM,2500,1500,1000.0,25.24225833702623,250.13395500000001,2,1,14/12/2019 +L-JM,3000,1500,1000.0,24.218064992425827,331.4902702,2,1,14/12/2019 +L-JM,3500,1500,1000.0,23.753584334828926,418.1716937,2,1,14/12/2019 +L-JM,4000,1500,1000.0,22.796982394926633,508.1620121,2,1,14/12/2019 +L-JM,4500,1500,1000.0,22.094972526780207,620.0088331000001,2,1,14/12/2019 +L-JM,5000,1500,1000.0,21.32928554974122,719.5075616,2,1,14/12/2019 +L-JM,5500,1500,1000.0,20.839213236756795,853.7258727,2,1,14/12/2019 +L-JM,6000,1101,1000.0,20.469720091240188,919.2635181,2,1,14/12/2019 +L-JM,6500,601,1000.0,18.97332428701078,991.9277882,2,1,14/12/2019 +L-JM,1500,1500,1000.0,28.834687237181544,112.75118160000001,2.5,1,15/12/2019 +L-JM,2000,1500,1000.0,26.524798614302835,174.36785940000001,2.5,1,15/12/2019 +L-JM,2500,1500,1000.0,25.21431444031365,247.27555790000002,2.5,1,15/12/2019 +L-JM,3000,1500,1000.0,24.1868798509265,327.2753287,2.5,1,15/12/2019 +L-JM,3500,1500,1000.0,23.70991305338383,414.2050979,2.5,1,15/12/2019 +L-JM,4000,1500,1000.0,22.76540519550355,511.5177748,2.5,1,15/12/2019 +L-JM,4500,1500,1000.0,22.114765667859746,609.2158499,2.5,1,15/12/2019 +L-JM,5000,1500,1000.0,21.358232378530058,717.5175853000001,2.5,1,15/12/2019 +L-JM,5500,1500,1000.0,20.85827359348054,833.1339069,2.5,1,15/12/2019 +L-JM,6000,1101,1000.0,20.47802544079168,913.8767508999999,2.5,1,15/12/2019 +L-JM,6500,601,1000.0,18.974561750612214,983.2796894999999,2.5,1,15/12/2019 +L-JM,1500,1500,1000.0,32.66494718071635,115.4841954,3,1,15/12/2019 +L-JM,2000,1500,1000.0,28.7706412893922,177.2390435,3,1,15/12/2019 +L-JM,2500,1500,1000.0,26.93394969655416,249.6755776,3,1,15/12/2019 +L-JM,3000,1500,1000.0,25.592547303146002,331.0301091,3,1,15/12/2019 +L-JM,3500,1500,1000.0,24.823386571052417,424.3857741,3,1,15/12/2019 +L-JM,4000,1500,1000.0,23.723362785716677,516.4350475,3,1,15/12/2019 +L-JM,4500,1500,1000.0,22.96364383080498,615.7131781,3,1,15/12/2019 +L-JM,5000,1500,1000.0,22.097596606763833,732.2697349,3,1,15/12/2019 +L-JM,5500,1500,1000.0,21.463723095992123,850.7737075,3,1,15/12/2019 +L-JM,6000,1101,1000.0,21.098415947303895,909.3228279,3,1,15/12/2019 +L-JM,6500,601,1000.0,19.604458837712464,985.2199813000001,3,1,15/12/2019 +L-JM,1500,1500,1000.0,13.25081292192078,113.2111716,0.75,1,15/12/2019 +L-JM,2000,1500,1000.0,11.898306360562803,176.1226983,0.75,1,15/12/2019 +L-JM,2500,1500,1000.0,11.331479928387962,245.3395849,0.75,1,15/12/2019 +L-JM,3000,1500,1000.0,10.822743636769616,328.4253929,0.75,1,15/12/2019 +L-JM,3500,1500,1000.0,10.341161537587485,416.6295876,0.75,1,15/12/2019 +L-JM,4000,1500,1000.0,10.05422347022756,512.5809536,0.75,1,15/12/2019 +L-JM,4500,1500,1000.0,9.678447398895264,616.4628192,0.75,1,15/12/2019 +L-JM,5000,1500,1000.0,9.348294603434246,722.3488736,0.75,1,15/12/2019 +L-JM,5500,1500,1000.0,9.158434114440919,839.2501411999999,0.75,1,15/12/2019 +L-JM,6000,1101,1000.0,8.85969851676255,918.4742979,0.75,1,15/12/2019 +L-JM,6500,601,1000.0,8.978569760943016,997.4762726,0.75,1,15/12/2019 +L-JM,1500,1500,1000.0,14.31648555157471,112.708166,0.5,1,15/12/2019 +L-JM,2000,1500,1000.0,13.100616600819906,173.49587910000002,0.5,1,15/12/2019 +L-JM,2500,1500,1000.0,12.631016832621258,244.59541330000002,0.5,1,15/12/2019 +L-JM,3000,1500,1000.0,12.372139003845216,325.2085343,0.5,1,15/12/2019 +L-JM,3500,1500,1000.0,12.052149805135091,412.00539760000004,0.5,1,15/12/2019 +L-JM,4000,1500,1000.0,11.926307619750977,506.9327447,0.5,1,15/12/2019 +L-JM,4500,1500,1000.0,11.605206087137859,603.1267876000001,0.5,1,15/12/2019 +L-JM,5000,1500,1000.0,11.364976841634116,709.4308111,0.5,1,15/12/2019 +L-JM,5500,1500,1000.0,11.303823874104818,825.9382915000001,0.5,1,15/12/2019 +L-JM,6000,1101,1000.0,11.054803368952145,915.2881399,0.5,1,15/12/2019 +L-JM,6500,601,1000.0,11.402072457096146,984.6962941,0.5,1,15/12/2019 +L-JM,1500,1500,1000.0,16.515109934611004,117.3274013,0.25,1,15/12/2019 +L-JM,2000,1500,1000.0,15.618779733296712,177.3680046,0.25,1,15/12/2019 +L-JM,2500,1500,1000.0,15.289220743123371,247.515579,0.25,1,15/12/2019 +L-JM,3000,1500,1000.0,15.11142683183797,327.60220580000004,0.25,1,15/12/2019 +L-JM,3500,1500,1000.0,14.843900964528402,411.6829181,0.25,1,15/12/2019 +L-JM,4000,1500,1000.0,14.726752273071288,513.3648273,0.25,1,15/12/2019 +L-JM,4500,1500,1000.0,14.419325491251628,613.8661824000001,0.25,1,15/12/2019 +L-JM,5000,1500,1000.0,14.256678564778646,711.8209704,0.25,1,15/12/2019 +L-JM,5500,1500,1000.0,14.237603645751953,825.8206864,0.25,1,15/12/2019 +L-JM,6000,1101,1000.0,14.04869658132167,909.8488734,0.25,1,15/12/2019 +L-JM,6500,601,1000.0,14.660941845581258,994.2622263000001,0.25,1,15/12/2019 +L-JM,1500,1500,1000.0,13.917265603817388,111.8063593,1,1.5,15/12/2019 +L-JM,2000,1500,1000.0,12.393746784561161,174.0463799,1,1.5,15/12/2019 +L-JM,2500,1500,1000.0,11.310800296285631,242.5622126,1,1.5,15/12/2019 +L-JM,3000,1500,1000.0,10.573622634081524,323.0661685,1,1.5,15/12/2019 +L-JM,3500,1500,1000.0,9.875938797266643,406.5680506,1,1.5,15/12/2019 +L-JM,4000,1500,1000.0,9.510982350137393,503.7767193,1,1.5,15/12/2019 +L-JM,4500,1500,1000.0,9.119124757146201,596.6460913,1,1.5,15/12/2019 +L-JM,5000,1500,1000.0,8.599406629679363,706.859301,1,1.5,15/12/2019 +L-JM,5500,1500,1000.0,8.368502939493816,818.7217491,1,1.5,15/12/2019 +L-JM,6000,1101,1000.0,8.063868108397717,897.7200943,1,1.5,15/12/2019 +L-JM,6500,601,1000.0,8.136913764498198,974.7133005,1,1.5,15/12/2019 +L-JM,1500,1500,1000.0,14.09852163185247,111.0557463,1,2,15/12/2019 +L-JM,2000,1500,1000.0,12.44589603171794,172.8484317,1,2,15/12/2019 +L-JM,2500,1500,1000.0,11.252518187896092,245.6580888,1,2,15/12/2019 +L-JM,3000,1500,1000.0,10.49970513110606,319.36993149999995,1,2,15/12/2019 +L-JM,3500,1500,1000.0,9.76052582794444,409.32987959999997,1,2,15/12/2019 +L-JM,4000,1500,1000.0,9.367906394826731,505.0419491,1,2,15/12/2019 +L-JM,4500,1500,1000.0,8.953196634539921,613.9279513,1,2,15/12/2019 +L-JM,5000,1500,1000.0,8.49635838229116,708.8533507000001,1,2,15/12/2019 +L-JM,5500,1500,1000.0,8.250896130411785,824.6985384,1,2,15/12/2019 +L-JM,6000,1101,1000.0,7.941628870483316,896.7534047,1,2,15/12/2019 +L-JM,6500,601,1000.0,7.984307747999562,967.4740421,1,2,15/12/2019 +L-JM,1500,1500,1000.0,14.530905711745902,113.61682520000001,1,2.5,15/12/2019 +L-JM,2000,1500,1000.0,12.780882489283243,178.76914259999998,1,2.5,15/12/2019 +L-JM,2500,1500,1000.0,11.50745587341436,255.22816189999998,1,2.5,15/12/2019 +L-JM,3000,1500,1000.0,10.72320711825816,329.5140962,1,2.5,15/12/2019 +L-JM,3500,1500,1000.0,9.959901313243869,418.59089370000004,1,2.5,15/12/2019 +L-JM,4000,1500,1000.0,9.540940698993367,514.2713362,1,2.5,15/12/2019 +L-JM,4500,1500,1000.0,9.075100561608632,622.6434419,1,2.5,15/12/2019 +L-JM,5000,1500,1000.0,8.632106214462283,715.8135177,1,2.5,15/12/2019 +L-JM,5500,1500,1000.0,8.353497416613262,834.1856596,1,2.5,15/12/2019 +L-JM,6000,1101,1000.0,7.997364105073893,919.106896,1,2.5,15/12/2019 +L-JM,6500,601,1000.0,8.032743632894189,1003.1057952,1,2.5,15/12/2019 +L-JM,1500,1500,1000.0,14.880831269627889,112.38430980000001,1,3,15/12/2019 +L-JM,2000,1500,1000.0,13.064014818353021,174.3569823,1,3,15/12/2019 +L-JM,2500,1500,1000.0,11.762643989761354,243.2211045,1,3,15/12/2019 +L-JM,3000,1500,1000.0,10.968917869575504,324.5920986,1,3,15/12/2019 +L-JM,3500,1500,1000.0,10.200866189011894,410.9065092,1,3,15/12/2019 +L-JM,4000,1500,1000.0,9.746818656939825,498.6132275,1,3,15/12/2019 +L-JM,4500,1500,1000.0,9.23931041631953,606.2137346000001,1,3,15/12/2019 +L-JM,5000,1500,1000.0,8.780373388102213,705.5607821000001,1,3,15/12/2019 +L-JM,5500,1500,1000.0,8.485089657131196,820.0846994,1,3,15/12/2019 +L-JM,6000,1101,1000.0,8.135665582644734,895.5069686,1,3,15/12/2019 +L-JM,6500,601,1000.0,8.187855192343129,958.1212525999999,1,3,15/12/2019 +L-JM,1500,1500,1000.0,14.046177239428841,114.7886533,1,0.75,15/12/2019 +L-JM,2000,1500,1000.0,12.700155820409142,177.8000633,1,0.75,15/12/2019 +L-JM,2500,1500,1000.0,11.999409877098083,249.8877674,1,0.75,15/12/2019 +L-JM,3000,1500,1000.0,11.349961215230307,331.60657760000004,1,0.75,15/12/2019 +L-JM,3500,1500,1000.0,10.72300334940847,421.4194003,1,0.75,15/12/2019 +L-JM,4000,1500,1000.0,10.448034243490858,514.5383338,1,0.75,15/12/2019 +L-JM,4500,1500,1000.0,10.017663810821535,617.0386821,1,0.75,15/12/2019 +L-JM,5000,1500,1000.0,9.517057082204184,716.4373566,1,0.75,15/12/2019 +L-JM,5500,1500,1000.0,9.273669176727296,832.6763739,1,0.75,15/12/2019 +L-JM,6000,1101,1000.0,8.995569295995784,911.0825957,1,0.75,15/12/2019 +L-JM,6500,601,1000.0,9.010769006846546,979.3535823999999,1,0.75,15/12/2019 +L-JM,1500,1500,1000.0,13.967123082429293,111.65990760000001,1,0.5,15/12/2019 +L-JM,2000,1500,1000.0,12.697881535001121,172.2946484,1,0.5,15/12/2019 +L-JM,2500,1500,1000.0,12.145461895324708,241.1312698,1,0.5,15/12/2019 +L-JM,3000,1500,1000.0,11.588876514109295,323.0631537,1,0.5,15/12/2019 +L-JM,3500,1500,1000.0,11.044433803100585,415.137312,1,0.5,15/12/2019 +L-JM,4000,1500,1000.0,10.80793491441091,504.8102572,1,0.5,15/12/2019 +L-JM,4500,1500,1000.0,10.405684053039552,602.5219337,1,0.5,15/12/2019 +L-JM,5000,1500,1000.0,9.965184934692385,711.8197224,1,0.5,15/12/2019 +L-JM,5500,1500,1000.0,9.748910655822755,825.7830675,1,0.5,15/12/2019 +L-JM,6000,1101,1000.0,9.48353206175095,908.3770869,1,0.5,15/12/2019 +L-JM,6500,601,1000.0,9.485751465046862,971.9107026,1,0.5,15/12/2019 +L-JM,1500,1500,1000.0,14.028847422150934,112.4397257,1,0.25,15/12/2019 +L-JM,2000,1500,1000.0,12.743222420338949,174.4159427,1,0.25,15/12/2019 +L-JM,2500,1500,1000.0,12.272876975964866,249.5782289,1,0.25,15/12/2019 +L-JM,3000,1500,1000.0,11.804296119425455,328.94519329999997,1,0.25,15/12/2019 +L-JM,3500,1500,1000.0,11.369661207082114,412.7472373,1,0.25,15/12/2019 +L-JM,4000,1500,1000.0,11.183031419891359,511.8257998,1,0.25,15/12/2019 +L-JM,4500,1500,1000.0,10.825626690989177,609.6951702,1,0.25,15/12/2019 +L-JM,5000,1500,1000.0,10.460318843851725,721.2592548,1,0.25,15/12/2019 +L-JM,5500,1500,1000.0,10.294279163879395,825.8027392,1,0.25,15/12/2019 +L-JM,6000,1101,1000.0,10.018516495014302,921.2132997,1,0.25,15/12/2019 +L-JM,6500,601,1000.0,10.097973922018602,980.0041144,1,0.25,15/12/2019 diff --git a/main.py b/main.py index d2466c9d6..7648e44cb 100644 --- a/main.py +++ b/main.py @@ -95,7 +95,7 @@ def ml(): for proc in procs: proc.join() - with open('benchmarks.csv', 'a') as save_file: + with open('data\\benchmarks.csv', 'a') as save_file: # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,', # 'mae,time,lj_s,lj_e,date_ran\n'])) date = '/'.join([str(field) for field in time.localtime()[:3][::-1]]) -- cgit v1.2.3-54-g00ecf From ad2e46473575b703beb0a48abbfab3e2ea2eaf22 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sun, 15 Dec 2019 22:07:30 -0700 Subject: Add new benchmarks figures --- data/figs/mae_diff_param_lj_e.pdf | Bin 0 -> 13696 bytes data/figs/mae_diff_param_lj_s.pdf | Bin 0 -> 12679 bytes data/figs/mae_diff_tr_sizes.pdf | Bin 10839 -> 10839 bytes main.py | 81 +++++++++++++++++++++++++++++++++----- 4 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 data/figs/mae_diff_param_lj_e.pdf create mode 100644 data/figs/mae_diff_param_lj_s.pdf diff --git a/data/figs/mae_diff_param_lj_e.pdf b/data/figs/mae_diff_param_lj_e.pdf new file mode 100644 index 000000000..20e6676eb Binary files /dev/null and b/data/figs/mae_diff_param_lj_e.pdf differ diff --git a/data/figs/mae_diff_param_lj_s.pdf b/data/figs/mae_diff_param_lj_s.pdf new file mode 100644 index 000000000..abf50c9b6 Binary files /dev/null and b/data/figs/mae_diff_param_lj_s.pdf differ diff --git a/data/figs/mae_diff_tr_sizes.pdf b/data/figs/mae_diff_tr_sizes.pdf index e49e70411..0d6fb923c 100644 Binary files a/data/figs/mae_diff_tr_sizes.pdf and b/data/figs/mae_diff_tr_sizes.pdf differ diff --git a/main.py b/main.py index 7648e44cb..3cfa1bfb6 100644 --- a/main.py +++ b/main.py @@ -117,6 +117,7 @@ def pl(): """ Function for plotting the benchmarks. """ + # Original columns. or_cols = ['ml_type', 'tr_size', 'te_size', @@ -126,20 +127,27 @@ def pl(): 'lj_s', 'lj_e', 'date_ran'] + # Drop some original columns. dor_cols = ['te_size', 'kernel_s', 'time', 'date_ran'] - data_temp = pd.read_csv('benchmarks.csv',) + # Read benchmarks data and drop some columns. + data_temp = pd.read_csv('data\\benchmarks.csv',) data = pd.DataFrame(data_temp, columns=or_cols) data = data.drop(columns=dor_cols) - # print(data) + # Get the data of the first benchmarks and drop unnecesary columns. first_data = pd.DataFrame(data, index=range(0, 22)) first_data = first_data.drop(columns=['lj_s', 'lj_e']) - fd_columns = ['ml_type', 'tr_size', 'mae'] + # Columns to keep temporarily. + fd_columns = ['ml_type', + 'tr_size', + 'mae'] + + # Create new dataframes for each matrix descriptor and fill them. first_data_cm = pd.DataFrame(columns=fd_columns) first_data_ljm = pd.DataFrame(columns=fd_columns) for i in range(first_data.shape[0]): @@ -148,14 +156,20 @@ def pl(): first_data_cm = first_data_cm.append(temp_df) else: first_data_ljm = first_data_ljm.append(temp_df) - first_data_cm = first_data_cm.drop(columns=['ml_type']).rename(columns={'mae': 'cm_mae'}) - first_data_ljm = first_data_ljm.drop(columns=['ml_type']).rename(columns={'mae': 'ljm_mae'}) - print(first_data_cm) - print(first_data_ljm) + # Drop unnecesary column and rename 'mae' for later use. + first_data_cm = first_data_cm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'cm_mae'}) + first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'ljm_mae'}) + # print(first_data_cm) + # print(first_data_ljm) + + # Get the cm data axis so it can be joined with the ljm data axis. cm_axis = first_data_cm.plot(x='tr_size', y='cm_mae', kind='line') + # Get the ljm data axis and join it with the cm one. plot_axis = first_data_ljm.plot(ax=cm_axis, x='tr_size', y='ljm_mae', @@ -163,11 +177,60 @@ def pl(): plot_axis.set_xlabel('tr_size') plot_axis.set_ylabel('mae') plot_axis.set_title('mae for different tr_sizes') - plot_axis.get_figure().savefig('data\\figs\\mae_diff_tr_sizes.pdf') + # Get the figure and save it. + # plot_axis.get_figure().savefig('data\\figs\\mae_diff_tr_sizes.pdf') + # Get the rest of the benchmark data and drop unnecesary column. new_data = data.drop(index=range(0, 22)) new_data = new_data.drop(columns=['ml_type']) - # print(new_data) + + # Get the first set and rename it. + nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(22, 99, 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(s)') + + last_axis.get_figure().savefig('data\\figs\\mae_diff_param_lj_s.pdf') + + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(99, data.shape[0], 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(e)') + + last_axis.get_figure().savefig('data\\figs\\mae_diff_param_lj_e.pdf') if __name__ == '__main__': -- cgit v1.2.3-54-g00ecf From b799e335edf24e29e6afa328440cbb9180a25bb2 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Tue, 17 Dec 2019 15:42:05 -0700 Subject: Update requirements --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index f91fd71c2..1856939e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ colorama==0.4.1 numpy==1.17.4 +pandas==0.25.3 +matplotlib==3.1.2 \ No newline at end of file -- cgit v1.2.3-54-g00ecf From 96a3f2b2950451a478c951e642a4aa188219682b Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Tue, 17 Dec 2019 15:44:56 -0700 Subject: Add ignores --- .gitignore | 4 + data/benchmarks.csv | 177 -------------------------------------- data/figs/mae_diff_param_lj_e.pdf | Bin 13696 -> 0 bytes data/figs/mae_diff_param_lj_s.pdf | Bin 12679 -> 0 bytes data/figs/mae_diff_tr_sizes.pdf | Bin 10839 -> 0 bytes main.py | 6 +- 6 files changed, 7 insertions(+), 180 deletions(-) delete mode 100644 data/benchmarks.csv delete mode 100644 data/figs/mae_diff_param_lj_e.pdf delete mode 100644 data/figs/mae_diff_param_lj_s.pdf delete mode 100644 data/figs/mae_diff_tr_sizes.pdf diff --git a/.gitignore b/.gitignore index a1bdb4dde..02ab56ded 100644 --- a/.gitignore +++ b/.gitignore @@ -114,3 +114,7 @@ venv.bak/ # Original data. .original_data/ + +# Benchmarks and figures +benchmarks.csv +.figs/ \ No newline at end of file diff --git a/data/benchmarks.csv b/data/benchmarks.csv deleted file mode 100644 index f81c902e1..000000000 --- a/data/benchmarks.csv +++ /dev/null @@ -1,177 +0,0 @@ -ml_type,tr_size,te_size,kernel_s,mae,time,lj_s,lj_e,date_ran -CM,1500,1500,1000.0,31.512240475332977,100.3640273,na,na,14/12/2019 -L-JM,1500,1500,1000.0,14.071581760048153,101.6336081,1,1,14/12/2019 -CM,2000,1500,1000.0,28.245321081132253,152.88077109999998,na,na,14/12/2019 -L-JM,2000,1500,1000.0,12.641839980374655,155.5011396,1,1,14/12/2019 -CM,2500,1500,1000.0,26.97361909951274,215.9536518,na,na,14/12/2019 -L-JM,2500,1500,1000.0,11.785532138202667,211.8681615,1,1,14/12/2019 -CM,3000,1500,1000.0,24.641045831013365,278.5150912,na,na,14/12/2019 -L-JM,3000,1500,1000.0,11.08713556535848,278.5971606,1,1,14/12/2019 -CM,3500,1500,1000.0,23.152765029544195,401.9234568,na,na,14/12/2019 -L-JM,3500,1500,1000.0,10.430573100461325,408.3161812,1,1,14/12/2019 -CM,4000,1500,1000.0,22.15854674774742,499.55023489999996,na,na,14/12/2019 -L-JM,4000,1500,1000.0,10.117842321543852,503.7487102,1,1,14/12/2019 -CM,4500,1500,1000.0,21.180935112846054,605.4631191999999,na,na,14/12/2019 -L-JM,4500,1500,1000.0,9.677330400431318,604.4122219,1,1,14/12/2019 -CM,5000,1500,1000.0,20.30113013118839,699.0641616,na,na,14/12/2019 -L-JM,5000,1500,1000.0,9.143465812164308,701.9964291,1,1,14/12/2019 -CM,5500,1500,1000.0,19.749349939851125,816.2225933,na,na,14/12/2019 -L-JM,5500,1500,1000.0,8.902193965087893,800.8945613,1,1,14/12/2019 -CM,6000,1101,1000.0,18.989775833821966,894.5558989,na,na,14/12/2019 -L-JM,6000,1101,1000.0,8.595942764490113,885.4234397,1,1,14/12/2019 -CM,6500,601,1000.0,19.424238707695146,956.2706457,na,na,14/12/2019 -L-JM,6500,601,1000.0,8.636351001125403,950.2348745,1,1,14/12/2019 -L-JM,1500,1500,1000.0,25.971068591988388,113.6740279,1.5,1,14/12/2019 -L-JM,2000,1500,1000.0,23.332141255879883,176.44235379999998,1.5,1,14/12/2019 -L-JM,2500,1500,1000.0,21.692867970005768,245.19673870000003,1.5,1,14/12/2019 -L-JM,3000,1500,1000.0,20.59071302639572,328.621371,1.5,1,14/12/2019 -L-JM,3500,1500,1000.0,19.735266507048408,415.9199613,1.5,1,14/12/2019 -L-JM,4000,1500,1000.0,18.834051404314636,519.1557511,1.5,1,14/12/2019 -L-JM,4500,1500,1000.0,17.957638371625624,617.7238379,1.5,1,14/12/2019 -L-JM,5000,1500,1000.0,16.972922026017606,730.4018292,1.5,1,14/12/2019 -L-JM,5500,1500,1000.0,16.395492789509397,846.8666075,1.5,1,14/12/2019 -L-JM,6000,1101,1000.0,15.777389296181523,927.473866,1.5,1,14/12/2019 -L-JM,6500,601,1000.0,14.209911716905589,1009.5176146,1.5,1,14/12/2019 -L-JM,1500,1500,1000.0,28.83987694576565,113.8754708,2,1,14/12/2019 -L-JM,2000,1500,1000.0,26.596450258715805,175.00251580000003,2,1,14/12/2019 -L-JM,2500,1500,1000.0,25.24225833702623,250.13395500000001,2,1,14/12/2019 -L-JM,3000,1500,1000.0,24.218064992425827,331.4902702,2,1,14/12/2019 -L-JM,3500,1500,1000.0,23.753584334828926,418.1716937,2,1,14/12/2019 -L-JM,4000,1500,1000.0,22.796982394926633,508.1620121,2,1,14/12/2019 -L-JM,4500,1500,1000.0,22.094972526780207,620.0088331000001,2,1,14/12/2019 -L-JM,5000,1500,1000.0,21.32928554974122,719.5075616,2,1,14/12/2019 -L-JM,5500,1500,1000.0,20.839213236756795,853.7258727,2,1,14/12/2019 -L-JM,6000,1101,1000.0,20.469720091240188,919.2635181,2,1,14/12/2019 -L-JM,6500,601,1000.0,18.97332428701078,991.9277882,2,1,14/12/2019 -L-JM,1500,1500,1000.0,28.834687237181544,112.75118160000001,2.5,1,15/12/2019 -L-JM,2000,1500,1000.0,26.524798614302835,174.36785940000001,2.5,1,15/12/2019 -L-JM,2500,1500,1000.0,25.21431444031365,247.27555790000002,2.5,1,15/12/2019 -L-JM,3000,1500,1000.0,24.1868798509265,327.2753287,2.5,1,15/12/2019 -L-JM,3500,1500,1000.0,23.70991305338383,414.2050979,2.5,1,15/12/2019 -L-JM,4000,1500,1000.0,22.76540519550355,511.5177748,2.5,1,15/12/2019 -L-JM,4500,1500,1000.0,22.114765667859746,609.2158499,2.5,1,15/12/2019 -L-JM,5000,1500,1000.0,21.358232378530058,717.5175853000001,2.5,1,15/12/2019 -L-JM,5500,1500,1000.0,20.85827359348054,833.1339069,2.5,1,15/12/2019 -L-JM,6000,1101,1000.0,20.47802544079168,913.8767508999999,2.5,1,15/12/2019 -L-JM,6500,601,1000.0,18.974561750612214,983.2796894999999,2.5,1,15/12/2019 -L-JM,1500,1500,1000.0,32.66494718071635,115.4841954,3,1,15/12/2019 -L-JM,2000,1500,1000.0,28.7706412893922,177.2390435,3,1,15/12/2019 -L-JM,2500,1500,1000.0,26.93394969655416,249.6755776,3,1,15/12/2019 -L-JM,3000,1500,1000.0,25.592547303146002,331.0301091,3,1,15/12/2019 -L-JM,3500,1500,1000.0,24.823386571052417,424.3857741,3,1,15/12/2019 -L-JM,4000,1500,1000.0,23.723362785716677,516.4350475,3,1,15/12/2019 -L-JM,4500,1500,1000.0,22.96364383080498,615.7131781,3,1,15/12/2019 -L-JM,5000,1500,1000.0,22.097596606763833,732.2697349,3,1,15/12/2019 -L-JM,5500,1500,1000.0,21.463723095992123,850.7737075,3,1,15/12/2019 -L-JM,6000,1101,1000.0,21.098415947303895,909.3228279,3,1,15/12/2019 -L-JM,6500,601,1000.0,19.604458837712464,985.2199813000001,3,1,15/12/2019 -L-JM,1500,1500,1000.0,13.25081292192078,113.2111716,0.75,1,15/12/2019 -L-JM,2000,1500,1000.0,11.898306360562803,176.1226983,0.75,1,15/12/2019 -L-JM,2500,1500,1000.0,11.331479928387962,245.3395849,0.75,1,15/12/2019 -L-JM,3000,1500,1000.0,10.822743636769616,328.4253929,0.75,1,15/12/2019 -L-JM,3500,1500,1000.0,10.341161537587485,416.6295876,0.75,1,15/12/2019 -L-JM,4000,1500,1000.0,10.05422347022756,512.5809536,0.75,1,15/12/2019 -L-JM,4500,1500,1000.0,9.678447398895264,616.4628192,0.75,1,15/12/2019 -L-JM,5000,1500,1000.0,9.348294603434246,722.3488736,0.75,1,15/12/2019 -L-JM,5500,1500,1000.0,9.158434114440919,839.2501411999999,0.75,1,15/12/2019 -L-JM,6000,1101,1000.0,8.85969851676255,918.4742979,0.75,1,15/12/2019 -L-JM,6500,601,1000.0,8.978569760943016,997.4762726,0.75,1,15/12/2019 -L-JM,1500,1500,1000.0,14.31648555157471,112.708166,0.5,1,15/12/2019 -L-JM,2000,1500,1000.0,13.100616600819906,173.49587910000002,0.5,1,15/12/2019 -L-JM,2500,1500,1000.0,12.631016832621258,244.59541330000002,0.5,1,15/12/2019 -L-JM,3000,1500,1000.0,12.372139003845216,325.2085343,0.5,1,15/12/2019 -L-JM,3500,1500,1000.0,12.052149805135091,412.00539760000004,0.5,1,15/12/2019 -L-JM,4000,1500,1000.0,11.926307619750977,506.9327447,0.5,1,15/12/2019 -L-JM,4500,1500,1000.0,11.605206087137859,603.1267876000001,0.5,1,15/12/2019 -L-JM,5000,1500,1000.0,11.364976841634116,709.4308111,0.5,1,15/12/2019 -L-JM,5500,1500,1000.0,11.303823874104818,825.9382915000001,0.5,1,15/12/2019 -L-JM,6000,1101,1000.0,11.054803368952145,915.2881399,0.5,1,15/12/2019 -L-JM,6500,601,1000.0,11.402072457096146,984.6962941,0.5,1,15/12/2019 -L-JM,1500,1500,1000.0,16.515109934611004,117.3274013,0.25,1,15/12/2019 -L-JM,2000,1500,1000.0,15.618779733296712,177.3680046,0.25,1,15/12/2019 -L-JM,2500,1500,1000.0,15.289220743123371,247.515579,0.25,1,15/12/2019 -L-JM,3000,1500,1000.0,15.11142683183797,327.60220580000004,0.25,1,15/12/2019 -L-JM,3500,1500,1000.0,14.843900964528402,411.6829181,0.25,1,15/12/2019 -L-JM,4000,1500,1000.0,14.726752273071288,513.3648273,0.25,1,15/12/2019 -L-JM,4500,1500,1000.0,14.419325491251628,613.8661824000001,0.25,1,15/12/2019 -L-JM,5000,1500,1000.0,14.256678564778646,711.8209704,0.25,1,15/12/2019 -L-JM,5500,1500,1000.0,14.237603645751953,825.8206864,0.25,1,15/12/2019 -L-JM,6000,1101,1000.0,14.04869658132167,909.8488734,0.25,1,15/12/2019 -L-JM,6500,601,1000.0,14.660941845581258,994.2622263000001,0.25,1,15/12/2019 -L-JM,1500,1500,1000.0,13.917265603817388,111.8063593,1,1.5,15/12/2019 -L-JM,2000,1500,1000.0,12.393746784561161,174.0463799,1,1.5,15/12/2019 -L-JM,2500,1500,1000.0,11.310800296285631,242.5622126,1,1.5,15/12/2019 -L-JM,3000,1500,1000.0,10.573622634081524,323.0661685,1,1.5,15/12/2019 -L-JM,3500,1500,1000.0,9.875938797266643,406.5680506,1,1.5,15/12/2019 -L-JM,4000,1500,1000.0,9.510982350137393,503.7767193,1,1.5,15/12/2019 -L-JM,4500,1500,1000.0,9.119124757146201,596.6460913,1,1.5,15/12/2019 -L-JM,5000,1500,1000.0,8.599406629679363,706.859301,1,1.5,15/12/2019 -L-JM,5500,1500,1000.0,8.368502939493816,818.7217491,1,1.5,15/12/2019 -L-JM,6000,1101,1000.0,8.063868108397717,897.7200943,1,1.5,15/12/2019 -L-JM,6500,601,1000.0,8.136913764498198,974.7133005,1,1.5,15/12/2019 -L-JM,1500,1500,1000.0,14.09852163185247,111.0557463,1,2,15/12/2019 -L-JM,2000,1500,1000.0,12.44589603171794,172.8484317,1,2,15/12/2019 -L-JM,2500,1500,1000.0,11.252518187896092,245.6580888,1,2,15/12/2019 -L-JM,3000,1500,1000.0,10.49970513110606,319.36993149999995,1,2,15/12/2019 -L-JM,3500,1500,1000.0,9.76052582794444,409.32987959999997,1,2,15/12/2019 -L-JM,4000,1500,1000.0,9.367906394826731,505.0419491,1,2,15/12/2019 -L-JM,4500,1500,1000.0,8.953196634539921,613.9279513,1,2,15/12/2019 -L-JM,5000,1500,1000.0,8.49635838229116,708.8533507000001,1,2,15/12/2019 -L-JM,5500,1500,1000.0,8.250896130411785,824.6985384,1,2,15/12/2019 -L-JM,6000,1101,1000.0,7.941628870483316,896.7534047,1,2,15/12/2019 -L-JM,6500,601,1000.0,7.984307747999562,967.4740421,1,2,15/12/2019 -L-JM,1500,1500,1000.0,14.530905711745902,113.61682520000001,1,2.5,15/12/2019 -L-JM,2000,1500,1000.0,12.780882489283243,178.76914259999998,1,2.5,15/12/2019 -L-JM,2500,1500,1000.0,11.50745587341436,255.22816189999998,1,2.5,15/12/2019 -L-JM,3000,1500,1000.0,10.72320711825816,329.5140962,1,2.5,15/12/2019 -L-JM,3500,1500,1000.0,9.959901313243869,418.59089370000004,1,2.5,15/12/2019 -L-JM,4000,1500,1000.0,9.540940698993367,514.2713362,1,2.5,15/12/2019 -L-JM,4500,1500,1000.0,9.075100561608632,622.6434419,1,2.5,15/12/2019 -L-JM,5000,1500,1000.0,8.632106214462283,715.8135177,1,2.5,15/12/2019 -L-JM,5500,1500,1000.0,8.353497416613262,834.1856596,1,2.5,15/12/2019 -L-JM,6000,1101,1000.0,7.997364105073893,919.106896,1,2.5,15/12/2019 -L-JM,6500,601,1000.0,8.032743632894189,1003.1057952,1,2.5,15/12/2019 -L-JM,1500,1500,1000.0,14.880831269627889,112.38430980000001,1,3,15/12/2019 -L-JM,2000,1500,1000.0,13.064014818353021,174.3569823,1,3,15/12/2019 -L-JM,2500,1500,1000.0,11.762643989761354,243.2211045,1,3,15/12/2019 -L-JM,3000,1500,1000.0,10.968917869575504,324.5920986,1,3,15/12/2019 -L-JM,3500,1500,1000.0,10.200866189011894,410.9065092,1,3,15/12/2019 -L-JM,4000,1500,1000.0,9.746818656939825,498.6132275,1,3,15/12/2019 -L-JM,4500,1500,1000.0,9.23931041631953,606.2137346000001,1,3,15/12/2019 -L-JM,5000,1500,1000.0,8.780373388102213,705.5607821000001,1,3,15/12/2019 -L-JM,5500,1500,1000.0,8.485089657131196,820.0846994,1,3,15/12/2019 -L-JM,6000,1101,1000.0,8.135665582644734,895.5069686,1,3,15/12/2019 -L-JM,6500,601,1000.0,8.187855192343129,958.1212525999999,1,3,15/12/2019 -L-JM,1500,1500,1000.0,14.046177239428841,114.7886533,1,0.75,15/12/2019 -L-JM,2000,1500,1000.0,12.700155820409142,177.8000633,1,0.75,15/12/2019 -L-JM,2500,1500,1000.0,11.999409877098083,249.8877674,1,0.75,15/12/2019 -L-JM,3000,1500,1000.0,11.349961215230307,331.60657760000004,1,0.75,15/12/2019 -L-JM,3500,1500,1000.0,10.72300334940847,421.4194003,1,0.75,15/12/2019 -L-JM,4000,1500,1000.0,10.448034243490858,514.5383338,1,0.75,15/12/2019 -L-JM,4500,1500,1000.0,10.017663810821535,617.0386821,1,0.75,15/12/2019 -L-JM,5000,1500,1000.0,9.517057082204184,716.4373566,1,0.75,15/12/2019 -L-JM,5500,1500,1000.0,9.273669176727296,832.6763739,1,0.75,15/12/2019 -L-JM,6000,1101,1000.0,8.995569295995784,911.0825957,1,0.75,15/12/2019 -L-JM,6500,601,1000.0,9.010769006846546,979.3535823999999,1,0.75,15/12/2019 -L-JM,1500,1500,1000.0,13.967123082429293,111.65990760000001,1,0.5,15/12/2019 -L-JM,2000,1500,1000.0,12.697881535001121,172.2946484,1,0.5,15/12/2019 -L-JM,2500,1500,1000.0,12.145461895324708,241.1312698,1,0.5,15/12/2019 -L-JM,3000,1500,1000.0,11.588876514109295,323.0631537,1,0.5,15/12/2019 -L-JM,3500,1500,1000.0,11.044433803100585,415.137312,1,0.5,15/12/2019 -L-JM,4000,1500,1000.0,10.80793491441091,504.8102572,1,0.5,15/12/2019 -L-JM,4500,1500,1000.0,10.405684053039552,602.5219337,1,0.5,15/12/2019 -L-JM,5000,1500,1000.0,9.965184934692385,711.8197224,1,0.5,15/12/2019 -L-JM,5500,1500,1000.0,9.748910655822755,825.7830675,1,0.5,15/12/2019 -L-JM,6000,1101,1000.0,9.48353206175095,908.3770869,1,0.5,15/12/2019 -L-JM,6500,601,1000.0,9.485751465046862,971.9107026,1,0.5,15/12/2019 -L-JM,1500,1500,1000.0,14.028847422150934,112.4397257,1,0.25,15/12/2019 -L-JM,2000,1500,1000.0,12.743222420338949,174.4159427,1,0.25,15/12/2019 -L-JM,2500,1500,1000.0,12.272876975964866,249.5782289,1,0.25,15/12/2019 -L-JM,3000,1500,1000.0,11.804296119425455,328.94519329999997,1,0.25,15/12/2019 -L-JM,3500,1500,1000.0,11.369661207082114,412.7472373,1,0.25,15/12/2019 -L-JM,4000,1500,1000.0,11.183031419891359,511.8257998,1,0.25,15/12/2019 -L-JM,4500,1500,1000.0,10.825626690989177,609.6951702,1,0.25,15/12/2019 -L-JM,5000,1500,1000.0,10.460318843851725,721.2592548,1,0.25,15/12/2019 -L-JM,5500,1500,1000.0,10.294279163879395,825.8027392,1,0.25,15/12/2019 -L-JM,6000,1101,1000.0,10.018516495014302,921.2132997,1,0.25,15/12/2019 -L-JM,6500,601,1000.0,10.097973922018602,980.0041144,1,0.25,15/12/2019 diff --git a/data/figs/mae_diff_param_lj_e.pdf b/data/figs/mae_diff_param_lj_e.pdf deleted file mode 100644 index 20e6676eb..000000000 Binary files a/data/figs/mae_diff_param_lj_e.pdf and /dev/null differ diff --git a/data/figs/mae_diff_param_lj_s.pdf b/data/figs/mae_diff_param_lj_s.pdf deleted file mode 100644 index abf50c9b6..000000000 Binary files a/data/figs/mae_diff_param_lj_s.pdf and /dev/null differ diff --git a/data/figs/mae_diff_tr_sizes.pdf b/data/figs/mae_diff_tr_sizes.pdf deleted file mode 100644 index 0d6fb923c..000000000 Binary files a/data/figs/mae_diff_tr_sizes.pdf and /dev/null differ diff --git a/main.py b/main.py index 3cfa1bfb6..3bf86572e 100644 --- a/main.py +++ b/main.py @@ -178,7 +178,7 @@ def pl(): plot_axis.set_ylabel('mae') plot_axis.set_title('mae for different tr_sizes') # Get the figure and save it. - # plot_axis.get_figure().savefig('data\\figs\\mae_diff_tr_sizes.pdf') + # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') # Get the rest of the benchmark data and drop unnecesary column. new_data = data.drop(index=range(0, 22)) @@ -207,7 +207,7 @@ def pl(): last_axis.set_ylabel('mae') last_axis.set_title('mae for different parameters of lj(s)') - last_axis.get_figure().savefig('data\\figs\\mae_diff_param_lj_s.pdf') + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') ndf_axis = nd_first.plot(x='tr_size', y='1, 1', @@ -230,7 +230,7 @@ def pl(): last_axis.set_ylabel('mae') last_axis.set_title('mae for different parameters of lj(e)') - last_axis.get_figure().savefig('data\\figs\\mae_diff_param_lj_e.pdf') + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') if __name__ == '__main__': -- cgit v1.2.3-54-g00ecf From 487bf8840846b5d4d694b38985268c308aadb36e Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Wed, 18 Dec 2019 07:21:35 -0700 Subject: Refactor files --- c_matrix.py | 179 --------------------------------- cholesky_solve.py | 64 ------------ do_ml.py | 108 -------------------- frob_norm.py | 51 ---------- gauss_kernel.py | 49 --------- lj_matrix.py | 207 -------------------------------------- lj_matrix/__init__.py | 22 ++++ lj_matrix/__main__.py | 238 ++++++++++++++++++++++++++++++++++++++++++++ lj_matrix/c_matrix.py | 179 +++++++++++++++++++++++++++++++++ lj_matrix/cholesky_solve.py | 64 ++++++++++++ lj_matrix/do_ml.py | 108 ++++++++++++++++++++ lj_matrix/frob_norm.py | 51 ++++++++++ lj_matrix/gauss_kernel.py | 49 +++++++++ lj_matrix/lj_matrix.py | 207 ++++++++++++++++++++++++++++++++++++++ lj_matrix/misc.py | 53 ++++++++++ lj_matrix/read_qm7_data.py | 144 +++++++++++++++++++++++++++ main.py | 238 -------------------------------------------- misc.py | 53 ---------- read_qm7_data.py | 144 --------------------------- 19 files changed, 1115 insertions(+), 1093 deletions(-) delete mode 100644 c_matrix.py delete mode 100644 cholesky_solve.py delete mode 100644 do_ml.py delete mode 100644 frob_norm.py delete mode 100644 gauss_kernel.py delete mode 100644 lj_matrix.py create mode 100644 lj_matrix/__init__.py create mode 100644 lj_matrix/__main__.py create mode 100644 lj_matrix/c_matrix.py create mode 100644 lj_matrix/cholesky_solve.py create mode 100644 lj_matrix/do_ml.py create mode 100644 lj_matrix/frob_norm.py create mode 100644 lj_matrix/gauss_kernel.py create mode 100644 lj_matrix/lj_matrix.py create mode 100644 lj_matrix/misc.py create mode 100644 lj_matrix/read_qm7_data.py delete mode 100644 main.py delete mode 100644 misc.py delete mode 100644 read_qm7_data.py diff --git a/c_matrix.py b/c_matrix.py deleted file mode 100644 index 4de711a1b..000000000 --- a/c_matrix.py +++ /dev/null @@ -1,179 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import time -from misc import printc -import math -import numpy as np -from numpy.linalg import eig - - -def c_matrix(mol_data, - nc_data, - max_len=25, - as_eig=True, - bohr_radius_units=False): - """ - Creates the Coulomb Matrix from the molecule data given. - mol_data: molecule data, matrix of atom coordinates. - nc_data: nuclear charge data, array of atom data. - max_len: maximum amount of atoms in molecule. - as_eig: if data should be returned as matrix or array of eigenvalues. - bohr_radius_units: if units should be in bohr's radius units. - """ - if bohr_radius_units: - conversion_rate = 0.52917721067 - else: - conversion_rate = 1 - - mol_n = len(mol_data) - mol_nr = range(mol_n) - - if not mol_n == len(nc_data): - print(''.join(['Error. Molecule matrix dimension is different ', - 'than the nuclear charge array dimension.'])) - else: - if max_len < mol_n: - print(''.join(['Error. Molecule matrix dimension (mol_n) is ', - 'greater than max_len. Using mol_n.'])) - max_len = None - - if max_len: - cm = np.zeros((max_len, max_len)) - ml_r = range(max_len) - - # Actual calculation of the coulomb matrix. - for i in ml_r: - if i < mol_n: - x_i = mol_data[i, 0] - y_i = mol_data[i, 1] - z_i = mol_data[i, 2] - Z_i = nc_data[i] - else: - break - - for j in ml_r: - if j < mol_n: - x_j = mol_data[j, 0] - y_j = mol_data[j, 1] - z_j = mol_data[j, 2] - Z_j = nc_data[j] - - x = (x_i-x_j)**2 - y = (y_i-y_j)**2 - z = (z_i-z_j)**2 - - if i == j: - cm[i, j] = (0.5*Z_i**2.4) - else: - cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x - + y - + z)) - else: - break - - # Now the value will be returned. - if as_eig: - cm_sorted = np.sort(eig(cm)[0])[::-1] - # Thanks to SO for the following lines of code. - # https://stackoverflow.com/a/43011036 - - # Keep zeros at the end. - mask = cm_sorted != 0. - f_mask = mask.sum(0, keepdims=1) >\ - np.arange(cm_sorted.shape[0]-1, -1, -1) - - f_mask = f_mask[::-1] - cm_sorted[f_mask] = cm_sorted[mask] - cm_sorted[~f_mask] = 0. - - return cm_sorted - - else: - return cm - - else: - cm_temp = [] - # Actual calculation of the coulomb matrix. - for i in mol_nr: - x_i = mol_data[i, 0] - y_i = mol_data[i, 1] - z_i = mol_data[i, 2] - Z_i = nc_data[i] - - cm_row = [] - for j in mol_nr: - x_j = mol_data[j, 0] - y_j = mol_data[j, 1] - z_j = mol_data[j, 2] - Z_j = nc_data[j] - - x = (x_i-x_j)**2 - y = (y_i-y_j)**2 - z = (z_i-z_j)**2 - - if i == j: - cm_row.append(0.5*Z_i**2.4) - else: - cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x - + y - + z)) - - cm_temp.append(np.array(cm_row)) - - cm = np.array(cm_temp) - # Now the value will be returned. - if as_eig: - return np.sort(eig(cm)[0])[::-1] - else: - return cm - - -def c_matrix_multiple(mol_data, - nc_data, - pipe=None, - max_len=25, - as_eig=True, - bohr_radius_units=False): - """ - Calculates the Coulomb Matrix of multiple molecules. - mol_data: molecule data, matrix of atom coordinates. - nc_data: nuclear charge data, array of atom data. - pipe: for multiprocessing purposes. Sends the data calculated - through a pipe. - max_len: maximum amount of atoms in molecule. - as_eig: if data should be returned as matrix or array of eigenvalues. - bohr_radius_units: if units should be in bohr's radius units. - """ - printc('Coulomb Matrices calculation started.', 'CYAN') - tic = time.perf_counter() - - cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units) - for mol, nc in zip(mol_data, nc_data)]) - - toc = time.perf_counter() - printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN') - - if pipe: - pipe.send(cm_data) - - return cm_data diff --git a/cholesky_solve.py b/cholesky_solve.py deleted file mode 100644 index bc6a572a3..000000000 --- a/cholesky_solve.py +++ /dev/null @@ -1,64 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import numpy as np -from numpy.linalg import cholesky - - -def cholesky_solve(K, y): - """ - Applies Cholesky decomposition to obtain the 'alpha coeficients'. - K: kernel. - y: known parameters. - """ - # The initial mathematical problem is to solve Ka=y. - - # First, add a small lambda value. - K[np.diag_indices_from(K)] += 1e-8 - - # Get the Cholesky decomposition of the kernel. - L = cholesky(K) - size = len(L) - - # Solve Lx=y for x. - x = np.zeros(size) - x[0] = y[0] / L[0, 0] - for i in range(1, size): - temp_sum = 0.0 - for j in range(i): - temp_sum += L[i, j] * x[j] - x[i] = (y[i] - temp_sum) / L[i, i] - - # Now, solve LTa=x for a. - L2 = L.T - a = np.zeros(size) - a_ms = size - 1 - a[a_ms] = x[a_ms] / L2[a_ms, a_ms] - # Because of the form of L2 (upper triangular matriz), an inversion of - # range() needs to be done. - for i in range(0, a_ms)[::-1]: - temp_sum = 0.0 - for j in range(i, size)[::-1]: - temp_sum += L2[i, j] * a[j] - a[i] = (x[i] - temp_sum) / L2[i, i] - - return a diff --git a/do_ml.py b/do_ml.py deleted file mode 100644 index c88533e68..000000000 --- a/do_ml.py +++ /dev/null @@ -1,108 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import time -from misc import printc -import numpy as np -from gauss_kernel import gauss_kernel -from cholesky_solve import cholesky_solve - - -def do_ml(desc_data, - energy_data, - training_size, - desc_type=None, - pipe=None, - test_size=None, - sigma=1000.0, - show_msgs=True): - """ - Does the ML methodology. - desc_data: descriptor (or representation) data. - energy_data: energy data associated with desc_data. - training_size: size of the training set to use. - desc_type: string with the name of the descriptor used. - pipe: for multiprocessing purposes. Sends the data calculated - through a pipe. - test_size: size of the test set to use. If no size is given, - the last remaining molecules are used. - sigma: depth of the kernel. - show_msgs: Show debug messages or not. - NOTE: desc_type is just a string and is only for identification purposes. - Also, training is done with the first part of the data and - testing with the ending part of the data. - """ - # Initial calculations for later use. - d_len = len(desc_data) - e_len = len(energy_data) - - if not desc_type: - desc_type = 'NOT SPECIFIED' - - if d_len != e_len: - printc(''.join(['ERROR. Descriptor data size different ', - 'than energy data size.']), 'RED') - return None - - if training_size >= d_len: - printc('ERROR. Training size greater or equal than data size.', 'RED') - return None - - if not test_size: - test_size = d_len - training_size - if test_size > 1500: - test_size = 1500 - - tic = time.perf_counter() - if show_msgs: - printc('{} ML started.'.format(desc_type), 'GREEN') - printc('\tTraining size: {}'.format(training_size), 'CYAN') - printc('\tTest size: {}'.format(test_size), 'CYAN') - printc('\tSigma: {}'.format(sigma), 'CYAN') - - Xcm_training = desc_data[:training_size] - Ycm_training = energy_data[:training_size] - Kcm_training = gauss_kernel(Xcm_training, Xcm_training, sigma) - alpha_cm = cholesky_solve(Kcm_training, Ycm_training) - - Xcm_test = desc_data[-test_size:] - Ycm_test = energy_data[-test_size:] - Kcm_test = gauss_kernel(Xcm_test, Xcm_training, sigma) - Ycm_predicted = np.dot(Kcm_test, alpha_cm) - - mae = np.mean(np.abs(Ycm_predicted - Ycm_test)) - if show_msgs: - printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN') - - toc = time.perf_counter() - tictoc = toc - tic - if show_msgs: - printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc), - 'GREEN') - printc('\t\tTraining size: {}'.format(training_size), 'CYAN') - printc('\t\tTest size: {}'.format(test_size), 'CYAN') - printc('\t\tSigma: {}'.format(sigma), 'CYAN') - - if pipe: - pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc]) - - return mae, tictoc diff --git a/frob_norm.py b/frob_norm.py deleted file mode 100644 index 4c3a2945d..000000000 --- a/frob_norm.py +++ /dev/null @@ -1,51 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import math - - -def frob_norm(array): - """ - Calculates the frobenius norm of a given array or matrix. - array: array of data. - """ - - arr_sh_len = len(array.shape) - arr_range = range(len(array)) - fn = 0.0 - - # If it is a 'vector'. - if arr_sh_len == 1: - for i in arr_range: - fn += array[i]*array[i] - - return math.sqrt(fn) - - # If it is a matrix. - elif arr_sh_len == 2: - for i in arr_range: - for j in arr_range: - fn += array[i, j]*array[i, j] - - return math.sqrt(fn) - else: - print('Error. Array size greater than 2 ({}).'.format(arr_sh_len)) diff --git a/gauss_kernel.py b/gauss_kernel.py deleted file mode 100644 index 0dfc65d59..000000000 --- a/gauss_kernel.py +++ /dev/null @@ -1,49 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import math -import numpy as np -from frob_norm import frob_norm - - -def gauss_kernel(X_1, X_2, sigma): - """ - Calculates the Gaussian Kernel. - X_1: first representations. - X_2: second representations. - sigma: kernel width. - """ - x1_l = len(X_1) - x1_range = range(x1_l) - x2_l = len(X_2) - x2_range = range(x2_l) - - inv_sigma = -0.5 / (sigma*sigma) - - K = np.zeros((x1_l, x2_l)) - for i in x1_range: - for j in x2_range: - f_norm = frob_norm(X_1[i] - X_2[j]) - # print(f_norm) - K[i, j] = math.exp(inv_sigma * f_norm) - - return K diff --git a/lj_matrix.py b/lj_matrix.py deleted file mode 100644 index 2a8e0d956..000000000 --- a/lj_matrix.py +++ /dev/null @@ -1,207 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import time -from misc import printc -import math -import numpy as np -from numpy.linalg import eig - - -def lj_matrix(mol_data, - nc_data, - sigma=1.0, - epsilon=1.0, - max_len=25, - as_eig=True, - bohr_radius_units=False): - """ - Creates the Lennard-Jones Matrix from the molecule data given. - mol_data: molecule data, matrix of atom coordinates. - nc_data: nuclear charge data, array of atom data. - max_len: maximum amount of atoms in molecule. - as_eig: if data should be returned as matrix or array of eigenvalues. - bohr_radius_units: if units should be in bohr's radius units. - """ - if bohr_radius_units: - conversion_rate = 0.52917721067 - else: - conversion_rate = 1 - - mol_n = len(mol_data) - mol_nr = range(mol_n) - - if not mol_n == len(nc_data): - print(''.join(['Error. Molecule matrix dimension is different ', - 'than the nuclear charge array dimension.'])) - else: - if max_len < mol_n: - print(''.join(['Error. Molecule matrix dimension (mol_n) is ', - 'greater than max_len. Using mol_n.'])) - max_len = None - - if max_len: - lj = np.zeros((max_len, max_len)) - ml_r = range(max_len) - - # Actual calculation of the coulomb matrix. - for i in ml_r: - if i < mol_n: - x_i = mol_data[i, 0] - y_i = mol_data[i, 1] - z_i = mol_data[i, 2] - Z_i = nc_data[i] - else: - break - - for j in ml_r: - if j < mol_n: - x_j = mol_data[j, 0] - y_j = mol_data[j, 1] - z_j = mol_data[j, 2] - - x = (x_i-x_j)**2 - y = (y_i-y_j)**2 - z = (z_i-z_j)**2 - - if i == j: - lj[i, j] = (0.5*Z_i**2.4) - else: - # Calculations are done after i==j is checked - # so no division by zero is done. - - # A little play with r exponents - # so no square root is calculated. - # Conversion factor is included in r^2. - - # 1/r^2 - r_2 = sigma**2/(conversion_rate**2*(x + y + z)) - - r_6 = math.pow(r_2, 3) - r_12 = math.pow(r_6, 2) - lj[i, j] = (4*epsilon*(r_12 - r_6)) - else: - break - - # Now the value will be returned. - if as_eig: - lj_sorted = np.sort(eig(lj)[0])[::-1] - # Thanks to SO for the following lines of code. - # https://stackoverflow.com/a/43011036 - - # Keep zeros at the end. - mask = lj_sorted != 0. - f_mask = mask.sum(0, keepdims=1) >\ - np.arange(lj_sorted.shape[0]-1, -1, -1) - - f_mask = f_mask[::-1] - lj_sorted[f_mask] = lj_sorted[mask] - lj_sorted[~f_mask] = 0. - - return lj_sorted - - else: - return lj - - else: - lj_temp = [] - # Actual calculation of the coulomb matrix. - for i in mol_nr: - x_i = mol_data[i, 0] - y_i = mol_data[i, 1] - z_i = mol_data[i, 2] - Z_i = nc_data[i] - - lj_row = [] - for j in mol_nr: - x_j = mol_data[j, 0] - y_j = mol_data[j, 1] - z_j = mol_data[j, 2] - - x = (x_i-x_j)**2 - y = (y_i-y_j)**2 - z = (z_i-z_j)**2 - - if i == j: - lj_row.append(0.5*Z_i**2.4) - else: - # Calculations are done after i==j is checked - # so no division by zero is done. - - # A little play with r exponents - # so no square root is calculated. - # Conversion factor is included in r^2. - - # 1/r^2 - r_2 = sigma**2/(conversion_rate**2*(x + y + z)) - - r_6 = math.pow(r_2, 3) - r_12 = math.pow(r_6, 2) - lj_row.append(4*epsilon*(r_12 - r_6)) - - lj_temp.append(np.array(lj_row)) - - lj = np.array(lj_temp) - # Now the value will be returned. - if as_eig: - return np.sort(eig(lj)[0])[::-1] - else: - return lj - - -def lj_matrix_multiple(mol_data, - nc_data, - pipe=None, - sigma=1, - epsilon=1, - max_len=25, - as_eig=True, - bohr_radius_units=False): - """ - Calculates the Lennard-Jones Matrix of multiple molecules. - mol_data: molecule data, matrix of atom coordinates. - nc_data: nuclear charge data, array of atom data. - pipe: for multiprocessing purposes. Sends the data calculated - through a pipe. - max_len: maximum amount of atoms in molecule. - as_eig: if data should be returned as matrix or array of eigenvalues. - bohr_radius_units: if units should be in bohr's radius units. - """ - printc('L-J Matrices calculation started.', 'CYAN') - tic = time.perf_counter() - - ljm_data = np.array([lj_matrix(mol, - nc, - sigma, - epsilon, - max_len, - as_eig, - bohr_radius_units) - for mol, nc in zip(mol_data, nc_data)]) - - toc = time.perf_counter() - printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN') - - if pipe: - pipe.send(ljm_data) - - return ljm_data diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py new file mode 100644 index 000000000..48cd14913 --- /dev/null +++ b/lj_matrix/__init__.py @@ -0,0 +1,22 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py new file mode 100644 index 000000000..4e13f4995 --- /dev/null +++ b/lj_matrix/__main__.py @@ -0,0 +1,238 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import time +from multiprocessing import Process, Pipe +# import matplotlib.pyplot as plt +import pandas as pd +from lj_matrix.misc import printc +from lj_matrix.read_qm7_data import read_qm7_data +from lj_matrix.c_matrix import c_matrix_multiple +from lj_matrix.lj_matrix import lj_matrix_multiple +from lj_matrix.do_ml import do_ml + + +# Test +def ml(): + """ + Main function that does the whole ML process. + """ + # Initialization time. + init_time = time.perf_counter() + + # Data reading. + zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ + read_qm7_data() + + # Matrices calculation. + procs = [] + pipes = [] + + # cm_recv, cm_send = Pipe(False) + # p1 = Process(target=c_matrix_multiple, + # args=(molecules, nuclear_charge, cm_send)) + # procs.append(p1) + # pipes.append(cm_recv) + # p1.start() + + ljm_recv, ljm_send = Pipe(False) + p2 = Process(target=lj_matrix_multiple, + args=(molecules, nuclear_charge, ljm_send, 1, 0.25)) + procs.append(p2) + pipes.append(ljm_recv) + p2.start() + + # cm_data = pipes[0].recv() + ljm_data = pipes[0].recv() + + for proc in procs: + proc.join() + + # ML calculation. + procs = [] + # cm_pipes = [] + ljm_pipes = [] + for i in range(1500, 6500 + 1, 500): + # cm_recv, cm_send = Pipe(False) + # p1 = Process(target=do_ml, + # args=(cm_data, energy_pbe0, i, 'CM', cm_send)) + # procs.append(p1) + # cm_pipes.append(cm_recv) + # p1.start() + + ljm_recv, ljm_send = Pipe(False) + p2 = Process(target=do_ml, + args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send)) + procs.append(p2) + ljm_pipes.append(ljm_recv) + p2.start() + + # cm_bench_results = [] + ljm_bench_results = [] + for ljd_pipe in ljm_pipes: # cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): + # cm_bench_results.append(cd_pipe.recv()) + ljm_bench_results.append(ljd_pipe.recv()) + + for proc in procs: + proc.join() + + with open('data\\benchmarks.csv', 'a') as save_file: + # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,', + # 'mae,time,lj_s,lj_e,date_ran\n'])) + date = '/'.join([str(field) for field in time.localtime()[:3][::-1]]) + for ljm in ljm_bench_results: # cm, ljm, in zip(cm_bench_results, ljm_bench_results): + # cm_text = ','.join([str(field) for field in cm])\ + # + ',' + date + '\n' + ljm_text = ','.join([str(field) for field in ljm])\ + + ',1,0.25,' + date + '\n' + # save_file.write(cm_text) + save_file.write(ljm_text) + + # End of program + end_time = time.perf_counter() + printc('Program took {:.4f} seconds.'.format(end_time - init_time), + 'CYAN') + + +def pl(): + """ + Function for plotting the benchmarks. + """ + # Original columns. + or_cols = ['ml_type', + 'tr_size', + 'te_size', + 'kernel_s', + 'mae', + 'time', + 'lj_s', + 'lj_e', + 'date_ran'] + # Drop some original columns. + dor_cols = ['te_size', + 'kernel_s', + 'time', + 'date_ran'] + + # Read benchmarks data and drop some columns. + data_temp = pd.read_csv('data\\benchmarks.csv',) + data = pd.DataFrame(data_temp, columns=or_cols) + data = data.drop(columns=dor_cols) + + # Get the data of the first benchmarks and drop unnecesary columns. + first_data = pd.DataFrame(data, index=range(0, 22)) + first_data = first_data.drop(columns=['lj_s', 'lj_e']) + + # Columns to keep temporarily. + fd_columns = ['ml_type', + 'tr_size', + 'mae'] + + # Create new dataframes for each matrix descriptor and fill them. + first_data_cm = pd.DataFrame(columns=fd_columns) + first_data_ljm = pd.DataFrame(columns=fd_columns) + for i in range(first_data.shape[0]): + temp_df = first_data.iloc[[i]] + if first_data.at[i, 'ml_type'] == 'CM': + first_data_cm = first_data_cm.append(temp_df) + else: + first_data_ljm = first_data_ljm.append(temp_df) + + # Drop unnecesary column and rename 'mae' for later use. + first_data_cm = first_data_cm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'cm_mae'}) + first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'ljm_mae'}) + # print(first_data_cm) + # print(first_data_ljm) + + # Get the cm data axis so it can be joined with the ljm data axis. + cm_axis = first_data_cm.plot(x='tr_size', + y='cm_mae', + kind='line') + # Get the ljm data axis and join it with the cm one. + plot_axis = first_data_ljm.plot(ax=cm_axis, + x='tr_size', + y='ljm_mae', + kind='line') + plot_axis.set_xlabel('tr_size') + plot_axis.set_ylabel('mae') + plot_axis.set_title('mae for different tr_sizes') + # Get the figure and save it. + # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') + + # Get the rest of the benchmark data and drop unnecesary column. + new_data = data.drop(index=range(0, 22)) + new_data = new_data.drop(columns=['ml_type']) + + # Get the first set and rename it. + nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(22, 99, 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(s)') + + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') + + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(99, data.shape[0], 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(e)') + + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') + + +if __name__ == '__main__': + # ml() + pl() diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py new file mode 100644 index 000000000..f40a18c68 --- /dev/null +++ b/lj_matrix/c_matrix.py @@ -0,0 +1,179 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import time +from lj_matrix.misc import printc +import math +import numpy as np +from numpy.linalg import eig + + +def c_matrix(mol_data, + nc_data, + max_len=25, + as_eig=True, + bohr_radius_units=False): + """ + Creates the Coulomb Matrix from the molecule data given. + mol_data: molecule data, matrix of atom coordinates. + nc_data: nuclear charge data, array of atom data. + max_len: maximum amount of atoms in molecule. + as_eig: if data should be returned as matrix or array of eigenvalues. + bohr_radius_units: if units should be in bohr's radius units. + """ + if bohr_radius_units: + conversion_rate = 0.52917721067 + else: + conversion_rate = 1 + + mol_n = len(mol_data) + mol_nr = range(mol_n) + + if not mol_n == len(nc_data): + print(''.join(['Error. Molecule matrix dimension is different ', + 'than the nuclear charge array dimension.'])) + else: + if max_len < mol_n: + print(''.join(['Error. Molecule matrix dimension (mol_n) is ', + 'greater than max_len. Using mol_n.'])) + max_len = None + + if max_len: + cm = np.zeros((max_len, max_len)) + ml_r = range(max_len) + + # Actual calculation of the coulomb matrix. + for i in ml_r: + if i < mol_n: + x_i = mol_data[i, 0] + y_i = mol_data[i, 1] + z_i = mol_data[i, 2] + Z_i = nc_data[i] + else: + break + + for j in ml_r: + if j < mol_n: + x_j = mol_data[j, 0] + y_j = mol_data[j, 1] + z_j = mol_data[j, 2] + Z_j = nc_data[j] + + x = (x_i-x_j)**2 + y = (y_i-y_j)**2 + z = (z_i-z_j)**2 + + if i == j: + cm[i, j] = (0.5*Z_i**2.4) + else: + cm[i, j] = (conversion_rate*Z_i*Z_j/math.sqrt(x + + y + + z)) + else: + break + + # Now the value will be returned. + if as_eig: + cm_sorted = np.sort(eig(cm)[0])[::-1] + # Thanks to SO for the following lines of code. + # https://stackoverflow.com/a/43011036 + + # Keep zeros at the end. + mask = cm_sorted != 0. + f_mask = mask.sum(0, keepdims=1) >\ + np.arange(cm_sorted.shape[0]-1, -1, -1) + + f_mask = f_mask[::-1] + cm_sorted[f_mask] = cm_sorted[mask] + cm_sorted[~f_mask] = 0. + + return cm_sorted + + else: + return cm + + else: + cm_temp = [] + # Actual calculation of the coulomb matrix. + for i in mol_nr: + x_i = mol_data[i, 0] + y_i = mol_data[i, 1] + z_i = mol_data[i, 2] + Z_i = nc_data[i] + + cm_row = [] + for j in mol_nr: + x_j = mol_data[j, 0] + y_j = mol_data[j, 1] + z_j = mol_data[j, 2] + Z_j = nc_data[j] + + x = (x_i-x_j)**2 + y = (y_i-y_j)**2 + z = (z_i-z_j)**2 + + if i == j: + cm_row.append(0.5*Z_i**2.4) + else: + cm_row.append(conversion_rate*Z_i*Z_j/math.sqrt(x + + y + + z)) + + cm_temp.append(np.array(cm_row)) + + cm = np.array(cm_temp) + # Now the value will be returned. + if as_eig: + return np.sort(eig(cm)[0])[::-1] + else: + return cm + + +def c_matrix_multiple(mol_data, + nc_data, + pipe=None, + max_len=25, + as_eig=True, + bohr_radius_units=False): + """ + Calculates the Coulomb Matrix of multiple molecules. + mol_data: molecule data, matrix of atom coordinates. + nc_data: nuclear charge data, array of atom data. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. + max_len: maximum amount of atoms in molecule. + as_eig: if data should be returned as matrix or array of eigenvalues. + bohr_radius_units: if units should be in bohr's radius units. + """ + printc('Coulomb Matrices calculation started.', 'CYAN') + tic = time.perf_counter() + + cm_data = np.array([c_matrix(mol, nc, max_len, as_eig, bohr_radius_units) + for mol, nc in zip(mol_data, nc_data)]) + + toc = time.perf_counter() + printc('\tCM calculation took {:.4f} seconds.'.format(toc - tic), 'GREEN') + + if pipe: + pipe.send(cm_data) + + return cm_data diff --git a/lj_matrix/cholesky_solve.py b/lj_matrix/cholesky_solve.py new file mode 100644 index 000000000..bc6a572a3 --- /dev/null +++ b/lj_matrix/cholesky_solve.py @@ -0,0 +1,64 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import numpy as np +from numpy.linalg import cholesky + + +def cholesky_solve(K, y): + """ + Applies Cholesky decomposition to obtain the 'alpha coeficients'. + K: kernel. + y: known parameters. + """ + # The initial mathematical problem is to solve Ka=y. + + # First, add a small lambda value. + K[np.diag_indices_from(K)] += 1e-8 + + # Get the Cholesky decomposition of the kernel. + L = cholesky(K) + size = len(L) + + # Solve Lx=y for x. + x = np.zeros(size) + x[0] = y[0] / L[0, 0] + for i in range(1, size): + temp_sum = 0.0 + for j in range(i): + temp_sum += L[i, j] * x[j] + x[i] = (y[i] - temp_sum) / L[i, i] + + # Now, solve LTa=x for a. + L2 = L.T + a = np.zeros(size) + a_ms = size - 1 + a[a_ms] = x[a_ms] / L2[a_ms, a_ms] + # Because of the form of L2 (upper triangular matriz), an inversion of + # range() needs to be done. + for i in range(0, a_ms)[::-1]: + temp_sum = 0.0 + for j in range(i, size)[::-1]: + temp_sum += L2[i, j] * a[j] + a[i] = (x[i] - temp_sum) / L2[i, i] + + return a diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py new file mode 100644 index 000000000..acf5455f4 --- /dev/null +++ b/lj_matrix/do_ml.py @@ -0,0 +1,108 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import time +from misc import printc +import numpy as np +from lj_matrix.gauss_kernel import gauss_kernel +from lj_matrix.cholesky_solve import cholesky_solve + + +def do_ml(desc_data, + energy_data, + training_size, + desc_type=None, + pipe=None, + test_size=None, + sigma=1000.0, + show_msgs=True): + """ + Does the ML methodology. + desc_data: descriptor (or representation) data. + energy_data: energy data associated with desc_data. + training_size: size of the training set to use. + desc_type: string with the name of the descriptor used. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. + test_size: size of the test set to use. If no size is given, + the last remaining molecules are used. + sigma: depth of the kernel. + show_msgs: Show debug messages or not. + NOTE: desc_type is just a string and is only for identification purposes. + Also, training is done with the first part of the data and + testing with the ending part of the data. + """ + # Initial calculations for later use. + d_len = len(desc_data) + e_len = len(energy_data) + + if not desc_type: + desc_type = 'NOT SPECIFIED' + + if d_len != e_len: + printc(''.join(['ERROR. Descriptor data size different ', + 'than energy data size.']), 'RED') + return None + + if training_size >= d_len: + printc('ERROR. Training size greater or equal than data size.', 'RED') + return None + + if not test_size: + test_size = d_len - training_size + if test_size > 1500: + test_size = 1500 + + tic = time.perf_counter() + if show_msgs: + printc('{} ML started.'.format(desc_type), 'GREEN') + printc('\tTraining size: {}'.format(training_size), 'CYAN') + printc('\tTest size: {}'.format(test_size), 'CYAN') + printc('\tSigma: {}'.format(sigma), 'CYAN') + + Xcm_training = desc_data[:training_size] + Ycm_training = energy_data[:training_size] + Kcm_training = gauss_kernel(Xcm_training, Xcm_training, sigma) + alpha_cm = cholesky_solve(Kcm_training, Ycm_training) + + Xcm_test = desc_data[-test_size:] + Ycm_test = energy_data[-test_size:] + Kcm_test = gauss_kernel(Xcm_test, Xcm_training, sigma) + Ycm_predicted = np.dot(Kcm_test, alpha_cm) + + mae = np.mean(np.abs(Ycm_predicted - Ycm_test)) + if show_msgs: + printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN') + + toc = time.perf_counter() + tictoc = toc - tic + if show_msgs: + printc('\t{} ML took {:.4f} seconds.'.format(desc_type, tictoc), + 'GREEN') + printc('\t\tTraining size: {}'.format(training_size), 'CYAN') + printc('\t\tTest size: {}'.format(test_size), 'CYAN') + printc('\t\tSigma: {}'.format(sigma), 'CYAN') + + if pipe: + pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc]) + + return mae, tictoc diff --git a/lj_matrix/frob_norm.py b/lj_matrix/frob_norm.py new file mode 100644 index 000000000..4c3a2945d --- /dev/null +++ b/lj_matrix/frob_norm.py @@ -0,0 +1,51 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import math + + +def frob_norm(array): + """ + Calculates the frobenius norm of a given array or matrix. + array: array of data. + """ + + arr_sh_len = len(array.shape) + arr_range = range(len(array)) + fn = 0.0 + + # If it is a 'vector'. + if arr_sh_len == 1: + for i in arr_range: + fn += array[i]*array[i] + + return math.sqrt(fn) + + # If it is a matrix. + elif arr_sh_len == 2: + for i in arr_range: + for j in arr_range: + fn += array[i, j]*array[i, j] + + return math.sqrt(fn) + else: + print('Error. Array size greater than 2 ({}).'.format(arr_sh_len)) diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py new file mode 100644 index 000000000..5dd8e6406 --- /dev/null +++ b/lj_matrix/gauss_kernel.py @@ -0,0 +1,49 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import math +import numpy as np +from lj_matrix.frob_norm import frob_norm + + +def gauss_kernel(X_1, X_2, sigma): + """ + Calculates the Gaussian Kernel. + X_1: first representations. + X_2: second representations. + sigma: kernel width. + """ + x1_l = len(X_1) + x1_range = range(x1_l) + x2_l = len(X_2) + x2_range = range(x2_l) + + inv_sigma = -0.5 / (sigma*sigma) + + K = np.zeros((x1_l, x2_l)) + for i in x1_range: + for j in x2_range: + f_norm = frob_norm(X_1[i] - X_2[j]) + # print(f_norm) + K[i, j] = math.exp(inv_sigma * f_norm) + + return K diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py new file mode 100644 index 000000000..4f63e95ca --- /dev/null +++ b/lj_matrix/lj_matrix.py @@ -0,0 +1,207 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import time +from lj_matrix.misc import printc +import math +import numpy as np +from numpy.linalg import eig + + +def lj_matrix(mol_data, + nc_data, + sigma=1.0, + epsilon=1.0, + max_len=25, + as_eig=True, + bohr_radius_units=False): + """ + Creates the Lennard-Jones Matrix from the molecule data given. + mol_data: molecule data, matrix of atom coordinates. + nc_data: nuclear charge data, array of atom data. + max_len: maximum amount of atoms in molecule. + as_eig: if data should be returned as matrix or array of eigenvalues. + bohr_radius_units: if units should be in bohr's radius units. + """ + if bohr_radius_units: + conversion_rate = 0.52917721067 + else: + conversion_rate = 1 + + mol_n = len(mol_data) + mol_nr = range(mol_n) + + if not mol_n == len(nc_data): + print(''.join(['Error. Molecule matrix dimension is different ', + 'than the nuclear charge array dimension.'])) + else: + if max_len < mol_n: + print(''.join(['Error. Molecule matrix dimension (mol_n) is ', + 'greater than max_len. Using mol_n.'])) + max_len = None + + if max_len: + lj = np.zeros((max_len, max_len)) + ml_r = range(max_len) + + # Actual calculation of the coulomb matrix. + for i in ml_r: + if i < mol_n: + x_i = mol_data[i, 0] + y_i = mol_data[i, 1] + z_i = mol_data[i, 2] + Z_i = nc_data[i] + else: + break + + for j in ml_r: + if j < mol_n: + x_j = mol_data[j, 0] + y_j = mol_data[j, 1] + z_j = mol_data[j, 2] + + x = (x_i-x_j)**2 + y = (y_i-y_j)**2 + z = (z_i-z_j)**2 + + if i == j: + lj[i, j] = (0.5*Z_i**2.4) + else: + # Calculations are done after i==j is checked + # so no division by zero is done. + + # A little play with r exponents + # so no square root is calculated. + # Conversion factor is included in r^2. + + # 1/r^2 + r_2 = sigma**2/(conversion_rate**2*(x + y + z)) + + r_6 = math.pow(r_2, 3) + r_12 = math.pow(r_6, 2) + lj[i, j] = (4*epsilon*(r_12 - r_6)) + else: + break + + # Now the value will be returned. + if as_eig: + lj_sorted = np.sort(eig(lj)[0])[::-1] + # Thanks to SO for the following lines of code. + # https://stackoverflow.com/a/43011036 + + # Keep zeros at the end. + mask = lj_sorted != 0. + f_mask = mask.sum(0, keepdims=1) >\ + np.arange(lj_sorted.shape[0]-1, -1, -1) + + f_mask = f_mask[::-1] + lj_sorted[f_mask] = lj_sorted[mask] + lj_sorted[~f_mask] = 0. + + return lj_sorted + + else: + return lj + + else: + lj_temp = [] + # Actual calculation of the coulomb matrix. + for i in mol_nr: + x_i = mol_data[i, 0] + y_i = mol_data[i, 1] + z_i = mol_data[i, 2] + Z_i = nc_data[i] + + lj_row = [] + for j in mol_nr: + x_j = mol_data[j, 0] + y_j = mol_data[j, 1] + z_j = mol_data[j, 2] + + x = (x_i-x_j)**2 + y = (y_i-y_j)**2 + z = (z_i-z_j)**2 + + if i == j: + lj_row.append(0.5*Z_i**2.4) + else: + # Calculations are done after i==j is checked + # so no division by zero is done. + + # A little play with r exponents + # so no square root is calculated. + # Conversion factor is included in r^2. + + # 1/r^2 + r_2 = sigma**2/(conversion_rate**2*(x + y + z)) + + r_6 = math.pow(r_2, 3) + r_12 = math.pow(r_6, 2) + lj_row.append(4*epsilon*(r_12 - r_6)) + + lj_temp.append(np.array(lj_row)) + + lj = np.array(lj_temp) + # Now the value will be returned. + if as_eig: + return np.sort(eig(lj)[0])[::-1] + else: + return lj + + +def lj_matrix_multiple(mol_data, + nc_data, + pipe=None, + sigma=1, + epsilon=1, + max_len=25, + as_eig=True, + bohr_radius_units=False): + """ + Calculates the Lennard-Jones Matrix of multiple molecules. + mol_data: molecule data, matrix of atom coordinates. + nc_data: nuclear charge data, array of atom data. + pipe: for multiprocessing purposes. Sends the data calculated + through a pipe. + max_len: maximum amount of atoms in molecule. + as_eig: if data should be returned as matrix or array of eigenvalues. + bohr_radius_units: if units should be in bohr's radius units. + """ + printc('L-J Matrices calculation started.', 'CYAN') + tic = time.perf_counter() + + ljm_data = np.array([lj_matrix(mol, + nc, + sigma, + epsilon, + max_len, + as_eig, + bohr_radius_units) + for mol, nc in zip(mol_data, nc_data)]) + + toc = time.perf_counter() + printc('\tL-JM calculation took {:.4f} seconds.'.format(toc-tic), 'GREEN') + + if pipe: + pipe.send(ljm_data) + + return ljm_data diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py new file mode 100644 index 000000000..c50653a5c --- /dev/null +++ b/lj_matrix/misc.py @@ -0,0 +1,53 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from colorama import init, Fore, Style + +init() + + +def printc(text, color): + """ + Prints texts normaly, but in color. Using colorama. + text: string with the text to print. + color: color to be used, same as available in colorama. + """ + color_dic = {'BLACK': Fore.BLACK, + 'RED': Fore.RED, + 'GREEN': Fore.GREEN, + 'YELLOW': Fore.YELLOW, + 'BLUE': Fore.BLUE, + 'MAGENTA': Fore.MAGENTA, + 'CYAN': Fore.CYAN, + 'WHITE': Fore.WHITE, + 'RESET': Fore.RESET} + + color_dic_keys = color_dic.keys() + if color not in color_dic_keys: + print(Fore.RED + + '\'{}\' not found, using default color.'.format(color) + + Style.RESET_ALL) + actual_color = Fore.RESET + else: + actual_color = color_dic[color] + + print(actual_color + text + Style.RESET_ALL) diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py new file mode 100644 index 000000000..b54691fb0 --- /dev/null +++ b/lj_matrix/read_qm7_data.py @@ -0,0 +1,144 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import os +import time +import numpy as np +import random +from lj_matrix.misc import printc + + +# 'periodic_table_of_elements.txt' retrieved from +# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee +def read_nc_data(data_path): + """ + Reads nuclear charge data from file and returns a dictionary. + data_path: path to the data directory. + """ + fname = 'periodic_table_of_elements.txt' + with open(''.join([data_path, '\\', fname]), 'r') as infile: + temp_lines = infile.readlines() + + del temp_lines[0] + + lines = [] + for temp_line in temp_lines: + new_line = temp_line.split(sep=',') + lines.append(new_line) + + # Dictionary of nuclear charge. + return {line[2]: int(line[0]) for line in lines} + + +# 'hof_qm7.txt.txt' retrieved from +# https://github.com/qmlcode/tutorial +def reas_db_data(zi_data, + data_path, + r_seed=111): + """ + Reads molecule database and extracts + its contents as usable variables. + zi_data: dictionary containing nuclear charge data. + data_path: path to the data directory. + r_seed: random seed. + """ + os.chdir(data_path) + + fname = 'hof_qm7.txt' + with open(fname, 'r') as infile: + lines = infile.readlines() + + # Temporary energy dictionary. + energy_temp = dict() + + for line in lines: + xyz_data = line.split() + + xyz_name = xyz_data[0] + hof = float(xyz_data[1]) + dftb = float(xyz_data[2]) + # print(xyz_name, hof, dftb) + + energy_temp[xyz_name] = np.array([hof, hof - dftb]) + + # Use a random seed. + random.seed(r_seed) + + et_keys = list(energy_temp.keys()) + random.shuffle(et_keys) + + # Temporary energy dictionary, shuffled. + energy_temp_shuffled = dict() + for key in et_keys: + energy_temp_shuffled.update({key: energy_temp[key]}) + + mol_data = [] + mol_nc_data = [] + # Actual reading of the xyz files. + for i, k in enumerate(energy_temp_shuffled.keys()): + with open(k, 'r') as xyz_file: + lines = xyz_file.readlines() + + len_lines = len(lines) + mol_temp_data = [] + mol_nc_temp_data = np.array(np.zeros(len_lines-2)) + for j, line in enumerate(lines[2:len_lines]): + line_list = line.split() + + mol_nc_temp_data[j] = float(zi_data[line_list[0]]) + line_data = np.array(np.asarray(line_list[1:4], dtype=float)) + mol_temp_data.append(line_data) + + mol_data.append(mol_temp_data) + mol_nc_data.append(mol_nc_temp_data) + + # Convert everything to a numpy array. + molecules = np.array([np.array(mol) for mol in mol_data]) + nuclear_charge = np.array([nc_d for nc_d in mol_nc_data]) + energy_pbe0 = np.array([energy_temp_shuffled[k][0] + for k in energy_temp_shuffled.keys()]) + energy_delta = np.array([energy_temp_shuffled[k][1] + for k in energy_temp_shuffled.keys()]) + + return molecules, nuclear_charge, energy_pbe0, energy_delta + + +def read_qm7_data(): + """ + Reads all the qm7 data. + """ + tic = time.perf_counter() + printc('Data reading started.', 'CYAN') + + init_path = os.getcwd() + os.chdir('data') + data_path = os.getcwd() + + zi_data = read_nc_data(data_path) + molecules, nuclear_charge, energy_pbe0, energy_delta = \ + reas_db_data(zi_data, data_path) + + os.chdir(init_path) + toc = time.perf_counter() + printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN') + + return zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta diff --git a/main.py b/main.py deleted file mode 100644 index 3bf86572e..000000000 --- a/main.py +++ /dev/null @@ -1,238 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import time -from multiprocessing import Process, Pipe -# import matplotlib.pyplot as plt -import pandas as pd -from misc import printc -from read_qm7_data import read_qm7_data -from c_matrix import c_matrix_multiple -from lj_matrix import lj_matrix_multiple -from do_ml import do_ml - - -# Test -def ml(): - """ - Main function that does the whole ML process. - """ - # Initialization time. - init_time = time.perf_counter() - - # Data reading. - zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ - read_qm7_data() - - # Matrices calculation. - procs = [] - pipes = [] - - # cm_recv, cm_send = Pipe(False) - # p1 = Process(target=c_matrix_multiple, - # args=(molecules, nuclear_charge, cm_send)) - # procs.append(p1) - # pipes.append(cm_recv) - # p1.start() - - ljm_recv, ljm_send = Pipe(False) - p2 = Process(target=lj_matrix_multiple, - args=(molecules, nuclear_charge, ljm_send, 1, 0.25)) - procs.append(p2) - pipes.append(ljm_recv) - p2.start() - - # cm_data = pipes[0].recv() - ljm_data = pipes[0].recv() - - for proc in procs: - proc.join() - - # ML calculation. - procs = [] - # cm_pipes = [] - ljm_pipes = [] - for i in range(1500, 6500 + 1, 500): - # cm_recv, cm_send = Pipe(False) - # p1 = Process(target=do_ml, - # args=(cm_data, energy_pbe0, i, 'CM', cm_send)) - # procs.append(p1) - # cm_pipes.append(cm_recv) - # p1.start() - - ljm_recv, ljm_send = Pipe(False) - p2 = Process(target=do_ml, - args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send)) - procs.append(p2) - ljm_pipes.append(ljm_recv) - p2.start() - - # cm_bench_results = [] - ljm_bench_results = [] - for ljd_pipe in ljm_pipes: # cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): - # cm_bench_results.append(cd_pipe.recv()) - ljm_bench_results.append(ljd_pipe.recv()) - - for proc in procs: - proc.join() - - with open('data\\benchmarks.csv', 'a') as save_file: - # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,', - # 'mae,time,lj_s,lj_e,date_ran\n'])) - date = '/'.join([str(field) for field in time.localtime()[:3][::-1]]) - for ljm in ljm_bench_results: # cm, ljm, in zip(cm_bench_results, ljm_bench_results): - # cm_text = ','.join([str(field) for field in cm])\ - # + ',' + date + '\n' - ljm_text = ','.join([str(field) for field in ljm])\ - + ',1,0.25,' + date + '\n' - # save_file.write(cm_text) - save_file.write(ljm_text) - - # End of program - end_time = time.perf_counter() - printc('Program took {:.4f} seconds.'.format(end_time - init_time), - 'CYAN') - - -def pl(): - """ - Function for plotting the benchmarks. - """ - # Original columns. - or_cols = ['ml_type', - 'tr_size', - 'te_size', - 'kernel_s', - 'mae', - 'time', - 'lj_s', - 'lj_e', - 'date_ran'] - # Drop some original columns. - dor_cols = ['te_size', - 'kernel_s', - 'time', - 'date_ran'] - - # Read benchmarks data and drop some columns. - data_temp = pd.read_csv('data\\benchmarks.csv',) - data = pd.DataFrame(data_temp, columns=or_cols) - data = data.drop(columns=dor_cols) - - # Get the data of the first benchmarks and drop unnecesary columns. - first_data = pd.DataFrame(data, index=range(0, 22)) - first_data = first_data.drop(columns=['lj_s', 'lj_e']) - - # Columns to keep temporarily. - fd_columns = ['ml_type', - 'tr_size', - 'mae'] - - # Create new dataframes for each matrix descriptor and fill them. - first_data_cm = pd.DataFrame(columns=fd_columns) - first_data_ljm = pd.DataFrame(columns=fd_columns) - for i in range(first_data.shape[0]): - temp_df = first_data.iloc[[i]] - if first_data.at[i, 'ml_type'] == 'CM': - first_data_cm = first_data_cm.append(temp_df) - else: - first_data_ljm = first_data_ljm.append(temp_df) - - # Drop unnecesary column and rename 'mae' for later use. - first_data_cm = first_data_cm.drop(columns=['ml_type'])\ - .rename(columns={'mae': 'cm_mae'}) - first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ - .rename(columns={'mae': 'ljm_mae'}) - # print(first_data_cm) - # print(first_data_ljm) - - # Get the cm data axis so it can be joined with the ljm data axis. - cm_axis = first_data_cm.plot(x='tr_size', - y='cm_mae', - kind='line') - # Get the ljm data axis and join it with the cm one. - plot_axis = first_data_ljm.plot(ax=cm_axis, - x='tr_size', - y='ljm_mae', - kind='line') - plot_axis.set_xlabel('tr_size') - plot_axis.set_ylabel('mae') - plot_axis.set_title('mae for different tr_sizes') - # Get the figure and save it. - # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') - - # Get the rest of the benchmark data and drop unnecesary column. - new_data = data.drop(index=range(0, 22)) - new_data = new_data.drop(columns=['ml_type']) - - # Get the first set and rename it. - nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) - ndf_axis = nd_first.plot(x='tr_size', - y='1, 1', - kind='line') - last_axis = ndf_axis - for i in range(22, 99, 11): - lj_s = new_data['lj_s'][i] - lj_e = new_data['lj_e'][i] - new_mae = '{}, {}'.format(lj_s, lj_e) - nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ - .drop(columns=['lj_s', 'lj_e'])\ - .rename(columns={'mae': new_mae}) - last_axis = nd_temp.plot(ax=last_axis, - x='tr_size', - y=new_mae, - kind='line') - print(nd_temp) - - last_axis.set_xlabel('tr_size') - last_axis.set_ylabel('mae') - last_axis.set_title('mae for different parameters of lj(s)') - - last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') - - ndf_axis = nd_first.plot(x='tr_size', - y='1, 1', - kind='line') - last_axis = ndf_axis - for i in range(99, data.shape[0], 11): - lj_s = new_data['lj_s'][i] - lj_e = new_data['lj_e'][i] - new_mae = '{}, {}'.format(lj_s, lj_e) - nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ - .drop(columns=['lj_s', 'lj_e'])\ - .rename(columns={'mae': new_mae}) - last_axis = nd_temp.plot(ax=last_axis, - x='tr_size', - y=new_mae, - kind='line') - print(nd_temp) - - last_axis.set_xlabel('tr_size') - last_axis.set_ylabel('mae') - last_axis.set_title('mae for different parameters of lj(e)') - - last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') - - -if __name__ == '__main__': - # ml() - pl() diff --git a/misc.py b/misc.py deleted file mode 100644 index c50653a5c..000000000 --- a/misc.py +++ /dev/null @@ -1,53 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -from colorama import init, Fore, Style - -init() - - -def printc(text, color): - """ - Prints texts normaly, but in color. Using colorama. - text: string with the text to print. - color: color to be used, same as available in colorama. - """ - color_dic = {'BLACK': Fore.BLACK, - 'RED': Fore.RED, - 'GREEN': Fore.GREEN, - 'YELLOW': Fore.YELLOW, - 'BLUE': Fore.BLUE, - 'MAGENTA': Fore.MAGENTA, - 'CYAN': Fore.CYAN, - 'WHITE': Fore.WHITE, - 'RESET': Fore.RESET} - - color_dic_keys = color_dic.keys() - if color not in color_dic_keys: - print(Fore.RED - + '\'{}\' not found, using default color.'.format(color) - + Style.RESET_ALL) - actual_color = Fore.RESET - else: - actual_color = color_dic[color] - - print(actual_color + text + Style.RESET_ALL) diff --git a/read_qm7_data.py b/read_qm7_data.py deleted file mode 100644 index 068ea1a42..000000000 --- a/read_qm7_data.py +++ /dev/null @@ -1,144 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import os -import time -import numpy as np -import random -from misc import printc - - -# 'periodic_table_of_elements.txt' retrieved from -# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee -def read_nc_data(data_path): - """ - Reads nuclear charge data from file and returns a dictionary. - data_path: path to the data directory. - """ - fname = 'periodic_table_of_elements.txt' - with open(''.join([data_path, '\\', fname]), 'r') as infile: - temp_lines = infile.readlines() - - del temp_lines[0] - - lines = [] - for temp_line in temp_lines: - new_line = temp_line.split(sep=',') - lines.append(new_line) - - # Dictionary of nuclear charge. - return {line[2]: int(line[0]) for line in lines} - - -# 'hof_qm7.txt.txt' retrieved from -# https://github.com/qmlcode/tutorial -def reas_db_data(zi_data, - data_path, - r_seed=111): - """ - Reads molecule database and extracts - its contents as usable variables. - zi_data: dictionary containing nuclear charge data. - data_path: path to the data directory. - r_seed: random seed. - """ - os.chdir(data_path) - - fname = 'hof_qm7.txt' - with open(fname, 'r') as infile: - lines = infile.readlines() - - # Temporary energy dictionary. - energy_temp = dict() - - for line in lines: - xyz_data = line.split() - - xyz_name = xyz_data[0] - hof = float(xyz_data[1]) - dftb = float(xyz_data[2]) - # print(xyz_name, hof, dftb) - - energy_temp[xyz_name] = np.array([hof, hof - dftb]) - - # Use a random seed. - random.seed(r_seed) - - et_keys = list(energy_temp.keys()) - random.shuffle(et_keys) - - # Temporary energy dictionary, shuffled. - energy_temp_shuffled = dict() - for key in et_keys: - energy_temp_shuffled.update({key: energy_temp[key]}) - - mol_data = [] - mol_nc_data = [] - # Actual reading of the xyz files. - for i, k in enumerate(energy_temp_shuffled.keys()): - with open(k, 'r') as xyz_file: - lines = xyz_file.readlines() - - len_lines = len(lines) - mol_temp_data = [] - mol_nc_temp_data = np.array(np.zeros(len_lines-2)) - for j, line in enumerate(lines[2:len_lines]): - line_list = line.split() - - mol_nc_temp_data[j] = float(zi_data[line_list[0]]) - line_data = np.array(np.asarray(line_list[1:4], dtype=float)) - mol_temp_data.append(line_data) - - mol_data.append(mol_temp_data) - mol_nc_data.append(mol_nc_temp_data) - - # Convert everything to a numpy array. - molecules = np.array([np.array(mol) for mol in mol_data]) - nuclear_charge = np.array([nc_d for nc_d in mol_nc_data]) - energy_pbe0 = np.array([energy_temp_shuffled[k][0] - for k in energy_temp_shuffled.keys()]) - energy_delta = np.array([energy_temp_shuffled[k][1] - for k in energy_temp_shuffled.keys()]) - - return molecules, nuclear_charge, energy_pbe0, energy_delta - - -def read_qm7_data(): - """ - Reads all the qm7 data. - """ - tic = time.perf_counter() - printc('Data reading started.', 'CYAN') - - init_path = os.getcwd() - os.chdir('data') - data_path = os.getcwd() - - zi_data = read_nc_data(data_path) - molecules, nuclear_charge, energy_pbe0, energy_delta = \ - reas_db_data(zi_data, data_path) - - os.chdir(init_path) - toc = time.perf_counter() - printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN') - - return zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta -- cgit v1.2.3-54-g00ecf From 124c3c5eb77c807b8a8a78413f3800720914c8e1 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Wed, 18 Dec 2019 08:15:18 -0700 Subject: Fix bugs --- lj_matrix/__init__.py | 23 ++++++++++ lj_matrix/__main__.py | 13 +++--- lj_matrix/c_matrix.py | 2 +- lj_matrix/do_ml.py | 4 +- lj_matrix/gauss_kernel.py | 2 +- lj_matrix/lj_matrix.py | 2 +- lj_matrix/read_qm7_data.py | 2 +- lj_matrix/version.py | 23 ++++++++++ setup.py | 102 +++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 161 insertions(+), 12 deletions(-) create mode 100644 lj_matrix/version.py create mode 100644 setup.py diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index 48cd14913..47d7e5013 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -20,3 +20,26 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +from misc import printc +from read_qm7_data import read_qm7_data, read_nc_data, reas_db_data +from c_matrix import c_matrix, c_matrix_multiple +from cholesky_solve import cholesky_solve +from do_ml import do_ml +from frob_norm import frob_norm +from gauss_kernel import gauss_kernel +from lj_matrix import lj_matrix, lj_matrix_multiple + +# If somebody does "from package import *", this is what they will +# be able to access: +__all__ = ['printc', + 'read_qm7_data', + 'read_nc_data', + 'reas_db_data', + 'c_matrix', + 'c_matrix_multiple', + 'cholesky_solve', + 'do_ml', + 'frob_norm', + 'gauss_kernel', + 'lj_matrix', + 'lj_matrix_multiple'] diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index 4e13f4995..5a0e95b94 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -24,11 +24,11 @@ import time from multiprocessing import Process, Pipe # import matplotlib.pyplot as plt import pandas as pd -from lj_matrix.misc import printc -from lj_matrix.read_qm7_data import read_qm7_data -from lj_matrix.c_matrix import c_matrix_multiple -from lj_matrix.lj_matrix import lj_matrix_multiple -from lj_matrix.do_ml import do_ml +from misc import printc +from read_qm7_data import read_qm7_data +from c_matrix import c_matrix_multiple +from lj_matrix import lj_matrix_multiple +from do_ml import do_ml # Test @@ -235,4 +235,5 @@ def pl(): if __name__ == '__main__': # ml() - pl() + # pl() + print('OK!') diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py index f40a18c68..4de711a1b 100644 --- a/lj_matrix/c_matrix.py +++ b/lj_matrix/c_matrix.py @@ -21,7 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import time -from lj_matrix.misc import printc +from misc import printc import math import numpy as np from numpy.linalg import eig diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index acf5455f4..c88533e68 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -23,8 +23,8 @@ SOFTWARE. import time from misc import printc import numpy as np -from lj_matrix.gauss_kernel import gauss_kernel -from lj_matrix.cholesky_solve import cholesky_solve +from gauss_kernel import gauss_kernel +from cholesky_solve import cholesky_solve def do_ml(desc_data, diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py index 5dd8e6406..0dfc65d59 100644 --- a/lj_matrix/gauss_kernel.py +++ b/lj_matrix/gauss_kernel.py @@ -22,7 +22,7 @@ SOFTWARE. """ import math import numpy as np -from lj_matrix.frob_norm import frob_norm +from frob_norm import frob_norm def gauss_kernel(X_1, X_2, sigma): diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py index 4f63e95ca..2a8e0d956 100644 --- a/lj_matrix/lj_matrix.py +++ b/lj_matrix/lj_matrix.py @@ -21,7 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import time -from lj_matrix.misc import printc +from misc import printc import math import numpy as np from numpy.linalg import eig diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py index b54691fb0..068ea1a42 100644 --- a/lj_matrix/read_qm7_data.py +++ b/lj_matrix/read_qm7_data.py @@ -24,7 +24,7 @@ import os import time import numpy as np import random -from lj_matrix.misc import printc +from misc import printc # 'periodic_table_of_elements.txt' retrieved from diff --git a/lj_matrix/version.py b/lj_matrix/version.py new file mode 100644 index 000000000..fab58433d --- /dev/null +++ b/lj_matrix/version.py @@ -0,0 +1,23 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +__version__ = '0.0.1' diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..719ef3ce0 --- /dev/null +++ b/setup.py @@ -0,0 +1,102 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +# This setup.py template was obtained from +# https://github.com/navdeep-G/setup.py/blob/master/setup.py +# ---------------------------------------------------------------------- +# Note: To use the 'upload' functionality of this file, you must: +# $ pipenv install twine --dev + +import io +import os + +from setuptools import find_packages, setup + +from lj_matrix.version import __version__ + +# Package meta-data. +NAME = 'lj_matrix' +DESCRIPTION = 'A Lennard Jones matrix exploration.' +URL = 'https://github.com/luevano/lj_matrix' +EMAIL = 'a301436@uach.mx' +AUTHOR = 'David Luevano Alvarado' +REQUIRES_PYTHON = '>=3.7' +VERSION = __version__ +# VERSION = '0.0.1' + +# What packages are required for this module to be executed? +REQUIRED = [ + # 'requests', 'maya', 'records', +] + +# What packages are optional? +EXTRAS = { + # 'fancy feature': ['django'], +} + +# The rest you shouldn't have to touch too much :) +# ------------------------------------------------ +# Except, perhaps the License and Trove Classifiers! +# If you do change the License, remember to change +# the Trove Classifier for that! + +here = os.path.abspath(os.path.dirname(__file__)) + +# Import the README and use it as the long-description. +# Note: this will only work if 'README.md' +# is present in your MANIFEST.in file! +try: + with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = '\n' + f.read() +except FileNotFoundError: + long_description = DESCRIPTION + +# Where the magic happens: +setup( + name=NAME, + version=VERSION, + description=DESCRIPTION, + long_description=long_description, + long_description_content_type='text/markdown', + author=AUTHOR, + author_email=EMAIL, + python_requires=REQUIRES_PYTHON, + url=URL, + packages=find_packages(exclude=["tests", + "*.tests", + "*.tests.*", + "tests.*"]), + # If your package is a single module, use this instead of 'packages': + # py_modules=['mypackage'], + install_requires=REQUIRED, + extras_require=EXTRAS, + include_package_data=True, + license='MIT', + classifiers=[ + # Trove classifiers + # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7' + ] +) -- cgit v1.2.3-54-g00ecf From a50d424d0ab7dd4cc6a2d6fc94371fa65a0d89b2 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Wed, 18 Dec 2019 09:53:44 -0700 Subject: Fix test issues --- lj_matrix/__init__.py | 27 ++++++++++++++------------- lj_matrix/__main__.py | 10 +++++----- lj_matrix/c_matrix.py | 2 +- lj_matrix/do_ml.py | 6 +++--- lj_matrix/gauss_kernel.py | 2 +- lj_matrix/lj_matrix.py | 2 +- lj_matrix/read_qm7_data.py | 2 +- test/__init__.py | 22 ++++++++++++++++++++++ test/test_c_matrix.py | 33 +++++++++++++++++++++++++++++++++ 9 files changed, 81 insertions(+), 25 deletions(-) create mode 100644 test/__init__.py create mode 100644 test/test_c_matrix.py diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index 47d7e5013..5019bd51d 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -20,26 +20,27 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from misc import printc -from read_qm7_data import read_qm7_data, read_nc_data, reas_db_data -from c_matrix import c_matrix, c_matrix_multiple -from cholesky_solve import cholesky_solve -from do_ml import do_ml -from frob_norm import frob_norm -from gauss_kernel import gauss_kernel -from lj_matrix import lj_matrix, lj_matrix_multiple +from lj_matrix.misc import printc +from lj_matrix.read_qm7_data import read_nc_data, reas_db_data, read_qm7_data +from lj_matrix.c_matrix import c_matrix, c_matrix_multiple +from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple +from lj_matrix.frob_norm import frob_norm +from lj_matrix.gauss_kernel import gauss_kernel +from lj_matrix.cholesky_solve import cholesky_solve +from lj_matrix.do_ml import do_ml + # If somebody does "from package import *", this is what they will # be able to access: __all__ = ['printc', - 'read_qm7_data', 'read_nc_data', 'reas_db_data', + 'read_qm7_data', 'c_matrix', 'c_matrix_multiple', - 'cholesky_solve', - 'do_ml', + 'lj_matrix', + 'lj_matrix_multiple', 'frob_norm', 'gauss_kernel', - 'lj_matrix', - 'lj_matrix_multiple'] + 'cholesky_solve', + 'do_ml'] diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index 5a0e95b94..0b2a7c6f8 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -24,11 +24,11 @@ import time from multiprocessing import Process, Pipe # import matplotlib.pyplot as plt import pandas as pd -from misc import printc -from read_qm7_data import read_qm7_data -from c_matrix import c_matrix_multiple -from lj_matrix import lj_matrix_multiple -from do_ml import do_ml +from lj_matrix.misc import printc +from lj_matrix.read_qm7_data import read_qm7_data +from lj_matrix.c_matrix import c_matrix_multiple +from lj_matrix.lj_matrix import lj_matrix_multiple +from lj_matrix.do_ml import do_ml # Test diff --git a/lj_matrix/c_matrix.py b/lj_matrix/c_matrix.py index 4de711a1b..f21ccfd8c 100644 --- a/lj_matrix/c_matrix.py +++ b/lj_matrix/c_matrix.py @@ -21,10 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import time -from misc import printc import math import numpy as np from numpy.linalg import eig +from lj_matrix.misc import printc def c_matrix(mol_data, diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index c88533e68..ba88a6fd8 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -21,10 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import time -from misc import printc import numpy as np -from gauss_kernel import gauss_kernel -from cholesky_solve import cholesky_solve +from lj_matrix.misc import printc +from lj_matrix.gauss_kernel import gauss_kernel +from lj_matrix.cholesky_solve import cholesky_solve def do_ml(desc_data, diff --git a/lj_matrix/gauss_kernel.py b/lj_matrix/gauss_kernel.py index 0dfc65d59..5dd8e6406 100644 --- a/lj_matrix/gauss_kernel.py +++ b/lj_matrix/gauss_kernel.py @@ -22,7 +22,7 @@ SOFTWARE. """ import math import numpy as np -from frob_norm import frob_norm +from lj_matrix.frob_norm import frob_norm def gauss_kernel(X_1, X_2, sigma): diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py index 2a8e0d956..2a56a3cdf 100644 --- a/lj_matrix/lj_matrix.py +++ b/lj_matrix/lj_matrix.py @@ -21,10 +21,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import time -from misc import printc import math import numpy as np from numpy.linalg import eig +from lj_matrix.misc import printc def lj_matrix(mol_data, diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py index 068ea1a42..b54691fb0 100644 --- a/lj_matrix/read_qm7_data.py +++ b/lj_matrix/read_qm7_data.py @@ -24,7 +24,7 @@ import os import time import numpy as np import random -from misc import printc +from lj_matrix.misc import printc # 'periodic_table_of_elements.txt' retrieved from diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 000000000..8b866e928 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,22 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" \ No newline at end of file diff --git a/test/test_c_matrix.py b/test/test_c_matrix.py new file mode 100644 index 000000000..a8bb5ae34 --- /dev/null +++ b/test/test_c_matrix.py @@ -0,0 +1,33 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import unittest +from lj_matrix.c_matrix import c_matrix + + +class TestCMatrix(unittest.TestCase): + def test_c_matrix(self): + self.assertAlmostEqual(1, 1) + + +if __name__ == '__main__': + unittest.main() -- cgit v1.2.3-54-g00ecf From c8fa9fbff8ed9b5f30882bc5da525f7365095f83 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Wed, 18 Dec 2019 09:56:31 -0700 Subject: Fix bug --- test/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/__init__.py b/test/__init__.py index 8b866e928..48cd14913 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -19,4 +19,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -""" \ No newline at end of file +""" -- cgit v1.2.3-54-g00ecf From b7b0d39af578159a0c53eacc9c55cc79084c7469 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sun, 22 Dec 2019 19:39:05 -0700 Subject: Update requirements --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1856939e2..28b557ddb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -colorama==0.4.1 -numpy==1.17.4 +colorama==0.4.3 +numpy==1.18.0 pandas==0.25.3 matplotlib==3.1.2 \ No newline at end of file -- cgit v1.2.3-54-g00ecf From 72be4105825c639cf9dfad6229c7a1d62a16c44d Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Mon, 23 Dec 2019 11:48:32 -0700 Subject: Change name convention --- lj_matrix/do_ml.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index ba88a6fd8..bb954a0ae 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -79,17 +79,17 @@ def do_ml(desc_data, printc('\tTest size: {}'.format(test_size), 'CYAN') printc('\tSigma: {}'.format(sigma), 'CYAN') - Xcm_training = desc_data[:training_size] - Ycm_training = energy_data[:training_size] - Kcm_training = gauss_kernel(Xcm_training, Xcm_training, sigma) - alpha_cm = cholesky_solve(Kcm_training, Ycm_training) + X_training = desc_data[:training_size] + Y_training = energy_data[:training_size] + K_training = gauss_kernel(X_training, X_training, sigma) + alpha_ = cholesky_solve(K_training, Y_training) - Xcm_test = desc_data[-test_size:] - Ycm_test = energy_data[-test_size:] - Kcm_test = gauss_kernel(Xcm_test, Xcm_training, sigma) - Ycm_predicted = np.dot(Kcm_test, alpha_cm) + X_test = desc_data[-test_size:] + Y_test = energy_data[-test_size:] + K_test = gauss_kernel(X_test, X_training, sigma) + Y_predicted = np.dot(K_test, alpha_) - mae = np.mean(np.abs(Ycm_predicted - Ycm_test)) + mae = np.mean(np.abs(Y_predicted - Y_test)) if show_msgs: printc('\tMAE for {}: {:.4f}'.format(desc_type, mae), 'GREEN') -- cgit v1.2.3-54-g00ecf From db64425a5580a49312e313a6e75e7a296eb93b35 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Mon, 23 Dec 2019 12:23:46 -0700 Subject: Restructure code and bug fix --- lj_matrix/__init__.py | 4 +- lj_matrix/__main__.py | 31 ++-------------- lj_matrix/lj_matrix.py | 6 ++- lj_matrix/parallel_create_matrices.py | 70 +++++++++++++++++++++++++++++++++++ lj_matrix/read_qm7_data.py | 6 +-- 5 files changed, 83 insertions(+), 34 deletions(-) create mode 100644 lj_matrix/parallel_create_matrices.py diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index 5019bd51d..d7794d3be 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -21,7 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ from lj_matrix.misc import printc -from lj_matrix.read_qm7_data import read_nc_data, reas_db_data, read_qm7_data +from lj_matrix.read_qm7_data import read_nc_data, read_db_data, read_qm7_data from lj_matrix.c_matrix import c_matrix, c_matrix_multiple from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple from lj_matrix.frob_norm import frob_norm @@ -34,7 +34,7 @@ from lj_matrix.do_ml import do_ml # be able to access: __all__ = ['printc', 'read_nc_data', - 'reas_db_data', + 'read_db_data', 'read_qm7_data', 'c_matrix', 'c_matrix_multiple', diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index 0b2a7c6f8..8e52031f1 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -26,8 +26,7 @@ from multiprocessing import Process, Pipe import pandas as pd from lj_matrix.misc import printc from lj_matrix.read_qm7_data import read_qm7_data -from lj_matrix.c_matrix import c_matrix_multiple -from lj_matrix.lj_matrix import lj_matrix_multiple +from lj_matrix.parallel_create_matrices import parallel_create_matrices from lj_matrix.do_ml import do_ml @@ -40,32 +39,10 @@ def ml(): init_time = time.perf_counter() # Data reading. - zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\ - read_qm7_data() + molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data() # Matrices calculation. - procs = [] - pipes = [] - - # cm_recv, cm_send = Pipe(False) - # p1 = Process(target=c_matrix_multiple, - # args=(molecules, nuclear_charge, cm_send)) - # procs.append(p1) - # pipes.append(cm_recv) - # p1.start() - - ljm_recv, ljm_send = Pipe(False) - p2 = Process(target=lj_matrix_multiple, - args=(molecules, nuclear_charge, ljm_send, 1, 0.25)) - procs.append(p2) - pipes.append(ljm_recv) - p2.start() - - # cm_data = pipes[0].recv() - ljm_data = pipes[0].recv() - - for proc in procs: - proc.join() + cm_data, ljm_data = parallel_create_matrices(molecules, nuclear_charge) # ML calculation. procs = [] @@ -234,6 +211,6 @@ def pl(): if __name__ == '__main__': - # ml() + ml() # pl() print('OK!') diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py index 2a56a3cdf..0c16b5686 100644 --- a/lj_matrix/lj_matrix.py +++ b/lj_matrix/lj_matrix.py @@ -38,6 +38,8 @@ def lj_matrix(mol_data, Creates the Lennard-Jones Matrix from the molecule data given. mol_data: molecule data, matrix of atom coordinates. nc_data: nuclear charge data, array of atom data. + sigma: sigma value. + epsilon: epsilon value. max_len: maximum amount of atoms in molecule. as_eig: if data should be returned as matrix or array of eigenvalues. bohr_radius_units: if units should be in bohr's radius units. @@ -171,8 +173,8 @@ def lj_matrix(mol_data, def lj_matrix_multiple(mol_data, nc_data, pipe=None, - sigma=1, - epsilon=1, + sigma=1.0, + epsilon=1.0, max_len=25, as_eig=True, bohr_radius_units=False): diff --git a/lj_matrix/parallel_create_matrices.py b/lj_matrix/parallel_create_matrices.py new file mode 100644 index 000000000..0ab691525 --- /dev/null +++ b/lj_matrix/parallel_create_matrices.py @@ -0,0 +1,70 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from multiprocessing import Process, Pipe +from lj_matrix.c_matrix import c_matrix_multiple +from lj_matrix.lj_matrix import lj_matrix_multiple + + +def parallel_create_matrices(mol_data, + nc_data, + sigma=1.0, + epsilon=1.0, + max_len=25, + as_eig=True, + bohr_radius_units=False): + """ + Creates the Coulomb and L-J matrices in parallel. + mol_data: molecule data, matrix of atom coordinates. + nc_data: nuclear charge data, array of atom data. + sigma: sigma value for L-J matrix. + epsilon: epsilon value for L-J matrix. + max_len: maximum amount of atoms in molecule. + as_eig: if data should be returned as matrix or array of eigenvalues. + bohr_radius_units: if units should be in bohr's radius units. + """ + + # Matrices calculation. + procs = [] + pipes = [] + + cm_recv, cm_send = Pipe(False) + p1 = Process(target=c_matrix_multiple, + args=(mol_data, nc_data, cm_send)) + procs.append(p1) + pipes.append(cm_recv) + p1.start() + + ljm_recv, ljm_send = Pipe(False) + p2 = Process(target=lj_matrix_multiple, + args=(mol_data, nc_data, ljm_send, sigma, epsilon)) + procs.append(p2) + pipes.append(ljm_recv) + p2.start() + + cm_data = pipes[0].recv() + ljm_data = pipes[1].recv() + + for proc in procs: + proc.join() + + return cm_data, ljm_data diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py index b54691fb0..9bb7629ca 100644 --- a/lj_matrix/read_qm7_data.py +++ b/lj_matrix/read_qm7_data.py @@ -51,7 +51,7 @@ def read_nc_data(data_path): # 'hof_qm7.txt.txt' retrieved from # https://github.com/qmlcode/tutorial -def reas_db_data(zi_data, +def read_db_data(zi_data, data_path, r_seed=111): """ @@ -135,10 +135,10 @@ def read_qm7_data(): zi_data = read_nc_data(data_path) molecules, nuclear_charge, energy_pbe0, energy_delta = \ - reas_db_data(zi_data, data_path) + read_db_data(zi_data, data_path) os.chdir(init_path) toc = time.perf_counter() printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN') - return zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta + return molecules, nuclear_charge, energy_pbe0, energy_delta -- cgit v1.2.3-54-g00ecf From f8bd690096e432b313ee17baa93c7422b45ee9b8 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Mon, 23 Dec 2019 12:29:35 -0700 Subject: Fix init --- lj_matrix/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index d7794d3be..0c2407a57 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -28,6 +28,7 @@ from lj_matrix.frob_norm import frob_norm from lj_matrix.gauss_kernel import gauss_kernel from lj_matrix.cholesky_solve import cholesky_solve from lj_matrix.do_ml import do_ml +from lj_matrix.parallel_create_matrices import parallel_create_matrices # If somebody does "from package import *", this is what they will @@ -43,4 +44,5 @@ __all__ = ['printc', 'frob_norm', 'gauss_kernel', 'cholesky_solve', - 'do_ml'] + 'do_ml', + 'parallel_create_matrices'] -- cgit v1.2.3-54-g00ecf From f5d72558ed6ec63c7de4940c29d4f6c92605a30d Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Mon, 23 Dec 2019 12:39:28 -0700 Subject: Fix init --- lj_matrix/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index 0c2407a57..d59e3481c 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -20,7 +20,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from lj_matrix.misc import printc from lj_matrix.read_qm7_data import read_nc_data, read_db_data, read_qm7_data from lj_matrix.c_matrix import c_matrix, c_matrix_multiple from lj_matrix.lj_matrix import lj_matrix, lj_matrix_multiple @@ -33,8 +32,7 @@ from lj_matrix.parallel_create_matrices import parallel_create_matrices # If somebody does "from package import *", this is what they will # be able to access: -__all__ = ['printc', - 'read_nc_data', +__all__ = ['read_nc_data', 'read_db_data', 'read_qm7_data', 'c_matrix', -- cgit v1.2.3-54-g00ecf From b14c581ca5fdab47d7e1c0b688331368cb7f29d0 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Mon, 23 Dec 2019 13:11:12 -0700 Subject: Refactor ml code --- lj_matrix/do_ml.py | 104 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index bb954a0ae..ac044cfb3 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -22,19 +22,22 @@ SOFTWARE. """ import time import numpy as np +from multiprocessing import Process, Pipe from lj_matrix.misc import printc from lj_matrix.gauss_kernel import gauss_kernel from lj_matrix.cholesky_solve import cholesky_solve - - -def do_ml(desc_data, - energy_data, - training_size, - desc_type=None, - pipe=None, - test_size=None, - sigma=1000.0, - show_msgs=True): +from lj_matrix.read_qm7_data import read_qm7_data +from lj_matrix.parallel_create_matrices import parallel_create_matrices + + +def ml(desc_data, + energy_data, + training_size, + desc_type=None, + pipe=None, + test_size=None, + sigma=1000.0, + show_msgs=True): """ Does the ML methodology. desc_data: descriptor (or representation) data. @@ -51,6 +54,7 @@ def do_ml(desc_data, Also, training is done with the first part of the data and testing with the ending part of the data. """ + tic = time.perf_counter() # Initial calculations for later use. d_len = len(desc_data) e_len = len(energy_data) @@ -72,7 +76,6 @@ def do_ml(desc_data, if test_size > 1500: test_size = 1500 - tic = time.perf_counter() if show_msgs: printc('{} ML started.'.format(desc_type), 'GREEN') printc('\tTraining size: {}'.format(training_size), 'CYAN') @@ -106,3 +109,82 @@ def do_ml(desc_data, pipe.send([desc_type, training_size, test_size, sigma, mae, tictoc]) return mae, tictoc + + +# Test +def do_ml(min_training_size, + max_training_size=None, + training_increment_size=None, + ljm_sigma=1.0, + ljm_epsilon=1.0, + save_benchmarks=False): + """ + Main function that does the whole ML process. + min_training_size: minimum training size. + max_training_size: maximum training size. + training_increment_size: training increment size. + ljm_sigma: sigma value for lj matrix. + ljm_epsilon: epsilon value for lj matrix. + save_benchmarks: if benchmarks should be saved. + """ + # Initialization time. + init_time = time.perf_counter() + + # Data reading. + molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data() + + # Matrices calculation. + cm_data, ljm_data = parallel_create_matrices(molecules, + nuclear_charge, + ljm_sigma, + ljm_epsilon) + + # ML calculation. + procs = [] + cm_pipes = [] + ljm_pipes = [] + for i in range(min_training_size, + max_training_size + 1, + training_increment_size): + cm_recv, cm_send = Pipe(False) + p1 = Process(target=ml, + args=(cm_data, energy_pbe0, i, 'CM', cm_send)) + procs.append(p1) + cm_pipes.append(cm_recv) + p1.start() + + ljm_recv, ljm_send = Pipe(False) + p2 = Process(target=ml, + args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send)) + procs.append(p2) + ljm_pipes.append(ljm_recv) + p2.start() + + cm_bench_results = [] + ljm_bench_results = [] + for cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): + cm_bench_results.append(cd_pipe.recv()) + ljm_bench_results.append(ljd_pipe.recv()) + + for proc in procs: + proc.join() + + if save_benchmarks: + with open('data\\benchmarks.csv', 'a') as save_file: + # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,', + # 'mae,time,lj_s,lj_e,date_ran\n'])) + ltime = time.localtime()[:3][::-1] + ljm_se = ',' + str(ljm_sigma) + ',' + str(ljm_epsilon) + ',' + date = '/'.join([str(field) for field in ltime]) + for cm, ljm, in zip(cm_bench_results, ljm_bench_results): + cm_text = ','.join([str(field) for field in cm])\ + + ',' + date + '\n' + ljm_text = ','.join([str(field) for field in ljm])\ + + ljm_se + date + '\n' + save_file.write(cm_text) + save_file.write(ljm_text) + + # End of program + end_time = time.perf_counter() + printc('Program took {:.4f} seconds.'.format(end_time - init_time), + 'CYAN') -- cgit v1.2.3-54-g00ecf From b4c2dc01ab17248814988c8e141bf16072c45abd Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 10:37:49 -0700 Subject: Add options to do_ml function --- lj_matrix/do_ml.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index ac044cfb3..12323780a 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -111,13 +111,17 @@ def ml(desc_data, return mae, tictoc -# Test def do_ml(min_training_size, max_training_size=None, training_increment_size=None, ljm_sigma=1.0, ljm_epsilon=1.0, - save_benchmarks=False): + save_benchmarks=False, + max_len=25, + as_eig=True, + bohr_radius_units=False, + sigma=1000.0, + show_msgs=True): """ Main function that does the whole ML process. min_training_size: minimum training size. @@ -126,6 +130,11 @@ def do_ml(min_training_size, ljm_sigma: sigma value for lj matrix. ljm_epsilon: epsilon value for lj matrix. save_benchmarks: if benchmarks should be saved. + max_len: maximum amount of atoms in molecule. + as_eig: if data should be returned as matrix or array of eigenvalues. + bohr_radius_units: if units should be in bohr's radius units. + sigma: depth of the kernel. + show_msgs: Show debug messages or not. """ # Initialization time. init_time = time.perf_counter() @@ -137,7 +146,10 @@ def do_ml(min_training_size, cm_data, ljm_data = parallel_create_matrices(molecules, nuclear_charge, ljm_sigma, - ljm_epsilon) + ljm_epsilon, + max_len, + as_eig, + bohr_radius_units) # ML calculation. procs = [] @@ -148,14 +160,28 @@ def do_ml(min_training_size, training_increment_size): cm_recv, cm_send = Pipe(False) p1 = Process(target=ml, - args=(cm_data, energy_pbe0, i, 'CM', cm_send)) + args=(cm_data, + energy_pbe0, + i, + 'CM', + cm_send, + max_training_size, + sigma, + show_msgs)) procs.append(p1) cm_pipes.append(cm_recv) p1.start() ljm_recv, ljm_send = Pipe(False) p2 = Process(target=ml, - args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send)) + args=(ljm_data, + energy_pbe0, + i, + 'L-JM', + ljm_send, + max_training_size, + sigma, + show_msgs)) procs.append(p2) ljm_pipes.append(ljm_recv) p2.start() -- cgit v1.2.3-54-g00ecf From cdbb1ac890cb0d062cdb2f216c347f681fbfa7b8 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 10:47:20 -0700 Subject: Fix bug --- lj_matrix/__main__.py | 68 +-------------------------------------------------- lj_matrix/do_ml.py | 4 ++- 2 files changed, 4 insertions(+), 68 deletions(-) diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index 8e52031f1..f7e4065da 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -20,76 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import time -from multiprocessing import Process, Pipe -# import matplotlib.pyplot as plt import pandas as pd -from lj_matrix.misc import printc -from lj_matrix.read_qm7_data import read_qm7_data -from lj_matrix.parallel_create_matrices import parallel_create_matrices from lj_matrix.do_ml import do_ml -# Test -def ml(): - """ - Main function that does the whole ML process. - """ - # Initialization time. - init_time = time.perf_counter() - - # Data reading. - molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data() - - # Matrices calculation. - cm_data, ljm_data = parallel_create_matrices(molecules, nuclear_charge) - - # ML calculation. - procs = [] - # cm_pipes = [] - ljm_pipes = [] - for i in range(1500, 6500 + 1, 500): - # cm_recv, cm_send = Pipe(False) - # p1 = Process(target=do_ml, - # args=(cm_data, energy_pbe0, i, 'CM', cm_send)) - # procs.append(p1) - # cm_pipes.append(cm_recv) - # p1.start() - - ljm_recv, ljm_send = Pipe(False) - p2 = Process(target=do_ml, - args=(ljm_data, energy_pbe0, i, 'L-JM', ljm_send)) - procs.append(p2) - ljm_pipes.append(ljm_recv) - p2.start() - - # cm_bench_results = [] - ljm_bench_results = [] - for ljd_pipe in ljm_pipes: # cd_pipe, ljd_pipe in zip(cm_pipes, ljm_pipes): - # cm_bench_results.append(cd_pipe.recv()) - ljm_bench_results.append(ljd_pipe.recv()) - - for proc in procs: - proc.join() - - with open('data\\benchmarks.csv', 'a') as save_file: - # save_file.write(''.join(['ml_type,tr_size,te_size,kernel_s,', - # 'mae,time,lj_s,lj_e,date_ran\n'])) - date = '/'.join([str(field) for field in time.localtime()[:3][::-1]]) - for ljm in ljm_bench_results: # cm, ljm, in zip(cm_bench_results, ljm_bench_results): - # cm_text = ','.join([str(field) for field in cm])\ - # + ',' + date + '\n' - ljm_text = ','.join([str(field) for field in ljm])\ - + ',1,0.25,' + date + '\n' - # save_file.write(cm_text) - save_file.write(ljm_text) - - # End of program - end_time = time.perf_counter() - printc('Program took {:.4f} seconds.'.format(end_time - init_time), - 'CYAN') - - def pl(): """ Function for plotting the benchmarks. @@ -211,6 +145,6 @@ def pl(): if __name__ == '__main__': - ml() + do_ml(min_training_size=1500, max_training_size=3000) # pl() print('OK!') diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index 12323780a..8724e6831 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -113,7 +113,7 @@ def ml(desc_data, def do_ml(min_training_size, max_training_size=None, - training_increment_size=None, + training_increment_size=500, ljm_sigma=1.0, ljm_epsilon=1.0, save_benchmarks=False, @@ -138,6 +138,8 @@ def do_ml(min_training_size, """ # Initialization time. init_time = time.perf_counter() + if not max_training_size: + max_training_size = min_training_size + training_increment_size # Data reading. molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data() -- cgit v1.2.3-54-g00ecf From f9cd430d8e66cdac5d78a643f87445e3dd6bdf8e Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 10:54:36 -0700 Subject: Refactor code --- lj_matrix/__init__.py | 4 +- lj_matrix/__main__.py | 125 +------------------------------------------------- lj_matrix/misc.py | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 124 deletions(-) diff --git a/lj_matrix/__init__.py b/lj_matrix/__init__.py index d59e3481c..a430aac68 100644 --- a/lj_matrix/__init__.py +++ b/lj_matrix/__init__.py @@ -28,6 +28,7 @@ from lj_matrix.gauss_kernel import gauss_kernel from lj_matrix.cholesky_solve import cholesky_solve from lj_matrix.do_ml import do_ml from lj_matrix.parallel_create_matrices import parallel_create_matrices +from lj_matrix.misc import plot_benchmarks # If somebody does "from package import *", this is what they will @@ -43,4 +44,5 @@ __all__ = ['read_nc_data', 'gauss_kernel', 'cholesky_solve', 'do_ml', - 'parallel_create_matrices'] + 'parallel_create_matrices', + 'plot_benchmarks'] diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index f7e4065da..98f341e1e 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -20,131 +20,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import pandas as pd from lj_matrix.do_ml import do_ml - - -def pl(): - """ - Function for plotting the benchmarks. - """ - # Original columns. - or_cols = ['ml_type', - 'tr_size', - 'te_size', - 'kernel_s', - 'mae', - 'time', - 'lj_s', - 'lj_e', - 'date_ran'] - # Drop some original columns. - dor_cols = ['te_size', - 'kernel_s', - 'time', - 'date_ran'] - - # Read benchmarks data and drop some columns. - data_temp = pd.read_csv('data\\benchmarks.csv',) - data = pd.DataFrame(data_temp, columns=or_cols) - data = data.drop(columns=dor_cols) - - # Get the data of the first benchmarks and drop unnecesary columns. - first_data = pd.DataFrame(data, index=range(0, 22)) - first_data = first_data.drop(columns=['lj_s', 'lj_e']) - - # Columns to keep temporarily. - fd_columns = ['ml_type', - 'tr_size', - 'mae'] - - # Create new dataframes for each matrix descriptor and fill them. - first_data_cm = pd.DataFrame(columns=fd_columns) - first_data_ljm = pd.DataFrame(columns=fd_columns) - for i in range(first_data.shape[0]): - temp_df = first_data.iloc[[i]] - if first_data.at[i, 'ml_type'] == 'CM': - first_data_cm = first_data_cm.append(temp_df) - else: - first_data_ljm = first_data_ljm.append(temp_df) - - # Drop unnecesary column and rename 'mae' for later use. - first_data_cm = first_data_cm.drop(columns=['ml_type'])\ - .rename(columns={'mae': 'cm_mae'}) - first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ - .rename(columns={'mae': 'ljm_mae'}) - # print(first_data_cm) - # print(first_data_ljm) - - # Get the cm data axis so it can be joined with the ljm data axis. - cm_axis = first_data_cm.plot(x='tr_size', - y='cm_mae', - kind='line') - # Get the ljm data axis and join it with the cm one. - plot_axis = first_data_ljm.plot(ax=cm_axis, - x='tr_size', - y='ljm_mae', - kind='line') - plot_axis.set_xlabel('tr_size') - plot_axis.set_ylabel('mae') - plot_axis.set_title('mae for different tr_sizes') - # Get the figure and save it. - # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') - - # Get the rest of the benchmark data and drop unnecesary column. - new_data = data.drop(index=range(0, 22)) - new_data = new_data.drop(columns=['ml_type']) - - # Get the first set and rename it. - nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) - ndf_axis = nd_first.plot(x='tr_size', - y='1, 1', - kind='line') - last_axis = ndf_axis - for i in range(22, 99, 11): - lj_s = new_data['lj_s'][i] - lj_e = new_data['lj_e'][i] - new_mae = '{}, {}'.format(lj_s, lj_e) - nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ - .drop(columns=['lj_s', 'lj_e'])\ - .rename(columns={'mae': new_mae}) - last_axis = nd_temp.plot(ax=last_axis, - x='tr_size', - y=new_mae, - kind='line') - print(nd_temp) - - last_axis.set_xlabel('tr_size') - last_axis.set_ylabel('mae') - last_axis.set_title('mae for different parameters of lj(s)') - - last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') - - ndf_axis = nd_first.plot(x='tr_size', - y='1, 1', - kind='line') - last_axis = ndf_axis - for i in range(99, data.shape[0], 11): - lj_s = new_data['lj_s'][i] - lj_e = new_data['lj_e'][i] - new_mae = '{}, {}'.format(lj_s, lj_e) - nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ - .drop(columns=['lj_s', 'lj_e'])\ - .rename(columns={'mae': new_mae}) - last_axis = nd_temp.plot(ax=last_axis, - x='tr_size', - y=new_mae, - kind='line') - print(nd_temp) - - last_axis.set_xlabel('tr_size') - last_axis.set_ylabel('mae') - last_axis.set_title('mae for different parameters of lj(e)') - - last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') - +# from lj_matrix.misc import plot_benchmarks if __name__ == '__main__': do_ml(min_training_size=1500, max_training_size=3000) - # pl() + # plot_benchmarks() print('OK!') diff --git a/lj_matrix/misc.py b/lj_matrix/misc.py index c50653a5c..e9142b05f 100644 --- a/lj_matrix/misc.py +++ b/lj_matrix/misc.py @@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ from colorama import init, Fore, Style +import pandas as pd init() @@ -51,3 +52,123 @@ def printc(text, color): actual_color = color_dic[color] print(actual_color + text + Style.RESET_ALL) + + +def plot_benchmarks(): + """ + For plotting the benchmarks. + """ + # Original columns. + or_cols = ['ml_type', + 'tr_size', + 'te_size', + 'kernel_s', + 'mae', + 'time', + 'lj_s', + 'lj_e', + 'date_ran'] + # Drop some original columns. + dor_cols = ['te_size', + 'kernel_s', + 'time', + 'date_ran'] + + # Read benchmarks data and drop some columns. + data_temp = pd.read_csv('data\\benchmarks.csv',) + data = pd.DataFrame(data_temp, columns=or_cols) + data = data.drop(columns=dor_cols) + + # Get the data of the first benchmarks and drop unnecesary columns. + first_data = pd.DataFrame(data, index=range(0, 22)) + first_data = first_data.drop(columns=['lj_s', 'lj_e']) + + # Columns to keep temporarily. + fd_columns = ['ml_type', + 'tr_size', + 'mae'] + + # Create new dataframes for each matrix descriptor and fill them. + first_data_cm = pd.DataFrame(columns=fd_columns) + first_data_ljm = pd.DataFrame(columns=fd_columns) + for i in range(first_data.shape[0]): + temp_df = first_data.iloc[[i]] + if first_data.at[i, 'ml_type'] == 'CM': + first_data_cm = first_data_cm.append(temp_df) + else: + first_data_ljm = first_data_ljm.append(temp_df) + + # Drop unnecesary column and rename 'mae' for later use. + first_data_cm = first_data_cm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'cm_mae'}) + first_data_ljm = first_data_ljm.drop(columns=['ml_type'])\ + .rename(columns={'mae': 'ljm_mae'}) + # print(first_data_cm) + # print(first_data_ljm) + + # Get the cm data axis so it can be joined with the ljm data axis. + cm_axis = first_data_cm.plot(x='tr_size', + y='cm_mae', + kind='line') + # Get the ljm data axis and join it with the cm one. + plot_axis = first_data_ljm.plot(ax=cm_axis, + x='tr_size', + y='ljm_mae', + kind='line') + plot_axis.set_xlabel('tr_size') + plot_axis.set_ylabel('mae') + plot_axis.set_title('mae for different tr_sizes') + # Get the figure and save it. + # plot_axis.get_figure().savefig('.figs\\mae_diff_tr_sizes.pdf') + + # Get the rest of the benchmark data and drop unnecesary column. + new_data = data.drop(index=range(0, 22)) + new_data = new_data.drop(columns=['ml_type']) + + # Get the first set and rename it. + nd_first = first_data_ljm.rename(columns={'ljm_mae': '1, 1'}) + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(22, 99, 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(s)') + + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_s.pdf') + + ndf_axis = nd_first.plot(x='tr_size', + y='1, 1', + kind='line') + last_axis = ndf_axis + for i in range(99, data.shape[0], 11): + lj_s = new_data['lj_s'][i] + lj_e = new_data['lj_e'][i] + new_mae = '{}, {}'.format(lj_s, lj_e) + nd_temp = pd.DataFrame(new_data, index=range(i, i + 11))\ + .drop(columns=['lj_s', 'lj_e'])\ + .rename(columns={'mae': new_mae}) + last_axis = nd_temp.plot(ax=last_axis, + x='tr_size', + y=new_mae, + kind='line') + print(nd_temp) + + last_axis.set_xlabel('tr_size') + last_axis.set_ylabel('mae') + last_axis.set_title('mae for different parameters of lj(e)') + + last_axis.get_figure().savefig('.figs\\mae_diff_param_lj_e.pdf') -- cgit v1.2.3-54-g00ecf From c1e7b327655ebaa5c44e4bef5b9b675b23782952 Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 11:05:39 -0700 Subject: Refactor code and fix bug --- lj_matrix/do_ml.py | 3 +++ lj_matrix/lj_matrix.py | 17 +++++++++++++++-- lj_matrix/parallel_create_matrices.py | 27 +++++++++++++++++++++------ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index 8724e6831..45dc7a5f0 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -114,6 +114,7 @@ def ml(desc_data, def do_ml(min_training_size, max_training_size=None, training_increment_size=500, + ljm_diag_value=None, ljm_sigma=1.0, ljm_epsilon=1.0, save_benchmarks=False, @@ -127,6 +128,7 @@ def do_ml(min_training_size, min_training_size: minimum training size. max_training_size: maximum training size. training_increment_size: training increment size. + ljm_diag_value: if a special diagonal value should be used in lj matrix. ljm_sigma: sigma value for lj matrix. ljm_epsilon: epsilon value for lj matrix. save_benchmarks: if benchmarks should be saved. @@ -147,6 +149,7 @@ def do_ml(min_training_size, # Matrices calculation. cm_data, ljm_data = parallel_create_matrices(molecules, nuclear_charge, + ljm_diag_value, ljm_sigma, ljm_epsilon, max_len, diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py index 0c16b5686..c3b61becb 100644 --- a/lj_matrix/lj_matrix.py +++ b/lj_matrix/lj_matrix.py @@ -29,6 +29,7 @@ from lj_matrix.misc import printc def lj_matrix(mol_data, nc_data, + diag_value=None, sigma=1.0, epsilon=1.0, max_len=25, @@ -38,6 +39,7 @@ def lj_matrix(mol_data, Creates the Lennard-Jones Matrix from the molecule data given. mol_data: molecule data, matrix of atom coordinates. nc_data: nuclear charge data, array of atom data. + diag_value: if special diagonal value is to be used. sigma: sigma value. epsilon: epsilon value. max_len: maximum amount of atoms in molecule. @@ -86,7 +88,10 @@ def lj_matrix(mol_data, z = (z_i-z_j)**2 if i == j: - lj[i, j] = (0.5*Z_i**2.4) + if not diag_value: + lj[i, j] = (0.5*Z_i**2.4) + else: + lj[i, j] = diag_value else: # Calculations are done after i==j is checked # so no division by zero is done. @@ -144,7 +149,10 @@ def lj_matrix(mol_data, z = (z_i-z_j)**2 if i == j: - lj_row.append(0.5*Z_i**2.4) + if not diag_value: + lj_row.append(0.5*Z_i**2.4) + else: + lj_row.append(diag_value) else: # Calculations are done after i==j is checked # so no division by zero is done. @@ -173,6 +181,7 @@ def lj_matrix(mol_data, def lj_matrix_multiple(mol_data, nc_data, pipe=None, + diag_value=None, sigma=1.0, epsilon=1.0, max_len=25, @@ -184,6 +193,9 @@ def lj_matrix_multiple(mol_data, nc_data: nuclear charge data, array of atom data. pipe: for multiprocessing purposes. Sends the data calculated through a pipe. + diag_value: if special diagonal value is to be used. + sigma: sigma value. + epsilon: epsilon value. max_len: maximum amount of atoms in molecule. as_eig: if data should be returned as matrix or array of eigenvalues. bohr_radius_units: if units should be in bohr's radius units. @@ -193,6 +205,7 @@ def lj_matrix_multiple(mol_data, ljm_data = np.array([lj_matrix(mol, nc, + diag_value, sigma, epsilon, max_len, diff --git a/lj_matrix/parallel_create_matrices.py b/lj_matrix/parallel_create_matrices.py index 0ab691525..cd5ef5c8e 100644 --- a/lj_matrix/parallel_create_matrices.py +++ b/lj_matrix/parallel_create_matrices.py @@ -27,8 +27,9 @@ from lj_matrix.lj_matrix import lj_matrix_multiple def parallel_create_matrices(mol_data, nc_data, - sigma=1.0, - epsilon=1.0, + ljm_diag_value=None, + ljm_sigma=1.0, + ljm_epsilon=1.0, max_len=25, as_eig=True, bohr_radius_units=False): @@ -36,8 +37,9 @@ def parallel_create_matrices(mol_data, Creates the Coulomb and L-J matrices in parallel. mol_data: molecule data, matrix of atom coordinates. nc_data: nuclear charge data, array of atom data. - sigma: sigma value for L-J matrix. - epsilon: epsilon value for L-J matrix. + ljm_diag_value: if special diagonal value is to be used for lj matrix. + ljm_sigma: sigma value for lj matrix. + ljm_epsilon: psilon value for lj matrix. max_len: maximum amount of atoms in molecule. as_eig: if data should be returned as matrix or array of eigenvalues. bohr_radius_units: if units should be in bohr's radius units. @@ -49,14 +51,27 @@ def parallel_create_matrices(mol_data, cm_recv, cm_send = Pipe(False) p1 = Process(target=c_matrix_multiple, - args=(mol_data, nc_data, cm_send)) + args=(mol_data, + nc_data, + cm_send, + max_len, + as_eig, + bohr_radius_units)) procs.append(p1) pipes.append(cm_recv) p1.start() ljm_recv, ljm_send = Pipe(False) p2 = Process(target=lj_matrix_multiple, - args=(mol_data, nc_data, ljm_send, sigma, epsilon)) + args=(mol_data, + nc_data, + ljm_send, + ljm_diag_value, + ljm_sigma, + ljm_epsilon, + max_len, + as_eig, + bohr_radius_units)) procs.append(p2) pipes.append(ljm_recv) p2.start() -- cgit v1.2.3-54-g00ecf From e4f9e15588ec796f73c000a683cc9152454a913c Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 11:12:36 -0700 Subject: Fix bugs --- lj_matrix/__main__.py | 10 +++++++++- lj_matrix/do_ml.py | 7 +++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index 98f341e1e..811024ff0 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -24,6 +24,14 @@ from lj_matrix.do_ml import do_ml # from lj_matrix.misc import plot_benchmarks if __name__ == '__main__': - do_ml(min_training_size=1500, max_training_size=3000) + do_ml(min_training_size=1500, + max_training_size=2000, + training_increment_size=500, + test_size=None, + ljm_diag_value=None, + ljm_sigma=1.0, + ljm_epsilon=1.0, + save_benchmarks=False, + show_msgs=True) # plot_benchmarks() print('OK!') diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index 45dc7a5f0..da9386bf7 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -114,6 +114,7 @@ def ml(desc_data, def do_ml(min_training_size, max_training_size=None, training_increment_size=500, + test_size=None, ljm_diag_value=None, ljm_sigma=1.0, ljm_epsilon=1.0, @@ -128,6 +129,8 @@ def do_ml(min_training_size, min_training_size: minimum training size. max_training_size: maximum training size. training_increment_size: training increment size. + test_size: size of the test set to use. If no size is given, + the last remaining molecules are used. ljm_diag_value: if a special diagonal value should be used in lj matrix. ljm_sigma: sigma value for lj matrix. ljm_epsilon: epsilon value for lj matrix. @@ -170,7 +173,7 @@ def do_ml(min_training_size, i, 'CM', cm_send, - max_training_size, + test_size, sigma, show_msgs)) procs.append(p1) @@ -184,7 +187,7 @@ def do_ml(min_training_size, i, 'L-JM', ljm_send, - max_training_size, + test_size, sigma, show_msgs)) procs.append(p2) -- cgit v1.2.3-54-g00ecf From 4704314c9b4d1066383da5c3d6ca87bba9067c8d Mon Sep 17 00:00:00 2001 From: David Luevano <55825613+luevano@users.noreply.github.com> Date: Sat, 28 Dec 2019 11:37:22 -0700 Subject: Refactor code --- lj_matrix/__main__.py | 1 + lj_matrix/do_ml.py | 5 ++++- lj_matrix/lj_matrix.py | 2 +- lj_matrix/read_qm7_data.py | 7 ++++--- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lj_matrix/__main__.py b/lj_matrix/__main__.py index 811024ff0..688e5adcc 100644 --- a/lj_matrix/__main__.py +++ b/lj_matrix/__main__.py @@ -31,6 +31,7 @@ if __name__ == '__main__': ljm_diag_value=None, ljm_sigma=1.0, ljm_epsilon=1.0, + r_seed=111, save_benchmarks=False, show_msgs=True) # plot_benchmarks() diff --git a/lj_matrix/do_ml.py b/lj_matrix/do_ml.py index da9386bf7..25a55e823 100644 --- a/lj_matrix/do_ml.py +++ b/lj_matrix/do_ml.py @@ -118,6 +118,7 @@ def do_ml(min_training_size, ljm_diag_value=None, ljm_sigma=1.0, ljm_epsilon=1.0, + r_seed=111, save_benchmarks=False, max_len=25, as_eig=True, @@ -134,6 +135,7 @@ def do_ml(min_training_size, ljm_diag_value: if a special diagonal value should be used in lj matrix. ljm_sigma: sigma value for lj matrix. ljm_epsilon: epsilon value for lj matrix. + r_seed: random seed to use for the shuffling. save_benchmarks: if benchmarks should be saved. max_len: maximum amount of atoms in molecule. as_eig: if data should be returned as matrix or array of eigenvalues. @@ -147,7 +149,8 @@ def do_ml(min_training_size, max_training_size = min_training_size + training_increment_size # Data reading. - molecules, nuclear_charge, energy_pbe0, energy_delta = read_qm7_data() + molecules, nuclear_charge, energy_pbe0, energy_delta =\ + read_qm7_data(r_seed) # Matrices calculation. cm_data, ljm_data = parallel_create_matrices(molecules, diff --git a/lj_matrix/lj_matrix.py b/lj_matrix/lj_matrix.py index c3b61becb..6739ae283 100644 --- a/lj_matrix/lj_matrix.py +++ b/lj_matrix/lj_matrix.py @@ -88,7 +88,7 @@ def lj_matrix(mol_data, z = (z_i-z_j)**2 if i == j: - if not diag_value: + if diag_value is None: lj[i, j] = (0.5*Z_i**2.4) else: lj[i, j] = diag_value diff --git a/lj_matrix/read_qm7_data.py b/lj_matrix/read_qm7_data.py index 9bb7629ca..4401ca1c0 100644 --- a/lj_matrix/read_qm7_data.py +++ b/lj_matrix/read_qm7_data.py @@ -59,7 +59,7 @@ def read_db_data(zi_data, its contents as usable variables. zi_data: dictionary containing nuclear charge data. data_path: path to the data directory. - r_seed: random seed. + r_seed: random seed to use for the shuffling. """ os.chdir(data_path) @@ -122,9 +122,10 @@ def read_db_data(zi_data, return molecules, nuclear_charge, energy_pbe0, energy_delta -def read_qm7_data(): +def read_qm7_data(r_seed=111): """ Reads all the qm7 data. + r_seed: random seed to use for the shuffling. """ tic = time.perf_counter() printc('Data reading started.', 'CYAN') @@ -135,7 +136,7 @@ def read_qm7_data(): zi_data = read_nc_data(data_path) molecules, nuclear_charge, energy_pbe0, energy_delta = \ - read_db_data(zi_data, data_path) + read_db_data(zi_data, data_path, r_seed) os.chdir(init_path) toc = time.perf_counter() -- cgit v1.2.3-54-g00ecf