summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-12 19:38:23 -0700
committer: David Luevano <55825613+luevano@users.noreply.github.com> 2019-12-12 19:38:23 -0700
commit: c2b15a51e80a051fe9dbc2e558937f54bf4a459c (patch)
tree: 878f28debb120fa7c666e9758fe1b04e58c6c619
parent: 5ac02bfda4b3ff0700344a1b8757ec3d586ea7a5 (diff)
Reformat data reading
-rw-r--r-- main.py 20
-rw-r--r-- read_nc_data.py 44
-rw-r--r-- read_qm7_data.py (renamed from read_db_edata.py) 52
3 files changed, 52 insertions, 64 deletions
diff --git a/main.py b/main.py
index 88734d57f..f37054b3b 100644
--- a/main.py
+++ b/main.py
@@ -20,34 +20,20 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
-import os
import time
from misc import printc
# import matplotlib.pyplot as plt
-from read_nc_data import read_nc_data
-from read_db_edata import read_db_edata
from c_matrix import c_matrix_multiple
from lj_matrix import lj_matrix_multiple
from do_ml import do_ml
+from read_qm7_data import read_qm7_data
# Initialization time.
init_time = time.perf_counter()
# Data reading.
-tic = time.perf_counter()
-printc('Data reading started.', 'CYAN')
-
-init_path = os.getcwd()
-os.chdir('data')
-data_path = os.getcwd()
-
-zi_data = read_nc_data(data_path)
-molecules, nuclear_charge, energy_pbe0, energy_delta = \
- read_db_edata(zi_data, data_path)
-
-os.chdir(init_path)
-toc = time.perf_counter()
-printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
+zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta =\
+ read_qm7_data()
# Matrices calculation.
cm_data = c_matrix_multiple(molecules, nuclear_charge, as_eig=True)
diff --git a/read_nc_data.py b/read_nc_data.py
deleted file mode 100644
index d7891f8f6..000000000
--- a/read_nc_data.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-# 'periodic_table_of_elements.txt' retrieved from
-# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee
-
-
-def read_nc_data(data_path):
- """
- Reads nuclear charge data from file and returns a dictionary.
- data_path: path to the data directory.
- """
- fname = 'periodic_table_of_elements.txt'
- with open(''.join([data_path, '\\', fname]), 'r') as infile:
- temp_lines = infile.readlines()
-
- del temp_lines[0]
-
- lines = []
- for temp_line in temp_lines:
- new_line = temp_line.split(sep=',')
- lines.append(new_line)
-
- # Dictionary of nuclear charge.
- return {line[2]: int(line[0]) for line in lines}
diff --git a/read_db_edata.py b/read_qm7_data.py
index 893edf26e..0c0cc88aa 100644
--- a/read_db_edata.py
+++ b/read_qm7_data.py
@@ -21,15 +21,39 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
import os
+import time
import numpy as np
import random
+from misc import printc
+# 'periodic_table_of_elements.txt' retrieved from
+# https://gist.github.com/GoodmanSciences/c2dd862cd38f21b0ad36b8f96b4bf1ee
+
+
+def read_nc_data(data_path):
+ """
+ Reads nuclear charge data from file and returns a dictionary.
+ data_path: path to the data directory.
+ """
+ fname = 'periodic_table_of_elements.txt'
+ with open(''.join([data_path, '\\', fname]), 'r') as infile:
+ temp_lines = infile.readlines()
+
+ del temp_lines[0]
+
+ lines = []
+ for temp_line in temp_lines:
+ new_line = temp_line.split(sep=',')
+ lines.append(new_line)
+
+ # Dictionary of nuclear charge.
+ return {line[2]: int(line[0]) for line in lines}
# 'hof_qm7.txt.txt' retrieved from
# https://github.com/qmlcode/tutorial
-def read_db_edata(zi_data,
- data_path,
- r_seed=111):
+def reas_db_data(zi_data,
+ data_path,
+ r_seed=111):
"""
Reads molecule database and extracts
its contents as usable variables.
@@ -96,3 +120,25 @@ def read_db_edata(zi_data,
for k in energy_temp_shuffled.keys()])
return molecules, nuclear_charge, energy_pbe0, energy_delta
+
+
+def read_qm7_data():
+ """
+ Reads all the qm7 data.
+ """
+ tic = time.perf_counter()
+ printc('Data reading started.', 'CYAN')
+
+ init_path = os.getcwd()
+ os.chdir('data')
+ data_path = os.getcwd()
+
+ zi_data = read_nc_data(data_path)
+ molecules, nuclear_charge, energy_pbe0, energy_delta = \
+ reas_db_data(zi_data, data_path)
+
+ os.chdir(init_path)
+ toc = time.perf_counter()
+ printc('\tData reading took {:.4f} seconds.'.format(toc-tic), 'GREEN')
+
+ return zi_data, molecules, nuclear_charge, energy_pbe0, energy_delta