From 3c8a53f42a4a90d3cc235fc4e4132556b9cd74fa Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> Date: Tue, 25 Feb 2020 21:06:10 -0700 Subject: Rename function --- ml_exp/qm7db.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++++ ml_exp/read_qm7_data.py | 106 ------------------------------------------------ 2 files changed, 106 insertions(+), 106 deletions(-) create mode 100644 ml_exp/qm7db.py delete mode 100644 ml_exp/read_qm7_data.py diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py new file mode 100644 index 000000000..e584bc123 --- /dev/null +++ b/ml_exp/qm7db.py @@ -0,0 +1,106 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import os +import numpy as np +import random + + +# 'hof_qm7.txt.txt' retrieved from +# https://github.com/qmlcode/tutorial +def qm7db(zi_data, + data_path, + r_seed=111, + return_atoms=False): + """ + Reads molecule database and extracts + its contents as usable variables. + zi_data: dictionary containing nuclear charge data. + data_path: path to the data directory. + r_seed: random seed to use for the shuffling. + return_atoms: if atom list should be returned. + """ + os.chdir(data_path) + + fname = 'hof_qm7.txt' + with open(fname, 'r') as infile: + lines = infile.readlines() + + # Temporary energy dictionary. + energy_temp = dict() + + for line in lines: + xyz_data = line.split() + + xyz_name = xyz_data[0] + hof = float(xyz_data[1]) + dftb = float(xyz_data[2]) + # print(xyz_name, hof, dftb) + + energy_temp[xyz_name] = np.array([hof, hof - dftb]) + + # Use a random seed. + random.seed(r_seed) + + et_keys = list(energy_temp.keys()) + random.shuffle(et_keys) + + # Temporary energy dictionary, shuffled. + energy_temp_shuffled = dict() + for key in et_keys: + energy_temp_shuffled.update({key: energy_temp[key]}) + + mol_data = [] + mol_nc_data = [] + atoms = [] + # Actual reading of the xyz files. + for i, k in enumerate(energy_temp_shuffled.keys()): + with open(k, 'r') as xyz_file: + lines = xyz_file.readlines() + + len_lines = len(lines) + mol_temp_data = [] + mol_nc_temp_data = np.array(np.zeros(len_lines-2)) + atoms_temp = [] + for j, line in enumerate(lines[2:len_lines]): + line_list = line.split() + + atoms_temp.append(line_list[0]) + mol_nc_temp_data[j] = float(zi_data[line_list[0]]) + line_data = np.array(np.asarray(line_list[1:4], dtype=float)) + mol_temp_data.append(line_data) + + mol_data.append(mol_temp_data) + mol_nc_data.append(mol_nc_temp_data) + atoms.append(atoms_temp) + + # Convert everything to a numpy array. + molecules = np.array([np.array(mol) for mol in mol_data]) + nuclear_charge = np.array([nc_d for nc_d in mol_nc_data]) + energy_pbe0 = np.array([energy_temp_shuffled[k][0] + for k in energy_temp_shuffled.keys()]) + energy_delta = np.array([energy_temp_shuffled[k][1] + for k in energy_temp_shuffled.keys()]) + + if return_atoms: + return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms + return molecules, nuclear_charge, energy_pbe0, energy_delta diff --git a/ml_exp/read_qm7_data.py b/ml_exp/read_qm7_data.py deleted file mode 100644 index 3017ac429..000000000 --- a/ml_exp/read_qm7_data.py +++ /dev/null @@ -1,106 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import os -import numpy as np -import random - - -# 'hof_qm7.txt.txt' retrieved from -# https://github.com/qmlcode/tutorial -def read_db_data(zi_data, - data_path, - r_seed=111, - return_atoms=False): - """ - Reads molecule database and extracts - its contents as usable variables. - zi_data: dictionary containing nuclear charge data. - data_path: path to the data directory. - r_seed: random seed to use for the shuffling. - return_atoms: if atom list should be returned. - """ - os.chdir(data_path) - - fname = 'hof_qm7.txt' - with open(fname, 'r') as infile: - lines = infile.readlines() - - # Temporary energy dictionary. - energy_temp = dict() - - for line in lines: - xyz_data = line.split() - - xyz_name = xyz_data[0] - hof = float(xyz_data[1]) - dftb = float(xyz_data[2]) - # print(xyz_name, hof, dftb) - - energy_temp[xyz_name] = np.array([hof, hof - dftb]) - - # Use a random seed. - random.seed(r_seed) - - et_keys = list(energy_temp.keys()) - random.shuffle(et_keys) - - # Temporary energy dictionary, shuffled. - energy_temp_shuffled = dict() - for key in et_keys: - energy_temp_shuffled.update({key: energy_temp[key]}) - - mol_data = [] - mol_nc_data = [] - atoms = [] - # Actual reading of the xyz files. - for i, k in enumerate(energy_temp_shuffled.keys()): - with open(k, 'r') as xyz_file: - lines = xyz_file.readlines() - - len_lines = len(lines) - mol_temp_data = [] - mol_nc_temp_data = np.array(np.zeros(len_lines-2)) - atoms_temp = [] - for j, line in enumerate(lines[2:len_lines]): - line_list = line.split() - - atoms_temp.append(line_list[0]) - mol_nc_temp_data[j] = float(zi_data[line_list[0]]) - line_data = np.array(np.asarray(line_list[1:4], dtype=float)) - mol_temp_data.append(line_data) - - mol_data.append(mol_temp_data) - mol_nc_data.append(mol_nc_temp_data) - atoms.append(atoms_temp) - - # Convert everything to a numpy array. - molecules = np.array([np.array(mol) for mol in mol_data]) - nuclear_charge = np.array([nc_d for nc_d in mol_nc_data]) - energy_pbe0 = np.array([energy_temp_shuffled[k][0] - for k in energy_temp_shuffled.keys()]) - energy_delta = np.array([energy_temp_shuffled[k][1] - for k in energy_temp_shuffled.keys()]) - - if return_atoms: - return molecules, nuclear_charge, energy_pbe0, energy_delta, atoms - return molecules, nuclear_charge, energy_pbe0, energy_delta -- cgit v1.2.3-70-g09d2