From e94c61e224f8cd4b66a870a9d9d03c0ed810d656 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com> Date: Wed, 11 Mar 2020 10:52:02 -0700 Subject: Refactor db reading --- ml_exp/__init__.py | 3 +- ml_exp/krr.py | 2 +- ml_exp/qm7db.py | 76 ------------------------------------- ml_exp/qm9db.py | 62 ------------------------------- ml_exp/readdb.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 110 insertions(+), 140 deletions(-) delete mode 100644 ml_exp/qm7db.py delete mode 100644 ml_exp/qm9db.py create mode 100644 ml_exp/readdb.py diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py index 02819d6a1..b40957baf 100644 --- a/ml_exp/__init__.py +++ b/ml_exp/__init__.py @@ -23,7 +23,7 @@ SOFTWARE. from ml_exp.compound import Compound from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ get_helping_data, adjacency_matrix, epsilon_index, check_bond, bag_of_bonds -from ml_exp.qm7db import qm7db +from ml_exp.readdb import qm7db, qm9db from ml_exp.data import NUCLEAR_CHARGE, POSSIBLE_BONDS from ml_exp.kernels import laplauss_kernel from ml_exp.krr import krr, multi_krr @@ -37,6 +37,7 @@ __all__ = ['Compound', 'check_bond', 'bag_of_bonds', 'qm7db', + 'qm9db', 'laplauss_kernel', 'krr', 'multi_krr', diff --git a/ml_exp/krr.py b/ml_exp/krr.py index dca1e14e6..a77bfdaf8 100644 --- a/ml_exp/krr.py +++ b/ml_exp/krr.py @@ -31,7 +31,7 @@ except ImportError: TF_AV = False from ml_exp.misc import printc from ml_exp.kernels import laplauss_kernel -from ml_exp.qm7db import qm7db +from ml_exp.readdb import qm7db def krr(descriptors, diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py deleted file mode 100644 index 1e78b8d56..000000000 --- a/ml_exp/qm7db.py +++ /dev/null @@ -1,76 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -from ml_exp.compound import Compound -import numpy as np -try: - import tensorflow as tf - TF_AV = True -except ImportError: - print('Tensorflow couldn\'t be imported. Maybe it is not installed.') - TF_AV = False -import random - - -def qm7db(db_path='data', - is_shuffled=True, - r_seed=111, - use_tf=True): - """ - Creates a list of compounds with the qm7 database. - db_path: path to the database directory. - is_shuffled: if the resulting list of compounds should be shuffled. - r_seed: random seed to use for the shuffling. - use_tf: if tensorflow should be used. - """ - # If tf is to be used but couldn't be imported, don't try to use it. - if use_tf and not TF_AV: - use_tf = False - - fname = f'{db_path}/hof_qm7.txt' - with open(fname, 'r') as f: - lines = f.readlines() - - compounds = [] - for i, line in enumerate(lines): - line = line.split() - compounds.append(Compound(f'{db_path}/{line[0]}', db='qm7')) - compounds[i].qm7pbe0 = np.float64(line[1]) - compounds[i].qm7delta = np.float64(line[1]) - np.float64(line[2]) - - if is_shuffled: - random.seed(r_seed) - random.shuffle(compounds) - - e_pbe0 = np.array([comp.qm7pbe0 for comp in compounds], dtype=np.float64) - e_delta = np.array([comp.qm7delta for comp in compounds], dtype=np.float64) - - if use_tf: - # Check if there's a gpu available and use the first one. - if tf.config.experimental.list_physical_devices('GPU'): - with tf.device('GPU:0'): - e_pbe0 = tf.convert_to_tensor(e_pbe0) - e_delta = tf.convert_to_tensor(e_delta) - else: - raise TypeError('No GPU found, could not create Tensor objects.') - - return compounds, e_pbe0, e_delta diff --git a/ml_exp/qm9db.py b/ml_exp/qm9db.py deleted file mode 100644 index 8354075bc..000000000 --- a/ml_exp/qm9db.py +++ /dev/null @@ -1,62 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -from ml_exp.compound import Compound -import numpy as np -try: - import tensorflow as tf - TF_AV = True -except ImportError: - print('Tensorflow couldn\'t be imported. Maybe it is not installed.') - TF_AV = False -import random - - -def qm9db(db_path='data', - is_shuffled=True, - r_seed=111, - use_tf=True): - """ - Creates a list of compounds with the qm9 database. - db_path: path to the database directory. - is_shuffled: if the resulting list of compounds should be shuffled. - r_seed: random seed to use for the shuffling. - use_tf: if tensorflow should be used. - """ - # If tf is to be used but couldn't be imported, don't try to use it. - if use_tf and not TF_AV: - use_tf = False - - fname = f'{db_path}/xyz_qm9.txt' - with open(fname, 'r') as f: - lines = f.readlines() - - compounds = [] - for i, line in enumerate(lines): - line = line.strip() - compounds.append(Compound(f'{db_path}/{line}', db='qm9')) - - if is_shuffled: - random.seed(r_seed) - random.shuffle(compounds) - - return compounds diff --git a/ml_exp/readdb.py b/ml_exp/readdb.py new file mode 100644 index 000000000..e6fbc7224 --- /dev/null +++ b/ml_exp/readdb.py @@ -0,0 +1,107 @@ +"""MIT License + +Copyright (c) 2019 David Luevano Alvarado + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from ml_exp.compound import Compound +import numpy as np +try: + import tensorflow as tf + TF_AV = True +except ImportError: + print('Tensorflow couldn\'t be imported. Maybe it is not installed.') + TF_AV = False +import random + + +def qm7db(db_path='data', + is_shuffled=True, + r_seed=111, + use_tf=True): + """ + Creates a list of compounds with the qm7 database. + db_path: path to the database directory. + is_shuffled: if the resulting list of compounds should be shuffled. + r_seed: random seed to use for the shuffling. + use_tf: if tensorflow should be used. + """ + # If tf is to be used but couldn't be imported, don't try to use it. + if use_tf and not TF_AV: + use_tf = False + + fname = f'{db_path}/hof_qm7.txt' + with open(fname, 'r') as f: + lines = f.readlines() + + compounds = [] + for i, line in enumerate(lines): + line = line.split() + compounds.append(Compound(f'{db_path}/{line[0]}', db='qm7')) + compounds[i].qm7pbe0 = np.float64(line[1]) + compounds[i].qm7delta = np.float64(line[1]) - np.float64(line[2]) + + if is_shuffled: + random.seed(r_seed) + random.shuffle(compounds) + + e_pbe0 = np.array([comp.qm7pbe0 for comp in compounds], dtype=np.float64) + e_delta = np.array([comp.qm7delta for comp in compounds], dtype=np.float64) + + if use_tf: + # Check if there's a gpu available and use the first one. + if tf.config.experimental.list_physical_devices('GPU'): + with tf.device('GPU:0'): + e_pbe0 = tf.convert_to_tensor(e_pbe0) + e_delta = tf.convert_to_tensor(e_delta) + else: + raise TypeError('No GPU found, could not create Tensor objects.') + + return compounds, e_pbe0, e_delta + + +def qm9db(db_path='data', + is_shuffled=True, + r_seed=111, + use_tf=True): + """ + Creates a list of compounds with the qm9 database. + db_path: path to the database directory. + is_shuffled: if the resulting list of compounds should be shuffled. + r_seed: random seed to use for the shuffling. + use_tf: if tensorflow should be used. + """ + # If tf is to be used but couldn't be imported, don't try to use it. + if use_tf and not TF_AV: + use_tf = False + + fname = f'{db_path}/xyz_qm9.txt' + with open(fname, 'r') as f: + lines = f.readlines() + + compounds = [] + for i, line in enumerate(lines): + line = line.strip() + compounds.append(Compound(f'{db_path}/{line}', db='qm9')) + + if is_shuffled: + random.seed(r_seed) + random.shuffle(compounds) + + return compounds -- cgit v1.2.3-54-g00ecf