diff options
author | David Luevano Alvarado <55825613+luevano@users.noreply.github.com> | 2020-03-11 10:52:02 -0700 |
---|---|---|
committer | David Luevano Alvarado <55825613+luevano@users.noreply.github.com> | 2020-03-11 10:52:02 -0700 |
commit | e94c61e224f8cd4b66a870a9d9d03c0ed810d656 (patch) | |
tree | 27619a21eaf6c4943a2ca65f1f7a4272f0cfa9b1 | |
parent | 25a3933873f6b7f074e190ad0256f4ecdc40b9a2 (diff) |
Refactor db reading
-rw-r--r-- | ml_exp/__init__.py | 3 | ||||
-rw-r--r-- | ml_exp/krr.py | 2 | ||||
-rw-r--r-- | ml_exp/qm9db.py | 62 | ||||
-rw-r--r-- | ml_exp/readdb.py (renamed from ml_exp/qm7db.py) | 31 |
4 files changed, 34 insertions, 64 deletions
diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py index 02819d6a1..b40957baf 100644 --- a/ml_exp/__init__.py +++ b/ml_exp/__init__.py @@ -23,7 +23,7 @@ SOFTWARE. from ml_exp.compound import Compound from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\ get_helping_data, adjacency_matrix, epsilon_index, check_bond, bag_of_bonds -from ml_exp.qm7db import qm7db +from ml_exp.readdb import qm7db, qm9db from ml_exp.data import NUCLEAR_CHARGE, POSSIBLE_BONDS from ml_exp.kernels import laplauss_kernel from ml_exp.krr import krr, multi_krr @@ -37,6 +37,7 @@ __all__ = ['Compound', 'check_bond', 'bag_of_bonds', 'qm7db', + 'qm9db', 'laplauss_kernel', 'krr', 'multi_krr', diff --git a/ml_exp/krr.py b/ml_exp/krr.py index dca1e14e6..a77bfdaf8 100644 --- a/ml_exp/krr.py +++ b/ml_exp/krr.py @@ -31,7 +31,7 @@ except ImportError: TF_AV = False from ml_exp.misc import printc from ml_exp.kernels import laplauss_kernel -from ml_exp.qm7db import qm7db +from ml_exp.readdb import qm7db def krr(descriptors, diff --git a/ml_exp/qm9db.py b/ml_exp/qm9db.py deleted file mode 100644 index 8354075bc..000000000 --- a/ml_exp/qm9db.py +++ /dev/null @@ -1,62 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano Alvarado - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -from ml_exp.compound import Compound -import numpy as np -try: - import tensorflow as tf - TF_AV = True -except ImportError: - print('Tensorflow couldn\'t be imported. Maybe it is not installed.') - TF_AV = False -import random - - -def qm9db(db_path='data', - is_shuffled=True, - r_seed=111, - use_tf=True): - """ - Creates a list of compounds with the qm9 database. - db_path: path to the database directory. - is_shuffled: if the resulting list of compounds should be shuffled. - r_seed: random seed to use for the shuffling. - use_tf: if tensorflow should be used. - """ - # If tf is to be used but couldn't be imported, don't try to use it. - if use_tf and not TF_AV: - use_tf = False - - fname = f'{db_path}/xyz_qm9.txt' - with open(fname, 'r') as f: - lines = f.readlines() - - compounds = [] - for i, line in enumerate(lines): - line = line.strip() - compounds.append(Compound(f'{db_path}/{line}', db='qm9')) - - if is_shuffled: - random.seed(r_seed) - random.shuffle(compounds) - - return compounds diff --git a/ml_exp/qm7db.py b/ml_exp/readdb.py index 1e78b8d56..e6fbc7224 100644 --- a/ml_exp/qm7db.py +++ b/ml_exp/readdb.py @@ -74,3 +74,34 @@ def qm7db(db_path='data', raise TypeError('No GPU found, could not create Tensor objects.') return compounds, e_pbe0, e_delta + + +def qm9db(db_path='data', + is_shuffled=True, + r_seed=111, + use_tf=True): + """ + Creates a list of compounds with the qm9 database. + db_path: path to the database directory. + is_shuffled: if the resulting list of compounds should be shuffled. + r_seed: random seed to use for the shuffling. + use_tf: if tensorflow should be used. + """ + # If tf is to be used but couldn't be imported, don't try to use it. + if use_tf and not TF_AV: + use_tf = False + + fname = f'{db_path}/xyz_qm9.txt' + with open(fname, 'r') as f: + lines = f.readlines() + + compounds = [] + for i, line in enumerate(lines): + line = line.strip() + compounds.append(Compound(f'{db_path}/{line}', db='qm9')) + + if is_shuffled: + random.seed(r_seed) + random.shuffle(compounds) + + return compounds |