summary refs log tree commit diff
path: root/ml_exp
diff options
context:
space:
mode:
author David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-03-11 10:47:45 -0700
committer David Luevano Alvarado <55825613+luevano@users.noreply.github.com> 2020-03-11 10:47:45 -0700
commit 25a3933873f6b7f074e190ad0256f4ecdc40b9a2 (patch)
tree 882607ffc8b88a320dfd73275b4ac34f576b4714 /ml_exp
parent d48adb6ca31cab1d86d81aa5c88bfeb198f19af8 (diff)
Add qm9db
Diffstat (limited to 'ml_exp')
-rw-r--r-- ml_exp/qm9db.py 62
1 files changed, 62 insertions, 0 deletions
diff --git a/ml_exp/qm9db.py b/ml_exp/qm9db.py
new file mode 100644
index 000000000..8354075bc
--- /dev/null
+++ b/ml_exp/qm9db.py
@@ -0,0 +1,62 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from ml_exp.compound import Compound
+import numpy as np
+try:
+ import tensorflow as tf
+ TF_AV = True
+except ImportError:
+ print('Tensorflow couldn\'t be imported. Maybe it is not installed.')
+ TF_AV = False
+import random
+
+
+def qm9db(db_path='data',
+          is_shuffled=True,
+          r_seed=111,
+          use_tf=True):
+    """
+    Creates a list of compounds with the qm9 database.
+    db_path: directory holding xyz_qm9.txt; entries resolve relative to it.
+    is_shuffled: if the resulting list of compounds should be shuffled.
+    r_seed: random seed to use for the shuffling.
+    use_tf: if tensorflow should be used (disabled when not importable).
+    Returns the list of Compound objects, one per non-blank index line.
+    """
+    # If tf is to be used but couldn't be imported, don't try to use it.
+    if use_tf and not TF_AV:
+        use_tf = False
+
+    fname = f'{db_path}/xyz_qm9.txt'
+    with open(fname, 'r') as f:
+        # One entry per line; blank lines are skipped so they do not yield
+        # a Compound built from the bare directory path.
+        names = [name for name in map(str.strip, f) if name]
+    compounds = [Compound(f'{db_path}/{name}', db='qm9') for name in names]
+
+    if is_shuffled:
+        # Reseeds the module-level RNG so the order is reproducible.
+        random.seed(r_seed)
+        random.shuffle(compounds)
+
+    return compounds