From e94c61e224f8cd4b66a870a9d9d03c0ed810d656 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <55825613+luevano@users.noreply.github.com>
Date: Wed, 11 Mar 2020 10:52:02 -0700
Subject: Refactor db reading

---
 ml_exp/__init__.py |   3 +-
 ml_exp/krr.py      |   2 +-
 ml_exp/qm7db.py    |  76 -------------------------------------
 ml_exp/qm9db.py    |  62 -------------------------------
 ml_exp/readdb.py   | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+), 140 deletions(-)
 delete mode 100644 ml_exp/qm7db.py
 delete mode 100644 ml_exp/qm9db.py
 create mode 100644 ml_exp/readdb.py

diff --git a/ml_exp/__init__.py b/ml_exp/__init__.py
index 02819d6a1..b40957baf 100644
--- a/ml_exp/__init__.py
+++ b/ml_exp/__init__.py
@@ -23,7 +23,7 @@ SOFTWARE.
 from ml_exp.compound import Compound
 from ml_exp.representations import coulomb_matrix, lennard_jones_matrix,\
         get_helping_data, adjacency_matrix, epsilon_index, check_bond, bag_of_bonds
-from ml_exp.qm7db import qm7db
+from ml_exp.readdb import qm7db, qm9db
 from ml_exp.data import NUCLEAR_CHARGE, POSSIBLE_BONDS
 from ml_exp.kernels import laplauss_kernel
 from ml_exp.krr import krr, multi_krr
@@ -37,6 +37,7 @@ __all__ = ['Compound',
            'check_bond',
            'bag_of_bonds',
            'qm7db',
+           'qm9db',
            'laplauss_kernel',
            'krr',
            'multi_krr',
diff --git a/ml_exp/krr.py b/ml_exp/krr.py
index dca1e14e6..a77bfdaf8 100644
--- a/ml_exp/krr.py
+++ b/ml_exp/krr.py
@@ -31,7 +31,7 @@ except ImportError:
     TF_AV = False
 from ml_exp.misc import printc
 from ml_exp.kernels import laplauss_kernel
-from ml_exp.qm7db import qm7db
+from ml_exp.readdb import qm7db
 
 
 def krr(descriptors,
diff --git a/ml_exp/qm7db.py b/ml_exp/qm7db.py
deleted file mode 100644
index 1e78b8d56..000000000
--- a/ml_exp/qm7db.py
+++ /dev/null
@@ -1,76 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from ml_exp.compound import Compound
-import numpy as np
-try:
-    import tensorflow as tf
-    TF_AV = True
-except ImportError:
-    print('Tensorflow couldn\'t be imported. Maybe it is not installed.')
-    TF_AV = False
-import random
-
-
-def qm7db(db_path='data',
-          is_shuffled=True,
-          r_seed=111,
-          use_tf=True):
-    """
-    Creates a list of compounds with the qm7 database.
-    db_path: path to the database directory.
-    is_shuffled: if the resulting list of compounds should be shuffled.
-    r_seed: random seed to use for the shuffling.
-    use_tf: if tensorflow should be used.
-    """
-    # If tf is to be used but couldn't be imported, don't try to use it.
-    if use_tf and not TF_AV:
-        use_tf = False
-
-    fname = f'{db_path}/hof_qm7.txt'
-    with open(fname, 'r') as f:
-        lines = f.readlines()
-
-    compounds = []
-    for i, line in enumerate(lines):
-        line = line.split()
-        compounds.append(Compound(f'{db_path}/{line[0]}', db='qm7'))
-        compounds[i].qm7pbe0 = np.float64(line[1])
-        compounds[i].qm7delta = np.float64(line[1]) - np.float64(line[2])
-
-    if is_shuffled:
-        random.seed(r_seed)
-        random.shuffle(compounds)
-
-    e_pbe0 = np.array([comp.qm7pbe0 for comp in compounds], dtype=np.float64)
-    e_delta = np.array([comp.qm7delta for comp in compounds], dtype=np.float64)
-
-    if use_tf:
-        # Check if there's a gpu available and use the first one.
-        if tf.config.experimental.list_physical_devices('GPU'):
-            with tf.device('GPU:0'):
-                e_pbe0 = tf.convert_to_tensor(e_pbe0)
-                e_delta = tf.convert_to_tensor(e_delta)
-        else:
-            raise TypeError('No GPU found, could not create Tensor objects.')
-
-    return compounds, e_pbe0, e_delta
diff --git a/ml_exp/qm9db.py b/ml_exp/qm9db.py
deleted file mode 100644
index 8354075bc..000000000
--- a/ml_exp/qm9db.py
+++ /dev/null
@@ -1,62 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano Alvarado
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from ml_exp.compound import Compound
-import numpy as np
-try:
-    import tensorflow as tf
-    TF_AV = True
-except ImportError:
-    print('Tensorflow couldn\'t be imported. Maybe it is not installed.')
-    TF_AV = False
-import random
-
-
-def qm9db(db_path='data',
-          is_shuffled=True,
-          r_seed=111,
-          use_tf=True):
-    """
-    Creates a list of compounds with the qm9 database.
-    db_path: path to the database directory.
-    is_shuffled: if the resulting list of compounds should be shuffled.
-    r_seed: random seed to use for the shuffling.
-    use_tf: if tensorflow should be used.
-    """
-    # If tf is to be used but couldn't be imported, don't try to use it.
-    if use_tf and not TF_AV:
-        use_tf = False
-
-    fname = f'{db_path}/xyz_qm9.txt'
-    with open(fname, 'r') as f:
-        lines = f.readlines()
-
-    compounds = []
-    for i, line in enumerate(lines):
-        line = line.strip()
-        compounds.append(Compound(f'{db_path}/{line}', db='qm9'))
-
-    if is_shuffled:
-        random.seed(r_seed)
-        random.shuffle(compounds)
-
-    return compounds
diff --git a/ml_exp/readdb.py b/ml_exp/readdb.py
new file mode 100644
index 000000000..e6fbc7224
--- /dev/null
+++ b/ml_exp/readdb.py
@@ -0,0 +1,107 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano Alvarado
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from ml_exp.compound import Compound
+import numpy as np
+try:
+    import tensorflow as tf
+    TF_AV = True
+except ImportError:
+    print('Tensorflow couldn\'t be imported. Maybe it is not installed.')
+    TF_AV = False
+import random
+
+
+def qm7db(db_path='data',
+          is_shuffled=True,
+          r_seed=111,
+          use_tf=True):
+    """
+    Creates a list of compounds with the qm7 database.
+    db_path: path to the database directory (holds 'hof_qm7.txt').
+    is_shuffled: if the resulting list should be shuffled (seeded by r_seed).
+    use_tf: if tensorflow should be used; TypeError if no GPU is found.
+    Returns (compounds, e_pbe0, e_delta); arrays follow compounds order.
+    """
+    # If tf is to be used but couldn't be imported, don't try to use it.
+    if use_tf and not TF_AV:
+        use_tf = False
+
+    fname = f'{db_path}/hof_qm7.txt'
+    with open(fname, 'r') as f:
+        lines = f.readlines()
+
+    compounds = []
+    for i, line in enumerate(lines):
+        line = line.split()  # whitespace-separated: file name, two floats
+        compounds.append(Compound(f'{db_path}/{line[0]}', db='qm7'))
+        compounds[i].qm7pbe0 = np.float64(line[1])
+        compounds[i].qm7delta = np.float64(line[1]) - np.float64(line[2])
+
+    if is_shuffled:
+        random.seed(r_seed)  # reseeds the module-level random state
+        random.shuffle(compounds)
+
+    e_pbe0 = np.array([comp.qm7pbe0 for comp in compounds], dtype=np.float64)
+    e_delta = np.array([comp.qm7delta for comp in compounds], dtype=np.float64)
+
+    if use_tf:
+        # Check if there's a gpu available and use the first one.
+        if tf.config.experimental.list_physical_devices('GPU'):
+            with tf.device('GPU:0'):
+                e_pbe0 = tf.convert_to_tensor(e_pbe0)
+                e_delta = tf.convert_to_tensor(e_delta)
+        else:
+            raise TypeError('No GPU found, could not create Tensor objects.')
+
+    return compounds, e_pbe0, e_delta
+
+
+def qm9db(db_path='data',
+          is_shuffled=True,
+          r_seed=111,
+          use_tf=True):
+    """
+    Creates a list of compounds with the qm9 database.
+    db_path: path to the database directory (holds 'xyz_qm9.txt').
+    is_shuffled: if the resulting list should be shuffled (seeded by r_seed).
+    use_tf: has no effect on the result here; only compounds are built.
+    Returns the list of compounds.
+    """
+    # NOTE: use_tf is normalized here but never read again in this function.
+    if use_tf and not TF_AV:
+        use_tf = False
+
+    fname = f'{db_path}/xyz_qm9.txt'
+    with open(fname, 'r') as f:
+        lines = f.readlines()
+
+    compounds = []
+    for i, line in enumerate(lines):
+        line = line.strip()  # each line is a path relative to db_path
+        compounds.append(Compound(f'{db_path}/{line}', db='qm9'))
+
+    if is_shuffled:
+        random.seed(r_seed)  # reseeds the module-level random state
+        random.shuffle(compounds)
+
+    return compounds
-- 
cgit v1.2.3-70-g09d2