1 files changed, 113 insertions, 52 deletions
diff --git a/src/pyssg/database.py b/src/pyssg/database.py
index 66c7087..290ba51 100644
--- a/src/pyssg/database.py
+++ b/src/pyssg/database.py
@@ -2,7 +2,6 @@ import os
 import sys
 from logging import Logger, getLogger
 from configparser import ConfigParser
-from tabnanny import check
 
 from .utils import get_checksum
 
@@ -11,14 +10,15 @@ log: Logger = getLogger(__name__)
 
 # db class that works for both html and md files
 class Database:
-    __COLUMN_NUM: int = 4
+    __OLD_COLUMN_NUM: int = 4
+    __COLUMN_NUM: int = 5
 
     def __init__(self, db_path: str,
                  config: ConfigParser):
         log.debug('initializing the page db on path "%s"', db_path)
         self.db_path: str = db_path
         self.config: ConfigParser = config
-        self.e: dict[str, tuple[float, float, list[str]]] = dict()
+        self.e: dict[str, tuple[float, float, str, list[str]]] = dict()
 
 
     # updates the tags for a specific entry (file)
@@ -27,12 +27,12 @@ class Database:
                     tags: list[str]) -> None:
         if file_name in self.e:
             log.debug('updating tags for entry "%s"', file_name)
-            cts, mts, old_tags = self.e[file_name]
-            log.debug('entry "%s" old content: (%s, %s, (%s))',
-                      file_name, cts, mts, ', '.join(old_tags))
-            self.e[file_name] = (cts, mts, tags)
-            log.debug('entry "%s" new content: (%s, %s, (%s))',
-                      file_name, cts, mts, ', '.join(tags))
+            cts, mts, checksum, old_tags = self.e[file_name]
+            log.debug('entry "%s" old content: (%s, %s, %s, (%s))',
+                      file_name, cts, mts, checksum, ', '.join(old_tags))
+            self.e[file_name] = (cts, mts, checksum, tags)
+            log.debug('entry "%s" new content: (%s, %s, %s, (%s))',
+                      file_name, cts, mts, checksum, ', '.join(tags))
         else:
             log.error('can\'t update tags for entry "%s",'
                       ' as it is not present in db', file_name)
@@ -51,44 +51,42 @@ class Database:
             f = file_name.replace(remove, '')
             log.debug('removed "%s" from "%s": "%s"', remove, file_name, f)
 
-
         # get current time, needs actual file name
         time: float = os.stat(file_name).st_mtime
         log.debug('modified time for "%s": %s', file_name, time)
 
-        # three cases, 1) entry didn't exist,
-        # 2) entry hasn't been mod and,
-        # 3) entry has been mod
+        # calculate current checksum, also needs actual file name
+        checksum: str = get_checksum(file_name)
+        log.debug('current checksum for "%s": "%s"', file_name, checksum)
+
+        # three cases, 1) entry didn't exist,
+        # 2) entry has been mod and,
+        # 3) entry hasn't been mod
         #1)
         if f not in self.e:
             log.debug('entry "%s" didn\'t exist, adding with defaults', f)
-            self.e[f] = (time, 0.0, tags)
+            self.e[f] = (time, 0.0, checksum, tags)
             return True
 
-        old_time, old_mod_time, tags = self.e[f]
-        log.debug('entry "%s" old content: (%s, %s, (%s))',
-                  f, old_time, old_mod_time, ', '.join(tags))
+        old_time, old_mod_time, old_checksum, tags = self.e[f]
+        log.debug('entry "%s" old content: (%s, %s, %s, (%s))',
+                  f, old_time, old_mod_time, old_checksum, ', '.join(tags))
 
         # 2)
-        if old_mod_time == 0.0:
-            if time > old_time:
+        if checksum != old_checksum:
+            if old_mod_time == 0.0:
                 log.debug('entry "%s" has been modified for the first'
                           ' time, updating', f)
-                self.e[f] = (old_time, time, tags)
-                log.debug('entry "%s" new content: (%s, %s, (%s))',
-                          f, old_time, time, ', '.join(tags))
-                return True
+            else:
+                log.debug('entry "%s" has been modified, updating', f)
+            self.e[f] = (old_time, time, checksum, tags)
+            log.debug('entry "%s" new content: (%s, %s, %s, (%s))',
+                      f, old_time, time, checksum, ', '.join(tags))
+            return True
         # 3)
         else:
-            if time > old_mod_time:
-                log.debug('entry "%s" has been modified, updating', f)
-                self.e[f] = (old_time, time, tags)
-                log.debug('entry "%s" new content: (%s, %s, (%s))',
-                          f, old_time, time, ', '.join(tags))
-                return True
-
-        log.debug('entry "%s" hasn\'t been modified', f)
-        return False
+            log.debug('entry "%s" hasn\'t been modified', f)
+            return False
 
 
     def write(self) -> None:
@@ -98,54 +96,117 @@ class Database:
                 log.debug('parsing row for page "%s"', k)
                 t: str = None
                 row: str = None
-                if len(v[2]) == 0:
+                if len(v[3]) == 0:
                     t = '-'
                 else:
-                    t = ','.join(v[2])
+                    t = ','.join(v[3])
 
-                row = f'{k} {v[0]} {v[1]} {t}'
+                row = f'{k} {v[0]} {v[1]} {v[2]} {t}'
 
                 log.debug('writing row: "%s\\n"', row)
                 file.write(f'{row}\n')
 
 
-    def read(self) -> None:
-        log.debug('reading db')
+    def _db_path_exists(self) -> bool:
+        log.debug('checking that "%s" exists or is a file', self.db_path)
         if not os.path.exists(self.db_path):
             log.warning('"%s" doesn\'t exist, will be'
                         ' created once process finishes,'
                         ' ignore if it\'s the first run', self.db_path)
-            return
+            return False
 
-        if os.path.exists(self.db_path) and not os.path.isfile(self.db_path):
+        if not os.path.isfile(self.db_path):
             log.error('"%s" is not a file"', self.db_path)
             sys.exit(1)
 
+        return True
+
+
+    def _read_raw(self) -> list[str]:
         rows: list[str] = None
         with open(self.db_path, 'r') as file:
             rows = file.readlines()
-        log.info('db contains %d rows', len(rows))
+        log.debug('db contains %d rows', len(rows))
+
+        return rows
+
+
+    # reads a db that still uses the old 4 column schema
+    # (file_name, ctimestamp, mtimestamp, tags) into self.e, computing the
+    # missing checksum column from each entry's file; exits with status 1
+    # if the db doesn't exist or a row has the wrong number of columns
+    def read_old(self) -> None:
+        log.debug('reading db with old schema (%d columns)', self.__OLD_COLUMN_NUM)
+        if not self._db_path_exists():
+            log.error('db path "%s" doesn\'t exist, --add-checksum-to-db should'
+                      ' only be used when updating the old db schema', self.db_path)
+            sys.exit(1)
+
+        rows: list[str] = self._read_raw()
+        log.debug('parsing rows from db')
+        for it, row in enumerate(rows):
+            i: int = it + 1  # 1-based row number, only used in log messages
+            r: str = row.strip()
+            log.debug('row %d content: "%s"', i, r)
+            # raw (still string) columns: (file_name, ctimestamp, mtimestamp, tags)
+            cols: tuple[str, ...] = tuple(r.split())
+            col_num: int = len(cols)
+            if col_num != self.__OLD_COLUMN_NUM:
+                log.critical('row %d doesn\'t contain %s columns, contains %d'
+                             ' columns: "%s"',
+                             i, self.__OLD_COLUMN_NUM, col_num, r)
+                sys.exit(1)
+
+            # a lone '-' is what write() stores for an entry without tags
+            t: list[str] = [] if cols[3] == '-' else cols[3].split(',')
+            log.debug('tag content: (%s)', ', '.join(t))
+            # the checksum needs the actual file, located under the 'src' path
+            file_path: str = os.path.join(self.config.get('path', 'src'), cols[0])
+            checksum: str = get_checksum(file_path)
+            log.debug('checksum for "%s": "%s"', file_path, checksum)
 
-        # parse each entry and populate accordingly
-        l: list[str] = None
+            self.e[cols[0]] = (float(cols[1]), float(cols[2]), checksum, t)
+
+
+
+    def read(self) -> None:
+        log.debug('reading db')
+        if not self._db_path_exists():
+            return
+
+        rows: list[str] = self._read_raw()
+        cols: list[str] = None
         # l=list of values in entry
         log.debug('parsing rows from db')
         for it, row in enumerate(rows):
-            i = it + 1
-            r = row.strip()
+            i: int = it + 1
+            r: str = row.strip()
             log.debug('row %d content: "%s"', i, r)
-            l = tuple(r.split())
-            if len(l) != self.__COLUMN_NUM:
-                log.critical('row %d doesn\'t contain %s columns,'
-                             ' contains %d elements; row %d content: "%s"',
-                             i, self.__COLUMN_NUM, len(l), i, r)
+            # (file_name, ctimestamp, mtimestamp, checksum, [tags])
+            cols: tuple[str, float, float, str, list[str]] = tuple(r.split())
+            col_num: int = len(cols)
+            if col_num == self.__OLD_COLUMN_NUM:
+                log.error('row %d contains %d columns: "%s"; this is probably'
+                          ' because of missing checksum column, which is used'
+                          ' now to also check if a file has changed. Rerun'
+                          ' with flag --add-checksum-to-db to add the checksum'
+                          ' column to the current db; if you did any changes'
+                          ' since last timestamp in db, it won\'t update'
+                          ' modification timestamp',
+                          i, self.__OLD_COLUMN_NUM, r)
+                sys.exit(1)
+
+            if col_num != self.__COLUMN_NUM:
+                log.critical('row %d doesn\'t contain %s columns, contains %d'
+                             ' columns: "%s"',
+                             i, self.__COLUMN_NUM, col_num, r)
                 sys.exit(1)
 
             t: list[str] = None
-            if l[3] == '-':
+            if cols[4] == '-':
                 t = []
             else:
-                t = l[3].split(',')
+                t = cols[4].split(',')
             log.debug('tag content: (%s)', ', '.join(t))
 
-            self.e[l[0]] = (float(l[1]), float(l[2]), t)
+            self.e[cols[0]] = (float(cols[1]), float(cols[2]), cols[3], t)