refactor database entries into their own class

author: David Luevano Alvarado <david@luevano.xyz> 2022-11-29 01:22:50 -0600
committer: David Luevano Alvarado <david@luevano.xyz> 2022-11-29 01:22:50 -0600
commit: 655e3c6fb7b13659e15b30e96b6c943d48170a71 (patch)
tree: 07b6718f99f95edb99a5a251678de74ffbda5174
parent: 98bfded1b407431ad62642d7f029e4e5f3534c07 (diff)
4 files changed, 98 insertions, 67 deletions
diff --git a/src/pyssg/database.py b/src/pyssg/database.py
index 913adb7..5a174c9 100644
--- a/src/pyssg/database.py
+++ b/src/pyssg/database.py
@@ -1,9 +1,11 @@
 import os
 import sys
+import csv
 from logging import Logger, getLogger
 from configparser import ConfigParser
 
 from .utils import get_checksum
+from .database_entry import DatabaseEntry
 
 log: Logger = getLogger(__name__)
 
@@ -11,27 +13,28 @@ log: Logger = getLogger(__name__)
 # db class that works for both html and md files
 class Database:
     __COLUMN_NUM: int = 5
+    __COLUMN_DELIMITER: str = '|'
 
     def __init__(self, db_path: str,
                  config: ConfigParser):
         log.debug('initializing the page db on path "%s"', db_path)
         self.db_path: str = db_path
         self.config: ConfigParser = config
-        self.e: dict[str, tuple[float, float, str, list[str]]] = dict()
+        self.e: dict[str, DatabaseEntry] = dict()
 
 
     # updates the tags for a specific entry (file)
-    #   file_name only contains the entry name (without the absolute path)
+    #   file_name only contains the entry name (not an absolute path)
     def update_tags(self, file_name: str,
-                    tags: list[str]) -> None:
+                    new_tags: list[str]) -> None:
         if file_name in self.e:
             log.debug('updating tags for entry "%s"', file_name)
-            cts, mts, checksum, old_tags = self.e[file_name]
-            log.debug('entry "%s" old content: (%s, %s, %s, (%s))',
-                      file_name, cts, mts, checksum, ', '.join(old_tags))
-            self.e[file_name] = (cts, mts, checksum, tags)
-            log.debug('entry "%s" new content: (%s, %s, %s, (%s))',
-                      file_name, cts, mts, checksum, ', '.join(tags))
+            log.debug('entry "%s" old content: %s',
+                      file_name, self.e[file_name])
+
+            self.e[file_name].update_tags(new_tags)
+            log.debug('entry "%s" new content: %s',
+                      file_name, self.e[file_name])
         else:
             log.error('can\'t update tags for entry "%s",'
                       ' as it is not present in db', file_name)
@@ -64,23 +67,22 @@ class Database:
         #1)
         if f not in self.e:
             log.debug('entry "%s" didn\'t exist, adding with defaults', f)
-            self.e[f] = (time, 0.0, checksum, tags)
+            self.e[f] = DatabaseEntry([f, time, 0.0, checksum, tags])
             return True
 
-        old_time, old_mod_time, old_checksum, tags = self.e[f]
-        log.debug('entry "%s" old content: (%s, %s, %s, (%s))',
-                  f, old_time, old_mod_time, old_checksum, ', '.join(tags))
+        # old_e is the old entry (the one currently stored in the db)
+        old_e: DatabaseEntry = self.e[f]
+        log.debug('entry "%s" old content: %s', f, old_e)
 
         # 2)
-        if checksum != old_checksum:
-            if old_mod_time == 0.0:
+        if checksum != old_e.checksum:
+            if old_e.mtimestamp == 0.0:
                 log.debug('entry "%s" has been modified for the first'
                           ' time, updating', f)
             else:
                 log.debug('entry "%s" has been modified, updating', f)
-            self.e[f] = (old_time, time, checksum, tags)
-            log.debug('entry "%s" new content: (%s, %s, %s, (%s))',
-                      f, old_time, time, checksum, ', '.join(tags))
+            self.e[f] = DatabaseEntry([f, old_e.ctimestamp, time, checksum, tags])
+            log.debug('entry "%s" new content: (%s, %s, %s, (%s))', f, self.e[f])
             return True
         # 3)
         else:
@@ -91,19 +93,10 @@ class Database:
     def write(self) -> None:
         log.debug('writing db')
         with open(self.db_path, 'w') as file:
-            for k, v in self.e.items():
-                log.debug('parsing row for page "%s"', k)
-                t: str
-                row: str
-                if len(v[3]) == 0:
-                    t = '-'
-                else:
-                    t = ','.join(v[3])
-
-                row = f'{k} {v[0]} {v[1]} {v[2]} {t}'
-
-                log.debug('writing row: "%s\\n"', row)
-                file.write(f'{row}\n')
+            for _, v in self.e.items():
+                log.debug('writing row: %s', v)
+                csv_writer = csv.writer(file, delimiter=self.__COLUMN_DELIMITER)
+                csv_writer.writerow(v.get_raw_entry())
 
 
     def _db_path_exists(self) -> bool:
@@ -121,10 +114,11 @@ class Database:
         return True
 
 
-    def _read_raw(self) -> list[str]:
-        rows: list[str]
-        with open(self.db_path, 'r') as file:
-            rows = file.readlines()
+    def _get_csv_rows(self) -> list[list[str]]:
+        rows: list[list[str]]
+        with open(self.db_path, 'r') as f:
+            csv_reader = csv.reader(f, delimiter=self.__COLUMN_DELIMITER)
+            rows = list(csv_reader)
         log.debug('db contains %d rows', len(rows))
 
         return rows
@@ -135,30 +129,19 @@ class Database:
         if not self._db_path_exists():
             return
 
-        rows: list[str] = self._read_raw()
+        rows: list[list[str]] = self._get_csv_rows()
         # l=list of values in entry
         log.debug('parsing rows from db')
         for it, row in enumerate(rows):
             i: int = it + 1
-            r: str = row.strip()
-            log.debug('row %d content: "%s"', i, r)
-            # ignoring type error, as i'm doing the check later
-            # (file_name, ctimestamp, mtimestamp, checksum, [tags])
-            cols: tuple[str, float, float, str, list[str]] = tuple(r.split())  # type: ignore
-            col_num: int = len(cols)
+            col_num: int = len(row)
+            log.debug('row %d content: "%s"', i, row)
 
             if col_num != self.__COLUMN_NUM:
                 log.critical('row %d doesn\'t contain %s columns, contains %d'
                              ' columns: "%s"',
-                             i, self.__COLUMN_NUM, col_num, r)
+                             i, self.__COLUMN_NUM, col_num, row)
                 sys.exit(1)
 
-            t: list[str]
-            if cols[4] == '-':
-                t = []
-            else:
-                # ignoring type error, the "check" is done in this whole if/else
-                t = cols[4].split(',')  # type: ignore
-            log.debug('tag content: (%s)', ', '.join(t))
-
-            self.e[cols[0]] = (float(cols[1]), float(cols[2]), cols[3], t)
+            entry: DatabaseEntry = DatabaseEntry(row)
+            self.e[entry.fname] = entry
diff --git a/src/pyssg/database_entry.py b/src/pyssg/database_entry.py
new file mode 100644
index 0000000..3fec92a
--- /dev/null
+++ b/src/pyssg/database_entry.py
@@ -0,0 +1,47 @@
+from typing import Union
+from logging import Logger, getLogger
+
+log: Logger = getLogger(__name__)
+
+
+class DatabaseEntry:
+    # the element type of "list" is left unspecified, as the items can be
+    #   all str (a row read from the db file) or the already typed values
+    def __init__(self, entry: list) -> None:
+        self.fname: str = entry[0]
+        self.ctimestamp: float = float(entry[1])
+        self.mtimestamp: float = float(entry[2])
+        self.checksum: str = entry[3]
+        self.tags: list[str]
+
+        if isinstance(entry[4], list):
+            self.tags = entry[4]
+        else:
+            if entry[4] == '-':
+                self.tags = []
+            else:
+                self.tags = entry[4].split(',')
+
+        log.debug('tag content: [%s]', ', '.join(self.tags))
+
+
+    def __str__(self) -> str:
+        _return_str: str = '[{}, {}, {}, {}, [{}]]'\
+                .format(self.fname,
+                        self.ctimestamp,
+                        self.mtimestamp,
+                        self.checksum,
+                        ', '.join(self.tags))
+        return _return_str
+
+
+    def get_raw_entry(self) -> list[str]:
+        return [self.fname,
+                str(self.ctimestamp),
+                str(self.mtimestamp),
+                self.checksum,
+                ','.join(self.tags) if self.tags else '-']
+
+
+    def update_tags(self, new_tags: list[str]) -> None:
+        self.tags = new_tags
diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py
index 664532a..bbd22a7 100644
--- a/src/pyssg/md_parser.py
+++ b/src/pyssg/md_parser.py
@@ -72,8 +72,8 @@ class MDParser:
             content: str = self.md.reset().convert(open(src_file).read())
             # ignoring md.Meta type as it is not yet defined (because it is from an extension)
             page: Page = Page(f,
-                              self.db.e[f][0],
-                              self.db.e[f][1],
+                              self.db.e[f].ctimestamp,
+                              self.db.e[f].mtimestamp,
                               content,
                               self.md.Meta,  # type: ignore
                               self.config)
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 467dd7e..264bc92 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -25,27 +25,28 @@ class Page:
         self.config: ConfigParser = config
 
         # data from self.meta
-        self.title: str = ''
-        self.author: str = ''
+        self.title: str
+        self.author: str
+        self.summary: str
+        self.lang: str
         self.cdatetime: datetime
         self.mdatetime: datetime
-        self.summary: str = ''
-        self.lang: str = 'en'
         self.tags: list[tuple[str, str]] = []
 
         # constructed
-        self.url: str = ''
-        self.image_url: str = ''
-        self.cdate: str = ''
-        self.cdate_list: str = ''
-        self.cdate_list_sep: str = ''
-        self.cdate_rss: str = ''
-        self.cdate_sitemap: str = ''
+        self.url: str
+        self.image_url: str
+        self.cdate: str
+        self.cdate_list: str
+        self.cdate_list_sep: str
+        self.cdate_rss: str
+        self.cdate_sitemap: str
+
         self.mdate: str
         self.mdate_list: str
         self.mdate_list_sep: str
-        self.mdate_rss: str = ''
-        self.mdate_sitemap: str = ''
+        self.mdate_rss: str
+        self.mdate_sitemap: str
 
         # later assigned references to next and previous pages
         #   not always assigned (tail ends), and the None helps check it, ignoring
author	David Luevano Alvarado <david@luevano.xyz>	2022-11-29 01:22:50 -0600
committer	David Luevano Alvarado <david@luevano.xyz>	2022-11-29 01:22:50 -0600
commit	655e3c6fb7b13659e15b30e96b6c943d48170a71 (patch)
tree	07b6718f99f95edb99a5a251678de74ffbda5174
parent	98bfded1b407431ad62642d7f029e4e5f3534c07 (diff)