From 98bfded1b407431ad62642d7f029e4e5f3534c07 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <david@luevano.xyz>
Date: Sun, 27 Nov 2022 06:44:07 -0600
Subject: refactor code and fix type checks

still need to refactor more code before migrating to a YAML config file
---
 .gitignore                       |  6 ++++
 build_upload.sh                  |  1 -
 src/pyssg/arg_parser.py          | 63 ++-----------------------------------
 src/pyssg/builder.py             | 24 +++++++-------
 src/pyssg/configuration.py       |  2 +-
 src/pyssg/database.py            | 68 ++++++----------------------------------
 src/pyssg/md_parser.py           | 20 +++++-------
 src/pyssg/page.py                | 21 +++++++------
 src/pyssg/per_level_formatter.py |  8 +++--
 src/pyssg/pyssg.py               | 57 +++++++++++++++++++--------------
 src/pyssg/utils.py               | 14 ++++-----
 11 files changed, 96 insertions(+), 188 deletions(-)

diff --git a/.gitignore b/.gitignore
index b6e4761..8bf6475 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,9 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# project specific
+.vscode/
+site_example/
+dst/**/*.html
+.files
\ No newline at end of file
diff --git a/build_upload.sh b/build_upload.sh
index 7ddd573..03e1f68 100755
--- a/build_upload.sh
+++ b/build_upload.sh
@@ -7,7 +7,6 @@ echo "building package"
 python -m build
 
 echo "uploading to pypi"
-# alternatively, use /bin/python -m twine, i use twine in arch
 twine upload dist/*
 
 echo "removing dist/*"
diff --git a/src/pyssg/arg_parser.py b/src/pyssg/arg_parser.py
index 2fc6853..2eb7d72 100644
--- a/src/pyssg/arg_parser.py
+++ b/src/pyssg/arg_parser.py
@@ -1,7 +1,7 @@
-from argparse import ArgumentParser, Namespace
+from argparse import ArgumentParser
 
 
-def get_parsed_arguments() -> Namespace:
+def get_parser() -> ArgumentParser:
     parser = ArgumentParser(prog='pyssg',
                             description='''Static Site Generator that parses
                             Markdown files into HTML files. For datetime
@@ -37,62 +37,5 @@ def get_parsed_arguments() -> Namespace:
     parser.add_argument('--debug',
                         action='store_true',
                         help='''change logging level from info to debug''')
-    parser.add_argument('--add-checksum-to-db',
-                        action='store_true',
-                        help='''add checksum column to db entries''')
-    # really not needed, too much bloat and case scenarios to check for,
-    #   instead, just read from config file or default config file
-    """
-    parser.add_argument('-s', '--src',
-                        default='src',
-                        type=str,
-                        help='''src directory; handmade files, templates and
-                        metadata directory; defaults to 'src' ''')
-    parser.add_argument('-d', '--dst',
-                        default='dst',
-                        type=str,
-                        help='''dst directory; generated (and transfered html)
-                        files; defaults to 'dst' ''')
-    parser.add_argument('-t', '--plt',
-                        default='plt',
-                        type=str,
-                        help='''plt directory; all template files; defaults to
-                        'plt' ''')
-    parser.add_argument('-u', '--url',
-                        default='',
-                        type=str,
-                        help='''base url without trailing slash''')
-    parser.add_argument('--static-url',
-                        default='',
-                        type=str,
-                        help='''base static url without trailing slash''')
-    parser.add_argument('--default-image-url',
-                        default='',
-                        type=str,
-                        help='''default image url''')
-    parser.add_argument('--title',
-                        default='Blog',
-                        type=str,
-                        help='''general title for the website; defaults to
-                        'Blog' ''')
-    parser.add_argument('--date-format',
-                        default='%a, %b %d, %Y @ %H:%M %Z',
-                        type=str,
-                        help='''date format used inside pages (for creation and
-                        modification times, for example); defaults to '%%a, %%b
-                        %%d, %%Y @ %%H:%%M %%Z' ('Tue, Mar 16, 2021 @ 02:46 UTC',
-                        for example)''')
-    parser.add_argument('--list-date-format',
-                        default='%b %d',
-                        type=str,
-                        help='''date format used for page entries in a list;
-                        defaults to '%%b %%d' ('Mar 16', for example)''')
-    parser.add_argument('--list-sep-date-format',
-                        default='%B %Y',
-                        type=str,
-                        help='''date format used for the separator between page
-                        entries in a list; defaults to '%%B %%Y' ('March 2021',
-                        for example)''')
-    """
 
-    return parser.parse_args()
+    return parser
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index 6d65187..f0ca802 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -30,15 +30,15 @@ class Builder:
                                             trim_blocks=True,
                                             lstrip_blocks=True)
 
-        self.dirs: list[str] = None
-        self.md_files: list[str] = None
-        self.html_files: list[str] = None
+        self.dirs: list[str]
+        self.md_files: list[str]
+        self.html_files: list[str]
 
         # files and pages are synoyms
-        self.all_files: list[Page] = None
-        self.updated_files: list[Page] = None
-        self.all_tags: list[str] = None
-        self.common_vars: dict = None
+        self.all_files: list[Page]
+        self.updated_files: list[Page]
+        self.all_tags: list[tuple[str, str]]
+        self.common_vars: dict
 
 
     def build(self) -> None:
@@ -80,7 +80,7 @@ class Builder:
 
     def __create_dir_structure(self) -> None:
         log.debug('creating dir structure')
-        dir_path: str = None
+        dir_path: str
         for d in self.dirs:
             dir_path = os.path.join(self.config.get('path', 'dst'), d)
             # using silent=True to not print the info create dir msgs for this
@@ -92,8 +92,8 @@ class Builder:
             log.debug('copying all html files')
         else:
             log.debug('no html files to copy')
-        src_file: str = None
-        dst_file: str = None
+        src_file: str
+        dst_file: str
 
         for f in self.html_files:
             src_file = os.path.join(self.config.get('path', 'src'), f)
@@ -114,7 +114,7 @@ class Builder:
     def __render_articles(self) -> None:
         log.debug('rendering html')
         article_vars: dict = deepcopy(self.common_vars)
-        temp_files: list[Page] = None
+        temp_files: list[Page]
 
         # check if only updated should be created
         if self.config.getboolean('other', 'force'):
@@ -136,7 +136,7 @@ class Builder:
     def __render_tags(self) -> None:
         log.debug('rendering tags')
         tag_vars: dict = deepcopy(self.common_vars)
-        tag_pages: list[Page] = None
+        tag_pages: list[Page]
         for t in self.all_tags:
             log.debug('rendering tag "%s"', t[0])
             # clean tag_pages
diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py
index 32a010b..d420fe8 100644
--- a/src/pyssg/configuration.py
+++ b/src/pyssg/configuration.py
@@ -10,7 +10,7 @@ from .utils import get_expanded_path
 log: Logger = getLogger(__name__)
 
 
-DEFAULT_CONFIG_PATH = '$XDG_CONFIG_HOME/pyssg/config.ini'
+DEFAULT_CONFIG_PATH: str = '$XDG_CONFIG_HOME/pyssg/config.ini'
 VERSION = version('pyssg')
 
 
diff --git a/src/pyssg/database.py b/src/pyssg/database.py
index 290ba51..913adb7 100644
--- a/src/pyssg/database.py
+++ b/src/pyssg/database.py
@@ -10,7 +10,6 @@ log: Logger = getLogger(__name__)
 
 # db class that works for both html and md files
 class Database:
-    __OLD_COLUMN_NUM: int = 4
     __COLUMN_NUM: int = 5
 
     def __init__(self, db_path: str,
@@ -42,12 +41,12 @@ class Database:
     # returns a bool that indicates if the entry
     # was (includes new entries) or wasn't updated
     def update(self, file_name: str,
-               remove: str=None) -> bool:
+               remove: str='') -> bool:
         log.debug('updating entry for file "%s"', file_name)
         # initial default values
         f: str = file_name
         tags: list[str] = []
-        if remove is not None:
+        if remove != '':
             f = file_name.replace(remove, '')
             log.debug('removed "%s" from "%s": "%s"', remove, file_name, f)
 
@@ -94,8 +93,8 @@ class Database:
         with open(self.db_path, 'w') as file:
             for k, v in self.e.items():
                 log.debug('parsing row for page "%s"', k)
-                t: str = None
-                row: str = None
+                t: str
+                row: str
                 if len(v[3]) == 0:
                     t = '-'
                 else:
@@ -123,7 +122,7 @@ class Database:
 
 
     def _read_raw(self) -> list[str]:
-        rows: list[str] = None
+        rows: list[str]
         with open(self.db_path, 'r') as file:
             rows = file.readlines()
         log.debug('db contains %d rows', len(rows))
@@ -131,70 +130,22 @@ class Database:
         return rows
 
 
-    def read_old(self) -> None:
-        log.debug('reading db with old schema (%d columns)', self.__OLD_COLUMN_NUM)
-        if not self._db_path_exists():
-            log.error('db path "%s" desn\'t exist, --add-checksum-to-db should'
-                      'only be used when updating the old db schema', self.db_path)
-            sys.exit(1)
-
-        rows: list[str] = self._read_raw()
-        cols: list[str] = None
-        # l=list of values in entry
-        log.debug('parsing rows from db')
-        for it, row in enumerate(rows):
-            i: int = it + 1
-            r: str = row.strip()
-            log.debug('row %d content: "%s"', i, r)
-            # (file_name, ctimestamp, mtimestamp, [tags])
-            cols: tuple[str, float, float, list[str]] = tuple(r.split())
-            col_num: int = len(cols)
-            if col_num != self.__OLD_COLUMN_NUM:
-                log.critical('row %d doesn\'t contain %s columns, contains %d'
-                             ' columns: "%s"',
-                             i, self.__OLD_COLUMN_NUM, col_num, r)
-                sys.exit(1)
-
-            t: list[str] = None
-            if cols[3] == '-':
-                t = []
-            else:
-                t = cols[3].split(',')
-            log.debug('tag content: (%s)', ', '.join(t))
-            file_path: str = os.path.join(self.config.get('path', 'src'), cols[0])
-            checksum: str = get_checksum(file_path)
-            log.debug('checksum for "%s": "%s"', file_path, checksum)
-
-            self.e[cols[0]] = (float(cols[1]), float(cols[2]), checksum, t)
-
-
-
     def read(self) -> None:
         log.debug('reading db')
         if not self._db_path_exists():
             return
 
         rows: list[str] = self._read_raw()
-        cols: list[str] = None
         # l=list of values in entry
         log.debug('parsing rows from db')
         for it, row in enumerate(rows):
             i: int = it + 1
             r: str = row.strip()
             log.debug('row %d content: "%s"', i, r)
+            # ignoring type error, as i'm doing the check later
             # (file_name, ctimestamp, mtimestamp, checksum, [tags])
-            cols: tuple[str, float, float, str, list[str]] = tuple(r.split())
+            cols: tuple[str, float, float, str, list[str]] = tuple(r.split())  # type: ignore
             col_num: int = len(cols)
-            if col_num == self.__OLD_COLUMN_NUM:
-                log.error('row %d contains %d columns: "%s"; this is probably'
-                          ' because of missing checksum column, which is used'
-                          ' now to also check if a file has changed. Rerun'
-                          ' with flag --add-checksum-to-db to add the checksum'
-                          ' column to the current db; if you did any changes'
-                          ' since last timestamp in db, it won\'t update'
-                          ' modification timestamp',
-                          i, self.__OLD_COLUMN_NUM, r)
-                sys.exit(1)
 
             if col_num != self.__COLUMN_NUM:
                 log.critical('row %d doesn\'t contain %s columns, contains %d'
@@ -202,11 +153,12 @@ class Database:
                              i, self.__COLUMN_NUM, col_num, r)
                 sys.exit(1)
 
-            t: list[str] = None
+            t: list[str]
             if cols[4] == '-':
                 t = []
             else:
-                t = cols[4].split(',')
+                # ignoring type error, the "check" is done in this whole if/else
+                t = cols[4].split(',')  # type: ignore
             log.debug('tag content: (%s)', ', '.join(t))
 
             self.e[cols[0]] = (float(cols[1]), float(cols[2]), cols[3], t)
diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py
index 759ead6..664532a 100644
--- a/src/pyssg/md_parser.py
+++ b/src/pyssg/md_parser.py
@@ -36,7 +36,9 @@ def _get_md_obj() -> Markdown:
     log.debug('list of md extensions: (%s)',
               ', '.join([e if isinstance(e, str) else type(e).__name__
                          for e in exts]))
-    return Markdown(extensions=exts, output_format='html5')
+    # for some reason, the definition for output_format doesn't include html5
+    #   even though it is listed in the documentation, ignoring
+    return Markdown(extensions=exts, output_format='html5')  # type: ignore
 
 
 # page and file is basically a synonym here...
@@ -51,21 +53,14 @@ class MDParser:
         self.db: Database = db
         self.md: Markdown = _get_md_obj()
 
-        self.all_files: list[Page] = None
+        self.all_files: list[Page] = []
         # updated and modified are synonyms here
-        self.updated_files: list[Page] = None
-        self.all_tags: list[tuple[str]] = None
+        self.updated_files: list[Page] = []
+        self.all_tags: list[tuple[str, str]] = []
 
 
     def parse_files(self) -> None:
         log.debug('parsing all files')
-        # initialize lists
-        self.all_files = []
-        self.updated_files = []
-        self.all_tags = []
-        # not used, not sure why i had this
-        # all_tag_names: list[str] = []
-
         for f in self.files:
             log.debug('parsing file "%s"', f)
             src_file: str = os.path.join(self.config.get('path', 'src'), f)
@@ -75,11 +70,12 @@ class MDParser:
 
             log.debug('parsing md into html')
             content: str = self.md.reset().convert(open(src_file).read())
+            # ignoring md.Meta type as it is not yet defined (because it is from an extension)
             page: Page = Page(f,
                               self.db.e[f][0],
                               self.db.e[f][1],
                               content,
-                              self.md.Meta,
+                              self.md.Meta,  # type: ignore
                               self.config)
             page.parse_metadata()
 
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 21add82..467dd7e 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -27,11 +27,11 @@ class Page:
         # data from self.meta
         self.title: str = ''
         self.author: str = ''
-        self.cdatetime: datetime = None
-        self.mdatetime: datetime = None
+        self.cdatetime: datetime
+        self.mdatetime: datetime
         self.summary: str = ''
         self.lang: str = 'en'
-        self.tags: list[tuple[str]] = []
+        self.tags: list[tuple[str, str]] = []
 
         # constructed
         self.url: str = ''
@@ -41,15 +41,16 @@ class Page:
         self.cdate_list_sep: str = ''
         self.cdate_rss: str = ''
         self.cdate_sitemap: str = ''
-        self.mdate: str = None
-        self.mdate_list: str = None
-        self.mdate_list_sep: str = None
+        self.mdate: str
+        self.mdate_list: str
+        self.mdate_list_sep: str
         self.mdate_rss: str = ''
         self.mdate_sitemap: str = ''
 
         # later assigned references to next and previous pages
-        self.next: Page = None
-        self.previous: Page = None
+        #   not always assigned (tail ends), and the None helps check it, ignoring
+        self.next: Page = None  # type: ignore
+        self.previous: Page = None  # type: ignore
 
         # also from self.meta, but for og metadata
         self.og: dict[str, str] = dict()
@@ -70,7 +71,7 @@ class Page:
 
 
     # parses meta from self.meta, for og, it prioritizes,
-    # the actual og meta
+    #   the actual og meta
     def parse_metadata(self):
         log.debug('parsing metadata for file "%s"', self.name)
         self.title = self.__get_mandatory_meta('title')
@@ -132,7 +133,7 @@ class Page:
             og_elements: list[str] = self.meta['og']
             log.debug('parsing og metadata')
             for og_e in og_elements:
-                kv: str = og_e.split(',', 1)
+                kv: list[str] = og_e.split(',', 1)
                 if len(kv) != 2:
                     log.error('invalid og syntax for "%s", needs to be "k, v"', og_e)
                     sys.exit(1)
diff --git a/src/pyssg/per_level_formatter.py b/src/pyssg/per_level_formatter.py
index 5ab3946..04f943b 100644
--- a/src/pyssg/per_level_formatter.py
+++ b/src/pyssg/per_level_formatter.py
@@ -1,4 +1,4 @@
-from logging import Formatter, DEBUG, INFO, WARNING, ERROR, CRITICAL
+from logging import Formatter, LogRecord, DEBUG, INFO, WARNING, ERROR, CRITICAL
 
 # only reason for this class is to get info formatting as normal text
 #   and everything else with more info and with colors
@@ -20,8 +20,10 @@ class PerLevelFormatter(Formatter):
     }
 
 
-    def format(self, record: str) -> str:
-        fmt: str = self.__FORMATS.get(record.levelno)
+    def format(self, record: LogRecord) -> str:
+        # this should never fail, as __FORMATS is defined above,
+        #   so no issue of just converting to str
+        fmt: str = str(self.__FORMATS.get(record.levelno))
         formatter: Formatter = Formatter(
             fmt=fmt, datefmt=self.__DATE_FMT, style='%')
 
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index eb042b6..a496b34 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -4,9 +4,10 @@ from importlib.resources import path as rpath
 from typing import Union
 from configparser import ConfigParser
 from logging import Logger, getLogger, DEBUG
+from argparse import ArgumentParser
 
+from .arg_parser import get_parser
 from .utils import create_dir, copy_file, get_expanded_path
-from .arg_parser import get_parsed_arguments
 from .configuration import get_parsed_config, DEFAULT_CONFIG_PATH, VERSION
 from .database import Database
 from .builder import Builder
@@ -15,7 +16,30 @@ log: Logger = getLogger(__name__)
 
 
 def main() -> None:
-    args: dict[str, Union[str, bool]] = vars(get_parsed_arguments())
+    arg_parser: ArgumentParser = get_parser()
+    args: dict[str, Union[str, bool]] = vars(arg_parser.parse_args())
+
+    # too messy to place at utils.py, don't want to be
+    #   passing the arg parser around
+    def _log_perror(message: str) -> None:
+        arg_parser.print_usage()
+        # even if it's an error, print it as info
+        #   as it is not critical, only config related
+        log.info(message)
+        sys.exit(1)
+
+    # -1 as first argument is program path
+    num_args = len(sys.argv) - 1
+    if num_args == 2 and args['config']:
+        _log_perror('pyssg: error: only config argument passed, --help for more')
+    elif not num_args > 0 or (num_args == 1 and args['debug']):
+        _log_perror('pyssg: error: no arguments passed, --help for more')
+    elif num_args == 3 and (args['debug'] and args['config']):
+        _log_perror("pyssg: error: no arguments passed other than 'debug' and 'config', --help for more")
+
+    if args['version']:
+        log.info('pyssg v%s', VERSION)
+        sys.exit(0)
 
     if args['debug']:
         # need to modify the root logger specifically,
@@ -27,15 +51,7 @@ def main() -> None:
             handler.setLevel(DEBUG)
         log.debug('changed logging level to DEBUG')
 
-    if not len(sys.argv) > 1 or (len(sys.argv) == 2 and args['debug']):
-        log.info('pyssg v%s - no arguments passed, --help for more', VERSION)
-        sys.exit(0)
-
-    if args['version']:
-        log.info('pyssg v%s', VERSION)
-        sys.exit(0)
-
-    config_path: str = args['config'] if args['config'] else DEFAULT_CONFIG_PATH
+    config_path: str = str(args['config']) if args['config'] else DEFAULT_CONFIG_PATH
     config_path = get_expanded_path(config_path)
     config_dir, _ = os.path.split(config_path)
     log.debug('checked config file path, final config path "%s"', config_path)
@@ -44,7 +60,7 @@ def main() -> None:
         log.info('copying default config file')
         create_dir(config_dir)
         with rpath('pyssg.plt', 'default.ini') as p:
-            copy_file(p, config_path)
+            copy_file(str(p), config_path)
         sys.exit(0)
 
     if not os.path.exists(config_path):
@@ -61,27 +77,19 @@ def main() -> None:
         create_dir(config.get('path', 'src'))
         create_dir(os.path.join(config.get('path', 'dst'), 'tag'), True)
         create_dir(config.get('path', 'plt'))
-        files: list[str] = ('index.html',
+        files: list[str] = ['index.html',
                             'page.html',
                             'tag.html',
                             'rss.xml',
-                            'sitemap.xml')
+                            'sitemap.xml']
         log.debug('list of files to copy over: (%s)', ', '.join(files))
         for f in files:
             plt_file: str = os.path.join(config.get('path', 'plt'), f)
             with rpath('pyssg.plt', f) as p:
-                copy_file(p, plt_file)
+                copy_file(str(p), plt_file)
+        log.info('finished initialization')
         sys.exit(0)
 
-    if args['add_checksum_to_db']:
-        log.info('adding checksum column to existing db')
-        db_path: str = os.path.join(config.get('path', 'src'), '.files')
-        db: Database = Database(db_path, config)
-        # needs to be read_old instead of read
-        db.read_old()
-        db.write()
-
-        sys.exit(0)
 
     if args['build']:
         log.info('building the html files')
@@ -93,4 +101,5 @@ def main() -> None:
         builder.build()
 
         db.write()
+        log.info('finished building the html files')
         sys.exit(0)
diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py
index 4b525cf..3e05d0a 100644
--- a/src/pyssg/utils.py
+++ b/src/pyssg/utils.py
@@ -9,14 +9,14 @@ log: Logger = getLogger(__name__)
 
 def get_file_list(path: str,
                   exts: list[str],
-                  exclude: list[str]=None) -> list[str]:
+                  exclude: list[str]=[]) -> list[str]:
     log.debug('retrieving file list in path "%s" that contain file'
               ' extensions (%s) except (%s)',
               path, ', '.join(exts),
-              ', '.join(exclude if exclude is not None else []))
+              ', '.join(exclude))
     out: list[str] = []
     for root, dirs, files in os.walk(path):
-        if exclude is not None:
+        if exclude != []:
             log.debug('removing excludes from list')
             dirs[:] = [d for d in dirs if d not in exclude]
 
@@ -34,12 +34,12 @@ def get_file_list(path: str,
 
 
 def get_dir_structure(path: str,
-                      exclude: list[str]=None) -> list[str]:
+                      exclude: list[str]=[]) -> list[str]:
     log.debug('retrieving dir structure in path "%s" except (%s)',
-              path, ', '.join(exclude if exclude is not None else []))
+              path, ', '.join(exclude))
     out: list[str] = []
     for root, dirs, files in os.walk(path):
-        if exclude is not None:
+        if exclude != []:
             log.debug('removing excludes from list')
             dirs[:] = [d for d in dirs if d not in exclude]
 
@@ -85,7 +85,7 @@ def get_checksum(path: str) -> str:
     return file_hash.hexdigest()
 
 
-def get_expanded_path(path: str) -> None:
+def get_expanded_path(path: str) -> str:
     log.debug('expanding path "%s"', path)
     expanded_path: str = os.path.normpath(os.path.expandvars(path))
     if '$' in expanded_path:
-- 
cgit v1.2.3-70-g09d2