From e8ce49590ed5a73f72e45c8ae2e021ddcf3dbf12 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <david@luevano.xyz>
Date: Mon, 18 Apr 2022 22:27:42 -0600
Subject: add logging to builder, md_parser and page, and minor code refactor

---
 src/pyssg/builder.py   | 95 +++++++++++++++++++++++++++++++-----------------
 src/pyssg/md_parser.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pyssg/page.py      | 39 +++++++++++++++-----
 src/pyssg/parser.py    | 79 ----------------------------------------
 src/pyssg/utils.py     |  2 +-
 5 files changed, 192 insertions(+), 121 deletions(-)
 create mode 100644 src/pyssg/md_parser.py
 delete mode 100644 src/pyssg/parser.py

diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index 130062e..e7a49fe 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -5,18 +5,24 @@ from operator import itemgetter
 from jinja2 import Environment, Template
 from markdown import Markdown
 from configparser import ConfigParser
+import logging
+from logging import Logger
 
+from .utils import create_dir, copy_file
 from .database import Database
-from .parser import MDParser
+from .md_parser import MDParser
 from .page import Page
 from .discovery import get_file_list, get_dir_structure
 
+log: Logger = logging.getLogger(__name__)
+
 
 class Builder:
     def __init__(self, config: ConfigParser,
                  env: Environment,
                  db: Database,
                  md: Markdown):
+        log.debug('initializing site builder')
         self.config: ConfigParser = config
         self.env: Environment = env
         self.db: Database = db
@@ -26,13 +32,15 @@ class Builder:
         self.md_files: list[str] = None
         self.html_files: list[str] = None
 
-        self.all_pages: list[Page] = None
-        self.updated_pages: list[Page] = None
+        # files and pages are synonyms
+        self.all_files: list[Page] = None
+        self.updated_files: list[Page] = None
         self.all_tags: list[str] = None
         self.common_vars: dict = None
 
 
     def build(self) -> None:
+        log.debug('building site')
         self.dirs = get_dir_structure(self.config.get('path', 'src'),
                                       ['templates'])
         self.md_files = get_file_list(self.config.get('path', 'src'),
@@ -49,16 +57,17 @@ class Builder:
                                     self.config,
                                     self.db,
                                     self.md)
-        parser.parse()
+        parser.parse_files()
 
         # just so i don't have to pass these vars to all the functions
-        self.all_pages = parser.all_pages
-        self.updated_pages = parser.updated_pages
+        self.all_files = parser.all_files
+        self.updated_files = parser.updated_files
         self.all_tags = parser.all_tags
 
         # dict for the keyword args to pass to the template renderer
+        log.debug('adding config, all_pages and all_tags to exposed vars for jinja')
         self.common_vars = dict(config=self.config,
-                                all_pages=self.all_pages,
+                                all_pages=self.all_files,
                                 all_tags=self.all_tags)
 
         self.__render_articles()
@@ -69,16 +78,18 @@ class Builder:
 
 
     def __create_dir_structure(self) -> None:
+        log.debug('creating dir structure')
+        dir_path: str = None
         for d in self.dirs:
-            # for the dir structure,
-            # doesn't matter if the dir already exists
-            try:
-                os.makedirs(os.path.join(self.config.get('path', 'dst'), d))
-            except FileExistsError:
-                pass
+            dir_path = os.path.join(self.config.get('path', 'dst'), d)
+            create_dir(dir_path, True)
 
 
     def __copy_html_files(self) -> None:
+        if len(self.html_files) > 0:
+            log.debug('copying all html files')
+        else:
+            log.debug('no html files to copy')
         src_file: str = None
         dst_file: str = None
 
@@ -88,53 +99,73 @@ class Builder:
 
             # only copy files if they have been modified (or are new)
             if self.db.update(src_file, remove=f'{self.config.get("path", "src")}/'):
-                shutil.copy2(src_file, dst_file)
+                log.debug('file "%s" has been modified or is new, copying', f)
+                copy_file(src_file, dst_file)
+            else:
+                if self.config.getboolean('other', 'force'):
+                    log.debug('file "%s" hasn\'t been modified, but option force is set to true, copying anyways', f)
+                    copy_file(src_file, dst_file)
+                else:
+                    log.debug('file "%s" hasn\'t been modified, ignoring', f)
 
 
     def __render_articles(self) -> None:
+        log.debug('rendering html')
         article_vars: dict = deepcopy(self.common_vars)
+        temp_files: list[Page] = None
+
         # check if only updated should be created
         if self.config.getboolean('other', 'force'):
-            for p in self.all_pages:
-                article_vars['page'] = p
-                self.__render_template("page.html",
-                                       p.name.replace('.md','.html'),
-                                       **article_vars)
+            log.debug('all html will be rendered, force is set to true')
+            temp_files = self.all_files
         else:
-            for p in self.updated_pages:
-                article_vars['page'] = p
-                self.__render_template("page.html",
-                                       p.name.replace('.md','.html'),
-                                       **article_vars)
+            log.debug('only updated or new html will be rendered')
+            temp_files = self.updated_files
+
+        for p in temp_files:
+            log.debug('adding page to exposed vars for jinja')
+            article_vars['page'] = p
+            # actually render article
+            self.__render_template("page.html",
+                                   p.name.replace('.md','.html'),
+                                   **article_vars)
 
 
     def __render_tags(self) -> None:
+        log.debug('rendering tags')
         tag_vars: dict = deepcopy(self.common_vars)
+        tag_pages: list[Page] = None
         for t in self.all_tags:
-            # get a list of all pages that have current tag
-            tag_pages: list[Page] = []
-            for p in self.all_pages:
+            log.debug('rendering tag "%s"', t[0])
+            # clean tag_pages
+            tag_pages = []
+            log.debug('adding all pages that contain current tag')
+            for p in self.all_files:
                 if p.tags is not None and t[0] in list(map(itemgetter(0),
                                                            p.tags)):
+                    log.debug('adding page "%s" as it contains tag "%s"',
+                              p.name, t[0])
                     tag_pages.append(p)
 
+            log.debug('adding tag and tag_pages to exposed vars for jinja')
             tag_vars['tag'] = t
             tag_vars['tag_pages'] = tag_pages
 
-            # build tag page
+            # actually render tag page
             self.__render_template('tag.html',
                                    f'tag/@{t[0]}.html',
                                    **tag_vars)
 
-            # clean list of pages with current tag
-            tag_pages = []
-
 
     def __render_template(self, template_name: str,
                           file_name: str,
                           **template_vars) -> None:
+        log.debug('rendering html "%s" with template "%s"',
+                  file_name, template_name)
         template: Template = self.env.get_template(template_name)
         content: str = template.render(**template_vars)
+        dst_path: str = os.path.join(self.config.get('path', 'dst'), file_name)
 
-        with open(os.path.join(self.config.get('path', 'dst'), file_name), 'w') as f:
+        log.debug('writing html file to path "%s"', dst_path)
+        with open(dst_path, 'w') as f:
             f.write(content)
diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py
new file mode 100644
index 0000000..b00da19
--- /dev/null
+++ b/src/pyssg/md_parser.py
@@ -0,0 +1,98 @@
+import os
+from operator import itemgetter
+from markdown import Markdown
+from configparser import ConfigParser
+import logging
+from logging import Logger
+
+from .database import Database
+from .page import Page
+
+log: Logger = logging.getLogger(__name__)
+
+
+# page and file are basically synonyms here...
+class MDParser:
+    def __init__(self, files: list[str],
+                 config: ConfigParser,
+                 db: Database,
+                 md: Markdown):
+        log.debug('initializing the md parser with %d files', len(files))
+        self.files: list[str] = files
+
+        self.config: ConfigParser = config
+        self.db: Database = db
+        self.md: Markdown = md
+
+        self.all_files: list[Page] = None
+        # updated and modified are synonyms here
+        self.updated_files: list[Page] = None
+        self.all_tags: list[tuple[str]] = None
+
+
+    def parse_files(self) -> None:
+        log.debug('parsing all files')
+        # initialize lists
+        self.all_files = []
+        self.updated_files = []
+        self.all_tags = []
+        # not used, not sure why i had this
+        # all_tag_names: list[str] = []
+
+        for f in self.files:
+            log.debug('parsing file "%s"', f)
+            src_file: str = os.path.join(self.config.get('path', 'src'), f)
+            log.debug('path "%s"', src_file)
+            # get flag if update is successful
+            file_updated: bool = self.db.update(src_file, remove=f'{self.config.get("path", "src")}/')
+
+            log.debug('parsing md into html')
+            content: str = self.md.reset().convert(open(src_file).read())
+            page: Page = Page(f,
+                              self.db.e[f][0],
+                              self.db.e[f][1],
+                              content,
+                              self.md.Meta,
+                              self.config)
+            page.parse_metadata()
+
+            # keep a separated list for all and updated pages
+            if file_updated:
+                log.debug('has been modified, adding to mod file list')
+                self.updated_files.append(page)
+            log.debug('adding to file list')
+            self.all_files.append(page)
+
+            # parse tags
+            if page.tags is not None:
+                log.debug('parsing tags')
+                # add its tag to corresponding db entry if existent
+                self.db.update_tags(f, list(map(itemgetter(0), page.tags)))
+
+                log.debug('add all tags to tag list')
+                for t in page.tags:
+                    if t[0] not in list(map(itemgetter(0), self.all_tags)):
+                        log.debug('adding tag "%s" as it\'s not present in tag list', t[0])
+                        self.all_tags.append(t)
+                    else:
+                        log.debug('ignoring tag "%s" as it\'s present in tag list', t[0])
+            else:
+                log.debug('no tags to parse')
+
+        log.debug('sorting all lists for consistency')
+        self.all_tags.sort(key=itemgetter(0))
+        self.updated_files.sort(reverse=True)
+        self.all_files.sort(reverse=True)
+
+        pages_amount: int = len(self.all_files)
+        # note that prev and next are switched because of the
+        # reverse ordering of all_files
+        log.debug('update next and prev attributes')
+        for i, p in enumerate(self.all_files):
+            if i != 0:
+                next_page: Page = self.all_files[i - 1]
+                p.next = next_page
+
+            if i != pages_amount - 1:
+                prev_page: Page = self.all_files[i + 1]
+                p.previous = prev_page
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 784749c..dd3881c 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -1,6 +1,12 @@
+import sys
 from datetime import datetime, timezone
+import logging
+from logging import Logger
 
 from configparser import ConfigParser
+from re import L
+
+log: Logger = logging.getLogger(__name__)
 
 
 class Page:
@@ -11,6 +17,7 @@ class Page:
                  html: str,
                  meta: dict,
                  config: ConfigParser):
+        log.debug('initializing the page object with name "%s"', name)
         # initial data
         self.name: str = name
         self.ctimestamp: float = ctime
@@ -56,14 +63,15 @@ class Page:
 
     # parses meta from self.meta, for og, it prioritizes,
     # the actual og meta
-    def parse(self):
-        # required meta elements
+    def parse_metadata(self):
+        log.debug('parsing metadata for file "%s"', self.name)
+        log.debug('parsing required metadata')
         self.title = self.meta['title'][0]
         self.author = self.meta['author'][0]
         self.summary = self.meta['summary'][0]
         self.lang = self.meta['lang'][0]
 
-        # dates
+        log.debug('parsing timestamp')
         self.cdatetime = datetime.fromtimestamp(self.ctimestamp,
                                                  tz=timezone.utc)
         self.cdate = self.cdatetime.strftime(self.config.get('fmt', 'date'))
@@ -73,8 +81,8 @@ class Page:
         self.cdate_sitemap = \
         self.cdatetime.strftime(self.config.get('fmt', 'sitemap_date'))
 
-        # only if file/page has been modified
         if self.mtimestamp != 0.0:
+            log.debug('parsing modified timestamp')
             self.mdatetime = datetime.fromtimestamp(self.mtimestamp,
                                                      tz=timezone.utc)
             self.mdate = self.mdatetime.strftime(self.config.get('fmt', 'date'))
@@ -83,36 +91,49 @@ class Page:
             self.mdate_rss = self.mdatetime.strftime(self.config.get('fmt', 'rss_date'))
             self.mdate_sitemap = \
             self.mdatetime.strftime(self.config.get('fmt', 'sitemap_date'))
+        else:
+            log.debug('not parsing modified timestamp, hasn\'t been modified')
 
-        # not always contains tags
         try:
             tags_only: list[str] = self.meta['tags']
+            log.debug('parsing tags')
             tags_only.sort()
 
             for t in tags_only:
                 self.tags.append((t,
                                   f'{self.config.get("url", "main")}/tag/@{t}.html'))
-        except KeyError: pass
+        except KeyError:
+            log.debug('not parsing tags, doesn\'t have any')
 
+        log.debug('parsing url')
         self.url = f'{self.config.get("url", "main")}/{self.name.replace(".md", ".html")}'
+        log.debug('final url "%s"', self.url)
 
+        log.debug('parsing image url')
         try:
             self.image_url = \
             f'{self.config.get("url", "static")}/{self.meta["image_url"][0]}'
         except KeyError:
+            log.debug('using default image, no image_url tag found')
             self.image_url = \
             f'{self.config.get("url", "static")}/{self.config.get("url", "default_image")}'
+        log.debug('final image url "%s"', self.image_url)
 
         # if contains open graph elements
         try:
             # og_e = object graph entry
-            for og_e in self.meta['og']:
+            og_elements: list[str] = self.meta['og']
+            log.debug('parsing og metadata')
+            for og_e in og_elements:
                 kv: str = og_e.split(',', 1)
                 if len(kv) != 2:
-                    raise Exception('invalid og syntax')
+                    log.error('invalid og syntax for "%s", needs to be "k, v"', og_e)
+                    sys.exit(1)
 
                 k: str = kv[0].strip()
                 v: str = kv[1].strip()
 
+                log.debug('og element: ("%s", "%s")', k, v)
                 self.og[k] = v
-        except KeyError: pass
+        except KeyError:
+            log.debug('no og metadata found')
diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py
deleted file mode 100644
index 2888fcb..0000000
--- a/src/pyssg/parser.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-from operator import itemgetter
-from markdown import Markdown
-from configparser import ConfigParser
-
-from .database import Database
-from .page import Page
-
-
-# parser of md files, stores list of pages and tags
-class MDParser:
-    def __init__(self, files: list[str],
-                 config: ConfigParser,
-                 db: Database,
-                 md: Markdown):
-        self.files: list[str] = files
-
-        self.config: ConfigParser = config
-        self.db: Database = db
-        self.md: Markdown = md
-
-        self.all_pages: list[Page] = None
-        self.updated_pages: list[Page] = None
-        self.all_tags: list[tuple[str]] = None
-
-
-    def parse(self) -> None:
-        # initialize lists
-        self.all_pages = []
-        self.updated_pages = []
-        self.all_tags = []
-        # not used, not sure why i had this
-        # all_tag_names: list[str] = []
-
-        for f in self.files:
-            src_file: str = os.path.join(self.config.get('path', 'src'), f)
-            # get flag if update is successful
-            updated: bool = self.db.update(src_file, remove=f'{self.config.get("path", "src")}/')
-
-            content: str = self.md.reset().convert(open(src_file).read())
-            page: Page = Page(f,
-                              self.db.e[f][0],
-                              self.db.e[f][1],
-                              content,
-                              self.md.Meta,
-                              self.config)
-            page.parse()
-
-            # keep a separated list for all and updated pages
-            if updated:
-                self.updated_pages.append(page)
-            self.all_pages.append(page)
-
-            # parse tags
-            if page.tags is not None:
-                # add its tag to corresponding db entry if existent
-                self.db.update_tags(f, list(map(itemgetter(0), page.tags)))
-
-                # update all_tags attribute
-                for t in page.tags:
-                    if t[0] not in list(map(itemgetter(0), self.all_tags)):
-                        self.all_tags.append(t)
-
-        # sort list of tags for consistency
-        self.all_tags.sort(key=itemgetter(0))
-        self.updated_pages.sort(reverse=True)
-        self.all_pages.sort(reverse=True)
-
-        pages_amount: int = len(self.all_pages)
-        # note that prev and next are switched because of the reverse rodering
-        # of all_pages
-        for i, p in enumerate(self.all_pages):
-            if i != 0:
-                next_page: Page = self.all_pages[i - 1]
-                p.next = next_page
-
-            if i != pages_amount - 1:
-                prev_page: Page = self.all_pages[i + 1]
-                p.previous = prev_page
diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py
index 2194fe1..a24d7ca 100644
--- a/src/pyssg/utils.py
+++ b/src/pyssg/utils.py
@@ -20,7 +20,7 @@ def create_dir(path: str, p: bool=False) -> None:
 
 def copy_file(src: str, dst: str) -> None:
     if not os.path.exists(dst):
-        shutil.copy(src, dst)
+        shutil.copy2(src, dst)
         log.info('copied file "%s" to "%s"', src, dst)
     else:
         log.info('file "%s" already exists, ignoring', dst)
-- 
cgit v1.2.3-70-g09d2