From e8ce49590ed5a73f72e45c8ae2e021ddcf3dbf12 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Mon, 18 Apr 2022 22:27:42 -0600 Subject: add logging to builder, md_parser and page, and minor code refactor --- src/pyssg/builder.py | 95 +++++++++++++++++++++++++++++++----------------- src/pyssg/md_parser.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/pyssg/page.py | 39 +++++++++++++++----- src/pyssg/parser.py | 79 ---------------------------------------- src/pyssg/utils.py | 2 +- 5 files changed, 192 insertions(+), 121 deletions(-) create mode 100644 src/pyssg/md_parser.py delete mode 100644 src/pyssg/parser.py diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 130062e..e7a49fe 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -5,18 +5,24 @@ from operator import itemgetter from jinja2 import Environment, Template from markdown import Markdown from configparser import ConfigParser +import logging +from logging import Logger +from .utils import create_dir, copy_file from .database import Database -from .parser import MDParser +from .md_parser import MDParser from .page import Page from .discovery import get_file_list, get_dir_structure +log: Logger = logging.getLogger(__name__) + class Builder: def __init__(self, config: ConfigParser, env: Environment, db: Database, md: Markdown): + log.debug('initializing site builder') self.config: ConfigParser = config self.env: Environment = env self.db: Database = db @@ -26,13 +32,15 @@ class Builder: self.md_files: list[str] = None self.html_files: list[str] = None - self.all_pages: list[Page] = None - self.updated_pages: list[Page] = None + # files and pages are synoyms + self.all_files: list[Page] = None + self.updated_files: list[Page] = None self.all_tags: list[str] = None self.common_vars: dict = None def build(self) -> None: + log.debug('building site') self.dirs = get_dir_structure(self.config.get('path', 'src'), ['templates']) self.md_files = get_file_list(self.config.get('path', 'src'), @@ -49,16 +57,17 @@ class Builder: self.config, self.db, self.md) - parser.parse() + parser.parse_files() # just so i don't have to pass these vars to all the functions - self.all_pages = parser.all_pages - self.updated_pages = parser.updated_pages + self.all_files = parser.all_files + self.updated_files = parser.updated_files self.all_tags = parser.all_tags # dict for the keyword args to pass to the template renderer + log.debug('adding config, all_pages and all_tags to exposed vars for jinja') self.common_vars = dict(config=self.config, - all_pages=self.all_pages, + all_pages=self.all_files, all_tags=self.all_tags) self.__render_articles() @@ -69,16 +78,18 @@ class Builder: def __create_dir_structure(self) -> None: + log.debug('creating dir structure') + dir_path: str = None for d in self.dirs: - # for the dir structure, - # doesn't matter if the dir already exists - try: - os.makedirs(os.path.join(self.config.get('path', 'dst'), d)) - except FileExistsError: - pass + dir_path = os.path.join(self.config.get('path', 'dst'), d) + create_dir(dir_path, True) def __copy_html_files(self) -> None: + if len(self.html_files) > 0: + log.debug('copying all html files') + else: + log.debug('no html files to copy') src_file: str = None dst_file: str = None @@ -88,53 +99,73 @@ class Builder: # only copy files if they have been modified (or are new) if self.db.update(src_file, remove=f'{self.config.get("path", "src")}/'): - shutil.copy2(src_file, dst_file) + log.debug('file "%s" has been modified or is new, copying', f) + copy_file(src_file, dst_file) + else: + if self.config.getboolean('other', 'force'): + log.debug('file "%s" hasn\'t been modified, but option force is set to true, copying anyways', f) + copy_file(src_file, dst_file) + else: + log.debug('file "%s" hasn\'t been modified, ignoring', f) def __render_articles(self) -> None: + log.debug('rendering html') article_vars: dict = deepcopy(self.common_vars) + temp_files: list[Page] = None + # check if only updated should be created if self.config.getboolean('other', 'force'): - for p in self.all_pages: - article_vars['page'] = p - self.__render_template("page.html", - p.name.replace('.md','.html'), - **article_vars) + log.debug('all html will be rendered, force is set to true') + temp_files = self.all_files else: - for p in self.updated_pages: - article_vars['page'] = p - self.__render_template("page.html", - p.name.replace('.md','.html'), - **article_vars) + log.debug('only updated or new html will be rendered') + temp_files = self.updated_files + + for p in temp_files: + log.debug('adding page to exposed vars for jinja') + article_vars['page'] = p + # actually render article + self.__render_template("page.html", + p.name.replace('.md','.html'), + **article_vars) def __render_tags(self) -> None: + log.debug('rendering tags') tag_vars: dict = deepcopy(self.common_vars) + tag_pages: list[Page] = None for t in self.all_tags: - # get a list of all pages that have current tag - tag_pages: list[Page] = [] - for p in self.all_pages: + log.debug('rendering tag "%s"', t[0]) + # clean tag_pages + tag_pages = [] + log.debug('adding all pages that contain current tag') + for p in self.all_files: if p.tags is not None and t[0] in list(map(itemgetter(0), p.tags)): + log.debug('adding page "%s" as it contains tag "%s"', + p.name, t[0]) tag_pages.append(p) + log.debug('adding tag and tag_pages to exposed vars for jinja') tag_vars['tag'] = t tag_vars['tag_pages'] = tag_pages - # build tag page + # actually render tag page self.__render_template('tag.html', f'tag/@{t[0]}.html', **tag_vars) - # clean list of pages with current tag - tag_pages = [] - def __render_template(self, template_name: str, file_name: str, **template_vars) -> None: + log.debug('rendering html "%s" with template "%s"', + file_name, template_name) template: Template = self.env.get_template(template_name) content: str = template.render(**template_vars) + dst_path: str = os.path.join(self.config.get('path', 'dst'), file_name) - with open(os.path.join(self.config.get('path', 'dst'), file_name), 'w') as f: + log.debug('writing html file to path "%s"', dst_path) + with open(dst_path, 'w') as f: f.write(content) diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py new file mode 100644 index 0000000..b00da19 --- /dev/null +++ b/src/pyssg/md_parser.py @@ -0,0 +1,98 @@ +import os +from operator import itemgetter +from markdown import Markdown +from configparser import ConfigParser +import logging +from logging import Logger + +from .database import Database +from .page import Page + +log: Logger = logging.getLogger(__name__) + + +# page and file is basically a synonym here... +class MDParser: + def __init__(self, files: list[str], + config: ConfigParser, + db: Database, + md: Markdown): + log.debug('initializing the md parser with %d files', len(files)) + self.files: list[str] = files + + self.config: ConfigParser = config + self.db: Database = db + self.md: Markdown = md + + self.all_files: list[Page] = None + # updated and modified are synonyms here + self.updated_files: list[Page] = None + self.all_tags: list[tuple[str]] = None + + + def parse_files(self) -> None: + log.debug('parsing all files') + # initialize lists + self.all_files = [] + self.updated_files = [] + self.all_tags = [] + # not used, not sure why i had this + # all_tag_names: list[str] = [] + + for f in self.files: + log.debug('parsing file "%s"', f) + src_file: str = os.path.join(self.config.get('path', 'src'), f) + log.debug('path "%s"', src_file) + # get flag if update is successful + file_updated: bool = self.db.update(src_file, remove=f'{self.config.get("path", "src")}/') + + log.debug('parsing md into html') + content: str = self.md.reset().convert(open(src_file).read()) + page: Page = Page(f, + self.db.e[f][0], + self.db.e[f][1], + content, + self.md.Meta, + self.config) + page.parse_metadata() + + # keep a separated list for all and updated pages + if file_updated: + log.debug('has been modified, adding to mod file list') + self.updated_files.append(page) + log.debug('adding to file list') + self.all_files.append(page) + + # parse tags + if page.tags is not None: + log.debug('parsing tags') + # add its tag to corresponding db entry if existent + self.db.update_tags(f, list(map(itemgetter(0), page.tags))) + + log.debug('add all tags to tag list') + for t in page.tags: + if t[0] not in list(map(itemgetter(0), self.all_tags)): + log.debug('adding tag "%s" as it\'s not present in tag list', t[0]) + self.all_tags.append(t) + else: + log.debug('ignoring tag "%s" as it\'s present in tag list', t[0]) + else: + log.debug('no tags to parse') + + log.debug('sorting all lists for consistency') + self.all_tags.sort(key=itemgetter(0)) + self.updated_files.sort(reverse=True) + self.all_files.sort(reverse=True) + + pages_amount: int = len(self.all_files) + # note that prev and next are switched because of the + # reverse ordering of all_pages + log.debug('update next and prev attributes') + for i, p in enumerate(self.all_files): + if i != 0: + next_page: Page = self.all_files[i - 1] + p.next = next_page + + if i != pages_amount - 1: + prev_page: Page = self.all_files[i + 1] + p.previous = prev_page diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 784749c..dd3881c 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -1,6 +1,12 @@ +import sys from datetime import datetime, timezone +import logging +from logging import Logger from configparser import ConfigParser +from re import L + +log: Logger = logging.getLogger(__name__) class Page: @@ -11,6 +17,7 @@ class Page: html: str, meta: dict, config: ConfigParser): + log.debug('initializing the page object with name "%s"', name) # initial data self.name: str = name self.ctimestamp: float = ctime @@ -56,14 +63,15 @@ class Page: # parses meta from self.meta, for og, it prioritizes, # the actual og meta - def parse(self): - # required meta elements + def parse_metadata(self): + log.debug('parsing metadata for file "%s"', self.name) + log.debug('parsing required metadata') self.title = self.meta['title'][0] self.author = self.meta['author'][0] self.summary = self.meta['summary'][0] self.lang = self.meta['lang'][0] - # dates + log.debug('parsing timestamp') self.cdatetime = datetime.fromtimestamp(self.ctimestamp, tz=timezone.utc) self.cdate = self.cdatetime.strftime(self.config.get('fmt', 'date')) @@ -73,8 +81,8 @@ class Page: self.cdate_sitemap = \ self.cdatetime.strftime(self.config.get('fmt', 'sitemap_date')) - # only if file/page has been modified if self.mtimestamp != 0.0: + log.debug('parsing modified timestamp') self.mdatetime = datetime.fromtimestamp(self.mtimestamp, tz=timezone.utc) self.mdate = self.mdatetime.strftime(self.config.get('fmt', 'date')) @@ -83,36 +91,49 @@ class Page: self.mdate_rss = self.mdatetime.strftime(self.config.get('fmt', 'rss_date')) self.mdate_sitemap = \ self.mdatetime.strftime(self.config.get('fmt', 'sitemap_date')) + else: + log.debug('not parsing modified timestamp, hasn\'t been modified') - # not always contains tags try: tags_only: list[str] = self.meta['tags'] + log.debug('parsing tags') tags_only.sort() for t in tags_only: self.tags.append((t, f'{self.config.get("url", "main")}/tag/@{t}.html')) - except KeyError: pass + except KeyError: + log.debug('not parsing tags, doesn\'t have any') + log.debug('parsing url') self.url = f'{self.config.get("url", "main")}/{self.name.replace(".md", ".html")}' + log.debug('final url "%s"', self.url) + log.debug('parsing image url') try: self.image_url = \ f'{self.config.get("url", "static")}/{self.meta["image_url"][0]}' except KeyError: + log.debug('using default image, no image_url tag found') self.image_url = \ f'{self.config.get("url", "static")}/{self.config.get("url", "default_image")}' + log.debug('final image url "%s"', self.image_url) # if contains open graph elements try: # og_e = object graph entry - for og_e in self.meta['og']: + og_elements: list[str] = self.meta['og'] + log.debug('parsing og metadata') + for og_e in og_elements: kv: str = og_e.split(',', 1) if len(kv) != 2: - raise Exception('invalid og syntax') + log.error('invalid og syntax for "%s", needs to be "k, v"', og_e) + sys.exit(1) k: str = kv[0].strip() v: str = kv[1].strip() + log.debug('og element: ("%s", "%s")', k, v) self.og[k] = v - except KeyError: pass + except KeyError: + log.debug('no og metadata found') diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py deleted file mode 100644 index 2888fcb..0000000 --- a/src/pyssg/parser.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -from operator import itemgetter -from markdown import Markdown -from configparser import ConfigParser - -from .database import Database -from .page import Page - - -# parser of md files, stores list of pages and tags -class MDParser: - def __init__(self, files: list[str], - config: ConfigParser, - db: Database, - md: Markdown): - self.files: list[str] = files - - self.config: ConfigParser = config - self.db: Database = db - self.md: Markdown = md - - self.all_pages: list[Page] = None - self.updated_pages: list[Page] = None - self.all_tags: list[tuple[str]] = None - - - def parse(self) -> None: - # initialize lists - self.all_pages = [] - self.updated_pages = [] - self.all_tags = [] - # not used, not sure why i had this - # all_tag_names: list[str] = [] - - for f in self.files: - src_file: str = os.path.join(self.config.get('path', 'src'), f) - # get flag if update is successful - updated: bool = self.db.update(src_file, remove=f'{self.config.get("path", "src")}/') - - content: str = self.md.reset().convert(open(src_file).read()) - page: Page = Page(f, - self.db.e[f][0], - self.db.e[f][1], - content, - self.md.Meta, - self.config) - page.parse() - - # keep a separated list for all and updated pages - if updated: - self.updated_pages.append(page) - self.all_pages.append(page) - - # parse tags - if page.tags is not None: - # add its tag to corresponding db entry if existent - self.db.update_tags(f, list(map(itemgetter(0), page.tags))) - - # update all_tags attribute - for t in page.tags: - if t[0] not in list(map(itemgetter(0), self.all_tags)): - self.all_tags.append(t) - - # sort list of tags for consistency - self.all_tags.sort(key=itemgetter(0)) - self.updated_pages.sort(reverse=True) - self.all_pages.sort(reverse=True) - - pages_amount: int = len(self.all_pages) - # note that prev and next are switched because of the reverse rodering - # of all_pages - for i, p in enumerate(self.all_pages): - if i != 0: - next_page: Page = self.all_pages[i - 1] - p.next = next_page - - if i != pages_amount - 1: - prev_page: Page = self.all_pages[i + 1] - p.previous = prev_page diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py index 2194fe1..a24d7ca 100644 --- a/src/pyssg/utils.py +++ b/src/pyssg/utils.py @@ -20,7 +20,7 @@ def create_dir(path: str, p: bool=False) -> None: def copy_file(src: str, dst: str) -> None: if not os.path.exists(dst): - shutil.copy(src, dst) + shutil.copy2(src, dst) log.info('copied file "%s" to "%s"', src, dst) else: log.info('file "%s" already exists, ignoring', dst) -- cgit v1.2.3-70-g09d2