summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Luevano Alvarado <david@luevano.xyz>2022-04-18 22:27:42 -0600
committerDavid Luevano Alvarado <david@luevano.xyz>2022-04-18 22:27:42 -0600
commite8ce49590ed5a73f72e45c8ae2e021ddcf3dbf12 (patch)
tree0295d32cf3137eea1b13642cf3addd0d5f9f5468
parentc888faca827733136edc1fa7ab310b702d43d7ab (diff)
add logging to builder, md_parser and page, and minor code refactor
-rw-r--r--src/pyssg/builder.py95
-rw-r--r--src/pyssg/md_parser.py98
-rw-r--r--src/pyssg/page.py39
-rw-r--r--src/pyssg/parser.py79
-rw-r--r--src/pyssg/utils.py2
5 files changed, 192 insertions, 121 deletions
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index 130062e..e7a49fe 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -5,18 +5,24 @@ from operator import itemgetter
from jinja2 import Environment, Template
from markdown import Markdown
from configparser import ConfigParser
+import logging
+from logging import Logger
+from .utils import create_dir, copy_file
from .database import Database
-from .parser import MDParser
+from .md_parser import MDParser
from .page import Page
from .discovery import get_file_list, get_dir_structure
+log: Logger = logging.getLogger(__name__)
+
class Builder:
def __init__(self, config: ConfigParser,
env: Environment,
db: Database,
md: Markdown):
+ log.debug('initializing site builder')
self.config: ConfigParser = config
self.env: Environment = env
self.db: Database = db
@@ -26,13 +32,15 @@ class Builder:
self.md_files: list[str] = None
self.html_files: list[str] = None
- self.all_pages: list[Page] = None
- self.updated_pages: list[Page] = None
+ # files and pages are synonyms
+ self.all_files: list[Page] = None
+ self.updated_files: list[Page] = None
self.all_tags: list[str] = None
self.common_vars: dict = None
def build(self) -> None:
+ log.debug('building site')
self.dirs = get_dir_structure(self.config.get('path', 'src'),
['templates'])
self.md_files = get_file_list(self.config.get('path', 'src'),
@@ -49,16 +57,17 @@ class Builder:
self.config,
self.db,
self.md)
- parser.parse()
+ parser.parse_files()
# just so i don't have to pass these vars to all the functions
- self.all_pages = parser.all_pages
- self.updated_pages = parser.updated_pages
+ self.all_files = parser.all_files
+ self.updated_files = parser.updated_files
self.all_tags = parser.all_tags
# dict for the keyword args to pass to the template renderer
+ log.debug('adding config, all_pages and all_tags to exposed vars for jinja')
self.common_vars = dict(config=self.config,
- all_pages=self.all_pages,
+ all_pages=self.all_files,
all_tags=self.all_tags)
self.__render_articles()
@@ -69,16 +78,18 @@ class Builder:
def __create_dir_structure(self) -> None:
+ log.debug('creating dir structure')
+ dir_path: str = None
for d in self.dirs:
- # for the dir structure,
- # doesn't matter if the dir already exists
- try:
- os.makedirs(os.path.join(self.config.get('path', 'dst'), d))
- except FileExistsError:
- pass
+ dir_path = os.path.join(self.config.get('path', 'dst'), d)
+ create_dir(dir_path, True)
def __copy_html_files(self) -> None:
+ if len(self.html_files) > 0:
+ log.debug('copying all html files')
+ else:
+ log.debug('no html files to copy')
src_file: str = None
dst_file: str = None
@@ -88,53 +99,73 @@ class Builder:
# only copy files if they have been modified (or are new)
if self.db.update(src_file, remove=f'{self.config.get("path", "src")}/'):
- shutil.copy2(src_file, dst_file)
+ log.debug('file "%s" has been modified or is new, copying', f)
+ copy_file(src_file, dst_file)
+ else:
+ if self.config.getboolean('other', 'force'):
+ log.debug('file "%s" hasn\'t been modified, but option force is set to true, copying anyways', f)
+ copy_file(src_file, dst_file)
+ else:
+ log.debug('file "%s" hasn\'t been modified, ignoring', f)
def __render_articles(self) -> None:
+ log.debug('rendering html')
article_vars: dict = deepcopy(self.common_vars)
+ temp_files: list[Page] = None
+
# check if only updated should be created
if self.config.getboolean('other', 'force'):
- for p in self.all_pages:
- article_vars['page'] = p
- self.__render_template("page.html",
- p.name.replace('.md','.html'),
- **article_vars)
+ log.debug('all html will be rendered, force is set to true')
+ temp_files = self.all_files
else:
- for p in self.updated_pages:
- article_vars['page'] = p
- self.__render_template("page.html",
- p.name.replace('.md','.html'),
- **article_vars)
+ log.debug('only updated or new html will be rendered')
+ temp_files = self.updated_files
+
+ for p in temp_files:
+ log.debug('adding page to exposed vars for jinja')
+ article_vars['page'] = p
+ # actually render article
+ self.__render_template("page.html",
+ p.name.replace('.md','.html'),
+ **article_vars)
def __render_tags(self) -> None:
+ log.debug('rendering tags')
tag_vars: dict = deepcopy(self.common_vars)
+ tag_pages: list[Page] = None
for t in self.all_tags:
- # get a list of all pages that have current tag
- tag_pages: list[Page] = []
- for p in self.all_pages:
+ log.debug('rendering tag "%s"', t[0])
+ # clean tag_pages
+ tag_pages = []
+ log.debug('adding all pages that contain current tag')
+ for p in self.all_files:
if p.tags is not None and t[0] in list(map(itemgetter(0),
p.tags)):
+ log.debug('adding page "%s" as it contains tag "%s"',
+ p.name, t[0])
tag_pages.append(p)
+ log.debug('adding tag and tag_pages to exposed vars for jinja')
tag_vars['tag'] = t
tag_vars['tag_pages'] = tag_pages
- # build tag page
+ # actually render tag page
self.__render_template('tag.html',
f'tag/@{t[0]}.html',
**tag_vars)
- # clean list of pages with current tag
- tag_pages = []
-
def __render_template(self, template_name: str,
file_name: str,
**template_vars) -> None:
+ log.debug('rendering html "%s" with template "%s"',
+ file_name, template_name)
template: Template = self.env.get_template(template_name)
content: str = template.render(**template_vars)
+ dst_path: str = os.path.join(self.config.get('path', 'dst'), file_name)
- with open(os.path.join(self.config.get('path', 'dst'), file_name), 'w') as f:
+ log.debug('writing html file to path "%s"', dst_path)
+ with open(dst_path, 'w') as f:
f.write(content)
diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py
new file mode 100644
index 0000000..b00da19
--- /dev/null
+++ b/src/pyssg/md_parser.py
@@ -0,0 +1,98 @@
+import os
+from operator import itemgetter
+from markdown import Markdown
+from configparser import ConfigParser
+import logging
+from logging import Logger
+
+from .database import Database
+from .page import Page
+
+log: Logger = logging.getLogger(__name__)
+
+
+# page and file are basically synonyms here...
+class MDParser:
+ def __init__(self, files: list[str],
+ config: ConfigParser,
+ db: Database,
+ md: Markdown):
+ log.debug('initializing the md parser with %d files', len(files))
+ self.files: list[str] = files
+
+ self.config: ConfigParser = config
+ self.db: Database = db
+ self.md: Markdown = md
+
+ self.all_files: list[Page] = None
+ # updated and modified are synonyms here
+ self.updated_files: list[Page] = None
+ self.all_tags: list[tuple[str]] = None
+
+
+ def parse_files(self) -> None:
+ log.debug('parsing all files')
+ # initialize lists
+ self.all_files = []
+ self.updated_files = []
+ self.all_tags = []
+ # not used, not sure why i had this
+ # all_tag_names: list[str] = []
+
+ for f in self.files:
+ log.debug('parsing file "%s"', f)
+ src_file: str = os.path.join(self.config.get('path', 'src'), f)
+ log.debug('path "%s"', src_file)
+ # get flag if update is successful
+ file_updated: bool = self.db.update(src_file, remove=f'{self.config.get("path", "src")}/')
+
+ log.debug('parsing md into html')
+ content: str = self.md.reset().convert(open(src_file).read())
+ page: Page = Page(f,
+ self.db.e[f][0],
+ self.db.e[f][1],
+ content,
+ self.md.Meta,
+ self.config)
+ page.parse_metadata()
+
+ # keep a separated list for all and updated pages
+ if file_updated:
+ log.debug('has been modified, adding to mod file list')
+ self.updated_files.append(page)
+ log.debug('adding to file list')
+ self.all_files.append(page)
+
+ # parse tags
+ if page.tags is not None:
+ log.debug('parsing tags')
+ # add its tag to corresponding db entry if existent
+ self.db.update_tags(f, list(map(itemgetter(0), page.tags)))
+
+ log.debug('add all tags to tag list')
+ for t in page.tags:
+ if t[0] not in list(map(itemgetter(0), self.all_tags)):
+ log.debug('adding tag "%s" as it\'s not present in tag list', t[0])
+ self.all_tags.append(t)
+ else:
+ log.debug('ignoring tag "%s" as it\'s present in tag list', t[0])
+ else:
+ log.debug('no tags to parse')
+
+ log.debug('sorting all lists for consistency')
+ self.all_tags.sort(key=itemgetter(0))
+ self.updated_files.sort(reverse=True)
+ self.all_files.sort(reverse=True)
+
+ pages_amount: int = len(self.all_files)
+ # note that prev and next are switched because of the
+ # reverse ordering of all_files
+ log.debug('update next and prev attributes')
+ for i, p in enumerate(self.all_files):
+ if i != 0:
+ next_page: Page = self.all_files[i - 1]
+ p.next = next_page
+
+ if i != pages_amount - 1:
+ prev_page: Page = self.all_files[i + 1]
+ p.previous = prev_page
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 784749c..dd3881c 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -1,6 +1,12 @@
+import sys
from datetime import datetime, timezone
+import logging
+from logging import Logger
from configparser import ConfigParser
+from re import L
+
+log: Logger = logging.getLogger(__name__)
class Page:
@@ -11,6 +17,7 @@ class Page:
html: str,
meta: dict,
config: ConfigParser):
+ log.debug('initializing the page object with name "%s"', name)
# initial data
self.name: str = name
self.ctimestamp: float = ctime
@@ -56,14 +63,15 @@ class Page:
# parses meta from self.meta, for og, it prioritizes,
# the actual og meta
- def parse(self):
- # required meta elements
+ def parse_metadata(self):
+ log.debug('parsing metadata for file "%s"', self.name)
+ log.debug('parsing required metadata')
self.title = self.meta['title'][0]
self.author = self.meta['author'][0]
self.summary = self.meta['summary'][0]
self.lang = self.meta['lang'][0]
- # dates
+ log.debug('parsing timestamp')
self.cdatetime = datetime.fromtimestamp(self.ctimestamp,
tz=timezone.utc)
self.cdate = self.cdatetime.strftime(self.config.get('fmt', 'date'))
@@ -73,8 +81,8 @@ class Page:
self.cdate_sitemap = \
self.cdatetime.strftime(self.config.get('fmt', 'sitemap_date'))
- # only if file/page has been modified
if self.mtimestamp != 0.0:
+ log.debug('parsing modified timestamp')
self.mdatetime = datetime.fromtimestamp(self.mtimestamp,
tz=timezone.utc)
self.mdate = self.mdatetime.strftime(self.config.get('fmt', 'date'))
@@ -83,36 +91,49 @@ class Page:
self.mdate_rss = self.mdatetime.strftime(self.config.get('fmt', 'rss_date'))
self.mdate_sitemap = \
self.mdatetime.strftime(self.config.get('fmt', 'sitemap_date'))
+ else:
+ log.debug('not parsing modified timestamp, hasn\'t been modified')
- # not always contains tags
try:
tags_only: list[str] = self.meta['tags']
+ log.debug('parsing tags')
tags_only.sort()
for t in tags_only:
self.tags.append((t,
f'{self.config.get("url", "main")}/tag/@{t}.html'))
- except KeyError: pass
+ except KeyError:
+ log.debug('not parsing tags, doesn\'t have any')
+ log.debug('parsing url')
self.url = f'{self.config.get("url", "main")}/{self.name.replace(".md", ".html")}'
+ log.debug('final url "%s"', self.url)
+ log.debug('parsing image url')
try:
self.image_url = \
f'{self.config.get("url", "static")}/{self.meta["image_url"][0]}'
except KeyError:
+ log.debug('using default image, no image_url tag found')
self.image_url = \
f'{self.config.get("url", "static")}/{self.config.get("url", "default_image")}'
+ log.debug('final image url "%s"', self.image_url)
# if contains open graph elements
try:
# og_e = open graph entry
- for og_e in self.meta['og']:
+ og_elements: list[str] = self.meta['og']
+ log.debug('parsing og metadata')
+ for og_e in og_elements:
kv: str = og_e.split(',', 1)
if len(kv) != 2:
- raise Exception('invalid og syntax')
+ log.error('invalid og syntax for "%s", needs to be "k, v"', og_e)
+ sys.exit(1)
k: str = kv[0].strip()
v: str = kv[1].strip()
+ log.debug('og element: ("%s", "%s")', k, v)
self.og[k] = v
- except KeyError: pass
+ except KeyError:
+ log.debug('no og metadata found')
diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py
deleted file mode 100644
index 2888fcb..0000000
--- a/src/pyssg/parser.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-from operator import itemgetter
-from markdown import Markdown
-from configparser import ConfigParser
-
-from .database import Database
-from .page import Page
-
-
-# parser of md files, stores list of pages and tags
-class MDParser:
- def __init__(self, files: list[str],
- config: ConfigParser,
- db: Database,
- md: Markdown):
- self.files: list[str] = files
-
- self.config: ConfigParser = config
- self.db: Database = db
- self.md: Markdown = md
-
- self.all_pages: list[Page] = None
- self.updated_pages: list[Page] = None
- self.all_tags: list[tuple[str]] = None
-
-
- def parse(self) -> None:
- # initialize lists
- self.all_pages = []
- self.updated_pages = []
- self.all_tags = []
- # not used, not sure why i had this
- # all_tag_names: list[str] = []
-
- for f in self.files:
- src_file: str = os.path.join(self.config.get('path', 'src'), f)
- # get flag if update is successful
- updated: bool = self.db.update(src_file, remove=f'{self.config.get("path", "src")}/')
-
- content: str = self.md.reset().convert(open(src_file).read())
- page: Page = Page(f,
- self.db.e[f][0],
- self.db.e[f][1],
- content,
- self.md.Meta,
- self.config)
- page.parse()
-
- # keep a separated list for all and updated pages
- if updated:
- self.updated_pages.append(page)
- self.all_pages.append(page)
-
- # parse tags
- if page.tags is not None:
- # add its tag to corresponding db entry if existent
- self.db.update_tags(f, list(map(itemgetter(0), page.tags)))
-
- # update all_tags attribute
- for t in page.tags:
- if t[0] not in list(map(itemgetter(0), self.all_tags)):
- self.all_tags.append(t)
-
- # sort list of tags for consistency
- self.all_tags.sort(key=itemgetter(0))
- self.updated_pages.sort(reverse=True)
- self.all_pages.sort(reverse=True)
-
- pages_amount: int = len(self.all_pages)
- # note that prev and next are switched because of the reverse rodering
- # of all_pages
- for i, p in enumerate(self.all_pages):
- if i != 0:
- next_page: Page = self.all_pages[i - 1]
- p.next = next_page
-
- if i != pages_amount - 1:
- prev_page: Page = self.all_pages[i + 1]
- p.previous = prev_page
diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py
index 2194fe1..a24d7ca 100644
--- a/src/pyssg/utils.py
+++ b/src/pyssg/utils.py
@@ -20,7 +20,7 @@ def create_dir(path: str, p: bool=False) -> None:
def copy_file(src: str, dst: str) -> None:
if not os.path.exists(dst):
- shutil.copy(src, dst)
+ shutil.copy2(src, dst)
log.info('copied file "%s" to "%s"', src, dst)
else:
log.info('file "%s" already exists, ignoring', dst)