From 1b2d6aff6ccf72fdb292a1f05bb41bf9633a8f55 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Tue, 25 Apr 2023 04:03:48 -0600 Subject: refactor tests and add more typing --- src/pyssg/configuration.py | 2 + src/pyssg/md_parser.py | 12 ++++-- src/pyssg/page.py | 101 ++++++++++++++++++++------------------------- src/pyssg/plt/default.yaml | 5 --- 4 files changed, 55 insertions(+), 65 deletions(-) (limited to 'src') diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py index e2dc26b..c7b4248 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -12,6 +12,7 @@ DEFAULT_CONFIG_PATH: str = '$XDG_CONFIG_HOME/pyssg/config.yaml' VERSION: str = version('pyssg') +# TODO: add checking for extensions config (such as pymdvar) def __check_well_formed_config(config: dict[str, Any], config_base: list[dict[str, Any]], prefix_key: str = '') -> None: @@ -80,6 +81,7 @@ def get_static_config(sc_package: str = 'static_config.yaml', log.debug('reading and setting static config') config: dict[str, Any] = get_parsed_yaml(sc_package, plt_resource)[0] + # TODO: move this to utils and update the tests def __time(fmt: str) -> str: return datetime.now(tz=timezone.utc).strftime(config['fmt'][fmt]) diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py index 3ef297d..3b62985 100644 --- a/src/pyssg/md_parser.py +++ b/src/pyssg/md_parser.py @@ -4,6 +4,7 @@ from logging import Logger, getLogger from markdown import Markdown from yafg import YafgExtension +from pymdvar import VariableExtension from markdown_checklist.extension import ChecklistExtension from .database import Database @@ -12,13 +13,17 @@ from .page import Page log: Logger = getLogger(__name__) -def _get_md_obj() -> Markdown: +# TODO: add configuration testing for extensions config (pymdvar for ex) +def get_md_obj(variables: dict[str, str] = dict(), + enable_env: bool = False) -> Markdown: exts: list = ['extra', 'meta', 'sane_lists', 'smarty', 'toc', 'wikilinks', + VariableExtension(variables=variables, + enable_env=enable_env), # stripTitle generates an error when True, # if there is no title attr YafgExtension(stripTitle=False, @@ -50,7 +55,7 @@ class MDParser: self.config: dict = config self.dir_config: dict = dir_config self.db: Database = db - self.md: Markdown = _get_md_obj() + self.md: Markdown = get_md_obj() self.all_files: list[Page] = [] self.all_tags: list[tuple[str, str]] = [] @@ -65,7 +70,8 @@ class MDParser: log.debug('parsing md into html') content: str = self.md.reset().convert(open(src_file).read()) - # ignoring md.Meta type as it is not yet defined (because it is from an extension) + # ignoring md.Meta type as it is not yet defined + # (because it is from an extension) page: Page = Page(f, self.db.e[f].ctimestamp, self.db.e[f].mtimestamp, diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 6b8916d..4902bea 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -1,22 +1,21 @@ -import sys from datetime import datetime, timezone from logging import Logger, getLogger +from typing import Any log: Logger = getLogger(__name__) class Page: - def __init__(self, - name: str, - ctime: float, - mtime: float, - html: str, - toc: str, - toc_tokens: list[str], - meta: dict, - config: dict, - dir_config: dict) -> None: - log.debug('initializing the page object with name "%s"', name) + def __init__(self, name: str, + ctime: float, + mtime: float, + html: str, + toc: str, + toc_tokens: list[str], + meta: dict[str, Any], + config: dict[str, Any], + dir_config: dict[str, Any]) -> None: + log.debug('initializing a page object with name "%s"', name) # initial data self.name: str = name self.ctimestamp: float = ctime @@ -24,9 +23,9 @@ class Page: self.content: str = html self.toc: str = toc self.toc_tokens: list[str] = toc_tokens - self.meta: dict = meta - self.config: dict = config - self.dir_config: dict = dir_config + self.meta: dict[str, Any] = meta + self.config: dict[str, Any] = config + self.dir_config: dict[str, Any] = dir_config # data from self.meta self.title: str @@ -48,25 +47,25 @@ class Page: self.next: Page | None = None self.previous: Page | None = None - # also from self.meta, but for og metadata - self.og: dict[str, str] = dict() - def __lt__(self, other): return self.ctimestamp < other.ctimestamp - def __get_meta(self, var: str, or_else: str | list[str]) -> str | list[str]: + def __get_meta(self, var: str, + or_else: str | list[str] = '') -> str | list[str] | Any: if var in self.meta: log.debug('getting metadata "%s"', var) return self.meta[var] else: - log.debug('getting metadata "%s" failed, using optional value "%s"', var, or_else) + log.debug('getting metadata "%s" failed, using optional value "%s"', + var, or_else) return or_else def cdate(self, format: str) -> str: if format in self.config['fmt']: return self.cdatetime.strftime(self.config['fmt'][format]) else: - log.warning('format "%s" not found in config["fmt"], returning empty string', format) + log.warning('format "%s" not found in config, returning ' + 'empty string', format) return '' def mdate(self, format: str) -> str: @@ -74,28 +73,32 @@ class Page: log.warning('no mdatetime found, can\'t return a formatted string') return '' if format in self.config['fmt']: - return self.mdatetime.strftime(self.config['fmt'][format]) # type: ignore + return self.mdatetime.strftime(self.config['fmt'][format]) else: - log.warning('format "%s" not found in config["fmt"], returning empty string', format) + log.warning('format "%s" not found in config, returning ' + 'empty string', format) return '' + def from_timestamp(self, timestamp: float) -> datetime: + return datetime.fromtimestamp(timestamp, tz=timezone.utc) + # parses meta from self.meta, for og, it prioritizes, # the actual og meta def parse_metadata(self): log.debug('parsing metadata for file "%s"', self.name) - self.title = self.__get_meta('title', [''])[0] + self.title = str(self.__get_meta('title')) self.author = list(self.__get_meta('author', [''])) - self.summary = self.__get_meta('summary', [''])[0] - self.lang = self.__get_meta('lang', ['en'])[0] + self.summary = str(self.__get_meta('summary')) + self.lang = str(self.__get_meta('lang', 'en')) log.debug('parsing timestamp') - self.cdatetime = datetime.fromtimestamp(self.ctimestamp, tz=timezone.utc) + self.cdatetime = self.from_timestamp(self.ctimestamp) self.cdate_rss = self.cdate('rss_date') self.cdate_sitemap = self.cdate('sitemap_date') if self.mtimestamp != 0.0: log.debug('parsing modified timestamp') - self.mdatetime = datetime.fromtimestamp(self.mtimestamp, tz=timezone.utc) + self.mdatetime = self.from_timestamp(self.mtimestamp) self.mdate_rss = self.mdate('rss_date') self.mdate_sitemap = self.mdate('sitemap_date') else: @@ -108,30 +111,35 @@ class Page: tags_only.sort() for t in tags_only: - # need to specify dir_config['url'] as it is a hardcoded tag url - self.tags.append((t, f'{self.dir_config["url"]}/tag/@{t}.html')) + # need to specify dir_config['url'] as it is + # a hardcoded tag url + tag_url: str = f'{self.dir_config["url"]}/tag/@{t}.html' + self.tags.append((t, tag_url)) else: log.debug('no tags to parse') - log.debug('parsing url') - # no need to specify dir_config['url'] as self.name already contains the relative url - self.url = f'{self.config["url"]["main"]}/{self.name.replace(".md", ".html")}' + log.debug('parsing page url') + # no need to specify dir_config['url'] as self.name already + # contains the relative url + name_html: str = self.name.replace(".md", ".html") + self.url = f'{self.config["url"]["main"]}/{name_html}' log.debug('final url "%s"', self.url) log.debug('parsing image url') default_image_url: str = '' if 'default_image' in self.config['url']: - log.debug('"default_image" url found, will use if no "image_url" is found') + log.debug('"default_image" url found, will use if no "image_url" ' + 'is found') default_image_url = self.config['url']['default_image'] image_url: str - image_url = self.__get_meta('image_url', [default_image_url])[0] + image_url = str(self.__get_meta('image_url', default_image_url)) if image_url != '': if 'static' in self.config['url']: self.image_url = f'{self.config["url"]["static"]}/{image_url}' else: - log.debug('no static url set, using main url, this could cause problems') + log.debug('no static url set, using main url') self.image_url = f'{self.config["url"]["main"]}/{image_url}' log.debug('final image url "%s"', self.image_url) else: @@ -139,24 +147,3 @@ class Page: log.debug('no image url set for the page, could be because no' ' "image_url" was found in the metadata and/or no ' ' "default_image" set in the config file') - - # if contains open graph elements - # TODO: better handle this part - # og_e = object graph entry - og_elements: list[str] = list(self.__get_meta('og', [])) - if og_elements: - log.debug('parsing og metadata') - for og_e in og_elements: - kv: list[str] = og_e.split(',', 1) - if len(kv) != 2: - log.error('invalid og syntax for "%s", needs to be "k, v"', og_e) - sys.exit(1) - - k: str = kv[0].strip() - v: str = kv[1].strip() - - log.debug('og element: ("%s", "%s")', k, v) - self.og[k] = v - - else: - log.debug('no tags to parse') diff --git a/src/pyssg/plt/default.yaml b/src/pyssg/plt/default.yaml index 0b722a6..ca2f7ad 100644 --- a/src/pyssg/plt/default.yaml +++ b/src/pyssg/plt/default.yaml @@ -10,12 +10,8 @@ path: db: !join [*root, ".files"] url: main: "https://example.com" - static: "https://static.example.com" - default_image: "images/default.png" fmt: date: "%a, %b %d, %Y @ %H:%M %Z" - list_date: "%b %d" - list_sep_date: "%B %Y" dirs: /: cfg: @@ -24,5 +20,4 @@ dirs: index: False rss: False sitemap: False - exclude_dirs: [] ... \ No newline at end of file -- cgit v1.2.3-70-g09d2