diff options
Diffstat (limited to 'src/pyssg/md/parser.py')
-rw-r--r-- | src/pyssg/md/parser.py | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/src/pyssg/md/parser.py b/src/pyssg/md/parser.py new file mode 100644 index 0000000..34a8922 --- /dev/null +++ b/src/pyssg/md/parser.py @@ -0,0 +1,135 @@ +import os +from logging import Logger, getLogger +from typing import Any + +from markdown import Markdown +from yafg import YafgExtension +from pymdvar import VariableExtension +from markdown_checklist.extension import ChecklistExtension +from markdown.extensions.toc import TocExtension + +from pyssg.db.database import Database +from pyssg.md.page import Page +from pyssg.utils import get_file_stats + +log: Logger = getLogger(__name__) + + +# TODO: add configuration testing for extensions config (pymdvar for ex) +def get_md_obj(variables: dict[str, str], + enable_env: bool) -> Markdown: + exts: list = ['extra', + 'meta', + 'sane_lists', + 'smarty', + 'wikilinks', + TocExtension(permalink=True, + baselevel=2), + VariableExtension(variables=variables, + enable_env=enable_env), + # stripTitle generates an error when True, + # if there is no title attr + YafgExtension(stripTitle=False, + figureClass='', + figcaptionClass='', + figureNumbering=False, + figureNumberClass='number', + figureNumberText='Figure'), + ChecklistExtension(), + 'pymdownx.mark', + 'pymdownx.caret', + 'pymdownx.tilde'] + log.debug('list of md extensions: (%s)', + ', '.join([e if isinstance(e, str) else type(e).__name__ + for e in exts])) + # for some reason, the definition for output_format doesn't include html5 + # even though it is listed in the documentation, ignoring + return Markdown(extensions=exts, output_format='html5') # type: ignore + + +# page and file is basically a synonym +class MDParser: + def __init__(self, files: list[str], + config: dict, + dir_config: dict, + db: Database): + log.debug('initializing the md parser with %d files', len(files)) + self.files: list[str] = files + self.config: dict = config + self.dir_config: dict = dir_config + self.db: Database = db + # TODO: actually add extensions support, for now only pymdvar is configured + self.pymdvar_vars: dict[str, str] = dict() + self.pymdvar_enable_env: bool = False + if 'exts' in config and 'pymdvar' in config['exts']: + pymdvar: dict[str, Any] = config['exts']['pymdvar'] + if 'variables' in pymdvar and type(pymdvar['variables']) == dict: + self.pymdvar_vars = pymdvar['variables'] + if 'enable_env' in pymdvar and type(pymdvar['enable_env']) == bool: + self.pymdvar_enable_env = pymdvar['enable_env'] + log.debug('pymdvar_variables: %s', self.pymdvar_vars) + log.debug('pymdvar_enable_env: %s', self.pymdvar_enable_env) + + self.md: Markdown = get_md_obj(self.pymdvar_vars, self.pymdvar_enable_env) + + self.all_files: list[Page] = [] + self.all_tags: list[str] = [] + + def parse_files(self) -> None: + for i, f in enumerate(self.files): + log.debug('parsing file "%s"', f) + path: str = os.path.join(self.dir_config['src'], f) + content: str = self.md.reset().convert(open(path).read()) + fstats = get_file_stats(path) + chksm: str = fstats[0] + time: float = fstats[1] + + entry: tuple + # old entry + oentry: tuple | None = self.db.select(f) + if not oentry: + entry = self.db.insert(f, time, chksm) + else: + oe_chksm: str = oentry[3] + if chksm != oe_chksm: + entry = self.db.update(f, time, chksm) + else: + entry = oentry + + # ignoring md.Meta type as it is not yet defined + # (because it is from an extension) + page: Page = Page(f, + entry[1], + entry[2], + content, + self.md.toc, # type: ignore + self.md.toc_tokens, # type: ignore + self.md.Meta, # type: ignore + self.config) + page.parse_metadata() + self.all_files.append(page) + + if self.dir_config['tags']: + if entry[4] is not None: + if set(page.tags) != set(entry[4]): + self.db.update_tags(f, page.tags) + + for t in page.tags: + if t not in self.all_tags: + self.all_tags.append(t) + log.debug('added tag "%s" to all tags', t) + + self.all_files.sort(reverse=True) + self.all_tags.sort() + + pages_amount: int = len(self.all_files) + # note that prev and next are switched because of the + # reverse ordering of all_pages + for i, p in enumerate(self.all_files): + if i != 0: + next_page: Page = self.all_files[i - 1] + p.next = next_page + + if i != pages_amount - 1: + prev_page: Page = self.all_files[i + 1] + p.previous = prev_page |