diff options
Diffstat (limited to 'src/pyssg/md_parser.py')
-rw-r--r-- | src/pyssg/md_parser.py | 98 |
1 files changed, 98 insertions, 0 deletions
# From diff: src/pyssg/md_parser.py (new file mode 100644, index 0000000..b00da19)
import logging
import os
from configparser import ConfigParser
from logging import Logger
from operator import itemgetter

from markdown import Markdown

from .database import Database
from .page import Page

log: Logger = logging.getLogger(__name__)


# page and file is basically a synonym here...
class MDParser:
    """Convert a list of markdown source files into ``Page`` objects.

    After ``parse_files()`` has run, the results are available through:

    * ``all_files``: every parsed page, sorted in reverse order.
    * ``updated_files``: the subset of pages for which ``db.update()``
      returned a truthy flag, sorted in reverse order.
    * ``all_tags``: one tag tuple per distinct tag name (first occurrence
      wins), sorted by tag name.
    """

    def __init__(self, files: list[str],
                 config: ConfigParser,
                 db: Database,
                 md: Markdown):
        """Store the collaborators; no file is read until ``parse_files()``.

        Args:
            files: source file paths, relative to the ``[path] src``
                directory of ``config``.
            config: configuration; the ``path``/``src`` option is read
                while parsing and the whole object is handed to each Page.
            db: database whose ``update``, ``update_tags`` and ``e``
                members are used while parsing.
            md: Markdown instance used for the conversion; its ``Meta``
                attribute is read after each conversion.
        """
        log.debug('initializing the md parser with %d files', len(files))
        self.files: list[str] = files

        self.config: ConfigParser = config
        self.db: Database = db
        self.md: Markdown = md

        # the three result lists stay None until parse_files() is called
        self.all_files: list[Page] = None
        # updated and modified are synonyms here
        self.updated_files: list[Page] = None
        self.all_tags: list[tuple[str]] = None

    def parse_files(self) -> None:
        """Parse every file in ``self.files`` and fill the result lists.

        For each file this updates its database entry, converts the
        markdown to HTML, builds a ``Page`` from it and collects its tags.
        Afterwards all lists are sorted and each page's ``next`` and
        ``previous`` attributes are linked to its neighbours in
        ``all_files``.

        Calling this again discards the previous results and starts over.
        """
        log.debug('parsing all files')
        # initialize lists
        self.all_files = []
        self.updated_files = []
        self.all_tags = []
        # names already present in all_tags, for O(1) duplicate checks
        seen_tag_names: set[str] = set()

        for f in self.files:
            log.debug('parsing file "%s"', f)
            src_dir: str = self.config.get('path', 'src')
            src_file: str = os.path.join(src_dir, f)
            log.debug('path "%s"', src_file)
            # get flag if update is successful
            file_updated: bool = self.db.update(src_file,
                                                remove=f'{src_dir}/')

            log.debug('parsing md into html')
            # close the handle deterministically instead of leaking it;
            # the file is decoded with the platform default encoding
            with open(src_file) as md_file:
                raw_md: str = md_file.read()
            # reset() clears state left over from the previous conversion
            content: str = self.md.reset().convert(raw_md)
            # NOTE(review): db.e[f][0] and db.e[f][1] are presumably the
            # entry's creation and modification times - confirm in Database.
            # NOTE(review): md.Meta presumably requires the markdown "meta"
            # extension to be enabled on self.md - confirm at the call site.
            page: Page = Page(f,
                              self.db.e[f][0],
                              self.db.e[f][1],
                              content,
                              self.md.Meta,
                              self.config)
            page.parse_metadata()

            # keep a separated list for all and updated pages
            if file_updated:
                log.debug('has been modified, adding to mod file list')
                self.updated_files.append(page)
            log.debug('adding to file list')
            self.all_files.append(page)

            # parse tags
            if page.tags is None:
                log.debug('no tags to parse')
                continue

            log.debug('parsing tags')
            # add its tag to corresponding db entry if existent
            self.db.update_tags(f, [t[0] for t in page.tags])

            log.debug('add all tags to tag list')
            for t in page.tags:
                if t[0] in seen_tag_names:
                    log.debug('ignoring tag "%s" as it\'s present in tag list', t[0])
                    continue
                log.debug('adding tag "%s" as it\'s not present in tag list', t[0])
                seen_tag_names.add(t[0])
                self.all_tags.append(t)

        log.debug('sorting all lists for consistency')
        self.all_tags.sort(key=itemgetter(0))
        self.updated_files.sort(reverse=True)
        self.all_files.sort(reverse=True)

        # note that prev and next are switched because of the
        # reverse ordering of all_files: the page earlier in the list
        # is the "next" one, the page later in the list the "previous"
        log.debug('update next and prev attributes')
        for ahead, behind in zip(self.all_files, self.all_files[1:]):
            behind.next = ahead
            ahead.previous = behind