# src/pyssg/md_parser.py

import os
from operator import itemgetter
from markdown import Markdown
from configparser import ConfigParser
from logging import Logger, getLogger

from markdown import Markdown
from yafg import YafgExtension
from MarkdownHighlight.highlight import HighlightExtension
from markdown_checklist.extension import ChecklistExtension

from .database import Database
from .page import Page

log: Logger = getLogger(__name__)


def _get_md_obj() -> Markdown:
    """Create the Markdown converter used to turn source files into HTML.

    The converter is configured with a fixed set of built-in extensions
    (referenced by name) plus three extension instances (figures, highlight
    and checklist), and emits ``html5`` output.

    Returns:
        A new ``Markdown`` instance with all the extensions loaded.
    """
    # stripTitle generates an error when True,
    # if there is no title attr
    figure_ext = YafgExtension(stripTitle=False,
                               figureClass="",
                               figcaptionClass="",
                               figureNumbering=False,
                               figureNumberClass="number",
                               figureNumberText="Figure")
    extensions: list = [
        'extra',
        'meta',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks',
        figure_ext,
        HighlightExtension(),
        ChecklistExtension(),
    ]

    # extensions given by name are logged as is, instances by their class name
    ext_names = []
    for ext in extensions:
        if isinstance(ext, str):
            ext_names.append(ext)
        else:
            ext_names.append(type(ext).__name__)
    log.debug('list of md extensions: (%s)', ', '.join(ext_names))

    return Markdown(extensions=extensions, output_format='html5')


# "page" and "file" are used as synonyms here
class MDParser:
    """Convert markdown source files into ``Page`` objects.

    After ``parse_files`` has run, the results are available as:

    * ``all_files``: every parsed page.
    * ``updated_files``: only the pages whose database entry was reported
      as updated.
    * ``all_tags``: the tags found across all pages, one entry per tag name.
    """

    def __init__(self, files: list[str],
                 config: ConfigParser,
                 db: Database):
        """Store the inputs and create the markdown converter.

        Args:
            files: markdown file paths; ``parse_files`` joins each one onto
                the directory configured as option ``src`` of section ``path``.
            config: configuration, read by ``parse_files`` and handed to
                every ``Page`` that gets created.
            db: database that is updated with each parsed file and its tags.
        """
        log.debug('initializing the md parser with %d files', len(files))
        self.files: list[str] = files

        self.config: ConfigParser = config
        self.db: Database = db
        self.md: Markdown = _get_md_obj()

        # the result lists stay as None until parse_files is called
        self.all_files: list[Page] = None
        # updated and modified are synonyms here
        self.updated_files: list[Page] = None
        self.all_tags: list[tuple[str]] = None

    def parse_files(self) -> None:
        """Parse every file, then fill and sort the result lists.

        For each file the database entry is updated, the markdown is
        converted to HTML and a ``Page`` is built from it. Pages with tags
        get those tags registered in the database and collected (without
        repeating a tag name) in ``all_tags``. Finally the lists are sorted
        and the pages in ``all_files`` are linked to their neighbours through
        the ``next`` and ``previous`` attributes.

        Calling this method again rebuilds all three lists from scratch.
        """
        log.debug('parsing all files')
        # initialize lists
        self.all_files = []
        self.updated_files = []
        self.all_tags = []
        # names of the tags already present in all_tags, kept alongside it
        # so the duplicate check doesn't need to rescan the list every time
        seen_tag_names: set[str] = set()

        for f in self.files:
            log.debug('parsing file "%s"', f)
            # read once per file (and not before the loop), so that nothing
            # is read from the config when there are no files to parse
            src_dir: str = self.config.get('path', 'src')
            src_file: str = os.path.join(src_dir, f)
            log.debug('path "%s"', src_file)
            # get flag if update is successful
            file_updated: bool = self.db.update(src_file, remove=f'{src_dir}/')

            log.debug('parsing md into html')
            # context manager so the file handle is closed right away
            with open(src_file) as md_file:
                raw_md: str = md_file.read()
            content: str = self.md.reset().convert(raw_md)

            # NOTE(review): the first two values of the db entry are passed
            # straight to Page; presumably timestamps, confirm in Database
            entry = self.db.e[f]
            page: Page = Page(f,
                              entry[0],
                              entry[1],
                              content,
                              self.md.Meta,
                              self.config)
            page.parse_metadata()

            # keep a separated list for all and updated pages
            if file_updated:
                log.debug('has been modified, adding to mod file list')
                self.updated_files.append(page)
            log.debug('adding to file list')
            self.all_files.append(page)

            # parse tags
            if page.tags is not None:
                log.debug('parsing tags')
                # add its tag to corresponding db entry if existent
                self.db.update_tags(f, [t[0] for t in page.tags])

                log.debug('add all tags to tag list')
                for t in page.tags:
                    # tags are identified by their first element (the name)
                    if t[0] not in seen_tag_names:
                        log.debug('adding tag "%s" as it\'s not present in tag list', t[0])
                        seen_tag_names.add(t[0])
                        self.all_tags.append(t)
                    else:
                        log.debug('ignoring tag "%s" as it\'s present in tag list', t[0])
            else:
                log.debug('no tags to parse')

        log.debug('sorting all lists for consistency')
        self.all_tags.sort(key=itemgetter(0))
        # pages are sorted in descending order, using Page's own comparison
        self.updated_files.sort(reverse=True)
        self.all_files.sort(reverse=True)

        pages_amount: int = len(self.all_files)
        # note that prev and next are switched because of the
        # reverse ordering of all_pages
        log.debug('update next and prev attributes')
        for i, p in enumerate(self.all_files):
            # the first page of the list has no next page
            if i != 0:
                next_page: Page = self.all_files[i - 1]
                p.next = next_page

            # the last page of the list has no previous page
            if i != pages_amount - 1:
                prev_page: Page = self.all_files[i + 1]
                p.previous = prev_page