summaryrefslogtreecommitdiff
path: root/src/pyssg/md_parser.py
blob: b00da1973ff5aed06f60e88e396db39b3018b9de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
from operator import itemgetter
from markdown import Markdown
from configparser import ConfigParser
import logging
from logging import Logger

from .database import Database
from .page import Page

# module-level logger, named after this module
log: Logger = logging.getLogger(__name__)


# "page" and "file" are used as synonyms here...
class MDParser:
    """Parse a list of markdown files into ``Page`` objects.

    After ``parse_files()`` has run, the results are available as:

    * ``all_files``: every parsed page, sorted in reverse order.
    * ``updated_files``: the subset of pages for which the database
      reported an update, sorted in reverse order.
    * ``all_tags``: the tags found across all pages, without repeated
      tag names, sorted by tag name.
    """

    def __init__(self, files: list[str],
                 config: ConfigParser,
                 db: Database,
                 md: Markdown):
        """Store the collaborators; nothing is parsed until parse_files().

        Args:
            files: file names, relative to the ``src`` option of the
                ``path`` section of ``config``.
            config: configuration; the ``path``/``src`` option is read
                here, and the object is also handed to every ``Page``.
            db: database that is updated with each file and its tags.
            md: markdown converter; it is reset before each file and its
                ``Meta`` attribute is read after each conversion.
        """
        log.debug('initializing the md parser with %d files', len(files))
        self.files: list[str] = files

        self.config: ConfigParser = config
        self.db: Database = db
        self.md: Markdown = md

        # the result lists stay None until parse_files() is called
        self.all_files: list[Page] = None
        # updated and modified are synonyms here
        self.updated_files: list[Page] = None
        self.all_tags: list[tuple[str]] = None

    def parse_files(self) -> None:
        """Parse every file and fill the result lists.

        For each file: update its database entry, convert the markdown to
        html, build a ``Page`` and parse its metadata, then record the page
        and its tags. Finally sort all the lists and link each page to its
        neighbours through the ``next`` and ``previous`` attributes.

        The result lists are re-created on every call.
        """
        log.debug('parsing all files')
        # initialize lists
        self.all_files = []
        self.updated_files = []
        self.all_tags = []
        # names of the tags already in all_tags, so that checking for a
        # repeated tag doesn't need to rebuild a list of names every time
        seen_tag_names: set[str] = set()
        # the source directory doesn't change while parsing, read it once
        src_dir: str = self.config.get('path', 'src')

        for f in self.files:
            log.debug('parsing file "%s"', f)
            src_file: str = os.path.join(src_dir, f)
            log.debug('path "%s"', src_file)
            # get flag if update is successful
            file_updated: bool = self.db.update(src_file, remove=f'{src_dir}/')

            log.debug('parsing md into html')
            # read inside a context manager so the file handle is closed
            # right away instead of whenever it gets garbage collected
            with open(src_file) as md_file:
                md_text: str = md_file.read()
            content: str = self.md.reset().convert(md_text)
            # NOTE(review): the first two fields of the db entry are
            # presumably the creation and modification times -- confirm
            # against Database/Page
            page: Page = Page(f,
                              self.db.e[f][0],
                              self.db.e[f][1],
                              content,
                              self.md.Meta,
                              self.config)
            page.parse_metadata()

            # keep a separated list for all and updated pages
            if file_updated:
                log.debug('has been modified, adding to mod file list')
                self.updated_files.append(page)
            log.debug('adding to file list')
            self.all_files.append(page)

            # parse tags
            if page.tags is None:
                log.debug('no tags to parse')
                continue

            log.debug('parsing tags')
            # add its tag to corresponding db entry if existent
            self.db.update_tags(f, list(map(itemgetter(0), page.tags)))

            log.debug('add all tags to tag list')
            for t in page.tags:
                tag_name = t[0]
                if tag_name in seen_tag_names:
                    log.debug('ignoring tag "%s" as it\'s present in tag list', tag_name)
                    continue
                log.debug('adding tag "%s" as it\'s not present in tag list', tag_name)
                seen_tag_names.add(tag_name)
                self.all_tags.append(t)

        log.debug('sorting all lists for consistency')
        self.all_tags.sort(key=itemgetter(0))
        self.updated_files.sort(reverse=True)
        self.all_files.sort(reverse=True)

        # note that prev and next are switched because of the reverse
        # ordering of all_files: the page that comes earlier in the list
        # is the "next" one of the page that follows it
        log.debug('update next and prev attributes')
        for earlier, later in zip(self.all_files, self.all_files[1:]):
            earlier.previous = later
            later.next = earlier