1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
from datetime import datetime, timezone
from logging import Logger, getLogger
from typing import Any
log: Logger = getLogger(__name__)
class Page:
def __init__(self, name: str,
ctime: float,
mtime: float,
html: str,
toc: str,
toc_tokens: list[str],
meta: dict[str, Any],
config: dict[str, Any],
dir_config: dict[str, Any]) -> None:
log.debug('initializing a page object with name "%s"', name)
# initial data
self.name: str = name
self.ctimestamp: float = ctime
self.mtimestamp: float = mtime
self.content: str = html
self.toc: str = toc
self.toc_tokens: list[str] = toc_tokens
self.meta: dict[str, Any] = meta
self.config: dict[str, Any] = config
self.dir_config: dict[str, Any] = dir_config
# data from self.meta
self.title: str
self.author: list[str]
self.summary: str
self.lang: str
self.cdatetime: datetime
self.mdatetime: datetime | None = None
self.tags: list[tuple[str, str]] = []
# constructed
self.url: str
self.cdate_rss: str
self.cdate_sitemap: str
self.mdate_rss: str | None = None
self.mdate_sitemap: str | None = None
self.next: Page | None = None
self.previous: Page | None = None
def __lt__(self, other):
return self.ctimestamp < other.ctimestamp
def __get_meta(self, var: str,
or_else: str | list[str] = '') -> str | list[str] | Any:
if var in self.meta:
log.debug('getting metadata "%s"', var)
return self.meta[var]
else:
log.debug('getting metadata "%s" failed, using optional value "%s"',
var, or_else)
return or_else
# these date/cdate/mdate might be a bit overcomplicated
def __date(self, dt: datetime, format: str) -> str:
if format in self.config['fmt']:
return dt.strftime(self.config['fmt'][format])
else:
log.warning('format "%s" not found in config, returning '
'empty string', format)
return ''
def cdate(self, format: str) -> str:
return self.__date(self.cdatetime, format)
def mdate(self, format: str) -> str:
if self.mdatetime is None:
log.warning('no mdatetime found, can\'t return a formatted string')
return ''
return self.__date(self.mdatetime, format)
def from_timestamp(self, timestamp: float) -> datetime:
return datetime.fromtimestamp(timestamp, tz=timezone.utc)
# parses meta from self.meta
def parse_metadata(self):
log.debug('parsing metadata for file "%s"', self.name)
self.title = str(self.__get_meta('title'))
self.author = list(self.__get_meta('author', ['']))
self.summary = str(self.__get_meta('summary'))
self.lang = str(self.__get_meta('lang', 'en'))
log.debug('parsing timestamp')
self.cdatetime = self.from_timestamp(self.ctimestamp)
self.cdate_rss = self.cdate('rss_date')
self.cdate_sitemap = self.cdate('sitemap_date')
if self.mtimestamp != 0.0:
log.debug('parsing modified timestamp')
self.mdatetime = self.from_timestamp(self.mtimestamp)
self.mdate_rss = self.mdate('rss_date')
self.mdate_sitemap = self.mdate('sitemap_date')
else:
log.debug('not parsing modified timestamp, hasn\'t been modified')
if self.dir_config['tags']:
log.debug('parsing tags')
tags_only: list[str] = list(self.__get_meta('tags', []))
if tags_only:
tags_only.sort()
for t in tags_only:
# need to specify dir_config['url'] as it is
# a hardcoded tag url
tag_url: str = f'{self.dir_config["url"]}/tag/@{t}.html'
self.tags.append((t, tag_url))
else:
log.debug('no tags to parse')
log.debug('parsing page url')
# no need to specify dir_config['url'] as self.name already
# contains the relative url
name_html: str = self.name.replace(".md", ".html")
self.url = f'{self.config["url"]["main"]}/{name_html}'
log.debug('final url "%s"', self.url)
|