1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
from datetime import datetime, timezone
class Page:
    """A single page: its rendered HTML plus the metadata that came with it.

    The constructor stores the raw arguments and then derives convenience
    attributes (``title``, ``author``, ``summary``, ``lang``, ``tags``,
    ``c_datetime``, ``m_datetime`` and ``og``) from ``meta`` and the two
    timestamps.

    Pages order by creation time (``c_time``), so a list of pages can be
    sorted chronologically with ``sorted(pages)``.

    Args:
        name: identifier of the page.
        c_time: creation time as a POSIX timestamp.
        m_time: modification time as a POSIX timestamp; ``0.0`` means
            "never modified" and leaves ``m_datetime`` as ``None``.
        html: HTML content of the page.
        meta: metadata mapping. The code indexes ``meta[key][0]`` for the
            scalar fields, so each value is assumed to be a list of strings
            -- TODO confirm against the producer of this dict.

    Raises:
        Exception: if an entry of ``meta['og']`` has no comma separating
            the key from the value.
    """

    def __init__(self,
                 name: str,
                 c_time: float,
                 m_time: float,
                 html: str,
                 meta: dict):
        self.name: str = name
        self.c_time: float = c_time
        self.m_time: float = m_time
        self.html: str = html
        self.meta: dict = meta

        # data from self.meta; these defaults are kept when the
        # corresponding key is absent
        self.title: str = ''
        self.author: str = ''
        self.c_datetime: datetime | None = None
        self.m_datetime: datetime | None = None
        self.summary: str = ''
        self.lang: str = 'en'
        self.tags: list | None = None

        # also from self.meta, but for og metadata
        # (presumably Open Graph -- TODO confirm)
        self.og: dict[str, str] = dict()

        self.__parse_meta()

    def __lt__(self, other):
        """Order pages by creation timestamp (older pages sort first)."""
        return self.c_time < other.c_time

    def __meta_first(self, key: str, default: str) -> str:
        """Return the first item of ``self.meta[key]``.

        Falls back to ``default`` when the key is missing or its list is
        empty.
        """
        try:
            return self.meta[key][0]
        except (KeyError, IndexError):
            return default

    def __parse_meta(self):
        """Fill the derived attributes from ``self.meta`` and the timestamps.

        Missing keys are not an error: the attribute simply keeps the
        default assigned in ``__init__``.
        """
        self.title = self.__meta_first('title', self.title)
        self.author = self.__meta_first('author', self.author)
        self.summary = self.__meta_first('summary', self.summary)
        self.lang = self.__meta_first('lang', self.lang)

        # timestamps are turned into timezone-aware UTC datetimes
        self.c_datetime = datetime.fromtimestamp(self.c_time,
                                                 tz=timezone.utc)
        if self.m_time != 0.0:
            self.m_datetime = datetime.fromtimestamp(self.m_time,
                                                     tz=timezone.utc)

        try:
            raw_tags = self.meta['tags']
        except KeyError:
            pass
        else:
            # sorted() builds a new list, so the list owned by the
            # caller's meta dict is not reordered behind its back
            self.tags = sorted(raw_tags)

        # each og entry has the form "key, value"; only the first comma
        # separates, so the value may itself contain commas
        for og_entry in self.meta.get('og', ()):
            key, comma, value = og_entry.partition(',')
            if not comma:
                raise Exception('invalid og syntax')
            self.og[key.strip()] = value.strip()
|