diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/pyssg/builder.py | 282 | ||||
-rw-r--r-- | src/pyssg/configuration.py | 16 | ||||
-rw-r--r-- | src/pyssg/page.py | 95 | ||||
-rw-r--r-- | src/pyssg/parser.py | 42 | ||||
-rw-r--r-- | src/pyssg/pyssg.py | 65 | ||||
-rw-r--r-- | src/pyssg/rss.py | 60 | ||||
-rw-r--r-- | src/pyssg/sitemap.py | 54 | ||||
-rw-r--r-- | src/pyssg/template.py | 243 |
8 files changed, 276 insertions, 581 deletions
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 8f7b67c..3a4474e 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -1,34 +1,48 @@ import os import shutil +from operator import itemgetter from copy import deepcopy +from jinja2 import Environment, Template +from markdown import Markdown +from importlib.metadata import version +from datetime import datetime, timezone from .configuration import Configuration -from .template import Template from .database import Database from .parser import MDParser from .page import Page from .discovery import get_file_list, get_dir_structure + +VERSION = version('pyssg') +# specific format for rss +DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) + + class HTMLBuilder: def __init__(self, config: Configuration, - template: Template, - db: Database): + env: Environment, + db: Database, + md: Markdown): self.src: str = config.src self.dst: str = config.dst + self.title: str = config.title self.base_url: str = config.base_url - self.dformat: str = config.dformat - self.l_dformat: str = config.l_dformat - self.lsep_dformat: str = config.lsep_dformat + self.base_static_url: str = config.base_static_url self.force: bool = config.force - self.template: Template = template + self.config: Configuration = config + self.env: Environment = env self.db: Database = db + self.md: Markdown = md self.dirs: list[str] = None self.md_files: list[str] = None self.html_files: list[str] = None self.all_pages: list[Page] = None + self.updated_pages: list[Page] = None self.all_tags: list[str] = None @@ -40,31 +54,22 @@ class HTMLBuilder: self.__create_dir_structure() self.__copy_html_files() - parser: MDParser = MDParser(self.src, self.md_files, self.db) - parser.parse() + parser: MDParser = MDParser(self.src, + self.md_files, + self.db, + self.md) + parser.parse(self.config) # just to be able to extract all pages out of this class self.all_pages = parser.all_pages + self.updated_pages = parser.updated_pages self.all_tags = parser.all_tags # create the article index - self.__create_article_index(parser.all_tags, parser.all_pages) - - # create each category of html pages + self.__create_article_index() # check if all pages should be created - if self.force: - self.__create_articles(parser.all_pages) - else: - self.__create_articles(parser.updated_pages) - self.__create_tags(parser.all_tags, parser.all_pages) - - - def get_pages(self) -> list[Page]: - return self.all_pages - - - def get_tags(self) -> list[str]: - return self.all_tags + self.__create_articles() + self.__create_tags() def __create_dir_structure(self) -> None: @@ -90,142 +95,57 @@ class HTMLBuilder: shutil.copy2(src_file, dst_file) - # this is really similar to create_tag (singular) - def __create_article_index(self, tags: list[str], - pages: list[Page]) -> None: - # make temporary template - t: Template = deepcopy(self.template) - - # do basic replacements - # get page and tag list formated, both functions do replacements - p_list: list[str] = self.__get_pages_formatted(pages, t) - t_list: list[str] = self.__get_tags_formatted(tags, t) - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', f'Index') + def __create_article_index(self) -> None: + template: Template = self.env.get_template("index.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags) with open(os.path.join(self.dst, 'index.html'), 'w') as f: - f.write(t.header) - f.write(t.articles.header) - - f.write(t.tags.list_header) - for tag in t_list: - f.write(tag) - f.write(t.tags.list_footer) + f.write(content) - f.write(t.articles.list_header) - for page in p_list: - f.write(page) - f.write(t.articles.list_footer) - f.write(t.articles.footer) - f.write(t.footer) - - - def __create_articles(self, pages: list[Page]) -> None: - for p in pages: - self.__create_article(p) + def __create_articles(self) -> None: + # check if only updated should be created + if self.force: + for p in self.all_pages: + self.__create_article(p) + else: + for p in self.updated_pages: + self.__create_article(p) def __create_article(self, page: Page) -> None: - # TODO: create better solution for replace - # make temporary template - t: Template = deepcopy(self.template) - # prepare html file name f_name: str = page.name f_name = f_name.replace('.md', '.html') - # get timestamps - c_date: str = page.c_datetime.strftime(self.dformat) - m_date: str = None - if page.m_datetime is not None: - m_date: str = page.m_datetime.strftime(self.dformat) - - # do basic replacements - # get tag list formatted (some replacements done inside - # get_tags_formatted) - t_list: list[str] = None - if page.tags is not None: - t_list = self.__get_tags_formatted(page.tags, t) - # tags formatted as a single string. - tags_formatted: str = ''.join(t_list) - # join list header and footer with all tags list. - tags_formatted = ''.join([t.tags.list_header, - tags_formatted, - t.tags.list_footer]) - - # common - t.header = t.header.replace("$$LANG", page.lang) - t.header = t.header.replace('$$TITLE', page.title) - - # article header - t.article.header = t.article.header.replace('$$TITLE', page.title) - t.article.header = t.article.header.replace('$$AUTHOR', page.author) - t.article.header = t.article.header.replace('$$CTIME', c_date) - if m_date is not None: - t.article.header = t.article.header.replace('$$MTIME', m_date) - else: - t.article.header = t.article.header.replace('$$MTIME', '') - - # article footer (same replaces as header) - t.article.footer = t.article.footer.replace('$$TITLE', page.title) - t.article.footer = t.article.footer.replace('$$AUTHOR', page.author) - t.article.footer = t.article.footer.replace('$$CTIME', c_date) - if m_date is not None: - t.article.footer = t.article.footer.replace('$$MTIME', m_date) - else: - t.article.footer = t.article.footer.replace('$$MTIME', '') - - # add tags to article list - if t_list is not None: - t.article.footer = t.article.footer.replace('$$TAGS', - tags_formatted) - else: - t.article.footer = t.article.footer.replace('$$TAGS', '') + template: Template = self.env.get_template("page.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags, + page=page) with open(os.path.join(self.dst, f_name), 'w') as f: - f.write(t.header) - f.write(t.article.header) - f.write(page.html) - - # not required anymore, tags included in article footer - # if t_list is not None: - # f.write(t.tags.list_header) - # for tag in t_list: - # f.write(tag) - # f.write(t.tags.list_footer) - - f.write(t.article.footer) - f.write(t.footer) - - - def __get_tags_formatted(self, tags: list[str], - template: Template) -> list[str]: - tag_amount: int = len(tags) - tags_formatted: list[str] = [] - for i, t in enumerate(tags): - # t_e=tag entry - t_e: str = template.tags.list_entry - t_e = t_e.replace('$$URL', - f'{self.base_url}/tag/@{t}.html') - t_e = t_e.replace('$$NAME', t) - - tags_formatted.append(t_e) - if i != tag_amount - 1: - tags_formatted.append(template.tags.list_separator) - - return tags_formatted - - - def __create_tags(self, tags: list[str], - pages: list[Page]) -> None: - for t in tags: + f.write(content) + + + def __create_tags(self) -> None: + for t in self.all_tags: # get a list of all pages that have current tag tag_pages: list[Page] = [] - for p in pages: - if p.tags is not None and t in p.tags: + for p in self.all_pages: + if p.tags is not None and t[0] in list(map(itemgetter(0), + p.tags)): tag_pages.append(p) # build tag page @@ -235,61 +155,19 @@ class HTMLBuilder: tag_pages = [] - def __create_tag(self, tag: str, + def __create_tag(self, tag: tuple[str], pages: list[Page]) -> None: - # TODO: create better solution for replace - # make temporary template - t: Template = deepcopy(self.template) - - # do basic replacements - # get page list formated (some replacements done inside - # get_pages_formatted) - p_list: list[str] = self.__get_pages_formatted(pages, t) - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', f'Posts filtered by: {tag}') - - # tag header - tag_url: str = f'{self.base_url}/tag/@{tag}.html' - t.tags.header = t.tags.header.replace('$$NAME', tag) - - with open(os.path.join(self.dst, f'tag/@{tag}.html'), 'w') as f: - f.write(t.header) - f.write(t.tags.header) - - f.write(t.articles.list_header) - for p in p_list: - f.write(p) - f.write(t.articles.list_footer) - - f.write(t.tags.footer) - f.write(t.footer) - - - def __get_pages_formatted(self, pages: list[Page], - template: Template) -> list[str]: - month_year: str = '-' - pages_formatted: list[str] = [] - for p in pages: - # check if the monthly separator should be included - c_month_year: str = p.c_datetime.strftime(self.lsep_dformat) - if c_month_year != month_year: - month_year = c_month_year - - month_sep: str = template.articles.list_separator - month_sep = month_sep.replace('$$SEP', month_year) - - pages_formatted.append(month_sep) - - f_name: str = p.name - f_name = f_name.replace('.md', '.html') - - # p_e=page entry - p_e: str = template.articles.list_entry - p_e = p_e.replace('$$URL', f'{self.base_url}/{f_name}') - p_e = p_e.replace('$$DATE', p.c_datetime.strftime(self.l_dformat)) - p_e = p_e.replace('$$TITLE', p.title) - - pages_formatted.append(p_e) - - return pages_formatted + + template: Template = self.env.get_template("tag.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags, + tag=tag, + tag_pages=pages) + + with open(os.path.join(self.dst, f'tag/@{tag[0]}.html'), 'w') as f: + f.write(content) diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py index 8ee592e..dca43b5 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -7,7 +7,9 @@ class Configuration: self.path: str = path self.src: str = None self.dst: str = None + self.plt: str = None self.base_url: str = None + self.base_static_url: str = None self.title: str = None self.dformat: str = None self.l_dformat: str = None @@ -45,10 +47,18 @@ class Configuration: except KeyError: pass try: + self.plt = opts['PLT_PATH'] + except KeyError: pass + + try: self.base_url = opts['BASE_URL'] except KeyError: pass try: + self.base_static_url = opts['BASE_STATIC_URL'] + except KeyError: pass + + try: self.title = opts['TITLE'] except KeyError: pass @@ -80,9 +90,15 @@ class Configuration: if self.dst is None: self.dst = opts['dst'] + if self.plt is None: + self.plt = opts['plt'] + if self.base_url is None: self.base_url = opts['url'] + if self.base_static_url is None: + self.base_static_url = opts['static_url'] + if self.title is None: self.title = opts['title'] diff --git a/src/pyssg/page.py b/src/pyssg/page.py index e03ca6b..1e0cdeb 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -1,68 +1,101 @@ from datetime import datetime, timezone +from .configuration import Configuration + + +DFORMAT_RSS = '%a, %d %b %Y %H:%M:%S GMT' +DFORMAT_SITEMAP = '%Y-%m-%d' + class Page: def __init__(self, name: str, - c_time: float, - m_time: float, + ctime: float, + mtime: float, html: str, meta: dict): + # initial data self.name: str = name - self.c_time: float = c_time - self.m_time: float = m_time - self.html: str = html + self.ctimestamp: float = ctime + self.mtimestamp: float = mtime + self.content: str = html self.meta: dict = meta # data from self.meta self.title: str = '' self.author: str = '' - self.c_datetime: datetime = None - self.m_datetime: datetime = None + self.cdatetime: datetime = None + self.mdatetime: datetime = None self.summary: str = '' self.lang: str = 'en' - self.tags: list = None + self.tags: list[tuple[str]] = [] + + # constructed + self.url: str = '' + self.cdate: str = '' + self.cdate_list: str = '' + self.cdate_list_sep: str = '' + self.cdate_rss: str = '' + self.cdate_sitemap: str = '' + self.mdate: str = None + self.mdate_list: str = None + self.mdate_list_sep: str = None + self.mdate_rss: str = '' + self.mdate_sitemap: str = '' + + # later assigned references to next and previous pages + self.next: Page = None + self.previous: Page = None # also from self.meta, but for og metadata self.og: dict[str, str] = dict() - self.__parse_meta() def __lt__(self, other): - return self.c_time < other.c_time + return self.ctimestamp < other.ctimestamp # parses meta from self.meta, for og, it prioritizes, # the actual og meta - def __parse_meta(self): - try: - self.title = self.meta['title'][0] - except KeyError: pass - - try: - self.author = self.meta['author'][0] - except KeyError: pass - - self.c_datetime = datetime.fromtimestamp(self.c_time, + def parse(self, config: Configuration): + # required meta elements + self.title = self.meta['title'][0] + self.author = self.meta['author'][0] + self.summary = self.meta['summary'][0] + self.lang = self.meta['lang'][0] + + # dates + self.cdatetime = datetime.fromtimestamp(self.ctimestamp, tz=timezone.utc) - - if self.m_time != 0.0: - self.m_datetime = datetime.fromtimestamp(self.m_time, + self.cdate = self.cdatetime.strftime(config.dformat) + self.cdate_list = self.cdatetime.strftime(config.l_dformat) + self.cdate_list_sep = self.cdatetime.strftime(config.lsep_dformat) + self.cdate_rss = self.cdatetime.strftime(DFORMAT_RSS) + self.cdate_sitemap = self.cdatetime.strftime(DFORMAT_SITEMAP) + + # only if file/page has been modified + if self.mtimestamp != 0.0: + self.mdatetime = datetime.fromtimestamp(self.mtimestamp, tz=timezone.utc) + self.mdate = self.mdatetime.strftime(config.dformat) + self.mdate_list = self.mdatetime.strftime(config.l_dformat) + self.mdate_list_sep = self.mdatetime.strftime(config.lsep_dformat) + self.mdate_rss = self.mdatetime.strftime(DFORMAT_RSS) + self.mdate_sitemap = self.mdatetime.strftime(DFORMAT_SITEMAP) + # not always contains tags try: - self.summary = self.meta['summary'][0] - except KeyError: pass + tags_only: list[str] = self.meta['tags'] + tags_only.sort() - try: - self.lang = self.meta['lang'][0] + for t in tags_only: + self.tags.append((t, + f'{config.base_url}/tag/@{t}.html')) except KeyError: pass - try: - self.tags = self.meta['tags'] - self.tags.sort() - except KeyError: pass + self.url = f'{config.base_url}/{self.name.replace(".md", ".html")}' + # if contains object graph elements try: # og_e = object graph entry for og_e in self.meta['og']: diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py index 548fb05..7323bde 100644 --- a/src/pyssg/parser.py +++ b/src/pyssg/parser.py @@ -1,8 +1,11 @@ import os +from operator import itemgetter from datetime import datetime from markdown import Markdown from .database import Database +from .configuration import Configuration +from .configuration import Configuration from .page import Page @@ -10,33 +13,38 @@ from .page import Page class MDParser: def __init__(self, src: str, files: list[str], - db: Database): - self.md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', - 'smarty', 'toc', 'wikilinks'], - output_format='html5') + db: Database, + md: Markdown): self.src: str = src self.files: list[str] = files + self.db: Database = db + self.md: Markdown = md self.all_pages: list[Page] = None self.updated_pages: list[Page] = None - self.all_tags: list[str] = None + self.all_tags: list[tuple[str]] = None - def parse(self): + def parse(self, config: Configuration): # initialize lists self.all_pages = [] self.updated_pages = [] self.all_tags = [] + all_tag_names: list[str] = [] for f in self.files: src_file: str = os.path.join(self.src, f) # get flag if update is successful updated: bool = self.db.update(src_file, remove=f'{self.src}/') - page: Page = None content: str = self.md.reset().convert(open(src_file).read()) - page = Page(f, self.db.e[f][0], self.db.e[f][1], content, self.md.Meta) + page: Page = Page(f, + self.db.e[f][0], + self.db.e[f][1], + content, + self.md.Meta) + page.parse(config) # keep a separated list for all and updated pages if updated: @@ -46,14 +54,26 @@ class MDParser: # parse tags if page.tags is not None: # add its tag to corresponding db entry if existent - self.db.update_tags(f, page.tags) + self.db.update_tags(f, list(map(itemgetter(0), page.tags))) # update all_tags attribute for t in page.tags: - if t not in self.all_tags: + if t[0] not in list(map(itemgetter(0), self.all_tags)): self.all_tags.append(t) # sort list of tags for consistency - self.all_tags.sort() + self.all_tags.sort(key=itemgetter(0)) self.updated_pages.sort(reverse=True) self.all_pages.sort(reverse=True) + # TODO: fix this in case it doesn't work lol + # this should update references to all_pages and updated_pages??? + for i, p in enumerate(self.all_pages): + try: + prev_page: Page = self.all_pages[i - 1] + p.previous = prev_page + except IndexError: pass + + try: + next_page: Page = self.all_pages[i + 1] + p.next = next_page + except IndexError: pass diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index 5d8ac33..933f7d3 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -1,16 +1,23 @@ import os from argparse import ArgumentParser, Namespace from typing import Union +from jinja2 import Environment, FileSystemLoader +from markdown import Markdown +from importlib.metadata import version +from importlib.resources import contents +from datetime import datetime, timezone from .configuration import Configuration from .database import Database -from .template import Template from .builder import HTMLBuilder from .page import Page from .rss import RSSBuilder from .sitemap import SitemapBuilder +VERSION = version('pyssg') + + def get_options() -> Namespace: parser = ArgumentParser(prog='pyssg', description='''Static Site Generator that reads @@ -19,6 +26,9 @@ def get_options() -> Namespace: location) all other options are ignored.\nFor datetime formats see: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes''') + parser.add_argument('-v', '--version', + action='store_true', + help='''print program version''') parser.add_argument('-c', '--config', default='$XDG_CONFIG_HOME/pyssg/pyssgrc', type=str, @@ -34,11 +44,20 @@ def get_options() -> Namespace: type=str, help='''dst directory; generated (and transfered html) files; defaults to 'dst' ''') + parser.add_argument('-t', '--plt', + default='plt', + type=str, + help='''plt directory; all template files; defaults to + 'plt' ''') parser.add_argument('-u', '--url', default='', type=str, help='''base url without trailing slash''') - parser.add_argument('-t', '--title', + parser.add_argument('--static-url', + default='', + type=str, + help='''base static url without trailing slash''') + parser.add_argument('--title', default='Blog', type=str, help='''general title for the website; defaults to @@ -86,16 +105,20 @@ def main() -> None: config.read() config.fill_missing(opts) + if opts['version']: + print(f'pyssg v{VERSION}') + return + if opts['init']: try: os.mkdir(config.src) os.makedirs(os.path.join(config.dst, 'tag')) + os.mkdir(config.plt) except FileExistsError: pass - # write default templates - template: Template = Template(config.src) - template.write() + for f in contents('pyssg'): + print(f) return if opts['build']: @@ -103,22 +126,34 @@ def main() -> None: db: Database = Database(os.path.join(config.src, '.files')) db.read() - # read templates - template: Template = Template(config.src) - template.read() + # the autoescape option could be a security risk if used in a dynamic + # website, as far as i can tell + env: Environment = Environment(loader=FileSystemLoader(config.plt), + autoescape=False, + trim_blocks=True, + lstrip_blocks=True) - builder: HTMLBuilder = HTMLBuilder(config, template, db) + md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', + 'smarty', 'toc', 'wikilinks'], + output_format='html5') + builder: HTMLBuilder = HTMLBuilder(config, + env, + db, + md) builder.build() - # get all parsed pages for rss construction - all_pages: list[Page] = builder.get_pages() - rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages) + # get all parsed pages and tags for rss and sitemap construction + all_pages: list[Page] = builder.all_pages + all_tags: list[tuple[str]] = builder.all_tags + + rss_builder: RSSBuilder = RSSBuilder(config, + env, + all_pages, + all_tags) rss_builder.build() - # get all tags for sitemap construction - all_tags: list[str] = builder.get_tags() sm_builder: SitemapBuilder = SitemapBuilder(config, - template.sitemap, + env, all_pages, all_tags) sm_builder.build() diff --git a/src/pyssg/rss.py b/src/pyssg/rss.py index 742b6b2..7de29ae 100644 --- a/src/pyssg/rss.py +++ b/src/pyssg/rss.py @@ -1,60 +1,38 @@ import os -import importlib.metadata +from jinja2 import Environment, Template +from importlib.metadata import version from datetime import datetime, timezone from .page import Page from .configuration import Configuration -VERSION = importlib.metadata.version('pyssg') -# This is static right here since an rss feed -# requires very specific date format +VERSION = version('pyssg') +# specific format for rss DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) class RSSBuilder: def __init__(self, config: Configuration, - template: str, - pages: list[Page]): + env: Environment, + pages: list[Page], + tags: list[tuple[str]]): self.config: Configuration = config - self.rss: str = template + self.env: Environment = env self.pages: list[Page] = pages + self.tags: list[tuple[str]] = tags def build(self): - # initial base replacements - self.rss = self.rss.replace('$$TITLE', self.config.title) - self.rss = self.rss.replace('$$LINK', self.config.base_url) - self.rss = self.rss.replace('$$PYSSGVERSION', f'pyssg v{VERSION}') - items_formatted: str = self.__get_items_formatted() - self.rss = self.rss.replace('$$ITEMS', items_formatted) - - current_date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) - self.rss = self.rss.replace('$$CURRENTDATE', current_date) + template: Template = self.env.get_template("rss.xml") + content: str = template.render(site_title=self.config.title, + site_base_url=self.config.base_url, + site_base_static_url=self.config.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.pages, + all_tags=self.tags) with open(os.path.join(self.config.dst, 'rss.xml'), 'w') as f: - f.write(self.rss) - - - def __get_items_formatted(self) -> str: - # i_f=items formatted for short - i_f: str = '' - for p in self.pages: - url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' - date: str = p.c_datetime.strftime(DFORMAT) - - i_f = f'{i_f} <item>\n' - i_f = f'{i_f} <title>{p.title}</title>\n' - i_f = f'{i_f} <link>{url}</link>\n' - i_f = f'{i_f} <guid isPermaLink="true">{url}</guid>\n' - i_f = f'{i_f} <pubDate>{date}</pubDate>\n' - # TODO: maybe make this optional? - # add the tags as categories - if p.tags is not None: - for t in p.tags: - i_f = f'{i_f} <category>{t.capitalize()}</category>\n' - i_f = f'{i_f} <description>{p.summary}</description>\n' - i_f = f'{i_f} <content:encoded><![CDATA[{p.html}]]></content:encoded>\n' - i_f = f'{i_f} </item>\n' - - return i_f + f.write(content) diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py index 410033f..f63a7e6 100644 --- a/src/pyssg/sitemap.py +++ b/src/pyssg/sitemap.py @@ -1,60 +1,38 @@ import os +from jinja2 import Environment, Template +from importlib.metadata import version from datetime import datetime, timezone from .page import Page from .configuration import Configuration - +VERSION = version('pyssg') +# specific format for sitemap (not as strict) DFORMAT = '%Y-%m-%d' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) class SitemapBuilder: def __init__(self, config: Configuration, - template: str, + env: Environment, pages: list[Page], tags: list[str]): self.config: Configuration = config - self.sitemap: str = template + self.env: Environment = env self.pages: list[Page] = pages self.tags: list[str] = tags def build(self): - # initial base replacements - urls_formatted: str = self.__get_urls_formatted() - self.sitemap = self.sitemap.replace('$$URLS', urls_formatted) + template: Template = self.env.get_template("sitemap.xml") + content: str = template.render(site_title=self.config.title, + site_base_url=self.config.base_url, + site_base_static_url=self.config.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.pages, + all_tags=self.tags) with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w') as f: - f.write(self.sitemap) - - - def __get_urls_formatted(self) -> str: - # u_f=items formatted for short - u_f: str = '' - for p in self.pages: - url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' - if p.m_datetime is not None: - date: str = p.m_datetime.strftime(DFORMAT) - else: - date: str = p.c_datetime.strftime(DFORMAT) - - u_f = f'{u_f} <url>\n' - u_f = f'{u_f} <loc>{url}</loc>\n' - u_f = f'{u_f} <lastmod>{date}</lastmod>\n' - u_f = f'{u_f} <changefreq>weekly</changefreq>\n' - u_f = f'{u_f} <priority>1.0</priority>\n' - u_f = f'{u_f} </url>\n' - - for t in self.tags: - url: str = f'{self.config.base_url}/tag/@{t}.html' - date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) - - u_f = f'{u_f} <url>\n' - u_f = f'{u_f} <loc>{url}</loc>\n' - u_f = f'{u_f} <lastmod>{date}</lastmod>\n' - u_f = f'{u_f} <changefreq>daily</changefreq>\n' - u_f = f'{u_f} <priority>0.5</priority>\n' - u_f = f'{u_f} </url>\n' - - return u_f + f.write(content) diff --git a/src/pyssg/template.py b/src/pyssg/template.py deleted file mode 100644 index 3ddcf82..0000000 --- a/src/pyssg/template.py +++ /dev/null @@ -1,243 +0,0 @@ -import os - -from .page import Page - - -# all objects here require a header and footer as minimum -class HF: - def __init__(self): - self.header: str = None - self.footer: str = None - - -# some objects require a "list-like" set of attributes -class Common(HF): - def __init__(self): - self.list_header: str = None - self.list_footer: str = None - self.list_entry: str = None - self.list_separator: str = None - - -# main class -class Template(HF): - def __init__(self, src: str): - self.src: str = src - self.article: HF = HF() - self.articles: Common = Common() - self.tags: Common = Common() - self.rss: str = None - self.sitemap: str = None - - self.is_read: bool = False - - - # writes default templates - def write(self) -> None: - # get initial working directory - iwd = os.getcwd() - os.chdir(self.src) - - # create templates dir - os.mkdir('templates') - os.chdir('templates') - - # common - os.mkdir('common') - os.chdir('common') - self.__write_template('header.html', - ['<!DOCTYPE html>\n', - '<html lang="$$LANG">\n', - '<head>\n', - '<meta charset="utf-8">\n', - '<title>$$TITLE</title>\n', - '</head>\n', - '<body>\n']) - self.__write_template('footer.html', - ['</body>\n', - '</html>\n']) - - # go back to templates - os.chdir('..') - - # article entry - os.mkdir('article') - os.chdir('article') - self.__write_template('header.html', - ['<h1>$$TITLE</h1>\n', - '<p>$$AUTHOR</p>\n', - '<p>Created: $$CTIME, modified: $$MTIME</p>\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - # article index (articles) - os.mkdir('articles') - os.chdir('articles') - self.__write_template('header.html', - ['']) - self.__write_template('list_header.html', - ['<h2>Articles</h2>\n', - '<ul>\n']) - self.__write_template('list_entry.html', - ['<li>$$DATE - <a href="$$URL">$$TITLE</a></li>\n']) - self.__write_template('list_separator.html', - ['<h3>$$SEP</h3>\n']) - self.__write_template('list_footer.html', - ['</ul>\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - # tag - os.mkdir('tag') - os.chdir('tag') - self.__write_template('header.html', - ['']) - self.__write_template('list_header.html', - ['<p>Tags: ']) - self.__write_template('list_entry.html', - ['<a href="$$URL">$$NAME</a>']) - self.__write_template('list_separator.html', - [', ']) - self.__write_template('list_footer.html', - ['</p>\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - os.mkdir('rss') - os.chdir('rss') - self.__write_template('rss.xml', - ['<?xml version="1.0" encoding="UTF-8" ?>\n', - '<rss version="2.0"\n', - ' xmlns:atom="http://www.w3.org/2005/Atom"\n', - ' xmlns:content="http://purl.org/rss/1.0/modules/content/">\n', - ' <channel>\n', - ' <title>$$TITLE</title>\n', - ' <link>$$LINK</link>\n', - ' <atom:link href="EXAMPLE.ORG/RSS.XML" rel="self" type="application/rss+xml"/>\n', - ' <description>SHORT DESCRIPTION.</description>\n', - ' <language>en-us</language>\n', - ' <copyright>COPYRIGHT NOTICE.</copyright>\n', - ' <managingEditor>EMAIL@EXAMPLE.ORG (NAME)</managingEditor>\n', - ' <webMaster>EMAIL@EXAMPLE.ORG (NAME)</webMaster>\n', - ' <pubDate>$$CURRENTDATE</pubDate>\n', - ' <lastBuildDate>$$CURRENTDATE</lastBuildDate>\n', - ' <generator>$$PYSSGVERSION</generator>\n', - ' <docs>https://validator.w3.org/feed/docs/rss2.html</docs>\n', - ' <ttl>30</ttl>\n', - ' <image>\n', - ' <url>EXAMPLE.ORG/IMAGE.PNG</url>\n', - ' <title>$$TITLE</title>\n', - ' <link>$$LINK</link>\n', - ' </image>\n', - '$$ITEMS\n', - ' </channel>\n', - '</rss>']) - - # go back to templates - os.chdir('..') - - os.mkdir('sitemap') - os.chdir('sitemap') - self.__write_template('sitemap.xml', - ['<?xml version="1.0" encoding="utf-8"?>\n', - '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n', - ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n', - ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n', - 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n', - '$$URLS\n', - '</urlset>']) - # return to initial working directory - os.chdir(iwd) - - - # reads templates and stores them into class attributes - def read(self) -> None: - # only read templates if not read already - # (might want to change this behaviour) - if self.is_read: - return - self.is_read = True - - # get initial working directory - iwd = os.getcwd() - os.chdir(os.path.join(self.src, 'templates')) - - # common - os.chdir('common') - self.header = self.__read_template('header.html') - self.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # article entry - os.chdir('article') - self.article.header = self.__read_template('header.html') - self.article.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # article index - os.chdir('articles') - self.articles.header = self.__read_template('header.html') - self.articles.list_header = \ - self.__read_template('list_header.html') - self.articles.list_entry = \ - self.__read_template('list_entry.html') - self.articles.list_separator = \ - self.__read_template('list_separator.html') - self.articles.list_footer = \ - self.__read_template('list_footer.html') - self.articles.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # tag - os.chdir('tag') - self.tags.header = self.__read_template('header.html') - self.tags.list_header = self.__read_template('list_header.html') - self.tags.list_entry = self.__read_template('list_entry.html') - self.tags.list_separator = self.__read_template('list_separator.html') - self.tags.list_footer = self.__read_template('list_footer.html') - self.tags.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # rss - os.chdir('rss') - self.rss = self.__read_template('rss.xml') - - # go back to templates - os.chdir('..') - - # sitemap - os.chdir('sitemap') - self.sitemap = self.__read_template('sitemap.xml') - - # return to initial working directory - os.chdir(iwd) - - - def __write_template(self, file_name: str, content: list[str]) -> None: - with open(file_name, 'w+') as f: - for c in content: - f.write(c) - - def __read_template(self, file_name: str) -> str: - out: str = None - with open(file_name, 'r') as f: - out = f.read() - - return out |