From 045eb5986ea11174b2ee39dc8476b365d734d37d Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Sat, 5 Jun 2021 03:13:59 -0600 Subject: refactor code and add image_url parsing --- src/pyssg/builder.py | 134 +++++++++++++++++---------------------------- src/pyssg/configuration.py | 53 ++++++++++++------ src/pyssg/page.py | 46 +++++++++------- src/pyssg/parser.py | 11 ++-- src/pyssg/plt/index.html | 6 +- src/pyssg/plt/page.html | 4 +- src/pyssg/plt/rss.xml | 18 +++--- src/pyssg/plt/sitemap.xml | 2 +- src/pyssg/plt/tag.html | 4 +- src/pyssg/pyssg.py | 34 +++--------- src/pyssg/rss.py | 38 ------------- src/pyssg/sitemap.py | 38 ------------- 12 files changed, 142 insertions(+), 246 deletions(-) delete mode 100644 src/pyssg/rss.py delete mode 100644 src/pyssg/sitemap.py (limited to 'src') diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index b6bd371..84494da 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -1,10 +1,9 @@ import os import shutil +from copy import deepcopy from operator import itemgetter from jinja2 import Environment, Template from markdown import Markdown -from importlib.metadata import version -from datetime import datetime, timezone from .configuration import Configuration from .database import Database @@ -13,24 +12,11 @@ from .page import Page from .discovery import get_file_list, get_dir_structure -VERSION = version('pyssg') -# specific format for rss -DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' -RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) - - -class HTMLBuilder: +class Builder: def __init__(self, config: Configuration, env: Environment, db: Database, md: Markdown): - self.src: str = config.src - self.dst: str = config.dst - self.title: str = config.title - self.base_url: str = config.base_url - self.base_static_url: str = config.base_static_url - self.force: bool = config.force - self.config: Configuration = config self.env: Environment = env self.db: Database = db @@ -43,32 +29,39 @@ class HTMLBuilder: self.all_pages: list[Page] = None self.updated_pages: list[Page] = None self.all_tags: list[str] = None + self.common_vars: dict = None def build(self) -> None: - self.dirs = get_dir_structure(self.src, ['templates']) - self.md_files = get_file_list(self.src, ['.md'], ['templates']) - self.html_files = get_file_list(self.src, ['.html'], ['templates']) + self.dirs = get_dir_structure(self.config.src, ['templates']) + self.md_files = get_file_list(self.config.src, ['.md'], ['templates']) + self.html_files = get_file_list(self.config.src, ['.html'], ['templates']) self.__create_dir_structure() self.__copy_html_files() - parser: MDParser = MDParser(self.src, + parser: MDParser = MDParser(self.config.src, self.md_files, + self.config, self.db, self.md) - parser.parse(self.config) + parser.parse() - # just to be able to extract all pages out of this class + # just so i don't have to pass these vars to all the functions self.all_pages = parser.all_pages self.updated_pages = parser.updated_pages self.all_tags = parser.all_tags - # create the article index - self.__create_article_index() - # check if all pages should be created - self.__create_articles() - self.__create_tags() + # dict for the keyword args to pass to the template renderer + self.common_vars = dict(config=self.config, + all_pages=self.all_pages, + all_tags=self.all_tags) + + self.__render_articles() + self.__render_tags() + self.__render_template('index.html', 'index.html', **self.common_vars) + self.__render_template('rss.xml', 'rss.xml', **self.common_vars) + self.__render_template('sitemap.xml', 'sitemap.xml', **self.common_vars) def __create_dir_structure(self) -> None: @@ -76,7 +69,7 @@ class HTMLBuilder: # for the dir structure, # doesn't matter if the dir already exists try: - os.makedirs(os.path.join(self.dst, d)) + os.makedirs(os.path.join(self.config.dst, d)) except FileExistsError: pass @@ -86,59 +79,33 @@ class HTMLBuilder: dst_file: str = None for f in self.html_files: - src_file = os.path.join(self.src, f) - dst_file = os.path.join(self.dst, f) + src_file = os.path.join(self.config.src, f) + dst_file = os.path.join(self.config.dst, f) # only copy files if they have been modified (or are new) - if self.db.update(src_file, remove=f'{self.src}/'): + if self.db.update(src_file, remove=f'{self.config.src}/'): shutil.copy2(src_file, dst_file) - def __create_article_index(self) -> None: - template: Template = self.env.get_template("index.html") - content: str = template.render(site_title=self.title, - site_base_url=self.base_url, - site_base_static_url=self.base_static_url, - pyssg_version=VERSION, - run_date=RUN_DATE, - all_pages=self.all_pages, - all_tags=self.all_tags) - - with open(os.path.join(self.dst, 'index.html'), 'w') as f: - f.write(content) - - - def __create_articles(self) -> None: + def __render_articles(self) -> None: + article_vars: dict = deepcopy(self.common_vars) # check if only updated should be created - if self.force: + if self.config.force: for p in self.all_pages: - self.__create_article(p) + article_vars['page'] = p + self.__render_template("page.html", + p.name.replace('.md','.html'), + **article_vars) else: for p in self.updated_pages: - self.__create_article(p) + article_vars['page'] = p + self.__render_template("page.html", + p.name.replace('.md','.html'), + **article_vars) - def __create_article(self, page: Page) -> None: - # prepare html file name - f_name: str = page.name - f_name = f_name.replace('.md', '.html') - - template: Template = self.env.get_template("page.html") - content: str = template.render(site_title=self.title, - site_base_url=self.base_url, - site_base_static_url=self.base_static_url, - pyssg_version=VERSION, - run_date=RUN_DATE, - all_pages=self.all_pages, - all_tags=self.all_tags, - page=page) - - - with open(os.path.join(self.dst, f_name), 'w') as f: - f.write(content) - - - def __create_tags(self) -> None: + def __render_tags(self) -> None: + tag_vars: dict = deepcopy(self.common_vars) for t in self.all_tags: # get a list of all pages that have current tag tag_pages: list[Page] = [] @@ -147,26 +114,23 @@ class HTMLBuilder: p.tags)): tag_pages.append(p) + tag_vars['tag'] = t + tag_vars['tag_pages'] = tag_pages + # build tag page - self.__create_tag(t, tag_pages) + self.__render_template('tag.html', + f'tag/@{t[0]}.html', + **tag_vars) # clean list of pages with current tag tag_pages = [] - def __create_tag(self, tag: tuple[str], - pages: list[Page]) -> None: - - template: Template = self.env.get_template("tag.html") - content: str = template.render(site_title=self.title, - site_base_url=self.base_url, - site_base_static_url=self.base_static_url, - pyssg_version=VERSION, - run_date=RUN_DATE, - all_pages=self.all_pages, - all_tags=self.all_tags, - tag=tag, - tag_pages=pages) + def __render_template(self, template_name: str, + file_name: str, + **template_vars) -> None: + template: Template = self.env.get_template(template_name) + content: str = template.render(**template_vars) - with open(os.path.join(self.dst, f'tag/@{tag[0]}.html'), 'w') as f: + with open(os.path.join(self.config.dst, file_name), 'w') as f: f.write(content) diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py index dca43b5..e07f452 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -1,21 +1,33 @@ import os from typing import Union +from importlib.metadata import version +from datetime import datetime, timezone class Configuration: def __init__(self, path: str): self.path: str = path + # config file specific self.src: str = None self.dst: str = None self.plt: str = None - self.base_url: str = None - self.base_static_url: str = None + self.url: str = None + self.static_url: str = None + self.default_image_url: str = None self.title: str = None self.dformat: str = None self.l_dformat: str = None self.lsep_dformat: str = None self.force: bool = None + # other + self.version: str = version('pyssg') + self.dformat_rss: str = '%a, %d %b %Y %H:%M:%S GMT' + self.dformat_sitemap: str = '%Y-%m-%d' + self.run_date_rss = datetime.now(tz=timezone.utc).strftime(self.dformat_rss) + self.run_date_sitemap = \ + datetime.now(tz=timezone.utc).strftime(self.dformat_sitemap) + def read(self): try: @@ -29,7 +41,7 @@ class Configuration: if len(kv) != 2: raise Exception('wrong config syntax') - k: str = kv[0].strip() + k: str = kv[0].strip().lower() v_temp: str = kv[1].strip() # check if value should be a boolean true v: Union[str, bool] = v_temp\ @@ -39,45 +51,49 @@ class Configuration: opts[k] = v try: - self.src = opts['SRC_PATH'] + self.src = opts['src'] except KeyError: pass try: - self.dst = opts['DST_PATH'] + self.dst = opts['dst'] except KeyError: pass try: - self.plt = opts['PLT_PATH'] + self.plt = opts['plt'] except KeyError: pass try: - self.base_url = opts['BASE_URL'] + self.url = opts['url'] except KeyError: pass try: - self.base_static_url = opts['BASE_STATIC_URL'] + self.static_url = opts['static_url'] except KeyError: pass try: - self.title = opts['TITLE'] + self.default_image_url = opts['default_image_url'] except KeyError: pass try: - self.dformat = opts['DATE_FORMAT'] + self.title = opts['title'] except KeyError: pass try: - self.l_dformat = opts['LIST_DATE_FORMAT'] + self.dformat = opts['date_formaT'] except KeyError: pass try: - self.lsep_dformat = opts['LIST_SEP_DATE_FORMAT'] + self.l_dformat = opts['list_date_FORMAT'] + except KeyError: pass + + try: + self.lsep_dformat = opts['list_sep_dATE_FORMAT'] except KeyError: pass try: # if the parser above didn't read a boolean true, then take it # as a false anyways - self.force = opts['FORCE'] if opts['FORCE'] is True else False + self.force = opts['force'] if opts['force'] is True else False except KeyError: pass except OSError: pass @@ -93,11 +109,14 @@ class Configuration: if self.plt is None: self.plt = opts['plt'] - if self.base_url is None: - self.base_url = opts['url'] + if self.url is None: + self.url = opts['url'] + + if self.static_url is None: + self.static_url = opts['static_url'] - if self.base_static_url is None: - self.base_static_url = opts['static_url'] + if self.default_image_url is None: + self.default_image_url = opts['default_image_url'] if self.title is None: self.title = opts['title'] diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 1e0cdeb..43acb05 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -3,23 +3,21 @@ from datetime import datetime, timezone from .configuration import Configuration -DFORMAT_RSS = '%a, %d %b %Y %H:%M:%S GMT' -DFORMAT_SITEMAP = '%Y-%m-%d' - - class Page: def __init__(self, name: str, ctime: float, mtime: float, html: str, - meta: dict): + meta: dict, + config: Configuration): # initial data self.name: str = name self.ctimestamp: float = ctime self.mtimestamp: float = mtime self.content: str = html self.meta: dict = meta + self.config: Configuration = config # data from self.meta self.title: str = '' @@ -32,6 +30,7 @@ class Page: # constructed self.url: str = '' + self.image_url: str = '' self.cdate: str = '' self.cdate_list: str = '' self.cdate_list_sep: str = '' @@ -57,7 +56,7 @@ class Page: # parses meta from self.meta, for og, it prioritizes, # the actual og meta - def parse(self, config: Configuration): + def parse(self): # required meta elements self.title = self.meta['title'][0] self.author = self.meta['author'][0] @@ -67,21 +66,23 @@ class Page: # dates self.cdatetime = datetime.fromtimestamp(self.ctimestamp, tz=timezone.utc) - self.cdate = self.cdatetime.strftime(config.dformat) - self.cdate_list = self.cdatetime.strftime(config.l_dformat) - self.cdate_list_sep = self.cdatetime.strftime(config.lsep_dformat) - self.cdate_rss = self.cdatetime.strftime(DFORMAT_RSS) - self.cdate_sitemap = self.cdatetime.strftime(DFORMAT_SITEMAP) + self.cdate = self.cdatetime.strftime(self.config.dformat) + self.cdate_list = self.cdatetime.strftime(self.config.l_dformat) + self.cdate_list_sep = self.cdatetime.strftime(self.config.lsep_dformat) + self.cdate_rss = self.cdatetime.strftime(self.config.dformat_rss) + self.cdate_sitemap = \ + self.cdatetime.strftime(self.config.dformat_sitemap) # only if file/page has been modified if self.mtimestamp != 0.0: self.mdatetime = datetime.fromtimestamp(self.mtimestamp, tz=timezone.utc) - self.mdate = self.mdatetime.strftime(config.dformat) - self.mdate_list = self.mdatetime.strftime(config.l_dformat) - self.mdate_list_sep = self.mdatetime.strftime(config.lsep_dformat) - self.mdate_rss = self.mdatetime.strftime(DFORMAT_RSS) - self.mdate_sitemap = self.mdatetime.strftime(DFORMAT_SITEMAP) + self.mdate = self.mdatetime.strftime(self.config.dformat) + self.mdate_list = self.mdatetime.strftime(self.config.l_dformat) + self.mdate_list_sep = self.mdatetime.strftime(self.config.lsep_dformat) + self.mdate_rss = self.mdatetime.strftime(self.config.dformat_rss) + self.mdate_sitemap = \ + self.mdatetime.strftime(self.config.dformat_sitemap) # not always contains tags try: @@ -90,12 +91,19 @@ class Page: for t in tags_only: self.tags.append((t, - f'{config.base_url}/tag/@{t}.html')) + f'{self.config.url}/tag/@{t}.html')) except KeyError: pass - self.url = f'{config.base_url}/{self.name.replace(".md", ".html")}' + self.url = f'{self.config.url}/{self.name.replace(".md", ".html")}' + + try: + self.image_url = \ + f'{self.config.base_static_url}/{self.meta["image_url"]}' + except KeyError: + self.image_url = \ + f'{self.config.base_static_url}/{self.config.default_image_url}' - # if contains object graph elements + # if contains open graph elements try: # og_e = object graph entry for og_e in self.meta['og']: diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py index 7323bde..d75c923 100644 --- a/src/pyssg/parser.py +++ b/src/pyssg/parser.py @@ -5,7 +5,6 @@ from markdown import Markdown from .database import Database from .configuration import Configuration -from .configuration import Configuration from .page import Page @@ -13,11 +12,13 @@ from .page import Page class MDParser: def __init__(self, src: str, files: list[str], + config: Configuration, db: Database, md: Markdown): self.src: str = src self.files: list[str] = files + self.config: Configuration = config self.db: Database = db self.md: Markdown = md @@ -26,7 +27,7 @@ class MDParser: self.all_tags: list[tuple[str]] = None - def parse(self, config: Configuration): + def parse(self) -> None: # initialize lists self.all_pages = [] self.updated_pages = [] @@ -43,8 +44,9 @@ class MDParser: self.db.e[f][0], self.db.e[f][1], content, - self.md.Meta) - page.parse(config) + self.md.Meta, + self.config) + page.parse() # keep a separated list for all and updated pages if updated: @@ -66,7 +68,6 @@ class MDParser: self.updated_pages.sort(reverse=True) self.all_pages.sort(reverse=True) # TODO: fix this in case it doesn't work lol - # this should update references to all_pages and updated_pages??? for i, p in enumerate(self.all_pages): try: prev_page: Page = self.all_pages[i - 1] diff --git a/src/pyssg/plt/index.html b/src/pyssg/plt/index.html index c96db3b..e06efdb 100644 --- a/src/pyssg/plt/index.html +++ b/src/pyssg/plt/index.html @@ -2,11 +2,11 @@ - - Index -- {{site_title}} + + Index -- {{config.title}} -

Index -- {{site_title}}

+

Index -- {{config.title}}

Some text here.

Tags: diff --git a/src/pyssg/plt/page.html b/src/pyssg/plt/page.html index fefaaa6..2fc3943 100644 --- a/src/pyssg/plt/page.html +++ b/src/pyssg/plt/page.html @@ -2,8 +2,8 @@ - - {{page.title}} -- {{site_title}} + + {{page.title}} -- {{config.title}}

{{page.title}}

diff --git a/src/pyssg/plt/rss.xml b/src/pyssg/plt/rss.xml index 0426580..42020d7 100644 --- a/src/pyssg/plt/rss.xml +++ b/src/pyssg/plt/rss.xml @@ -3,24 +3,24 @@ xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/"> - {{site_title}} - {{site_base_url}} - + {{config.title}} + {{config.url}} + Short site description. en-us Blog Copyright 2021 Somebody some@one.com (Sombody) some@one.com (Sombody) - {{run_date}} - {{run_date}} - pyssg v{{pyssg_version}} + {{config.run_date_rss}} + {{run_date_rss}} + pyssg v{{config.version}} https://validator.w3.org/feed/docs/rss2.html 30 - {{site_base_static_url}}/images/blog.png - {{site_title}} - {{site_base_url}} + {{config.static_url}}/images/blog.png + {{config.title}} + {{config.url}} {%for p in all_pages%} diff --git a/src/pyssg/plt/sitemap.xml b/src/pyssg/plt/sitemap.xml index a5b5404..26ee5c1 100644 --- a/src/pyssg/plt/sitemap.xml +++ b/src/pyssg/plt/sitemap.xml @@ -14,7 +14,7 @@ {%for t in all_tags%} {{t[1]}} - {{run_date}} + {{config.run_date_sitemap}} daily 0.5 diff --git a/src/pyssg/plt/tag.html b/src/pyssg/plt/tag.html index 2fdc27b..d856ce4 100644 --- a/src/pyssg/plt/tag.html +++ b/src/pyssg/plt/tag.html @@ -2,8 +2,8 @@ - - Posts filtered by {{tag[0]}} -- {{site_title}} + + Posts filtered by {{tag[0]}} -- {{config.title}}

Posts filtered by {{tag[0]}}

diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index 76361f5..75f0fe4 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -4,21 +4,16 @@ from argparse import ArgumentParser, Namespace from typing import Union from jinja2 import Environment, FileSystemLoader from markdown import Markdown -from importlib.metadata import version from importlib.resources import path -from datetime import datetime, timezone from .configuration import Configuration from .database import Database -from .builder import HTMLBuilder +from .builder import Builder from .page import Page from .rss import RSSBuilder from .sitemap import SitemapBuilder -VERSION = version('pyssg') - - def get_options() -> Namespace: parser = ArgumentParser(prog='pyssg', description='''Static Site Generator that reads @@ -58,6 +53,10 @@ def get_options() -> Namespace: default='', type=str, help='''base static url without trailing slash''') + parser.add_argument('--default-image-url', + default='', + type=str, + help='''default image url''') parser.add_argument('--title', default='Blog', type=str, @@ -107,7 +106,7 @@ def main() -> None: config.fill_missing(opts) if opts['version']: - print(f'pyssg v{VERSION}') + print(f'pyssg v{config.version}') return if opts['init']: @@ -147,27 +146,8 @@ def main() -> None: md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', 'smarty', 'toc', 'wikilinks'], output_format='html5') - builder: HTMLBuilder = HTMLBuilder(config, - env, - db, - md) + builder: Builder = Builder(config, env, db, md) builder.build() - # get all parsed pages and tags for rss and sitemap construction - all_pages: list[Page] = builder.all_pages - all_tags: list[tuple[str]] = builder.all_tags - - rss_builder: RSSBuilder = RSSBuilder(config, - env, - all_pages, - all_tags) - rss_builder.build() - - sm_builder: SitemapBuilder = SitemapBuilder(config, - env, - all_pages, - all_tags) - sm_builder.build() - db.write() return diff --git a/src/pyssg/rss.py b/src/pyssg/rss.py deleted file mode 100644 index 7de29ae..0000000 --- a/src/pyssg/rss.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -from jinja2 import Environment, Template -from importlib.metadata import version -from datetime import datetime, timezone - -from .page import Page -from .configuration import Configuration - - -VERSION = version('pyssg') -# specific format for rss -DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' -RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) - - -class RSSBuilder: - def __init__(self, config: Configuration, - env: Environment, - pages: list[Page], - tags: list[tuple[str]]): - self.config: Configuration = config - self.env: Environment = env - self.pages: list[Page] = pages - self.tags: list[tuple[str]] = tags - - - def build(self): - template: Template = self.env.get_template("rss.xml") - content: str = template.render(site_title=self.config.title, - site_base_url=self.config.base_url, - site_base_static_url=self.config.base_static_url, - pyssg_version=VERSION, - run_date=RUN_DATE, - all_pages=self.pages, - all_tags=self.tags) - - with open(os.path.join(self.config.dst, 'rss.xml'), 'w') as f: - f.write(content) diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py deleted file mode 100644 index f63a7e6..0000000 --- a/src/pyssg/sitemap.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -from jinja2 import Environment, Template -from importlib.metadata import version -from datetime import datetime, timezone - -from .page import Page -from .configuration import Configuration - -VERSION = version('pyssg') -# specific format for sitemap (not as strict) -DFORMAT = '%Y-%m-%d' -RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) - - -class SitemapBuilder: - def __init__(self, config: Configuration, - env: Environment, - pages: list[Page], - tags: list[str]): - self.config: Configuration = config - self.env: Environment = env - self.pages: list[Page] = pages - self.tags: list[str] = tags - - - def build(self): - template: Template = self.env.get_template("sitemap.xml") - content: str = template.render(site_title=self.config.title, - site_base_url=self.config.base_url, - site_base_static_url=self.config.base_static_url, - pyssg_version=VERSION, - run_date=RUN_DATE, - all_pages=self.pages, - all_tags=self.tags) - - - with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w') as f: - f.write(content) -- cgit v1.2.3-70-g09d2