From 11b9e8505b8a92555cce6615f19268a620e9ce53 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Fri, 4 Jun 2021 22:13:44 -0600 Subject: refactor and add support for jinija --- ChangeLog | 2 + README.md | 58 ++++++++-- plt/index.html | 32 +++++ plt/page.html | 25 ++++ plt/rss.xml | 39 +++++++ plt/sitemap.xml | 22 ++++ plt/tag.html | 26 +++++ pyssgrc | 9 ++ requirements.txt | 1 + setup.cfg | 1 + src/pyssg/builder.py | 282 +++++++++++++-------------------------------- src/pyssg/configuration.py | 16 +++ src/pyssg/page.py | 95 ++++++++++----- src/pyssg/parser.py | 42 +++++-- src/pyssg/pyssg.py | 65 ++++++++--- src/pyssg/rss.py | 60 +++------- src/pyssg/sitemap.py | 54 +++------ src/pyssg/template.py | 243 -------------------------------------- 18 files changed, 483 insertions(+), 589 deletions(-) create mode 100644 plt/index.html create mode 100644 plt/page.html create mode 100644 plt/rss.xml create mode 100644 plt/sitemap.xml create mode 100644 plt/tag.html create mode 100644 pyssgrc delete mode 100644 src/pyssg/template.py diff --git a/ChangeLog b/ChangeLog index fac7994..a51fb5a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,8 @@ CHANGES ======= +* just include changelog + v0.4.1 ------ diff --git a/README.md b/README.md index 59a1333..80bbe65 100644 --- a/README.md +++ b/README.md @@ -9,15 +9,15 @@ I'm writing this in *pYtHoN* (thought about doing it in Go, but I'm most comfort **This is still a WIP. Still doesn't build `sitemap.xml` or `rss.xml` files.** - [x] Build static site parsing `markdown` files ( `*.md` -> `*.html`) - - [x] Using plain `*.html` files for templates. - - [ ] Would like to change to something more flexible and easier to manage ([`jinja`](https://jinja.palletsprojects.com/en/3.0.x/), for example). - - [x] Preserves hand-made `*.html` files. - - [x] Tag functionality. - - [ ] Open Graph (and similar) support. + - [x] ~~Using plain `*.html` files for templates.~~ Changed to Jinja templates. 
+ - [x] Would like to change to something more flexible and easier to manage ([`jinja`](https://jinja.palletsprojects.com/en/3.0.x/), for example). + - [x] Preserves hand-made `*.html` files. + - [x] Tag functionality. + - [ ] Open Graph (and similar) support. - [x] Build `sitemap.xml` file. - [x] Build `rss.xml` file. - [x] Only build page if `*.md` is new or updated. - - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). + - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). - [x] Configuration file as an alternative to using command line flags (configuration file options are prioritized). ### Markdown features @@ -51,9 +51,9 @@ First initialize the directories you're going to use for the source files and de pyssg -s src_dir -d dst_dir -i ``` -That creates the desired directories with the basic templates that can be edited as desired. Place your `*.md` files somewhere inside the source directory (`src_dir` in the command above), but outside of the `templates` directory. It accepts sub-directories. +That creates the desired directories with the basic templates that can be edited as desired (see variables available for Jinja below). Place your `*.md` files somewhere inside the source directory (`src_dir` in the command above), but outside of the `templates` directory. It accepts sub-directories. -Strongly recommended to edit `rss.xml` template under `rss` directory, since it has a lot of placeholder values. +Strongly recommended to edit the `rss.xml` template. Build the site with: @@ -64,3 +64,45 @@ pyssg -s src_dir -d dst_dir -u https://base.url -b That creates all `*.html` for the site and can be easily moved to the server. 
Here, the `-u` flag is technically optional in the sense that you'll not receive a warning/error, but it's used to prepend links with this URL (not strictly required everywhere), so don't ignore it; also don't include the trailing `/`. For now, the `-b`uild tag also creates the `rss.xml` and `sitemap.xml` files based on templates including only all converted `*.md` files (and processed tags in case of the sitemap), meaning that separate `*.html` files should be included manually in the template. + +For more options/flags just check out `pyssg -h`. + +## Available Jinja variables + +Here is the list of variables that you can use in specific Jinja templates, each with a short description. Note that all urls are without the trailing slash `/`. + +- General: + - `site_title` (`str`) (all): title of the website. + - `site_base_url` (`str`) (all): base url of the website. + - `site_base_static_url` (`str`) (all): base static url where all static files are located, mostly needed for correct rss feed generation when using a `base` tag and using relative links to files. For more, see [the `base` element documentation](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/base). + - `pyssg_version` (`str`) (all): version in numeric form, e.g. `0.5.0`. + - `run_date` (`str`) (all): date when the program was run, with format required for rss. +- Pages: + - `all_pages` (`list(Page)`) (all): list of all the pages, sorted by creation time, reversed. + - `page` (`Page`) (`page.html`): page object that contains the following attributes: + - `title` (`str`): title of the page. + - `author` (`str`): author of the page. + - `content` (`str`): actual content of the page. + - `cdatetime` (`str`): creation datetime object of the page. + - `cdate` (`str`): formatted `cdatetime` as the configuration option `DATE_FORMAT`. + - `cdate_list` (`str`): formatted `cdatetime` as the configuration option `LIST_DATE_FORMAT`. + - `cdate_list_sep` (`str`): formatted `cdatetime` as the configuration option `LIST_SEP_DATE_FORMAT`. 
+ - `cdate_rss` (`str`): formatted `cdatetime` as required by rss. + - `cdate_sitemap` (`str`): formatted `cdatetime` as required by sitemap. + - `mdatetime` (`str`): modification datetime object of the page. Defaults to None. + - `mdate` (`str`): formatted `mdatetime` as the configuration option `DATE_FORMAT`. Defaults to None. + - `mdate_list` (`str`): formatted `mdatetime` as the configuration option `LIST_DATE_FORMAT`. + - `mdate_list_sep` (`str`): formatted `mdatetime` as the configuration option `LIST_SEP_DATE_FORMAT`. + - `mdate_rss` (`str`): formatted `mdatetime` as required by rss. + - `mdate_sitemap` (`str`): formatted `mdatetime` as required by sitemap. + - `summary` (`str`): summary of the page, as specified in the `*.md` file. + - `lang` (`str`): page language, used for the general `html` tag `lang` attribute. + - `tags` (`list(tuple(str))`): list of tuples of tags of the page, containing the name and the url of the tag, in that order. Defaults to empty list. + - `url` (`str`): url of the page, this already includes the `site_base_url`. + - `next/previous` (`Page`): reference to the next or previous page object (containing all these attributes). Defaults to None. + - `og` (`dict(str, str)`): dict for Open Graph metadata. + - `meta` (`dict(str, list(str))`): meta dict as obtained from python-markdown; in case you use a meta tag not yet supported, it will be available there. +- Tags: + - `tag` (`tuple(str)`) (`tag.html`): tuple of name and url of the current tag. + - `tag_pages` (`list(Page)`) (`tag.html`): similar to `all_pages` but contains all the pages for the current tag. + - `all_tags` (`list(tuple(str))`) (all): similar to `page.tags` but contains all the tags. diff --git a/plt/index.html b/plt/index.html new file mode 100644 index 0000000..c96db3b --- /dev/null +++ b/plt/index.html @@ -0,0 +1,32 @@ + + + + + + Index -- {{site_title}} + + +

Index -- {{site_title}}

+

Some text here.

+ +

Tags: + {%for t in all_tags%} + {{t[0]}}{{", " if not loop.last else ""}} + {%endfor%} +

+ +

Articles

+ + + diff --git a/plt/page.html b/plt/page.html new file mode 100644 index 0000000..fefaaa6 --- /dev/null +++ b/plt/page.html @@ -0,0 +1,25 @@ + + + + + + {{page.title}} -- {{site_title}} + + +

{{page.title}}

+

By {{page.author}}

+

Created: {{page.cdate}}

+ {%if page.mdate is not none%} +

Modified: {{page.mdate}}

+ {%endif%} + + {{page.content}} + +

Tags: + {%for t in page.tags%} + {{t[0]}}{{", " if not loop.last else ""}} + {%endfor%} +

+ + + diff --git a/plt/rss.xml b/plt/rss.xml new file mode 100644 index 0000000..99b2b49 --- /dev/null +++ b/plt/rss.xml @@ -0,0 +1,39 @@ + + + + {{site_title}} + {{site_base_url}} + + Short site description. + en-us + Blog + Copyright 2021 Somebody + some@one.com (Sombody) + some@one.com (Sombody) + {{run_date}} + {{run_date}} + pyssg v{{pyssg_version}} + https://validator.w3.org/feed/docs/rss2.html + 30 + + {{site_base_static_url}}/images/blog.png + {{site_title}} + {{site_base_url}} + + {%for p in all_pages%} + + {{p.title}} + {{p.url}} + {{p.url}} + {{p.cdate_rss}} + {%for t in p.tags%} + {{t[0]}} + {%endfor%} + {{p.summary}} + + + {%endfor%} + + diff --git a/plt/sitemap.xml b/plt/sitemap.xml new file mode 100644 index 0000000..a5b5404 --- /dev/null +++ b/plt/sitemap.xml @@ -0,0 +1,22 @@ + + + {%for p in all_pages%} + + {{p.url}} + {{p.mdate_sitemap if p.mdate_sitemap else p.cdate_sitemap}} + weekly + 1.0 + + {%endfor%} + + {%for t in all_tags%} + + {{t[1]}} + {{run_date}} + daily + 0.5 + + {%endfor%} + diff --git a/plt/tag.html b/plt/tag.html new file mode 100644 index 0000000..2fdc27b --- /dev/null +++ b/plt/tag.html @@ -0,0 +1,26 @@ + + + + + + Posts filtered by {{tag[0]}} -- {{site_title}} + + +

Posts filtered by {{tag[0]}}

+

Some text here.

+ +

Articles

+ + + diff --git a/pyssgrc b/pyssgrc new file mode 100644 index 0000000..2e8f895 --- /dev/null +++ b/pyssgrc @@ -0,0 +1,9 @@ +SRC_PATH=e_src +DST_PATH=e_dst +PLT_PATH=plt +BASE_URL=https://blog.luevano.xyz +BASE_STATIC_URL=https://static.luevano.xyz +TITLE=Luévano's Blog +DATE_FORMAT=%a, %b %d, %Y @ %H:%M %Z +LIST_DATE_FORMAT=%b %d +LIST_SEP_DATE_FORMAT=%B %Y diff --git a/requirements.txt b/requirements.txt index cc2d163..248a5ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ markdown==3.3.4 +jinja2==3.0.1 diff --git a/setup.cfg b/setup.cfg index 4df0e93..f661287 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,6 +34,7 @@ console_scripts = package_dir = = src packages = find: +include_package_data=True [options.packages.find] where = src diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 8f7b67c..3a4474e 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -1,34 +1,48 @@ import os import shutil +from operator import itemgetter from copy import deepcopy +from jinja2 import Environment, Template +from markdown import Markdown +from importlib.metadata import version +from datetime import datetime, timezone from .configuration import Configuration -from .template import Template from .database import Database from .parser import MDParser from .page import Page from .discovery import get_file_list, get_dir_structure + +VERSION = version('pyssg') +# specific format for rss +DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) + + class HTMLBuilder: def __init__(self, config: Configuration, - template: Template, - db: Database): + env: Environment, + db: Database, + md: Markdown): self.src: str = config.src self.dst: str = config.dst + self.title: str = config.title self.base_url: str = config.base_url - self.dformat: str = config.dformat - self.l_dformat: str = config.l_dformat - self.lsep_dformat: str = config.lsep_dformat + self.base_static_url: str = config.base_static_url self.force: 
bool = config.force - self.template: Template = template + self.config: Configuration = config + self.env: Environment = env self.db: Database = db + self.md: Markdown = md self.dirs: list[str] = None self.md_files: list[str] = None self.html_files: list[str] = None self.all_pages: list[Page] = None + self.updated_pages: list[Page] = None self.all_tags: list[str] = None @@ -40,31 +54,22 @@ class HTMLBuilder: self.__create_dir_structure() self.__copy_html_files() - parser: MDParser = MDParser(self.src, self.md_files, self.db) - parser.parse() + parser: MDParser = MDParser(self.src, + self.md_files, + self.db, + self.md) + parser.parse(self.config) # just to be able to extract all pages out of this class self.all_pages = parser.all_pages + self.updated_pages = parser.updated_pages self.all_tags = parser.all_tags # create the article index - self.__create_article_index(parser.all_tags, parser.all_pages) - - # create each category of html pages + self.__create_article_index() # check if all pages should be created - if self.force: - self.__create_articles(parser.all_pages) - else: - self.__create_articles(parser.updated_pages) - self.__create_tags(parser.all_tags, parser.all_pages) - - - def get_pages(self) -> list[Page]: - return self.all_pages - - - def get_tags(self) -> list[str]: - return self.all_tags + self.__create_articles() + self.__create_tags() def __create_dir_structure(self) -> None: @@ -90,142 +95,57 @@ class HTMLBuilder: shutil.copy2(src_file, dst_file) - # this is really similar to create_tag (singular) - def __create_article_index(self, tags: list[str], - pages: list[Page]) -> None: - # make temporary template - t: Template = deepcopy(self.template) - - # do basic replacements - # get page and tag list formated, both functions do replacements - p_list: list[str] = self.__get_pages_formatted(pages, t) - t_list: list[str] = self.__get_tags_formatted(tags, t) - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', 
f'Index') + def __create_article_index(self) -> None: + template: Template = self.env.get_template("index.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags) with open(os.path.join(self.dst, 'index.html'), 'w') as f: - f.write(t.header) - f.write(t.articles.header) - - f.write(t.tags.list_header) - for tag in t_list: - f.write(tag) - f.write(t.tags.list_footer) + f.write(content) - f.write(t.articles.list_header) - for page in p_list: - f.write(page) - f.write(t.articles.list_footer) - f.write(t.articles.footer) - f.write(t.footer) - - - def __create_articles(self, pages: list[Page]) -> None: - for p in pages: - self.__create_article(p) + def __create_articles(self) -> None: + # check if only updated should be created + if self.force: + for p in self.all_pages: + self.__create_article(p) + else: + for p in self.updated_pages: + self.__create_article(p) def __create_article(self, page: Page) -> None: - # TODO: create better solution for replace - # make temporary template - t: Template = deepcopy(self.template) - # prepare html file name f_name: str = page.name f_name = f_name.replace('.md', '.html') - # get timestamps - c_date: str = page.c_datetime.strftime(self.dformat) - m_date: str = None - if page.m_datetime is not None: - m_date: str = page.m_datetime.strftime(self.dformat) - - # do basic replacements - # get tag list formatted (some replacements done inside - # get_tags_formatted) - t_list: list[str] = None - if page.tags is not None: - t_list = self.__get_tags_formatted(page.tags, t) - # tags formatted as a single string. - tags_formatted: str = ''.join(t_list) - # join list header and footer with all tags list. 
- tags_formatted = ''.join([t.tags.list_header, - tags_formatted, - t.tags.list_footer]) - - # common - t.header = t.header.replace("$$LANG", page.lang) - t.header = t.header.replace('$$TITLE', page.title) - - # article header - t.article.header = t.article.header.replace('$$TITLE', page.title) - t.article.header = t.article.header.replace('$$AUTHOR', page.author) - t.article.header = t.article.header.replace('$$CTIME', c_date) - if m_date is not None: - t.article.header = t.article.header.replace('$$MTIME', m_date) - else: - t.article.header = t.article.header.replace('$$MTIME', '') - - # article footer (same replaces as header) - t.article.footer = t.article.footer.replace('$$TITLE', page.title) - t.article.footer = t.article.footer.replace('$$AUTHOR', page.author) - t.article.footer = t.article.footer.replace('$$CTIME', c_date) - if m_date is not None: - t.article.footer = t.article.footer.replace('$$MTIME', m_date) - else: - t.article.footer = t.article.footer.replace('$$MTIME', '') - - # add tags to article list - if t_list is not None: - t.article.footer = t.article.footer.replace('$$TAGS', - tags_formatted) - else: - t.article.footer = t.article.footer.replace('$$TAGS', '') + template: Template = self.env.get_template("page.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags, + page=page) with open(os.path.join(self.dst, f_name), 'w') as f: - f.write(t.header) - f.write(t.article.header) - f.write(page.html) - - # not required anymore, tags included in article footer - # if t_list is not None: - # f.write(t.tags.list_header) - # for tag in t_list: - # f.write(tag) - # f.write(t.tags.list_footer) - - f.write(t.article.footer) - f.write(t.footer) - - - def __get_tags_formatted(self, tags: list[str], - template: Template) -> list[str]: - tag_amount: int = len(tags) - 
tags_formatted: list[str] = [] - for i, t in enumerate(tags): - # t_e=tag entry - t_e: str = template.tags.list_entry - t_e = t_e.replace('$$URL', - f'{self.base_url}/tag/@{t}.html') - t_e = t_e.replace('$$NAME', t) - - tags_formatted.append(t_e) - if i != tag_amount - 1: - tags_formatted.append(template.tags.list_separator) - - return tags_formatted - - - def __create_tags(self, tags: list[str], - pages: list[Page]) -> None: - for t in tags: + f.write(content) + + + def __create_tags(self) -> None: + for t in self.all_tags: # get a list of all pages that have current tag tag_pages: list[Page] = [] - for p in pages: - if p.tags is not None and t in p.tags: + for p in self.all_pages: + if p.tags is not None and t[0] in list(map(itemgetter(0), + p.tags)): tag_pages.append(p) # build tag page @@ -235,61 +155,19 @@ class HTMLBuilder: tag_pages = [] - def __create_tag(self, tag: str, + def __create_tag(self, tag: tuple[str], pages: list[Page]) -> None: - # TODO: create better solution for replace - # make temporary template - t: Template = deepcopy(self.template) - - # do basic replacements - # get page list formated (some replacements done inside - # get_pages_formatted) - p_list: list[str] = self.__get_pages_formatted(pages, t) - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', f'Posts filtered by: {tag}') - - # tag header - tag_url: str = f'{self.base_url}/tag/@{tag}.html' - t.tags.header = t.tags.header.replace('$$NAME', tag) - - with open(os.path.join(self.dst, f'tag/@{tag}.html'), 'w') as f: - f.write(t.header) - f.write(t.tags.header) - - f.write(t.articles.list_header) - for p in p_list: - f.write(p) - f.write(t.articles.list_footer) - - f.write(t.tags.footer) - f.write(t.footer) - - - def __get_pages_formatted(self, pages: list[Page], - template: Template) -> list[str]: - month_year: str = '-' - pages_formatted: list[str] = [] - for p in pages: - # check if the monthly separator should be included - c_month_year: 
str = p.c_datetime.strftime(self.lsep_dformat) - if c_month_year != month_year: - month_year = c_month_year - - month_sep: str = template.articles.list_separator - month_sep = month_sep.replace('$$SEP', month_year) - - pages_formatted.append(month_sep) - - f_name: str = p.name - f_name = f_name.replace('.md', '.html') - - # p_e=page entry - p_e: str = template.articles.list_entry - p_e = p_e.replace('$$URL', f'{self.base_url}/{f_name}') - p_e = p_e.replace('$$DATE', p.c_datetime.strftime(self.l_dformat)) - p_e = p_e.replace('$$TITLE', p.title) - - pages_formatted.append(p_e) - - return pages_formatted + + template: Template = self.env.get_template("tag.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags, + tag=tag, + tag_pages=pages) + + with open(os.path.join(self.dst, f'tag/@{tag[0]}.html'), 'w') as f: + f.write(content) diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py index 8ee592e..dca43b5 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -7,7 +7,9 @@ class Configuration: self.path: str = path self.src: str = None self.dst: str = None + self.plt: str = None self.base_url: str = None + self.base_static_url: str = None self.title: str = None self.dformat: str = None self.l_dformat: str = None @@ -44,10 +46,18 @@ class Configuration: self.dst = opts['DST_PATH'] except KeyError: pass + try: + self.plt = opts['PLT_PATH'] + except KeyError: pass + try: self.base_url = opts['BASE_URL'] except KeyError: pass + try: + self.base_static_url = opts['BASE_STATIC_URL'] + except KeyError: pass + try: self.title = opts['TITLE'] except KeyError: pass @@ -80,9 +90,15 @@ class Configuration: if self.dst is None: self.dst = opts['dst'] + if self.plt is None: + self.plt = opts['plt'] + if self.base_url is None: self.base_url = opts['url'] + if 
self.base_static_url is None: + self.base_static_url = opts['static_url'] + if self.title is None: self.title = opts['title'] diff --git a/src/pyssg/page.py b/src/pyssg/page.py index e03ca6b..1e0cdeb 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -1,68 +1,101 @@ from datetime import datetime, timezone +from .configuration import Configuration + + +DFORMAT_RSS = '%a, %d %b %Y %H:%M:%S GMT' +DFORMAT_SITEMAP = '%Y-%m-%d' + class Page: def __init__(self, name: str, - c_time: float, - m_time: float, + ctime: float, + mtime: float, html: str, meta: dict): + # initial data self.name: str = name - self.c_time: float = c_time - self.m_time: float = m_time - self.html: str = html + self.ctimestamp: float = ctime + self.mtimestamp: float = mtime + self.content: str = html self.meta: dict = meta # data from self.meta self.title: str = '' self.author: str = '' - self.c_datetime: datetime = None - self.m_datetime: datetime = None + self.cdatetime: datetime = None + self.mdatetime: datetime = None self.summary: str = '' self.lang: str = 'en' - self.tags: list = None + self.tags: list[tuple[str]] = [] + + # constructed + self.url: str = '' + self.cdate: str = '' + self.cdate_list: str = '' + self.cdate_list_sep: str = '' + self.cdate_rss: str = '' + self.cdate_sitemap: str = '' + self.mdate: str = None + self.mdate_list: str = None + self.mdate_list_sep: str = None + self.mdate_rss: str = '' + self.mdate_sitemap: str = '' + + # later assigned references to next and previous pages + self.next: Page = None + self.previous: Page = None # also from self.meta, but for og metadata self.og: dict[str, str] = dict() - self.__parse_meta() def __lt__(self, other): - return self.c_time < other.c_time + return self.ctimestamp < other.ctimestamp # parses meta from self.meta, for og, it prioritizes, # the actual og meta - def __parse_meta(self): - try: - self.title = self.meta['title'][0] - except KeyError: pass - - try: - self.author = self.meta['author'][0] - except KeyError: pass - 
- self.c_datetime = datetime.fromtimestamp(self.c_time, + def parse(self, config: Configuration): + # required meta elements + self.title = self.meta['title'][0] + self.author = self.meta['author'][0] + self.summary = self.meta['summary'][0] + self.lang = self.meta['lang'][0] + + # dates + self.cdatetime = datetime.fromtimestamp(self.ctimestamp, tz=timezone.utc) - - if self.m_time != 0.0: - self.m_datetime = datetime.fromtimestamp(self.m_time, + self.cdate = self.cdatetime.strftime(config.dformat) + self.cdate_list = self.cdatetime.strftime(config.l_dformat) + self.cdate_list_sep = self.cdatetime.strftime(config.lsep_dformat) + self.cdate_rss = self.cdatetime.strftime(DFORMAT_RSS) + self.cdate_sitemap = self.cdatetime.strftime(DFORMAT_SITEMAP) + + # only if file/page has been modified + if self.mtimestamp != 0.0: + self.mdatetime = datetime.fromtimestamp(self.mtimestamp, tz=timezone.utc) + self.mdate = self.mdatetime.strftime(config.dformat) + self.mdate_list = self.mdatetime.strftime(config.l_dformat) + self.mdate_list_sep = self.mdatetime.strftime(config.lsep_dformat) + self.mdate_rss = self.mdatetime.strftime(DFORMAT_RSS) + self.mdate_sitemap = self.mdatetime.strftime(DFORMAT_SITEMAP) + # not always contains tags try: - self.summary = self.meta['summary'][0] - except KeyError: pass + tags_only: list[str] = self.meta['tags'] + tags_only.sort() - try: - self.lang = self.meta['lang'][0] + for t in tags_only: + self.tags.append((t, + f'{config.base_url}/tag/@{t}.html')) except KeyError: pass - try: - self.tags = self.meta['tags'] - self.tags.sort() - except KeyError: pass + self.url = f'{config.base_url}/{self.name.replace(".md", ".html")}' + # if contains object graph elements try: # og_e = object graph entry for og_e in self.meta['og']: diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py index 548fb05..7323bde 100644 --- a/src/pyssg/parser.py +++ b/src/pyssg/parser.py @@ -1,8 +1,11 @@ import os +from operator import itemgetter from datetime import datetime from 
markdown import Markdown from .database import Database +from .configuration import Configuration +from .configuration import Configuration from .page import Page @@ -10,33 +13,38 @@ from .page import Page class MDParser: def __init__(self, src: str, files: list[str], - db: Database): - self.md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', - 'smarty', 'toc', 'wikilinks'], - output_format='html5') + db: Database, + md: Markdown): self.src: str = src self.files: list[str] = files + self.db: Database = db + self.md: Markdown = md self.all_pages: list[Page] = None self.updated_pages: list[Page] = None - self.all_tags: list[str] = None + self.all_tags: list[tuple[str]] = None - def parse(self): + def parse(self, config: Configuration): # initialize lists self.all_pages = [] self.updated_pages = [] self.all_tags = [] + all_tag_names: list[str] = [] for f in self.files: src_file: str = os.path.join(self.src, f) # get flag if update is successful updated: bool = self.db.update(src_file, remove=f'{self.src}/') - page: Page = None content: str = self.md.reset().convert(open(src_file).read()) - page = Page(f, self.db.e[f][0], self.db.e[f][1], content, self.md.Meta) + page: Page = Page(f, + self.db.e[f][0], + self.db.e[f][1], + content, + self.md.Meta) + page.parse(config) # keep a separated list for all and updated pages if updated: @@ -46,14 +54,26 @@ class MDParser: # parse tags if page.tags is not None: # add its tag to corresponding db entry if existent - self.db.update_tags(f, page.tags) + self.db.update_tags(f, list(map(itemgetter(0), page.tags))) # update all_tags attribute for t in page.tags: - if t not in self.all_tags: + if t[0] not in list(map(itemgetter(0), self.all_tags)): self.all_tags.append(t) # sort list of tags for consistency - self.all_tags.sort() + self.all_tags.sort(key=itemgetter(0)) self.updated_pages.sort(reverse=True) self.all_pages.sort(reverse=True) + # TODO: fix this in case it doesn't work lol + # this should update references 
to all_pages and updated_pages??? + for i, p in enumerate(self.all_pages): + try: + prev_page: Page = self.all_pages[i - 1] + p.previous = prev_page + except IndexError: pass + + try: + next_page: Page = self.all_pages[i + 1] + p.next = next_page + except IndexError: pass diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index 5d8ac33..933f7d3 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -1,16 +1,23 @@ import os from argparse import ArgumentParser, Namespace from typing import Union +from jinja2 import Environment, FileSystemLoader +from markdown import Markdown +from importlib.metadata import version +from importlib.resources import contents +from datetime import datetime, timezone from .configuration import Configuration from .database import Database -from .template import Template from .builder import HTMLBuilder from .page import Page from .rss import RSSBuilder from .sitemap import SitemapBuilder +VERSION = version('pyssg') + + def get_options() -> Namespace: parser = ArgumentParser(prog='pyssg', description='''Static Site Generator that reads @@ -19,6 +26,9 @@ def get_options() -> Namespace: location) all other options are ignored.\nFor datetime formats see: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes''') + parser.add_argument('-v', '--version', + action='store_true', + help='''print program version''') parser.add_argument('-c', '--config', default='$XDG_CONFIG_HOME/pyssg/pyssgrc', type=str, @@ -34,11 +44,20 @@ def get_options() -> Namespace: type=str, help='''dst directory; generated (and transfered html) files; defaults to 'dst' ''') + parser.add_argument('-t', '--plt', + default='plt', + type=str, + help='''plt directory; all template files; defaults to + 'plt' ''') parser.add_argument('-u', '--url', default='', type=str, help='''base url without trailing slash''') - parser.add_argument('-t', '--title', + parser.add_argument('--static-url', + default='', + type=str, + help='''base static url without 
trailing slash''') + parser.add_argument('--title', default='Blog', type=str, help='''general title for the website; defaults to @@ -86,16 +105,20 @@ def main() -> None: config.read() config.fill_missing(opts) + if opts['version']: + print(f'pyssg v{VERSION}') + return + if opts['init']: try: os.mkdir(config.src) os.makedirs(os.path.join(config.dst, 'tag')) + os.mkdir(config.plt) except FileExistsError: pass - # write default templates - template: Template = Template(config.src) - template.write() + for f in contents('pyssg'): + print(f) return if opts['build']: @@ -103,22 +126,34 @@ def main() -> None: db: Database = Database(os.path.join(config.src, '.files')) db.read() - # read templates - template: Template = Template(config.src) - template.read() + # the autoescape option could be a security risk if used in a dynamic + # website, as far as i can tell + env: Environment = Environment(loader=FileSystemLoader(config.plt), + autoescape=False, + trim_blocks=True, + lstrip_blocks=True) - builder: HTMLBuilder = HTMLBuilder(config, template, db) + md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', + 'smarty', 'toc', 'wikilinks'], + output_format='html5') + builder: HTMLBuilder = HTMLBuilder(config, + env, + db, + md) builder.build() - # get all parsed pages for rss construction - all_pages: list[Page] = builder.get_pages() - rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages) + # get all parsed pages and tags for rss and sitemap construction + all_pages: list[Page] = builder.all_pages + all_tags: list[tuple[str]] = builder.all_tags + + rss_builder: RSSBuilder = RSSBuilder(config, + env, + all_pages, + all_tags) rss_builder.build() - # get all tags for sitemap construction - all_tags: list[str] = builder.get_tags() sm_builder: SitemapBuilder = SitemapBuilder(config, - template.sitemap, + env, all_pages, all_tags) sm_builder.build() diff --git a/src/pyssg/rss.py b/src/pyssg/rss.py index 742b6b2..7de29ae 100644 --- a/src/pyssg/rss.py +++ 
b/src/pyssg/rss.py @@ -1,60 +1,38 @@ import os -import importlib.metadata +from jinja2 import Environment, Template +from importlib.metadata import version from datetime import datetime, timezone from .page import Page from .configuration import Configuration -VERSION = importlib.metadata.version('pyssg') -# This is static right here since an rss feed -# requires very specific date format +VERSION = version('pyssg') +# specific format for rss DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) class RSSBuilder: def __init__(self, config: Configuration, - template: str, - pages: list[Page]): + env: Environment, + pages: list[Page], + tags: list[tuple[str]]): self.config: Configuration = config - self.rss: str = template + self.env: Environment = env self.pages: list[Page] = pages + self.tags: list[tuple[str]] = tags def build(self): - # initial base replacements - self.rss = self.rss.replace('$$TITLE', self.config.title) - self.rss = self.rss.replace('$$LINK', self.config.base_url) - self.rss = self.rss.replace('$$PYSSGVERSION', f'pyssg v{VERSION}') - items_formatted: str = self.__get_items_formatted() - self.rss = self.rss.replace('$$ITEMS', items_formatted) - - current_date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) - self.rss = self.rss.replace('$$CURRENTDATE', current_date) + template: Template = self.env.get_template("rss.xml") + content: str = template.render(site_title=self.config.title, + site_base_url=self.config.base_url, + site_base_static_url=self.config.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.pages, + all_tags=self.tags) with open(os.path.join(self.config.dst, 'rss.xml'), 'w') as f: - f.write(self.rss) - - - def __get_items_formatted(self) -> str: - # i_f=items formatted for short - i_f: str = '' - for p in self.pages: - url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' - date: str = p.c_datetime.strftime(DFORMAT) - - i_f = f'{i_f} \n' - 
i_f = f'{i_f} {p.title}\n' - i_f = f'{i_f} {url}\n' - i_f = f'{i_f} {url}\n' - i_f = f'{i_f} {date}\n' - # TODO: maybe make this optional? - # add the tags as categories - if p.tags is not None: - for t in p.tags: - i_f = f'{i_f} {t.capitalize()}\n' - i_f = f'{i_f} {p.summary}\n' - i_f = f'{i_f} \n' - i_f = f'{i_f} \n' - - return i_f + f.write(content) diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py index 410033f..f63a7e6 100644 --- a/src/pyssg/sitemap.py +++ b/src/pyssg/sitemap.py @@ -1,60 +1,38 @@ import os +from jinja2 import Environment, Template +from importlib.metadata import version from datetime import datetime, timezone from .page import Page from .configuration import Configuration - +VERSION = version('pyssg') +# specific format for sitemap (not as strict) DFORMAT = '%Y-%m-%d' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) class SitemapBuilder: def __init__(self, config: Configuration, - template: str, + env: Environment, pages: list[Page], tags: list[str]): self.config: Configuration = config - self.sitemap: str = template + self.env: Environment = env self.pages: list[Page] = pages self.tags: list[str] = tags def build(self): - # initial base replacements - urls_formatted: str = self.__get_urls_formatted() - self.sitemap = self.sitemap.replace('$$URLS', urls_formatted) + template: Template = self.env.get_template("sitemap.xml") + content: str = template.render(site_title=self.config.title, + site_base_url=self.config.base_url, + site_base_static_url=self.config.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.pages, + all_tags=self.tags) with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w') as f: - f.write(self.sitemap) - - - def __get_urls_formatted(self) -> str: - # u_f=items formatted for short - u_f: str = '' - for p in self.pages: - url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' - if p.m_datetime is not None: - date: str = p.m_datetime.strftime(DFORMAT) - else: - 
date: str = p.c_datetime.strftime(DFORMAT) - - u_f = f'{u_f} \n' - u_f = f'{u_f} {url}\n' - u_f = f'{u_f} {date}\n' - u_f = f'{u_f} weekly\n' - u_f = f'{u_f} 1.0\n' - u_f = f'{u_f} \n' - - for t in self.tags: - url: str = f'{self.config.base_url}/tag/@{t}.html' - date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) - - u_f = f'{u_f} \n' - u_f = f'{u_f} {url}\n' - u_f = f'{u_f} {date}\n' - u_f = f'{u_f} daily\n' - u_f = f'{u_f} 0.5\n' - u_f = f'{u_f} \n' - - return u_f + f.write(content) diff --git a/src/pyssg/template.py b/src/pyssg/template.py deleted file mode 100644 index 3ddcf82..0000000 --- a/src/pyssg/template.py +++ /dev/null @@ -1,243 +0,0 @@ -import os - -from .page import Page - - -# all objects here require a header and footer as minimum -class HF: - def __init__(self): - self.header: str = None - self.footer: str = None - - -# some objects require a "list-like" set of attributes -class Common(HF): - def __init__(self): - self.list_header: str = None - self.list_footer: str = None - self.list_entry: str = None - self.list_separator: str = None - - -# main class -class Template(HF): - def __init__(self, src: str): - self.src: str = src - self.article: HF = HF() - self.articles: Common = Common() - self.tags: Common = Common() - self.rss: str = None - self.sitemap: str = None - - self.is_read: bool = False - - - # writes default templates - def write(self) -> None: - # get initial working directory - iwd = os.getcwd() - os.chdir(self.src) - - # create templates dir - os.mkdir('templates') - os.chdir('templates') - - # common - os.mkdir('common') - os.chdir('common') - self.__write_template('header.html', - ['\n', - '\n', - '\n', - '\n', - '$$TITLE\n', - '\n', - '\n']) - self.__write_template('footer.html', - ['\n', - '\n']) - - # go back to templates - os.chdir('..') - - # article entry - os.mkdir('article') - os.chdir('article') - self.__write_template('header.html', - ['

$$TITLE

\n', - '

$$AUTHOR

\n', - '

Created: $$CTIME, modified: $$MTIME

\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - # article index (articles) - os.mkdir('articles') - os.chdir('articles') - self.__write_template('header.html', - ['']) - self.__write_template('list_header.html', - ['

Articles

\n', - '\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - # tag - os.mkdir('tag') - os.chdir('tag') - self.__write_template('header.html', - ['']) - self.__write_template('list_header.html', - ['

Tags: ']) - self.__write_template('list_entry.html', - ['$$NAME']) - self.__write_template('list_separator.html', - [', ']) - self.__write_template('list_footer.html', - ['

\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - os.mkdir('rss') - os.chdir('rss') - self.__write_template('rss.xml', - ['\n', - '\n', - ' \n', - ' $$TITLE\n', - ' $$LINK\n', - ' \n', - ' SHORT DESCRIPTION.\n', - ' en-us\n', - ' COPYRIGHT NOTICE.\n', - ' EMAIL@EXAMPLE.ORG (NAME)\n', - ' EMAIL@EXAMPLE.ORG (NAME)\n', - ' $$CURRENTDATE\n', - ' $$CURRENTDATE\n', - ' $$PYSSGVERSION\n', - ' https://validator.w3.org/feed/docs/rss2.html\n', - ' 30\n', - ' \n', - ' EXAMPLE.ORG/IMAGE.PNG\n', - ' $$TITLE\n', - ' $$LINK\n', - ' \n', - '$$ITEMS\n', - ' \n', - '']) - - # go back to templates - os.chdir('..') - - os.mkdir('sitemap') - os.chdir('sitemap') - self.__write_template('sitemap.xml', - ['\n', - '\n', - '$$URLS\n', - '']) - # return to initial working directory - os.chdir(iwd) - - - # reads templates and stores them into class attributes - def read(self) -> None: - # only read templates if not read already - # (might want to change this behaviour) - if self.is_read: - return - self.is_read = True - - # get initial working directory - iwd = os.getcwd() - os.chdir(os.path.join(self.src, 'templates')) - - # common - os.chdir('common') - self.header = self.__read_template('header.html') - self.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # article entry - os.chdir('article') - self.article.header = self.__read_template('header.html') - self.article.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # article index - os.chdir('articles') - self.articles.header = self.__read_template('header.html') - self.articles.list_header = \ - self.__read_template('list_header.html') - self.articles.list_entry = \ - self.__read_template('list_entry.html') - self.articles.list_separator = \ - self.__read_template('list_separator.html') - self.articles.list_footer = \ - self.__read_template('list_footer.html') - self.articles.footer = 
self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # tag - os.chdir('tag') - self.tags.header = self.__read_template('header.html') - self.tags.list_header = self.__read_template('list_header.html') - self.tags.list_entry = self.__read_template('list_entry.html') - self.tags.list_separator = self.__read_template('list_separator.html') - self.tags.list_footer = self.__read_template('list_footer.html') - self.tags.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # rss - os.chdir('rss') - self.rss = self.__read_template('rss.xml') - - # go back to templates - os.chdir('..') - - # sitemap - os.chdir('sitemap') - self.sitemap = self.__read_template('sitemap.xml') - - # return to initial working directory - os.chdir(iwd) - - - def __write_template(self, file_name: str, content: list[str]) -> None: - with open(file_name, 'w+') as f: - for c in content: - f.write(c) - - def __read_template(self, file_name: str) -> str: - out: str = None - with open(file_name, 'r') as f: - out = f.read() - - return out -- cgit v1.2.3-54-g00ecf