diff options
author | David Luevano Alvarado <david@luevano.xyz> | 2021-06-04 22:13:44 -0600 |
---|---|---|
committer | David Luevano Alvarado <david@luevano.xyz> | 2021-06-04 22:13:44 -0600 |
commit | 11b9e8505b8a92555cce6615f19268a620e9ce53 (patch) | |
tree | e39ce9da68045d7aa6c5cde7f09e2b5185262d18 | |
parent | 30607ae87ed36289d860a41f8027900bebeec1e8 (diff) |
refactor and add support for jinja
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | README.md | 58 | ||||
-rw-r--r-- | plt/index.html | 32 | ||||
-rw-r--r-- | plt/page.html | 25 | ||||
-rw-r--r-- | plt/rss.xml | 39 | ||||
-rw-r--r-- | plt/sitemap.xml | 22 | ||||
-rw-r--r-- | plt/tag.html | 26 | ||||
-rw-r--r-- | pyssgrc | 9 | ||||
-rw-r--r-- | requirements.txt | 1 | ||||
-rw-r--r-- | setup.cfg | 1 | ||||
-rw-r--r-- | src/pyssg/builder.py | 282 | ||||
-rw-r--r-- | src/pyssg/configuration.py | 16 | ||||
-rw-r--r-- | src/pyssg/page.py | 95 | ||||
-rw-r--r-- | src/pyssg/parser.py | 42 | ||||
-rw-r--r-- | src/pyssg/pyssg.py | 65 | ||||
-rw-r--r-- | src/pyssg/rss.py | 60 | ||||
-rw-r--r-- | src/pyssg/sitemap.py | 54 | ||||
-rw-r--r-- | src/pyssg/template.py | 243 |
18 files changed, 483 insertions, 589 deletions
@@ -1,6 +1,8 @@ CHANGES ======= +* just include changelog + v0.4.1 ------ @@ -9,15 +9,15 @@ I'm writing this in *pYtHoN* (thought about doing it in Go, but I'm most comfort **This is still a WIP. Still doesn't build `sitemap.xml` or `rss.xml` files.** - [x] Build static site parsing `markdown` files ( `*.md` -> `*.html`) - - [x] Using plain `*.html` files for templates. - - [ ] Would like to change to something more flexible and easier to manage ([`jinja`](https://jinja.palletsprojects.com/en/3.0.x/), for example). - - [x] Preserves hand-made `*.html` files. - - [x] Tag functionality. - - [ ] Open Graph (and similar) support. + - [x] ~~Using plain `*.html` files for templates.~~ Changed to Jinja templates. + - [x] Would like to change to something more flexible and easier to manage ([`jinja`](https://jinja.palletsprojects.com/en/3.0.x/), for example). + - [x] Preserves hand-made `*.html` files. + - [x] Tag functionality. + - [ ] Open Graph (and similar) support. - [x] Build `sitemap.xml` file. - [x] Build `rss.xml` file. - [x] Only build page if `*.md` is new or updated. - - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). + - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). - [x] Configuration file as an alternative to using command line flags (configuration file options are prioritized). ### Markdown features @@ -51,9 +51,9 @@ First initialize the directories you're going to use for the source files and de pyssg -s src_dir -d dst_dir -i ``` -That creates the desired directories with the basic templates that can be edited as desired. Place your `*.md` files somewhere inside the source directory (`src_dir` in the command above), but outside of the `templates` directory. It accepts sub-directories. 
+That creates the desired directories with the basic templates that can be edited as desired (see variables available for Jinja below). Place your `*.md` files somewhere inside the source directory (`src_dir` in the command above), but outside of the `templates` directory. It accepts sub-directories. -Strongly recommended to edit `rss.xml` template under `rss` directory, since it has a lot of placeholder values. +Strongly recommended to edit the `rss.xml` template. Build the site with: @@ -64,3 +64,45 @@ pyssg -s src_dir -d dst_dir -u https://base.url -b That creates all `*.html` for the site and can be easily moved to the server. Here, the `-u` flag is technically optional in the sense that you'll not receive a warning/error, but it's used to prepend links with this URL (not strictly required everywhere), so don't ignore it; also don't include the trailing `/`. For now, the `-b`uild tag also creates the `rss.xml` and `sitemap.xml` files based on templates including only all converted `*.md` files (and processed tags in case of the sitemap), meaning that separate `*.html` files should be included manually in the template. + +For more options/flags just check out `pyssg -h`. + +## Available Jinja variables + +Here is the list of variables that you can use in specific Jinja templates, each with a short description. Note that all urls are without the trailing slash `/`. + +- General: + - `site_title` (`str`) (all): title of the website. + - `site_base_url` (`str`) (all): base url of the website. + - `site_base_static_url` (`str`) (all): base static url where all static files are located, mostly needed for correct rss feed generation when using a `base` tag and using relative links to files. For more, see [`<base>`](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/base). + - `pyssg_version` (`str`) (all): version in numeric form, e.g. `0.5.0`. + - `run_date` (`str`) (all): date when the program was run, with format required for rss. 
+- Pages: + - `all_pages` (`list(Page)`) (all): list of all the pages, sorted by creation time, reversed. + - `page` (`Page`) (`page.html`): page object that contains the following attributes: + - `title` (`str`): title of the page. + - `author` (`str`): author of the page. + - `content` (`str`): actual content of the page. + - `cdatetime` (`datetime`): creation datetime object of the page. + - `cdate` (`str`): formatted `cdatetime` as the configuration option `DATE_FORMAT`. + - `cdate_list` (`str`): formatted `cdatetime` as the configuration option `LIST_DATE_FORMAT`. + - `cdate_list_sep` (`str`): formatted `cdatetime` as the configuration option `LIST_SEP_DATE_FORMAT`. + - `cdate_rss` (`str`): formatted `cdatetime` as required by rss. + - `cdate_sitemap` (`str`): formatted `cdatetime` as required by sitemap. + - `mdatetime` (`datetime`): modification datetime object of the page. Defaults to None. + - `mdate` (`str`): formatted `mdatetime` as the configuration option `DATE_FORMAT`. Defaults to None. + - `mdate_list` (`str`): formatted `mdatetime` as the configuration option `LIST_DATE_FORMAT`. Defaults to None. + - `mdate_list_sep` (`str`): formatted `mdatetime` as the configuration option `LIST_SEP_DATE_FORMAT`. Defaults to None. + - `mdate_rss` (`str`): formatted `mdatetime` as required by rss. Defaults to empty string. + - `mdate_sitemap` (`str`): formatted `mdatetime` as required by sitemap. Defaults to empty string. + - `summary` (`str`): summary of the page, as specified in the `*.md` file. + - `lang` (`str`): page language, used for the general `html` tag `lang` attribute. + - `tags` (`list(tuple(str))`): list of tuples, one per tag of the page, each containing the name and the url of the tag, in that order. Defaults to empty list. + - `url` (`str`): url of the page, this already includes the `site_base_url`. + - `next/previous` (`Page`): reference to the next or previous page object (containing all these attributes). Defaults to None. + - `og` (`dict(str, str)`): dict for Open Graph metadata. 
+ - `meta` (`dict(str, list(str))`): meta dict as obtained from python-markdown, in case you use a meta tag not yet supported, it will be available there. +- Tags: + - `tag` (`tuple(str)`) (`tag.html`): tuple of name and url of the current tag. + - `tag_pages` (`list(Page)`) (`tag.html`): similar to `all_pages` but contains all the pages for the current tag. + - `all_tags` (`list(tuple(str))`) (all): similar to `page.tags` but contains all the tags. diff --git a/plt/index.html b/plt/index.html new file mode 100644 index 0000000..c96db3b --- /dev/null +++ b/plt/index.html @@ -0,0 +1,32 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <base href="{{site_base_static_url}}"> + <title>Index -- {{site_title}}</title> + </head> + <body> + <h1>Index -- {{site_title}}</h1> + <p>Some text here.</p> + + <p>Tags: + {%for t in all_tags%} + <a href="{{t[1]}}">{{t[0]}}</a>{{", " if not loop.last else ""}} + {%endfor%} + </p> + + <h2>Articles</h2> + <ul> + {%for p in all_pages%} + {%if loop.previtem%} + {%if loop.previtem.cdate_list_sep !=p.cdate_list_sep%} + <h3>{{p.cdate_list_sep}}</h3> + {%endif%} + {%else%} + <h3>{{p.cdate_list_sep}}</h3> + {%endif%} + <li>{{p.cdate_list}} - <a href="{{p.url}}">{{p.title}}</a></li> + {%endfor%} + </ul> + </body> +</html> diff --git a/plt/page.html b/plt/page.html new file mode 100644 index 0000000..fefaaa6 --- /dev/null +++ b/plt/page.html @@ -0,0 +1,25 @@ +<!DOCTYPE html> +<html lang="{{page.lang}}"> + <head> + <meta charset="utf-8"> + <base href="{{site_base_static_url}}"> + <title>{{page.title}} -- {{site_title}}</title> + </head> + <body> + <h1>{{page.title}}</h1> + <p>By {{page.author}}</p> + <p>Created: {{page.cdate}}</p> + {%if page.mdate is not none%} + <p>Modified: {{page.mdate}}</p> + {%endif%} + + {{page.content}} + + <p>Tags: + {%for t in page.tags%} + <a href="{{t[1]}}">{{t[0]}}</a>{{", " if not loop.last else ""}} + {%endfor%} + </p> + </body> +</html> + diff --git a/plt/rss.xml b/plt/rss.xml new file 
mode 100644 index 0000000..99b2b49 --- /dev/null +++ b/plt/rss.xml @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<rss version="2.0" + xmlns:atom="http://www.w3.org/2005/Atom" + xmlns:content="http://purl.org/rss/1.0/modules/content/"> + <channel> + <title>{{site_title}}</title> + <link>{{site_base_url}}</link> + <atom:link href="{{site_base_url}}/rss.xml" rel="self" type="application/rss+xml"/> + <description>Short site description.</description> + <language>en-us</language> + <category>Blog</category> + <copyright>Copyright 2021 Somebody</copyright> + <managingEditor>some@one.com (Sombody)</managingEditor> + <webMaster>some@one.com (Sombody)</webMaster> + <pubDate>{{run_date}}</pubDate> + <lastBuildDate>{{run_date}}</lastBuildDate> + <generator>pyssg v{{pyssg_version}}</generator> + <docs>https://validator.w3.org/feed/docs/rss2.html</docs> + <ttl>30</ttl> + <image> + <url>{{site_base_static_url}}/images/blog.png</url> + <title>{{site_title}}</title> + <link>{{site_base_url}}</link> + </image> + {%for p in all_pages%} + <item> + <title>{{p.title}}</title> + <link>{{p.url}}</link> + <guid isPermaLink="true">{{p.url}}</guid> + <pubDate>{{p.cdate_rss}}</pubDate> + {%for t in p.tags%} + <category>{{t[0]}}</category> + {%endfor%} + <description>{{p.summary}}</description> + <content:decoded><![CDATA[{{p.content}}]]></content:decoded> + </item> + {%endfor%} + </channel> +</rss> diff --git a/plt/sitemap.xml b/plt/sitemap.xml new file mode 100644 index 0000000..a5b5404 --- /dev/null +++ b/plt/sitemap.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="utf-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"> + {%for p in all_pages%} + <url> + <loc>{{p.url}}</loc> + <lastmod>{{p.mdate_sitemap if p.mdate_sitemap else p.cdate_sitemap}}</lastmod> + 
<changefreq>weekly</changefreq> + <priority>1.0</priority> + </url> + {%endfor%} + + {%for t in all_tags%} + <url> + <loc>{{t[1]}}</loc> + <lastmod>{{run_date}}</lastmod> + <changefreq>daily</changefreq> + <priority>0.5</priority> + </url> + {%endfor%} +</urlset> diff --git a/plt/tag.html b/plt/tag.html new file mode 100644 index 0000000..2fdc27b --- /dev/null +++ b/plt/tag.html @@ -0,0 +1,26 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <base href="{{site_base_static_url}}"> + <title>Posts filtered by {{tag[0]}} -- {{site_title}}</title> + </head> + <body> + <h1>Posts filtered by {{tag[0]}}</h1> + <p>Some text here.</p> + + <h2>Articles</h2> + <ul> + {%for p in tag_pages%} + {%if loop.previtem%} + {%if loop.previtem.cdate_list_sep !=p.cdate_list_sep%} + <h3>{{p.cdate_list_sep}}</h3> + {%endif%} + {%else%} + <h3>{{p.cdate_list_sep}}</h3> + {%endif%} + <li>{{p.cdate_list}} - <a href="{{p.url}}">{{p.title}}</a></li> + {%endfor%} + </ul> + </body> +</html> @@ -0,0 +1,9 @@ +SRC_PATH=e_src +DST_PATH=e_dst +PLT_PATH=plt +BASE_URL=https://blog.luevano.xyz +BASE_STATIC_URL=https://static.luevano.xyz +TITLE=Luévano's Blog +DATE_FORMAT=%a, %b %d, %Y @ %H:%M %Z +LIST_DATE_FORMAT=%b %d +LIST_SEP_DATE_FORMAT=%B %Y diff --git a/requirements.txt b/requirements.txt index cc2d163..248a5ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ markdown==3.3.4 +jinja2==3.0.1 @@ -34,6 +34,7 @@ console_scripts = package_dir = = src packages = find: +include_package_data=True [options.packages.find] where = src diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 8f7b67c..3a4474e 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -1,34 +1,48 @@ import os import shutil +from operator import itemgetter from copy import deepcopy +from jinja2 import Environment, Template +from markdown import Markdown +from importlib.metadata import version +from datetime import datetime, timezone from .configuration import Configuration -from 
.template import Template from .database import Database from .parser import MDParser from .page import Page from .discovery import get_file_list, get_dir_structure + +VERSION = version('pyssg') +# specific format for rss +DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) + + class HTMLBuilder: def __init__(self, config: Configuration, - template: Template, - db: Database): + env: Environment, + db: Database, + md: Markdown): self.src: str = config.src self.dst: str = config.dst + self.title: str = config.title self.base_url: str = config.base_url - self.dformat: str = config.dformat - self.l_dformat: str = config.l_dformat - self.lsep_dformat: str = config.lsep_dformat + self.base_static_url: str = config.base_static_url self.force: bool = config.force - self.template: Template = template + self.config: Configuration = config + self.env: Environment = env self.db: Database = db + self.md: Markdown = md self.dirs: list[str] = None self.md_files: list[str] = None self.html_files: list[str] = None self.all_pages: list[Page] = None + self.updated_pages: list[Page] = None self.all_tags: list[str] = None @@ -40,31 +54,22 @@ class HTMLBuilder: self.__create_dir_structure() self.__copy_html_files() - parser: MDParser = MDParser(self.src, self.md_files, self.db) - parser.parse() + parser: MDParser = MDParser(self.src, + self.md_files, + self.db, + self.md) + parser.parse(self.config) # just to be able to extract all pages out of this class self.all_pages = parser.all_pages + self.updated_pages = parser.updated_pages self.all_tags = parser.all_tags # create the article index - self.__create_article_index(parser.all_tags, parser.all_pages) - - # create each category of html pages + self.__create_article_index() # check if all pages should be created - if self.force: - self.__create_articles(parser.all_pages) - else: - self.__create_articles(parser.updated_pages) - self.__create_tags(parser.all_tags, parser.all_pages) - - - def 
get_pages(self) -> list[Page]: - return self.all_pages - - - def get_tags(self) -> list[str]: - return self.all_tags + self.__create_articles() + self.__create_tags() def __create_dir_structure(self) -> None: @@ -90,142 +95,57 @@ class HTMLBuilder: shutil.copy2(src_file, dst_file) - # this is really similar to create_tag (singular) - def __create_article_index(self, tags: list[str], - pages: list[Page]) -> None: - # make temporary template - t: Template = deepcopy(self.template) - - # do basic replacements - # get page and tag list formated, both functions do replacements - p_list: list[str] = self.__get_pages_formatted(pages, t) - t_list: list[str] = self.__get_tags_formatted(tags, t) - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', f'Index') + def __create_article_index(self) -> None: + template: Template = self.env.get_template("index.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags) with open(os.path.join(self.dst, 'index.html'), 'w') as f: - f.write(t.header) - f.write(t.articles.header) - - f.write(t.tags.list_header) - for tag in t_list: - f.write(tag) - f.write(t.tags.list_footer) + f.write(content) - f.write(t.articles.list_header) - for page in p_list: - f.write(page) - f.write(t.articles.list_footer) - f.write(t.articles.footer) - f.write(t.footer) - - - def __create_articles(self, pages: list[Page]) -> None: - for p in pages: - self.__create_article(p) + def __create_articles(self) -> None: + # check if only updated should be created + if self.force: + for p in self.all_pages: + self.__create_article(p) + else: + for p in self.updated_pages: + self.__create_article(p) def __create_article(self, page: Page) -> None: - # TODO: create better solution for replace - # make temporary template - t: Template = 
deepcopy(self.template) - # prepare html file name f_name: str = page.name f_name = f_name.replace('.md', '.html') - # get timestamps - c_date: str = page.c_datetime.strftime(self.dformat) - m_date: str = None - if page.m_datetime is not None: - m_date: str = page.m_datetime.strftime(self.dformat) - - # do basic replacements - # get tag list formatted (some replacements done inside - # get_tags_formatted) - t_list: list[str] = None - if page.tags is not None: - t_list = self.__get_tags_formatted(page.tags, t) - # tags formatted as a single string. - tags_formatted: str = ''.join(t_list) - # join list header and footer with all tags list. - tags_formatted = ''.join([t.tags.list_header, - tags_formatted, - t.tags.list_footer]) - - # common - t.header = t.header.replace("$$LANG", page.lang) - t.header = t.header.replace('$$TITLE', page.title) - - # article header - t.article.header = t.article.header.replace('$$TITLE', page.title) - t.article.header = t.article.header.replace('$$AUTHOR', page.author) - t.article.header = t.article.header.replace('$$CTIME', c_date) - if m_date is not None: - t.article.header = t.article.header.replace('$$MTIME', m_date) - else: - t.article.header = t.article.header.replace('$$MTIME', '') - - # article footer (same replaces as header) - t.article.footer = t.article.footer.replace('$$TITLE', page.title) - t.article.footer = t.article.footer.replace('$$AUTHOR', page.author) - t.article.footer = t.article.footer.replace('$$CTIME', c_date) - if m_date is not None: - t.article.footer = t.article.footer.replace('$$MTIME', m_date) - else: - t.article.footer = t.article.footer.replace('$$MTIME', '') - - # add tags to article list - if t_list is not None: - t.article.footer = t.article.footer.replace('$$TAGS', - tags_formatted) - else: - t.article.footer = t.article.footer.replace('$$TAGS', '') + template: Template = self.env.get_template("page.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + 
site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags, + page=page) with open(os.path.join(self.dst, f_name), 'w') as f: - f.write(t.header) - f.write(t.article.header) - f.write(page.html) - - # not required anymore, tags included in article footer - # if t_list is not None: - # f.write(t.tags.list_header) - # for tag in t_list: - # f.write(tag) - # f.write(t.tags.list_footer) - - f.write(t.article.footer) - f.write(t.footer) - - - def __get_tags_formatted(self, tags: list[str], - template: Template) -> list[str]: - tag_amount: int = len(tags) - tags_formatted: list[str] = [] - for i, t in enumerate(tags): - # t_e=tag entry - t_e: str = template.tags.list_entry - t_e = t_e.replace('$$URL', - f'{self.base_url}/tag/@{t}.html') - t_e = t_e.replace('$$NAME', t) - - tags_formatted.append(t_e) - if i != tag_amount - 1: - tags_formatted.append(template.tags.list_separator) - - return tags_formatted - - - def __create_tags(self, tags: list[str], - pages: list[Page]) -> None: - for t in tags: + f.write(content) + + + def __create_tags(self) -> None: + for t in self.all_tags: # get a list of all pages that have current tag tag_pages: list[Page] = [] - for p in pages: - if p.tags is not None and t in p.tags: + for p in self.all_pages: + if p.tags is not None and t[0] in list(map(itemgetter(0), + p.tags)): tag_pages.append(p) # build tag page @@ -235,61 +155,19 @@ class HTMLBuilder: tag_pages = [] - def __create_tag(self, tag: str, + def __create_tag(self, tag: tuple[str], pages: list[Page]) -> None: - # TODO: create better solution for replace - # make temporary template - t: Template = deepcopy(self.template) - - # do basic replacements - # get page list formated (some replacements done inside - # get_pages_formatted) - p_list: list[str] = self.__get_pages_formatted(pages, t) - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', f'Posts 
filtered by: {tag}') - - # tag header - tag_url: str = f'{self.base_url}/tag/@{tag}.html' - t.tags.header = t.tags.header.replace('$$NAME', tag) - - with open(os.path.join(self.dst, f'tag/@{tag}.html'), 'w') as f: - f.write(t.header) - f.write(t.tags.header) - - f.write(t.articles.list_header) - for p in p_list: - f.write(p) - f.write(t.articles.list_footer) - - f.write(t.tags.footer) - f.write(t.footer) - - - def __get_pages_formatted(self, pages: list[Page], - template: Template) -> list[str]: - month_year: str = '-' - pages_formatted: list[str] = [] - for p in pages: - # check if the monthly separator should be included - c_month_year: str = p.c_datetime.strftime(self.lsep_dformat) - if c_month_year != month_year: - month_year = c_month_year - - month_sep: str = template.articles.list_separator - month_sep = month_sep.replace('$$SEP', month_year) - - pages_formatted.append(month_sep) - - f_name: str = p.name - f_name = f_name.replace('.md', '.html') - - # p_e=page entry - p_e: str = template.articles.list_entry - p_e = p_e.replace('$$URL', f'{self.base_url}/{f_name}') - p_e = p_e.replace('$$DATE', p.c_datetime.strftime(self.l_dformat)) - p_e = p_e.replace('$$TITLE', p.title) - - pages_formatted.append(p_e) - - return pages_formatted + + template: Template = self.env.get_template("tag.html") + content: str = template.render(site_title=self.title, + site_base_url=self.base_url, + site_base_static_url=self.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.all_pages, + all_tags=self.all_tags, + tag=tag, + tag_pages=pages) + + with open(os.path.join(self.dst, f'tag/@{tag[0]}.html'), 'w') as f: + f.write(content) diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py index 8ee592e..dca43b5 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -7,7 +7,9 @@ class Configuration: self.path: str = path self.src: str = None self.dst: str = None + self.plt: str = None self.base_url: str = None + 
self.base_static_url: str = None self.title: str = None self.dformat: str = None self.l_dformat: str = None @@ -45,10 +47,18 @@ class Configuration: except KeyError: pass try: + self.plt = opts['PLT_PATH'] + except KeyError: pass + + try: self.base_url = opts['BASE_URL'] except KeyError: pass try: + self.base_static_url = opts['BASE_STATIC_URL'] + except KeyError: pass + + try: self.title = opts['TITLE'] except KeyError: pass @@ -80,9 +90,15 @@ class Configuration: if self.dst is None: self.dst = opts['dst'] + if self.plt is None: + self.plt = opts['plt'] + if self.base_url is None: self.base_url = opts['url'] + if self.base_static_url is None: + self.base_static_url = opts['static_url'] + if self.title is None: self.title = opts['title'] diff --git a/src/pyssg/page.py b/src/pyssg/page.py index e03ca6b..1e0cdeb 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -1,68 +1,101 @@ from datetime import datetime, timezone +from .configuration import Configuration + + +DFORMAT_RSS = '%a, %d %b %Y %H:%M:%S GMT' +DFORMAT_SITEMAP = '%Y-%m-%d' + class Page: def __init__(self, name: str, - c_time: float, - m_time: float, + ctime: float, + mtime: float, html: str, meta: dict): + # initial data self.name: str = name - self.c_time: float = c_time - self.m_time: float = m_time - self.html: str = html + self.ctimestamp: float = ctime + self.mtimestamp: float = mtime + self.content: str = html self.meta: dict = meta # data from self.meta self.title: str = '' self.author: str = '' - self.c_datetime: datetime = None - self.m_datetime: datetime = None + self.cdatetime: datetime = None + self.mdatetime: datetime = None self.summary: str = '' self.lang: str = 'en' - self.tags: list = None + self.tags: list[tuple[str]] = [] + + # constructed + self.url: str = '' + self.cdate: str = '' + self.cdate_list: str = '' + self.cdate_list_sep: str = '' + self.cdate_rss: str = '' + self.cdate_sitemap: str = '' + self.mdate: str = None + self.mdate_list: str = None + self.mdate_list_sep: str 
= None + self.mdate_rss: str = '' + self.mdate_sitemap: str = '' + + # later assigned references to next and previous pages + self.next: Page = None + self.previous: Page = None # also from self.meta, but for og metadata self.og: dict[str, str] = dict() - self.__parse_meta() def __lt__(self, other): - return self.c_time < other.c_time + return self.ctimestamp < other.ctimestamp # parses meta from self.meta, for og, it prioritizes, # the actual og meta - def __parse_meta(self): - try: - self.title = self.meta['title'][0] - except KeyError: pass - - try: - self.author = self.meta['author'][0] - except KeyError: pass - - self.c_datetime = datetime.fromtimestamp(self.c_time, + def parse(self, config: Configuration): + # required meta elements + self.title = self.meta['title'][0] + self.author = self.meta['author'][0] + self.summary = self.meta['summary'][0] + self.lang = self.meta['lang'][0] + + # dates + self.cdatetime = datetime.fromtimestamp(self.ctimestamp, tz=timezone.utc) - - if self.m_time != 0.0: - self.m_datetime = datetime.fromtimestamp(self.m_time, + self.cdate = self.cdatetime.strftime(config.dformat) + self.cdate_list = self.cdatetime.strftime(config.l_dformat) + self.cdate_list_sep = self.cdatetime.strftime(config.lsep_dformat) + self.cdate_rss = self.cdatetime.strftime(DFORMAT_RSS) + self.cdate_sitemap = self.cdatetime.strftime(DFORMAT_SITEMAP) + + # only if file/page has been modified + if self.mtimestamp != 0.0: + self.mdatetime = datetime.fromtimestamp(self.mtimestamp, tz=timezone.utc) + self.mdate = self.mdatetime.strftime(config.dformat) + self.mdate_list = self.mdatetime.strftime(config.l_dformat) + self.mdate_list_sep = self.mdatetime.strftime(config.lsep_dformat) + self.mdate_rss = self.mdatetime.strftime(DFORMAT_RSS) + self.mdate_sitemap = self.mdatetime.strftime(DFORMAT_SITEMAP) + # not always contains tags try: - self.summary = self.meta['summary'][0] - except KeyError: pass + tags_only: list[str] = self.meta['tags'] + tags_only.sort() - try: 
- self.lang = self.meta['lang'][0] + for t in tags_only: + self.tags.append((t, + f'{config.base_url}/tag/@{t}.html')) except KeyError: pass - try: - self.tags = self.meta['tags'] - self.tags.sort() - except KeyError: pass + self.url = f'{config.base_url}/{self.name.replace(".md", ".html")}' + # if contains object graph elements try: # og_e = object graph entry for og_e in self.meta['og']: diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py index 548fb05..7323bde 100644 --- a/src/pyssg/parser.py +++ b/src/pyssg/parser.py @@ -1,8 +1,11 @@ import os +from operator import itemgetter from datetime import datetime from markdown import Markdown from .database import Database +from .configuration import Configuration +from .configuration import Configuration from .page import Page @@ -10,33 +13,38 @@ from .page import Page class MDParser: def __init__(self, src: str, files: list[str], - db: Database): - self.md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', - 'smarty', 'toc', 'wikilinks'], - output_format='html5') + db: Database, + md: Markdown): self.src: str = src self.files: list[str] = files + self.db: Database = db + self.md: Markdown = md self.all_pages: list[Page] = None self.updated_pages: list[Page] = None - self.all_tags: list[str] = None + self.all_tags: list[tuple[str]] = None - def parse(self): + def parse(self, config: Configuration): # initialize lists self.all_pages = [] self.updated_pages = [] self.all_tags = [] + all_tag_names: list[str] = [] for f in self.files: src_file: str = os.path.join(self.src, f) # get flag if update is successful updated: bool = self.db.update(src_file, remove=f'{self.src}/') - page: Page = None content: str = self.md.reset().convert(open(src_file).read()) - page = Page(f, self.db.e[f][0], self.db.e[f][1], content, self.md.Meta) + page: Page = Page(f, + self.db.e[f][0], + self.db.e[f][1], + content, + self.md.Meta) + page.parse(config) # keep a separated list for all and updated pages if updated: @@ -46,14 
+54,26 @@ class MDParser: # parse tags if page.tags is not None: # add its tag to corresponding db entry if existent - self.db.update_tags(f, page.tags) + self.db.update_tags(f, list(map(itemgetter(0), page.tags))) # update all_tags attribute for t in page.tags: - if t not in self.all_tags: + if t[0] not in list(map(itemgetter(0), self.all_tags)): self.all_tags.append(t) # sort list of tags for consistency - self.all_tags.sort() + self.all_tags.sort(key=itemgetter(0)) self.updated_pages.sort(reverse=True) self.all_pages.sort(reverse=True) + # TODO: fix this in case it doesn't work lol + # this should update references to all_pages and updated_pages??? + for i, p in enumerate(self.all_pages): + try: + prev_page: Page = self.all_pages[i - 1] + p.previous = prev_page + except IndexError: pass + + try: + next_page: Page = self.all_pages[i + 1] + p.next = next_page + except IndexError: pass diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index 5d8ac33..933f7d3 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -1,16 +1,23 @@ import os from argparse import ArgumentParser, Namespace from typing import Union +from jinja2 import Environment, FileSystemLoader +from markdown import Markdown +from importlib.metadata import version +from importlib.resources import contents +from datetime import datetime, timezone from .configuration import Configuration from .database import Database -from .template import Template from .builder import HTMLBuilder from .page import Page from .rss import RSSBuilder from .sitemap import SitemapBuilder +VERSION = version('pyssg') + + def get_options() -> Namespace: parser = ArgumentParser(prog='pyssg', description='''Static Site Generator that reads @@ -19,6 +26,9 @@ def get_options() -> Namespace: location) all other options are ignored.\nFor datetime formats see: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes''') + parser.add_argument('-v', '--version', + action='store_true', + help='''print 
program version''') parser.add_argument('-c', '--config', default='$XDG_CONFIG_HOME/pyssg/pyssgrc', type=str, @@ -34,11 +44,20 @@ def get_options() -> Namespace: type=str, help='''dst directory; generated (and transfered html) files; defaults to 'dst' ''') + parser.add_argument('-t', '--plt', + default='plt', + type=str, + help='''plt directory; all template files; defaults to + 'plt' ''') parser.add_argument('-u', '--url', default='', type=str, help='''base url without trailing slash''') - parser.add_argument('-t', '--title', + parser.add_argument('--static-url', + default='', + type=str, + help='''base static url without trailing slash''') + parser.add_argument('--title', default='Blog', type=str, help='''general title for the website; defaults to @@ -86,16 +105,20 @@ def main() -> None: config.read() config.fill_missing(opts) + if opts['version']: + print(f'pyssg v{VERSION}') + return + if opts['init']: try: os.mkdir(config.src) os.makedirs(os.path.join(config.dst, 'tag')) + os.mkdir(config.plt) except FileExistsError: pass - # write default templates - template: Template = Template(config.src) - template.write() + for f in contents('pyssg'): + print(f) return if opts['build']: @@ -103,22 +126,34 @@ def main() -> None: db: Database = Database(os.path.join(config.src, '.files')) db.read() - # read templates - template: Template = Template(config.src) - template.read() + # the autoescape option could be a security risk if used in a dynamic + # website, as far as i can tell + env: Environment = Environment(loader=FileSystemLoader(config.plt), + autoescape=False, + trim_blocks=True, + lstrip_blocks=True) - builder: HTMLBuilder = HTMLBuilder(config, template, db) + md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', + 'smarty', 'toc', 'wikilinks'], + output_format='html5') + builder: HTMLBuilder = HTMLBuilder(config, + env, + db, + md) builder.build() - # get all parsed pages for rss construction - all_pages: list[Page] = builder.get_pages() - 
rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages) + # get all parsed pages and tags for rss and sitemap construction + all_pages: list[Page] = builder.all_pages + all_tags: list[tuple[str]] = builder.all_tags + + rss_builder: RSSBuilder = RSSBuilder(config, + env, + all_pages, + all_tags) rss_builder.build() - # get all tags for sitemap construction - all_tags: list[str] = builder.get_tags() sm_builder: SitemapBuilder = SitemapBuilder(config, - template.sitemap, + env, all_pages, all_tags) sm_builder.build() diff --git a/src/pyssg/rss.py b/src/pyssg/rss.py index 742b6b2..7de29ae 100644 --- a/src/pyssg/rss.py +++ b/src/pyssg/rss.py @@ -1,60 +1,38 @@ import os -import importlib.metadata +from jinja2 import Environment, Template +from importlib.metadata import version from datetime import datetime, timezone from .page import Page from .configuration import Configuration -VERSION = importlib.metadata.version('pyssg') -# This is static right here since an rss feed -# requires very specific date format +VERSION = version('pyssg') +# specific format for rss DFORMAT = '%a, %d %b %Y %H:%M:%S GMT' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) class RSSBuilder: def __init__(self, config: Configuration, - template: str, - pages: list[Page]): + env: Environment, + pages: list[Page], + tags: list[tuple[str]]): self.config: Configuration = config - self.rss: str = template + self.env: Environment = env self.pages: list[Page] = pages + self.tags: list[tuple[str]] = tags def build(self): - # initial base replacements - self.rss = self.rss.replace('$$TITLE', self.config.title) - self.rss = self.rss.replace('$$LINK', self.config.base_url) - self.rss = self.rss.replace('$$PYSSGVERSION', f'pyssg v{VERSION}') - items_formatted: str = self.__get_items_formatted() - self.rss = self.rss.replace('$$ITEMS', items_formatted) - - current_date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) - self.rss = self.rss.replace('$$CURRENTDATE', current_date) + 
template: Template = self.env.get_template("rss.xml") + content: str = template.render(site_title=self.config.title, + site_base_url=self.config.base_url, + site_base_static_url=self.config.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.pages, + all_tags=self.tags) with open(os.path.join(self.config.dst, 'rss.xml'), 'w') as f: - f.write(self.rss) - - - def __get_items_formatted(self) -> str: - # i_f=items formatted for short - i_f: str = '' - for p in self.pages: - url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' - date: str = p.c_datetime.strftime(DFORMAT) - - i_f = f'{i_f} <item>\n' - i_f = f'{i_f} <title>{p.title}</title>\n' - i_f = f'{i_f} <link>{url}</link>\n' - i_f = f'{i_f} <guid isPermaLink="true">{url}</guid>\n' - i_f = f'{i_f} <pubDate>{date}</pubDate>\n' - # TODO: maybe make this optional? - # add the tags as categories - if p.tags is not None: - for t in p.tags: - i_f = f'{i_f} <category>{t.capitalize()}</category>\n' - i_f = f'{i_f} <description>{p.summary}</description>\n' - i_f = f'{i_f} <content:encoded><![CDATA[{p.html}]]></content:encoded>\n' - i_f = f'{i_f} </item>\n' - - return i_f + f.write(content) diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py index 410033f..f63a7e6 100644 --- a/src/pyssg/sitemap.py +++ b/src/pyssg/sitemap.py @@ -1,60 +1,38 @@ import os +from jinja2 import Environment, Template +from importlib.metadata import version from datetime import datetime, timezone from .page import Page from .configuration import Configuration - +VERSION = version('pyssg') +# specific format for sitemap (not as strict) DFORMAT = '%Y-%m-%d' +RUN_DATE = datetime.now(tz=timezone.utc).strftime(DFORMAT) class SitemapBuilder: def __init__(self, config: Configuration, - template: str, + env: Environment, pages: list[Page], tags: list[str]): self.config: Configuration = config - self.sitemap: str = template + self.env: Environment = env self.pages: list[Page] = pages self.tags: list[str] = tags 
def build(self): - # initial base replacements - urls_formatted: str = self.__get_urls_formatted() - self.sitemap = self.sitemap.replace('$$URLS', urls_formatted) + template: Template = self.env.get_template("sitemap.xml") + content: str = template.render(site_title=self.config.title, + site_base_url=self.config.base_url, + site_base_static_url=self.config.base_static_url, + pyssg_version=VERSION, + run_date=RUN_DATE, + all_pages=self.pages, + all_tags=self.tags) with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w') as f: - f.write(self.sitemap) - - - def __get_urls_formatted(self) -> str: - # u_f=items formatted for short - u_f: str = '' - for p in self.pages: - url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' - if p.m_datetime is not None: - date: str = p.m_datetime.strftime(DFORMAT) - else: - date: str = p.c_datetime.strftime(DFORMAT) - - u_f = f'{u_f} <url>\n' - u_f = f'{u_f} <loc>{url}</loc>\n' - u_f = f'{u_f} <lastmod>{date}</lastmod>\n' - u_f = f'{u_f} <changefreq>weekly</changefreq>\n' - u_f = f'{u_f} <priority>1.0</priority>\n' - u_f = f'{u_f} </url>\n' - - for t in self.tags: - url: str = f'{self.config.base_url}/tag/@{t}.html' - date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) - - u_f = f'{u_f} <url>\n' - u_f = f'{u_f} <loc>{url}</loc>\n' - u_f = f'{u_f} <lastmod>{date}</lastmod>\n' - u_f = f'{u_f} <changefreq>daily</changefreq>\n' - u_f = f'{u_f} <priority>0.5</priority>\n' - u_f = f'{u_f} </url>\n' - - return u_f + f.write(content) diff --git a/src/pyssg/template.py b/src/pyssg/template.py deleted file mode 100644 index 3ddcf82..0000000 --- a/src/pyssg/template.py +++ /dev/null @@ -1,243 +0,0 @@ -import os - -from .page import Page - - -# all objects here require a header and footer as minimum -class HF: - def __init__(self): - self.header: str = None - self.footer: str = None - - -# some objects require a "list-like" set of attributes -class Common(HF): - def __init__(self): - self.list_header: str = None - 
self.list_footer: str = None - self.list_entry: str = None - self.list_separator: str = None - - -# main class -class Template(HF): - def __init__(self, src: str): - self.src: str = src - self.article: HF = HF() - self.articles: Common = Common() - self.tags: Common = Common() - self.rss: str = None - self.sitemap: str = None - - self.is_read: bool = False - - - # writes default templates - def write(self) -> None: - # get initial working directory - iwd = os.getcwd() - os.chdir(self.src) - - # create templates dir - os.mkdir('templates') - os.chdir('templates') - - # common - os.mkdir('common') - os.chdir('common') - self.__write_template('header.html', - ['<!DOCTYPE html>\n', - '<html lang="$$LANG">\n', - '<head>\n', - '<meta charset="utf-8">\n', - '<title>$$TITLE</title>\n', - '</head>\n', - '<body>\n']) - self.__write_template('footer.html', - ['</body>\n', - '</html>\n']) - - # go back to templates - os.chdir('..') - - # article entry - os.mkdir('article') - os.chdir('article') - self.__write_template('header.html', - ['<h1>$$TITLE</h1>\n', - '<p>$$AUTHOR</p>\n', - '<p>Created: $$CTIME, modified: $$MTIME</p>\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - # article index (articles) - os.mkdir('articles') - os.chdir('articles') - self.__write_template('header.html', - ['']) - self.__write_template('list_header.html', - ['<h2>Articles</h2>\n', - '<ul>\n']) - self.__write_template('list_entry.html', - ['<li>$$DATE - <a href="$$URL">$$TITLE</a></li>\n']) - self.__write_template('list_separator.html', - ['<h3>$$SEP</h3>\n']) - self.__write_template('list_footer.html', - ['</ul>\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - # tag - os.mkdir('tag') - os.chdir('tag') - self.__write_template('header.html', - ['']) - self.__write_template('list_header.html', - ['<p>Tags: ']) - self.__write_template('list_entry.html', - ['<a href="$$URL">$$NAME</a>']) - 
self.__write_template('list_separator.html', - [', ']) - self.__write_template('list_footer.html', - ['</p>\n']) - self.__write_template('footer.html', - ['']) - - # go back to templates - os.chdir('..') - - os.mkdir('rss') - os.chdir('rss') - self.__write_template('rss.xml', - ['<?xml version="1.0" encoding="UTF-8" ?>\n', - '<rss version="2.0"\n', - ' xmlns:atom="http://www.w3.org/2005/Atom"\n', - ' xmlns:content="http://purl.org/rss/1.0/modules/content/">\n', - ' <channel>\n', - ' <title>$$TITLE</title>\n', - ' <link>$$LINK</link>\n', - ' <atom:link href="EXAMPLE.ORG/RSS.XML" rel="self" type="application/rss+xml"/>\n', - ' <description>SHORT DESCRIPTION.</description>\n', - ' <language>en-us</language>\n', - ' <copyright>COPYRIGHT NOTICE.</copyright>\n', - ' <managingEditor>EMAIL@EXAMPLE.ORG (NAME)</managingEditor>\n', - ' <webMaster>EMAIL@EXAMPLE.ORG (NAME)</webMaster>\n', - ' <pubDate>$$CURRENTDATE</pubDate>\n', - ' <lastBuildDate>$$CURRENTDATE</lastBuildDate>\n', - ' <generator>$$PYSSGVERSION</generator>\n', - ' <docs>https://validator.w3.org/feed/docs/rss2.html</docs>\n', - ' <ttl>30</ttl>\n', - ' <image>\n', - ' <url>EXAMPLE.ORG/IMAGE.PNG</url>\n', - ' <title>$$TITLE</title>\n', - ' <link>$$LINK</link>\n', - ' </image>\n', - '$$ITEMS\n', - ' </channel>\n', - '</rss>']) - - # go back to templates - os.chdir('..') - - os.mkdir('sitemap') - os.chdir('sitemap') - self.__write_template('sitemap.xml', - ['<?xml version="1.0" encoding="utf-8"?>\n', - '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n', - ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n', - ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n', - 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n', - '$$URLS\n', - '</urlset>']) - # return to initial working directory - os.chdir(iwd) - - - # reads templates and stores them into class attributes - def read(self) -> None: - # only read templates if not read already - # (might want to change this behaviour) 
- if self.is_read: - return - self.is_read = True - - # get initial working directory - iwd = os.getcwd() - os.chdir(os.path.join(self.src, 'templates')) - - # common - os.chdir('common') - self.header = self.__read_template('header.html') - self.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # article entry - os.chdir('article') - self.article.header = self.__read_template('header.html') - self.article.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # article index - os.chdir('articles') - self.articles.header = self.__read_template('header.html') - self.articles.list_header = \ - self.__read_template('list_header.html') - self.articles.list_entry = \ - self.__read_template('list_entry.html') - self.articles.list_separator = \ - self.__read_template('list_separator.html') - self.articles.list_footer = \ - self.__read_template('list_footer.html') - self.articles.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # tag - os.chdir('tag') - self.tags.header = self.__read_template('header.html') - self.tags.list_header = self.__read_template('list_header.html') - self.tags.list_entry = self.__read_template('list_entry.html') - self.tags.list_separator = self.__read_template('list_separator.html') - self.tags.list_footer = self.__read_template('list_footer.html') - self.tags.footer = self.__read_template('footer.html') - - # go back to templates - os.chdir('..') - - # rss - os.chdir('rss') - self.rss = self.__read_template('rss.xml') - - # go back to templates - os.chdir('..') - - # sitemap - os.chdir('sitemap') - self.sitemap = self.__read_template('sitemap.xml') - - # return to initial working directory - os.chdir(iwd) - - - def __write_template(self, file_name: str, content: list[str]) -> None: - with open(file_name, 'w+') as f: - for c in content: - f.write(c) - - def __read_template(self, file_name: str) -> str: - out: str = None - 
with open(file_name, 'r') as f: - out = f.read() - - return out |