From 0bc00ce9352ba843d62c189b68e0e07724cc4b58 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Sun, 4 Dec 2022 15:06:48 -0600 Subject: migrate from INI to YAML, breaks compatibility config file and template files need to be converted to the new format to use with YAML config --- README.md | 99 ++++++++++++++++++++----------------- requirements.txt | 1 + src/pyssg/builder.py | 10 ++-- src/pyssg/configuration.py | 79 ++++++++++++++--------------- src/pyssg/database.py | 5 +- src/pyssg/md_parser.py | 5 +- src/pyssg/page.py | 5 +- src/pyssg/plt/default.ini | 16 ------ src/pyssg/plt/default.yaml | 18 +++++++ src/pyssg/plt/index.html | 4 +- src/pyssg/plt/mandatory_config.yaml | 14 ++++++ src/pyssg/plt/page.html | 2 +- src/pyssg/plt/rss.xml | 4 +- src/pyssg/plt/static_config.yaml | 8 +++ src/pyssg/plt/tag.html | 2 +- src/pyssg/pyssg.py | 22 ++++++--- src/pyssg/yaml_parser.py | 45 +++++++++++++++++ 17 files changed, 212 insertions(+), 127 deletions(-) delete mode 100644 src/pyssg/plt/default.ini create mode 100644 src/pyssg/plt/default.yaml create mode 100644 src/pyssg/plt/mandatory_config.yaml create mode 100644 src/pyssg/plt/static_config.yaml create mode 100644 src/pyssg/yaml_parser.py diff --git a/README.md b/README.md index f01e03c..fb3f19e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # pyssg - Static Site Generator written in Python -Inspired (initially) by Roman Zolotarev's [`ssg5`](https://rgz.ee/bin/ssg5) and [`rssg`](https://rgz.ee/bin/rssg), Luke Smith's [`lb` and `sup`](https://github.com/LukeSmithxyz/lb) and, pedantic.software's great (but *"mamador"*, as I would say in spanish) [`blogit`](https://pedantic.software/git/blogit/). +Generates HTML files from MD files for a static site, personally using it for a blog-like site. 
+ +Initially inspired by Roman Zolotarev's [`ssg5`](https://rgz.ee/bin/ssg5) and [`rssg`](https://rgz.ee/bin/rssg), Luke Smith's [`lb` and `sup`](https://github.com/LukeSmithxyz/lb) and, pedantic.software's [`blogit`](https://pedantic.software/git/blogit/). ## Features and to-do @@ -19,15 +21,15 @@ Inspired (initially) by Roman Zolotarev's [`ssg5`](https://rgz.ee/bin/ssg5) and - [ ] Include manually added `*.html` files. - [x] Only build page if `*.md` is new or updated. - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). -- [x] Configuration file. ~~as an alternative to using command line flags (configuration file options are prioritized).~~ - - [x] Use [`configparser`](https://docs.python.org/3/library/configparser.html) instead of custom config handler. - - [ ] Migrate to YAML instead of INI, as it is way more flexible. +- [x] Configuration file. ~~as an alternative to using command line flags (configuration file options are prioritized).~~ + - [x] ~~Use [`configparser`](https://docs.python.org/3/library/configparser.html) instead of custom config handler.~~ + - [x] Migrate to YAML instead of INI, as it is way more flexible. Uses [`PyYAML`](https://pyyaml.org/). - [x] Avoid the program to freak out when there are directories created in advance. - [x] Provide more meaningful error messages when you are missing mandatory metadata in your `*.md` files. - [ ] More complex directory structure to support multiple subdomains and different types of pages. - [ ] Option/change to using an SQL database instead of the custom solution. - [x] Checksum checking because the timestamp of the file is not enough. -- [ ] Better management of the extensions. +- [ ] Better management of the markdown extensions. ### Markdown features @@ -64,11 +66,11 @@ Will add a PKBUILD (and possibly submit it to the AUR) sometime later. 
pyssg --copy-default-config -c ``` -- Where `-c` is optional as by default `$XDG_CONFIG_HOME/pyssg/config.ini` is used. +- Where `-c` is optional as by default `$XDG_CONFIG_HOME/pyssg/config.yaml` is used. 2. Edit the config file created as needed. -- `config.ini` is parsed using Python's [`configparser`](https://docs.python.org/3/library/configparser.html), [more about the config file](#config-file). +- `config.yaml` is parsed using [`PyYAML`](https://pyyaml.org/), [more about the config file](#config-file). 3. Initialize the directory structures (source, destination, template) and move template files: @@ -107,54 +109,61 @@ pyssg -b ## Config file -All sections/options need to be compliant with the [`configparser`](https://docs.python.org/3/library/configparser.html). - -At least the sections and options given in the default config should be present: - -```ini -[path] -src=src # source -dst=dst # destination -plt=plt # template -[url] -main=https://example.com -static=https://static.example.com # used for static resources (images, js, css, etc) -default_image=/images/default.png # this will be appended to 'static' at the end -[fmt] # % needs to be escaped with another % -date=%%a, %%b %%d, %%Y @ %%H:%%M %%Z -list_date=%%b %%d -list_sep_date=%%B %%Y -[info] -title=Example site -[other] -force=False -``` +All sections/options need to be compliant with [`PyYAML`](https://pyyaml.org/) which should be compliant with [`YAML 1.2`](https://yaml.org/spec/1.2.2/). 
Additionally, I've added the custom tag `!join`, which concatenates strings from an array and can be used as follows: -Along with these, these extra ones will be added on runtime: - -```ini -[fmt] -rss_date=%%a, %%d %%b %%Y %%H:%%M:%%S GMT # fixed -sitemap_date=%%Y-%%m-%%d # fixed -[info] -version= # current 'pyssg' version (0.5.1.dev16, for example) -debug=True/False # depending if --debug was used when executing -rss_run_date= # date the program was run, formatted with 'rss_date' -sitemap_run_date= # date the program was run, formatted with 'sitemap_date' +```yaml +variable: &variable_reference_name "value" +other_variable: !join [*variable_reference_name, "other_value", 1] ``` -You can add any other option/section that you can later use in the Jinja templates via the exposed config object. +Which would produce `other_variable: "valueother_value1"`. Also environment variables will be expanded internally. + +At least the following config items should be present in the config: + +```yaml +%YAML 1.2 +--- +# not needed, shown here as an example of the !join tag +define: &root "$HOME/path/to/" # $HOME expands to /home/user, for example + +title: "Example site" +path: + src: !join [*root, "src"] # $HOME/path/to/src + dst: "$HOME/some/other/path/to/dst" + plt: "plt" +url: + main: "https://example.com" +fmt: + date: "%a, %b %d, %Y @ %H:%M %Z" + list_date: "%b %d" + list_sep_date: "%B %Y" +... -Other requisites are: +The following will be added on runtime: + +```yaml +%YAML 1.2 +--- +fmt: + rss_date: "%a, %d %b %Y %H:%M:%S GMT" # fixed + sitemap_date: "%Y-%m-%d" # fixed +info: + version: "x.y.z" # current 'pyssg' version (0.5.1.dev16, for example) + debug: True/False # depending if --debug was used when executing + force: True/False # depending if --force was used when executing +rss_run_date: # date the program was run, formatted with 'fmt.rss_date' +sitemap_run_date: # date the program was run, formatted with 'fmt.sitemap_date' +... 
+``` -- Urls shouldn't have the trailing slash `/`. -- The only character that needs to be escaped is `%` with another `%`. +You can add any other option/section that you can later use in the Jinja templates via the exposed config object. URLs shouldn't have a trailing slash `/`. ## Available Jinja variables These variables are exposed to use within the templates. The below list is in the form of *variable (type) (available from): description*. `section/option` describe config file section and option and `object.attribute` corresponding object and it's attribute. -- `config` (`ConfigParser`) (all): parsed config file plus the added options internally (as described in [config file](#config-file)). +- `config` (`dict`) (all): parsed config file plus the added options internally (as described in [config file](#config-file)). - `all_pages` (`list(Page)`) (all): list of all the pages, sorted by creation time, reversed. - `page` (`Page`) (`page.html`): contains the following attributes (genarally these are parsed from the metadata in the `*.md` files): - `title` (`str`): title of the page. 
diff --git a/requirements.txt b/requirements.txt index d5997fd..9192da2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ markdown-checklist>=0.4.4 MarkupSafe>=2.1.1 yafg>=0.3 pymdown-extensions>=9.9 +PyYAML>=6.0 diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 9834e1d..391c7e0 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -1,7 +1,6 @@ import os from copy import deepcopy from operator import itemgetter -from configparser import ConfigParser from logging import Logger, getLogger from jinja2 import Environment, Template, FileSystemLoader as FSLoader @@ -15,10 +14,10 @@ log: Logger = getLogger(__name__) class Builder: - def __init__(self, config: ConfigParser, + def __init__(self, config: dict, db: Database): log.debug('initializing site builder') - self.config: ConfigParser = config + self.config: dict = config self.db: Database = db # the autoescape option could be a security risk if used in a dynamic @@ -104,7 +103,8 @@ class Builder: log.debug('file "%s" has been modified or is new, copying', f) copy_file(src_file, dst_file) else: - if self.config.getboolean('other', 'force'): + # TODO: need to check if this holds after yaml update + if self.config['info']['force']: log.debug('file "%s" hasn\'t been modified, but option force is set to true, copying anyways', f) copy_file(src_file, dst_file) else: @@ -117,7 +117,7 @@ class Builder: temp_files: list[Page] # check if only updated should be created - if self.config.getboolean('other', 'force'): + if self.config['info']['force']: log.debug('all html will be rendered, force is set to true') temp_files = self.all_files else: diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py index 895df5c..1d05289 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -1,63 +1,64 @@ import sys -import yaml -import pprint from importlib.metadata import version from importlib.resources import path as rpath from datetime import datetime, timezone 
-from configparser import ConfigParser from logging import Logger, getLogger from .utils import get_expanded_path +from .yaml_parser import get_parsed_yaml log: Logger = getLogger(__name__) - - DEFAULT_CONFIG_PATH: str = '$XDG_CONFIG_HOME/pyssg/config.yaml' -VERSION = version('pyssg') - +VERSION: str = version('pyssg') -def __expand_all_paths(config: ConfigParser) -> None: - log.debug('expanding all path options') - for option in config.options('path'): - path: str = config['path'][option] - config.set('path', option, get_expanded_path(path)) +def __check_well_formed_config(config: dict) -> None: + log.debug('checking that config file is well formed (at least contains mandatory fields') + mandatory_config: dict = get_parsed_yaml('mandatory_config.yaml', 'pyssg.plt')[0] -def __check_well_formed_config(config: ConfigParser) -> None: - log.debug('checking that config file is well formed') - default_config: ConfigParser = ConfigParser() - with rpath('pyssg.plt', 'default.ini') as p: - log.debug('reading config file "%s"', p) - default_config.read(p) - - for section in default_config.sections(): + for section in mandatory_config.keys(): log.debug('checking section "%s"', section) - if not config.has_section(section): + if not config[section]: log.error('config does not have section "%s"', section) sys.exit(1) - for option in default_config.options(section): + # the case for elements that don't have nested elements + if not mandatory_config[section]: + log.debug('section "%s" doesn\'t need nested elements', section) + continue + for option in mandatory_config[section].keys(): log.debug('checking option "%s"', option) - if not config.has_option(section, option): + if option not in config[section] or not config[section][option]: log.error('config does not have option "%s" in section "%s"', option, section) sys.exit(1) -def get_parsed_config(path: str) -> ConfigParser: - config: ConfigParser = ConfigParser() +def __expand_all_paths(config: dict) -> None: + 
log.debug('expanding all path options: %s', config['path'].keys()) + for option in config['path'].keys(): + config['path'][option] = get_expanded_path(config['path'][option]) + + +# not necessary to type deeper than the first dict +def get_parsed_config(path: str) -> list[dict]: log.debug('reading config file "%s"', path) - config.read(path) - - __check_well_formed_config(config) - __expand_all_paths(config) - - # set other required options - log.debug('setting extra config options') - config.set('fmt', 'rss_date', '%%a, %%d %%b %%Y %%H:%%M:%%S GMT') - config.set('fmt', 'sitemap_date', '%%Y-%%m-%%d') - config.set('info', 'version', VERSION) - config.set('info', 'rss_run_date', datetime.now( - tz=timezone.utc).strftime(config['fmt']['rss_date'])) - config.set('info', 'sitemap_run_date', datetime.now( - tz=timezone.utc).strftime(config['fmt']['sitemap_date'])) + config: list[dict] = get_parsed_yaml(path) # type: ignore + + __check_well_formed_config(config[0]) + __expand_all_paths(config[0]) + + return config + + +# not necessary to type deeper than the first dict, +# static config means config that shouldn't be changed by the user +def get_static_config() -> dict[str, dict]: + log.debug('reading and setting static config') + config: dict = get_parsed_yaml('static_config.yaml', 'pyssg.plt')[0] # type: ignore + + config['info']['version'] = VERSION + config['info']['rss_run_date'] = datetime.now(tz=timezone.utc)\ + .strftime(config['fmt']['rss_date']) + config['info']['sitemap_run_date'] = datetime.now(tz=timezone.utc)\ + .strftime(config['fmt']['sitemap_date']) return config diff --git a/src/pyssg/database.py b/src/pyssg/database.py index 5a174c9..34bf534 100644 --- a/src/pyssg/database.py +++ b/src/pyssg/database.py @@ -2,7 +2,6 @@ import os import sys import csv from logging import Logger, getLogger -from configparser import ConfigParser from .utils import get_checksum from .database_entry import DatabaseEntry @@ -15,11 +14,9 @@ class Database: __COLUMN_NUM: int = 
5 __COLUMN_DELIMITER: str = '|' - def __init__(self, db_path: str, - config: ConfigParser): + def __init__(self, db_path: str) -> None: log.debug('initializing the page db on path "%s"', db_path) self.db_path: str = db_path - self.config: ConfigParser = config self.e: dict[str, DatabaseEntry] = dict() diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py index 061fcd5..5f4fb46 100644 --- a/src/pyssg/md_parser.py +++ b/src/pyssg/md_parser.py @@ -1,7 +1,6 @@ import os from operator import itemgetter from markdown import Markdown -from configparser import ConfigParser from logging import Logger, getLogger from markdown import Markdown @@ -44,12 +43,12 @@ def _get_md_obj() -> Markdown: # page and file is basically a synonym here... class MDParser: def __init__(self, files: list[str], - config: ConfigParser, + config: dict, db: Database): log.debug('initializing the md parser with %d files', len(files)) self.files: list[str] = files - self.config: ConfigParser = config + self.config: dict = config self.db: Database = db self.md: Markdown = _get_md_obj() diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 4f2ee43..4a12f62 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -2,7 +2,6 @@ import os import sys from datetime import datetime, timezone from logging import Logger, getLogger -from configparser import ConfigParser log: Logger = getLogger(__name__) @@ -14,7 +13,7 @@ class Page: mtime: float, html: str, meta: dict, - config: ConfigParser): + config: dict): log.debug('initializing the page object with name "%s"', name) # initial data self.name: str = name @@ -22,7 +21,7 @@ class Page: self.mtimestamp: float = mtime self.content: str = html self.meta: dict = meta - self.config: ConfigParser = config + self.config: dict = config # data from self.meta self.title: str diff --git a/src/pyssg/plt/default.ini b/src/pyssg/plt/default.ini deleted file mode 100644 index ab4eac1..0000000 --- a/src/pyssg/plt/default.ini +++ /dev/null @@ -1,16 +0,0 @@ -[path] 
-src=src -dst=dst -plt=plt -[url] -main=https://example.com -static=https://static.example.com -default_image=/images/default.png -[fmt] -date=%%a, %%b %%d, %%Y @ %%H:%%M %%Z -list_date=%%b %%d -list_sep_date=%%B %%Y -[info] -title=Example site -[other] -force=False \ No newline at end of file diff --git a/src/pyssg/plt/default.yaml b/src/pyssg/plt/default.yaml new file mode 100644 index 0000000..c90d44d --- /dev/null +++ b/src/pyssg/plt/default.yaml @@ -0,0 +1,18 @@ +%YAML 1.2 +--- +define: &root "$HOME/pyssg/site_example/" + +title: "Example site" +path: + src: !join [*root, "src"] + dst: !join [*root, "dst"] + plt: !join [*root, "plt"] +url: + main: "https://example.com" + static: "https://static.example.com" + default_image: "/images/default.png" +fmt: + date: "%a, %b %d, %Y @ %H:%M %Z" + list_date: "%b %d" + list_sep_date: "%B %Y" +... \ No newline at end of file diff --git a/src/pyssg/plt/index.html b/src/pyssg/plt/index.html index d061625..96d66ef 100644 --- a/src/pyssg/plt/index.html +++ b/src/pyssg/plt/index.html @@ -3,10 +3,10 @@ - Index -- {{config['info']['title']}} + Index -- {{config['title']}} -

Index -- {{config['info']['title']}}

+

Index -- {{config['title']}}

Some text here.

Tags: diff --git a/src/pyssg/plt/mandatory_config.yaml b/src/pyssg/plt/mandatory_config.yaml new file mode 100644 index 0000000..52bfa04 --- /dev/null +++ b/src/pyssg/plt/mandatory_config.yaml @@ -0,0 +1,14 @@ +%YAML 1.2 +--- +title: +path: + src: + dst: + plt: +url: + main: +fmt: + date: + list_date: + list_sep_date: +... \ No newline at end of file diff --git a/src/pyssg/plt/page.html b/src/pyssg/plt/page.html index 39101c4..d7f5e43 100644 --- a/src/pyssg/plt/page.html +++ b/src/pyssg/plt/page.html @@ -3,7 +3,7 @@ - {{page.title}} -- {{config['info']['title']}} + {{page.title}} -- {{config['title']}}

{{page.title}}

diff --git a/src/pyssg/plt/rss.xml b/src/pyssg/plt/rss.xml index 31abd48..6a3eb00 100644 --- a/src/pyssg/plt/rss.xml +++ b/src/pyssg/plt/rss.xml @@ -3,7 +3,7 @@ xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/"> - {{config['info']['title']}} + {{config['title']}} {{config['url']['main']}} Short site description. @@ -19,7 +19,7 @@ 30 {{config['url']['static']}}/images/blog.png - {{config['info']['title']}} + {{config['title']}} {{config['url']['main']}} {%for p in all_pages%} diff --git a/src/pyssg/plt/static_config.yaml b/src/pyssg/plt/static_config.yaml new file mode 100644 index 0000000..745c767 --- /dev/null +++ b/src/pyssg/plt/static_config.yaml @@ -0,0 +1,8 @@ +%YAML 1.2 +--- +fmt: + rss_date: "%a, %d %b %Y %H:%M:%S GMT" + sitemap_date: "%Y-%m-%d" +info: + version: "0.0.0" +... \ No newline at end of file diff --git a/src/pyssg/plt/tag.html b/src/pyssg/plt/tag.html index eadfb95..59cbdf1 100644 --- a/src/pyssg/plt/tag.html +++ b/src/pyssg/plt/tag.html @@ -3,7 +3,7 @@ - Posts filtered by {{tag[0]}} -- {{config['info']['title']}} + Posts filtered by {{tag[0]}} -- {{config['title']}}

Posts filtered by {{tag[0]}}

diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index 2734a99..acf4542 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -2,13 +2,12 @@ import os import sys from importlib.resources import path as rpath from typing import Union -from configparser import ConfigParser from logging import Logger, getLogger, DEBUG from argparse import ArgumentParser from .arg_parser import get_parser from .utils import create_dir, copy_file, get_expanded_path -from .configuration import get_parsed_config, DEFAULT_CONFIG_PATH, VERSION +from .configuration import get_parsed_config, get_static_config, DEFAULT_CONFIG_PATH, VERSION from .database import Database from .builder import Builder @@ -60,7 +59,7 @@ def main() -> None: if args['copy_default_config']: log.info('copying default config file') create_dir(config_dir) - with rpath('pyssg.plt', 'default.ini') as p: + with rpath('pyssg.plt', 'default.yaml') as p: copy_file(str(p), config_path) sys.exit(0) @@ -70,8 +69,18 @@ def main() -> None: ' first time if you haven\'t already', config_path) sys.exit(1) - config: ConfigParser = get_parsed_config(config_path) - config.set('info', 'debug', str(args['debug'])) + log.debug('reading config files') + config_all: list[dict] = get_parsed_config(config_path) + static_config: dict = get_static_config() + + # easier to add static into config than changing existing code + config: dict = config_all[0] + config['fmt']['rss_date'] = static_config['fmt']['rss_date'] + config['fmt']['sitemap_date'] = static_config['fmt']['sitemap_date'] + config['info'] = dict() + config['info']['version'] = static_config['info']['version'] + config['info']['debug'] = str(args['debug']) + config['info']['force'] = str(args['force']) if args['init']: log.info('initializing the directory structure and copying over templates') @@ -94,8 +103,9 @@ def main() -> None: if args['build']: log.info('building the html files') + # TODO: need to add this to the config and not assume it db_path: str = 
os.path.join(config['path']['src'], '.files') - db: Database = Database(db_path, config) + db: Database = Database(db_path) db.read() builder: Builder = Builder(config, db) diff --git a/src/pyssg/yaml_parser.py b/src/pyssg/yaml_parser.py new file mode 100644 index 0000000..48c2eec --- /dev/null +++ b/src/pyssg/yaml_parser.py @@ -0,0 +1,45 @@ +import yaml +from yaml import SafeLoader +from yaml.nodes import SequenceNode +from io import TextIOWrapper +from importlib.resources import path as rpath +from logging import Logger, getLogger + +log: Logger = getLogger(__name__) + + +# required to concat values in yaml using !join [value, value, ...] +def __join_constructor(loader: SafeLoader, node: SequenceNode) -> str: + seq = loader.construct_sequence(node) + return ''.join([str(i) for i in seq]) +log.warning('adding the custom join constructor to yaml.SafeLoader') +SafeLoader.add_constructor('!join', __join_constructor) + + +# "file" is either a path or the yaml content itself +def __read_raw_yaml(file: TextIOWrapper) -> list[dict]: + all_docs: list[dict] = [] + all_docs_gen = yaml.safe_load_all(file) + for doc in all_docs_gen: + all_docs.append(doc) + + return all_docs + + +def get_parsed_yaml(resource: str, package: str='') -> list[dict]: + all_yaml_docs: list[dict] = [] + if package == '': + log.debug('no package specified, reading file "%s"', resource) + with open(resource, 'r') as f: + all_yaml_docs = __read_raw_yaml(f) + else: + log.debug('package "%s" specified, reading resource "%s"', + package, resource) + with rpath(package, resource) as p: + with open(p, 'r') as f: + all_yaml_docs = __read_raw_yaml(f) + + log.info('found %s document(s) for configuration "%s"', + len(all_yaml_docs), f'{package}.{resource}' if package != '' else resource) + + return all_yaml_docs -- cgit v1.2.3-54-g00ecf