From 0bc00ce9352ba843d62c189b68e0e07724cc4b58 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <david@luevano.xyz>
Date: Sun, 4 Dec 2022 15:06:48 -0600
Subject: migrate from INI to YAML, breaks compatibility

config file and template files need to be converted to the new format to use with YAML config
---
 README.md                           | 99 ++++++++++++++++++++-----------------
 requirements.txt                    |  1 +
 src/pyssg/builder.py                | 10 ++--
 src/pyssg/configuration.py          | 79 ++++++++++++++---------------
 src/pyssg/database.py               |  5 +-
 src/pyssg/md_parser.py              |  5 +-
 src/pyssg/page.py                   |  5 +-
 src/pyssg/plt/default.ini           | 16 ------
 src/pyssg/plt/default.yaml          | 18 +++++++
 src/pyssg/plt/index.html            |  4 +-
 src/pyssg/plt/mandatory_config.yaml | 14 ++++++
 src/pyssg/plt/page.html             |  2 +-
 src/pyssg/plt/rss.xml               |  4 +-
 src/pyssg/plt/static_config.yaml    |  8 +++
 src/pyssg/plt/tag.html              |  2 +-
 src/pyssg/pyssg.py                  | 22 ++++++---
 src/pyssg/yaml_parser.py            | 45 +++++++++++++++++
 17 files changed, 212 insertions(+), 127 deletions(-)
 delete mode 100644 src/pyssg/plt/default.ini
 create mode 100644 src/pyssg/plt/default.yaml
 create mode 100644 src/pyssg/plt/mandatory_config.yaml
 create mode 100644 src/pyssg/plt/static_config.yaml
 create mode 100644 src/pyssg/yaml_parser.py

diff --git a/README.md b/README.md
index f01e03c..fb3f19e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 # pyssg - Static Site Generator written in Python
 
-Inspired (initially) by Roman Zolotarev's [`ssg5`](https://rgz.ee/bin/ssg5) and [`rssg`](https://rgz.ee/bin/rssg), Luke Smith's [`lb` and `sup`](https://github.com/LukeSmithxyz/lb) and, pedantic.software's great (but *"mamador"*, as I would say in spanish) [`blogit`](https://pedantic.software/git/blogit/).
+Generates HTML files from MD files for a static site, personally using it for a blog-like site.
+
+Initially inspired by Roman Zolotarev's [`ssg5`](https://rgz.ee/bin/ssg5) and [`rssg`](https://rgz.ee/bin/rssg), Luke Smith's [`lb` and `sup`](https://github.com/LukeSmithxyz/lb) and, pedantic.software's [`blogit`](https://pedantic.software/git/blogit/).
 
 ## Features and to-do
 
@@ -19,15 +21,15 @@ Inspired (initially) by Roman Zolotarev's [`ssg5`](https://rgz.ee/bin/ssg5) and
 	- [ ] Include manually added `*.html` files.
 - [x] Only build page if `*.md` is new or updated.
 	- [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present).
-- [x] Configuration file. ~~as an alternative to using command line flags (configuration file options are prioritized).~~ 
-	- [x] Use [`configparser`](https://docs.python.org/3/library/configparser.html) instead of custom config handler.
-	- [ ] Migrate to YAML instead of INI, as it is way more flexible.
+- [x] Configuration file. ~~as an alternative to using command line flags (configuration file options are prioritized).~~
+	- [x] ~~Use [`configparser`](https://docs.python.org/3/library/configparser.html) instead of custom config handler.~~
+	- [x] Migrate to YAML instead of INI, as it is way more flexible. Uses [`PyYAML`](https://pyyaml.org/).
 - [x] Avoid the program to freak out when there are directories created in advance.
 - [x] Provide more meaningful error messages when you are missing mandatory metadata in your `*.md` files.
 - [ ] More complex directory structure to support multiple subdomains and different types of pages.
 - [ ] Option/change to using an SQL database instead of the custom solution.
 - [x] Checksum checking because the timestamp of the file is not enough.
-- [ ] Better management of the extensions.
+- [ ] Better management of the markdown extensions.
 
 ### Markdown features
 
@@ -64,11 +66,11 @@ Will add a PKBUILD (and possibly submit it to the AUR) sometime later.
 pyssg --copy-default-config -c <path/to/config>
 ```
 
-- Where `-c` is optional as by default `$XDG_CONFIG_HOME/pyssg/config.ini` is used.
+- Where `-c` is optional as by default `$XDG_CONFIG_HOME/pyssg/config.yaml` is used.
 
 2. Edit the config file created as needed.
 
-- `config.ini` is parsed using Python's [`configparser`](https://docs.python.org/3/library/configparser.html), [more about the config file](#config-file).
+- `config.yaml` is parsed using [`PyYAML`](https://pyyaml.org/), [more about the config file](#config-file).
 
 3. Initialize the directory structures (source, destination, template) and move template files:
 
@@ -107,54 +109,61 @@ pyssg -b
 
 ## Config file
 
-All sections/options need to be compliant with the [`configparser`](https://docs.python.org/3/library/configparser.html).
-
-At least the sections and options given in the default config should be present:
-
-```ini
-[path]
-src=src # source
-dst=dst # destination
-plt=plt # template
-[url]
-main=https://example.com
-static=https://static.example.com # used for static resources (images, js, css, etc)
-default_image=/images/default.png # this will be appended to 'static' at the end
-[fmt] # % needs to be escaped with another %
-date=%%a, %%b %%d, %%Y @ %%H:%%M %%Z
-list_date=%%b %%d
-list_sep_date=%%B %%Y
-[info]
-title=Example site
-[other]
-force=False
-```
+All sections/options need to be compliant with [`PyYAML`](https://pyyaml.org/), which should be compliant with [`YAML 1.2`](https://yaml.org/spec/1.2.2/). Additionally, I've added the custom tag `!join`, which concatenates the items of an array into a single string and can be used as follows:
 
-Along with these, these extra ones will be added on runtime:
-
-```ini
-[fmt]
-rss_date=%%a, %%d %%b %%Y %%H:%%M:%%S GMT # fixed
-sitemap_date=%%Y-%%m-%%d # fixed
-[info]
-version= # current 'pyssg' version (0.5.1.dev16, for example)
-debug=True/False # depending if --debug was used when executing
-rss_run_date= # date the program was run, formatted with 'rss_date'
-sitemap_run_date= # date the program was run, formatted with 'sitemap_date'
+```yaml
+variable: &variable_reference_name "value"
+other_variable: !join [*variable_reference_name, "other_value", 1]
 ```
 
-You can add any other option/section that you can later use in the Jinja templates via the exposed config object. 
+Which would produce `other_variable: "valueother_value1"`. Also, environment variables will be expanded internally.
+
+At least the following config items should be present in the config:
+
+```yaml
+%YAML 1.2
+---
+# not needed, shown here as an example of the !join tag
+define: &root "$HOME/path/to/" # $HOME expands to /home/user, for example
+
+title: "Example site"
+path:
+  src: !join [*root, "src"] # $HOME/path/to/src
+  dst: "$HOME/some/other/path/to/dst"
+  plt: "plt"
+url:
+  main: "https://example.com"
+fmt:
+  date: "%a, %b %d, %Y @ %H:%M %Z"
+  list_date: "%b %d"
+  list_sep_date: "%B %Y"
+...
+```
 
-Other requisites are:
+The following will be added on runtime:
+
+```yaml
+%YAML 1.2
+---
+fmt:
+  rss_date: "%a, %d %b %Y %H:%M:%S GMT" # fixed
+  sitemap_date: "%Y-%m-%d" # fixed
+info:
+  version: "x.y.z" # current 'pyssg' version (0.5.1.dev16, for example)
+  debug: True/False # depending if --debug was used when executing
+  force: True/False # depending if --force was used when executing
+  rss_run_date: # date the program was run, formatted with 'fmt.rss_date'
+  sitemap_run_date: # date the program was run, formatted with 'fmt.sitemap_date'
+...
+```
 
-- Urls shouldn't have the trailing slash `/`.
-- The only character that needs to be escaped is `%` with another `%`.
+You can add any other option/section that you can later use in the Jinja templates via the exposed config object. URLs shouldn't have a trailing slash `/`.
 
 ## Available Jinja variables
 
 These variables are exposed to use within the templates. The below list is in the form of *variable (type) (available from): description*. `section/option` describe config file section and option and `object.attribute` corresponding object and it's attribute.
 
-- `config` (`ConfigParser`) (all): parsed config file plus the added options internally (as described in [config file](#config-file)).
+- `config` (`dict`) (all): parsed config file plus the added options internally (as described in [config file](#config-file)).
 - `all_pages` (`list(Page)`) (all): list of all the pages, sorted by creation time, reversed.
 - `page` (`Page`) (`page.html`): contains the following attributes (genarally these are parsed from the metadata in the `*.md` files):
 	- `title` (`str`): title of the page.
diff --git a/requirements.txt b/requirements.txt
index d5997fd..9192da2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ markdown-checklist>=0.4.4
 MarkupSafe>=2.1.1
 yafg>=0.3
 pymdown-extensions>=9.9
+PyYAML>=6.0
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index 9834e1d..391c7e0 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -1,7 +1,6 @@
 import os
 from copy import deepcopy
 from operator import itemgetter
-from configparser import ConfigParser
 from logging import Logger, getLogger
 
 from jinja2 import Environment, Template, FileSystemLoader as FSLoader
@@ -15,10 +14,10 @@ log: Logger = getLogger(__name__)
 
 
 class Builder:
-    def __init__(self, config: ConfigParser,
+    def __init__(self, config: dict,
                  db: Database):
         log.debug('initializing site builder')
-        self.config: ConfigParser = config
+        self.config: dict = config
         self.db: Database = db
 
         # the autoescape option could be a security risk if used in a dynamic
@@ -104,7 +103,8 @@ class Builder:
                 log.debug('file "%s" has been modified or is new, copying', f)
                 copy_file(src_file, dst_file)
             else:
-                if self.config.getboolean('other', 'force'):
+                # TODO: need to check if this holds after yaml update
+                if self.config['info']['force']:
                     log.debug('file "%s" hasn\'t been modified, but option force is set to true, copying anyways', f)
                     copy_file(src_file, dst_file)
                 else:
@@ -117,7 +117,7 @@ class Builder:
         temp_files: list[Page]
 
         # check if only updated should be created
-        if self.config.getboolean('other', 'force'):
+        if self.config['info']['force']:
             log.debug('all html will be rendered, force is set to true')
             temp_files = self.all_files
         else:
diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py
index 895df5c..1d05289 100644
--- a/src/pyssg/configuration.py
+++ b/src/pyssg/configuration.py
@@ -1,63 +1,64 @@
 import sys
-import yaml
-import pprint
 from importlib.metadata import version
 from importlib.resources import path as rpath
 from datetime import datetime, timezone
-from configparser import ConfigParser
 from logging import Logger, getLogger
 
 from .utils import get_expanded_path
+from .yaml_parser import get_parsed_yaml
 
 log: Logger = getLogger(__name__)
-
-
 DEFAULT_CONFIG_PATH: str = '$XDG_CONFIG_HOME/pyssg/config.yaml'
-VERSION = version('pyssg')
-
+VERSION: str = version('pyssg')
 
-def __expand_all_paths(config: ConfigParser) -> None:
-    log.debug('expanding all path options')
-    for option in config.options('path'):
-        path: str = config['path'][option]
-        config.set('path', option, get_expanded_path(path))
 
+def __check_well_formed_config(config: dict) -> None:
+    log.debug('checking that config file is well formed (at least contains mandatory fields)')
+    mandatory_config: dict = get_parsed_yaml('mandatory_config.yaml', 'pyssg.plt')[0]
 
-def __check_well_formed_config(config: ConfigParser) -> None:
-    log.debug('checking that config file is well formed')
-    default_config: ConfigParser = ConfigParser()
-    with rpath('pyssg.plt', 'default.ini') as p:
-        log.debug('reading config file "%s"', p)
-        default_config.read(p)
-
-    for section in default_config.sections():
+    for section in mandatory_config:
         log.debug('checking section "%s"', section)
-        if not config.has_section(section):
+        if not config.get(section):  # .get(): a missing key must be reported, not raise KeyError
             log.error('config does not have section "%s"', section)
             sys.exit(1)
-        for option in default_config.options(section):
+        # mandatory entries without nested keys (e.g. "title") only need to be present
+        if not mandatory_config[section]:
+            log.debug('section "%s" doesn\'t need nested elements', section)
+            continue
+        for option in mandatory_config[section]:
             log.debug('checking option "%s"', option)
-            if not config.has_option(section, option):
+            if option not in config[section] or not config[section][option]:
                 log.error('config does not have option "%s" in section "%s"', option, section)
                 sys.exit(1)
 
 
-def get_parsed_config(path: str) -> ConfigParser:
-    config: ConfigParser = ConfigParser()
+def __expand_all_paths(config: dict) -> None:
+    paths: dict = config['path']  # expanded in place, same dict object is kept
+    log.debug('expanding all path options: %s', paths.keys())
+    paths.update({name: get_expanded_path(raw) for name, raw in paths.items()})
+
+
+def get_parsed_config(path: str) -> list[dict]:  # one dict per yaml document
     log.debug('reading config file "%s"', path)
-    config.read(path)
-
-    __check_well_formed_config(config)
-    __expand_all_paths(config)
-
-    # set other required options
-    log.debug('setting extra config options')
-    config.set('fmt', 'rss_date', '%%a, %%d %%b %%Y %%H:%%M:%%S GMT')
-    config.set('fmt', 'sitemap_date', '%%Y-%%m-%%d')
-    config.set('info', 'version', VERSION)
-    config.set('info', 'rss_run_date', datetime.now(
-        tz=timezone.utc).strftime(config['fmt']['rss_date']))
-    config.set('info', 'sitemap_run_date', datetime.now(
-        tz=timezone.utc).strftime(config['fmt']['sitemap_date']))
+    config: list[dict] = get_parsed_yaml(path)
+    if not config or not isinstance(config[0], dict):  # empty file or non-mapping doc
+        log.error('config file "%s" is empty or is not a yaml mapping', path)
+        sys.exit(1)
+    __check_well_formed_config(config[0])  # only the first document is validated
+    __expand_all_paths(config[0])
+    return config
+
+
+# static config: values set by pyssg itself that shouldn't be changed by the user
+def get_static_config() -> dict[str, dict]:
+    """Load static_config.yaml and fill in the version and run-date 'info' values."""
+    log.debug('reading and setting static config')
+    config: dict = get_parsed_yaml('static_config.yaml', 'pyssg.plt')[0]
+    # take a single UTC timestamp so both run dates describe the same instant
+    now: datetime = datetime.now(tz=timezone.utc)
+    info: dict = config['info']
+    info['version'] = VERSION
+    info['rss_run_date'] = now.strftime(config['fmt']['rss_date'])
+    info['sitemap_run_date'] = now.strftime(config['fmt']['sitemap_date'])
 
     return config
diff --git a/src/pyssg/database.py b/src/pyssg/database.py
index 5a174c9..34bf534 100644
--- a/src/pyssg/database.py
+++ b/src/pyssg/database.py
@@ -2,7 +2,6 @@ import os
 import sys
 import csv
 from logging import Logger, getLogger
-from configparser import ConfigParser
 
 from .utils import get_checksum
 from .database_entry import DatabaseEntry
@@ -15,11 +14,9 @@ class Database:
     __COLUMN_NUM: int = 5
     __COLUMN_DELIMITER: str = '|'
 
-    def __init__(self, db_path: str,
-                 config: ConfigParser):
+    def __init__(self, db_path: str) -> None:
         log.debug('initializing the page db on path "%s"', db_path)
         self.db_path: str = db_path
-        self.config: ConfigParser = config
         self.e: dict[str, DatabaseEntry] = dict()
 
 
diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py
index 061fcd5..5f4fb46 100644
--- a/src/pyssg/md_parser.py
+++ b/src/pyssg/md_parser.py
@@ -1,7 +1,6 @@
 import os
 from operator import itemgetter
 from markdown import Markdown
-from configparser import ConfigParser
 from logging import Logger, getLogger
 
 from markdown import Markdown
@@ -44,12 +43,12 @@ def _get_md_obj() -> Markdown:
 # page and file is basically a synonym here...
 class MDParser:
     def __init__(self, files: list[str],
-                 config: ConfigParser,
+                 config: dict,
                  db: Database):
         log.debug('initializing the md parser with %d files', len(files))
         self.files: list[str] = files
 
-        self.config: ConfigParser = config
+        self.config: dict = config
         self.db: Database = db
         self.md: Markdown = _get_md_obj()
 
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 4f2ee43..4a12f62 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -2,7 +2,6 @@ import os
 import sys
 from datetime import datetime, timezone
 from logging import Logger, getLogger
-from configparser import ConfigParser
 
 log: Logger = getLogger(__name__)
 
@@ -14,7 +13,7 @@ class Page:
                  mtime: float,
                  html: str,
                  meta: dict,
-                 config: ConfigParser):
+                 config: dict):
         log.debug('initializing the page object with name "%s"', name)
         # initial data
         self.name: str = name
@@ -22,7 +21,7 @@ class Page:
         self.mtimestamp: float = mtime
         self.content: str = html
         self.meta: dict = meta
-        self.config: ConfigParser = config
+        self.config: dict = config
 
         # data from self.meta
         self.title: str
diff --git a/src/pyssg/plt/default.ini b/src/pyssg/plt/default.ini
deleted file mode 100644
index ab4eac1..0000000
--- a/src/pyssg/plt/default.ini
+++ /dev/null
@@ -1,16 +0,0 @@
-[path]
-src=src
-dst=dst
-plt=plt
-[url]
-main=https://example.com
-static=https://static.example.com
-default_image=/images/default.png
-[fmt]
-date=%%a, %%b %%d, %%Y @ %%H:%%M %%Z
-list_date=%%b %%d
-list_sep_date=%%B %%Y
-[info]
-title=Example site
-[other]
-force=False
\ No newline at end of file
diff --git a/src/pyssg/plt/default.yaml b/src/pyssg/plt/default.yaml
new file mode 100644
index 0000000..c90d44d
--- /dev/null
+++ b/src/pyssg/plt/default.yaml
@@ -0,0 +1,18 @@
+%YAML 1.2
+---
+define: &root "$HOME/pyssg/site_example/"
+
+title: "Example site"
+path:
+  src: !join [*root, "src"]
+  dst: !join [*root, "dst"]
+  plt: !join [*root, "plt"]
+url:
+  main: "https://example.com"
+  static: "https://static.example.com"
+  default_image: "/images/default.png"
+fmt:
+  date: "%a, %b %d, %Y @ %H:%M %Z"
+  list_date: "%b %d"
+  list_sep_date: "%B %Y"
+...
\ No newline at end of file
diff --git a/src/pyssg/plt/index.html b/src/pyssg/plt/index.html
index d061625..96d66ef 100644
--- a/src/pyssg/plt/index.html
+++ b/src/pyssg/plt/index.html
@@ -3,10 +3,10 @@
   <head>
     <meta charset="utf-8">
     <base href="{{config['url']['static']}}">
-    <title>Index -- {{config['info']['title']}}</title>
+    <title>Index -- {{config['title']}}</title>
   </head>
   <body>
-  <h1>Index -- {{config['info']['title']}}</h1>
+  <h1>Index -- {{config['title']}}</h1>
   <p>Some text here.</p>
 
   <p>Tags:
diff --git a/src/pyssg/plt/mandatory_config.yaml b/src/pyssg/plt/mandatory_config.yaml
new file mode 100644
index 0000000..52bfa04
--- /dev/null
+++ b/src/pyssg/plt/mandatory_config.yaml
@@ -0,0 +1,14 @@
+%YAML 1.2
+---
+title:
+path:
+  src:
+  dst:
+  plt:
+url:
+  main:
+fmt:
+  date:
+  list_date:
+  list_sep_date:
+...
\ No newline at end of file
diff --git a/src/pyssg/plt/page.html b/src/pyssg/plt/page.html
index 39101c4..d7f5e43 100644
--- a/src/pyssg/plt/page.html
+++ b/src/pyssg/plt/page.html
@@ -3,7 +3,7 @@
   <head>
     <meta charset="utf-8">
     <base href="{{config['url']['static']}}">
-    <title>{{page.title}} -- {{config['info']['title']}}</title>
+    <title>{{page.title}} -- {{config['title']}}</title>
   </head>
   <body>
     <h1>{{page.title}}</h1>
diff --git a/src/pyssg/plt/rss.xml b/src/pyssg/plt/rss.xml
index 31abd48..6a3eb00 100644
--- a/src/pyssg/plt/rss.xml
+++ b/src/pyssg/plt/rss.xml
@@ -3,7 +3,7 @@
   xmlns:atom="http://www.w3.org/2005/Atom"
   xmlns:content="http://purl.org/rss/1.0/modules/content/">
   <channel>
-    <title>{{config['info']['title']}}</title>
+    <title>{{config['title']}}</title>
     <link>{{config['url']['main']}}</link>
     <atom:link href="{{config['url']['main']}}/rss.xml" rel="self" type="application/rss+xml"/>
     <description>Short site description.</description>
@@ -19,7 +19,7 @@
     <ttl>30</ttl>
     <image>
       <url>{{config['url']['static']}}/images/blog.png</url>
-      <title>{{config['info']['title']}}</title>
+      <title>{{config['title']}}</title>
       <link>{{config['url']['main']}}</link>
     </image>
     {%for p in all_pages%}
diff --git a/src/pyssg/plt/static_config.yaml b/src/pyssg/plt/static_config.yaml
new file mode 100644
index 0000000..745c767
--- /dev/null
+++ b/src/pyssg/plt/static_config.yaml
@@ -0,0 +1,8 @@
+%YAML 1.2
+---
+fmt:
+  rss_date: "%a, %d %b %Y %H:%M:%S GMT"
+  sitemap_date: "%Y-%m-%d"
+info:
+  version: "0.0.0"
+...
\ No newline at end of file
diff --git a/src/pyssg/plt/tag.html b/src/pyssg/plt/tag.html
index eadfb95..59cbdf1 100644
--- a/src/pyssg/plt/tag.html
+++ b/src/pyssg/plt/tag.html
@@ -3,7 +3,7 @@
   <head>
     <meta charset="utf-8">
     <base href="{{config['url']['static']}}">
-    <title>Posts filtered by {{tag[0]}} -- {{config['info']['title']}}</title>
+    <title>Posts filtered by {{tag[0]}} -- {{config['title']}}</title>
   </head>
   <body>
   <h1>Posts filtered by {{tag[0]}}</h1>
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index 2734a99..acf4542 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -2,13 +2,12 @@ import os
 import sys
 from importlib.resources import path as rpath
 from typing import Union
-from configparser import ConfigParser
 from logging import Logger, getLogger, DEBUG
 from argparse import ArgumentParser
 
 from .arg_parser import get_parser
 from .utils import create_dir, copy_file, get_expanded_path
-from .configuration import get_parsed_config, DEFAULT_CONFIG_PATH, VERSION
+from .configuration import get_parsed_config, get_static_config, DEFAULT_CONFIG_PATH, VERSION
 from .database import Database
 from .builder import Builder
 
@@ -60,7 +59,7 @@ def main() -> None:
     if args['copy_default_config']:
         log.info('copying default config file')
         create_dir(config_dir)
-        with rpath('pyssg.plt', 'default.ini') as p:
+        with rpath('pyssg.plt', 'default.yaml') as p:
             copy_file(str(p), config_path)
         sys.exit(0)
 
@@ -70,8 +69,18 @@ def main() -> None:
                   ' first time if you haven\'t already', config_path)
         sys.exit(1)
 
-    config: ConfigParser = get_parsed_config(config_path)
-    config.set('info', 'debug', str(args['debug']))
+    log.debug('reading config files')
+    config_all: list[dict] = get_parsed_config(config_path)
+    static_config: dict = get_static_config()
+
+    # easier to add static into config than changing existing code
+    config: dict = config_all[0]
+    config['fmt'].update(static_config['fmt'])
+    # copy all of the static 'info' (version and both run dates), not just the version
+    config['info'] = dict(static_config['info'])
+    # keep the flags as bools: str(False) == 'False' is truthy and would always force
+    config['info']['debug'] = bool(args['debug'])
+    config['info']['force'] = bool(args['force'])
 
     if args['init']:
         log.info('initializing the directory structure and copying over templates')
@@ -94,8 +103,9 @@ def main() -> None:
 
     if args['build']:
         log.info('building the html files')
+        # TODO: need to add this to the config and not assume it
         db_path: str = os.path.join(config['path']['src'], '.files')
-        db: Database = Database(db_path, config)
+        db: Database = Database(db_path)
         db.read()
 
         builder: Builder = Builder(config, db)
diff --git a/src/pyssg/yaml_parser.py b/src/pyssg/yaml_parser.py
new file mode 100644
index 0000000..48c2eec
--- /dev/null
+++ b/src/pyssg/yaml_parser.py
@@ -0,0 +1,45 @@
+import yaml
+from yaml import SafeLoader
+from yaml.nodes import SequenceNode
+from io import TextIOWrapper
+from importlib.resources import path as rpath
+from logging import Logger, getLogger
+
+log: Logger = getLogger(__name__)
+
+
+# custom "!join [a, b, ...]" tag: every item is str()-ed and the results concatenated
+def __join_constructor(loader: SafeLoader, node: SequenceNode) -> str:
+    return ''.join(str(item) for item in loader.construct_sequence(node))
+
+log.debug('adding the custom join constructor to yaml.SafeLoader')
+SafeLoader.add_constructor('!join', __join_constructor)
+
+
+def __read_raw_yaml(file: TextIOWrapper) -> list[dict]:
+    """Return every YAML document found in an already opened text stream.
+
+    Documents are parsed with yaml.safe_load_all and kept in stream order.
+    The lazy generator is consumed here, while the stream passed in by the
+    caller is still open.
+    """
+    return list(yaml.safe_load_all(file))
+
+
+def get_parsed_yaml(resource: str, package: str='') -> list[dict]:
+    """Read all YAML documents from a file path or from a packaged resource.
+
+    With an empty "package", "resource" is a filesystem path; otherwise it names a
+    data file inside "package". Files are decoded as UTF-8, not the locale's encoding.
+    """
+    if package == '':
+        log.debug('no package specified, reading file "%s"', resource)
+        with open(resource, 'r', encoding='utf-8') as f:
+            all_yaml_docs: list[dict] = __read_raw_yaml(f)
+    else:
+        log.debug('package "%s" specified, reading resource "%s"', package, resource)
+        with rpath(package, resource) as p, open(p, 'r', encoding='utf-8') as f:
+            all_yaml_docs = __read_raw_yaml(f)
+    log.info('found %s document(s) for configuration "%s"',
+             len(all_yaml_docs), f'{package}.{resource}' if package != '' else resource)
+    return all_yaml_docs
-- 
cgit v1.2.3-70-g09d2