From d4d8601c32a18b9934302fea849006369a688d1f Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Sun, 27 Aug 2023 22:54:31 -0600 Subject: refactor: configuration in its own subpackage, minor tweaks --- src/pyssg/__init__.py | 6 ++--- src/pyssg/cfg/__init__.py | 0 src/pyssg/cfg/configuration.py | 51 ++++++++++++++++++++++++++++++++++++++ src/pyssg/cfg/yaml_parser.py | 23 +++++++++++++++++ src/pyssg/configuration.py | 52 --------------------------------------- src/pyssg/pyssg.py | 49 +++++++++++++++++------------------- src/pyssg/utils.py | 56 +++++++++++++++++------------------------- src/pyssg/yaml_parser.py | 27 -------------------- 8 files changed, 123 insertions(+), 141 deletions(-) create mode 100644 src/pyssg/cfg/__init__.py create mode 100644 src/pyssg/cfg/configuration.py create mode 100644 src/pyssg/cfg/yaml_parser.py delete mode 100644 src/pyssg/configuration.py delete mode 100644 src/pyssg/yaml_parser.py diff --git a/src/pyssg/__init__.py b/src/pyssg/__init__.py index 46e2323..dafd58b 100644 --- a/src/pyssg/__init__.py +++ b/src/pyssg/__init__.py @@ -1,6 +1,6 @@ -from .pyssg import main -from .custom_logger import setup_logger -from .yaml_parser import setup_custom_yaml +from pyssg.pyssg import main +from pyssg.custom_logger import setup_logger +from pyssg.cfg.yaml_parser import setup_custom_yaml setup_logger() diff --git a/src/pyssg/cfg/__init__.py b/src/pyssg/cfg/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pyssg/cfg/configuration.py b/src/pyssg/cfg/configuration.py new file mode 100644 index 0000000..55bfa0b --- /dev/null +++ b/src/pyssg/cfg/configuration.py @@ -0,0 +1,51 @@ +import sys +from importlib.metadata import version +from logging import Logger, getLogger +from typing import Any + +from pyssg.utils import get_expanded_path, get_time_now +from pyssg.cfg.yaml_parser import get_yaml + +log: Logger = getLogger(__name__) +VERSION: str = version('pyssg') + + +def __expand_all_paths(config: list[dict[str, Any]]) -> None: + for option in config[0]['path'].keys(): + path: str = get_expanded_path(config[0]['path'][option]) + config[0]['path'][option] = path + + +def __add_mandatory_config(config: list[dict[str, Any]]) -> None: + if 'fmt' not in config[0]: + config[0]['fmt'] = dict() + if 'rss_date' not in config[0]['fmt']: + config[0]['fmt']['rss_date'] = '%a, %d %b %Y %H:%M:%S GMT' + if 'sitemap_date' not in config[0]['fmt']: + config[0]['fmt']['sitemap_date'] = '%Y-%m-%d' + + if 'info' not in config[0]: + config[0]['info'] = dict() + config[0]['info']['version'] = VERSION + config[0]['info']['rss_run_date'] = get_time_now('rss_date') + config[0]['info']['sitemap_run_date'] = get_time_now('sitemap_date') + + +def get_parsed_config(path: str) -> list[dict[str, Any]]: + config: list[dict[str, Any]] = get_yaml(path) + log.info('found %s documents for config "%s"', len(config), path) + + if len(config) < 2: + log.error('config file requires at least 2 documents:' + ' main config and root dir config') + sys.exit(1) + + __expand_all_paths(config) + __add_mandatory_config(config) + + if config[1]['dir'] != "/": + log.error('the first directory config needs to be' + ' root (/), found %s instead', config[1]['dir']) + sys.exit(1) + return config + diff --git a/src/pyssg/cfg/yaml_parser.py b/src/pyssg/cfg/yaml_parser.py new file mode 100644 index 0000000..dacad70 --- /dev/null +++ b/src/pyssg/cfg/yaml_parser.py @@ -0,0 +1,23 @@ +import yaml +from yaml import SafeLoader +from yaml.nodes import SequenceNode +from typing import Any + + +# required to concat values in yaml using !join [value, value, ...] +def __join_constructor(loader: SafeLoader, node: SequenceNode) -> str: + seq = loader.construct_sequence(node) + return ''.join([str(i) for i in seq]) + + +def setup_custom_yaml() -> None: + SafeLoader.add_constructor('!join', __join_constructor) + + +def get_yaml(path: str) -> list[dict[str, Any]]: + all_docs: list[dict[str, Any]] = [] + with open(path, 'r') as f: + for doc in yaml.safe_load_all(f): + all_docs.append(doc) + return all_docs + diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py deleted file mode 100644 index 7ac83f8..0000000 --- a/src/pyssg/configuration.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import sys -from importlib.metadata import version -from logging import Logger, getLogger -from typing import Any - -from .utils import get_expanded_path, get_time_now -from .yaml_parser import get_yaml - -log: Logger = getLogger(__name__) -VERSION: str = version('pyssg') - - -def __expand_all_paths(config: dict[str, Any]) -> None: - log.debug('expanding all path options: %s', config['path'].keys()) - for option in config['path'].keys(): - config['path'][option] = get_expanded_path(config['path'][option]) - - -# not necessary to type deeper than the first dict -def get_parsed_config(path: str) -> list[dict[str, Any]]: - log.debug('reading default config') - config: list[dict[str, Any]] = get_yaml(path) - log.info('found %s document(s) for config "%s"', len(config), path) - - if len(config) < 2: - log.error('config file requires at least 2 documents:' - ' main config and root dir config') - sys.exit(1) - - __expand_all_paths(config[0]) - - log.debug('adding possible missing configuration and populating') - if 'fmt' not in config[0]: - config[0]['fmt'] = dict() - if 'rss_date' not in config[0]['fmt']: - config[0]['fmt']['rss_date'] = '%a, %d %b %Y %H:%M:%S GMT' - if 'sitemap_date' not in config[0]['fmt']: - config[0]['fmt']['sitemap_date'] = '%Y-%m-%d' - - if 'info' not in config[0]: - config[0]['info'] = dict() - config[0]['info']['version'] = VERSION - config[0]['info']['rss_run_date'] = get_time_now('rss_date') - config[0]['info']['sitemap_run_date'] = get_time_now('sitemap_date') - - if config[1]['dir'] != "/": - log.error('the first directory config needs to be' - ' root (/), found %s instead', config[1]['dir']) - sys.exit(1) - return config - diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index e266747..15e284c 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -5,11 +5,11 @@ from typing import Union from logging import Logger, getLogger, DEBUG from argparse import ArgumentParser -from .arg_parser import get_parser -from .utils import create_dir, copy_file, get_expanded_path -from .configuration import get_parsed_config, VERSION -from .database import Database -from .builder import Builder +from pyssg.arg_parser import get_parser +from pyssg.utils import create_dir, copy_file, get_expanded_path +from pyssg.cfg.configuration import get_parsed_config, VERSION +from pyssg.database import Database +from pyssg.builder import Builder log: Logger = getLogger(__name__) @@ -52,25 +52,24 @@ def main() -> None: handler.setLevel(DEBUG) log.debug('changed logging level to DEBUG') - # TODO: modify init arg in argparser if args['init']: - init_dir: str = os.path.normpath(get_expanded_path(str(args['init']))) + idir: str = os.path.normpath(get_expanded_path(str(args['init']))) log.info('initializing directory structure and copying templates') - create_dir(init_dir) + create_dir(idir) with rpath('pyssg.plt', 'default.yaml') as p: - copy_file(str(p), os.path.join(init_dir, 'config.yaml')) - create_dir(os.path.join(init_dir, 'src')) - create_dir(os.path.join(init_dir, 'dst')) - create_dir(os.path.join(init_dir, 'plt')) + copy_file(str(p), os.path.join(idir, 'config.yaml')) + create_dir(os.path.join(idir, 'src')) + create_dir(os.path.join(idir, 'dst')) + create_dir(os.path.join(idir, 'plt')) files: list[str] = ['index.html', 'page.html', 'tag.html', 'rss.xml', 'sitemap.xml', 'entry.md'] - log.debug('list of files to copy over: (%s)', ', '.join(files)) + log.debug('list of files to copy over: %s', files) for f in files: - plt_file: str = os.path.join(os.path.join(init_dir, 'plt'), f) + plt_file: str = os.path.join(os.path.join(idir, 'plt'), f) with rpath('pyssg.plt', f) as p: copy_file(str(p), plt_file) log.info('finished initialization') @@ -89,21 +88,19 @@ def main() -> None: config: list[dict] = get_parsed_config(config_path) print(config) - log.debug('exiting due to testing') - sys.exit(0) if args['build']: log.info('building the html files') - for conf in config: - log.info('building html for "%s"', conf['title']) - db: Database = Database(conf['path']['db']) - db.read() + # TODO: move from filesystem database to sqlite3 + db: Database = Database(config[0]['path']['db']) + db.read() - log.debug('building all dir_paths found in conf') - for dir_path in conf['dirs'].keys(): - log.debug('building for "%s"', dir_path) - builder: Builder = Builder(conf, db, dir_path) - builder.build() + # TODO: change logic from "dir_paths" to single config + log.debug('building all dir_paths found in conf') + for dir_path in config[0]['dirs'].keys(): + log.debug('building for "%s"', dir_path) + builder: Builder = Builder(config[0], db, dir_path) + builder.build() - db.write() + db.write() log.info('finished building the html files') sys.exit(0) diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py index d391ccf..216f535 100644 --- a/src/pyssg/utils.py +++ b/src/pyssg/utils.py @@ -12,8 +12,9 @@ log: Logger = getLogger(__name__) def get_file_list(path: str, exts: tuple[str], exclude_dirs: list[str] = []) -> list[str]: - log.debug('retrieving file list in path "%s" that contain file' - ' extensions %s except directories %s', path, exts, exclude_dirs) + log.debug('retrieving file list in "%s",' + ' extensions %s, except dirs %s', + path, exts, exclude_dirs) file_list: list[str] = [] for root, dirs, files in os.walk(path): if exclude_dirs != []: @@ -23,20 +24,20 @@ def get_file_list(path: str, if file.endswith(exts): # [1:] is required to remove the '/' # at the beginning after replacing - file_name: str = os.path.join(root, file).replace(path, '')[1:] - file_list.append(file_name) - log.debug('added file "%s" without "%s" part: "%s"', - file, path, file_name) + fname: str = os.path.join(root, file) + fname = fname.replace(path, '')[1:] + file_list.append(fname) + log.debug('added "%s"', fname) else: - log.debug('ignoring file "%s" as it doesn\'t contain' - ' any of the extensions %s', file, exts) + log.debug('ignoring "%s", doesn\'t contain' + ' extensions %s', file, exts) return file_list def get_dir_structure(path: str, exclude: list[str] = []) -> list[str]: - log.debug('retrieving dir structure in path "%s" except directories (%s)', - path, ', '.join(exclude)) + log.debug('retrieving dir structure in "%s",' + ' except dirs %s', path, exclude) dir_list: list[str] = [] for root, dirs, files in os.walk(path): if exclude != []: @@ -45,34 +46,25 @@ def get_dir_structure(path: str, for d in dirs: if root in dir_list: dir_list.remove(root) - log.debug('removed dir "%s" as it already is in the list', root) # not removing the 'path' part here, # as comparisons with 'root' would fail - joined_dir: str = os.path.join(root, d) - dir_list.append(joined_dir) - log.debug('added dir "%s" to the list', joined_dir) - log.debug('removing "%s" from all dirs in list', path) + dname: str = os.path.join(root, d) + dir_list.append(dname) + log.debug('added dir "%s" to the list', dname) # [1:] is required to remove the '/' at the beginning after replacing return [d.replace(path, '')[1:] for d in dir_list] # TODO: probably change it so it returns a bool, easier to check -def create_dir(path: str, p: bool = False, silent=False) -> None: - log_msg: str = '' +def create_dir(path: str, p: bool = False) -> None: try: if p: os.makedirs(path) else: os.mkdir(path) - log_msg = f'created directory "{path}"' - if not silent: - log.info(log_msg) - log.debug(log_msg) + log.info('created directory "%s"', path) except FileExistsError: - log_msg = f'directory "{path}" exists, ignoring' - if not silent: - log.info(log_msg) - log.debug(log_msg) + log.debug('directory "%s" exists, ignoring', path) # TODO: change this as it doesn't take directories into account, @@ -87,7 +79,6 @@ def copy_file(src: str, dst: str) -> None: log.info('file "%s" already exists, ignoring', dst) -# only used for database, but keeping it here as it is an independent function # as seen in SO: https://stackoverflow.com/a/1131238 def get_checksum(path: str) -> str: log.debug('calculating md5 checksum for "%s"', path) @@ -99,14 +90,13 @@ def get_checksum(path: str) -> str: def get_expanded_path(path: str) -> str: - log.debug('expanding path "%s"', path) - expanded_path: str = os.path.normpath(os.path.expandvars(path)) - if '$' in expanded_path: - log.error('"$" character found in expanded path "%s";' - ' could be due to non-existant env var', expanded_path) + epath: str = os.path.normpath(os.path.expandvars(path)) + if '$' in epath: + log.error('"$" character found in expanded path "%s",' + ' could be due to non-existant env var', epath) sys.exit(1) - log.debug('expanded path "%s" to "%s"', path, expanded_path) - return expanded_path + log.debug('expanded path "%s" to "%s"', path, epath) + return epath def get_time_now(fmt: str, tz: timezone=timezone.utc) -> str: diff --git a/src/pyssg/yaml_parser.py b/src/pyssg/yaml_parser.py deleted file mode 100644 index 227dc63..0000000 --- a/src/pyssg/yaml_parser.py +++ /dev/null @@ -1,27 +0,0 @@ -import yaml -from yaml import SafeLoader -from yaml.nodes import SequenceNode -from importlib.resources import path as rpath -from logging import Logger, getLogger -from typing import Any - -log: Logger = getLogger(__name__) - - -# required to concat values in yaml using !join [value, value, ...] -def __join_constructor(loader: SafeLoader, node: SequenceNode) -> str: - seq = loader.construct_sequence(node) - return ''.join([str(i) for i in seq]) - - -def setup_custom_yaml() -> None: - SafeLoader.add_constructor('!join', __join_constructor) - - -def get_yaml(path: str) -> list[dict[str, Any]]: - all_docs: list[dict[str, Any]] = [] - with open(path, 'r') as f: - for doc in yaml.safe_load_all(f): - all_docs.append(doc) - return all_docs - -- cgit v1.2.3-54-g00ecf