From 98bfded1b407431ad62642d7f029e4e5f3534c07 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Sun, 27 Nov 2022 06:44:07 -0600 Subject: refactor code and fix type checks still need to refactor more code before migrating to yaml config file --- .gitignore | 6 ++++ build_upload.sh | 1 - src/pyssg/arg_parser.py | 63 ++----------------------------------- src/pyssg/builder.py | 24 +++++++------- src/pyssg/configuration.py | 2 +- src/pyssg/database.py | 68 ++++++---------------------------------- src/pyssg/md_parser.py | 20 +++++------- src/pyssg/page.py | 21 +++++++------ src/pyssg/per_level_formatter.py | 8 +++-- src/pyssg/pyssg.py | 57 +++++++++++++++++++-------------- src/pyssg/utils.py | 14 ++++----- 11 files changed, 96 insertions(+), 188 deletions(-) diff --git a/.gitignore b/.gitignore index b6e4761..8bf6475 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,9 @@ dmypy.json # Pyre type checker .pyre/ + +# project specific +.vscode/ +site_example/ +dst/**/*.html +.files \ No newline at end of file diff --git a/build_upload.sh b/build_upload.sh index 7ddd573..03e1f68 100755 --- a/build_upload.sh +++ b/build_upload.sh @@ -7,7 +7,6 @@ echo "building package" python -m build echo "uploading to pypi" -# alternatively, use /bin/python -m twine, i use twine in arch twine upload dist/* echo "removing dist/*" diff --git a/src/pyssg/arg_parser.py b/src/pyssg/arg_parser.py index 2fc6853..2eb7d72 100644 --- a/src/pyssg/arg_parser.py +++ b/src/pyssg/arg_parser.py @@ -1,7 +1,7 @@ -from argparse import ArgumentParser, Namespace +from argparse import ArgumentParser -def get_parsed_arguments() -> Namespace: +def get_parser() -> ArgumentParser: parser = ArgumentParser(prog='pyssg', description='''Static Site Generator that parses Markdown files into HTML files. 
For datetime @@ -37,62 +37,5 @@ def get_parsed_arguments() -> Namespace: parser.add_argument('--debug', action='store_true', help='''change logging level from info to debug''') - parser.add_argument('--add-checksum-to-db', - action='store_true', - help='''add checksum column to db entries''') - # really not needed, too much bloat and case scenarios to check for, - # instead, just read from config file or default config file - """ - parser.add_argument('-s', '--src', - default='src', - type=str, - help='''src directory; handmade files, templates and - metadata directory; defaults to 'src' ''') - parser.add_argument('-d', '--dst', - default='dst', - type=str, - help='''dst directory; generated (and transfered html) - files; defaults to 'dst' ''') - parser.add_argument('-t', '--plt', - default='plt', - type=str, - help='''plt directory; all template files; defaults to - 'plt' ''') - parser.add_argument('-u', '--url', - default='', - type=str, - help='''base url without trailing slash''') - parser.add_argument('--static-url', - default='', - type=str, - help='''base static url without trailing slash''') - parser.add_argument('--default-image-url', - default='', - type=str, - help='''default image url''') - parser.add_argument('--title', - default='Blog', - type=str, - help='''general title for the website; defaults to - 'Blog' ''') - parser.add_argument('--date-format', - default='%a, %b %d, %Y @ %H:%M %Z', - type=str, - help='''date format used inside pages (for creation and - modification times, for example); defaults to '%%a, %%b - %%d, %%Y @ %%H:%%M %%Z' ('Tue, Mar 16, 2021 @ 02:46 UTC', - for example)''') - parser.add_argument('--list-date-format', - default='%b %d', - type=str, - help='''date format used for page entries in a list; - defaults to '%%b %%d' ('Mar 16', for example)''') - parser.add_argument('--list-sep-date-format', - default='%B %Y', - type=str, - help='''date format used for the separator between page - entries in a list; defaults to '%%B %%Y' 
('March 2021', - for example)''') - """ - return parser.parse_args() + return parser diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 6d65187..f0ca802 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -30,15 +30,15 @@ class Builder: trim_blocks=True, lstrip_blocks=True) - self.dirs: list[str] = None - self.md_files: list[str] = None - self.html_files: list[str] = None + self.dirs: list[str] + self.md_files: list[str] + self.html_files: list[str] # files and pages are synoyms - self.all_files: list[Page] = None - self.updated_files: list[Page] = None - self.all_tags: list[str] = None - self.common_vars: dict = None + self.all_files: list[Page] + self.updated_files: list[Page] + self.all_tags: list[tuple[str, str]] + self.common_vars: dict def build(self) -> None: @@ -80,7 +80,7 @@ class Builder: def __create_dir_structure(self) -> None: log.debug('creating dir structure') - dir_path: str = None + dir_path: str for d in self.dirs: dir_path = os.path.join(self.config.get('path', 'dst'), d) # using silent=True to not print the info create dir msgs for this @@ -92,8 +92,8 @@ class Builder: log.debug('copying all html files') else: log.debug('no html files to copy') - src_file: str = None - dst_file: str = None + src_file: str + dst_file: str for f in self.html_files: src_file = os.path.join(self.config.get('path', 'src'), f) @@ -114,7 +114,7 @@ class Builder: def __render_articles(self) -> None: log.debug('rendering html') article_vars: dict = deepcopy(self.common_vars) - temp_files: list[Page] = None + temp_files: list[Page] # check if only updated should be created if self.config.getboolean('other', 'force'): @@ -136,7 +136,7 @@ class Builder: def __render_tags(self) -> None: log.debug('rendering tags') tag_vars: dict = deepcopy(self.common_vars) - tag_pages: list[Page] = None + tag_pages: list[Page] for t in self.all_tags: log.debug('rendering tag "%s"', t[0]) # clean tag_pages diff --git a/src/pyssg/configuration.py 
b/src/pyssg/configuration.py index 32a010b..d420fe8 100644 --- a/src/pyssg/configuration.py +++ b/src/pyssg/configuration.py @@ -10,7 +10,7 @@ from .utils import get_expanded_path log: Logger = getLogger(__name__) -DEFAULT_CONFIG_PATH = '$XDG_CONFIG_HOME/pyssg/config.ini' +DEFAULT_CONFIG_PATH: str = '$XDG_CONFIG_HOME/pyssg/config.ini' VERSION = version('pyssg') diff --git a/src/pyssg/database.py b/src/pyssg/database.py index 290ba51..913adb7 100644 --- a/src/pyssg/database.py +++ b/src/pyssg/database.py @@ -10,7 +10,6 @@ log: Logger = getLogger(__name__) # db class that works for both html and md files class Database: - __OLD_COLUMN_NUM: int = 4 __COLUMN_NUM: int = 5 def __init__(self, db_path: str, @@ -42,12 +41,12 @@ class Database: # returns a bool that indicates if the entry # was (includes new entries) or wasn't updated def update(self, file_name: str, - remove: str=None) -> bool: + remove: str='') -> bool: log.debug('updating entry for file "%s"', file_name) # initial default values f: str = file_name tags: list[str] = [] - if remove is not None: + if remove != '': f = file_name.replace(remove, '') log.debug('removed "%s" from "%s": "%s"', remove, file_name, f) @@ -94,8 +93,8 @@ class Database: with open(self.db_path, 'w') as file: for k, v in self.e.items(): log.debug('parsing row for page "%s"', k) - t: str = None - row: str = None + t: str + row: str if len(v[3]) == 0: t = '-' else: @@ -123,7 +122,7 @@ class Database: def _read_raw(self) -> list[str]: - rows: list[str] = None + rows: list[str] with open(self.db_path, 'r') as file: rows = file.readlines() log.debug('db contains %d rows', len(rows)) @@ -131,70 +130,22 @@ class Database: return rows - def read_old(self) -> None: - log.debug('reading db with old schema (%d columns)', self.__OLD_COLUMN_NUM) - if not self._db_path_exists(): - log.error('db path "%s" desn\'t exist, --add-checksum-to-db should' - 'only be used when updating the old db schema', self.db_path) - sys.exit(1) - - rows: list[str] = 
self._read_raw() - cols: list[str] = None - # l=list of values in entry - log.debug('parsing rows from db') - for it, row in enumerate(rows): - i: int = it + 1 - r: str = row.strip() - log.debug('row %d content: "%s"', i, r) - # (file_name, ctimestamp, mtimestamp, [tags]) - cols: tuple[str, float, float, list[str]] = tuple(r.split()) - col_num: int = len(cols) - if col_num != self.__OLD_COLUMN_NUM: - log.critical('row %d doesn\'t contain %s columns, contains %d' - ' columns: "%s"', - i, self.__OLD_COLUMN_NUM, col_num, r) - sys.exit(1) - - t: list[str] = None - if cols[3] == '-': - t = [] - else: - t = cols[3].split(',') - log.debug('tag content: (%s)', ', '.join(t)) - file_path: str = os.path.join(self.config.get('path', 'src'), cols[0]) - checksum: str = get_checksum(file_path) - log.debug('checksum for "%s": "%s"', file_path, checksum) - - self.e[cols[0]] = (float(cols[1]), float(cols[2]), checksum, t) - - - def read(self) -> None: log.debug('reading db') if not self._db_path_exists(): return rows: list[str] = self._read_raw() - cols: list[str] = None # l=list of values in entry log.debug('parsing rows from db') for it, row in enumerate(rows): i: int = it + 1 r: str = row.strip() log.debug('row %d content: "%s"', i, r) + # ignoring type error, as i'm doing the check later # (file_name, ctimestamp, mtimestamp, checksum, [tags]) - cols: tuple[str, float, float, str, list[str]] = tuple(r.split()) + cols: tuple[str, float, float, str, list[str]] = tuple(r.split()) # type: ignore col_num: int = len(cols) - if col_num == self.__OLD_COLUMN_NUM: - log.error('row %d contains %d columns: "%s"; this is probably' - ' because of missing checksum column, which is used' - ' now to also check if a file has changed. 
Rerun' - ' with flag --add-checksum-to-db to add the checksum' - ' column to the current db; if you did any changes' - ' since last timestamp in db, it won\'t update' - ' modification timestamp', - i, self.__OLD_COLUMN_NUM, r) - sys.exit(1) if col_num != self.__COLUMN_NUM: log.critical('row %d doesn\'t contain %s columns, contains %d' @@ -202,11 +153,12 @@ class Database: i, self.__COLUMN_NUM, col_num, r) sys.exit(1) - t: list[str] = None + t: list[str] if cols[4] == '-': t = [] else: - t = cols[4].split(',') + # ignoring type error, the "check" is done in this whole if/else + t = cols[4].split(',') # type: ignore log.debug('tag content: (%s)', ', '.join(t)) self.e[cols[0]] = (float(cols[1]), float(cols[2]), cols[3], t) diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py index 759ead6..664532a 100644 --- a/src/pyssg/md_parser.py +++ b/src/pyssg/md_parser.py @@ -36,7 +36,9 @@ def _get_md_obj() -> Markdown: log.debug('list of md extensions: (%s)', ', '.join([e if isinstance(e, str) else type(e).__name__ for e in exts])) - return Markdown(extensions=exts, output_format='html5') + # for some reason, the definition for output_format doesn't include html5 + # even though it is listed in the documentation, ignoring + return Markdown(extensions=exts, output_format='html5') # type: ignore # page and file is basically a synonym here... 
@@ -51,21 +53,14 @@ class MDParser: self.db: Database = db self.md: Markdown = _get_md_obj() - self.all_files: list[Page] = None + self.all_files: list[Page] = [] # updated and modified are synonyms here - self.updated_files: list[Page] = None - self.all_tags: list[tuple[str]] = None + self.updated_files: list[Page] = [] + self.all_tags: list[tuple[str, str]] = [] def parse_files(self) -> None: log.debug('parsing all files') - # initialize lists - self.all_files = [] - self.updated_files = [] - self.all_tags = [] - # not used, not sure why i had this - # all_tag_names: list[str] = [] - for f in self.files: log.debug('parsing file "%s"', f) src_file: str = os.path.join(self.config.get('path', 'src'), f) @@ -75,11 +70,12 @@ class MDParser: log.debug('parsing md into html') content: str = self.md.reset().convert(open(src_file).read()) + # ignoring md.Meta type as it is not yet defined (because it is from an extension) page: Page = Page(f, self.db.e[f][0], self.db.e[f][1], content, - self.md.Meta, + self.md.Meta, # type: ignore self.config) page.parse_metadata() diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 21add82..467dd7e 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -27,11 +27,11 @@ class Page: # data from self.meta self.title: str = '' self.author: str = '' - self.cdatetime: datetime = None - self.mdatetime: datetime = None + self.cdatetime: datetime + self.mdatetime: datetime self.summary: str = '' self.lang: str = 'en' - self.tags: list[tuple[str]] = [] + self.tags: list[tuple[str, str]] = [] # constructed self.url: str = '' @@ -41,15 +41,16 @@ class Page: self.cdate_list_sep: str = '' self.cdate_rss: str = '' self.cdate_sitemap: str = '' - self.mdate: str = None - self.mdate_list: str = None - self.mdate_list_sep: str = None + self.mdate: str + self.mdate_list: str + self.mdate_list_sep: str self.mdate_rss: str = '' self.mdate_sitemap: str = '' # later assigned references to next and previous pages - self.next: Page = None - self.previous: 
Page = None + # not always assigned (tail ends), and the None helps check it, ignoring + self.next: Page = None # type: ignore + self.previous: Page = None # type: ignore # also from self.meta, but for og metadata self.og: dict[str, str] = dict() @@ -70,7 +71,7 @@ class Page: # parses meta from self.meta, for og, it prioritizes, - # the actual og meta + # the actual og meta def parse_metadata(self): log.debug('parsing metadata for file "%s"', self.name) self.title = self.__get_mandatory_meta('title') @@ -132,7 +133,7 @@ class Page: og_elements: list[str] = self.meta['og'] log.debug('parsing og metadata') for og_e in og_elements: - kv: str = og_e.split(',', 1) + kv: list[str] = og_e.split(',', 1) if len(kv) != 2: log.error('invalid og syntax for "%s", needs to be "k, v"', og_e) sys.exit(1) diff --git a/src/pyssg/per_level_formatter.py b/src/pyssg/per_level_formatter.py index 5ab3946..04f943b 100644 --- a/src/pyssg/per_level_formatter.py +++ b/src/pyssg/per_level_formatter.py @@ -1,4 +1,4 @@ -from logging import Formatter, DEBUG, INFO, WARNING, ERROR, CRITICAL +from logging import Formatter, LogRecord, DEBUG, INFO, WARNING, ERROR, CRITICAL # only reason for this class is to get info formatting as normal text # and everything else with more info and with colors @@ -20,8 +20,10 @@ class PerLevelFormatter(Formatter): } - def format(self, record: str) -> str: - fmt: str = self.__FORMATS.get(record.levelno) + def format(self, record: LogRecord) -> str: + # this should never fail, as __FORMATS is defined above, + # so no issue of just converting to str + fmt: str = str(self.__FORMATS.get(record.levelno)) formatter: Formatter = Formatter( fmt=fmt, datefmt=self.__DATE_FMT, style='%') diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index eb042b6..a496b34 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -4,9 +4,10 @@ from importlib.resources import path as rpath from typing import Union from configparser import ConfigParser from logging import Logger, 
getLogger, DEBUG +from argparse import ArgumentParser +from .arg_parser import get_parser from .utils import create_dir, copy_file, get_expanded_path -from .arg_parser import get_parsed_arguments from .configuration import get_parsed_config, DEFAULT_CONFIG_PATH, VERSION from .database import Database from .builder import Builder @@ -15,7 +16,30 @@ log: Logger = getLogger(__name__) def main() -> None: - args: dict[str, Union[str, bool]] = vars(get_parsed_arguments()) + arg_parser: ArgumentParser = get_parser() + args: dict[str, Union[str, bool]] = vars(arg_parser.parse_args()) + + # too messy to place at utils.py, don't want to be + # passing the arg parser around + def _log_perror(message: str) -> None: + arg_parser.print_usage() + # even if it's an error, print it as info + # as it is not critical, only config related + log.info(message) + sys.exit(1) + + # -1 as first argument is program path + num_args = len(sys.argv) - 1 + if num_args == 2 and args['config']: + _log_perror('pyssg: error: only config argument passed, --help for more') + elif not num_args > 0 or (num_args == 1 and args['debug']): + _log_perror('pyssg: error: no arguments passed, --help for more') + elif num_args == 3 and (args['debug'] and args['config']): + _log_perror("pyssg: error: no arguments passed other than 'debug' and 'config', --help for more") + + if args['version']: + log.info('pyssg v%s', VERSION) + sys.exit(0) if args['debug']: # need to modify the root logger specifically, @@ -27,15 +51,7 @@ def main() -> None: handler.setLevel(DEBUG) log.debug('changed logging level to DEBUG') - if not len(sys.argv) > 1 or (len(sys.argv) == 2 and args['debug']): - log.info('pyssg v%s - no arguments passed, --help for more', VERSION) - sys.exit(0) - - if args['version']: - log.info('pyssg v%s', VERSION) - sys.exit(0) - - config_path: str = args['config'] if args['config'] else DEFAULT_CONFIG_PATH + config_path: str = str(args['config']) if args['config'] else DEFAULT_CONFIG_PATH config_path = 
get_expanded_path(config_path) config_dir, _ = os.path.split(config_path) log.debug('checked config file path, final config path "%s"', config_path) @@ -44,7 +60,7 @@ def main() -> None: log.info('copying default config file') create_dir(config_dir) with rpath('pyssg.plt', 'default.ini') as p: - copy_file(p, config_path) + copy_file(str(p), config_path) sys.exit(0) if not os.path.exists(config_path): @@ -61,27 +77,19 @@ def main() -> None: create_dir(config.get('path', 'src')) create_dir(os.path.join(config.get('path', 'dst'), 'tag'), True) create_dir(config.get('path', 'plt')) - files: list[str] = ('index.html', + files: list[str] = ['index.html', 'page.html', 'tag.html', 'rss.xml', - 'sitemap.xml') + 'sitemap.xml'] log.debug('list of files to copy over: (%s)', ', '.join(files)) for f in files: plt_file: str = os.path.join(config.get('path', 'plt'), f) with rpath('pyssg.plt', f) as p: - copy_file(p, plt_file) + copy_file(str(p), plt_file) + log.info('finished initialization') sys.exit(0) - if args['add_checksum_to_db']: - log.info('adding checksum column to existing db') - db_path: str = os.path.join(config.get('path', 'src'), '.files') - db: Database = Database(db_path, config) - # needs to be read_old instead of read - db.read_old() - db.write() - - sys.exit(0) if args['build']: log.info('building the html files') @@ -93,4 +101,5 @@ def main() -> None: builder.build() db.write() + log.info('finished building the html files') sys.exit(0) diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py index 4b525cf..3e05d0a 100644 --- a/src/pyssg/utils.py +++ b/src/pyssg/utils.py @@ -9,14 +9,14 @@ log: Logger = getLogger(__name__) def get_file_list(path: str, exts: list[str], - exclude: list[str]=None) -> list[str]: + exclude: list[str]=[]) -> list[str]: log.debug('retrieving file list in path "%s" that contain file' ' extensions (%s) except (%s)', path, ', '.join(exts), - ', '.join(exclude if exclude is not None else [])) + ', '.join(exclude)) out: list[str] = [] for root, 
dirs, files in os.walk(path): - if exclude is not None: + if exclude != []: log.debug('removing excludes from list') dirs[:] = [d for d in dirs if d not in exclude] @@ -34,12 +34,12 @@ def get_file_list(path: str, def get_dir_structure(path: str, - exclude: list[str]=None) -> list[str]: + exclude: list[str]=[]) -> list[str]: log.debug('retrieving dir structure in path "%s" except (%s)', - path, ', '.join(exclude if exclude is not None else [])) + path, ', '.join(exclude)) out: list[str] = [] for root, dirs, files in os.walk(path): - if exclude is not None: + if exclude != []: log.debug('removing excludes from list') dirs[:] = [d for d in dirs if d not in exclude] @@ -85,7 +85,7 @@ def get_checksum(path: str) -> str: return file_hash.hexdigest() -def get_expanded_path(path: str) -> None: +def get_expanded_path(path: str) -> str: log.debug('expanding path "%s"', path) expanded_path: str = os.path.normpath(os.path.expandvars(path)) if '$' in expanded_path: -- cgit v1.2.3-54-g00ecf