summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Luevano Alvarado <david@luevano.xyz> 2022-11-27 06:44:07 -0600
committer David Luevano Alvarado <david@luevano.xyz> 2022-11-27 06:44:07 -0600
commit 98bfded1b407431ad62642d7f029e4e5f3534c07 (patch)
tree 8f38dd229fe54889d130eb0838c9c47619e96376
parent 9ea43f16d6440dce54ae7f78d46618d25e52f6ec (diff)
refactor code and fix type checks
still need to refactor more code before migrating to yaml config file
-rw-r--r-- .gitignore 6
-rwxr-xr-x build_upload.sh 1
-rw-r--r-- src/pyssg/arg_parser.py 63
-rw-r--r-- src/pyssg/builder.py 24
-rw-r--r-- src/pyssg/configuration.py 2
-rw-r--r-- src/pyssg/database.py 68
-rw-r--r-- src/pyssg/md_parser.py 20
-rw-r--r-- src/pyssg/page.py 21
-rw-r--r-- src/pyssg/per_level_formatter.py 8
-rw-r--r-- src/pyssg/pyssg.py 57
-rw-r--r-- src/pyssg/utils.py 14
11 files changed, 96 insertions, 188 deletions
diff --git a/.gitignore b/.gitignore
index b6e4761..8bf6475 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,9 @@ dmypy.json
# Pyre type checker
.pyre/
+
+# project specific
+.vscode/
+site_example/
+dst/**/*.html
+.files
\ No newline at end of file
diff --git a/build_upload.sh b/build_upload.sh
index 7ddd573..03e1f68 100755
--- a/build_upload.sh
+++ b/build_upload.sh
@@ -7,7 +7,6 @@ echo "building package"
python -m build
echo "uploading to pypi"
-# alternatively, use /bin/python -m twine, i use twine in arch
twine upload dist/*
echo "removing dist/*"
diff --git a/src/pyssg/arg_parser.py b/src/pyssg/arg_parser.py
index 2fc6853..2eb7d72 100644
--- a/src/pyssg/arg_parser.py
+++ b/src/pyssg/arg_parser.py
@@ -1,7 +1,7 @@
-from argparse import ArgumentParser, Namespace
+from argparse import ArgumentParser
-def get_parsed_arguments() -> Namespace:
+def get_parser() -> ArgumentParser:
parser = ArgumentParser(prog='pyssg',
description='''Static Site Generator that parses
Markdown files into HTML files. For datetime
@@ -37,62 +37,5 @@ def get_parsed_arguments() -> Namespace:
parser.add_argument('--debug',
action='store_true',
help='''change logging level from info to debug''')
- parser.add_argument('--add-checksum-to-db',
- action='store_true',
- help='''add checksum column to db entries''')
- # really not needed, too much bloat and case scenarios to check for,
- # instead, just read from config file or default config file
- """
- parser.add_argument('-s', '--src',
- default='src',
- type=str,
- help='''src directory; handmade files, templates and
- metadata directory; defaults to 'src' ''')
- parser.add_argument('-d', '--dst',
- default='dst',
- type=str,
- help='''dst directory; generated (and transfered html)
- files; defaults to 'dst' ''')
- parser.add_argument('-t', '--plt',
- default='plt',
- type=str,
- help='''plt directory; all template files; defaults to
- 'plt' ''')
- parser.add_argument('-u', '--url',
- default='',
- type=str,
- help='''base url without trailing slash''')
- parser.add_argument('--static-url',
- default='',
- type=str,
- help='''base static url without trailing slash''')
- parser.add_argument('--default-image-url',
- default='',
- type=str,
- help='''default image url''')
- parser.add_argument('--title',
- default='Blog',
- type=str,
- help='''general title for the website; defaults to
- 'Blog' ''')
- parser.add_argument('--date-format',
- default='%a, %b %d, %Y @ %H:%M %Z',
- type=str,
- help='''date format used inside pages (for creation and
- modification times, for example); defaults to '%%a, %%b
- %%d, %%Y @ %%H:%%M %%Z' ('Tue, Mar 16, 2021 @ 02:46 UTC',
- for example)''')
- parser.add_argument('--list-date-format',
- default='%b %d',
- type=str,
- help='''date format used for page entries in a list;
- defaults to '%%b %%d' ('Mar 16', for example)''')
- parser.add_argument('--list-sep-date-format',
- default='%B %Y',
- type=str,
- help='''date format used for the separator between page
- entries in a list; defaults to '%%B %%Y' ('March 2021',
- for example)''')
- """
- return parser.parse_args()
+ return parser
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index 6d65187..f0ca802 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -30,15 +30,15 @@ class Builder:
trim_blocks=True,
lstrip_blocks=True)
- self.dirs: list[str] = None
- self.md_files: list[str] = None
- self.html_files: list[str] = None
+ self.dirs: list[str]
+ self.md_files: list[str]
+ self.html_files: list[str]
# files and pages are synonyms
- self.all_files: list[Page] = None
- self.updated_files: list[Page] = None
- self.all_tags: list[str] = None
- self.common_vars: dict = None
+ self.all_files: list[Page]
+ self.updated_files: list[Page]
+ self.all_tags: list[tuple[str, str]]
+ self.common_vars: dict
def build(self) -> None:
@@ -80,7 +80,7 @@ class Builder:
def __create_dir_structure(self) -> None:
log.debug('creating dir structure')
- dir_path: str = None
+ dir_path: str
for d in self.dirs:
dir_path = os.path.join(self.config.get('path', 'dst'), d)
# using silent=True to not print the info create dir msgs for this
@@ -92,8 +92,8 @@ class Builder:
log.debug('copying all html files')
else:
log.debug('no html files to copy')
- src_file: str = None
- dst_file: str = None
+ src_file: str
+ dst_file: str
for f in self.html_files:
src_file = os.path.join(self.config.get('path', 'src'), f)
@@ -114,7 +114,7 @@ class Builder:
def __render_articles(self) -> None:
log.debug('rendering html')
article_vars: dict = deepcopy(self.common_vars)
- temp_files: list[Page] = None
+ temp_files: list[Page]
# check if only updated should be created
if self.config.getboolean('other', 'force'):
@@ -136,7 +136,7 @@ class Builder:
def __render_tags(self) -> None:
log.debug('rendering tags')
tag_vars: dict = deepcopy(self.common_vars)
- tag_pages: list[Page] = None
+ tag_pages: list[Page]
for t in self.all_tags:
log.debug('rendering tag "%s"', t[0])
# clean tag_pages
diff --git a/src/pyssg/configuration.py b/src/pyssg/configuration.py
index 32a010b..d420fe8 100644
--- a/src/pyssg/configuration.py
+++ b/src/pyssg/configuration.py
@@ -10,7 +10,7 @@ from .utils import get_expanded_path
log: Logger = getLogger(__name__)
-DEFAULT_CONFIG_PATH = '$XDG_CONFIG_HOME/pyssg/config.ini'
+DEFAULT_CONFIG_PATH: str = '$XDG_CONFIG_HOME/pyssg/config.ini'
VERSION = version('pyssg')
diff --git a/src/pyssg/database.py b/src/pyssg/database.py
index 290ba51..913adb7 100644
--- a/src/pyssg/database.py
+++ b/src/pyssg/database.py
@@ -10,7 +10,6 @@ log: Logger = getLogger(__name__)
# db class that works for both html and md files
class Database:
- __OLD_COLUMN_NUM: int = 4
__COLUMN_NUM: int = 5
def __init__(self, db_path: str,
@@ -42,12 +41,12 @@ class Database:
# returns a bool that indicates if the entry
# was (includes new entries) or wasn't updated
def update(self, file_name: str,
- remove: str=None) -> bool:
+ remove: str='') -> bool:
log.debug('updating entry for file "%s"', file_name)
# initial default values
f: str = file_name
tags: list[str] = []
- if remove is not None:
+ if remove != '':
f = file_name.replace(remove, '')
log.debug('removed "%s" from "%s": "%s"', remove, file_name, f)
@@ -94,8 +93,8 @@ class Database:
with open(self.db_path, 'w') as file:
for k, v in self.e.items():
log.debug('parsing row for page "%s"', k)
- t: str = None
- row: str = None
+ t: str
+ row: str
if len(v[3]) == 0:
t = '-'
else:
@@ -123,7 +122,7 @@ class Database:
def _read_raw(self) -> list[str]:
- rows: list[str] = None
+ rows: list[str]
with open(self.db_path, 'r') as file:
rows = file.readlines()
log.debug('db contains %d rows', len(rows))
@@ -131,70 +130,22 @@ class Database:
return rows
- def read_old(self) -> None:
- log.debug('reading db with old schema (%d columns)', self.__OLD_COLUMN_NUM)
- if not self._db_path_exists():
- log.error('db path "%s" desn\'t exist, --add-checksum-to-db should'
- 'only be used when updating the old db schema', self.db_path)
- sys.exit(1)
-
- rows: list[str] = self._read_raw()
- cols: list[str] = None
- # l=list of values in entry
- log.debug('parsing rows from db')
- for it, row in enumerate(rows):
- i: int = it + 1
- r: str = row.strip()
- log.debug('row %d content: "%s"', i, r)
- # (file_name, ctimestamp, mtimestamp, [tags])
- cols: tuple[str, float, float, list[str]] = tuple(r.split())
- col_num: int = len(cols)
- if col_num != self.__OLD_COLUMN_NUM:
- log.critical('row %d doesn\'t contain %s columns, contains %d'
- ' columns: "%s"',
- i, self.__OLD_COLUMN_NUM, col_num, r)
- sys.exit(1)
-
- t: list[str] = None
- if cols[3] == '-':
- t = []
- else:
- t = cols[3].split(',')
- log.debug('tag content: (%s)', ', '.join(t))
- file_path: str = os.path.join(self.config.get('path', 'src'), cols[0])
- checksum: str = get_checksum(file_path)
- log.debug('checksum for "%s": "%s"', file_path, checksum)
-
- self.e[cols[0]] = (float(cols[1]), float(cols[2]), checksum, t)
-
-
-
def read(self) -> None:
log.debug('reading db')
if not self._db_path_exists():
return
rows: list[str] = self._read_raw()
- cols: list[str] = None
# l=list of values in entry
log.debug('parsing rows from db')
for it, row in enumerate(rows):
i: int = it + 1
r: str = row.strip()
log.debug('row %d content: "%s"', i, r)
+ # ignoring type error, as i'm doing the check later
# (file_name, ctimestamp, mtimestamp, checksum, [tags])
- cols: tuple[str, float, float, str, list[str]] = tuple(r.split())
+ cols: tuple[str, float, float, str, list[str]] = tuple(r.split()) # type: ignore
col_num: int = len(cols)
- if col_num == self.__OLD_COLUMN_NUM:
- log.error('row %d contains %d columns: "%s"; this is probably'
- ' because of missing checksum column, which is used'
- ' now to also check if a file has changed. Rerun'
- ' with flag --add-checksum-to-db to add the checksum'
- ' column to the current db; if you did any changes'
- ' since last timestamp in db, it won\'t update'
- ' modification timestamp',
- i, self.__OLD_COLUMN_NUM, r)
- sys.exit(1)
if col_num != self.__COLUMN_NUM:
log.critical('row %d doesn\'t contain %s columns, contains %d'
@@ -202,11 +153,12 @@ class Database:
i, self.__COLUMN_NUM, col_num, r)
sys.exit(1)
- t: list[str] = None
+ t: list[str]
if cols[4] == '-':
t = []
else:
- t = cols[4].split(',')
+ # ignoring type error, the "check" is done in this whole if/else
+ t = cols[4].split(',') # type: ignore
log.debug('tag content: (%s)', ', '.join(t))
self.e[cols[0]] = (float(cols[1]), float(cols[2]), cols[3], t)
diff --git a/src/pyssg/md_parser.py b/src/pyssg/md_parser.py
index 759ead6..664532a 100644
--- a/src/pyssg/md_parser.py
+++ b/src/pyssg/md_parser.py
@@ -36,7 +36,9 @@ def _get_md_obj() -> Markdown:
log.debug('list of md extensions: (%s)',
', '.join([e if isinstance(e, str) else type(e).__name__
for e in exts]))
- return Markdown(extensions=exts, output_format='html5')
+ # for some reason, the definition for output_format doesn't include html5
+ # even though it is listed in the documentation, ignoring
+ return Markdown(extensions=exts, output_format='html5') # type: ignore
# page and file is basically a synonym here...
@@ -51,21 +53,14 @@ class MDParser:
self.db: Database = db
self.md: Markdown = _get_md_obj()
- self.all_files: list[Page] = None
+ self.all_files: list[Page] = []
# updated and modified are synonyms here
- self.updated_files: list[Page] = None
- self.all_tags: list[tuple[str]] = None
+ self.updated_files: list[Page] = []
+ self.all_tags: list[tuple[str, str]] = []
def parse_files(self) -> None:
log.debug('parsing all files')
- # initialize lists
- self.all_files = []
- self.updated_files = []
- self.all_tags = []
- # not used, not sure why i had this
- # all_tag_names: list[str] = []
-
for f in self.files:
log.debug('parsing file "%s"', f)
src_file: str = os.path.join(self.config.get('path', 'src'), f)
@@ -75,11 +70,12 @@ class MDParser:
log.debug('parsing md into html')
content: str = self.md.reset().convert(open(src_file).read())
+ # ignoring md.Meta type as it is not yet defined (because it is from an extension)
page: Page = Page(f,
self.db.e[f][0],
self.db.e[f][1],
content,
- self.md.Meta,
+ self.md.Meta, # type: ignore
self.config)
page.parse_metadata()
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 21add82..467dd7e 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -27,11 +27,11 @@ class Page:
# data from self.meta
self.title: str = ''
self.author: str = ''
- self.cdatetime: datetime = None
- self.mdatetime: datetime = None
+ self.cdatetime: datetime
+ self.mdatetime: datetime
self.summary: str = ''
self.lang: str = 'en'
- self.tags: list[tuple[str]] = []
+ self.tags: list[tuple[str, str]] = []
# constructed
self.url: str = ''
@@ -41,15 +41,16 @@ class Page:
self.cdate_list_sep: str = ''
self.cdate_rss: str = ''
self.cdate_sitemap: str = ''
- self.mdate: str = None
- self.mdate_list: str = None
- self.mdate_list_sep: str = None
+ self.mdate: str
+ self.mdate_list: str
+ self.mdate_list_sep: str
self.mdate_rss: str = ''
self.mdate_sitemap: str = ''
# later assigned references to next and previous pages
- self.next: Page = None
- self.previous: Page = None
+ # not always assigned (tail ends), and the None helps check it, ignoring
+ self.next: Page = None # type: ignore
+ self.previous: Page = None # type: ignore
# also from self.meta, but for og metadata
self.og: dict[str, str] = dict()
@@ -70,7 +71,7 @@ class Page:
# parses meta from self.meta, for og, it prioritizes,
- # the actual og meta
+ # the actual og meta
def parse_metadata(self):
log.debug('parsing metadata for file "%s"', self.name)
self.title = self.__get_mandatory_meta('title')
@@ -132,7 +133,7 @@ class Page:
og_elements: list[str] = self.meta['og']
log.debug('parsing og metadata')
for og_e in og_elements:
- kv: str = og_e.split(',', 1)
+ kv: list[str] = og_e.split(',', 1)
if len(kv) != 2:
log.error('invalid og syntax for "%s", needs to be "k, v"', og_e)
sys.exit(1)
diff --git a/src/pyssg/per_level_formatter.py b/src/pyssg/per_level_formatter.py
index 5ab3946..04f943b 100644
--- a/src/pyssg/per_level_formatter.py
+++ b/src/pyssg/per_level_formatter.py
@@ -1,4 +1,4 @@
-from logging import Formatter, DEBUG, INFO, WARNING, ERROR, CRITICAL
+from logging import Formatter, LogRecord, DEBUG, INFO, WARNING, ERROR, CRITICAL
# only reason for this class is to get info formatting as normal text
# and everything else with more info and with colors
@@ -20,8 +20,10 @@ class PerLevelFormatter(Formatter):
}
- def format(self, record: str) -> str:
- fmt: str = self.__FORMATS.get(record.levelno)
+ def format(self, record: LogRecord) -> str:
+ # this should never fail, as __FORMATS is defined above,
+ # so no issue of just converting to str
+ fmt: str = str(self.__FORMATS.get(record.levelno))
formatter: Formatter = Formatter(
fmt=fmt, datefmt=self.__DATE_FMT, style='%')
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index eb042b6..a496b34 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -4,9 +4,10 @@ from importlib.resources import path as rpath
from typing import Union
from configparser import ConfigParser
from logging import Logger, getLogger, DEBUG
+from argparse import ArgumentParser
+from .arg_parser import get_parser
from .utils import create_dir, copy_file, get_expanded_path
-from .arg_parser import get_parsed_arguments
from .configuration import get_parsed_config, DEFAULT_CONFIG_PATH, VERSION
from .database import Database
from .builder import Builder
@@ -15,7 +16,30 @@ log: Logger = getLogger(__name__)
def main() -> None:
- args: dict[str, Union[str, bool]] = vars(get_parsed_arguments())
+ arg_parser: ArgumentParser = get_parser()
+ args: dict[str, Union[str, bool]] = vars(arg_parser.parse_args())
+
+ # too messy to place at utils.py, don't want to be
+ # passing the arg parser around
+ def _log_perror(message: str) -> None:
+ arg_parser.print_usage()
+ # even if it's an error, print it as info
+ # as it is not critical, only config related
+ log.info(message)
+ sys.exit(1)
+
+ # -1 as first argument is program path
+ num_args = len(sys.argv) - 1
+ if num_args == 2 and args['config']:
+ _log_perror('pyssg: error: only config argument passed, --help for more')
+ elif not num_args > 0 or (num_args == 1 and args['debug']):
+ _log_perror('pyssg: error: no arguments passed, --help for more')
+ elif num_args == 3 and (args['debug'] and args['config']):
+ _log_perror("pyssg: error: no arguments passed other than 'debug' and 'config', --help for more")
+
+ if args['version']:
+ log.info('pyssg v%s', VERSION)
+ sys.exit(0)
if args['debug']:
# need to modify the root logger specifically,
@@ -27,15 +51,7 @@ def main() -> None:
handler.setLevel(DEBUG)
log.debug('changed logging level to DEBUG')
- if not len(sys.argv) > 1 or (len(sys.argv) == 2 and args['debug']):
- log.info('pyssg v%s - no arguments passed, --help for more', VERSION)
- sys.exit(0)
-
- if args['version']:
- log.info('pyssg v%s', VERSION)
- sys.exit(0)
-
- config_path: str = args['config'] if args['config'] else DEFAULT_CONFIG_PATH
+ config_path: str = str(args['config']) if args['config'] else DEFAULT_CONFIG_PATH
config_path = get_expanded_path(config_path)
config_dir, _ = os.path.split(config_path)
log.debug('checked config file path, final config path "%s"', config_path)
@@ -44,7 +60,7 @@ def main() -> None:
log.info('copying default config file')
create_dir(config_dir)
with rpath('pyssg.plt', 'default.ini') as p:
- copy_file(p, config_path)
+ copy_file(str(p), config_path)
sys.exit(0)
if not os.path.exists(config_path):
@@ -61,27 +77,19 @@ def main() -> None:
create_dir(config.get('path', 'src'))
create_dir(os.path.join(config.get('path', 'dst'), 'tag'), True)
create_dir(config.get('path', 'plt'))
- files: list[str] = ('index.html',
+ files: list[str] = ['index.html',
'page.html',
'tag.html',
'rss.xml',
- 'sitemap.xml')
+ 'sitemap.xml']
log.debug('list of files to copy over: (%s)', ', '.join(files))
for f in files:
plt_file: str = os.path.join(config.get('path', 'plt'), f)
with rpath('pyssg.plt', f) as p:
- copy_file(p, plt_file)
+ copy_file(str(p), plt_file)
+ log.info('finished initialization')
sys.exit(0)
- if args['add_checksum_to_db']:
- log.info('adding checksum column to existing db')
- db_path: str = os.path.join(config.get('path', 'src'), '.files')
- db: Database = Database(db_path, config)
- # needs to be read_old instead of read
- db.read_old()
- db.write()
-
- sys.exit(0)
if args['build']:
log.info('building the html files')
@@ -93,4 +101,5 @@ def main() -> None:
builder.build()
db.write()
+ log.info('finished building the html files')
sys.exit(0)
diff --git a/src/pyssg/utils.py b/src/pyssg/utils.py
index 4b525cf..3e05d0a 100644
--- a/src/pyssg/utils.py
+++ b/src/pyssg/utils.py
@@ -9,14 +9,14 @@ log: Logger = getLogger(__name__)
def get_file_list(path: str,
exts: list[str],
- exclude: list[str]=None) -> list[str]:
+ exclude: list[str]=[]) -> list[str]:
log.debug('retrieving file list in path "%s" that contain file'
' extensions (%s) except (%s)',
path, ', '.join(exts),
- ', '.join(exclude if exclude is not None else []))
+ ', '.join(exclude))
out: list[str] = []
for root, dirs, files in os.walk(path):
- if exclude is not None:
+ if exclude != []:
log.debug('removing excludes from list')
dirs[:] = [d for d in dirs if d not in exclude]
@@ -34,12 +34,12 @@ def get_file_list(path: str,
def get_dir_structure(path: str,
- exclude: list[str]=None) -> list[str]:
+ exclude: list[str]=[]) -> list[str]:
log.debug('retrieving dir structure in path "%s" except (%s)',
- path, ', '.join(exclude if exclude is not None else []))
+ path, ', '.join(exclude))
out: list[str] = []
for root, dirs, files in os.walk(path):
- if exclude is not None:
+ if exclude != []:
log.debug('removing excludes from list')
dirs[:] = [d for d in dirs if d not in exclude]
@@ -85,7 +85,7 @@ def get_checksum(path: str) -> str:
return file_hash.hexdigest()
-def get_expanded_path(path: str) -> None:
+def get_expanded_path(path: str) -> str:
log.debug('expanding path "%s"', path)
expanded_path: str = os.path.normpath(os.path.expandvars(path))
if '$' in expanded_path: