From 0dae5d53c49dd7b946990ca9e232fb924bf4f918 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Sun, 16 May 2021 18:46:01 -0600 Subject: refactor code and finish basic features --- src/pyssg/builder.py | 318 ++++++++++++++++++++++++++++++++++++++++++------- src/pyssg/converter.py | 209 -------------------------------- src/pyssg/database.py | 5 +- src/pyssg/discovery.py | 13 -- src/pyssg/page.py | 42 ++++--- src/pyssg/parser.py | 58 +++++++++ src/pyssg/pyssg.py | 17 ++- src/pyssg/template.py | 16 ++- 8 files changed, 396 insertions(+), 282 deletions(-) delete mode 100644 src/pyssg/converter.py create mode 100644 src/pyssg/parser.py (limited to 'src') diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index 002d80e..2ba1b6c 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -1,44 +1,282 @@ import os import shutil +from copy import deepcopy +from .template import Template from .database import Database -from .discovery import get_all_files -from .converter import create_html_files - - -def create_dir_structure(dst: str, - dirs: list[str]) -> None: - for d in dirs: - # for the dir structure, - # doesn't matter if the dir already exists - try: - os.makedirs(os.path.join(dst, d)) - except FileExistsError: - pass - - -def copy_html_files(src: str, - dst: str, - files: list[str], - db: Database) -> None: - src_file = None - dst_file = None - - for f in files: - src_file = os.path.join(src, f) - dst_file = os.path.join(dst, f) - - # only copy files if they have been modified (or are new) - if db.update(src_file, remove=f'{src}/'): - shutil.copy2(src_file, dst_file) - - -def build_static_site(src: str, - dst: str, - db: Database) -> None: - # get all file data and create necessary dir structure - dirs, md_files, html_files = get_all_files(src) - create_dir_structure(dst, dirs) - - copy_html_files(src, dst, html_files, db) - create_html_files(src, dst, md_files, db) +from .parser import MDParser +from .page import Page +from .discovery import get_file_list, get_dir_structure + +class HTMLBuilder: + def __init__(self, src: str, + dst: str, + base_url: str, + template: Template, + db: Database, + dformat: str=None, + l_dformat: str=None, + lsep_dformat: str=None): + self.src: str = src + self.dst: str = dst + self.base_url: str = base_url + self.template: Template = template + self.db: Database = db + self.dformat: str = None + self.l_dformat: str = None + self.lsep_dformat: str = None + + if dformat is not None: + self.dformat = dformat + else: + self.dformat = "%a, %d %b, %Y @ %H:%M %Z" + + if l_dformat is not None: + self.l_dformat = l_dformat + else: + self.l_dformat = "%b %d" + + if lsep_dformat is not None: + self.lsep_dformat = lsep_dformat + else: + self.lsep_dformat = "%B %Y" + + self.dirs: list[str] = None + self.md_files: list[str] = None + self.html_files: list[str] = None + + + def build(self) -> None: + self.dirs = get_dir_structure(self.src, ['templates']) + self.md_files = get_file_list(self.src, ['.md'], ['templates']) + self.html_files = get_file_list(self.src, ['.html'], ['templates']) + + self.__create_dir_structure() + self.__copy_html_files() + + parser: MDParser = MDParser(self.src, self.md_files, self.db) + parser.parse() + + # create the article index + self.__create_article_index(parser.all_tags, parser.all_pages) + + # create each category of html pages + self.__create_articles(parser.updated_pages) + self.__create_tags(parser.all_tags, parser.all_pages) + + + def __create_dir_structure(self) -> None: + for d in self.dirs: + # for the dir structure, + # doesn't matter if the dir already exists + try: + os.makedirs(os.path.join(self.dst, d)) + except FileExistsError: + pass + + + def __copy_html_files(self) -> None: + src_file: str = None + dst_file: str = None + + for f in self.html_files: + src_file = os.path.join(self.src, f) + dst_file = os.path.join(self.dst, f) + + # only copy files if they have been modified (or are new) + if self.db.update(src_file, remove=f'{self.src}/'): + shutil.copy2(src_file, dst_file) + + + # this is really similar to create_tag (singular) + def __create_article_index(self, tags: list[str], + pages: list[Page]) -> None: + # make temporary template + t: Template = deepcopy(self.template) + + # do basic replacements + # get page and tag list formated, both functions do replacements + p_list: list[str] = self.__get_pages_formatted(pages, t) + t_list: list[str] = self.__get_tags_formatted(tags, t) + # common + t.header = t.header.replace("$$LANG", 'en') + t.header = t.header.replace('$$TITLE', f'Index') + + with open(os.path.join(self.dst, 'index.html'), 'w') as f: + f.write(t.header) + f.write(t.articles.header) + + f.write(t.tags.list_header) + for tag in t_list: + f.write(tag) + f.write(t.tags.list_footer) + + f.write(t.articles.list_header) + for page in p_list: + f.write(page) + f.write(t.articles.list_footer) + + f.write(t.articles.footer) + f.write(t.footer) + + + def __create_articles(self, pages: list[Page]) -> None: + for p in pages: + self.__create_article(p) + + + def __create_article(self, page: Page) -> None: + # TODO: create better solution for replace + # make temporary template + t: Template = deepcopy(self.template) + + # prepare html file name + f_name: str = page.name + f_name = f_name.replace('.md', '.html') + + # get timestamps + c_date: str = page.c_datetime.strftime(self.dformat) + m_date: str = None + if page.m_datetime is not None: + m_date: str = page.m_datetime.strftime(self.dformat) + + # do basic replacements + # get tag list formatted (some replacements done inside + # get_tags_formatted) + t_list: list[str] = None + if page.tags is not None: + t_list = self.__get_tags_formatted(page.tags, t) + + # common + t.header = t.header.replace("$$LANG", page.lang) + t.header = t.header.replace('$$TITLE', page.title) + + # article header + t.article.header = t.article.header.replace('$$TITLE', page.title) + t.article.header = t.article.header.replace('$$AUTHOR', page.author) + t.article.header = t.article.header.replace('$$CTIME', c_date) + if m_date is not None: + t.article.header = t.article.header.replace('$$MTIME', m_date) + else: + t.article.header = t.article.header.replace('$$MTIME', '') + + # article footer (same replaces as header) + t.article.footer = t.article.footer.replace('$$TITLE', page.title) + t.article.footer = t.article.footer.replace('$$AUTHOR', page.author) + t.article.footer = t.article.footer.replace('$$CTIME', c_date) + if m_date is not None: + t.article.footer = t.article.footer.replace('$$MTIME', m_date) + else: + t.article.footer = t.article.footer.replace('$$MTIME', '') + + + with open(os.path.join(self.dst, f_name), 'w') as f: + f.write(t.header) + f.write(t.article.header) + f.write(page.html) + + if t_list is not None: + f.write(t.tags.list_header) + for tag in t_list: + f.write(tag) + f.write(t.tags.list_footer) + + f.write(t.article.footer) + f.write(t.footer) + + + def __get_tags_formatted(self, tags: list[str], + template: Template) -> list[str]: + tag_amount: int = len(tags) + tags_formatted: list[str] = [] + for i, t in enumerate(tags): + # t_e=tag entry + t_e: str = template.tags.list_entry + t_e = t_e.replace('$$URL', + f'{self.base_url}/tag/@{t}.html') + t_e = t_e.replace('$$NAME', t) + + tags_formatted.append(t_e) + if i != tag_amount - 1: + tags_formatted.append(template.tags.list_separator) + + return tags_formatted + + + def __create_tags(self, tags: list[str], + pages: list[Page]) -> None: + for t in tags: + # get a list of all pages that have current tag + # and sort them (by time) + tag_pages: list[Page] = [] + for p in pages: + if p.tags is not None and t in p.tags: + tag_pages.append(p) + tag_pages.sort(reverse=True) + + # build tag page + self.__create_tag(t, tag_pages) + + # clean list of pages with current tag + tag_pages = [] + + + def __create_tag(self, tag: str, + pages: list[Page]) -> None: + # TODO: create better solution for replace + # make temporary template + t: Template = deepcopy(self.template) + + # do basic replacements + # get page list formated (some replacements done inside + # get_pages_formatted) + p_list: list[str] = self.__get_pages_formatted(pages, t) + # common + t.header = t.header.replace("$$LANG", 'en') + t.header = t.header.replace('$$TITLE', f'Posts filtered by: {tag}') + + # tag header + tag_url: str = f'{self.base_url}/tag/@{tag}.html' + t.tags.header = t.tags.header.replace('$$NAME', tag) + t.tags.header = t.tags.header.replace('$$URL', tag_url) + + with open(os.path.join(self.dst, f'tag/@{tag}.html'), 'w') as f: + f.write(t.header) + f.write(t.tags.header) + + f.write(t.articles.list_header) + for p in p_list: + f.write(p) + f.write(t.articles.list_footer) + + f.write(t.tags.footer) + f.write(t.footer) + + + def __get_pages_formatted(self, pages: list[Page], + template: Template) -> list[str]: + month_year: str = '-' + pages_formatted: list[str] = [] + for p in pages: + # check if the monthly separator should be included + c_month_year: str = p.c_datetime.strftime(self.lsep_dformat) + if c_month_year != month_year: + month_year = c_month_year + + month_sep: str = template.articles.list_separator + month_sep = month_sep.replace('$$SEP', month_year) + + pages_formatted.append(month_sep) + + f_name: str = p.name + f_name = f_name.replace('.md', '.html') + + # p_e=page entry + p_e: str = template.articles.list_entry + p_e = p_e.replace('$$URL', f'{self.base_url}/{f_name}') + p_e = p_e.replace('$$DATE', p.c_datetime.strftime(self.l_dformat)) + p_e = p_e.replace('$$TITLE', p.title) + + pages_formatted.append(p_e) + + return pages_formatted diff --git a/src/pyssg/converter.py b/src/pyssg/converter.py deleted file mode 100644 index d054855..0000000 --- a/src/pyssg/converter.py +++ /dev/null @@ -1,209 +0,0 @@ -import os -from datetime import datetime -from markdown import Markdown -from copy import deepcopy - -from .database import Database -from .template import Template -from .page import Page - - -def get_pages(src: str, - files: list[str], - db: Database) -> (list[Page], list[Page]): - md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', - 'smarty', 'toc', 'wikilinks'], - output_format='html5') - - all_pages: list[Page] = [] - updated_pages: list[Page] = [] - for f in files: - src_file: str = os.path.join(src, f) - # get flag if update is successful - updated: bool = db.update(src_file, remove=f'{src}/') - - page: Page = None - content: str = md.reset().convert(open(src_file).read()) - page = Page(f, db.e[f][0], db.e[f][1], content, md.Meta) - - if updated: - updated_pages.append(page) - all_pages.append(page) - - # add its tag to corresponding entry if existent - if page.tags is not None: - db.update_tags(f, page.tags) - - - return (all_pages, updated_pages) - - -def create_articles(dst: str, - pages: list[Page], - template: Template) -> None: - # TODO: clean this mess - # TODO: proper creation of html files - for p in pages: - create_article(dst, p, template) - - -def create_article(dst: str, - page: Page, - template: Template) -> None: - # TODO: clean this mess - # make temporary template - t: Template = deepcopy(template) - # TODO: make this configurable - base_url: str = 'https://blog.luevano.xyz/' - - f_name: str = page.name - f_name = f_name.replace('.md', '.html') - f_name = f_name.replace('.markdown', '.html') - - with open(os.path.join(dst, f_name), 'w') as f: - # common - t.header = t.header.replace("$$LANG", - page.lang if page.lang is not None else 'en') - t.header = t.header.replace('$$TITLE', page.title) - t.header = t.header.replace('$$EXTRAHEAD', '') - - # article header - t.article.header = t.article.header.replace('$$TITLE', page.title) - - # Actually write to the html file - f.write(t.header) - f.write(t.article.header) - f.write(page.html) - - if page.tags is not None: - tag_amount: int = len(page.tags) - - f.write(t.tags.list_header) - for i, tag in enumerate(page.tags): - t_entry: str = t.tags.list_entry - t_entry = t_entry.replace('$$URL', f'{base_url}tag/@{tag}.html') - t_entry = t_entry.replace('$$NAME', tag) - - f.write(t_entry) - # don't write last separator, not needed - if i != tag_amount - 1: - f.write(t.tags.list_separator) - f.write(t.tags.list_footer) - - f.write(t.article.footer) - f.write(t.footer) - - -def get_all_tags(pages: list[Page]) -> list[str]: - tags: list[str] = [] - for p in pages: - if p.tags is not None: - for t in p.tags: - if t not in tags: - tags.append(t) - tags.sort() - - return tags - - -def create_tags(dst: str, - tags: list[str], - pages: list[Page], - template: Template) -> None: - for t in tags: - # get a list of all pages that have current tag - # and sort them (by time) - tag_pages: list[Page] = [] - for p in pages: - if p.tags is not None and t in p.tags: - tag_pages.append(p) - tag_pages.sort(reverse=True) - - # build tag page - create_tag(dst, t, tag_pages, template) - - # clean list of pages with current tag - tag_pages = [] - - -def create_tag(dst: str, - tag: str, - pages: list[Page], - template: Template) -> None: - # TODO: clean this mess - # make temporary template - t: Template = deepcopy(template) - # TODO: make this configurable - base_url: str = 'https://blog.luevano.xyz/' - - with open(os.path.join(dst, f'tag/@{tag}.html'), 'w') as f: - # common - t.header = t.header.replace("$$LANG", 'en') - t.header = t.header.replace('$$TITLE', f'Posts filtered by tag "{tag}"') - t.header = t.header.replace('$$EXTRAHEAD', '') - - # tag header - t.tags.header = t.tags.header.replace('$$NAME', tag) - t.tags.header = t.tags.header.replace('$$URL', - f'{base_url}tag/@{tag}.html') - - # Actually write to the html file - f.write(t.header) - f.write(t.tags.header) - f.write(t.articles.list_header) - - month_year: str = '-' - for p in pages: - c_month_year: str = p.c_datetime.strftime('%B %Y') - if c_month_year != month_year: - month_year = c_month_year - - month_sep: str = t.articles.list_separator - month_sep = month_sep.replace('$$SEP', month_year) - - f.write(month_sep) - - f_name: str = p.name - f_name = f_name.replace('.md', '.html') - f_name = f_name.replace('.markdown', '.html') - - page_entry: str = t.articles.list_entry - page_entry = page_entry.replace('$$URL', f'{base_url}{f_name}') - page_entry = page_entry.replace('$$DATE', - p.c_datetime.strftime('%b %d')) - page_entry = page_entry.replace('$$TITLE', p.title) - - f.write(page_entry) - - f.write(t.articles.list_footer) - f.write(t.tags.footer) - f.write(t.footer) - - -def create_article_index(dst: str, - tags: list[str], - pages: list[Page]) -> None: - # TODO: actually make this function - pass - - -def create_html_files(src: str, - dst: str, - files: list[str], - db: Database) -> None: - # get the list of page objects - all_pages, updated_pages = get_pages(src, files, db) - - # get all tags - all_tags = get_all_tags(all_pages) - - # read all templates into a template obj - template: Template = Template(src) - template.read() - - # create each category of html pages - create_articles(dst, updated_pages, template) - create_tags(dst, all_tags, all_pages, template) - - # create the article index - create_article_index(dst, all_tags, all_pages) diff --git a/src/pyssg/database.py b/src/pyssg/database.py index 61ca502..1b421c0 100644 --- a/src/pyssg/database.py +++ b/src/pyssg/database.py @@ -7,9 +7,8 @@ class Database: self.db_path: str = db_path self.e: dict[str, tuple[float, float, list[str]]] = dict() - self.__read() - + # updates the tags for a specific entry (file) def update_tags(self, file_name: str, tags: list[str]) -> None: if file_name in self.e: @@ -68,7 +67,7 @@ class Database: file.write(f'{k} {v[0]} {v[1]} {t}\n') - def __read(self) -> None: + def read(self) -> None: # only if the path exists and it is a file if os.path.exists(self.db_path) and os.path.isfile(self.db_path): # get all db file lines diff --git a/src/pyssg/discovery.py b/src/pyssg/discovery.py index 7fe5964..8dbbf69 100644 --- a/src/pyssg/discovery.py +++ b/src/pyssg/discovery.py @@ -29,16 +29,3 @@ def get_dir_structure(directory: str, out.append(os.path.join(root, d)) return [o.replace(directory, '')[1:] for o in out] - - -def get_all_files(src: str) -> tuple[list[str], list[str], list[str]]: - md_files: list[str] = get_file_list(src, - ['.md', '.markdown'], - ['templates']) - html_files: list[str] = get_file_list(src, - ['.html'], - ['templates']) - dirs: list[str] = get_dir_structure(src, - ['templates']) - - return (dirs, md_files, html_files) diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 1d2f6dd..e03ca6b 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -15,30 +15,33 @@ class Page: self.meta: dict = meta # data from self.meta - self.title: str = None - self.author: str = None + self.title: str = '' + self.author: str = '' self.c_datetime: datetime = None self.m_datetime: datetime = None - self.summary: str = None - self.lang: str = None + self.summary: str = '' + self.lang: str = 'en' self.tags: list = None + # also from self.meta, but for og metadata + self.og: dict[str, str] = dict() + self.__parse_meta() def __lt__(self, other): return self.c_time < other.c_time + # parses meta from self.meta, for og, it prioritizes, + # the actual og meta def __parse_meta(self): try: self.title = self.meta['title'][0] - except KeyError: - pass + except KeyError: pass try: self.author = self.meta['author'][0] - except KeyError: - pass + except KeyError: pass self.c_datetime = datetime.fromtimestamp(self.c_time, tz=timezone.utc) @@ -49,15 +52,26 @@ class Page: try: self.summary = self.meta['summary'][0] - except KeyError: - pass + except KeyError: pass try: self.lang = self.meta['lang'][0] - except KeyError: - pass + except KeyError: pass try: self.tags = self.meta['tags'] - except KeyError: - pass + self.tags.sort() + except KeyError: pass + + try: + # og_e = object graph entry + for og_e in self.meta['og']: + kv: str = og_e.split(',', 1) + if len(kv) != 2: + raise Exception('invalid og syntax') + + k: str = kv[0].strip() + v: str = kv[1].strip() + + self.og[k] = v + except KeyError: pass diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py new file mode 100644 index 0000000..43028ef --- /dev/null +++ b/src/pyssg/parser.py @@ -0,0 +1,58 @@ +import os +from datetime import datetime +from markdown import Markdown + +from .database import Database +from .page import Page + + +# parser of md files, stores list of pages and tags +class MDParser: + def __init__(self, src: str, + files: list[str], + db: Database): + self.md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', + 'smarty', 'toc', 'wikilinks'], + output_format='html5') + self.src: str = src + self.files: list[str] = files + self.db: Database = db + + self.all_pages: list[Page] = None + self.updated_pages: list[Page] = None + self.all_tags: list[str] = None + + + def parse(self): + # initialize lists + self.all_pages = [] + self.updated_pages = [] + self.all_tags = [] + + for f in self.files: + src_file: str = os.path.join(self.src, f) + # get flag if update is successful + updated: bool = self.db.update(src_file, remove=f'{self.src}/') + + page: Page = None + content: str = self.md.reset().convert(open(src_file).read()) + page = Page(f, self.db.e[f][0], self.db.e[f][1], content, self.md.Meta) + + # keep a separated list for all and updated pages + if updated: + self.updated_pages.append(page) + self.all_pages.append(page) + + # parse tags + if page.tags is not None: + # add its tag to corresponding db entry if existent + self.db.update_tags(f, page.tags) + + # update all_tags attribute + for t in page.tags: + if t not in self.all_tags: + self.all_tags.append(t) + + # sort list of tags for consistency + self.all_tags.sort() + self.updated_pages.sort() diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index b076abb..3f8cb8d 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -3,7 +3,7 @@ from argparse import ArgumentParser, Namespace from .database import Database from .template import Template -from .builder import build_static_site +from .builder import HTMLBuilder def get_options() -> Namespace: @@ -20,6 +20,10 @@ def get_options() -> Namespace: type=str, help='''dst directory; generated (and transfered html) files; defaults to 'dst' ''') + parser.add_argument('-u', '--url', + required=True, + type=str, + help='''base url without trailing slash''') parser.add_argument('-i', '--init', action='store_true', help='''initializes the dir structure, templates, @@ -36,6 +40,7 @@ def main() -> None: opts: dict[str] = vars(get_options()) src: str = opts['src'] dst: str = opts['dst'] + base_url: str = opts['url'] if opts['init']: try: @@ -44,14 +49,22 @@ def main() -> None: except FileExistsError: pass + # write default templates template: Template = Template(src) template.write() return if opts['build']: + # start the db db: Database = Database(os.path.join(src, '.files')) + db.read() + + # read templates + template: Template = Template(src) + template.read() - build_static_site(src, dst, db) + builder: HTMLBuilder = HTMLBuilder(src, dst, base_url, template, db) + builder.build() db.write() return diff --git a/src/pyssg/template.py b/src/pyssg/template.py index 61610d6..cd80d28 100644 --- a/src/pyssg/template.py +++ b/src/pyssg/template.py @@ -1,12 +1,16 @@ import os +from .page import Page + +# all objects here require a header and footer as minimum class HF: def __init__(self): self.header: str = None self.footer: str = None +# some objects require a "list-like" set of attributes class Common(HF): def __init__(self): self.list_header: str = None @@ -15,6 +19,7 @@ class Common(HF): self.list_separator: str = None +# main class class Template(HF): def __init__(self, src: str): self.src: str = src @@ -22,7 +27,10 @@ class Template(HF): self.articles: Common = Common() self.tags: Common = Common() + self.is_read: bool = False + + # writes default templates def write(self) -> None: # get initial working directory iwd = os.getcwd() @@ -41,7 +49,6 @@ class Template(HF): '\n', '\n', '$$TITLE\n', - '$$EXTRAHEAD\n', '\n', '\n']) self.__write_template('footer.html', @@ -102,7 +109,14 @@ class Template(HF): os.chdir(iwd) + # reads templates and stores them into class attributes def read(self) -> None: + # only read templates if not read already + # (might want to change this behaviour) + if self.is_read: + return + self.is_read = True + # get initial working directory iwd = os.getcwd() os.chdir(os.path.join(self.src, 'templates')) -- cgit v1.2.3-70-g09d2