From eaee38a4b6ebedc106548876cdbe1fe433c514bb Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Tue, 11 May 2021 20:38:48 -0600 Subject: refactor code and almost finish main functionality --- src/pyssg/builder.py | 44 ++++++++++ src/pyssg/converter.py | 231 ++++++++++++++++++++++++++++++++++++++++--------- src/pyssg/database.py | 93 ++++++++++++++++++++ src/pyssg/discovery.py | 41 +++++---- src/pyssg/generator.py | 40 --------- src/pyssg/page.py | 48 ++++++---- src/pyssg/pyssg.py | 19 ++-- src/pyssg/template.py | 11 +-- 8 files changed, 395 insertions(+), 132 deletions(-) create mode 100644 src/pyssg/builder.py create mode 100644 src/pyssg/database.py delete mode 100644 src/pyssg/generator.py (limited to 'src') diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py new file mode 100644 index 0000000..002d80e --- /dev/null +++ b/src/pyssg/builder.py @@ -0,0 +1,44 @@ +import os +import shutil + +from .database import Database +from .discovery import get_all_files +from .converter import create_html_files + + +def create_dir_structure(dst: str, + dirs: list[str]) -> None: + for d in dirs: + # for the dir structure, + # doesn't matter if the dir already exists + try: + os.makedirs(os.path.join(dst, d)) + except FileExistsError: + pass + + +def copy_html_files(src: str, + dst: str, + files: list[str], + db: Database) -> None: + src_file = None + dst_file = None + + for f in files: + src_file = os.path.join(src, f) + dst_file = os.path.join(dst, f) + + # only copy files if they have been modified (or are new) + if db.update(src_file, remove=f'{src}/'): + shutil.copy2(src_file, dst_file) + + +def build_static_site(src: str, + dst: str, + db: Database) -> None: + # get all file data and create necessary dir structure + dirs, md_files, html_files = get_all_files(src) + create_dir_structure(dst, dirs) + + copy_html_files(src, dst, html_files, db) + create_html_files(src, dst, md_files, db) diff --git a/src/pyssg/converter.py b/src/pyssg/converter.py index 5af2bd2..d054855 100644 --- a/src/pyssg/converter.py +++ b/src/pyssg/converter.py @@ -1,62 +1,209 @@ import os +from datetime import datetime from markdown import Markdown from copy import deepcopy -from .page import Page + +from .database import Database from .template import Template +from .page import Page -def get_pages(src: str, files: list[str]) -> list[Page]: +def get_pages(src: str, + files: list[str], + db: Database) -> (list[Page], list[Page]): md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists', 'smarty', 'toc', 'wikilinks'], output_format='html5') - pages: list[Page] = [] - + all_pages: list[Page] = [] + updated_pages: list[Page] = [] for f in files: - f_name = os.path.join(src, f) + src_file: str = os.path.join(src, f) + # get flag if update is successful + updated: bool = db.update(src_file, remove=f'{src}/') - content = md.reset().convert(open(f_name).read()) - f_time = os.stat(f_name).st_mtime + page: Page = None + content: str = md.reset().convert(open(src_file).read()) + page = Page(f, db.e[f][0], db.e[f][1], content, md.Meta) - pages.append(Page(f_name, f_time, content, md.Meta)) + if updated: + updated_pages.append(page) + all_pages.append(page) - return pages + # add its tag to corresponding entry if existent + if page.tags is not None: + db.update_tags(f, page.tags) -def create_html_files(src: str, dst: str, files: list[str]) -> None: - # get the list of page objects - pages: list[Page] = get_pages(src, files) + return (all_pages, updated_pages) - # read all templates into a template obj - template: Template = Template() - template.read_templates(src) + +def create_articles(dst: str, + pages: list[Page], + template: Template) -> None: + # TODO: clean this mess + # TODO: proper creation of html files for p in pages: - # t=template, p=page - t: Template = deepcopy(template) - p.parse_meta() + create_article(dst, p, template) + + +def create_article(dst: str, + page: Page, + template: Template) -> None: + # TODO: clean this mess + # make temporary template + t: Template = deepcopy(template) + # TODO: make this configurable + base_url: str = 'https://blog.luevano.xyz/' + f_name: str = page.name + f_name = f_name.replace('.md', '.html') + f_name = f_name.replace('.markdown', '.html') + + with open(os.path.join(dst, f_name), 'w') as f: # common - t.header = t.header.replace("$$LANG", p.lang) - t.header = t.header.replace('$$TITLE', p.title) - t.header = t.header.replace('$$EXTRAHEAD', f''' - - - - ''') - - # article entry - t.article.header = t.article.header.replace('$$TITLE', p.title) - - print(t.header) - print(t.article.header) - print(p.c_html) - print(t.tags.list_header, sep='') - for tag in p.tags: - tag_entry = t.tags.list_entry - tag_entry = tag_entry.replace('$$NAME', tag) - tag_entry = tag_entry.replace('$$URL', p.f_name) - print(tag_entry, sep='') - print(t.tags.list_separator, sep='') - print(t.tags.list_footer) - print(t.article.footer) - print(t.footer) + t.header = t.header.replace("$$LANG", + page.lang if page.lang is not None else 'en') + t.header = t.header.replace('$$TITLE', page.title) + t.header = t.header.replace('$$EXTRAHEAD', '') + + # article header + t.article.header = t.article.header.replace('$$TITLE', page.title) + + # Actually write to the html file + f.write(t.header) + f.write(t.article.header) + f.write(page.html) + + if page.tags is not None: + tag_amount: int = len(page.tags) + + f.write(t.tags.list_header) + for i, tag in enumerate(page.tags): + t_entry: str = t.tags.list_entry + t_entry = t_entry.replace('$$URL', f'{base_url}tag/@{tag}.html') + t_entry = t_entry.replace('$$NAME', tag) + + f.write(t_entry) + # don't write last separator, not needed + if i != tag_amount - 1: + f.write(t.tags.list_separator) + f.write(t.tags.list_footer) + + f.write(t.article.footer) + f.write(t.footer) + + +def get_all_tags(pages: list[Page]) -> list[str]: + tags: list[str] = [] + for p in pages: + if p.tags is not None: + for t in p.tags: + if t not in tags: + tags.append(t) + tags.sort() + + return tags + + +def create_tags(dst: str, + tags: list[str], + pages: list[Page], + template: Template) -> None: + for t in tags: + # get a list of all pages that have current tag + # and sort them (by time) + tag_pages: list[Page] = [] + for p in pages: + if p.tags is not None and t in p.tags: + tag_pages.append(p) + tag_pages.sort(reverse=True) + + # build tag page + create_tag(dst, t, tag_pages, template) + + # clean list of pages with current tag + tag_pages = [] + + +def create_tag(dst: str, + tag: str, + pages: list[Page], + template: Template) -> None: + # TODO: clean this mess + # make temporary template + t: Template = deepcopy(template) + # TODO: make this configurable + base_url: str = 'https://blog.luevano.xyz/' + + with open(os.path.join(dst, f'tag/@{tag}.html'), 'w') as f: + # common + t.header = t.header.replace("$$LANG", 'en') + t.header = t.header.replace('$$TITLE', f'Posts filtered by tag "{tag}"') + t.header = t.header.replace('$$EXTRAHEAD', '') + + # tag header + t.tags.header = t.tags.header.replace('$$NAME', tag) + t.tags.header = t.tags.header.replace('$$URL', + f'{base_url}tag/@{tag}.html') + + # Actually write to the html file + f.write(t.header) + f.write(t.tags.header) + f.write(t.articles.list_header) + + month_year: str = '-' + for p in pages: + c_month_year: str = p.c_datetime.strftime('%B %Y') + if c_month_year != month_year: + month_year = c_month_year + + month_sep: str = t.articles.list_separator + month_sep = month_sep.replace('$$SEP', month_year) + + f.write(month_sep) + + f_name: str = p.name + f_name = f_name.replace('.md', '.html') + f_name = f_name.replace('.markdown', '.html') + + page_entry: str = t.articles.list_entry + page_entry = page_entry.replace('$$URL', f'{base_url}{f_name}') + page_entry = page_entry.replace('$$DATE', + p.c_datetime.strftime('%b %d')) + page_entry = page_entry.replace('$$TITLE', p.title) + + f.write(page_entry) + + f.write(t.articles.list_footer) + f.write(t.tags.footer) + f.write(t.footer) + + +def create_article_index(dst: str, + tags: list[str], + pages: list[Page]) -> None: + # TODO: actually make this function + pass + + +def create_html_files(src: str, + dst: str, + files: list[str], + db: Database) -> None: + # get the list of page objects + all_pages, updated_pages = get_pages(src, files, db) + + # get all tags + all_tags = get_all_tags(all_pages) + + # read all templates into a template obj + template: Template = Template(src) + template.read() + + # create each category of html pages + create_articles(dst, updated_pages, template) + create_tags(dst, all_tags, all_pages, template) + + # create the article index + create_article_index(dst, all_tags, all_pages) diff --git a/src/pyssg/database.py b/src/pyssg/database.py new file mode 100644 index 0000000..61ca502 --- /dev/null +++ b/src/pyssg/database.py @@ -0,0 +1,93 @@ +import os + + +# db class that works for both html and md files +class Database: + def __init__(self, db_path: str): + self.db_path: str = db_path + self.e: dict[str, tuple[float, float, list[str]]] = dict() + + self.__read() + + + def update_tags(self, file_name: str, + tags: list[str]) -> None: + if file_name in self.e: + cts, mts, _ = self.e[file_name] + self.e[file_name] = (cts, mts, tags) + + + # returns a bool that indicates if the entry + # was (includes new entries) or wasn't updated + # 0.0 means no mod + def update(self, file_name: str, + remove: str=None) -> bool: + # initial default values + f: str = file_name + tags: list[str] = [] + if remove is not None: + f = file_name.replace(remove, '') + + + # get current time, needs actual file name + time: float = os.stat(file_name).st_mtime + + # three cases, 1) entry didn't exist, + # 2) entry hasn't been mod and, + # 3) entry has been mod + #1) + if f not in self.e: + self.e[f] = (time, 0.0, tags) + return True + + old_time, old_mod_time, tags = self.e[f] + + # 2) + if old_mod_time == 0.0: + if time > old_time: + self.e[f] = (old_time, time, tags) + return True + # 3) + else: + if time > old_mod_time: + self.e[f] = (old_time, time, tags) + return True + + return False + + + def write(self) -> None: + with open(self.db_path, 'w') as file: + # write each k,v pair in dict to db file + for k, v in self.e.items(): + t: str = None + if len(v[2]) == 0: + t = '-' + else: + t = ','.join(v[2]) + file.write(f'{k} {v[0]} {v[1]} {t}\n') + + + def __read(self) -> None: + # only if the path exists and it is a file + if os.path.exists(self.db_path) and os.path.isfile(self.db_path): + # get all db file lines + lines: list[str] = None + with open(self.db_path, 'r') as file: + lines = file.readlines() + + # parse each entry and populate accordingly + l: list[str] = None + # l=list of values in entry + for line in lines: + l = tuple(line.strip().split()) + if len(l) != 4: + raise Exception('db entry doesn\'t contain 4 elements') + + t: list[str] = None + if l[3] == '-': + t = [] + else: + t = l[3].split(',') + + self.e[l[0]] = (float(l[1]), float(l[2]), t) diff --git a/src/pyssg/discovery.py b/src/pyssg/discovery.py index 606c1d0..7fe5964 100644 --- a/src/pyssg/discovery.py +++ b/src/pyssg/discovery.py @@ -1,45 +1,44 @@ import os -def get_file_list(extensions: list[str], exclude: list[str]=None) -> list[str]: - cwd = os.getcwd() - - out = [] - for root, dirs, files in os.walk(cwd): +def get_file_list(directory: str, + extensions: list[str], + exclude: list[str]=None) -> list[str]: + out: list[str] = [] + for root, dirs, files in os.walk(directory): if exclude is not None: dirs[:] = [d for d in dirs if d not in exclude] for f in files: if f.endswith(tuple(extensions)): - out.append(os.path.join(root, f).replace(cwd, '')[1:]) + out.append(os.path.join(root, f).replace(directory, '')[1:]) return out -def get_dir_structure(exclude: list[str]=None) -> list[str]: - cwd = os.getcwd() - - out = [] - for root, dirs, files in os.walk(cwd): +def get_dir_structure(directory: str, + exclude: list[str]=None) -> list[str]: + out: list[str] = [] + for root, dirs, files in os.walk(directory): if exclude is not None: dirs[:] = [d for d in dirs if d not in exclude] for d in dirs: if root in out: out.remove(root) - out.append(os.path.join(root, d)) + out.append(os.path.join(root, d)) - return [o.replace(cwd, '')[1:] for o in out] + return [o.replace(directory, '')[1:] for o in out] def get_all_files(src: str) -> tuple[list[str], list[str], list[str]]: - iwd = os.getcwd() - os.chdir(src) - - md_files = get_file_list(['.md', '.markdown'], ['templates']) - html_files = get_file_list(['.html'], ['templates']) - dirs = get_dir_structure(['templates']) - - os.chdir(iwd) + md_files: list[str] = get_file_list(src, + ['.md', '.markdown'], + ['templates']) + html_files: list[str] = get_file_list(src, + ['.html'], + ['templates']) + dirs: list[str] = get_dir_structure(src, + ['templates']) return (dirs, md_files, html_files) diff --git a/src/pyssg/generator.py b/src/pyssg/generator.py deleted file mode 100644 index 143eae1..0000000 --- a/src/pyssg/generator.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -import shutil -from .discovery import get_all_files -from .converter import create_html_files - - -def create_dir_structure(dst: str, dirs: list[str]) -> None: - iwd = os.getcwd() - - os.chdir(dst) - cwd = os.getcwd() - - for d in dirs: - # for the dir structure, - # doesn't matter if the dir already exists - try: - os.makedirs(os.path.join(cwd, d)) - except FileExistsError: - pass - - os.chdir(iwd) - - -def copy_html_files(src: str, dst: str, files: list[str]) -> None: - src_file = None - dst_file = None - - for f in files: - src_file = os.path.join(src, f) - dst_file = os.path.join(dst, f) - - shutil.copy2(src_file, dst_file) - - -def generate_static_site(src: str, dst: str) -> None: - dirs, md_files, html_files = get_all_files(src) - create_dir_structure(dst, dirs) - - copy_html_files(src, dst, html_files) - create_html_files(src, dst, md_files) diff --git a/src/pyssg/page.py b/src/pyssg/page.py index 09486ea..1d2f6dd 100644 --- a/src/pyssg/page.py +++ b/src/pyssg/page.py @@ -3,47 +3,61 @@ from datetime import datetime, timezone class Page: def __init__(self, - f_name: str, - f_time: float, - c_html: str, - c_meta: dict): - self.f_name: str = f_name - self.f_time: float = f_time - self.c_html: str = c_html - self.c_meta: dict = c_meta - + name: str, + c_time: float, + m_time: float, + html: str, + meta: dict): + self.name: str = name + self.c_time: float = c_time + self.m_time: float = m_time + self.html: str = html + self.meta: dict = meta + + # data from self.meta self.title: str = None self.author: str = None - self.timestamp: str = None + self.c_datetime: datetime = None + self.m_datetime: datetime = None self.summary: str = None self.lang: str = None self.tags: list = None + self.__parse_meta() + + def __lt__(self, other): + return self.c_time < other.c_time - def parse_meta(self): + + def __parse_meta(self): try: - self.title = self.c_meta['title'][0] + self.title = self.meta['title'][0] except KeyError: pass try: - self.author = self.c_meta['author'][0] + self.author = self.meta['author'][0] except KeyError: pass - self.timestamp = datetime.fromtimestamp(self.f_time, tz=timezone.utc) + self.c_datetime = datetime.fromtimestamp(self.c_time, + tz=timezone.utc) + + if self.m_time != 0.0: + self.m_datetime = datetime.fromtimestamp(self.m_time, + tz=timezone.utc) try: - self.summary = self.c_meta['summary'][0] + self.summary = self.meta['summary'][0] except KeyError: pass try: - self.lang = self.c_meta['lang'][0] + self.lang = self.meta['lang'][0] except KeyError: pass try: - self.tags = self.c_meta['tags'] + self.tags = self.meta['tags'] except KeyError: pass diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index 5b736ca..b076abb 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -1,8 +1,9 @@ import os from argparse import ArgumentParser, Namespace +from .database import Database from .template import Template -from .generator import generate_static_site +from .builder import build_static_site def get_options() -> Namespace: @@ -32,9 +33,9 @@ def get_options() -> Namespace: def main() -> None: - opts = vars(get_options()) - src = opts['src'] - dst = opts['dst'] + opts: dict[str] = vars(get_options()) + src: str = opts['src'] + dst: str = opts['dst'] if opts['init']: try: @@ -43,10 +44,14 @@ def main() -> None: except FileExistsError: pass - template = Template() - template.write_templates(src) + template: Template = Template(src) + template.write() return if opts['build']: - generate_static_site(src, dst) + db: Database = Database(os.path.join(src, '.files')) + + build_static_site(src, dst, db) + + db.write() return diff --git a/src/pyssg/template.py b/src/pyssg/template.py index c85df72..61610d6 100644 --- a/src/pyssg/template.py +++ b/src/pyssg/template.py @@ -16,16 +16,17 @@ class Common(HF): class Template(HF): - def __init__(self): + def __init__(self, src: str): + self.src: str = src self.article: HF = HF() self.articles: Common = Common() self.tags: Common = Common() - def write_templates(self, src: str) -> None: + def write(self) -> None: # get initial working directory iwd = os.getcwd() - os.chdir(src) + os.chdir(self.src) # create templates dir os.mkdir('templates') @@ -101,10 +102,10 @@ class Template(HF): os.chdir(iwd) - def read_templates(self, src: str) -> None: + def read(self) -> None: # get initial working directory iwd = os.getcwd() - os.chdir(os.path.join(src, 'templates')) + os.chdir(os.path.join(self.src, 'templates')) # common os.chdir('common') -- cgit v1.2.3-70-g09d2