From 2c4afde1c93bbf3990514c45b06b63e648c46f63 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Tue, 25 May 2021 21:21:20 -0600 Subject: add sitemap support --- ChangeLog | 4 ++++ README.md | 4 ++-- src/pyssg/builder.py | 6 ++++++ src/pyssg/pyssg.py | 7 +++++++ src/pyssg/sitemap.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/pyssg/template.py | 20 +++++++++++++++++- 6 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 src/pyssg/sitemap.py diff --git a/ChangeLog b/ChangeLog index c35abee..2177fc1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,10 @@ CHANGES ======= +v0.3.1 +------ + +* fix readme links * add tags as categories * add content:encoded tag in rss and update readme diff --git a/README.md b/README.md index 00335d7..59a1333 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ I'm writing this in *pYtHoN* (thought about doing it in Go, but I'm most comfort - [x] Preserves hand-made `*.html` files. - [x] Tag functionality. - [ ] Open Graph (and similar) support. -- [ ] Build `sitemap.xml` file. +- [x] Build `sitemap.xml` file. - [x] Build `rss.xml` file. - [x] Only build page if `*.md` is new or updated. - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). @@ -63,4 +63,4 @@ pyssg -s src_dir -d dst_dir -u https://base.url -b That creates all `*.html` for the site and can be easily moved to the server. Here, the `-u` flag is technically optional in the sense that you'll not receive a warning/error, but it's used to prepend links with this URL (not strictly required everywhere), so don't ignore it; also don't include the trailing `/`. -For now, the `-b`uild tag also creates a `rss.xml` file based on a template (created when initializing the directories/templates) adding all converted `*.md` files, meaning that separate `*.html` files should be included manually in the template. 
+For now, the `-b`uild flag also creates the `rss.xml` and `sitemap.xml` files based on templates that include only the converted `*.md` files (and processed tags in the case of the sitemap), meaning that separate `*.html` files should be included manually in the template. diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index d2163d2..4939bcc 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -29,6 +29,7 @@ class HTMLBuilder: self.html_files: list[str] = None self.all_pages: list[Page] = None + self.all_tags: list[str] = None def build(self) -> None: @@ -44,6 +45,7 @@ class HTMLBuilder: # just to be able to extract all pages out of this class self.all_pages = parser.all_pages + self.all_tags = parser.all_tags # create the article index self.__create_article_index(parser.all_tags, parser.all_pages) @@ -61,6 +63,10 @@ class HTMLBuilder: return self.all_pages + def get_tags(self) -> list[str]: + return self.all_tags + + def __create_dir_structure(self) -> None: for d in self.dirs: # for the dir structure, diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index dbc5f0b..72b89ab 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -114,5 +114,12 @@ def main() -> None: rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages) rss_builder.build() + # get all tags for sitemap construction + all_tags: list[str] = builder.get_tags() + sitemap_builder: SitemapBuilder = SitemapBuilder(config, + template.sitemap, + all_tags) + sitemap_builder.build() + db.write() return diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py new file mode 100644 index 0000000..e8338cb --- /dev/null +++ b/src/pyssg/sitemap.py @@ -0,0 +1,57 @@ +import os +from datetime import datetime, timezone + +from .page import Page +from .configuration import Configuration + + +DFORMAT = '%Y-%m-%d' + + +class SitemapBuilder: + def __init__(self, config: Configuration, + template: str, + pages: list[Page], + tags: list[str]): + self.config: Configuration = config + 
self.sitemap: str = template + self.pages: list[Page] = pages + self.tags: list[str] = tags + + + def build(self): + # initial base replacements + urls_formatted: str = self.__get_urls_formatted() + self.sitemap = self.sitemap.replace('$$URLS', urls_formatted) + + + with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w') as f: + f.write(self.sitemap) + + + def __get_urls_formatted(self) -> str: + # u_f=items formatted for short + u_f: str = '' + for p in self.pages: + url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' + date: str = p.m_datetime.strftime(DFORMAT) + + u_f = f'{u_f} \n' + u_f = f'{u_f} {url}\n' + u_f = f'{u_f} {date}\n' + u_f = f'{u_f} weekly\n' + u_f = f'{u_f} 1.0\n' + u_f = f'{u_f} \n' + + for t in self.tags: + url: str = f'{self.config.base_url}/tag/@{t}.html' + date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) + + u_f = f'{u_f} \n' + u_f = f'{u_f} {url}\n' + u_f = f'{u_f} {date}\n' + u_f = f'{u_f} daily\n' + u_f = f'{u_f} 0.5\n' + u_f = f'{u_f} \n' + + return u_f diff --git a/src/pyssg/template.py b/src/pyssg/template.py index 0c43f22..932b6ae 100644 --- a/src/pyssg/template.py +++ b/src/pyssg/template.py @@ -27,6 +27,7 @@ class Template(HF): self.articles: Common = Common() self.tags: Common = Common() self.rss: str = None + self.sitemap: str = None self.is_read: bool = False @@ -141,6 +142,19 @@ class Template(HF): ' \n', '']) + # go back to templates + os.chdir('..') + + os.mkdir('sitemap') + os.chdir('sitemap') + self.__write_template('sitemap.xml', + ['\n', + '\n', + '$$URLS\n', + '']) # return to initial working directory os.chdir(iwd) @@ -201,10 +215,14 @@ class Template(HF): # go back to templates os.chdir('..') - # tag + # rss os.chdir('rss') self.rss = self.__read_template('rss.xml') + # sitemap + os.chdir('sitemap') + self.sitemap = self.__read_template('sitemap.xml') + # return to initial working directory os.chdir(iwd) -- cgit v1.2.3-70-g09d2