diff options
author | David Luevano Alvarado <david@luevano.xyz> | 2021-05-25 21:21:20 -0600 |
---|---|---|
committer | David Luevano Alvarado <david@luevano.xyz> | 2021-05-25 21:21:20 -0600 |
commit | 2c4afde1c93bbf3990514c45b06b63e648c46f63 (patch) | |
tree | f7497bf7885543b4d59ac7cafeb5012e8c8930c8 | |
parent | a9d5d971961bd28f2d6275aabfa78f2fd268b851 (diff) |
add sitemap support
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | src/pyssg/builder.py | 6 | ||||
-rw-r--r-- | src/pyssg/pyssg.py | 7 | ||||
-rw-r--r-- | src/pyssg/sitemap.py | 57 | ||||
-rw-r--r-- | src/pyssg/template.py | 20 |
6 files changed, 95 insertions, 3 deletions
@@ -1,6 +1,10 @@ CHANGES ======= +v0.3.1 +------ + +* fix readme links * add tags as categories * add content:encoded tag in rss and update readme @@ -14,7 +14,7 @@ I'm writing this in *pYtHoN* (thought about doing it in Go, but I'm most comfort - [x] Preserves hand-made `*.html` files. - [x] Tag functionality. - [ ] Open Graph (and similar) support. -- [ ] Build `sitemap.xml` file. +- [x] Build `sitemap.xml` file. - [x] Build `rss.xml` file. - [x] Only build page if `*.md` is new or updated. - [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present). @@ -63,4 +63,4 @@ pyssg -s src_dir -d dst_dir -u https://base.url -b That creates all `*.html` for the site and can be easily moved to the server. Here, the `-u` flag is technically optional in the sense that you'll not receive a warning/error, but it's used to prepend links with this URL (not strictly required everywhere), so don't ignore it; also don't include the trailing `/`. -For now, the `-b`uild tag also creates a `rss.xml` file based on a template (created when initializing the directories/templates) adding all converted `*.md` files, meaning that separate `*.html` files should be included manually in the template. +For now, the `-b`uild tag also creates the `rss.xml` and `sitemap.xml` files based on templates including only all converted `*.md` files (and processed tags in case of the sitemap), meaning that separate `*.html` files should be included manually in the template. diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py index d2163d2..4939bcc 100644 --- a/src/pyssg/builder.py +++ b/src/pyssg/builder.py @@ -29,6 +29,7 @@ class HTMLBuilder: self.html_files: list[str] = None self.all_pages: list[Page] = None + self.all_tags: list[str] = None def build(self) -> None: @@ -44,6 +45,7 @@ class HTMLBuilder: # just to be able to extract all pages out of this class self.all_pages = parser.all_pages + self.all_tags = parser.all_tags # create the article index self.__create_article_index(parser.all_tags, parser.all_pages) @@ -61,6 +63,10 @@ class HTMLBuilder: return self.all_pages + def get_tags(self) -> list[str]: + return self.all_tags + + def __create_dir_structure(self) -> None: for d in self.dirs: # for the dir structure, diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py index dbc5f0b..72b89ab 100644 --- a/src/pyssg/pyssg.py +++ b/src/pyssg/pyssg.py @@ -114,5 +114,12 @@ def main() -> None: rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages) rss_builder.build() + # get all tags for sitemap construction + all_tags: list[str] = builder.get_tags() + sitemap_builder: SitemapBuilder = SitemapBuilder(config, + template.sitemap, + all_tags) + sitemap_builder.build() + db.write() return diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py new file mode 100644 index 0000000..e8338cb --- /dev/null +++ b/src/pyssg/sitemap.py @@ -0,0 +1,57 @@ +import os +from datetime import datetime, timezone + +from .page import Page +from .configuration import Configuration + + +DFORMAT = '%Y-%m-%d' + + +class SitemapBuilder: + def __init__(self, config: Configuration, + template: str, + pages: list[Page], + tags: list[str]): + self.config: Configuration = config + self.sitemap: str = template + self.pages: list[Page] = pages + self.tags: list[str] = tags + + + def build(self): + # initial base replacements + urls_formatted: str = self.__get_urls_formatted() + self.sitemap = self.sitemap.replace('$$URLS', urls_formatted) + + + with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w') as f: + f.write(self.sitemap) + + + def __get_urls_formatted(self) -> str: + # u_f=items formatted for short + u_f: str = '' + for p in self.pages: + url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}' + date: str = p.m_datetime.strftime(DFORMAT) + + u_f = f'{u_f} <url>\n' + u_f = f'{u_f} <loc>{url}</loc>\n' + u_f = f'{u_f} <lastmod>{date}</lastmod>\n' + u_f = f'{u_f} <changefreq>weekly</changefreq>\n' + u_f = f'{u_f} <priority>1.0</priority>\n' + u_f = f'{u_f} </url>\n' + + for t in self.tags: + url: str = f'{self.config.base_url}/tag/@{t}.html' + date: str = datetime.now(tz=timezone.utc).strftime(DFORMAT) + + u_f = f'{u_f} <url>\n' + u_f = f'{u_f} <loc>{url}</loc>\n' + u_f = f'{u_f} <lastmod>{date}</lastmod>\n' + u_f = f'{u_f} <changefreq>daily</changefreq>\n' + u_f = f'{u_f} <priority>0.5</priority>\n' + u_f = f'{u_f} </url>\n' + + return u_f diff --git a/src/pyssg/template.py b/src/pyssg/template.py index 0c43f22..932b6ae 100644 --- a/src/pyssg/template.py +++ b/src/pyssg/template.py @@ -27,6 +27,7 @@ class Template(HF): self.articles: Common = Common() self.tags: Common = Common() self.rss: str = None + self.sitemap: str = None self.is_read: bool = False @@ -141,6 +142,19 @@ class Template(HF): ' </channel>\n', '</rss>']) + # go back to templates + os.chdir('..') + + os.mkdir('sitemap') + os.chdir('sitemap') + self.__write_template('sitemap.xml', + ['<?xml version="1.0" encoding="utf-8"?>\n', + '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n', + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n', + ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n', + 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n', + '$$URLS\n', + '</urlset>']) # return to initial working directory os.chdir(iwd) @@ -201,10 +215,14 @@ class Template(HF): # go back to templates os.chdir('..') - # tag + # rss os.chdir('rss') self.rss = self.__read_template('rss.xml') + # sitemap + os.chdir('sitemap') + self.sitemap = self.__read_template('sitemap.xml') + # return to initial working directory os.chdir(iwd) |