summary refs log tree commit diff
diff options
context:
space:
mode:
author David Luevano Alvarado <david@luevano.xyz> 2021-05-25 21:21:20 -0600
committer David Luevano Alvarado <david@luevano.xyz> 2021-05-25 21:21:20 -0600
commit 2c4afde1c93bbf3990514c45b06b63e648c46f63 (patch)
tree f7497bf7885543b4d59ac7cafeb5012e8c8930c8
parent a9d5d971961bd28f2d6275aabfa78f2fd268b851 (diff)
add sitemap support
-rw-r--r-- ChangeLog 4
-rw-r--r-- README.md 4
-rw-r--r-- src/pyssg/builder.py 6
-rw-r--r-- src/pyssg/pyssg.py 7
-rw-r--r-- src/pyssg/sitemap.py 57
-rw-r--r-- src/pyssg/template.py 20
6 files changed, 95 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index c35abee..2177fc1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,10 @@
CHANGES
=======
+v0.3.1
+------
+
+* fix readme links
* add tags as categories
* add content:encoded tag in rss and update readme
diff --git a/README.md b/README.md
index 00335d7..59a1333 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ I'm writing this in *pYtHoN* (thought about doing it in Go, but I'm most comfort
- [x] Preserves hand-made `*.html` files.
- [x] Tag functionality.
- [ ] Open Graph (and similar) support.
-- [ ] Build `sitemap.xml` file.
+- [x] Build `sitemap.xml` file.
- [x] Build `rss.xml` file.
- [x] Only build page if `*.md` is new or updated.
- [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present).
@@ -63,4 +63,4 @@ pyssg -s src_dir -d dst_dir -u https://base.url -b
That creates all `*.html` for the site and can be easily moved to the server. Here, the `-u` flag is technically optional in the sense that you'll not receive a warning/error, but it's used to prepend links with this URL (not strictly required everywhere), so don't ignore it; also don't include the trailing `/`.
-For now, the `-b`uild tag also creates a `rss.xml` file based on a template (created when initializing the directories/templates) adding all converted `*.md` files, meaning that separate `*.html` files should be included manually in the template.
+For now, the `-b`uild flag also creates the `rss.xml` and `sitemap.xml` files from their templates, including only the converted `*.md` files (plus the processed tags, in the case of the sitemap); separate `*.html` files must be added to the templates manually.
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index d2163d2..4939bcc 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -29,6 +29,7 @@ class HTMLBuilder:
self.html_files: list[str] = None
self.all_pages: list[Page] = None
+ self.all_tags: list[str] = None
def build(self) -> None:
@@ -44,6 +45,7 @@ class HTMLBuilder:
# just to be able to extract all pages out of this class
self.all_pages = parser.all_pages
+ self.all_tags = parser.all_tags
# create the article index
self.__create_article_index(parser.all_tags, parser.all_pages)
@@ -61,6 +63,10 @@ class HTMLBuilder:
return self.all_pages
+ def get_tags(self) -> list[str]:
+ return self.all_tags
+
+
def __create_dir_structure(self) -> None:
for d in self.dirs:
# for the dir structure,
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index dbc5f0b..72b89ab 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -114,5 +114,12 @@ def main() -> None:
rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages)
rss_builder.build()
+ # sitemap lists every page and tag; imported here, no module-level import
+ from .sitemap import SitemapBuilder
+ all_tags: list[str] = builder.get_tags()
+ sitemap_builder: SitemapBuilder = SitemapBuilder(
+ config, template.sitemap, all_pages, all_tags)
+ sitemap_builder.build()
+
db.write()
return
diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py
new file mode 100644
index 0000000..e8338cb
--- /dev/null
+++ b/src/pyssg/sitemap.py
@@ -0,0 +1,57 @@
+import os
+from datetime import datetime, timezone
+
+from .page import Page
+from .configuration import Configuration
+
+
+DFORMAT = '%Y-%m-%d'
+
+
+class SitemapBuilder:
+    """Build ``sitemap.xml`` from the sitemap template, pages and tags."""
+
+    def __init__(self, config: Configuration,
+                 template: str,
+                 pages: list[Page],
+                 tags: list[str]):
+        self.config: Configuration = config
+        self.sitemap: str = template
+        self.pages: list[Page] = pages
+        self.tags: list[str] = tags
+
+    def build(self):
+        """Replace ``$$URLS`` in the template and write ``sitemap.xml``."""
+        urls_formatted: str = self.__get_urls_formatted()
+        self.sitemap = self.sitemap.replace('$$URLS', urls_formatted)
+
+        # the template declares utf-8, so do not depend on the locale
+        path: str = os.path.join(self.config.dst, 'sitemap.xml')
+        with open(path, 'w', encoding='utf-8') as f:
+            f.write(self.sitemap)
+
+    @staticmethod
+    def __format_url(url: str, date: str, freq: str, prio: str) -> str:
+        """Return one ``<url>`` entry with the location XML-escaped."""
+        from xml.sax.saxutils import escape
+        return (' <url>\n'
+                f' <loc>{escape(url)}</loc>\n'
+                f' <lastmod>{date}</lastmod>\n'
+                f' <changefreq>{freq}</changefreq>\n'
+                f' <priority>{prio}</priority>\n'
+                ' </url>\n')
+
+    def __get_urls_formatted(self) -> str:
+        """Return the ``<url>`` entries for every page and every tag."""
+        # tag pages are rebuilt on every run, so they are dated today (UTC)
+        today: str = datetime.now(tz=timezone.utc).strftime(DFORMAT)
+        entries: list[str] = [
+            self.__format_url(
+                f'{self.config.base_url}/{p.name.replace(".md", ".html")}',
+                p.m_datetime.strftime(DFORMAT), 'weekly', '1.0')
+            for p in self.pages]
+        entries += [
+            self.__format_url(f'{self.config.base_url}/tag/@{t}.html',
+                              today, 'daily', '0.5')
+            for t in self.tags]
+        return ''.join(entries)
diff --git a/src/pyssg/template.py b/src/pyssg/template.py
index 0c43f22..932b6ae 100644
--- a/src/pyssg/template.py
+++ b/src/pyssg/template.py
@@ -27,6 +27,7 @@ class Template(HF):
self.articles: Common = Common()
self.tags: Common = Common()
self.rss: str = None
+ self.sitemap: str = None
self.is_read: bool = False
@@ -141,6 +142,19 @@ class Template(HF):
' </channel>\n',
'</rss>'])
+ # go back to templates
+ os.chdir('..')
+
+ os.mkdir('sitemap')
+ os.chdir('sitemap')
+ self.__write_template('sitemap.xml',
+ ['<?xml version="1.0" encoding="utf-8"?>\n',
+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n',
+ ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n',
+ ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n',
+ 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n',
+ '$$URLS\n',
+ '</urlset>'])
# return to initial working directory
os.chdir(iwd)
@@ -201,10 +215,14 @@ class Template(HF):
# go back to templates
os.chdir('..')
- # tag
+ # rss
os.chdir('rss')
self.rss = self.__read_template('rss.xml')
+ # sitemap is a sibling of rss, so go back to templates first
+ os.chdir('..')
+ os.chdir('sitemap')
+ self.sitemap = self.__read_template('sitemap.xml')
# return to initial working directory
os.chdir(iwd)