From 2c4afde1c93bbf3990514c45b06b63e648c46f63 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <david@luevano.xyz>
Date: Tue, 25 May 2021 21:21:20 -0600
Subject: add sitemap support

---
 src/pyssg/builder.py  |  6 ++++++
 src/pyssg/pyssg.py    |  7 +++++++
 src/pyssg/sitemap.py  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pyssg/template.py | 20 +++++++++++++++++-
 4 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 src/pyssg/sitemap.py

(limited to 'src')

diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index d2163d2..4939bcc 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -29,6 +29,7 @@ class HTMLBuilder:
         self.html_files: list[str] = None
 
         self.all_pages: list[Page] = None
+        self.all_tags: list[str] = None
 
 
     def build(self) -> None:
@@ -44,6 +45,7 @@ class HTMLBuilder:
 
         # just to be able to extract all pages out of this class
         self.all_pages = parser.all_pages
+        self.all_tags = parser.all_tags
 
         # create the article index
         self.__create_article_index(parser.all_tags, parser.all_pages)
@@ -61,6 +63,10 @@ class HTMLBuilder:
         return self.all_pages
 
 
+    def get_tags(self) -> list[str]:
+        return self.all_tags  # initialised to None; set from parser.all_tags
+
+
     def __create_dir_structure(self) -> None:
         for d in self.dirs:
             # for the dir structure,
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index dbc5f0b..72b89ab 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -114,5 +114,16 @@ def main() -> None:
         rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages)
         rss_builder.build()
 
+        # imported locally: this change adds no import to the module header
+        from .sitemap import SitemapBuilder
+
+        # the sitemap lists every page and every tag index, so it needs both
+        all_tags: list[str] = builder.get_tags()
+        sitemap_builder: SitemapBuilder = SitemapBuilder(config,
+                                                         template.sitemap,
+                                                         all_pages,
+                                                         all_tags)
+        sitemap_builder.build()
+
         db.write()
         return
diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py
new file mode 100644
index 0000000..e8338cb
--- /dev/null
+++ b/src/pyssg/sitemap.py
@@ -0,0 +1,67 @@
+import os
+from datetime import datetime, timezone
+from xml.sax.saxutils import escape
+
+from .page import Page
+from .configuration import Configuration
+
+
+DFORMAT = '%Y-%m-%d'
+
+
+class SitemapBuilder:
+    """Build sitemap.xml with one <url> entry per page and per tag index."""
+
+    def __init__(self, config: Configuration,
+                 template: str,
+                 pages: list[Page],
+                 tags: list[str]):
+        self.config: Configuration = config
+        # template text; build() swaps its '$$URLS' placeholder for the entries
+        self.sitemap: str = template
+        self.pages: list[Page] = pages
+        self.tags: list[str] = tags
+
+
+    def build(self) -> None:
+        """Fill the template and write it to 'sitemap.xml' under config.dst."""
+        urls_formatted: str = self.__get_urls_formatted()
+        self.sitemap = self.sitemap.replace('$$URLS', urls_formatted)
+
+        # written as utf-8 explicitly: the default template declares that
+        # encoding, while open() would otherwise use the locale's default
+        with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w',
+                  encoding='utf-8') as f:
+            f.write(self.sitemap)
+
+
+    @staticmethod
+    def __format_url(url: str, date: str, changefreq: str,
+                     priority: str) -> str:
+        """Return one indented <url> element; the location is XML-escaped."""
+        # a raw '&', '<' or '>' in the address would make the document invalid
+        return ('    <url>\n'
+                f'      <loc>{escape(url)}</loc>\n'
+                f'      <lastmod>{date}</lastmod>\n'
+                f'      <changefreq>{changefreq}</changefreq>\n'
+                f'      <priority>{priority}</priority>\n'
+                '    </url>\n')
+
+
+    def __get_urls_formatted(self) -> str:
+        """Return the <url> entries for all pages followed by all tags."""
+        entries: list[str] = []
+
+        for p in self.pages:
+            url: str = f'{self.config.base_url}/{p.name.replace(".md", ".html")}'
+            date: str = p.m_datetime.strftime(DFORMAT)
+            entries.append(self.__format_url(url, date, 'weekly', '1.0'))
+
+        # tags are plain strings with no date attached, so stamp them with
+        # the current UTC date, computed once for the whole run
+        today: str = datetime.now(tz=timezone.utc).strftime(DFORMAT)
+        for t in self.tags:
+            url = f'{self.config.base_url}/tag/@{t}.html'
+            entries.append(self.__format_url(url, today, 'daily', '0.5'))
+
+        return ''.join(entries)
diff --git a/src/pyssg/template.py b/src/pyssg/template.py
index 0c43f22..932b6ae 100644
--- a/src/pyssg/template.py
+++ b/src/pyssg/template.py
@@ -27,6 +27,7 @@ class Template(HF):
         self.articles: Common = Common()
         self.tags: Common = Common()
         self.rss: str = None
+        self.sitemap: str = None
 
         self.is_read: bool = False
 
@@ -141,6 +142,19 @@ class Template(HF):
                                '  </channel>\n',
                                '</rss>'])
 
+        # go back to templates
+        os.chdir('..')
+
+        os.mkdir('sitemap')
+        os.chdir('sitemap')
+        self.__write_template('sitemap.xml',
+                              ['<?xml version="1.0" encoding="utf-8"?>\n',
+                               '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n',
+                               '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n',
+                               '  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n',
+                               'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n',
+                               '$$URLS\n',
+                               '</urlset>'])
         # return to initial working directory
         os.chdir(iwd)
 
@@ -201,10 +215,17 @@ class Template(HF):
         # go back to templates
         os.chdir('..')
 
-        # tag
+        # rss
         os.chdir('rss')
         self.rss = self.__read_template('rss.xml')
 
+        # go back to templates
+        os.chdir('..')
+
+        # sitemap
+        os.chdir('sitemap')
+        self.sitemap = self.__read_template('sitemap.xml')
+
         # return to initial working directory
         os.chdir(iwd)
 
-- 
cgit v1.2.3-70-g09d2