From 2c4afde1c93bbf3990514c45b06b63e648c46f63 Mon Sep 17 00:00:00 2001
From: David Luevano Alvarado <david@luevano.xyz>
Date: Tue, 25 May 2021 21:21:20 -0600
Subject: add sitemap support

---
 ChangeLog             |  4 ++++
 README.md             |  4 ++--
 src/pyssg/builder.py  |  6 ++++++
 src/pyssg/pyssg.py    |  7 +++++++
 src/pyssg/sitemap.py  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pyssg/template.py | 20 +++++++++++++++++-
 6 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 src/pyssg/sitemap.py

diff --git a/ChangeLog b/ChangeLog
index c35abee..2177fc1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,10 @@
 CHANGES
 =======
 
+v0.3.1
+------
+
+* fix readme links
 * add tags as categories
 * add content:encoded tag in rss and update readme
 
diff --git a/README.md b/README.md
index 00335d7..59a1333 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ I'm writing this in *pYtHoN* (thought about doing it in Go, but I'm most comfort
 	- [x] Preserves hand-made `*.html` files.
 	- [x] Tag functionality.
 	- [ ] Open Graph (and similar) support.
-- [ ] Build `sitemap.xml` file.
+- [x] Build `sitemap.xml` file.
 - [x] Build `rss.xml` file.
 - [x] Only build page if `*.md` is new or updated.
 	- [ ] Extend this to tag pages and index (right now all tags and index is built no matter if no new/updated file is present).
@@ -63,4 +63,4 @@ pyssg -s src_dir -d dst_dir -u https://base.url -b
 
 That creates all `*.html` for the site and can be easily moved to the server. Here, the `-u` flag is technically optional in the sense that you'll not receive a warning/error, but it's used to prepend links with this URL (not strictly required everywhere), so don't ignore it; also don't include the trailing `/`.
 
-For now, the `-b`uild tag also creates a `rss.xml` file based on a template (created when initializing the directories/templates) adding all converted `*.md` files, meaning that separate `*.html` files should be included manually in the template.
+For now, the `-b`uild flag also creates the `rss.xml` and `sitemap.xml` files from their templates; they include only the converted `*.md` files (plus the processed tags, in the case of the sitemap), so standalone `*.html` files must be added to the templates manually.
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index d2163d2..4939bcc 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -29,6 +29,7 @@ class HTMLBuilder:
         self.html_files: list[str] = None
 
         self.all_pages: list[Page] = None
+        self.all_tags: list[str] = None
 
 
     def build(self) -> None:
@@ -44,6 +45,7 @@ class HTMLBuilder:
 
         # just to be able to extract all pages out of this class
         self.all_pages = parser.all_pages
+        self.all_tags = parser.all_tags
 
         # create the article index
         self.__create_article_index(parser.all_tags, parser.all_pages)
@@ -61,6 +63,10 @@ class HTMLBuilder:
         return self.all_pages
 
 
+    def get_tags(self) -> list[str]:
+        return self.all_tags  # None until build() has stored parser.all_tags
+
+
     def __create_dir_structure(self) -> None:
         for d in self.dirs:
             # for the dir structure,
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index dbc5f0b..72b89ab 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -114,5 +114,12 @@ def main() -> None:
         rss_builder: RSSBuilder = RSSBuilder(config, template.rss, all_pages)
         rss_builder.build()
 
+        # build the sitemap from every page and tag collected by the builder
+        from .sitemap import SitemapBuilder
+        all_tags: list[str] = builder.get_tags()
+        sitemap_builder: SitemapBuilder = SitemapBuilder(
+            config, template.sitemap, all_pages, all_tags)
+        sitemap_builder.build()
+
         db.write()
         return
diff --git a/src/pyssg/sitemap.py b/src/pyssg/sitemap.py
new file mode 100644
index 0000000..e8338cb
--- /dev/null
+++ b/src/pyssg/sitemap.py
@@ -0,0 +1,57 @@
+import os
+from datetime import datetime, timezone
+
+from .page import Page
+from .configuration import Configuration
+
+
+DFORMAT = '%Y-%m-%d'
+
+
+class SitemapBuilder:
+    """Fill the sitemap template with page/tag URLs and write sitemap.xml."""
+
+    def __init__(self, config: Configuration,
+                 template: str,
+                 pages: list[Page],
+                 tags: list[str]) -> None:
+        self.config: Configuration = config
+        self.sitemap: str = template
+        self.pages: list[Page] = pages
+        self.tags: list[str] = tags
+
+
+    def build(self) -> None:
+        """Replace $$URLS in the template and write it to <dst>/sitemap.xml."""
+        urls_formatted: str = self.__get_urls_formatted()
+        self.sitemap = self.sitemap.replace('$$URLS', urls_formatted)
+
+        # the template declares utf-8, so don't rely on the locale's default
+        with open(os.path.join(self.config.dst, 'sitemap.xml'), 'w',
+                  encoding='utf-8') as f:
+            f.write(self.sitemap)
+
+
+    @staticmethod
+    def __format_url(url: str, date: str, freq: str, priority: str) -> str:
+        """Return one indented <url> entry, newline terminated."""
+        return ('    <url>\n'
+                f'      <loc>{url}</loc>\n'
+                f'      <lastmod>{date}</lastmod>\n'
+                f'      <changefreq>{freq}</changefreq>\n'
+                f'      <priority>{priority}</priority>\n'
+                '    </url>\n')
+
+
+    def __get_urls_formatted(self) -> str:
+        """Return the <url> entries of all pages followed by all tags."""
+        base: str = self.config.base_url
+        # tag pages are rebuilt on every run, so they share today's UTC date
+        today: str = datetime.now(tz=timezone.utc).strftime(DFORMAT)
+        entries: list[str] = [
+            self.__format_url(f'{base}/{p.name.replace(".md", ".html")}',
+                              p.m_datetime.strftime(DFORMAT), 'weekly', '1.0')
+            for p in self.pages]
+        entries.extend(self.__format_url(f'{base}/tag/@{t}.html', today,
+                                         'daily', '0.5') for t in self.tags)
+        return ''.join(entries)
diff --git a/src/pyssg/template.py b/src/pyssg/template.py
index 0c43f22..932b6ae 100644
--- a/src/pyssg/template.py
+++ b/src/pyssg/template.py
@@ -27,6 +27,7 @@ class Template(HF):
         self.articles: Common = Common()
         self.tags: Common = Common()
         self.rss: str = None
+        self.sitemap: str = None
 
         self.is_read: bool = False
 
@@ -141,6 +142,19 @@ class Template(HF):
                                '  </channel>\n',
                                '</rss>'])
 
+        # go back to templates
+        os.chdir('..')
+
+        os.mkdir('sitemap')
+        os.chdir('sitemap')
+        self.__write_template('sitemap.xml',
+                              ['<?xml version="1.0" encoding="utf-8"?>\n',
+                               '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n',
+                               '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n',
+                               '  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n',
+                               'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n',
+                               '$$URLS\n',
+                               '</urlset>'])
         # return to initial working directory
         os.chdir(iwd)
 
@@ -201,10 +215,14 @@ class Template(HF):
         # go back to templates
         os.chdir('..')
 
-        # tag
+        # rss
         os.chdir('rss')
         self.rss = self.__read_template('rss.xml')
 
+        # sitemap: its directory sits next to 'rss', so step out of 'rss'
+        os.chdir(os.path.join('..', 'sitemap'))
+        self.sitemap = self.__read_template('sitemap.xml')
+
         # return to initial working directory
         os.chdir(iwd)
 
-- 
cgit v1.2.3-70-g09d2