20 files changed, 417 insertions, 291 deletions
diff --git a/ChangeLog b/ChangeLog
index 5747183..d5d7cbb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,10 @@
 CHANGES
 =======
 
+v0.1.0
+------
+
+* refactor code and almost finish main functionality
 * semiworking program, still very alpha
 * Add working file discovery and destination file structure creation
 * initial template creation, barebones arg parser
diff --git a/e_dst/a/test2.html b/e_dst/a/test2.html
index ddb9b53..57f2b07 100644
--- a/e_dst/a/test2.html
+++ b/e_dst/a/test2.html
@@ -3,7 +3,6 @@
 <head>
 <meta charset="utf-8">
 <title>Test 2</title>
-
 </head>
 <body>
 <h1>Test 2</h1><p>This is an article test, again</p>
@@ -13,6 +12,6 @@
 <li>??</li>
 </ul>
 </li>
-</ul><p>Tags: <a href="https://blog.luevano.xyz/tag/@test.html">test</a>, <a href="https://blog.luevano.xyz/tag/@archive.html">archive</a>, <a href="https://blog.luevano.xyz/tag/@another-test.html">another-test</a>, <a href="https://blog.luevano.xyz/tag/@123.html">123</a></p>
+</ul><p>Tags: <a href="https://blog.luevano.xyz/tag/@123.html">123</a>, <a href="https://blog.luevano.xyz/tag/@another-test.html">another-test</a>, <a href="https://blog.luevano.xyz/tag/@archive.html">archive</a>, <a href="https://blog.luevano.xyz/tag/@test.html">test</a></p>
 </body>
 </html>
diff --git a/e_dst/index.html b/e_dst/index.html
new file mode 100644
index 0000000..1247d09
--- /dev/null
+++ b/e_dst/index.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>Index</title>
+</head>
+<body>
+<p>Tags: <a href="https://blog.luevano.xyz/tag/@123.html">123</a>, <a href="https://blog.luevano.xyz/tag/@another-test.html">another-test</a>, <a href="https://blog.luevano.xyz/tag/@archive.html">archive</a>, <a href="https://blog.luevano.xyz/tag/@small.html">small</a>, <a href="https://blog.luevano.xyz/tag/@test.html">test</a></p>
+<h2>Articles</h2>
+<ul>
+<h3>May 2021</h3>
+<li>May 11 - <a href="https://blog.luevano.xyz/test1.html">Test 1</a></li>
+<li>May 12 - <a href="https://blog.luevano.xyz/test2.html">Test 1</a></li>
+<li>May 12 - <a href="https://blog.luevano.xyz/a/test2.html">Test 2</a></li>
+</ul>
+</body>
+</html>
diff --git a/e_dst/tag/@123.html b/e_dst/tag/@123.html
index 36f1c04..da82747 100644
--- a/e_dst/tag/@123.html
+++ b/e_dst/tag/@123.html
@@ -2,8 +2,7 @@
 <html lang="en">
 <head>
 <meta charset="utf-8">
-<title>Posts filtered by tag "123"</title>
-
+<title>Posts filtered by: 123</title>
 </head>
 <body>
 <p>Posts filtered by tag: <a href="https://blog.luevano.xyz/tag/@123.html">123</a></p>
diff --git a/e_dst/tag/@another-test.html b/e_dst/tag/@another-test.html
index dfbc30e..dcbb482 100644
--- a/e_dst/tag/@another-test.html
+++ b/e_dst/tag/@another-test.html
@@ -2,8 +2,7 @@
 <html lang="en">
 <head>
 <meta charset="utf-8">
-<title>Posts filtered by tag "another-test"</title>
-
+<title>Posts filtered by: another-test</title>
 </head>
 <body>
 <p>Posts filtered by tag: <a href="https://blog.luevano.xyz/tag/@another-test.html">another-test</a></p>
diff --git a/e_dst/tag/@archive.html b/e_dst/tag/@archive.html
index 80fb231..57cdbe4 100644
--- a/e_dst/tag/@archive.html
+++ b/e_dst/tag/@archive.html
@@ -2,8 +2,7 @@
 <html lang="en">
 <head>
 <meta charset="utf-8">
-<title>Posts filtered by tag "archive"</title>
-
+<title>Posts filtered by: archive</title>
 </head>
 <body>
 <p>Posts filtered by tag: <a href="https://blog.luevano.xyz/tag/@archive.html">archive</a></p>
diff --git a/e_dst/tag/@small.html b/e_dst/tag/@small.html
index bdc2920..8531a8c 100644
--- a/e_dst/tag/@small.html
+++ b/e_dst/tag/@small.html
@@ -2,8 +2,7 @@
 <html lang="en">
 <head>
 <meta charset="utf-8">
-<title>Posts filtered by tag "small"</title>
-
+<title>Posts filtered by: small</title>
 </head>
 <body>
 <p>Posts filtered by tag: <a href="https://blog.luevano.xyz/tag/@small.html">small</a></p>
diff --git a/e_dst/tag/@test.html b/e_dst/tag/@test.html
index 5cbb322..d5fa3be 100644
--- a/e_dst/tag/@test.html
+++ b/e_dst/tag/@test.html
@@ -2,8 +2,7 @@
 <html lang="en">
 <head>
 <meta charset="utf-8">
-<title>Posts filtered by tag "test"</title>
-
+<title>Posts filtered by: test</title>
 </head>
 <body>
 <p>Posts filtered by tag: <a href="https://blog.luevano.xyz/tag/@test.html">test</a></p>
diff --git a/e_dst/test1.html b/e_dst/test1.html
index 6ce9593..7073ee6 100644
--- a/e_dst/test1.html
+++ b/e_dst/test1.html
@@ -3,7 +3,6 @@
 <head>
 <meta charset="utf-8">
 <title>Test 1</title>
-
 </head>
 <body>
 <h1>Test 1</h1><p>This is an article test</p>
@@ -11,6 +10,6 @@
 <li>asdfg</li>
 <li>hjikl</li>
 <li>modified</li>
-</ul><p>Tags: <a href="https://blog.luevano.xyz/tag/@test.html">test</a>, <a href="https://blog.luevano.xyz/tag/@small.html">small</a>, <a href="https://blog.luevano.xyz/tag/@archive.html">archive</a></p>
+</ul><p>Tags: <a href="https://blog.luevano.xyz/tag/@archive.html">archive</a>, <a href="https://blog.luevano.xyz/tag/@small.html">small</a>, <a href="https://blog.luevano.xyz/tag/@test.html">test</a></p>
 </body>
 </html>
diff --git a/e_dst/test2.html b/e_dst/test2.html
index 714fed6..e5bbffd 100644
--- a/e_dst/test2.html
+++ b/e_dst/test2.html
@@ -3,7 +3,6 @@
 <head>
 <meta charset="utf-8">
 <title>Test 1</title>
-
 </head>
 <body>
 <h1>Test 1</h1><p>This is an article test</p>
diff --git a/e_src/.files b/e_src/.files
index 5381199..6896406 100644
--- a/e_src/.files
+++ b/e_src/.files
@@ -1,5 +1,5 @@
 test1.html 1620747134.884041 0.0 -
 a/test2.html 1620583182.7999856 0.0 -
-test1.md 1620745454.0012724 0.0 test,small,archive
+test1.md 1620745454.0012724 0.0 archive,small,test
 test2.md 1620786518.755961 0.0 test
-a/test2.md 1620781032.5948799 0.0 test,archive,another-test,123
+a/test2.md 1620781032.5948799 0.0 123,another-test,archive,test
diff --git a/e_src/templates/common/header.html b/e_src/templates/common/header.html
index 9c423df..6237620 100644
--- a/e_src/templates/common/header.html
+++ b/e_src/templates/common/header.html
@@ -3,6 +3,5 @@
 <head>
 <meta charset="utf-8">
 <title>$$TITLE</title>
-$$EXTRAHEAD
 </head>
 <body>
diff --git a/src/pyssg/builder.py b/src/pyssg/builder.py
index 002d80e..2ba1b6c 100644
--- a/src/pyssg/builder.py
+++ b/src/pyssg/builder.py
@@ -1,44 +1,282 @@
 import os
 import shutil
+from copy import deepcopy
 
+from .template import Template
 from .database import Database
-from .discovery import get_all_files
-from .converter import create_html_files
+from .parser import MDParser
+from .page import Page
+from .discovery import get_file_list, get_dir_structure
 
+class HTMLBuilder:
+    def __init__(self, src: str,
+                 dst: str,
+                 base_url: str,
+                 template: Template,
+                 db: Database,
+                 dformat: str=None,
+                 l_dformat: str=None,
+                 lsep_dformat: str=None):
+        self.src: str = src
+        self.dst: str = dst
+        self.base_url: str = base_url
+        self.template: Template = template
+        self.db: Database = db
+        self.dformat: str = None
+        self.l_dformat: str = None
+        self.lsep_dformat: str = None
 
-def create_dir_structure(dst: str,
-                         dirs: list[str]) -> None:
-    for d in dirs:
-        # for the dir structure,
-        # doesn't matter if the dir already exists
-        try:
-            os.makedirs(os.path.join(dst, d))
-        except FileExistsError:
-            pass
+        if dformat is not None:
+            self.dformat = dformat
+        else:
+            self.dformat = "%a, %d %b, %Y @ %H:%M %Z"
 
+        if l_dformat is not None:
+            self.l_dformat = l_dformat
+        else:
+            self.l_dformat = "%b %d"
 
-def copy_html_files(src: str,
-                    dst: str,
-                    files: list[str],
-                    db: Database) -> None:
-    src_file = None
-    dst_file = None
+        if lsep_dformat is not None:
+            self.lsep_dformat = lsep_dformat
+        else:
+            self.lsep_dformat = "%B %Y"
 
-    for f in files:
-        src_file = os.path.join(src, f)
-        dst_file = os.path.join(dst, f)
+        self.dirs: list[str] = None
+        self.md_files: list[str] = None
+        self.html_files: list[str] = None
 
-        # only copy files if they have been modified (or are new)
-        if db.update(src_file, remove=f'{src}/'):
-            shutil.copy2(src_file, dst_file)
 
+    def build(self) -> None:
+        self.dirs = get_dir_structure(self.src, ['templates'])
+        self.md_files = get_file_list(self.src, ['.md'], ['templates'])
+        self.html_files = get_file_list(self.src, ['.html'], ['templates'])
 
-def build_static_site(src: str,
-                      dst: str,
-                      db: Database) -> None:
-    # get all file data and create necessary dir structure
-    dirs, md_files, html_files = get_all_files(src)
-    create_dir_structure(dst, dirs)
+        self.__create_dir_structure()
+        self.__copy_html_files()
 
-    copy_html_files(src, dst, html_files, db)
-    create_html_files(src, dst, md_files, db)
+        parser: MDParser = MDParser(self.src, self.md_files, self.db)
+        parser.parse()
+
+        # create the article index
+        self.__create_article_index(parser.all_tags, parser.all_pages)
+
+        # create each category of html pages
+        self.__create_articles(parser.updated_pages)
+        self.__create_tags(parser.all_tags, parser.all_pages)
+
+
+    def __create_dir_structure(self) -> None:
+        for d in self.dirs:
+            # for the dir structure,
+            # doesn't matter if the dir already exists
+            try:
+                os.makedirs(os.path.join(self.dst, d))
+            except FileExistsError:
+                pass
+
+
+    def __copy_html_files(self) -> None:
+        src_file: str = None
+        dst_file: str = None
+
+        for f in self.html_files:
+            src_file = os.path.join(self.src, f)
+            dst_file = os.path.join(self.dst, f)
+
+            # only copy files if they have been modified (or are new)
+            if self.db.update(src_file, remove=f'{self.src}/'):
+                shutil.copy2(src_file, dst_file)
+
+
+    # this is really similar to create_tag (singular)
+    def __create_article_index(self, tags: list[str],
+                               pages: list[Page]) -> None:
+        # make temporary template
+        t: Template = deepcopy(self.template)
+
+        # do basic replacements
+        # get page and tag lists formatted; both functions do replacements
+        p_list: list[str] = self.__get_pages_formatted(pages, t)
+        t_list: list[str] = self.__get_tags_formatted(tags, t)
+        # common
+        t.header = t.header.replace("$$LANG", 'en')
+        t.header = t.header.replace('$$TITLE', f'Index')
+
+        with open(os.path.join(self.dst, 'index.html'), 'w') as f:
+            f.write(t.header)
+            f.write(t.articles.header)
+
+            f.write(t.tags.list_header)
+            for tag in t_list:
+                f.write(tag)
+            f.write(t.tags.list_footer)
+
+            f.write(t.articles.list_header)
+            for page in p_list:
+                f.write(page)
+            f.write(t.articles.list_footer)
+
+            f.write(t.articles.footer)
+            f.write(t.footer)
+
+
+    def __create_articles(self, pages: list[Page]) -> None:
+        for p in pages:
+            self.__create_article(p)
+
+
+    def __create_article(self, page: Page) -> None:
+        # TODO: create better solution for replace
+        # make temporary template
+        t: Template = deepcopy(self.template)
+
+        # prepare html file name
+        f_name: str = page.name
+        f_name = f_name.replace('.md', '.html')
+
+        # get timestamps
+        c_date: str = page.c_datetime.strftime(self.dformat)
+        m_date: str = None
+        if page.m_datetime is not None:
+            m_date: str = page.m_datetime.strftime(self.dformat)
+
+        # do basic replacements
+        # get tag list formatted (some replacements done inside
+        # get_tags_formatted)
+        t_list: list[str] = None
+        if page.tags is not None:
+            t_list = self.__get_tags_formatted(page.tags, t)
+
+        # common
+        t.header = t.header.replace("$$LANG", page.lang)
+        t.header = t.header.replace('$$TITLE', page.title)
+
+        # article header
+        t.article.header = t.article.header.replace('$$TITLE', page.title)
+        t.article.header = t.article.header.replace('$$AUTHOR', page.author)
+        t.article.header = t.article.header.replace('$$CTIME', c_date)
+        if m_date is not None:
+            t.article.header = t.article.header.replace('$$MTIME', m_date)
+        else:
+            t.article.header = t.article.header.replace('$$MTIME', '')
+
+        # article footer (same replaces as header)
+        t.article.footer = t.article.footer.replace('$$TITLE', page.title)
+        t.article.footer = t.article.footer.replace('$$AUTHOR', page.author)
+        t.article.footer = t.article.footer.replace('$$CTIME', c_date)
+        if m_date is not None:
+            t.article.footer = t.article.footer.replace('$$MTIME', m_date)
+        else:
+            t.article.footer = t.article.footer.replace('$$MTIME', '')
+
+
+        with open(os.path.join(self.dst, f_name), 'w') as f:
+            f.write(t.header)
+            f.write(t.article.header)
+            f.write(page.html)
+
+            if t_list is not None:
+                f.write(t.tags.list_header)
+                for tag in t_list:
+                    f.write(tag)
+                f.write(t.tags.list_footer)
+
+            f.write(t.article.footer)
+            f.write(t.footer)
+
+
+    def __get_tags_formatted(self, tags: list[str],
+                             template: Template) -> list[str]:
+        tag_amount: int = len(tags)
+        tags_formatted: list[str] = []
+        for i, t in enumerate(tags):
+            # t_e=tag entry
+            t_e: str = template.tags.list_entry
+            t_e = t_e.replace('$$URL',
+                              f'{self.base_url}/tag/@{t}.html')
+            t_e = t_e.replace('$$NAME', t)
+
+            tags_formatted.append(t_e)
+            if i != tag_amount - 1:
+                tags_formatted.append(template.tags.list_separator)
+
+        return tags_formatted
+
+
+    def __create_tags(self, tags: list[str],
+                      pages: list[Page]) -> None:
+        for t in tags:
+            # get a list of all pages that have current tag
+            # and sort them (by time)
+            tag_pages: list[Page] = []
+            for p in pages:
+                if p.tags is not None and t in p.tags:
+                    tag_pages.append(p)
+            tag_pages.sort(reverse=True)
+
+            # build tag page
+            self.__create_tag(t, tag_pages)
+
+            # clean list of pages with current tag
+            tag_pages = []
+
+
+    def __create_tag(self, tag: str,
+                     pages: list[Page]) -> None:
+        # TODO: create better solution for replace
+        # make temporary template
+        t: Template = deepcopy(self.template)
+
+        # do basic replacements
+        # get page list formatted (some replacements done inside
+        # get_pages_formatted)
+        p_list: list[str] = self.__get_pages_formatted(pages, t)
+        # common
+        t.header = t.header.replace("$$LANG", 'en')
+        t.header = t.header.replace('$$TITLE', f'Posts filtered by: {tag}')
+
+        # tag header
+        tag_url: str = f'{self.base_url}/tag/@{tag}.html'
+        t.tags.header = t.tags.header.replace('$$NAME', tag)
+        t.tags.header = t.tags.header.replace('$$URL', tag_url)
+
+        with open(os.path.join(self.dst, f'tag/@{tag}.html'), 'w') as f:
+            f.write(t.header)
+            f.write(t.tags.header)
+
+            f.write(t.articles.list_header)
+            for p in p_list:
+                f.write(p)
+            f.write(t.articles.list_footer)
+
+            f.write(t.tags.footer)
+            f.write(t.footer)
+
+
+    def __get_pages_formatted(self, pages: list[Page],
+                              template: Template) -> list[str]:
+        month_year: str = '-'
+        pages_formatted: list[str] = []
+        for p in pages:
+            # check if the monthly separator should be included
+            c_month_year: str = p.c_datetime.strftime(self.lsep_dformat)
+            if c_month_year != month_year:
+                month_year = c_month_year
+
+                month_sep: str = template.articles.list_separator
+                month_sep = month_sep.replace('$$SEP', month_year)
+
+                pages_formatted.append(month_sep)
+
+            f_name: str = p.name
+            f_name = f_name.replace('.md', '.html')
+
+            # p_e=page entry
+            p_e: str = template.articles.list_entry
+            p_e = p_e.replace('$$URL', f'{self.base_url}/{f_name}')
+            p_e = p_e.replace('$$DATE', p.c_datetime.strftime(self.l_dformat))
+            p_e = p_e.replace('$$TITLE', p.title)
+
+            pages_formatted.append(p_e)
+
+        return pages_formatted
diff --git a/src/pyssg/converter.py b/src/pyssg/converter.py
deleted file mode 100644
index d054855..0000000
--- a/src/pyssg/converter.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import os
-from datetime import datetime
-from markdown import Markdown
-from copy import deepcopy
-
-from .database import Database
-from .template import Template
-from .page import Page
-
-
-def get_pages(src: str,
-              files: list[str],
-              db: Database) -> (list[Page], list[Page]):
-    md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists',
-                                        'smarty', 'toc', 'wikilinks'],
-                            output_format='html5')
-
-    all_pages: list[Page] = []
-    updated_pages: list[Page] = []
-    for f in files:
-        src_file: str = os.path.join(src, f)
-        # get flag if update is successful
-        updated: bool = db.update(src_file, remove=f'{src}/')
-
-        page: Page = None
-        content: str = md.reset().convert(open(src_file).read())
-        page = Page(f, db.e[f][0], db.e[f][1], content, md.Meta)
-
-        if updated:
-            updated_pages.append(page)
-        all_pages.append(page)
-
-        # add its tag to corresponding entry if existent
-        if page.tags is not None:
-            db.update_tags(f, page.tags)
-
-
-    return (all_pages, updated_pages)
-
-
-def create_articles(dst: str,
-                    pages: list[Page],
-                    template: Template) -> None:
-    # TODO: clean this mess
-    # TODO: proper creation of html files
-    for p in pages:
-        create_article(dst, p, template)
-
-
-def create_article(dst: str,
-                   page: Page,
-                   template: Template) -> None:
-    # TODO: clean this mess
-    # make temporary template
-    t: Template = deepcopy(template)
-    # TODO: make this configurable
-    base_url: str = 'https://blog.luevano.xyz/'
-
-    f_name: str = page.name
-    f_name = f_name.replace('.md', '.html')
-    f_name = f_name.replace('.markdown', '.html')
-
-    with open(os.path.join(dst, f_name), 'w') as f:
-        # common
-        t.header = t.header.replace("$$LANG",
-                                    page.lang if page.lang is not None else 'en')
-        t.header = t.header.replace('$$TITLE', page.title)
-        t.header = t.header.replace('$$EXTRAHEAD', '')
-
-        # article header
-        t.article.header = t.article.header.replace('$$TITLE', page.title)
-
-        # Actually write to the html file
-        f.write(t.header)
-        f.write(t.article.header)
-        f.write(page.html)
-
-        if page.tags is not None:
-            tag_amount: int = len(page.tags)
-
-            f.write(t.tags.list_header)
-            for i, tag in enumerate(page.tags):
-                t_entry: str = t.tags.list_entry
-                t_entry = t_entry.replace('$$URL', f'{base_url}tag/@{tag}.html')
-                t_entry = t_entry.replace('$$NAME', tag)
-
-                f.write(t_entry)
-                # don't write last separator, not needed
-                if i != tag_amount - 1:
-                    f.write(t.tags.list_separator)
-            f.write(t.tags.list_footer)
-
-        f.write(t.article.footer)
-        f.write(t.footer)
-
-
-def get_all_tags(pages: list[Page]) -> list[str]:
-    tags: list[str] = []
-    for p in pages:
-        if p.tags is not None:
-            for t in p.tags:
-                if t not in tags:
-                    tags.append(t)
-    tags.sort()
-
-    return tags
-
-
-def create_tags(dst: str,
-                tags: list[str],
-                pages: list[Page],
-                template: Template) -> None:
-    for t in tags:
-        # get a list of all pages that have current tag
-        # and sort them (by time)
-        tag_pages: list[Page] = []
-        for p in pages:
-            if p.tags is not None and t in p.tags:
-                tag_pages.append(p)
-        tag_pages.sort(reverse=True)
-
-        # build tag page
-        create_tag(dst, t, tag_pages, template)
-
-        # clean list of pages with current tag
-        tag_pages = []
-
-
-def create_tag(dst: str,
-               tag: str,
-               pages: list[Page],
-               template: Template) -> None:
-    # TODO: clean this mess
-    # make temporary template
-    t: Template = deepcopy(template)
-    # TODO: make this configurable
-    base_url: str = 'https://blog.luevano.xyz/'
-
-    with open(os.path.join(dst, f'tag/@{tag}.html'), 'w') as f:
-        # common
-        t.header = t.header.replace("$$LANG", 'en')
-        t.header = t.header.replace('$$TITLE', f'Posts filtered by tag "{tag}"')
-        t.header = t.header.replace('$$EXTRAHEAD', '')
-
-        # tag header
-        t.tags.header = t.tags.header.replace('$$NAME', tag)
-        t.tags.header = t.tags.header.replace('$$URL',
-                                              f'{base_url}tag/@{tag}.html')
-
-        # Actually write to the html file
-        f.write(t.header)
-        f.write(t.tags.header)
-        f.write(t.articles.list_header)
-
-        month_year: str = '-'
-        for p in pages:
-            c_month_year: str = p.c_datetime.strftime('%B %Y')
-            if c_month_year != month_year:
-                month_year = c_month_year
-
-                month_sep: str = t.articles.list_separator
-                month_sep = month_sep.replace('$$SEP', month_year)
-
-                f.write(month_sep)
-
-            f_name: str = p.name
-            f_name = f_name.replace('.md', '.html')
-            f_name = f_name.replace('.markdown', '.html')
-
-            page_entry: str = t.articles.list_entry
-            page_entry = page_entry.replace('$$URL', f'{base_url}{f_name}')
-            page_entry = page_entry.replace('$$DATE',
-                                            p.c_datetime.strftime('%b %d'))
-            page_entry = page_entry.replace('$$TITLE', p.title)
-
-            f.write(page_entry)
-
-        f.write(t.articles.list_footer)
-        f.write(t.tags.footer)
-        f.write(t.footer)
-
-
-def create_article_index(dst: str,
-                         tags: list[str],
-                         pages: list[Page]) -> None:
-    # TODO: actually make this function
-    pass
-
-
-def create_html_files(src: str,
-                      dst: str,
-                      files: list[str],
-                      db: Database) -> None:
-    # get the list of page objects
-    all_pages, updated_pages = get_pages(src, files, db)
-
-    # get all tags
-    all_tags = get_all_tags(all_pages)
-
-    # read all templates into a template obj
-    template: Template = Template(src)
-    template.read()
-
-    # create each category of html pages
-    create_articles(dst, updated_pages, template)
-    create_tags(dst, all_tags, all_pages, template)
-
-    # create the article index
-    create_article_index(dst, all_tags, all_pages)
diff --git a/src/pyssg/database.py b/src/pyssg/database.py
index 61ca502..1b421c0 100644
--- a/src/pyssg/database.py
+++ b/src/pyssg/database.py
@@ -7,9 +7,8 @@ class Database:
         self.db_path: str = db_path
         self.e: dict[str, tuple[float, float, list[str]]] = dict()
 
-        self.__read()
-
 
+    # updates the tags for a specific entry (file)
     def update_tags(self, file_name: str,
                     tags: list[str]) -> None:
         if file_name in self.e:
@@ -68,7 +67,7 @@ class Database:
                 file.write(f'{k} {v[0]} {v[1]} {t}\n')
 
 
-    def __read(self) -> None:
+    def read(self) -> None:
         # only if the path exists and it is a file
         if os.path.exists(self.db_path) and os.path.isfile(self.db_path):
             # get all db file lines
diff --git a/src/pyssg/discovery.py b/src/pyssg/discovery.py
index 7fe5964..8dbbf69 100644
--- a/src/pyssg/discovery.py
+++ b/src/pyssg/discovery.py
@@ -29,16 +29,3 @@ def get_dir_structure(directory: str,
                 out.append(os.path.join(root, d))
 
     return [o.replace(directory, '')[1:] for o in out]
-
-
-def get_all_files(src: str) -> tuple[list[str], list[str], list[str]]:
-    md_files: list[str] = get_file_list(src,
-                                        ['.md', '.markdown'],
-                                        ['templates'])
-    html_files: list[str] = get_file_list(src,
-                                          ['.html'],
-                                          ['templates'])
-    dirs: list[str] = get_dir_structure(src,
-                                        ['templates'])
-
-    return (dirs, md_files, html_files)
diff --git a/src/pyssg/page.py b/src/pyssg/page.py
index 1d2f6dd..e03ca6b 100644
--- a/src/pyssg/page.py
+++ b/src/pyssg/page.py
@@ -15,30 +15,33 @@ class Page:
         self.meta: dict = meta
 
         # data from self.meta
-        self.title: str = None
-        self.author: str = None
+        self.title: str = ''
+        self.author: str = ''
         self.c_datetime: datetime = None
         self.m_datetime: datetime = None
-        self.summary: str = None
-        self.lang: str = None
+        self.summary: str = ''
+        self.lang: str = 'en'
         self.tags: list = None
 
+        # also from self.meta, but for og metadata
+        self.og: dict[str, str] = dict()
+
         self.__parse_meta()
 
     def __lt__(self, other):
         return self.c_time < other.c_time
 
 
+    # parses meta from self.meta; for og, it prioritizes
+    # the actual og meta
     def __parse_meta(self):
         try:
             self.title = self.meta['title'][0]
-        except KeyError:
-            pass
+        except KeyError: pass
 
         try:
             self.author = self.meta['author'][0]
-        except KeyError:
-            pass
+        except KeyError: pass
 
         self.c_datetime = datetime.fromtimestamp(self.c_time,
                                                  tz=timezone.utc)
@@ -49,15 +52,26 @@ class Page:
 
         try:
             self.summary = self.meta['summary'][0]
-        except KeyError:
-            pass
+        except KeyError: pass
 
         try:
             self.lang = self.meta['lang'][0]
-        except KeyError:
-            pass
+        except KeyError: pass
 
         try:
             self.tags = self.meta['tags']
-        except KeyError:
-            pass
+            self.tags.sort()
+        except KeyError: pass
+
+        try:
+            # og_e = Open Graph entry
+            for og_e in self.meta['og']:
+                kv: str = og_e.split(',', 1)
+                if len(kv) != 2:
+                    raise Exception('invalid og syntax')
+
+                k: str = kv[0].strip()
+                v: str = kv[1].strip()
+
+                self.og[k] = v
+        except KeyError: pass
diff --git a/src/pyssg/parser.py b/src/pyssg/parser.py
new file mode 100644
index 0000000..43028ef
--- /dev/null
+++ b/src/pyssg/parser.py
@@ -0,0 +1,58 @@
+import os
+from datetime import datetime
+from markdown import Markdown
+
+from .database import Database
+from .page import Page
+
+
+# parser of md files, stores list of pages and tags
+class MDParser:
+    def __init__(self, src: str,
+                 files: list[str],
+                 db: Database):
+        self.md: Markdown = Markdown(extensions=['extra', 'meta', 'sane_lists',
+                                                 'smarty', 'toc', 'wikilinks'],
+                                     output_format='html5')
+        self.src: str = src
+        self.files: list[str] = files
+        self.db: Database = db
+
+        self.all_pages: list[Page] = None
+        self.updated_pages: list[Page] = None
+        self.all_tags: list[str] = None
+
+
+    def parse(self):
+        # initialize lists
+        self.all_pages = []
+        self.updated_pages = []
+        self.all_tags = []
+
+        for f in self.files:
+            src_file: str = os.path.join(self.src, f)
+            # get flag if update is successful
+            updated: bool = self.db.update(src_file, remove=f'{self.src}/')
+
+            page: Page = None
+            content: str = self.md.reset().convert(open(src_file).read())
+            page = Page(f, self.db.e[f][0], self.db.e[f][1], content, self.md.Meta)
+
+            # keep separate lists for all pages and for updated pages
+            if updated:
+                self.updated_pages.append(page)
+            self.all_pages.append(page)
+
+            # parse tags
+            if page.tags is not None:
+                # add its tags to the corresponding db entry, if it exists
+                self.db.update_tags(f, page.tags)
+
+                # update all_tags attribute
+                for t in page.tags:
+                    if t not in self.all_tags:
+                        self.all_tags.append(t)
+
+        # sort tags and updated pages for consistency
+        self.all_tags.sort()
+        self.updated_pages.sort()
diff --git a/src/pyssg/pyssg.py b/src/pyssg/pyssg.py
index b076abb..3f8cb8d 100644
--- a/src/pyssg/pyssg.py
+++ b/src/pyssg/pyssg.py
@@ -3,7 +3,7 @@ from argparse import ArgumentParser, Namespace
 
 from .database import Database
 from .template import Template
-from .builder import build_static_site
+from .builder import HTMLBuilder
 
 
 def get_options() -> Namespace:
@@ -20,6 +20,10 @@ def get_options() -> Namespace:
                         type=str,
                         help='''dst directory; generated (and transfered html)
                         files; defaults to 'dst' ''')
+    parser.add_argument('-u', '--url',
+                        required=True,
+                        type=str,
+                        help='''base url without trailing slash''')
     parser.add_argument('-i', '--init',
                         action='store_true',
                         help='''initializes the dir structure, templates,
@@ -36,6 +40,7 @@ def main() -> None:
     opts: dict[str] = vars(get_options())
     src: str = opts['src']
     dst: str = opts['dst']
+    base_url: str = opts['url']
 
     if opts['init']:
         try:
@@ -44,14 +49,22 @@ def main() -> None:
         except FileExistsError:
             pass
 
+        # write default templates
         template: Template = Template(src)
         template.write()
         return
 
     if opts['build']:
+        # start the db
         db: Database = Database(os.path.join(src, '.files'))
+        db.read()
+
+        # read templates
+        template: Template = Template(src)
+        template.read()
 
-        build_static_site(src, dst, db)
+        builder: HTMLBuilder = HTMLBuilder(src, dst, base_url, template, db)
+        builder.build()
 
         db.write()
         return
diff --git a/src/pyssg/template.py b/src/pyssg/template.py
index 61610d6..cd80d28 100644
--- a/src/pyssg/template.py
+++ b/src/pyssg/template.py
@@ -1,12 +1,16 @@
 import os
 
+from .page import Page
 
+
+# all objects here require a header and a footer at minimum
 class HF:
     def __init__(self):
         self.header: str = None
         self.footer: str = None
 
 
+# some objects require a "list-like" set of attributes
 class Common(HF):
     def __init__(self):
         self.list_header: str = None
@@ -15,6 +19,7 @@ class Common(HF):
         self.list_separator: str = None
 
 
+# main class
 class Template(HF):
     def __init__(self, src: str):
         self.src: str = src
@@ -22,7 +27,10 @@ class Template(HF):
         self.articles: Common = Common()
         self.tags: Common = Common()
 
+        self.is_read: bool = False
+
 
+    # writes default templates
     def write(self) -> None:
         # get initial working directory
         iwd = os.getcwd()
@@ -41,7 +49,6 @@ class Template(HF):
                                '<head>\n',
                                '<meta charset="utf-8">\n',
                                '<title>$$TITLE</title>\n',
-                               '$$EXTRAHEAD\n',
                                '</head>\n',
                                '<body>\n'])
         self.__write_template('footer.html',
@@ -102,7 +109,14 @@ class Template(HF):
         os.chdir(iwd)
 
 
+    # reads templates and stores them into class attributes
     def read(self) -> None:
+        # only read templates if not read already
+        # (might want to change this behaviour)
+        if self.is_read:
+            return
+        self.is_read = True
+
         # get initial working directory
         iwd = os.getcwd()
         os.chdir(os.path.join(self.src, 'templates'))