From bf330db8d9d503c26fee54ec3d1f29cc0d0fda09 Mon Sep 17 00:00:00 2001 From: David Luevano Alvarado Date: Tue, 16 Mar 2021 21:37:40 -0700 Subject: Add new sbg command, based on ssg, rssg and lb, wip --- .local/bin/sbg | 430 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .local/bin/ssg | 12 ++ 2 files changed, 442 insertions(+) create mode 100755 .local/bin/sbg diff --git a/.local/bin/sbg b/.local/bin/sbg new file mode 100755 index 0000000..c989709 --- /dev/null +++ b/.local/bin/sbg @@ -0,0 +1,430 @@ +#!/bin/sh + +# +# (s)tatic (b)log (g)enerator. +# +# Heavily inspired (ripped off) by Roman Zolotarev's ssg5 (https://rgz.ee/bin/ssg5) +# and rssg (https://rgz.ee/bin/rssg), as well as Luke Smith's lb and sup +# (https://github.com/LukeSmithxyz/lb). +# +# Aimed to correct 'flaws' of both sets of scripts, and made to work +# as how I wanted them both to work. +# +# This script uses lowdown (https://github.com/kristapsdz/lowdown) for +# the .md to .html translation. +# + + +print_help(){ + # $1: dir/file or parameter name, $2: dir/file name + + if [ "$1" = "dir" ] + then echo "$(basename "$0"): $2: no such directory." + elif [ "$1" = "file" ] + then echo "$(basename "$0"): $2: no such file." + else + if test -n "$1" + then echo "$(basename "$0"): missing -$1 parameter." + fi + fi + echo "" + echo "Usage: $(basename "$0") [-s] [-d] [-t] [-b] [other options]" + echo " -s 'src' directory." + echo " -d 'dst' directory." + echo " -t 'title' blog title." + echo " -b 'base url' blog base url." + echo " -f 'date format' date format as stated in 'find' command." + echo " -e 'extract' extract the header and footer from this .html file (replaces _header.html and _footer.html in src)." + exit 1 +} + + +readlink_f(){ + # $1: src/dst + + file="$1" + cd "$(dirname "$file")" + file=$(basename "$file") + + # Tests if src or dst is a symbolic link, + # in which case resolves the symbolic link + # and gets the actual directories. + while test -L "$file" + do + file=$(readlink "$file") + cd "$(dirname "$file")" + file=$(basename "$file") + done + + # Avoid all symbolic links and return the actual path. + dir=$(pwd -P) + echo "$dir/$file" +} + + +extract_hf_from_html(){ + # $1: html file name, $2: title + + # Get the header part, and replace title. + sed "/<\/header/q" "$1" > "$src/_header.html" + sed -i "s/.*/<title>$2<\/title>/g" "$src/_header.html" + + # Not proud of this... get the footer part. + tac "$1" | sed "/<footer/q" | tac > "$src/_footer.html" +} + + +list_dirs(){ + # $1: dir path. + + cd "$1" && eval "find . -type d ! -name '.' ! -path '*/_*' $IGNORE" +} + + +list_files(){ + # $1: dir path. + + cd "$1" && eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE -exec stat -c '%Y 0 %n' {} \\;" +} + + +list_pages(){ + # $1: dir path. + + e="\\( -name '*.html' -o -name '*.md' \\)" + cd "$1" && eval "find . -type f ! -path '*/.*' ! -path '*/_*' $IGNORE $e" | + sed 's#^./##;s#.md$#.html#;s#/index.html$#/#' +} + + +list_files_add_mod(){ + # $1: dir path, $2: .files. + # 'n' stands for new and 'o' for old. + + cd "$1" && fn=$(list_files "$1") + fo=$(cat "$2") + + # Loop over new file list. + while IFS= read -r fn_line + do + fn_n=$(echo "$fn_line" | cut -d' ' -f3) + fn_ts=$(echo "$fn_line" | cut -d' ' -f1) + + fo_line=$(echo "$fo" | grep "$fn_n") + if test -n "$fo_line" + then + # If file name exists in old list... + fo_ts=$(echo "$fo_line" | cut -d' ' -f1) + + # Either add the new timestamp on the + # second column (mod) or keep the old line. + if [ $fn_ts -gt $fo_ts ] + then echo $fo_ts $fn_ts $fn + else echo $fo_line + fi + else + # If not, just print the new file data. + echo $fn_ts" 0 "$fn + fi + done <<EOF + $fn +EOF +} + + +md_to_html(){ + # $1: src, $2: dst, $3: title, $4: .files + + while read -r line + do + # Only for .md files, duh. + if echo $line | grep -q '\.md$' + then + # File name, birth ts and mod ts. + f=$(echo "$line" | cut -d' ' -f3 | cut -d'/' -f2-) + b=$(echo "$line" | cut -d' ' -f1) + m=$(echo "$line" | cut -d' ' -f2) + + html=$( + lowdown \ + --html-no-skiphtml \ + --html-no-escapehtml \ + --html-no-owasp \ + --html-no-head-ids < "$1/$f") + + if echo "$f" | grep -q "index" + then html=$(add_article_list "$html" "$1" "$4") + else html=$(add_html_ts "$html" "$b" "$m") + fi + + add_hf_html "$html" > "$2/${f%\.md}.html" + fi + done <<EOF + $4 +EOF +} + + +add_article_list() { + # $1: html text, $2: src, $3: .files + + # Reverse sorted file list. + fs_reversed="$(echo "$3" | sort -r -k 1)" + + # echo the current html text and add the ul tag for the list. + echo "$1" + echo "" + echo '<ul class="articles">' + + c_month_year= + while IFS= read -r line + do + # Some necessary varaibles + fn=$(echo $line | cut -d' ' -f3 | cut -d'/' -f2-) + d=$(echo $line | cut -d' ' -f1) + month_year=$(date --date @$d +"%B %Y") + date=$(date --date @$d +"%b %d") + + # Only work on non index files. + if ! echo "$fn" | grep -q 'index' + then + if [ "$c_month_year" = "" ] + then + # For the first appearing year. + c_month_year=$month_year + echo "" + echo "<h3>$month_year</h3>" + echo "" + else + # Each time the year changes, print it. + if [ "$c_month_year" != "$month_year" ] + then + echo "" + echo "<h3>$month_year</h3>" + echo "" + fi + fi + + page_title=$(head -n 1 "$2/$fn" | cut -c 3-) + echo "<li>$date - <a href=https://blog.luevano.xyz/${fn%\.md}>$page_title</a></li>" + fi + done <<EOF + $fs_reversed +EOF + echo "</ul>" +} + + +add_html_ts(){ + # $1: html text, $2: birth ts, $3: mod ts. + + echo "$1" + + if [ $m -gt $b ] + then + printf "\n<div class="timestamp">\n<hr>\n<p>Created: $(date --date @$b +"$date_f"); modified: $(date --date @$m +"$date_f")</p>\n</div>" + else + printf "\n<div class="timestamp">\n<hr>\n<p>Created: $(date --date @$b +"$date_f")</p>\n</div>" + fi +} + + +add_hf_html(){ + # $1: html text. + + # lol. + echo "$HEADER" + echo "$1" + echo "$FOOTER" +} + + +html_add_hf(){ + # $1: src, $2: dst, $3: .files. + + while IFS= read -r line + do + if echo "$line" | grep -q '\.html$' + then + f=$(echo "$line" | cut -d' ' -f3 | cut -d'/' -f2-) + + html=$(cat "$src/$f") + + add_hf_html "$html" > "$2/$f" + fi + done <<EOF + $3 +EOF +} + + +print_status() { + # $1: singular, $2: plural, $3: list of something (urls, files, etc.) + + # Prints a simple status line. + test -z "$3" && printf 'no %s' "$2" && return + + echo "$3" | awk -v singular="$1" -v plural="$2" ' + END { + if (NR==1) printf NR " " singular + if (NR>1) printf NR " " plural + }' +} + + +create_sitemap() { + # $1: .files, $2: urls, $3: base_url. + + # $1 = urls, $2 = base_url, $3 = date, $4 = .files_ts + + date=$(date +%Y-%m-%d) + # Since fs saves lists as .md files, change .md to .html + # to be able to parse it. + fs="$(echo "$1" | sed 's#\.md#\.html#')" + + echo '<?xml version="1.0" encoding="UTF-8"?>' + echo '<urlset' + echo 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' + echo 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9' + echo 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"' + echo 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' + # Read each line on the urls list. + while IFS= read -r line + do + # Get the corresponding .files_ts line. + fd=$(echo "$fs" | grep "$line") + + if test -n "$fd" + then + # Get timestamps present in .files_ts. + fdb=$(echo "$fd" | cut -d' ' -f1) + fdm=$(echo "$fd" | cut -d' ' -f2) + + # echo "b: $fdb, m: $fdm" + # If modification timestamp is greater than creation timestamp... + if [ $fdm -gt $fdb ] + then + # Use modification timestamp. + echo $line | + sed -E 's#^(.*)$#<url><loc>'"$3"'/\1</loc><lastmod>'"$(date --date @$fdm +%Y-%m-%d)"'</lastmod><priority>1.0</priority></url>#' + else + # Use creation timestamp. + echo $line | + sed -E 's#^(.*)$#<url><loc>'"$3"'/\1</loc><lastmod>'"$(date --date @$fdb +%Y-%m-%d)"'</lastmod><priority>1.0</priority></url>#' + fi + else + # Use 'current' timestamp. + echo $line | + sed -E 's#^(.*)$#<url><loc>'"$3"'/\1</loc><lastmod>'"$date"'</lastmod><priority>1.0</priority></url>#' + fi + done <<EOF + $2 +EOF + echo '</urlset>' +} + +### Parameter catching. +while getopts ":s:d:t:b:f:e:" opt +do + case "$opt" in + s) src="$OPTARG" ;; + d) dst="$OPTARG" ;; + t) title="$OPTARG" ;; + b) base_url="$OPTARG" ;; + f) date_f="$OPTARG" ;; + e) extract_hf="$OPTARG" ;; + \?) echo "$(basename "$0"): invalid option -$OPTARG." + print_help ;; + :) echo "$(basename "$0"): invalid option -$OPTARG: requires an argument." + print_help ;; + esac +done + +### Handle exeptions, prepare variables and add default values. +test -n "$src" || print_help "s" +test -n "$dst" || print_help "d" +test -n "$title" || print_help "t" +test -n "$base_url" || print_help "b" +test -d "$src" || print_help "dir" "$src" +test -d "$dst" || print_help "dir" "$dst" +test -z "$extract_hf" || test -f "$extract_hf" || print_help "file" "$extract_hf" + +# Only tests if there is a string in date_f, +# doesn't really check if it has the correct formatting. +if test -z "$date_f" +then date_f="%a, %b %d, %+4Y @ %R %Z" +fi + +# Convert src and dst to full paths, avoiding symlinks. +src=$(readlink_f "$src") +dst=$(readlink_f "$dst") + +### Actual program logic. +# General ignores for 'find'. +IGNORE=$( + if test -f "$src/.sbgignore" + then + while read -r x + do + test -n "$x" || continue + printf ' ! -path "*/%s*"' "$x" + done < "$src/.sbgignore" + else + # If no .sbgignore, just ignore all dotfiles. + printf ' ! -path "*/.*"' + fi +) + + +# Get the _header.html and _footer.html. +if test -n "$extract_hf" +then extract_hf_from_html "$extract_hf" "$title" +fi + +h_file="$src/_header.html" +f_file="$src/_footer.html" +test -f "$h_file" && HEADER=$(cat "$h_file") + +test -f "$f_file" && FOOTER=$(cat "$f_file") + +# Get list of directories inside src +# and create them inside dst. +list_dirs "$src" | (cd "$src" && cpio -pdu "$dst") + +# Get file list. +echo "[file list]" +fs=$( + if test -f "$src/.files" + then list_files_add_mod "$src" "$src/.files" + else list_files "$src" + fi +) +echo "$fs" | tee "$src/.files" + +# If the file list contains .md files, +# parse them with lowdown. +if echo "$fs" | grep -q '\.md$' +then + if test -x "$(which lowdown 2> /dev/null)" + then + md_to_html "$src" "$dst" "$title" "$fs" + else + echo "$(basename "$0"): couldn't find lowdown." + exit 1 + fi +fi + +# If the file list contains .html files, +# just add the header and the footer. +html_add_hf "$src" "$dst" "$fs" + +printf '[ssg] ' >&2 +print_status 'file, ' 'files, ' "$fs" >&2 + +# Create sitemap. +urls=$(list_pages "$src") +test -n "$urls" && test -n "$fs" && +create_sitemap "$fs" "$urls" "$base_url" > "$dst/sitemap.xml" + +print_status 'url' 'urls' "$urls" >&2 +echo >&2 diff --git a/.local/bin/ssg b/.local/bin/ssg index 5c26190..2b4a4ec 100755 --- a/.local/bin/ssg +++ b/.local/bin/ssg @@ -30,6 +30,8 @@ main() { test -d "$1" || no_dir "$1" test -d "$2" || no_dir "$2" + # Convert src and dst to full paths, + # avoiding symlinks. src=$(readlink_f "$1") dst=$(readlink_f "$2") @@ -131,15 +133,25 @@ main() { readlink_f() { + # $1 = src or dst + file="$1" + # Go to the root of the blog (strips dst or src from path) cd "$(dirname "$file")" + # Only get the directory name (src or dst) file=$(basename "$file") + + # Tests if src or dst is a symbolic link, + # in which case resolves the symbolic link (readlink) + # and gets the actual directories. while test -L "$file" do file=$(readlink "$file") cd "$(dirname "$file")" file=$(basename "$file") done + + # Avoid all symbolic links and return the actual path. dir=$(pwd -P) echo "$dir/$file" } -- cgit v1.2.3