#!/bin/sh
#
# (s)tatic (b)log (g)enerator.
#
# Heavily inspired (ripped off) by Roman Zolotarev's ssg5 (https://rgz.ee/bin/ssg5)
# and rssg (https://rgz.ee/bin/rssg), as well as Luke Smith's lb and sup
# (https://github.com/LukeSmithxyz/lb).
#
# Aimed to correct 'flaws' of both sets of scripts, and made to work
# as how I wanted them both to work.
#
# This script uses lowdown (https://github.com/kristapsdz/lowdown) for
# the .md to .html translation.
#
print_help(){
# $1: dir/file or parameter name, $2: dir/file name
if [ "$1" = "dir" ]
then echo "$(basename "$0"): $2: no such directory."
elif [ "$1" = "file" ]
then echo "$(basename "$0"): $2: no such file."
else
if test -n "$1"
then echo "$(basename "$0"): missing -$1 parameter."
fi
fi
echo ""
echo "Usage: $(basename "$0") [-s] [-d] [-t] [-b] [other options]"
echo " -s 'src' directory."
echo " -d 'dst' directory."
echo " -t 'title' blog title."
echo " -b 'base url' blog base url."
echo " -f 'date format' date format as stated in 'find' command."
echo " -e 'extract' extract the header and footer from this .html file (replaces _header.html and _footer.html in src)."
exit 1
}
readlink_f(){
  # Print the physical absolute path of $1, following symbolic links on
  # its last component (a portable stand-in for `readlink -f`).
  # $1: src/dst
  # Returns non-zero and prints nothing when a directory on the way
  # cannot be entered.
  # The function changes the working directory, so call it inside $( ),
  # as the script does, to keep the caller's directory untouched.
  file="$1"
  # CDPATH is emptied for each cd: otherwise a relative name could be
  # resolved somewhere else and cd would echo the directory to stdout,
  # corrupting the path this function prints.
  CDPATH='' cd -- "$(dirname -- "$file")" || return 1
  file=$(basename -- "$file")
  # Tests if src or dst is a symbolic link,
  # in which case resolves the symbolic link
  # and gets the actual directories.
  while test -L "$file"
  do
    file=$(readlink -- "$file")
    CDPATH='' cd -- "$(dirname -- "$file")" || return 1
    file=$(basename -- "$file")
  done
  # Avoid all symbolic links and return the actual path.
  dir=$(pwd -P)
  printf '%s\n' "$dir/$file"
}
extract_hf_from_html(){
# Derive "$src/_header.html" (and, judging by the last command,
# "$src/_footer.html") from an existing html page.
# Reads the global $src.
# $1: html file name, $2: title
# Get the header part, and replace title.
# (sed prints $1 up to and including the first line matching "</header",
# then quits.)
sed "/<\/header/q" "$1" > "$src/_header.html"
# NOTE(review): the substitution below and the footer pipeline after it
# look truncated (their sed scripts are incomplete as written); restore
# them from a known-good copy before relying on this function.
# NOTE(review): $2 is pasted into the sed script unescaped, so a title
# containing '/' or '&' would presumably alter the substitution - confirm.
sed -i "s/
.*/$2<\/title>/g" "$src/_header.html"
# Not proud of this... get the footer part.
tac "$1" | sed "/ "$src/_footer.html"
}
list_dirs(){
  # Print, as "./relative" paths, every directory below $1 except the
  # root itself, anything whose path contains a component starting with
  # '_', and whatever the find expression in the global $IGNORE excludes.
  # Changes the working directory to $1.
  # $1: dir path.
  cd "$1" || return
  eval "find . -type d ! -name '.' ! -path '*/_*' $IGNORE"
}
list_files(){
  # Print one line per regular file below $1 in the form
  # "<mtime in epoch seconds> 0 ./relative/path" (stat -c '%Y 0 %n').
  # Paths with a component starting with '_' and anything excluded by
  # the find expression in the global $IGNORE are left out.
  # Changes the working directory to $1.
  # $1: dir path.
  cd "$1" || return
  eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE -exec stat -c '%Y 0 %n' {} \\;"
}
list_pages(){
  # Print the page paths found below $1, one per line: every *.html and
  # *.md file outside dot- and '_'-prefixed paths (and outside whatever
  # the global $IGNORE excludes), with the leading "./" removed, ".md"
  # turned into ".html" and a trailing "/index.html" shortened to "/".
  # Changes the working directory to $1.
  # $1: dir path.
  e="\\( -name '*.html' -o -name '*.md' \\)"
  cd "$1" || return
  eval "find . -type f ! -path '*/.*' ! -path '*/_*' $IGNORE $e" |
    sed 's#^./##;s#.md$#.html#;s#/index.html$#/#'
}
list_files_add_mod(){
# Merge a fresh listing of $1 with the previously saved listing in $2.
# Lines have the list_files layout "<ts> <mod ts or 0> <path>": a file
# already present in the old list keeps its old first column and gets the
# new timestamp in the second column when it is newer; a file that is not
# in the old list is printed as "<ts> 0 <path>".
# $1: dir path, $2: .files.
# 'n' stands for new and 'o' for old.
cd "$1" && fn=$(list_files "$1")
fo=$(cat "$2")
# Loop over new file list.
while IFS= read -r fn_line
do
fn_n=$(echo "$fn_line" | cut -d' ' -f3)
fn_ts=$(echo "$fn_line" | cut -d' ' -f1)
# NOTE(review): grep treats $fn_n as an unanchored regular expression, so
# it can return several old lines (e.g. for names that are substrings of
# other names) - confirm whether -F and a full-field match are wanted.
fo_line=$(echo "$fo" | grep "$fn_n")
if test -n "$fo_line"
then
# If file name exists in old list...
fo_ts=$(echo "$fo_line" | cut -d' ' -f1)
# Either add the new timestamp on the
# second column (mod) or keep the old line.
# NOTE(review): the 'then' branch prints $fn, which holds the whole new
# listing, where the current file name ($fn_n) looks intended - confirm.
if [ $fn_ts -gt $fo_ts ]
then echo $fo_ts $fn_ts $fn
else echo $fo_line
fi
else
# If not, just print the new file data.
echo $fn_ts" 0 "$fn_n
fi
# NOTE(review): from the next line down to the closing brace the text is
# damaged: the loop's input redirection is gone and what follows looks
# like the remains of other definitions fused into this one. Restore it
# from a known-good copy; nothing below has been altered here.
done < "$2/${f%\.md}.html"
fi
done <'
c_month_year=
while IFS= read -r line
do
# Some necessary varaibles
fn=$(echo $line | cut -d' ' -f3 | cut -d'/' -f2-)
d=$(echo $line | cut -d' ' -f1)
month_year=$(date --date @$d +"%B %Y")
date=$(date --date @$d +"%b %d")
# Only work on non index files.
if ! echo "$fn" | grep -q 'index'
then
if [ "$c_month_year" = "" ]
then
# For the first appearing year.
c_month_year=$month_year
echo ""
echo "$month_year "
echo ""
else
# Each time the year changes, print it.
if [ "$c_month_year" != "$month_year" ]
then
echo ""
echo "$month_year "
echo ""
fi
fi
page_title=$(head -n 1 "$2/$fn" | cut -c 3-)
echo "$date - $page_title "
fi
done <"
}
add_html_ts(){
  # Print the html text followed by a "Created" (and, when the page was
  # modified later, "modified") line.
  # $1: html text, $2: birth ts, $3: mod ts.
  # Timestamps are epoch seconds and are rendered with the global $date_f
  # through `date --date @N` (GNU date).
  # The documented $2/$3 are honoured; when they are omitted the globals
  # $b/$m, which this function used to read, serve as the fallback, so
  # callers that only set those keep working.
  ts_birth=${2:-$b}
  ts_mod=${3:-$m}
  # printf '%s' keeps backslashes and a leading '-' in the html intact.
  printf '%s\n' "$1"
  # The dates are passed as printf arguments, never as part of the format,
  # so a '%' produced by $date_f cannot break the output.
  if [ "$ts_mod" -gt "$ts_birth" ]
  then
    printf '\n\n
\n
Created: %s; modified: %s
\n
' "$(date --date "@$ts_birth" +"$date_f")" "$(date --date "@$ts_mod" +"$date_f")"
  else
    printf '\n\n
\n
Created: %s
\n
' "$(date --date "@$ts_birth" +"$date_f")"
  fi
}
add_hf_html(){
  # Print the global $HEADER, the html text and the global $FOOTER, each
  # followed by a newline.
  # $1: html text.
  # printf '%s\n' is used instead of echo because the text is arbitrary
  # html: echo would swallow a value such as "-n" and, depending on the
  # /bin/sh in use, rewrite backslash sequences inside the page.
  printf '%s\n' "$HEADER" "$1" "$FOOTER"
}
html_add_hf(){
# For each input line ending in ".html", read that file from the source
# tree, wrap it with add_hf_html and write the result to the same
# relative path under $2.
# $1: src, $2: dst, $3: .files.
# NOTE(review): the body reads the global $src rather than $1; the visible
# call passes "$src" as $1, so the two agree there - confirm before
# calling this with a different first argument.
while IFS= read -r line
do
if echo "$line" | grep -q '\.html$'
then
# Third space-separated field is the path; the second cut drops
# everything up to and including its first '/'.
f=$(echo "$line" | cut -d' ' -f3 | cut -d'/' -f2-)
html=$(cat "$src/$f")
add_hf_html "$html" > "$2/$f"
fi
# NOTE(review): the next two lines are damaged: the loop's input
# redirection (presumably fed from $3) is gone and the tail of what looks
# like an awk program from another definition is fused in. Restore from a
# known-good copy; nothing below has been altered here.
done <1) printf NR " " plural
}'
}
create_sitemap() {
# Print the sitemap on stdout: one entry per url, dated with the file's
# modification timestamp when it is newer than its creation timestamp,
# with the creation timestamp otherwise, and with today's date when the
# url has no matching line in the file list.
# $1: .files, $2: urls, $3: base_url.
# NOTE(review): the two echo '' lines, the sed replacement texts and the
# final 'done <' line appear to have lost text (markup and the loop's
# input, presumably $2); restore them from a known-good copy.
date=$(date +%Y-%m-%d)
# Since fs saves lists as .md files, change .md to .html
# to be able to parse it.
# (Only the first ".md" on each line is replaced.)
fs="$(echo "$1" | sed 's#\.md#\.html#')"
echo ''
echo ''
# Read each line on the urls list.
while IFS= read -r line
do
# Get the corresponding .files_ts line.
# NOTE(review): grep treats $line as an unanchored regular expression, so
# more than one line may match - confirm that is acceptable.
fd=$(echo "$fs" | grep "$line")
if test -n "$fd"
then
# Get timestamps present in .files_ts.
fdb=$(echo "$fd" | cut -d' ' -f1)
fdm=$(echo "$fd" | cut -d' ' -f2)
# echo "b: $fdb, m: $fdm"
# If modification timestamp is greater than creation timestamp...
if [ $fdm -gt $fdb ]
then
# Use modification timestamp.
echo $line |
sed -E 's#^(.*)$#'"$3"'/\1 '"$(date --date @$fdm +%Y-%m-%d)"' 1.0 #'
else
# Use creation timestamp.
echo $line |
sed -E 's#^(.*)$#'"$3"'/\1 '"$(date --date @$fdb +%Y-%m-%d)"' 1.0 #'
fi
else
# Use 'current' timestamp.
echo $line |
sed -E 's#^(.*)$#'"$3"'/\1 '"$date"' 1.0 #'
fi
done <'
}
### Parameter catching.
# The leading ':' in the option string puts getopts in silent mode: an
# unknown option is reported as '?' and a missing argument as ':', both
# with the offending letter in $OPTARG, and the arms below print the
# message before handing over to print_help (which exits).
while getopts ":s:d:t:b:f:e:" opt
do
  case "$opt" in
    \?)
      echo "$(basename "$0"): invalid option -$OPTARG."
      print_help
      ;;
    :)
      echo "$(basename "$0"): invalid option -$OPTARG: requires an argument."
      print_help
      ;;
    s) src=$OPTARG ;;
    d) dst=$OPTARG ;;
    t) title=$OPTARG ;;
    b) base_url=$OPTARG ;;
    f) date_f=$OPTARG ;;
    e) extract_hf=$OPTARG ;;
  esac
done
### Handle exceptions, prepare variables and add default values.
# The four mandatory options: print_help names the missing one and exits.
if [ -z "$src" ]; then print_help "s"; fi
if [ -z "$dst" ]; then print_help "d"; fi
if [ -z "$title" ]; then print_help "t"; fi
if [ -z "$base_url" ]; then print_help "b"; fi
# Both directories have to exist already.
if [ ! -d "$src" ]; then print_help "dir" "$src"; fi
if [ ! -d "$dst" ]; then print_help "dir" "$dst"; fi
# -e is optional, but when given it has to name an existing regular file.
if [ -n "$extract_hf" ] && [ ! -f "$extract_hf" ]
then print_help "file" "$extract_hf"
fi
# A default is supplied only when date_f is empty;
# the format itself is never validated.
date_f=${date_f:-"%a, %b %d, %+4Y @ %R %Z"}
# Convert src and dst to full paths, avoiding symlinks.
src=$(readlink_f "$src")
dst=$(readlink_f "$dst")
### Actual program logic.
# General ignores for 'find'.
# IGNORE holds a fragment of a find expression that the list_* helpers
# splice into the command line they eval.
IGNORE=$(
  if test -f "$src/.sbgignore"
  then
    # Currently not working...
    # One pattern per line, empty lines skipped. The extra test keeps the
    # last line even when the file does not end with a newline.
    # NOTE: the patterns end up in an eval'ed command line, so quotes, '$'
    # or backticks in .sbgignore are interpreted by the shell.
    while read -r x || test -n "$x"
    do
      test -n "$x" || continue
      printf ' ! -path "*/%s*"' "$x"
    done < "$src/.sbgignore"
  else
    # If no .sbgignore, just ignore all dotfiles.
    printf ' ! -path "*/.*"'
  fi
)
# Get the _header.html and _footer.html.
# With -e they are first (re)generated from the given html file.
if test -n "$extract_hf"
then extract_hf_from_html "$extract_hf" "$title"
fi
h_file="$src/_header.html"
f_file="$src/_footer.html"
# HEADER/FOOTER stay unset when the corresponding file does not exist.
test -f "$h_file" && HEADER=$(cat "$h_file")
test -f "$f_file" && FOOTER=$(cat "$f_file")
# Get list of directories inside src
# and create them inside dst.
list_dirs "$src" | (cd "$src" && cpio -pdu "$dst")
# Get file list.
# When a previous list exists it is merged with the fresh one; fs is
# computed completely before tee overwrites "$src/.files".
echo "[file list]"
fs=$(
  if test -f "$src/.files"
  then list_files_add_mod "$src" "$src/.files"
  else list_files "$src"
  fi
)
# printf rather than echo: the list contains file names, which echo may
# alter (backslashes, a leading '-').
printf '%s\n' "$fs" | tee "$src/.files"
# If the file list contains .md files,
# parse them with lowdown.
if printf '%s\n' "$fs" | grep -q '\.md$'
then
  # command -v is the POSIX way to look a command up; 'which' is an
  # external tool that is not guaranteed to be installed.
  if command -v lowdown > /dev/null 2>&1
  then
    md_to_html "$src" "$dst" "$title" "$fs"
  else
    echo "$(basename "$0"): couldn't find lowdown."
    exit 1
  fi
fi
# If the file list contains .html files,
# just add the header and the footer.
html_add_hf "$src" "$dst" "$fs"
# Status line goes to stderr.
printf '[sbg] ' >&2
print_status 'file, ' 'files, ' "$fs" >&2
# Create sitemap.
# Only written when both the url list and the file list are non-empty.
urls=$(list_pages "$src")
test -n "$urls" && test -n "$fs" &&
  create_sitemap "$fs" "$urls" "$base_url" > "$dst/sitemap.xml"
print_status 'url' 'urls' "$urls" >&2
echo >&2