#!/bin/sh
#
# https://rgz.ee/bin/ssg5
# Copyright 2018-2019 Roman Zolotarev <hi@romanzolotarev.com>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

# Commented and modified by David Luévano Alvarado <david@luevano.xyz>
# Just slight modifications and added functionality.
# Removed the 'functionality' of only updating newer files as it wasn't working.
# date(1) format string used for the "Created"/"modified" timestamps that
# add_html_timestamps appends to rendered pages.
date_format="%a, %b %d, %+4Y @ %R %Z"

main() {
	# Build the site: render every page found in src into dst and
	# write dst/sitemap.xml.
	# $1 = src, $2 = dst, $3 = title, $4 = base_url

	### Initial variables and ignores.
	test -n "$1" || usage # src
	test -n "$2" || usage # dst
	test -n "$3" || usage # title
	test -n "$4" || usage # base_url
	test -d "$1" || no_dir "$1"
	test -d "$2" || no_dir "$2"

	src=$(readlink_f "$1")
	dst=$(readlink_f "$2")

	# Extra find(1) predicates for files/directories to ignore.
	# NOTE: the patterns are pasted into eval'd find commands by the
	# list_* helpers, so .ssgignore must only contain trusted patterns.
	IGNORE=$(
		if test -f "$src/.ssgignore"
		then
			# '|| test -n' keeps a last line that has no trailing newline.
			while read -r x || test -n "$x"
			do
				test -n "$x" || continue
				printf ' ! -path "*/%s*"' "$x"
			done < "$src/.ssgignore"
		else
			# If .ssgignore doesn't exist, only ignore dotfiles.
			printf ' ! -path "*/.*"'
		fi
	)

	### Files.
	title="$3"

	# If _header.html and _footer.html exist,
	# export their contents to the corresponding variables
	# (render_html_file reads them from the environment).
	h_file="$src/_header.html"
	f_file="$src/_footer.html"
	test -f "$f_file" && FOOTER=$(cat "$f_file") && export FOOTER
	test -f "$h_file" && HEADER=$(cat "$h_file") && export HEADER

	# Get list of directories inside src and
	# create them inside dst.
	list_dirs "$src" |
	(cd "$src" && cpio -pdu "$dst")

	# Get file list and file list with timestamps.
	echo "[file list]"
	fs=$(list_files "$src")
	printf '%s\n' "$fs" | tee "$dst/.files"

	echo "[file list w/ ts]"
	# Reuse the stored timestamps when a previous run left them in dst;
	# the old file is read before tee overwrites it below.
	fs_ts=$(
		if test -f "$dst/.files_ts"
		then list_files_add_mod_ts "$src" "$dst/.files_ts"
		else list_files_ts "$src"
		fi
	)
	printf '%s\n' "$fs_ts" | tee "$dst/.files_ts"

	# If the file list contains .md files, select which
	# parser to use (lowdown or Markdown.pl).
	if printf '%s\n' "$fs" | grep -q '\.md$'
	then
		if command -v lowdown > /dev/null 2>&1
		then
			# Give the file list with timestamps instead of the normal one.
			printf '%s\n' "$fs_ts" | grep '\.md$' |
			render_md_files_lowdown "$src" "$dst" "$title" "$4" "$fs_ts"
		elif command -v Markdown.pl > /dev/null 2>&1
		then
			# The Markdown.pl path is unchanged from upstream:
			# it gets the plain file list and adds no timestamps.
			printf '%s\n' "$fs" | grep '\.md$' |
			render_md_files_Markdown_pl "$src" "$dst" "$title"
		else
			echo "Couldn't find lowdown nor Markdown.pl" >&2
			exit 3
		fi
	fi

	# If the file is html, do simple parsing.
	printf '%s\n' "$fs" | grep '\.html$' |
	render_html_files "$src" "$dst" "$title"

	# Any other file that is not a .md or .html
	# is copied to dst unchanged.
	printf '%s\n' "$fs" | grep -Ev '\.md$|\.html$' |
	(cd "$src" && cpio -pu "$dst")

	printf '[ssg] ' >&2
	print_status 'file, ' 'files, ' "$fs" >&2


	### Sitemap.
	urls=$(list_pages "$src")
	base_url="$4"
	date=$(date +%Y-%m-%d)

	# Creates the sitemap.xml using .files_ts timestamps or current date ($date).
	test -n "$urls" && test -n "$fs_ts" &&
	render_sitemap "$urls" "$base_url" "$date" "$fs_ts" > "$dst/sitemap.xml"

	print_status 'url' 'urls' "$urls" >&2
	echo >&2
}


readlink_f() {
	# $1 = path
	#
	# Prints the physical absolute path of $1, following symlinks in its
	# last component. Changes the working directory of the calling shell,
	# so call it inside a command substitution. Returns 1 (printing
	# nothing) when a directory on the way cannot be entered.
	file="$1"
	cd "$(dirname "$file")" || return 1
	file=$(basename "$file")
	while test -L "$file"
	do
		# Link targets are relative to the directory holding the link,
		# which is why we cd at every hop.
		file=$(readlink "$file")
		cd "$(dirname "$file")" || return 1
		file=$(basename "$file")
	done
	dir=$(pwd -P)
	# Strip a trailing slash so a "/" parent does not give "//name".
	printf '%s\n' "${dir%/}/$file"
}


print_status() {
	# $1 = singular, $2 = plural, $3 = list of something (urls, files, etc.),
	# one item per line.
	#
	# Prints "<count> <singular|plural>" or "no <plural>" for an empty
	# list, without a trailing newline.
	if test -z "$3"
	then
		printf 'no %s' "$2"
		return
	fi

	# The labels are passed as printf arguments, never as the format,
	# so a "%" inside them is printed literally.
	printf '%s\n' "$3" | awk -v singular="$1" -v plural="$2" '
	END {
		if (NR == 1) printf "%d %s", NR, singular
		else if (NR > 1) printf "%d %s", NR, plural
	}'
}


usage() {
	# Print the expected command line on stderr and stop with status 1.
	printf '%s\n' "usage: ${0##*/} src dst title base_url" >&2
	exit 1
}


no_dir() {
	# $1 = path that is not a directory.
	#
	# Reports the path on stderr and stops with status 2.
	# printf is used because the path comes from the command line and
	# some sh implementations let echo interpret backslashes in it.
	printf '%s: %s: No such directory\n' "${0##*/}" "$1" >&2
	exit 2
}


list_dirs() {
	# $1 = src directory.
	#
	# Prints every directory below src (as ./relative paths), skipping
	# "_"-prefixed paths and whatever the global $IGNORE predicates exclude.
	# Changes into src first; gives up with cd's status if that fails.
	cd "$1" || return
	eval "find . -type d ! -name '.' ! -path '*/_*' $IGNORE"
}


list_files() {
	# $1 = src
	#
	# Prints every regular file below src (as ./relative paths), skipping
	# "_"-prefixed paths and whatever the global $IGNORE predicates exclude.
	# Changes into src first; gives up with cd's status if that fails.
	cd "$1" || return
	eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE"
}


list_files_add_mod_ts() {
	# $1 = src, $2 = $dst/.files_ts
	#
	# Prints one "<first-seen ts> <modification ts or 0> <./path>" line per
	# file currently in src, merging the current mtimes with the lines
	# stored in $2:
	#   - file not in $2:              "<mtime> 0 <path>"
	#   - mtime newer than column 1:   "<column 1> <mtime> <path>"
	#   - otherwise:                   the stored line, unchanged
	# Uses GNU stat (-c). $2 is opened after changing into src.
	cd "$1" || return

	# Stored lines are looked up by exact file name; a regex/substring
	# match would confuse ./a.md with ./dir/a.md.
	eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE -exec stat -c '%Y 0 %n' {} +" |
	OLD_TS_FILE="$2" awk '
	# Everything after the first two columns is the file name.
	function fname(s) {
		sub(/^[ \t]*[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", s)
		return s
	}
	BEGIN {
		old = ENVIRON["OLD_TS_FILE"]
		while ((getline row < old) > 0) {
			if (split(row, col, " ") < 3) continue
			name = fname(row)
			if (name in birth) continue
			birth[name] = col[1]
			stored[name] = row
		}
		close(old)
	}
	{
		name = fname($0)
		if (!(name in birth)) print $1 " 0 " name
		else if ($1 + 0 > birth[name] + 0) print birth[name] " " $1 " " name
		else print stored[name]
	}'
}


list_files_ts() {
	# $1 = src
	#
	# Prints "<mtime> 0 <./path>" for every regular file below src,
	# skipping "_"-prefixed paths and whatever the global $IGNORE
	# predicates exclude. Uses GNU stat (-c).
	cd "$1" || return

	# "{} +" hands many files to each stat call instead of forking one
	# stat per file; the lines come out in the same order.
	eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE -exec stat -c '%Y 0 %n' {} +"
}


render_html_files() {
	# $1 = src, $2 = dst, $3 = title
	#
	# Reads .html file names (relative to src) from stdin, one per line,
	# and writes each one through render_html_file to the same relative
	# path under dst.
	while read -r page
	do
		render_html_file "$3" < "$1/$page" > "$2/$page"
	done
}


render_md_files_lowdown() {
	# $1 = src, $2 = dst, $3 = title, $4 = base_url, $5 = .files_ts contents
	#
	# Reads .files_ts lines for .md files from stdin
	# ("<first-seen ts> <modification ts> ./path.md"), converts each file
	# with lowdown and writes the result to the matching .html under dst.

	src="$1"
	dst="$2"
	title="$3"
	base_url="$4"
	fs_ts="$5"

	# b = first-seen timestamp, m = modification timestamp, f = file name.
	while read -r b m f
	do
		test -n "$f" || continue

		# Remove ./ at the start of the file name.
		f=${f#*/}

		# Lowdown outputs an html fragment; timestamps (or the article
		# list) are added and then it is wrapped by render_html_file.
		html=$(lowdown \
		--html-no-skiphtml \
		--html-no-escapehtml \
		--html-no-owasp \
		--html-no-head-ids < "$src/$f")

		# Any name containing "index" gets the article list instead of
		# the timestamps. The check prints nothing on stdout.
		case "$f" in
		*index*)
			urls=$(list_pages "$src")
			html=$(add_article_list "$urls" "$base_url" "$dst" "$src" "$fs_ts" "$html")
			;;
		*)
			html=$(add_html_timestamps "$html" "$b" "$m")
			;;
		esac

		# printf keeps backslashes in the HTML intact on every sh.
		printf '%s\n' "$html" | render_html_file "$title" \
		> "$dst/${f%.md}.html"
	done
}


add_article_list() {
	# $1 = urls (unused), $2 = base_url, $3 = dst (unused), $4 = src,
	# $5 = .files_ts contents, $6 = html text from lowdown.
	#
	# Prints the html followed by a <ul class="articles"> listing every
	# file in $5 whose name does not contain "index", newest first, with a
	# "<h3>Month Year</h3>" heading each time the month changes. The link
	# text is the first line of the source file minus its first two
	# characters. Uses GNU date (--date).

	base_url="$2"
	src="$4"
	html="$6"
	# Reverse sorted file list (newest first-seen timestamp first).
	fs=$(printf '%s\n' "$5" | sort -r -k 1)

	# Print the current html text and open the list.
	printf '%s\n' "$html"
	echo ""
	echo '<ul class="articles">'

	c_month_year=
	# d = first-seen timestamp, m = modification timestamp (unused here),
	# fn = file name.
	while read -r d m fn
	do
		# An empty file list still yields one empty here-doc line.
		test -n "$d" || continue

		fn=${fn#*/}

		# Only work on non index files.
		case "$fn" in
		*index*) continue ;;
		esac

		month_year=$(date --date "@$d" +"%B %Y")
		day=$(date --date "@$d" +"%b %d")

		# Print the heading once per month; remembering the month that
		# was just printed keeps it from repeating for every article.
		if [ "$c_month_year" != "$month_year" ]
		then
			c_month_year=$month_year
			echo ""
			echo "<h3>$month_year</h3>"
			echo ""
		fi

		page_title=$(head -n 1 "$src/$fn" | cut -c 3-)
		printf '%s\n' "<li>$day - <a href=$base_url/${fn%.md}>${page_title}</a></li>"
	done <<EOF
$fs
EOF
	echo "</ul>"
}


add_html_timestamps(){
	# $1 = html text from lowdown, $2 = creation ts, $3 = modification ts
	#
	# Prints the html followed by a <div class="timestamp"> block holding
	# the creation date and, when $3 is greater than $2, the modification
	# date. Dates are formatted with the global $date_format.
	# Uses GNU date (--date).

	html="$1"
	b="$2"
	m="$3"

	stamp="Created: $(date --date "@$b" +"$date_format")"
	if [ "$m" -gt "$b" ]
	then
		stamp="$stamp; modified: $(date --date "@$m" +"$date_format")"
	fi

	# The dates are passed as printf arguments, never as part of the
	# format, and the class attribute keeps its quotes.
	printf '%s\n\n<div class="timestamp">\n<hr>\n<p>%s</p>\n</div>\n' "$html" "$stamp"
}


# I don't really use Markdown.pl, so I didn't even look into it.
render_md_files_Markdown_pl() {
	# $1 = src, $2 = dst, $3 = title
	#
	# Reads .md file names (relative to src) from stdin, one per line,
	# converts each with Markdown.pl, wraps the result with
	# render_html_file and writes it to the matching .html under dst.
	while read -r md
	do
		Markdown.pl < "$1/$md" | render_html_file "$3" > "$2/${md%\.md}.html"
	done
}


render_html_file() {
	# $1 = title
	#
	# Reads an html fragment on stdin and prints it wrapped between the
	# HEADER and FOOTER environment variables. Input that already contains
	# an <html tag is printed unchanged. When the fragment has an <h1>,
	# its text is put in front of the title ("<h1 text> &mdash; <title>"),
	# and the result replaces an empty <title></title> in the header.
	# h/t Devin Teske
	awk -v title="$1" '
	{ body = body "\n" $0 }
	END {
		body = substr(body, 2)
		if (body ~ /<[Hh][Tt][Mm][Ll]/) {
			print body
			exit
		}
		if (match(body, /<[[:space:]]*[Hh]1(>|[[:space:]][^>]*>)/)) {
			t = substr(body, RSTART + RLENGTH)
			sub("<[[:space:]]*/[[:space:]]*[Hh]1.*", "", t)
			# Trim all surrounding whitespace, not just one trailing character.
			gsub(/^[[:space:]]+|[[:space:]]+$/, "", t)
			if (t) title = t " &mdash; " title
		}
		n = split(ENVIRON["HEADER"], header, /\n/)
		for (i = 1; i <= n; i++) {
			if (match(tolower(header[i]), "<title></title>")) {
				head = substr(header[i], 1, RSTART - 1)
				tail = substr(header[i], RSTART + RLENGTH)
				print head "<title>" title "</title>" tail
			} else print header[i]
		}
		print body
		print ENVIRON["FOOTER"]
	}'
}


list_pages() {
	# $1 = src
	#
	# Prints the site-relative URL of every .md or .html file below src:
	# the leading ./ is dropped, .md becomes .html and a trailing
	# /index.html is shortened to /. Dot paths, "_"-prefixed paths and
	# whatever the global $IGNORE predicates exclude are skipped.
	cd "$1" || return

	eval "find . -type f ! -path '*/.*' ! -path '*/_*' $IGNORE \\( -name '*.html' -o -name '*.md' \\)" |
	sed 's#^./##;s#.md$#.html#;s#/index.html$#/#'
}


render_sitemap() {
	# $1 = urls, $2 = base_url, $3 = date, $4 = .files_ts contents
	#
	# Prints a sitemap.xml with one <url> entry per line of $1. The
	# <lastmod> of an entry is the newer of the two timestamps stored for
	# the page in $4, or $3 when the page has no line there.
	# Uses GNU date (--date).

	urls="$1"
	base_url="$2"
	date="$3"
	fs="$4"

	echo '<?xml version="1.0" encoding="UTF-8"?>'
	echo '<urlset'
	echo 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
	echo 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
	echo 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'
	echo 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'

	# Read each line on the urls list.
	while IFS= read -r url
	do
		test -n "$url" || continue

		# Map the URL back to the file name used in .files_ts:
		# "dir/" stands for dir/index.html, names start with "./".
		case "$url" in
		*/) page="./${url}index.html" ;;
		*) page="./$url" ;;
		esac

		# Get the timestamps of the line whose file name (with a final
		# .md read as .html) is exactly $page. A substring match would
		# also pick up other files that share part of the name.
		fd=$(printf '%s\n' "$fs" | PAGE="$page" awk '
		{
			name = $0
			sub(/^[ \t]*[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", name)
			sub(/\.md$/, ".html", name)
		}
		name == ENVIRON["PAGE"] { print $1, $2; exit }')

		# Use the "current" date unless timestamps are stored.
		lastmod=$date
		if test -n "$fd"
		then
			fdb=${fd% *}
			fdm=${fd#* }

			# Use the modification timestamp when it is greater than
			# the creation timestamp.
			if [ "$fdm" -gt "$fdb" ]
			then ts=$fdm
			else ts=$fdb
			fi
			lastmod=$(date --date "@$ts" +%Y-%m-%d)
		fi

		printf '<url><loc>%s/%s</loc><lastmod>%s</lastmod><priority>1.0</priority></url>\n' \
		"$base_url" "$url" "$lastmod"
	done <<EOF
$urls
EOF
	echo '</urlset>'
}


# Run the generator with the script's command-line arguments.
main "$@"