#!/bin/sh
#
# https://rgz.ee/bin/ssg5
# Copyright 2018-2019 Roman Zolotarev
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Commented and modified by David Luévano Alvarado
# Just slight modifications and added functionality.
# Removed the 'functionality' of only updating newer files as it wasn't working.
#
# Usage: ssg src dst title base_url
#
# Timestamp records (the "$dst/.files_ts" file and every "files_ts" value
# below) hold one file per line in the form:
#     <creation epoch> <modification epoch or 0> <./path/relative/to/src>

# Format handed to date(1) for the "Created/modified" footer of each article.
date_format="%a, %b %d, %+4Y @ %R %Z"

# Entry point.
# $1 = src directory, $2 = dst directory, $3 = site title, $4 = base url.
main() {
    ### Initial variables and ignores.
    test -n "$1" || usage # src
    test -n "$2" || usage # dst
    test -n "$3" || usage # title
    test -n "$4" || usage # base_url
    test -d "$1" || no_dir "$1"
    test -d "$2" || no_dir "$2"

    src=$(readlink_f "$1")
    dst=$(readlink_f "$2")

    # Extra find(1) predicates for files/directories to ignore.
    # Without a .ssgignore only dotfiles are ignored.
    # NOTE(review): the lines of .ssgignore end up inside an `eval`ed find
    # command, so that file must be trusted.
    IGNORE=$(
        if test -f "$src/.ssgignore"; then
            while read -r x; do
                test -n "$x" || continue
                printf ' ! -path "*/%s*"' "$x"
            done < "$src/.ssgignore"
        else
            printf ' ! -path "*/.*"'
        fi
    )

    ### Files.
    title="$3"

    # If _header.html and _footer.html exist, export their contents so
    # render_html_file can read them through awk's ENVIRON.
    h_file="$src/_header.html"
    f_file="$src/_footer.html"
    test -f "$f_file" && FOOTER=$(cat "$f_file") && export FOOTER
    test -f "$h_file" && HEADER=$(cat "$h_file") && export HEADER

    # Recreate the directory tree of src inside dst.
    list_dirs "$src" | (cd "$src" && cpio -pdu "$dst")

    # Plain file list.
    echo "[file list]"
    fs=$(list_files "$src")
    echo "$fs" | tee "$dst/.files"

    # File list with timestamps: merge with the stored record when there is
    # one so creation timestamps survive between runs.
    echo "[file list w/ ts]"
    fs_ts=$(
        if test -f "$dst/.files_ts"; then
            list_files_add_mod_ts "$src" "$dst/.files_ts"
        else
            list_files_ts "$src"
        fi
    )
    echo "$fs_ts" | tee "$dst/.files_ts"

    # If there are .md files, pick the parser (lowdown or Markdown.pl).
    if echo "$fs" | grep -q '\.md$'; then
        if command -v lowdown > /dev/null 2>&1; then
            # The lowdown renderer takes the list with timestamps.
            echo "$fs_ts" | grep '\.md$' |
                render_md_files_lowdown "$src" "$dst" "$title" "$4" "$fs_ts"
        elif command -v Markdown.pl > /dev/null 2>&1; then
            # Nothing Markdown.pl-specific was changed in this fork.
            echo "$fs" | grep '\.md$' |
                render_md_files_Markdown_pl "$src" "$dst" "$title"
        else
            echo "Couldn't find lowdown nor Markdown.pl" >&2
            exit 3
        fi
    fi

    # .html files only get the header and footer wrapped around them.
    echo "$fs" | grep '\.html$' | render_html_files "$src" "$dst" "$title"

    # Anything that is neither .md nor .html is copied to dst untouched.
    echo "$fs" | grep -Ev '\.md$|\.html$' | (cd "$src" && cpio -pu "$dst")

    printf '[ssg] ' >&2
    print_status 'file, ' 'files, ' "$fs" >&2

    ### Sitemap.
    urls=$(list_pages "$src")
    base_url="$4"
    date=$(date +%Y-%m-%d)

    # sitemap.xml uses the recorded timestamps, falling back to today ($date).
    test -n "$urls" && test -n "$fs_ts" &&
        render_sitemap "$urls" "$base_url" "$date" "$fs_ts" > "$dst/sitemap.xml"

    print_status 'url' 'urls' "$urls" >&2
    echo >&2
}

# Print the physical absolute path of $1, following symlinks.
# Changes directory, so call it inside a command substitution.
readlink_f() {
    file="$1"
    cd "$(dirname "$file")" || return 1
    file=$(basename "$file")
    while test -L "$file"; do
        file=$(readlink "$file")
        cd "$(dirname "$file")" || return 1
        file=$(basename "$file")
    done
    dir=$(pwd -P)
    echo "$dir/$file"
}

# Print a simple status fragment such as "3 files, " or "no urls".
# $1 = singular, $2 = plural, $3 = newline-separated list (urls, files, etc.)
print_status() {
    test -z "$3" && printf 'no %s' "$2" && return
    echo "$3" | awk -v singular="$1" -v plural="$2" '
    END {
        # The labels are passed as data, never as the printf format.
        if (NR == 1) printf "%d %s", NR, singular
        if (NR > 1) printf "%d %s", NR, plural
    }'
}

# Print the usage line and exit with status 1.
usage() {
    echo "usage: ${0##*/} src dst title base_url" >&2
    exit 1
}

# Report that directory $1 does not exist and exit with status 2.
no_dir() {
    echo "${0##*/}: $1: No such directory" >&2
    exit 2
}

# List every directory inside $1 (src), skipping '_*' paths and $IGNORE.
list_dirs() {
    cd "$1" && eval "find . -type d ! -name '.' ! -path '*/_*' $IGNORE"
}

# List every file inside $1 (src), skipping '_*' paths and $IGNORE.
list_files() {
    cd "$1" && eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE"
}

# List every file inside $1 (src) as a fresh timestamp record:
# "<mtime> 0 <name>".
# NOTE(review): this definition was missing from the reviewed text although
# main calls it; it is rebuilt from the identical find/stat command used by
# list_files_add_mod_ts. Confirm against the original script.
list_files_ts() {
    cd "$1" &&
        eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE -exec stat -c '%Y 0 %n' {} \\;"
}

# Merge the current state of src with a stored timestamp record.
# $1 = src, $2 = stored record ($dst/.files_ts).
# For each current file:
#   - unknown to the record            -> "<mtime> 0 <name>"
#   - mtime newer than stored column 1 -> "<stored column 1> <mtime> <name>"
#   - otherwise                        -> the stored line, unchanged
# File names are compared exactly (the old grep lookup matched substrings and
# treated the name as a regex, so "a.md" also hit "aa.md").
list_files_add_mod_ts() {
    # find runs in a subshell so $2 may be relative to the caller's cwd.
    (
        cd "$1" &&
            eval "find . -type f ! -name '.' ! -path '*/_*' $IGNORE -exec stat -c '%Y 0 %n' {} \\;"
    ) |
        awk '
        # Everything after the two timestamp columns is the file name.
        function file_name(line) {
            sub(/^[^ ]+ [^ ]+ /, "", line)
            return line
        }
        # First operand: the stored record.
        FILENAME == ARGV[1] {
            if (NF < 3) next
            name = file_name($0)
            born[name] = $1
            stored[name] = $0
            next
        }
        # Standard input: the current listing.
        {
            name = file_name($0)
            if (!(name in stored))
                print $1, 0, name
            else if ($1 + 0 > born[name] + 0)
                print born[name], $1, name
            else
                print stored[name]
        }' "$2" -
}

# Wrap every .html file named on stdin with the header and footer.
# $1 = src, $2 = dst, $3 = title.
# NOTE(review): this definition was missing from the reviewed text although
# main calls it; it is rebuilt from the surviving fragment and the upstream
# ssg5 implementation. Confirm against the original script.
render_html_files() {
    while read -r f; do
        render_html_file "$3" < "$1/$f" > "$2/$f"
    done
}

# Render .md files to .html with lowdown.
# $1 = src, $2 = dst, $3 = title, $4 = base_url, $5 = full timestamp record.
# Reads the timestamp records of the .md files on stdin.
# Index pages get the article list appended; every other page gets its
# creation/modification timestamps appended.
render_md_files_lowdown() {
    src="$1"
    dst="$2"
    title="$3"
    base_url="$4"
    fs_ts="$5"

    # b = creation ts, m = modification ts, f = file name (rest of the line).
    while read -r b m f; do
        test -n "$f" || continue
        # Remove the leading ./ from the file name.
        f=${f#./}

        html=$(lowdown \
            --html-no-skiphtml \
            --html-no-escapehtml \
            --html-no-owasp \
            --html-no-head-ids < "$src/$f")

        # Don't add the timestamp to the index.
        case "$f" in
            *index*)
                urls=$(list_pages "$src")
                html=$(add_article_list "$urls" "$base_url" "$dst" "$src" "$fs_ts" "$html")
                ;;
            *)
                html=$(add_html_timestamps "$html" "$b" "$m")
                ;;
        esac

        echo "$html" | render_html_file "$title" \
            > "$dst/${f%.md}.html"
    done
}

# Print the index html followed by a list of articles, newest first, with a
# header every time the month changes.
# $1 = urls, $2 = base_url, $3 = dst, $4 = src, $5 = timestamp record,
# $6 = html text from lowdown.
# NOTE(review): the literal tags and the link target below were lost in the
# reviewed text and are reconstructed with minimal markup; restore the
# original strings (classes, wrappers) if they differ.
# NOTE(review): the record lists every file in src, not only articles, so
# non-index assets would be listed too unless they are ignored elsewhere.
add_article_list() {
    urls="$1"
    base_url="$2"
    dst="$3"
    src="$4"
    # Reverse sorted file list.
    fs="$(echo "$5" | sort -r -k 1)"
    html="$6"

    # Print the current html text and open the list.
    echo "$html"
    echo ""
    echo '<ul>'

    c_month_year=
    while read -r d m fn; do
        test -n "$fn" || continue
        fn=${fn#./}

        # Only work on non index files.
        case "$fn" in
            *index*) continue ;;
        esac

        month_year=$(date --date "@$d" +"%B %Y")
        day=$(date --date "@$d" +"%b %d")

        # Print a header for the first month and each time the month changes.
        # The current month must be remembered, otherwise every entry after
        # the first change would print its own header.
        if [ "$c_month_year" != "$month_year" ]; then
            c_month_year=$month_year
            echo ""
            echo "<h3>$month_year</h3>"
            echo ""
        fi

        case "$fn" in
            *.md) href="${fn%.md}.html" ;;
            *) href="$fn" ;;
        esac

        # The page title is the first line of the file minus its "# " prefix.
        page_title=$(head -n 1 "$src/$fn" | cut -c 3-)
        echo "<li><a href=\"$base_url/$href\">$day - ${page_title}</a></li>"
    done << EOF
$fs
EOF

    echo '</ul>'
}

# Print the html with a "Created[; modified]" footer appended.
# $1 = html text from lowdown, $2 = creation ts, $3 = modification ts.
# NOTE(review): the tags around the footer were lost in the reviewed text and
# are reconstructed with minimal markup.
add_html_timestamps() {
    html="$1"
    b="$2"
    m="$3"

    stamp="Created: $(date --date "@$b" +"$date_format")"
    if [ "$m" -gt "$b" ]; then
        stamp="$stamp; modified: $(date --date "@$m" +"$date_format")"
    fi

    # The dates are passed as arguments, not spliced into the format string.
    printf '%s\n\n<hr>\n<div>\n<p>%s</p>\n</div>\n' "$html" "$stamp"
}

# Render the .md files named on stdin with Markdown.pl.
# $1 = src, $2 = dst, $3 = title. Not modified by this fork.
render_md_files_Markdown_pl() {
    while read -r f; do
        Markdown.pl < "$1/$f" |
            render_html_file "$3" \
                > "$2/${f%.md}.html"
    done
}

# Wrap the html on stdin with $HEADER and $FOOTER and print it.
# $1 = title.
# A complete document (one containing an <html tag) is passed through as is.
# Otherwise the text of the first <h1> is prepended to the title, which is
# substituted into an empty <title></title> of the header.
# h/t Devin Teske
# NOTE(review): the tag literals and the dash entity were lost in the reviewed
# text and are restored from the upstream ssg5 implementation.
render_html_file() {
    awk -v title="$1" '
    { body = body "\n" $0 }
    END {
        body = substr(body, 2)
        if (body ~ /<[Hh][Tt][Mm][Ll]/) {
            print body
            exit
        }
        if (match(body, /<[[:space:]]*[Hh]1(>|[[:space:]][^>]*>)/)) {
            t = substr(body, RSTART + RLENGTH)
            sub("<[[:space:]]*/[[:space:]]*[Hh]1.*", "", t)
            gsub(/^[[:space:]]*|[[:space:]]$/, "", t)
            if (t) title = t " &mdash; " title
        }
        n = split(ENVIRON["HEADER"], header, /\n/)
        for (i = 1; i <= n; i++) {
            if (match(tolower(header[i]), "<title></title>")) {
                head = substr(header[i], 1, RSTART - 1)
                tail = substr(header[i], RSTART + RLENGTH)
                print head "<title>" title "</title>" tail
            } else print header[i]
        }
        print body
        print ENVIRON["FOOTER"]
    }'
}

# List the site pages as urls relative to the site root.
# $1 = src.
# Every .md/.html file is listed with its leading ./ removed, .md turned into
# .html and a trailing /index.html shortened to /.
list_pages() {
    e="\\( -name '*.html' -o -name '*.md' \\)"
    cd "$1" &&
        eval "find . -type f ! -path '*/.*' ! -path '*/_*' $IGNORE $e" |
        sed 's#^\./##;s#\.md$#.html#;s#/index\.html$#/#'
}

# Print sitemap.xml.
# $1 = urls, $2 = base_url, $3 = fallback date (YYYY-MM-DD),
# $4 = timestamp record.
# Each url uses the newer of its recorded creation/modification timestamps as
# <lastmod>, or the fallback date when the record has no entry for it.
# NOTE(review): the XML literals were lost in the reviewed text; they are
# restored following the sitemaps.org protocol and upstream ssg5.
render_sitemap() {
    urls="$1"
    base_url="$2"
    date="$3"
    fs="$4"

    # The record stores .md names while urls are .html: rename the suffix.
    fs="$(echo "$fs" | sed 's/\.md$/.html/')"

    echo '<?xml version="1.0" encoding="UTF-8"?>'
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'

    # Read each line of the urls list.
    while IFS= read -r line; do
        test -n "$line" || continue

        # Directory urls stand for their index page.
        page=$line
        case "$page" in
            */) page="${page}index.html" ;;
        esac

        # Look the page up by exact file name; a substring match would make
        # "blog/" hit every file below blog/.
        entry=$(printf '%s\n' "$fs" | key="./$page" awk '
            { name = $0; sub(/^[^ ]+ [^ ]+ /, "", name) }
            name == ENVIRON["key"] { print $1, $2; exit }')

        if test -n "$entry"; then
            fdb=${entry%% *}
            fdm=${entry#* }
            # Prefer the modification timestamp when it is the newer one.
            if [ "$fdm" -gt "$fdb" ]; then
                lastmod=$(date --date "@$fdm" +%Y-%m-%d)
            else
                lastmod=$(date --date "@$fdb" +%Y-%m-%d)
            fi
        else
            # Use the 'current' date.
            lastmod=$date
        fi

        printf '<url><loc>%s/%s</loc><lastmod>%s</lastmod><priority>1.0</priority></url>\n' \
            "$base_url" "$line" "$lastmod"
    done << EOF
$urls
EOF

    echo '</urlset>'
}

main "$@"