2
0
Files
docker-skeleton/.utils/downsync/downsync

230 lines
7.9 KiB
Bash
Executable File

#!/bin/bash
#
# A humble shell script for one-way (down) synchronization of a remote
# web folder (e.g. created by the xbackup utility). Performs basic
# authentication if necessary.
#
# It does not handle any remote subfolders, only the root folder. Downloads
# all files that do not exist locally. Updates only an existing file that is
# older than the remote source. It warns of errors or possible inconsistencies.
#
# Actually it is only a fairly thick wrapper around wget :).
# Creates a unique log file in the local folder (this can be disabled).
#
# Usage: $0 [ -u remote_username ] [ -p base64_encoded_password ]
# [ -m max_tries ] [ -w wait_seconds ] [ -s small_size_warn ]
# [ --nolog ] [ --info | --verbose ]
# remote_URI [ local_folder ]
#
# Author: Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2025-03-21 v0.1 Initial release.
# Messages.
#
MSG_BADEXIT="The worker finished with an exit code:"
MSG_BADOPT="Invalid option"
MSG_BADLOCAL="Must be an existing writable folder:"
MSG_BADPARAM="Doubtful parameter:"
MSG_BADPASS="Password must be base64-encoded:"
MSG_BADURI="Must be a valid http(s) address:"
# Printed by the dependency check, followed by the name of the missing command.
MSG_MISSINGDEP="Fatal: missing dependency"
MSG_MISSINGLOCAL="Please specify the local folder."
MSG_MISSINGURI="Remote URI is mandatory."
MSG_SMALLFILES="List of the downloaded files shorter than"
# The usage text is assembled piecewise only to keep the source lines short.
MSG_USAGE="Usage: $0 [ -u remote_username ] [ -p base64_encoded_password ] "
MSG_USAGE+="[ -m max_tries ] [ -w wait_seconds ] [ -s small_size_warn ] "
MSG_USAGE+="[ --nolog ] [ --info | --verbose ] "
MSG_USAGE+=" remote_URI [ local_folder ]"
# Basic environment settings.
#
# Forces the plain C locale for this shell and for every child process.
# A bare assignment only reaches the children when the variable happens to be
# exported already, hence the explicit export.
export LANG=C
export LC_ALL=C
# Initialisations.
#
# Command line (evaluated later with eval) that prints the logfile timestamp;
# it refers to the date command through the DATE variable.
LOGSTAMP='"$DATE" +%Y%m%d-%H%M%S'
# A file modified within this many minutes counts as "new".
NEWFILEMINS=60
# grep pattern of the file name endings exempt from the short file check.
SHORTFILEEX='\(\.log\|\.tmp\)$'
#
# Fixed wget options, kept as one space separated string.
# NOTE(review): the single quotes around the * are literal characters of this
# string - confirm that this is what wget is meant to receive.
WGET_OPTIONS="-e robots=off "
WGET_OPTIONS+="--no-parent --no-directories --recursive --level=1 "
WGET_OPTIONS+="--exclude-directories='*' "
WGET_OPTIONS+="--reject index.htm* "
WGET_OPTIONS+="--timestamping --continue --no-verbose "
# Default parameters.
#
# Retry policy of the download.
MAXTRIES=3      # Upper limit of the download attempts per file on error.
WAITSECS=5      # Seconds to wait between two download attempts on error.
# Reporting.
SMALLSIZE=1024  # A downloaded non-log file shorter than this raises a warning.
NOLOG=          # Any non-empty value disables writing the log file.
VERBOSE=        # Any non-empty value makes the log lines appear on the console.
# Gets the options (if any).
#
# Parses the command line options. The leading ':' of the option string puts
# getopts into silent mode, so the error cases are reported by this script:
# '?' stands for an unknown option and ':' for an option without its argument.
# Short options are accepted in both lower and upper case.
while getopts ":-:m:M:p:P:s:S:u:U:w:W:" option; do
  case "${option}" in
    "-")
      # A long option arrives as the option '-' with its name in OPTARG.
      case "$OPTARG" in
        nolog)
          NOLOG="yes"
          ;;
        info | verbose)
          VERBOSE="yes"
          ;;
        help)
          echo -e "$MSG_USAGE" >&2
          exit
          ;;
        *)
          echo "$MSG_BADOPT --$OPTARG" >&2
          exit 1
          ;;
      esac
      ;;
    "m" | "M")
      MAXTRIES="$OPTARG"
      ;;
    "p" | "P")
      MYPASS="$OPTARG"
      ;;
    "s" | "S")
      SMALLSIZE="$OPTARG"
      ;;
    "u" | "U")
      MYUSER="$OPTARG"
      ;;
    "w" | "W")
      WAITSECS="$OPTARG"
      ;;
    ":")
      # The option named in OPTARG was given without its mandatory argument.
      # Without this arm such an option would be dropped silently.
      echo -e "$MSG_BADPARAM -$OPTARG\n$MSG_USAGE" >&2
      exit 1
      ;;
    \?)
      echo "$MSG_BADOPT -$OPTARG" >&2
      exit 1
      ;;
  esac
done
# Leaves only the non-option parameters in "$@".
shift $((OPTIND - 1))
#
# All provided options were processed.
# Checks the dependencies.
#
# Looks up every external command the script needs and stores the path found
# in a variable named after the command in upper case (e.g. wget -> WGET).
# The lookup uses the shell builtin 'command -v', so the check itself does not
# depend on any external program such as which.
TR=$(command -v tr 2>/dev/null)
if [ -z "$TR" ]; then
  echo "$MSG_MISSINGDEP tr." >&2
  exit 1
fi
for item in base64 basename cat date find grep tee wget; do
  item_path=$(command -v "$item" 2>/dev/null)
  if [ -z "$item_path" ]; then
    echo "$MSG_MISSINGDEP $item." >&2
    exit 1
  fi
  # tr does the upper-casing of the variable name.
  item_name=$(echo "$item" | "$TR" '[:lower:]' '[:upper:]')
  export "$item_name=$item_path"
done
unset item_name item_path
#
# All dependencies are available via "$THECOMMAND" (upper case) call.
# Sanitizes the options.
# Some below are just arbitrary restrictions (reliable source: TODO!).
#
# MAXTRIES is a non-zero positive integer:
if [[ -n "$MAXTRIES" && ! "$MAXTRIES" =~ ^[1-9][0-9]*$ ]]; then
  echo -e "$MSG_BADPARAM -m $MAXTRIES\n$MSG_USAGE" >&2
  exit 1
fi
# MYUSER is empty or consists of alphanumerics and - . _ + @ \ only.
# The hyphen stands first in the bracket expression to be taken literally;
# between two characters it would denote a range. The '@' is allowed to keep
# e-mail style login names usable.
MYUSER_REGEX='^[-[:alnum:]._+@\\]*$'
if [[ -n "$MYUSER" && ! "$MYUSER" =~ $MYUSER_REGEX ]]; then
  echo -e "$MSG_BADPARAM -u $MYUSER\n$MSG_USAGE" >&2
  exit 1
fi
unset MYUSER_REGEX
# MYPASS is empty or Base64-encoded:
if [ -n "$MYPASS" ]; then
  # Formal check of the character set first.
  if [[ ! "$MYPASS" =~ ^[-A-Za-z0-9+/]*={0,3}$ ]]; then
    echo -e "$MSG_BADPASS -p $MYPASS\n$MSG_USAGE" >&2
    exit 1
  fi
  # Tries to decode it (once); a failing decoder means a bad encoding.
  # From here on MYPASS holds the decoded password.
  if ! MYPASS_DECODED=$(echo "$MYPASS" | "$BASE64" --decode 2>/dev/null); then
    echo -e "$MSG_BADPASS -p $MYPASS\n$MSG_USAGE" >&2
    exit 1
  fi
  MYPASS="$MYPASS_DECODED"
  unset MYPASS_DECODED
fi
# SMALLSIZE is a non-zero positive integer:
if [[ -n "$SMALLSIZE" && ! "$SMALLSIZE" =~ ^[1-9][0-9]*$ ]]; then
  echo -e "$MSG_BADPARAM -s $SMALLSIZE\n$MSG_USAGE" >&2
  exit 1
fi
# WAITSECS is a non-negative integer (can be zero):
if [[ -n "$WAITSECS" && ! "$WAITSECS" =~ ^[0-9][0-9]*$ ]]; then
  echo -e "$MSG_BADPARAM -w $WAITSECS\n$MSG_USAGE" >&2
  exit 1
fi
#
# We checked the options at least minimally.
# Formally checks the remote URI provided.
#
# Unless REMOTEURI has a value already, the first non-option parameter
# (if any) is consumed as the remote URI.
if [[ -z "$REMOTEURI" && -n "$1" ]]; then
  REMOTEURI="$1"
  shift
fi
# Without a remote URI there is nothing to do.
if [[ -z "$REMOTEURI" ]]; then
  echo -e "$MSG_MISSINGURI\n$MSG_USAGE" >&2 && exit 1
fi
# Only a formal check: the http:// or https:// scheme followed by at least
# one alphanumeric, dot or hyphen character; the rest is not examined.
if ! [[ "$REMOTEURI" =~ ^https?://([[:alnum:]]|[.-])/?.*$ ]]; then
  echo -e "$MSG_BADURI $REMOTEURI" >&2 && exit 1
fi
# Makes the URI end with exactly one more slash than it had after
# stripping a single trailing one.
REMOTEURI="${REMOTEURI%/}/"
#
# We checked the remote URI at least minimally.
# Determines the download directory.
#
# Unless LOCALDIR has a value already, the next non-option parameter
# (if any) is consumed as the local folder's pathname.
if [[ -z "$LOCALDIR" && -n "$1" ]]; then
  LOCALDIR="$1"
  shift
fi
# Defaults to the current folder: the PWD variable first, then the pwd builtin.
[[ -z "$LOCALDIR" ]] && LOCALDIR="$PWD"
[[ -z "$LOCALDIR" ]] && LOCALDIR="$(pwd)"
# This should not happen... Gives it up.
if [[ -z "$LOCALDIR" ]]; then
  echo -e "$MSG_MISSINGLOCAL" >&2
  exit 1
fi
# Must be a writable folder.
if [[ ! -d "$LOCALDIR" || ! -w "$LOCALDIR" ]]; then
  echo -e "$MSG_BADLOCAL $LOCALDIR" >&2
  exit 1
fi
# Removes the trailing slash (if any), except for the root folder itself,
# which would otherwise become an empty pathname.
if [[ "$LOCALDIR" != "/" ]]; then
  LOCALDIR="${LOCALDIR%/}"
fi
#
# We've a suitable download directory.
# Tries to retrieve only newer files from the remote URL.
#
# Composes the credentials (if any). They are kept in an array, one element
# per wget argument, so a user name or password containing spaces or
# wildcard characters reaches wget intact.
# NOTE(review): the password is still handed over on wget's command line,
# where it may be visible in the process list - consider a safer channel.
WGET_CREDENTIALS=()
if [[ -n "$MYUSER" ]]; then
  WGET_CREDENTIALS=("--http-user=$MYUSER" "--http-password=$MYPASS")
fi
#
# Splits the option string into words with read, which (unlike an unquoted
# expansion) does not perform pathname expansion on words such as index.htm*.
read -r -a wget_options <<< "$WGET_OPTIONS"
#
# Decides where the log goes: nowhere, or to a timestamped file in LOCALDIR
# named after this script.
if [[ -n "$NOLOG" ]]; then
  LOGFILE="/dev/null"
else
  LOGFILE="$LOCALDIR/$("$BASENAME" "$0")_$(eval "$LOGSTAMP").log"
fi
#
# The complete argument list of the worker.
wget_args=(
  "${wget_options[@]}"
  "${WGET_CREDENTIALS[@]}"
  "--waitretry=$WAITSECS"
  "--tries=$MAXTRIES"
  "--directory-prefix=$LOCALDIR"
  "$REMOTEURI"
)
#
# Calls parametrized wget as a worker; its exit code is kept in excode.
if [[ -n "$VERBOSE" ]]; then
  # We also need to write to the console: tee copies the worker's stderr
  # to the log file and to this script's stderr.
  "$WGET" "${wget_args[@]}" >/dev/null 2> >("$TEE" "$LOGFILE" >&2)
  excode=$?
else
  # We don't write to the console, the worker's stderr goes to the log only.
  "$WGET" "${wget_args[@]}" >/dev/null 2>"$LOGFILE"
  excode=$?
fi
#
# Reports a non-zero exit code of the worker.
if (( excode != 0 )); then
  # In quiet mode with logging enabled nothing has reached the console yet,
  # so the log file is dumped to stdout now. A failing cat is not reported.
  if [[ -z "$VERBOSE" && -z "$NOLOG" ]]; then
    "$CAT" "$LOGFILE" 2>/dev/null
  fi
  # The warning itself goes to stderr, preceded by an empty line.
  echo -e "\n$MSG_BADEXIT $excode" >&2
fi
#
# We tried to synchronize, we did what we could.
# Checks the files modified within the last NEWFILEMINS minutes (most
# likely the ones just downloaded), and warns if there are any short
# files among them.
# Files with extensions in SHORTFILEEX and the current
# logfile are excluded from this check.
#
# Collects the regular files below LOCALDIR that were modified within the
# last NEWFILEMINS minutes and are shorter than SMALLSIZE bytes. The current
# log file is skipped by find, the names matching SHORTFILEEX by grep.
# Errors of find are discarded on purpose.
SMALLFILES=$(
  "$FIND" "$LOCALDIR" -type f \
    ! -wholename "$LOGFILE" \
    -newermt "- $NEWFILEMINS minutes" \
    -size "-${SMALLSIZE}c" 2>/dev/null |
    "$GREP" -iv -e "$SHORTFILEEX"
)
# Warns if there are small files even if we called it to be quiet.
# A full if statement is used on purpose: when this is the last command of
# the script, a bare '[[ ... ]] && echo' list would make the script finish
# with exit status 1 every time no small file was found.
if [[ -n "$SMALLFILES" ]]; then
  echo -e "\n$MSG_SMALLFILES $SMALLSIZE:\n$SMALLFILES" >&2
fi
# That's all, Folks! :)