2
0

Added the downsync utility (both Bash and Powershell) to retrieve exported backups.

This commit is contained in:
2025-05-20 18:55:24 +02:00
parent a2c6a76956
commit 7638a0ea2d
5 changed files with 526 additions and 0 deletions

BIN
.metadata

Binary file not shown.

229
.utils/downsync/downsync Executable file
View File

@ -0,0 +1,229 @@
#!/bin/bash
#
# A humble shell script for one-way (down) synchronization of a remote
# web folder (e.g. created by the xbackup utility). Performs basic
# HTTP authentication when necessary.
#
# It does not handle any remote subfolders, only the root folder. Downloads
# all files that do not exist locally. Updates only an existing file that is
# older than the remote source. It warns of errors or possible inconsistencies.
#
# Actually it is only a pretty fatty wrapper to the wget :).
# Creates a unique log file in the local folder (this can be disabled).
#
# Usage: $0 [ -u remote_username ] [ -p base64_encoded_password ]
# [ -m max_tries ] [ -w wait_seconds ] [ -s small_size_warn ]
# [ --nolog ] [ --info | --verbose ]
# remote_URI [ local_folder ]
#
# Author: Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2025-03-21 v0.1 Initial release.
# Messages.
#
# User-facing texts gathered in one place for easy maintenance.
MSG_BADEXIT="The worker finished with an exit code:"
MSG_BADOPT="Invalid option"
MSG_BADLOCAL="Must be an existing writable folder:"
MSG_BADPARAM="Doubtful parameter:"
MSG_BADPASS="Password must be base64-encoded:"
MSG_BADURI="Must be a valid http(s) address:"
# Fix: MSG_MISSINGDEP is referenced by the dependency checks below but
# was never defined, so the error message printed only the tool name.
MSG_MISSINGDEP="Missing dependency:"
MSG_MISSINGLOCAL="Please specify the local folder."
MSG_MISSINGURI="Remote URI is mandatory."
MSG_SMALLFILES="List of the downloaded files shorter than"
# Fix: "remote_usename" -> "remote_username" in the usage text.
MSG_USAGE="Usage: $0 [ -u remote_username ] [ -p base64_encoded_password ] "
MSG_USAGE+="[ -m max_tries ] [ -w wait_seconds ] [ -s small_size_warn ] "
MSG_USAGE+="[ --nolog ] [ --info | --verbose ] "
MSG_USAGE+=" remote_URI [ local_folder ]"
# Basic environment settings.
#
# Forces the C locale so regex ranges, sorting and tool messages behave
# predictably regardless of the user's environment.
LANG=C
LC_ALL=C
# Initialisations.
#
# LOGSTAMP is eval'd later, after "$DATE" has been resolved by the
# dependency check, to build the log file name.
LOGSTAMP="\"\$DATE\" +%Y%m%d-%H%M%S" # Timestamp format for logfile
NEWFILEMINS=60 # A file younger than this is "new"
SHORTFILEEX="\(\.log\|\.tmp\)$" # Exceptions to short file checking
#
# wget behaviour: treat the remote folder as a one-level flat mirror,
# ignore robots.txt and index pages, fetch only newer files
# (--timestamping) and resume partial downloads (--continue).
WGET_OPTIONS="-e robots=off --no-parent --no-directories "
WGET_OPTIONS+="--recursive --level=1 --exclude-directories='*' --reject index.htm* "
WGET_OPTIONS+="--timestamping --continue "
WGET_OPTIONS+="--no-verbose "
# Default parameters (each may be overridden by a command line option).
#
MAXTRIES=3 # On error it will try to download a file
# at most this many times.
NOLOG="" # If not empty, it will not write log file.
SMALLSIZE=1024 # Warns if the downloaded file isn't a log
# and is shorter than this value.
VERBOSE="" # If not empty, it will display log lines.
WAITSECS=5 # On error it will wait this many seconds between
# two download attempts.
# Gets the options (if any).
#
while getopts ":-:m:M:p:P:s:S:u:U:w:W:" option
do
  case ${option} in
    "-" )
      # GNU-style long options arrive as option '-' with the
      # name itself in OPTARG.
      case "$OPTARG" in
        nolog )          NOLOG="yes" ;;
        info | verbose ) VERBOSE="yes" ;;
        help )           echo -e "$MSG_USAGE" >&2; exit ;;
        * )              echo "$MSG_BADOPT --$OPTARG" >&2; exit 1 ;;
      esac
      ;;
    m | M ) MAXTRIES="$OPTARG" ;;      # max download attempts
    p | P ) MYPASS="$OPTARG" ;;        # base64-encoded password
    s | S ) SMALLSIZE="$OPTARG" ;;     # small-file warning limit
    u | U ) MYUSER="$OPTARG" ;;        # remote username
    w | W ) WAITSECS="$OPTARG" ;;      # wait between attempts
    \? )    echo "$MSG_BADOPT -$OPTARG" >&2; exit 1 ;;
  esac
done; shift $((OPTIND -1))
#
# All provided options were processed.
# Checks the dependencies.
#
# tr is located first because it is needed to build the variable names
# for the remaining tools.
TR=$(which tr 2>/dev/null)
if [ -z "$TR" ]; then echo "$MSG_MISSINGDEP tr."; exit 1 ; fi
# Locates every other required tool and exports its full path in an
# upper-cased variable of the same name (e.g. WGET=/usr/bin/wget).
for item in base64 basename cat date find grep tee wget
do
if [ -n "$(which $item)" ]
then export $(echo $item | "$TR" '[:lower:]' '[:upper:]')=$(which $item)
else echo "$MSG_MISSINGDEP $item." >&2; exit 1; fi
done
#
# All dependencies are available via "$THECOMMAND" (upper case) call.
# Sanitizes the options.
# Some below are just arbitrary restrictions (reliable source: TODO!).
#
# MAXTRIES is a non-zero positive integer:
[[ -n "$MAXTRIES" ]] && [[ ! "$MAXTRIES" =~ ^[1-9][0-9]*$ ]] \
&& echo -e "$MSG_BADPARAM -m $MAXTRIES\n$MSG_USAGE" >&2 && exit 1
# MYUSER is empty or contains only alphanumerics and . _ + - characters.
# Fix: the previous bracket expression [.-_\\+] contained the character
# RANGE '.'..'_' (which wrongly accepted : ; < = > ? @ etc.) and did not
# accept the hyphen itself; the hyphen is now listed literally (last).
[[ -n "$MYUSER" ]] && [[ ! "$MYUSER" =~ ^([[:alnum:]]|[._+-])*$ ]] \
&& echo -e "$MSG_BADPARAM -u $MYUSER\n$MSG_USAGE" >&2 && exit 1
# MYPASS is empty or Base64-encoded:
if [ -n "$MYPASS" ]; then
# Fix: '-' is not part of the base64 alphabet and padding is at most
# two '=' characters (was: [-A-Za-z0-9+/]*={0,3}).
[[ ! "$MYPASS" =~ ^[A-Za-z0-9+/]*={0,2}$ ]] \
&& echo -e "$MSG_BADPASS -p $MYPASS\n$MSG_USAGE" >&2 && exit 1
# Tries to decode it (a second, authoritative validation).
echo "$MYPASS" | "$BASE64" --decode >/dev/null 2>&1
[[ $? -gt 0 ]] \
&& echo -e "$MSG_BADPASS -p $MYPASS\n$MSG_USAGE" >&2 && exit 1
# From here on MYPASS holds the decoded (clear-text) password.
MYPASS=$(echo "$MYPASS" | "$BASE64" --decode)
fi
# SMALLSIZE is a non-zero positive integer:
[[ -n "$SMALLSIZE" ]] && [[ ! "$SMALLSIZE" =~ ^[1-9][0-9]*$ ]] \
&& echo -e "$MSG_BADPARAM -s $SMALLSIZE\n$MSG_USAGE" >&2 && exit 1
# WAITSECS is a non-negative integer (can be zero):
[[ -n "$WAITSECS" ]] && [[ ! "$WAITSECS" =~ ^[0-9]+$ ]] \
&& echo -e "$MSG_BADPARAM -w $WAITSECS\n$MSG_USAGE" >&2 && exit 1
#
# We checked the options at least minimally.
# Formally checks the remote URI provided.
#
# 1st non-option parameter is the remote URI.
if [ -z "$REMOTEURI" -a -n "$1" ]; then REMOTEURI="$1"; shift; fi
# It is mandatory.
[[ -z "$REMOTEURI" ]] \
&& echo -e "$MSG_MISSINGURI\n$MSG_USAGE" >&2 && exit 1
# Must be a valid http(s) address.
# NOTE(review): this regex only really requires "http(s)://" plus one
# host character -- the trailing ".*" swallows everything else, so the
# validation is very permissive (ports, paths, queries all pass).
[[ ! "$REMOTEURI" =~ ^https?://([[:alnum:]]|[.-])/?.*$ ]] \
&& echo -e "$MSG_BADURI $REMOTEURI" >&2 && exit 1
# Adds a trailing slash (wget then treats the URI as a folder).
REMOTEURI="${REMOTEURI%/}/"
#
# We checked the remote URI at least minimally.
# Determines the download directory.
#
# Uses the 2nd non-option parameter as the local folder, if given.
if [ -z "$LOCALDIR" ] && [ -n "$1" ]; then
  LOCALDIR="$1"; shift
fi
# Falls back to the current working directory.
if [ -z "$LOCALDIR" ]; then LOCALDIR="$PWD"; fi
if [ -z "$LOCALDIR" ]; then LOCALDIR="$($(which pwd))"; fi
# This should not happen... Gives it up.
if [ -z "$LOCALDIR" ]; then
  echo -e "$MSG_MISSINGLOCAL" >&2
  exit 1
fi
# Requires an existing, writable directory.
if [ ! -d "$LOCALDIR" ] || [ ! -w "$LOCALDIR" ]; then
  echo -e "$MSG_BADLOCAL $LOCALDIR" >&2
  exit 1
fi
# Normalizes the path: strips the trailing slash (if any).
LOCALDIR="${LOCALDIR%/}"
#
# We've a suitable download directory.
# Tries to retrieve only newer files from the remote URL.
#
# Composes the credentials (if any).
# NOTE(review): passing the password on the command line exposes it in
# the process list; consider wget's --use-askpass or a .netrc file.
WGET_CREDENTIALS=""
[[ -n "$MYUSER" ]] && WGET_CREDENTIALS="--http-user=$MYUSER --http-password=$MYPASS "
#
# Figures out how we should do the logging.
[[ -n "$NOLOG" ]] \
&& LOGFILE="/dev/null" \
|| LOGFILE="$LOCALDIR/$("$BASENAME" "$0")_$(eval $LOGSTAMP).log"
#
# Calls parametrized wget as a worker.
# WGET_OPTIONS and WGET_CREDENTIALS are intentionally left unquoted:
# they hold several options that must undergo word splitting.
if [ -n "$VERBOSE" ]; then
# We also need to write to the console: wget's stderr is duplicated
# into the log file by tee via process substitution.
"$WGET" $WGET_OPTIONS $WGET_CREDENTIALS --waitretry=$WAITSECS --tries=$MAXTRIES \
--directory-prefix="$LOCALDIR" "$REMOTEURI" \
>/dev/null 2> >("$TEE" "$LOGFILE" >&2); excode=$?
else
# We don't write to the console.
"$WGET" $WGET_OPTIONS $WGET_CREDENTIALS --waitretry=$WAITSECS --tries=$MAXTRIES \
--directory-prefix="$LOCALDIR" "$REMOTEURI" \
>/dev/null 2>"$LOGFILE"; excode=$?
fi
#
# Checks the exit code, warns if non-zero.
# (Inside [[ ]] the -ne operator is an arithmetic comparison, so the
# bare name "excode" resolves to the variable's value.)
if [[ excode -ne 0 ]]; then
# Displays the log file even if we called it to be quiet.
if [ -z "$VERBOSE" -a -z "$NOLOG" ]; then
"$CAT" "$LOGFILE" 2>/dev/null
fi
# Shows/appends the warning.
echo -e "\n$MSG_BADEXIT $excode" >&2
fi
#
# We tried to synchronize, we did what we could.
# Checks the files that are currently being downloaded,
# and warns if there are any short files among them.
# Files with extensions in SHORTFILEEX and the current
# logfile are excluded from this check.
#
# Only files modified within the last NEWFILEMINS minutes are examined,
# so previously downloaded small files do not warn again and again.
SMALLFILES=$( \
"$FIND" "$LOCALDIR" -type f \
! -wholename "$LOGFILE" \
-newermt "- $NEWFILEMINS minutes" \
-size -${SMALLSIZE}c 2>/dev/null | \
"$GREP" -iv -e "$SHORTFILEEX"
)
# Warns if there are small files even if we called it to be quiet.
[[ -n "$SMALLFILES" ]] \
&& echo -e "\n$MSG_SMALLFILES $SMALLSIZE:\n$SMALLFILES" >&2
# That's all, Folks! :)

View File

@ -0,0 +1,233 @@
<#
.SYNOPSIS
Powershell script for one-way (down) synchronization of a remote web folder.
.DESCRIPTION
It does not handle any remote subfolders, only the root folder. Downloads
all files that do not exist locally. Updates only an existing file that is
older than the remote source. It warns of errors or possible inconsistencies.
Creates a unique log file in the local folder (this can be disabled).
Usage: $PSCommandPath -Remote URI_to_be_synced [-Local local_folder]
[-User username] [-Pass base64_encoded_password]
[-NoLog] [-Info]
Author: Zoltán KOVÁCS <kovacsz@marcusconsulting.hu>
License: GNU/GPL 3+ https://www.gnu.org/licenses/gpl-3.0.html
.NOTES
Changelog:
2025-03-12 v0.1 Initial release.
#>
# Command line parameters.
#
param (
# An http(s) URI pointing to a remote web folder containing the files to synchronize. Mandatory.
[Parameter()][string]$Remote,
# An existing and writable local folder where the script will download the files.
[Parameter()][string]$Local = $PSScriptRoot,
# Credentials, if required by the remote website (HTTP Basic auth).
[Parameter()][string]$User,
# A base64-encoded password (if necessary).
[Parameter()][string]$Pass,
# On error the script will try to download a file at most this many times. Defaults to 3 tries.
[Parameter()][int]$MaxTries,
# On error the script will wait this many seconds between two download attempts. Defaults to 5 seconds.
[Parameter()][int]$WaitRetry,
# The script warns if the downloaded file is shorter than this value. Defaults to 1024 bytes.
[Parameter()][int]$SmallSize,
# If set, the script will not write log file.
[Parameter()][switch]$NoLog = $false,
# If set, the script will display log lines.
[Parameter()][switch]$Info = $false
)
# Initialisations.
#
# Numeric parameters left at 0 (i.e. not provided) fall back to their
# documented defaults here.
if (-not $MaxTries) { $MaxTries = 3 }
if (-not $SmallSize) { $SmallSize = 1024 }
if (-not $WaitRetry) { $WaitRetry = 5 }
# Messages.
#
# User-facing texts gathered in one place for easy maintenance.
$Message = @{}
$Message['Bad DNS'] = 'Remote host is not an IP and not resolvable.'
$Message['Bad folder'] = "The local path must point to a writable folder."
$Message['Bad URI'] = 'Remote parameter must be a valid http(s) URI.'
# Fix: the message said "existing array"; it is logged when the local
# path collides with an existing folder.
$Message['Collision array'] = "The local path is an existing folder:"
$Message['Downloaded'] = "Downloaded file:"
$Message['Empty filelist'] = "List of files is empty:"
$Message['Finished'] = "Synchronisation finished."
$Message['Is a folder'] = "Remote subfolders are ignored:"
$Message['Local newer'] = "The files are different but the local one is newer:"
$Message['Size mismatch'] = "Size of the downloaded file differ:"
# Fix: typo "Sychronisation".
$Message['Started'] = "Synchronisation started."
$Message['Unable fetchdir'] = "Unable to fetch the content of the remote folder."
$Message['Unable to decode'] = 'Password must be properly base64 encoded.'
# Fix: this key is used by the download retry loop's catch block but
# was never defined, so failed downloads were logged without any text.
$Message['Unable to download'] = "Unable to download:"
$Message['Unable to stat remote'] = 'Unable to stat the remote object:'
$Message['Small file'] = "File is smaller than " + $SmallSize + " bytes:"
$Message['Usage'] = "Usage:`n" + `
$PSCommandPath + ' -Remote URI_to_be_synced [-Local local_folder] ' + `
'[-User username] [-Pass base64_encoded_password] ' + `
'[-NoLog True] [-Info True]'
# Logger function.
#
# Writes a timestamped line to the log file (unless -NoLog was given)
# and echoes the raw line to the console (when -Info was given).
function Write-Log {
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss.fff'
    if (-not $NoLog) {
        Add-Content -Path $LogFilePath -Value "$stamp $args"
    }
    if ($Info) {
        Write-Host $args
    }
}
# Checks the -Remote parameter.
#
# It is mandatory.
if ( -not("$Remote")) { Write-Host $Message['Usage']; exit 1 }
# The closing / is necessary (the URI must denote a folder).
$Remote = $Remote.TrimEnd('/') + '/'
# Must be well-formed and http(s).
if ( -not([uri]::IsWellFormedUriString("$Remote", 'Absolute')) -or -not(([uri] "$Remote").Scheme -in 'http', 'https')) {
Write-Host $Message['Bad URI']; exit 1 }
# Must be IPv4 or resolvable.
# The dotted-quad regex plus the [ipaddress] cast weeds out look-alikes
# such as 999.1.1.1; anything else must resolve via DNS.
if ( -not(([uri]"$Remote").Host -match "^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$" -and [bool](([uri]"$Remote").Host -as [ipaddress]))) {
# It is resolvable?
try { Resolve-DnsName -Name ([uri]"$Remote").Host -ErrorAction Stop | Out-Null }
catch { Write-Host $Message['Bad DNS']; exit 1 }
}
#
# We've a somewhat checked remote.
# Checks the -Local parameter.
#
# Must be an existing, writable folder.
if ( -not("$Local")) { $Local = $PSScriptRoot }
if ( -not(Test-Path -LiteralPath "$Local" -pathType Container)) { Write-Host $Message['Bad folder']; exit 1 }
# Can we write into? (Creates and removes a uniquely named probe file.)
try {
$testfile = $Local + '\' + [guid]::NewGuid() + '.tmp'
[io.file]::OpenWrite("$testfile").close()
Remove-Item -ErrorAction SilentlyContinue "$testfile" }
catch { Write-Host $Message['Bad folder']; exit 1 }
#
# We've a somewhat checked local folder.
# Decodes the provided -Pass (if any).
#
# NOTE(review): ASCII decoding mangles non-ASCII passwords -- confirm
# whether UTF8 is needed for such credentials.
if ("$Pass") {
try { $Pass = ([System.Text.Encoding]::ASCII.GetString([System.Convert]::FromBase64String($Pass))) }
catch { Write-Host $Message['Unable to decode']; exit 1 }
}
#
# We've a decoded (or empty) password.
# Initializes the log file.
#
# Log file name: <scriptname>-<timestamp>.log inside the local folder.
$LogFilePath = $Local + '\' + (Get-Item $PSCommandPath ).Basename + (Get-Date -Format "-yyyyMMdd-HHmmss") +'.log'
Write-Log $Message['Started']
#
# We've the log file ready to use.
# Prepares the Authorization header from provided credentials (if any).
#
$Headers = ''
if ("$User" ) {
$encoded = [System.Convert]::ToBase64String([System.Text.Encoding]::ASCII.GetBytes("$($User):$($Pass)"))
$Headers = @{ Authorization = "Basic $encoded" }
}
# We've an Authorization header ready to use for Webrequests.
# Let's get the directory index from the remote source.
#
$response = ''
try {
$ProgressPreference = 'SilentlyContinue'
if ("$Headers") {$response = (Invoke-WebRequest -Uri "$Remote" -Headers $Headers -UseBasicParsing) }
else {$response = (Invoke-WebRequest -Uri "$Remote" -UseBasicParsing ) }
}
catch { Write-Log $Message['Unable fetchdir'] "$Remote" $_.Exception.Response.StatusCode.Value__ $_.Exception.Response.StatusDescription; exit 1 }
# Collects the link targets from the index page; the first link is
# skipped -- presumably the parent-folder entry, TODO confirm this
# matches the actual index format served by the remote.
$files = @($response.Links.HREF | select -skip 1)
#
# We send a warning if it is empty.
#
if ($files.Count -eq 0) { Write-Log $Message['Empty filelist'] "$Remote" }
#
# We've the list of remote files.
# Processes the remote files in a row, one after the other.
#
foreach ($file in $files) {
#
# Let's get the parameters of the remote object. On error we send a warning and move on.
#
$remoteHeaders = ''
try {
$ProgressPreference = 'SilentlyContinue'
if ("$Headers") { $remoteHeaders = (Invoke-WebRequest -Uri ("$Remote" + "$file") -Headers $Headers -Method Head -UseBasicParsing ).Headers }
else { $remoteHeaders = (Invoke-WebRequest -Uri ("$Remote" + "$file") -Method Head -UseBasicParsing).Headers }
}
catch { Write-Log $Message['Unable to stat remote'] ("$Remote" + "$file") $_.Exception.Message; continue }
$remoteDate = $remoteHeaders['Last-Modified']
$remoteSize = $remoteHeaders['Content-Length']
$remoteType = $remoteHeaders['Content-Type']
#
# If the remote object is a folder we send a warning and move on.
#
if ("$remoteType" -eq 'text/directory') { Write-Log $Message['Is a folder'] ("$Remote" + "$file"); continue }
#
# If we've a local object and it is a folder we send a warning and move on.
#
if (Test-Path -LiteralPath "$Local\$file" -PathType Container) { Write-Log $Message['Collision array'] "$Local\$file"; continue }
#
# We've an existing local file?
#
if (Test-Path -LiteralPath "$Local\$file" -PathType Leaf) {
$localDate = (Get-Item -LiteralPath ("$Local" + '\' + "$file")).LastWriteTime.DateTime
$localSize = (Get-Item -LiteralPath ("$Local" + '\' + "$file")).Length
#
# If the local file is newer than remote we don't replace it, but we send a warning if the sizes are different.
#
if ((Get-Date $localDate) -gt (Get-Date $remoteDate)) {
if ( $localSize -ne $remoteSize ) { Write-Log $Message['Local newer'] $file }
continue
}
}
#
# OK, we decided to download the remote file.
# On failure, we'll try again a few times.
#
for ($i = 1; $i -le $MaxTries; $i++) {
try {
$ProgressPreference = 'SilentlyContinue'
if ("$Headers") { Invoke-WebRequest -Uri ("$Remote" + "$file") -Headers $Headers -OutFile ($Local + '\' + $file) }
else { Invoke-WebRequest -Uri ("$Remote" + "$file") -OutFile ($Local + '\' + $file) }
#
Write-Log $Message['Downloaded'] ("$Remote" + "$file")
#
# Checks the size of the downloaded file, stops trying if it is OK.
#
$localSize = (Get-Item -LiteralPath ("$Local" + '\' + "$file")).Length
if ( $localSize -eq $remoteSize ) {
#
# We send a warning on small files (except the logs).
#
if ($localSize -lt $SmallSize -and (Get-Item ("$Local" + "\" + "$file")).Extension -notin ('.log')) {
Write-Log $Message['Small file'] ("$Local" + "\" + "$file") }
break
}
#
# Size differs: logged, then we fall through to the retry wait below.
Write-Log $Message['Size mismatch'] $Local\$file $localSize $remoteSize
}
# NOTE(review): the 'Unable to download' key must exist in $Message,
# otherwise the failure is logged without any message text.
catch { Write-Log $Message['Unable to download'] ("$Remote" + "$file") $_.Exception.Message }
#
# Waits before retrying.
# NOTE(review): this also sleeps after the final failed attempt.
#
Start-Sleep -Seconds $WaitRetry
}
}
#
# That's all.
#
Write-Log $Message['Finished']

View File

@ -0,0 +1,17 @@
#!/bin/bash
#
# This script retrieves daily backups from our remote web services.
# Contains (encoded) passwords, keep it confidential!
# Maintained by hand.
# NOTE: every <...> placeholder below must be replaced with a real
# value before use; this file is a template and not runnable as-is.
# A download step
$HOME/bin/downsync \
-u <https user> -p "<base64-encoded https password>" \
'https://<remote website>/export' \
'<download destination pathname>'
# More download steps (if any)
# Rotates all backup folders.
# The actual schedule is defined per folder in the .rotate_folder.conf files.
$HOME/bin/rotatebackups "<downloads destination root folder>"

47
.utils/downsync/rotatebackups Executable file
View File

@ -0,0 +1,47 @@
#!/bin/bash
#
# Backup folders maintenance operation, planned to run once a day.
# This script is usually called by cron (but indirectly).
# Uses the rotate_folder utility which must be available on path.
#
# Author: Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2025-05-20 v0.1 Initial release
# Will maintain child subfolders of this directory.
# Input parameter (if any) will be sanitized later.
[[ -n "$1" ]] \
&& BACKUPSROOT="$1" \
|| BACKUPSROOT="$PWD"
# Checks the components.
[[ -z "$(which dirname)" ]] && exit 1
[[ -z "$(which readlink)" ]] && exit 1
# Where am I?
SCRPATH="$( cd -P "$( "$(which dirname)" "$0" )" && echo "$PWD" )"
# Rotates the backup folders.
#
# Enumerates the child entries with a glob, which is safe for names
# containing whitespace. (The previous `ls -1 | xargs -0` pipeline was
# broken: ls output is newline-delimited, not NUL-delimited, so xargs -0
# glued all names into a single argument and the for-loop then split it
# on arbitrary whitespace.)
for entry in "$BACKUPSROOT"/*
do
  # Dereferenced absolute path; empty when the entry does not exist
  # (e.g. the literal pattern left by an unmatched glob), which the
  # -d test below filters out.
  folder="$("$(which readlink)" -e "$entry")"
  # Is it a folder with a prepared configuration?
  if [ -d "$folder" -a -r "$folder/.rotate_folder.conf" ]; then
    # Does the rotate job; prefers the copy sitting next to this script.
    if [ -x "$SCRPATH/rotate_folder" ]; then
      "$SCRPATH/rotate_folder" -f "$folder" >/dev/null
    elif [ -x "$(which rotate_folder)" ]; then
      "$(which rotate_folder)" -f "$folder" >/dev/null
    fi
  fi
done
#
# Done with rotating.
# That's all, Folks :)