2
0

397 lines
15 KiB
Bash
Executable File

#!/bin/bash
#
# Dumps a PostgreSQL database from a native or dockerized PostgreSQL instance
# running on this box. This is a wrapper script to the pg_dump command.
# Uses only the TCP connection, therefore you must enable this in pg_hba file.
#
# If the PostgreSQL is dockerized you need call as a Docker manager user
# (member of the docker Linux group).
#
# Accepts few pg_dump options as well as the optional database password
# and the optional output pathname:
#
# $0 [-U dbuser] [-P dbpass] [-h dbhost] [-p dbport]
# [-C container] [-d database] [-f dumpfile ] [-F dumpformat ]
# [--acl ] [--force]
# [database (if not in -d)] [dumpfile (if not in -f)]
#
# A special -F9 option makes a PostgreSQL 9.x compatible plain text dump.
#
# Author: Kovács Zoltán <kovacs.zoltan@smartfront.hu>
# Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2023-06-18 v1.0
# new: forked from the "SMARTERP_skeleton" repository.
# 2022-11-03 v0.4
# mod: May use the peer authentication as a secondary preference.
# fix: pg_restore output overwrited the previous log lines, this has been fixed.
# 2022-01-23 v0.3
# mod: More sophisticated estimate made on storage space needed.
# 2021-03-19 v0.2
# new: Option --acl (include ACLs as well) has been added.
# new: Option -F9 (PSQL 9.x compatible plain dump) has been added (native only).
# mod: Plain text dumps are compressed now.
# mod: Typos and comments.
# fix: The sbin directories has appended to the $PATH (Debian doesn't add them).
# 2020-09-17 v0.1 Initial release
# Accepted environment variables and their defaults.
#
PGCONTAINER=${PGCONTAINER-""} # Docker container's name
PGDATABASE=${PGDATABASE-""} # Database name to dump
PGDUMP=${PGDUMP-""} # Dump file pathname
PGDUMPFORMAT=${PGDUMPFORMAT-"c"} # Dump file format
PGHOST=${PGHOST:-"localhost"} # Connection parameter
PGOPTIONS=${PGOPTIONS-""} # Options to pass to pg_dump
PGPASSWORD=${PGPASSWORD-""} # Credential for the DB user
PGPORT=${PGPORT:-"5432"} # Connection parameter
PGUSER=${PGUSER:-"postgres"} # DB user for this dump
### Temporailly ignored! Need to sanitize.
PGOPTIONS=""
# Other initialisations.
#
PGDUMPACLS="--no-acl --no-owner" # Excludes the ACLs and grants.
PGDUMPFORCED="" # Dumps despite failed checks
vetodatabases="postgres template0 template1" # Technical DBs aren't to dump
# Messages.
#
MSG_ABORTED="aborted"
MSG_BADCRED="Bad credentials for MySQL"
MSG_BADDUMPPATH="Dumpfile's directory isn't writable"
MSG_BADOPT="Invalid option"
MSG_DOESNOTRUN="Doesn't run the database container"
MSG_DOCKERGRPNEED="You must be a member of the docker group."
MSG_FAILBKP="Archiver exited with error code"
MSG_FAILDB="Unable to dump the database"
MSG_FAILCONN="Failed to connect the database"
MSG_FAILSIZE="Failed to size the database"
MSG_FORCED="but forced to continue"
MSG_MISSINGDEP="Fatal: missing dependency"
MSG_NOCOMP="Fatal: missing component"
MSG_NODIRDUMP="Directory format doesn't implemented with a dockerized database."
MSG_NOSPACE="Not enough space to dump the database"
MSG_PEERAUTH="Peer authentication has been used."
MSG_USAGE="Usage: $0 [options] [database [dump_pathname|-]]\n"
MSG_USAGE+="Option:\tENVVAR:\n"
MSG_USAGE+=" -C\tPGCONTAINER\tPostgres Docker container's name\n"
MSG_USAGE+=" -d\tPGDATABASE\tPostgres database to dump ($USER)\n"
MSG_USAGE+=" -f\tPGDUMP\t\tDumpfile pathname\n"
MSG_USAGE+=" -F\tPGDUMPFORMAT\tDumpfile format ($PGDUMPFORMAT)\n"
MSG_USAGE+=" -h\tPGHOST\t\tHostname or IP to connect (localhost)\n"
MSG_USAGE+=" -p\tPGPORT\t\tTCP port to connect (5432)\n"
MSG_USAGE+=" -P\tPGPASSWORD\tPostgres password\n"
MSG_USAGE+=" -U\tPGUSER\t\tPostgres username ($PGUSER)\n"
MSG_USAGE+="--acl\t\t\tIncludes the grants and ACLs as well\n"
MSG_USAGE+="--force\t\t\tForces the operation despite the failed checks\n"
# Basic environment settings.
LANG=C
LC_ALL=C
# We need also the sbin directories.
if ! [[ "$PATH" =~ '/sbin:' ]]; then
PATH="$PATH:/usr/local/sbin:/usr/sbin:/sbin"; fi
# Getting options.
#
while getopts ":-:C:d:D:f:F:h:H:p:P:u:U:" option
do
case ${option} in
"-" )
if [ "$OPTARG" = "acl" ]; then PGDUMPACLS=""
elif [ "$OPTARG" = "force" ]; then PGDUMPFORCED="yes"
elif [ "$OPTARG" = "help" ]; then echo -e "$MSG_USAGE" >&2; exit
else echo "$MSG_BADOPT --$OPTARG" >&2; exit 1
fi
;;
"C" ) PGCONTAINER="$OPTARG" ;;
"d" | "D" ) PGDATABASE="$OPTARG" ;;
"f" ) PGDUMPFILE="$OPTARG" ;;
"F" ) PGDUMPFORMAT="$OPTARG" ;;
"h" | "H" ) PGHOST="$OPTARG" ;;
"P" ) PGPASSWORD="$OPTARG" ;;
"p" ) PGPORT="$OPTARG" ;;
"u" | "U" ) PGUSER="$OPTARG" ;;
\? )
echo "$MSG_BADOPT -$OPTARG" >&2; exit 1
;;
esac
done; shift $((OPTIND -1))
# All options have been processed.
# Checks the dependencies.
#
# Conditional dependencies (according to native or dockerized environment).
[[ -z "$PGCONTAINER" ]] \
&& additem="psql pg_dump" \
|| additem="docker"
# Common dependencies.
TR=$(which tr 2>/dev/null)
if [ -z "$TR" ]; then echo "$MSG_MISSINGDEP tr."; exit 1 ; fi
for item in basename df date dirname egrep grep gzip hostname id \
pwd sed tail tee $additem
do
if [ -n "$(which $item)" ]
then export $(echo $item | "$TR" '[:lower:]' '[:upper:]')=$(which $item)
else echo "$MSG_MISSINGDEP $item." >&2; exit 1; fi
done
# All dependencies are available via "$THECOMMAND" (upper case) call.
#
# An additional bugfix (use "$(which gzip)" instead of "$GZIP"):
# https://www.gnu.org/software/gzip/manual/html_node/Environment.html
GZIP=""
# Need to be root or a Docker manager user if the DB runs in a container.
#
[[ -n "$PGCONTAINER" ]] && [[ "$USER" != 'root' ]] \
&& [[ -z "$(echo "$("$ID" -Gn "$USER") " | "$GREP" ' docker ')" ]] \
&& echo "$MSG_DOCKERGRPNEED" >&2 && exit 1 #"
# If the PostgreSQL is dockerized the container must be running.
#
[[ -n "$PGCONTAINER" ]] \
&& [[ -z "$("$DOCKER" ps -q -f name=$PGCONTAINER)" ]] \
&& echo "$MSG_DOESNOTRUN $PGCONTAINER" >&2 && exit 1
# Determines the database to dump.
#
# Lack of -d the 1st non-option parameter is the database's name.
if [ -z "$PGDATABASE" -a -n "$1" ]; then PGDATABASE="$1"; shift; fi
# The last resort is the Linux user's name.
if [ -z "$PGDATABASE" ]; then PGDATABASE="$USER"; fi
# A humble sanitization.
if [[ ! "$PGDATABASE" =~ ^([[:alnum:]]|[_])*$ ]]; then
echo -e "$MSG_USAGE" >&2; exit 1; fi
# Silently refuses the PostgreSQL internal databases.
for veto in $vetodatabases ""
do
[[ "$PGDATABASE" = "$veto" ]] && exit 0
done
# We've a database name to dump.
# Determines the output file (or maybe a target directory) and the logfile.
#
# A generated file- or directory name maybe necessary.
DUMPNAME="$PGDATABASE.$("$DATE" '+%Y%m%d_%H%M%S').$("$HOSTNAME")"
# Lack of -f the next non-option parameter is the dumpfile's pathname.
if [ -z "$PGDUMPFILE" -a -n "$1" ]; then PGDUMPFILE="$1"; shift; fi
# The last resort is the generated pathname.
if [ -z "$PGDUMPFILE" ]; then PGDUMPFILE="$("$PWD")/$DUMPNAME"; fi
#
# Let's make some checks.
#
# Dumping to the STDOUT is invalid with the directory format, we'll dump
# into a newly created directory instead.
[[ "$PGDUMPFILE" = "-" ]] && [[ "$PGDUMPFORMAT" = "d" ]] \
&& PGDUMPFILE="$("$PWD")/$DUMPNAME"
# If the given pathname is an existing directory, we need append
# a generated target filename (or directory name for Fd format).
[[ -d "$PGDUMPFILE" ]] && PGDUMPFILE+="/$DUMPNAME"
#
# Here we go with the desired pathname.
#
if [ "$PGDUMPFILE" = "-" ]; then
# If '-' was given as the PGDUMPFILE, we'll write to STDOUT w/o logs.
PGDUMPFILE=""
logfile="/dev/null"
else
# We'll write and log to a directory within the filesystem.
PGDUMPDIR="$("$DIRNAME" "$PGDUMPFILE")"
# This directory must be exist and be writable.
if [ -n "$PGDUMPDIR" ] && [ ! -d "$PGDUMPDIR" -o ! -x "$PGDUMPDIR" ]; then
echo "$MSG_BADDUMPPATH: $PGDUMPDIR" >&2; exit 1
fi
# Extends the output files properly.
if [ "$PGDUMPFORMAT" = "d" ]; then
logfile="$PGDUMPFILE.log"
elif [ "$PGDUMPFORMAT" = "9" -o "$PGDUMPFORMAT" = "p" ]; then
PGDUMPFILE="${PGDUMPFILE%.sql}.sql"
logfile="${PGDUMPFILE%.sql}.log"
elif [ "$PGDUMPFORMAT" = "t" ]; then
PGDUMPFILE="${PGDUMPFILE%.tar}.tar"
logfile="${PGDUMPFILE%.tar}.log"
else
PGDUMPFILE="${PGDUMPFILE%.dmp}.dmp"
logfile="${PGDUMPFILE%.dmp}.log"
fi
fi
# We've a suitable output and log pathname (or we've to use the STDOUT w/o logs).
# Do we connect the database?
#
if [ -n "$PGCONTAINER" ]; then
# Dockerized database.
result=$("$DOCKER" exec $PGCONTAINER \
sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
psql -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" \
-d \"$PGDATABASE\" -t -c \"SELECT 1\"" 2>/dev/null)
result="${result//[[:space:]]/}"
[[ $result -ne 1 ]] && echo -e "$MSG_FAILCONN." >&2 | "$TEE" -a "$logfile" && exit 1
else
# Self-hosted database.
# Preferred method: TCP with username and password.
CONNECT="-U $PGUSER -w -h $PGHOST -p $PGPORT"
export PGPASSWORD
"$PSQL" $CONNECT -d "$PGDATABASE" -c "SELECT 1" >/dev/null 2>&1; result=$?
if [[ $result -ne 0 ]]; then
# On failure we will try the peer authentication.
CONNECT=""
"$PSQL" $CONNECT -d "$PGDATABASE" -c "SELECT 1" >/dev/null 2>&1; result=$?
[[ $result -ne 0 ]] && echo -e "$MSG_FAILCONN." >&2 | "$TEE" -a "$logfile" && exit $result
# Leaves a warning about using the peer authentication.
echo -e "$MSG_PEERAUTH" >>"$logfile"
fi
# We've a valid CONNECT clause.
fi
# We've the database connect checked.
# Do we size the database?
#
dbsize=0
# It isn't relevant when we'll dump to the STDOUT.
if [ -n "$PGDUMPFILE" ]; then
# Calculates the size of the database (MB).
if [ -n "$PGCONTAINER" ]; then
# Dockerized database.
dbsize=$("$DOCKER" exec $PGCONTAINER \
sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
psql -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" -d \"$PGDATABASE\" \
-t -c \"SELECT pg_database_size('$PGDATABASE');\"" 2>/dev/null)
else
# Self-hosted database.
export PGPASSWORD
dbsize=$("$PSQL" $CONNECT -d "$PGDATABASE" \
-t -c "SELECT pg_database_size('$PGDATABASE');" 2>/dev/null)
fi
# Some sanitization
dbsize="${dbsize//[[:space:]]/}"
[[ -z "$dbsize" ]] && dbsize=0
[[ ! "$dbsize" =~ ^([[:digit:]])*$ ]] && dbsize=0
# KB units
dbsize=$(( dbsize /1024 ))
# On failure aborts here, except if it had forced.
if [ $dbsize -eq 0 ]; then echo -en "$MSG_FAILSIZE" | "$TEE" -a "$logfile"
if [ "$PGDUMPFORCED" ]; then
echo " - $MSG_FORCED" | "$TEE" -a "$logfile"
else
echo " - $MSG_ABORTED" | "$TEE" -a "$logfile"; exit 1
fi
fi
fi
# We've the database size.
# Checks the storage space available.
#
# It isn't relevant when we'll dump to the STDOUT or the database has no size.
if [ -n "$PGDUMPFILE" -a "$dbsize" -gt 0 ]; then
# Let's estimate the dump size.
dumpsize=$(( dbsize / 10 * 8 ))
# We'll estimate 1:4 ratio on compression on the fly.
[[ "$PGDUMPFORMAT" = "c" ]] && dumpsize=$(( dumpsize / 4 ))
# We'll estimate 5:4 ratio on native dump followed by the compression.
[[ "$PGDUMPFORMAT" = "9" ]] && dumpsize=$(( dumpsize / 4 * 5 ))
# Let's calculate the available space (KB units).
freespace=$("$DF" --output=avail -k "$("$DIRNAME" "$PGDUMPFILE")" | $TAIL -n1) #"
# Is it enough?
if [ $freespace -lt $dumpsize ]; then
echo -en "$MSG_NOSPACE" | "$TEE" -a "$logfile"
# On failure aborts here, except if it had forced.
if [ "$PGDUMPFORCED" ]; then
echo " - $MSG_FORCED" | "$TEE" -a "$logfile"
else
echo " - $MSG_ABORTED" | "$TEE" -a "$logfile"; exit 1
fi
fi
fi
# We've the space checked.
# Let's dump!
# Writes the database as it was ordered, into the dump file or to the STDOUT.
#
if [ -n "$PGCONTAINER" ]; then
# Dockerized database.
if [ -z "$PGDUMPFILE" ]; then
# STDOUT
"$DOCKER" exec $PGCONTAINER \
sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
pg_dump -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" \
-F$PGDUMPFORMAT $PGDUMPACLS -d \"$PGDATABASE\""
elif [ "$PGDUMPFORMAT" = "d" ]; then
# Directory format doesn't implemented with a dockerized database.
echo "$MSG_NODIRDUMP" | "$TEE" -a "$logfile"; exit 1
else
# File
"$DOCKER" exec $PGCONTAINER \
sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
pg_dump -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" \
-F$PGDUMPFORMAT $PGDUMPACLS -d \"$PGDATABASE\"" \
>"$PGDUMPFILE" 2>>"$logfile"
# If it is a plain dump, compresses it.
if [ "${PGDUMPFILE##*.}" = "sql" ]; then
"$(which gzip)" "$PGDUMPFILE"
fi
fi
# This is a fake result - TODO!
result=0
else
# Self-hosted database.
export PGPASSWORD
if [ "$PGDUMPFORMAT" = "9" ]; then
# Backward-compatible SQL dump.
if [ -z "$PGDUMPFILE" ]; then
# STDOUT
# 1st the schema with some arbitrary conversions.
"$PG_DUMP" $CONNECT \
$PGDUMPACLS --schema-only -d "$PGDATABASE" | \
"$EGREP" -iv '^SET idle_in_transaction_session_timeout =' | \
"$EGREP" -iv '^SET default_table_access_method =' | \
"$SED" 's/FUNCTION =/PROCEDURE = /' | \
"$SED" "s/CURRENT_DATE/\('now'::text\)::date/g"
# 2nd the data as COPY statements.
"$PG_DUMP" $CONNECT \
$PGDUMPACLS --data-only -d "$PGDATABASE"
else
# File
# 1st the schema with some arbitrary conversions.
"$PG_DUMP" $CONNECT \
$PGDUMPACLS --schema-only -d "$PGDATABASE" | \
"$EGREP" -iv '^SET idle_in_transaction_session_timeout =' | \
"$EGREP" -iv '^SET default_table_access_method =' | \
"$SED" 's/FUNCTION =/PROCEDURE = /' | \
"$SED" "s/CURRENT_DATE/\('now'::text\)::date/g" \
>"$PGDUMPFILE" 2>>"$logfile"; result=$?
# 2nd the data as COPY statements.
"$PG_DUMP" $CONNECT \
$PGDUMPACLS --data-only -d "$PGDATABASE" \
>>"$PGDUMPFILE" 2>>"$logfile"; result=$?
# Finally compresses it.
"$(which gzip)" "$PGDUMPFILE"
fi
elif [ -z "$PGDUMPFILE" ]; then
# STDOUT
"$PG_DUMP" $CONNECT \
-F"$PGDUMPFORMAT" $PGDUMPACLS -d "$PGDATABASE" \
2>>"$logfile"; result=$?
elif [ "$PGDUMPFORMAT" = "d" ]; then
# Directory
"$PG_DUMP" $CONNECT \
-F"$PGDUMPFORMAT" $PGDUMPACLS -d "$PGDATABASE" \
-f "$PGDUMPFILE" 2>>"$logfile"; result=$?
else
# File
"$PG_DUMP" $CONNECT \
-F"$PGDUMPFORMAT" $PGDUMPACLS -d "$PGDATABASE" \
>"$PGDUMPFILE" 2>>"$logfile"; result=$?
# If it is a plain dump, compresses it.
if [ "${PGDUMPFILE##*.}" = "sql" ]; then
"$(which gzip)" "$PGDUMPFILE"
fi
fi
fi
exit $result
# That's all, Folks! :)