#!/bin/bash
#
# Dumps a PostgreSQL database from a native or dockerized PostgreSQL instance
# running on this box. This is a wrapper script to the pg_dump command.
# Uses only the TCP connection, therefore you must enable this in pg_hba file.
#
# If the PostgreSQL is dockerized you need call as a Docker manager user
# (member of the docker Linux group).
#
# Accepts few pg_dump options as well as the optional database password
# and the optional output pathname:
#
# $0 [-U dbuser] [-P dbpass] [-h dbhost] [-p dbport]
#    [-C container] [-d database] [-f dumpfile ] [-F dumpformat ]
#    [--acl ] [--force]
#    [database (if not in -d)] [dumpfile (if not in -f)]
#
# A special -F9 option makes a PostgreSQL 9.x compatible plain text dump.
#
# Author: Kovács Zoltán <kovacs.zoltan@smartfront.hu>
#         Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2023-06-18 v1.0
# new: forked from the "SMARTERP_skeleton" repository.
# 2022-11-03 v0.4
# mod: May use the peer authentication as a secondary preference.
# fix: pg_restore output overwrited the previous log lines, this has been fixed.
# 2022-01-23 v0.3
# mod: More sophisticated estimate made on storage space needed.
# 2021-03-19 v0.2
# new: Option --acl (include ACLs as well) has been added.
# new: Option -F9 (PSQL 9.x compatible plain dump) has been added (native only).
# mod: Plain text dumps are compressed now.
# mod: Typos and comments.
# fix: The sbin directories has appended to the $PATH (Debian doesn't add them).
# 2020-09-17 v0.1 Initial release

# Accepted environment variables and their defaults.
#
PGCONTAINER=${PGCONTAINER-""}                   # Docker container's name
PGDATABASE=${PGDATABASE-""}                     # Database name to dump
PGDUMP=${PGDUMP-""}                             # Dump file pathname
PGDUMPFORMAT=${PGDUMPFORMAT-"c"}                # Dump file format
PGHOST=${PGHOST:-"localhost"}                   # Connection parameter
PGOPTIONS=${PGOPTIONS-""}                       # Options to pass to pg_dump
PGPASSWORD=${PGPASSWORD-""}                     # Credential for the DB user
PGPORT=${PGPORT:-"5432"}                        # Connection parameter
PGUSER=${PGUSER:-"postgres"}			# DB user for this dump

### Temporailly ignored! Need to sanitize.
PGOPTIONS=""

# Other initialisations.
#
PGDUMPACLS="--no-acl --no-owner"		# Excludes the ACLs and grants.
PGDUMPFORCED=""					# Dumps despite failed checks
vetodatabases="postgres template0 template1"	# Technical DBs aren't to dump

# Messages.
#
MSG_ABORTED="aborted"
MSG_BADCRED="Bad credentials for MySQL"
MSG_BADDUMPPATH="Dumpfile's directory isn't writable"
MSG_BADOPT="Invalid option"
MSG_DOESNOTRUN="Doesn't run the database container"
MSG_DOCKERGRPNEED="You must be a member of the docker group."
MSG_FAILBKP="Archiver exited with error code"
MSG_FAILDB="Unable to dump the database"
MSG_FAILCONN="Failed to connect the database"
MSG_FAILSIZE="Failed to size the database"
MSG_FORCED="but forced to continue"
MSG_MISSINGDEP="Fatal: missing dependency"
MSG_NOCOMP="Fatal: missing component"
MSG_NODIRDUMP="Directory format doesn't implemented with a dockerized database."
MSG_NOSPACE="Not enough space to dump the database"
MSG_PEERAUTH="Peer authentication has been used."

MSG_USAGE="Usage: $0 [options] [database [dump_pathname|-]]\n"
MSG_USAGE+="Option:\tENVVAR:\n"
MSG_USAGE+=" -C\tPGCONTAINER\tPostgres Docker container's name\n"
MSG_USAGE+=" -d\tPGDATABASE\tPostgres database to dump ($USER)\n"
MSG_USAGE+=" -f\tPGDUMP\t\tDumpfile pathname\n"
MSG_USAGE+=" -F\tPGDUMPFORMAT\tDumpfile format ($PGDUMPFORMAT)\n"
MSG_USAGE+=" -h\tPGHOST\t\tHostname or IP to connect (localhost)\n"
MSG_USAGE+=" -p\tPGPORT\t\tTCP port to connect (5432)\n"
MSG_USAGE+=" -P\tPGPASSWORD\tPostgres password\n"
MSG_USAGE+=" -U\tPGUSER\t\tPostgres username ($PGUSER)\n"
MSG_USAGE+="--acl\t\t\tIncludes the grants and ACLs as well\n"
MSG_USAGE+="--force\t\t\tForces the operation despite the failed checks\n"

# Basic environment settings.
LANG=C
LC_ALL=C
# We need also the sbin directories.
if ! [[ "$PATH" =~ '/sbin:' ]]; then
PATH="$PATH:/usr/local/sbin:/usr/sbin:/sbin"; fi

# Getting options.
#
while getopts ":-:C:d:D:f:F:h:H:p:P:u:U:" option
do
    case ${option} in
        "-" )
            if   [ "$OPTARG" = "acl" ]; then PGDUMPACLS=""
            elif [ "$OPTARG" = "force" ]; then PGDUMPFORCED="yes"
            elif [ "$OPTARG" = "help" ]; then echo -e "$MSG_USAGE" >&2; exit
	    else echo "$MSG_BADOPT --$OPTARG" >&2; exit 1
            fi
        ;;
        "C" ) PGCONTAINER="$OPTARG" ;;
        "d" | "D" ) PGDATABASE="$OPTARG" ;;
        "f" ) PGDUMPFILE="$OPTARG" ;;
        "F" ) PGDUMPFORMAT="$OPTARG" ;;
        "h" | "H" ) PGHOST="$OPTARG" ;;
        "P" ) PGPASSWORD="$OPTARG" ;;
        "p" ) PGPORT="$OPTARG" ;;
        "u" | "U" ) PGUSER="$OPTARG" ;;
        \? )
	    echo "$MSG_BADOPT -$OPTARG" >&2; exit 1
        ;;
    esac
done; shift $((OPTIND -1))
# All options have been processed.

# Checks the dependencies.
#
# Conditional dependencies (according to native or dockerized environment).
[[ -z "$PGCONTAINER" ]] \
&& additem="psql pg_dump" \
|| additem="docker"
# Common dependencies.
TR=$(which tr 2>/dev/null)
if [ -z "$TR" ]; then echo "$MSG_MISSINGDEP tr."; exit 1 ; fi
for item in basename df date dirname egrep grep gzip hostname id \
            pwd sed tail tee $additem
do
    if [ -n "$(which $item)" ]
    then export $(echo $item | "$TR" '[:lower:]' '[:upper:]')=$(which $item)
    else echo "$MSG_MISSINGDEP $item." >&2; exit 1; fi
done
# All dependencies are available via "$THECOMMAND" (upper case) call.
#
# An additional bugfix (use "$(which gzip)" instead of "$GZIP"):
# https://www.gnu.org/software/gzip/manual/html_node/Environment.html
GZIP=""

# Need to be root or a Docker manager user if the DB runs in a container.
#
[[ -n "$PGCONTAINER" ]] && [[ "$USER" != 'root' ]] \
&& [[ -z "$(echo "$("$ID" -Gn "$USER") " | "$GREP" ' docker ')" ]] \
&& echo "$MSG_DOCKERGRPNEED" >&2 && exit 1 #"

# If the PostgreSQL is dockerized the container must be running.
#
[[ -n "$PGCONTAINER" ]] \
&& [[ -z "$("$DOCKER" ps -q -f name=$PGCONTAINER)" ]] \
&& echo "$MSG_DOESNOTRUN $PGCONTAINER" >&2 && exit 1

# Determines the database to dump.
#
# Lack of -d the 1st non-option parameter is the database's name.
if [ -z "$PGDATABASE" -a -n "$1" ]; then PGDATABASE="$1"; shift; fi
# The last resort is the Linux user's name.
if [ -z "$PGDATABASE" ]; then PGDATABASE="$USER"; fi
# A humble sanitization.
if [[ ! "$PGDATABASE" =~ ^([[:alnum:]]|[_])*$ ]]; then
    echo -e "$MSG_USAGE" >&2; exit 1; fi
# Silently refuses the PostgreSQL internal databases.
for veto in $vetodatabases ""
do
    [[ "$PGDATABASE" = "$veto" ]] && exit 0
done
# We've a database name to dump.

# Determines the output file (or maybe a target directory) and the logfile.
#
# A generated file- or directory name maybe necessary.
DUMPNAME="$PGDATABASE.$("$DATE" '+%Y%m%d_%H%M%S').$("$HOSTNAME")"
# Lack of -f the next non-option parameter is the dumpfile's pathname.
if [ -z "$PGDUMPFILE" -a -n "$1" ]; then PGDUMPFILE="$1"; shift; fi
# The last resort is the generated pathname.
if [ -z "$PGDUMPFILE" ]; then PGDUMPFILE="$("$PWD")/$DUMPNAME"; fi
#
# Let's make some checks.
#
# Dumping to the STDOUT is invalid with the directory format, we'll dump
# into a newly created directory instead.
[[ "$PGDUMPFILE" = "-" ]] && [[ "$PGDUMPFORMAT" = "d" ]] \
&& PGDUMPFILE="$("$PWD")/$DUMPNAME"
# If the given pathname is an existing directory, we need append
# a generated target filename (or directory name for Fd format).
[[ -d "$PGDUMPFILE" ]] && PGDUMPFILE+="/$DUMPNAME"
#
# Here we go with the desired pathname.
#
if [ "$PGDUMPFILE" = "-" ]; then
    # If '-' was given as the PGDUMPFILE, we'll write to STDOUT w/o logs.
    PGDUMPFILE=""
    logfile="/dev/null"
else
    # We'll write and log to a directory within the filesystem.
    PGDUMPDIR="$("$DIRNAME" "$PGDUMPFILE")"
    # This directory must be exist and be writable.
    if [ -n "$PGDUMPDIR" ] && [ ! -d "$PGDUMPDIR" -o ! -x "$PGDUMPDIR" ]; then
        echo "$MSG_BADDUMPPATH: $PGDUMPDIR" >&2; exit 1
    fi
    # Extends the output files properly.
    if [ "$PGDUMPFORMAT" = "d" ]; then
	logfile="$PGDUMPFILE.log"
    elif [ "$PGDUMPFORMAT" = "9" -o "$PGDUMPFORMAT" = "p" ]; then
	PGDUMPFILE="${PGDUMPFILE%.sql}.sql"
	logfile="${PGDUMPFILE%.sql}.log"
    elif [ "$PGDUMPFORMAT" = "t" ]; then
	PGDUMPFILE="${PGDUMPFILE%.tar}.tar"
	logfile="${PGDUMPFILE%.tar}.log"
    else
	PGDUMPFILE="${PGDUMPFILE%.dmp}.dmp"
	logfile="${PGDUMPFILE%.dmp}.log"
    fi
fi
# We've a suitable output and log pathname (or we've to use the STDOUT w/o logs).

# Do we connect the database?
#
if [ -n "$PGCONTAINER" ]; then
    # Dockerized database.
    result=$("$DOCKER" exec $PGCONTAINER \
                       sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
                              psql -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" \
                                   -d \"$PGDATABASE\" -t -c \"SELECT 1\"" 2>/dev/null)
    result="${result//[[:space:]]/}"
    [[ $result -ne 1 ]] && echo -e "$MSG_FAILCONN." >&2 | "$TEE" -a "$logfile" && exit 1
else
    # Self-hosted database.
    # Preferred method: TCP with username and password.
    CONNECT="-U $PGUSER -w -h $PGHOST -p $PGPORT"
    export PGPASSWORD
    "$PSQL" $CONNECT -d "$PGDATABASE" -c "SELECT 1" >/dev/null 2>&1; result=$?
    if [[ $result -ne 0 ]]; then
	# On failure we will try the peer authentication.
	CONNECT=""
	"$PSQL" $CONNECT -d "$PGDATABASE" -c "SELECT 1" >/dev/null 2>&1; result=$?
	[[ $result -ne 0 ]] && echo -e "$MSG_FAILCONN." >&2 | "$TEE" -a "$logfile" && exit $result
	# Leaves a warning about using the peer authentication.
	echo -e "$MSG_PEERAUTH" >>"$logfile"
    fi
    # We've a valid CONNECT clause.
fi
# We've the database connect checked.

# Do we size the database?
#
dbsize=0
# It isn't relevant when we'll dump to the STDOUT.
if [ -n "$PGDUMPFILE" ]; then
    # Calculates the size of the database (MB).
    if [ -n "$PGCONTAINER" ]; then
	# Dockerized database.
	dbsize=$("$DOCKER" exec $PGCONTAINER \
                       sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
                              psql -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" -d \"$PGDATABASE\" \
	                           -t -c \"SELECT pg_database_size('$PGDATABASE');\"" 2>/dev/null)
    else
	# Self-hosted database.
	export PGPASSWORD
	dbsize=$("$PSQL" $CONNECT -d "$PGDATABASE" \
                         -t -c "SELECT pg_database_size('$PGDATABASE');" 2>/dev/null)
    fi
    # Some sanitization
    dbsize="${dbsize//[[:space:]]/}"
    [[ -z "$dbsize" ]] && dbsize=0
    [[ ! "$dbsize" =~ ^([[:digit:]])*$ ]] && dbsize=0
    # KB units
    dbsize=$(( dbsize /1024 ))
    # On failure aborts here, except if it had forced.
    if [ $dbsize -eq 0 ]; then echo -en "$MSG_FAILSIZE" | "$TEE" -a "$logfile"
        if [ "$PGDUMPFORCED" ]; then
	    echo " - $MSG_FORCED" | "$TEE" -a "$logfile"
        else
	    echo " - $MSG_ABORTED" | "$TEE" -a "$logfile"; exit 1
        fi
    fi
fi
# We've the database size.

# Checks the storage space available.
#
# It isn't relevant when we'll dump to the STDOUT or the database has no size.
if [ -n "$PGDUMPFILE" -a "$dbsize" -gt 0 ]; then
    # Let's estimate the dump size.
    dumpsize=$(( dbsize / 10 * 8 ))
    # We'll estimate 1:4 ratio on compression on the fly.
    [[ "$PGDUMPFORMAT" = "c" ]] && dumpsize=$(( dumpsize / 4 ))
    # We'll estimate 5:4 ratio on native dump followed by the compression.
    [[ "$PGDUMPFORMAT" = "9" ]] && dumpsize=$(( dumpsize / 4 * 5 ))
    # Let's calculate the available space (KB units).
    freespace=$("$DF" --output=avail -k "$("$DIRNAME" "$PGDUMPFILE")" | $TAIL -n1) #"
    # Is it enough?
    if [ $freespace -lt $dumpsize ]; then
	echo -en "$MSG_NOSPACE" | "$TEE" -a "$logfile"
	# On failure aborts here, except if it had forced.
        if [ "$PGDUMPFORCED" ]; then
	    echo " - $MSG_FORCED" | "$TEE" -a "$logfile"
        else
	    echo " - $MSG_ABORTED" | "$TEE" -a "$logfile"; exit 1
        fi
    fi
fi
# We've the space checked.

# Let's dump!
# Writes the database as it was ordered, into the dump file or to the STDOUT.
#
if [ -n "$PGCONTAINER" ]; then
    # Dockerized database.
    if [ -z "$PGDUMPFILE" ]; then
	# STDOUT
	"$DOCKER" exec $PGCONTAINER \
                       sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
                       pg_dump -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" \
                               -F$PGDUMPFORMAT $PGDUMPACLS -d \"$PGDATABASE\""
    elif [ "$PGDUMPFORMAT" = "d" ]; then
	# Directory format doesn't implemented with a dockerized database.
	echo "$MSG_NODIRDUMP" | "$TEE" -a "$logfile"; exit 1
    else
	# File
	"$DOCKER" exec $PGCONTAINER \
                       sh -c "export PGPASSWORD=\"$PGPASSWORD\"; \
                       pg_dump -U \"$PGUSER\" -w -h \"$PGHOST\" -p \"$PGPORT\" \
                               -F$PGDUMPFORMAT $PGDUMPACLS -d \"$PGDATABASE\"" \
        >"$PGDUMPFILE" 2>>"$logfile"
	# If it is a plain dump, compresses it.
	if [ "${PGDUMPFILE##*.}" = "sql" ]; then
	    "$(which gzip)" "$PGDUMPFILE"
	fi
    fi
    # This is a fake result - TODO!
    result=0
else
    # Self-hosted database.
    export PGPASSWORD
    if [ "$PGDUMPFORMAT" = "9" ]; then
	# Backward-compatible SQL dump.
	if [ -z "$PGDUMPFILE" ]; then
	    # STDOUT
	    # 1st the schema with some arbitrary conversions.
	    "$PG_DUMP" $CONNECT \
        	       $PGDUMPACLS --schema-only -d "$PGDATABASE" | \
    	    "$EGREP" -iv '^SET idle_in_transaction_session_timeout =' | \
	    "$EGREP" -iv '^SET default_table_access_method =' | \
	    "$SED" 's/FUNCTION =/PROCEDURE = /' | \
	    "$SED" "s/CURRENT_DATE/\('now'::text\)::date/g"
	    # 2nd the data as COPY statements.
	    "$PG_DUMP" $CONNECT \
        	       $PGDUMPACLS --data-only -d "$PGDATABASE"
	else
	    # File
	    # 1st the schema with some arbitrary conversions.
	    "$PG_DUMP" $CONNECT \
        	       $PGDUMPACLS --schema-only -d "$PGDATABASE" | \
    	    "$EGREP" -iv '^SET idle_in_transaction_session_timeout =' | \
	    "$EGREP" -iv '^SET default_table_access_method =' | \
	    "$SED" 's/FUNCTION =/PROCEDURE = /' | \
	    "$SED" "s/CURRENT_DATE/\('now'::text\)::date/g" \
	    >"$PGDUMPFILE" 2>>"$logfile"; result=$?
	    # 2nd the data as COPY statements.
	    "$PG_DUMP" $CONNECT \
        	       $PGDUMPACLS --data-only -d "$PGDATABASE" \
	               >>"$PGDUMPFILE" 2>>"$logfile"; result=$?
	    # Finally compresses it.
	    "$(which gzip)" "$PGDUMPFILE"
	fi
    elif [ -z "$PGDUMPFILE" ]; then
	# STDOUT
        "$PG_DUMP" $CONNECT \
                   -F"$PGDUMPFORMAT" $PGDUMPACLS -d "$PGDATABASE" \
                   2>>"$logfile"; result=$?
    elif [ "$PGDUMPFORMAT" = "d" ]; then
	# Directory
	"$PG_DUMP" $CONNECT \
                   -F"$PGDUMPFORMAT" $PGDUMPACLS -d "$PGDATABASE" \
                   -f "$PGDUMPFILE" 2>>"$logfile"; result=$?
    else
	# File
	"$PG_DUMP" $CONNECT \
                   -F"$PGDUMPFORMAT" $PGDUMPACLS -d "$PGDATABASE" \
                   >"$PGDUMPFILE" 2>>"$logfile"; result=$?
	# If it is a plain dump, compresses it.
	if [ "${PGDUMPFILE##*.}" = "sql" ]; then
	    "$(which gzip)" "$PGDUMPFILE"
	fi
    fi
fi

exit $result
# That's all, Folks! :)
