#!/bin/bash
#
# Helper script to rotate contents of a folder with a daily-weekly-monthly plan.
# TL;DR: rotate_folder -f path/to/my/folder         # Shows what would be done.
#        rotate_folder -f path/to/my/folder --doit  # Makes the job.
#
# By default the script makes a dry run - it doesn't delete anything, only
# lists the operations that would be done. You may force the execution with
# the --doit command line parameter. The folder intended to be rotated may be
# given with the -f option followed by its pathname; without -f the script
# takes the first command line parameter as the folder. There is no default
# folder: the script gives up if none has been specified.
#
# You may configure the script by environment variables and/or by a
# configuration textfile. This file should be placed into the folder intended
# to be rotated. Its name should be a dot followed by the script's name and a
# .conf extension (.rotate_folder.conf by default). The script will create a
# default config file automatically on first (dry) run, if it doesn't exist.
#
# The configurable parameters and their defaults are:
# BACKUP_FOLDER=""		# pathname of the folder intended to be rotated
# CLASSES_PATTERN=""		# see below
# DOIT=""			# if empty the script makes a dry run
# RETAIN_DAYS=7			# retains all files created within that many days
# RETAIN_WEEKS=4		# retains one file per week/month,
# RETAIN_MONTHS=12		# created within that many weeks/months
#
# If you specify a CLASSES_PATTERN the script will classify the files in folder
# and rotate the files class by class independently. A pattern is a regexp:
# * the script considers only the filenames matching the whole regexp;
# * the regexp must contain parts in capturing parentheses (classifiers).
#   A class is a set of filenames where the parts matching all the classifiers
#   are the same. For example, if CLASSES_PATTERN='^(.*)-[0-9].tgz'
#   then "alpha-1.tgz alpha-2.tgz ... alpha-9.tgz" are members of a class;
#   "beta-1.tgz beta-2.tgz ... beta-9.tgz" are members of another class.
#   "beta-10.tgz gamma-1.log" won't be processed because they don't match
#   the pattern at all.
#   In this example the "alpha" and "beta" files will be rotated independently.
#
# The rotating rules are:
# * all files created within RETAIN_DAYS will be retained.
# * furthermore from files created within RETAIN_WEEKS, only one file
#   (the oldest) will be retained for every 7 days period.
# * furthermore from files created within RETAIN_MONTHS, only one file
#   (the oldest) will be retained for every 30 days period.
#
# On dry run the script lists all the files of the class with following
# abbreviations:
#   DR filename - would be retained by daily   rule
#   WR filename - would be retained by weekly  rule
#   WX filename - would be deleted  by weekly  rule
#   MR filename - would be retained by monthly rule
#   MX filename - would be deleted  by monthly rule
#   AX filename - would be deleted  because no rule matches it (it is too old)
#
# Author: Kovács Zoltán <kovacs.zoltan@smartfront.hu>
#         Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2025-09-15 v1.3
# fix: now correctly handles "zero elements to keep" directives.
# 2025-08-12 v1.2
# fix: now correctly handles configuration file pathnames containing spaces.
# 2025-03-06 v1.1
# new: added the --noconf option which prevents reading and creating
#      the configuration file.
# 2023-06-18 v1.0
# new: forked from the "SMARTERP_skeleton" repository.
# 2021.02.12 v0.3
# add: Multiple classes (mainly rewritten).
# mod: Accepts the first command line parameter as a folder (doesn't
#      need the -f option). But the folder doesn't defaults to the $PWD.
# 2020-11-24 v0.2
# fix: Typos.
# mod: Warnings also go to the STDERR.
# 2020-11-02 v0.1 Initial release

# Accepted environment variables. A default is applied only when the variable
# isn't set at all; a value set in the environment (even an empty one) is kept.
#
: "${BACKUP_FOLDER=}"
: "${CLASSES_PATTERN=}"
: "${RETAIN_DAYS=7}"
: "${RETAIN_WEEKS=4}"
: "${RETAIN_MONTHS=12}"

# Switches which always start empty here, whatever the environment contains
# (the command line or the configuration may set them later).
#
DOIT=
NOCONF=

# Message texts (the configuration may replace them).
# The "\n" sequences are kept literally, they are interpreted by "echo -e".
#
MSG_BADFOLDER="Doesn't exist or doesn't writable"
MSG_BADOPT='Invalid option'
MSG_BADPATTERN='The pattern given seems to be illegal'
MSG_CREATED='A new, empty configuration has been created.\n'\
'Feel free to fill in and rerun this program!\n'\
'You may force the execution unconfigurated with --doit option.'
MSG_DELDRY='Dry run - these files would have been deleted:'
MSG_DELREAL='These files have been deleted:'
MSG_FAILCREATE='Failed to create a new, empty configuration file.\n'\
'You may force the execution unconfigurated with --doit option.'
MSG_MISSINGDEP='Fatal: missing dependency'
MSG_NOCONF="Didn't find the configuration file"
MSG_NOCLASSES="Didn't find suitable classes according to pattern"
MSG_NOFILES="Didn't found files to rotate."
MSG_SCHEDULE='Dry run - this is the schedule:'
MSG_TODOIT='Dry run - you may force the execution with --doit option.'

# There is nothing to configure below (I hope).
###############################################

# Getting command line options.
#
# parse_options processes the options found among its parameters:
# --doit, --noconf and -f folder. Long options are handled through the "-:"
# entry of the option string: getopts reports "--name" as the option "-"
# with "name" in OPTARG.
# Sets DOIT, NOCONF and BACKUP_FOLDER accordingly; exits with status 1 on an
# unknown option or on an option lacking its mandatory argument.
# OPTIND is left global on purpose: the caller uses it to drop the options
# which have been processed.
function parse_options {

    local option
    while getopts ":-:f:" option
    do
        case "${option}" in
            "-" )
                case "$OPTARG" in
                    "doit"   ) DOIT="yes" ;;
                    "noconf" ) NOCONF="yes" ;;
                    *        ) echo "$MSG_BADOPT --$OPTARG" >&2; exit 1 ;;
                esac
            ;;
            "f" ) BACKUP_FOLDER="$OPTARG" ;;
            # An option given without its mandatory argument (e.g. a bare -f).
            ":" ) echo "$MSG_BADOPT -$OPTARG" >&2; exit 1 ;;
            \?  ) echo "$MSG_BADOPT -$OPTARG" >&2; exit 1 ;;
        esac
    done
}
parse_options "$@"
# Drops the options processed, so "$1" is the first non-option parameter
# (which may be the folder) even when options precede it.
shift $(( OPTIND - 1 ))
# Done with options.

# Checks the dependencies.
# Every tool is looked up on the PATH with the "type -P" shell builtin, so the
# check itself doesn't depend on any external program. The full pathname of a
# tool found is exported in a variable named after the tool in upper case.
# Gives up with exit status 1 (and a message on STDERR) at the first tool
# which is missing.
TR=$(type -P tr)
if [ -z "$TR" ]; then echo "$MSG_MISSINGDEP tr." >&2; exit 1; fi
for item in basename date dirname egrep sed seq sort stat xargs
do
    item_path=$(type -P "$item")
    if [ -z "$item_path" ]; then echo "$MSG_MISSINGDEP $item." >&2; exit 1; fi
    # The name and the value are passed as a single, quoted word, so a
    # pathname containing spaces remains intact.
    export "$(echo "$item" | "$TR" '[:lower:]' '[:upper:]')=$item_path"
done
unset item_path
# All dependencies are available via "$THECOMMAND" (upper case) call.

# Determines and validates the folder to rotate.
# When neither the environment nor the -f option has named a folder,
# the 1st command line parameter is taken (and consumed) as the folder.
[[ -n "$BACKUP_FOLDER" ]] || { BACKUP_FOLDER="$1"; shift; }
# A single trailing slash (if any) is dropped.
BACKUP_FOLDER="${BACKUP_FOLDER%/}"
# The folder must be named, must exist and must be writable;
# otherwise the script gives up here.
if [[ -n "$BACKUP_FOLDER" && -d "$BACKUP_FOLDER" && -w "$BACKUP_FOLDER" ]]; then :
else
    echo -e "$MSG_BADFOLDER $BACKUP_FOLDER" >&2
    exit 1
fi

# Applies the configuration (if it exists and if it doesn't need to be ignored).
# The configuration file is looked for inside the folder to rotate, its name is
# a dot, the name of this script and a .conf extension. The file is sourced,
# so it may override any variable which has been set so far.
BACKUP_CONF="$BACKUP_FOLDER/.$("$BASENAME" "$0").conf"
if [[ "$NOCONF" = "yes" ]]; then :
elif [[ -r "$BACKUP_CONF" ]]; then . "$BACKUP_CONF"
else
    # Warns about failure (on STDERR, like the other diagnostic messages).
    echo -e "$MSG_NOCONF $BACKUP_CONF" >&2
    # When on dry run (and no pattern has been given by the environment)
    # tries to write a new file with some help text and defaults.
    if [[ -z "$DOIT" && -z "$CLASSES_PATTERN" ]]; then
	# The here-document is unquoted on purpose: the script's name
	# is substituted into the help text.
	cat > "$BACKUP_CONF" 2>/dev/null << EOF
# This is a shell script excerpt for configuration purposes only.
# Handle with care! Please don't put code here, only variables.
# The configurable parameters for $("$BASENAME" "$0") script and their defaults are:

# CLASSES_PATTERN=""		# see below
# DOIT=""			# if empty the script makes a dry run
# RETAIN_DAYS=7			# retains all files created within that many days
# RETAIN_WEEKS=4		# retains one file per week/month,
# RETAIN_MONTHS=12		# created within that many weeks/months

# If you specify a CLASSES_PATTERN the script will classify the files in folder
# and rotates the files class by class independently. A pattern is a regexp:
# * the script considers only the filenames matching the whole regexp;
# * the regexp must contain parts in capturing parentheses (classifiers).
#   A class is a set of filenames where the matching part to the all classifiers
#   is the same. For example, if CLASSES_PATTERN='^(.*)-[0-9].tgz'
#   then "alpha-1.tgz alpha-2.tgz ... alpha-9.tgz" are members of a class;
#   "beta-1.tgz beta-2.tgz ... beta-9.tgz" are members of another class.
#   "beta-10.tgz gamma-1.log" won't be processed beacuse they don't match
#   the pattern at all.
#   In this example the "alpha" and "beta" files will be rotated independently.
#
# The rotating rules are:
# * all files have created within RETAIN_DAYS will be retained.
# * furthermore from files created within RETAIN_WEEKS, only one file
#   (the oldest) will be retained for every 7 days period.
# * furthermore from files created within RETAIN_MONTHS, only one file
#   (the oldest) will be retained for every 30 days period.
#
# On dry run the script lists all the files of the class with following
# abbreviations:
#   DR filename - would be retained by daily   rule
#   WR filename - would be retained by weekly  rule
#   WX filename - would be deleted  by weekly  rule
#   MR filename - would be retained by monthly rule
#   MX filename - would be deleted  by monthly rule
#   AX filename - would be deleted  no rule match it, because is too old
EOF
	# Reports the success or failure and stops here in both cases:
	# the user is expected to review the new file before the next run.
	if [[ -r "$BACKUP_CONF" ]];
	then echo -e "$MSG_CREATED" >&2; exit
	else echo -e "$MSG_FAILCREATE" >&2; exit 1; fi
    fi
fi
# Configuration file has been handled.

# Time constants (in seconds) and the reference time of this run.
# They are set after the configuration has been applied, therefore
# the configuration isn't able to override them.
SECS_DAY=$(( 24 * 60 * 60 ))
SECS_WEEK=$(( SECS_DAY * 7 ))
SECS_MONTH=$(( SECS_DAY * 30 ))
TIMESTAMP="$("$DATE" +%s)"

# Helper of rotate_class: applies one thinning schedule (weekly or monthly),
# i.e. retains only the oldest file of every period and marks the other
# files of the same period for deletion.
# Parameters:
#   $1 - tag letter used in the dry-run listing ("W" or "M");
#   $2 - number of periods to retain (RETAIN_WEEKS or RETAIN_MONTHS);
#   $3 - length of a period in seconds.
# It works directly on the caller's local variables (class_files, pointer
# and delete_list) via the dynamic scoping of bash, so it is usable only
# when called from rotate_class.
function _rotate_thinned_schedule {

    local tag="$1" periods="$2" period_secs="$3"
    local period threshold file_mtime
    local candidate                     # file marked (temporarily) to retain
    local retained_any=""               # non-empty once a file was retained
    for period in $("$SEQ" 1 "$periods")
    do
        candidate=""
        # Finishes if we've no more files.
        [[ $pointer -ge ${#class_files[@]} ]] && break
        (( threshold = TIMESTAMP - (period * period_secs) ))
        file_mtime=$("$STAT" -c %Y -- "$BACKUP_FOLDER/${class_files[$pointer]}")
        while [[ "$file_mtime" -ge "$threshold" ]]
        do
            if [[ -n "$candidate" ]]; then
                # This file is older than the one marked before within the
                # same period: the previous one should be deleted.
                delete_list+=("$candidate")
                [[ -z "$DOIT" ]] && printf '%s\n' "${tag}X $candidate"
            fi
            candidate="${class_files[$pointer]}"
            # Next file; finishes if we've no more files.
            (( pointer++ )); [[ $pointer -ge ${#class_files[@]} ]] && break
            file_mtime=$("$STAT" -c %Y -- "$BACKUP_FOLDER/${class_files[$pointer]}")
        done
        # The file marked last (the oldest one of the period) is retained.
        if [[ -n "$candidate" ]]; then
            retained_any="yes"
            [[ -z "$DOIT" ]] && printf '%s\n' "${tag}R $candidate"
        fi
        # This period concluded.
    done
    # If the schedule is enabled (at least one period is required) but it
    # didn't retain any file, the next (older) file is retained anyway.
    if [[ "$periods" -ge 1 && -z "$retained_any" && $pointer -lt ${#class_files[@]} ]]; then
        [[ -z "$DOIT" ]] && printf '%s\n' "${tag}R ${class_files[$pointer]}"
        (( pointer++ ))
    fi
    return 0
}

# This function rotates the files matching to its parameter
# which is a definite regexp (without parenthesised parts).
# Parameters:
#   $1 - the regexp selecting the files of the class; when it is empty
#        (or omitted) all the files of the folder form a single class.
# Globals read: BACKUP_FOLDER, DOIT, RETAIN_DAYS, RETAIN_WEEKS, RETAIN_MONTHS,
#   TIMESTAMP, SECS_DAY, SECS_WEEK, SECS_MONTH, the MSG_* texts and the
#   EGREP, SEQ, STAT tool pathnames.
# On dry run (empty DOIT) it only prints the schedule and the files which
# would be deleted; otherwise it deletes the files and prints their list.
# Filenames are handled line by line, so names containing spaces are safe
# (names containing newlines aren't supported).
function rotate_class {

    local CLASSES_PATTERN="$1"; shift
    local listing entry
    # Selection of files to rotate.
    #
    # Non-hidden entries of the folder, the youngest first. The entries which
    # "ls --file-type" marks with a trailing type indicator (subfolders,
    # symlinks, etc.) are dropped. If the folder can't be entered,
    # the list remains empty.
    listing=$(cd "$BACKUP_FOLDER" \
              && ls -1 -t --file-type | "$EGREP" -v '[/=>@|$]$')
    # We consider only the files matching to the pattern.
    # If the pattern is empty, we'll consider all files.
    if [[ -n "$CLASSES_PATTERN" ]]; then
        listing=$(printf '%s\n' "$listing" | "$EGREP" -e "$CLASSES_PATTERN")
    fi
    # Converts the list into an array, one filename per line.
    local -a class_files=()
    while IFS= read -r entry
    do
        [[ -n "$entry" ]] && class_files+=("$entry")
    done <<< "$listing"
    # Lack of files gives it up here.
    [[ ${#class_files[@]} -eq 0 ]] && return 0
    # We need to process the files listed within the class_files array.
    # The list is ordered by modification time, reverse.
    # We'll start with the youngest and step toward the oldest.

    # Collects the list of files to delete within this class.
    #
    local -a delete_list=()             # filenames to delete
    local pointer=0                     # class_files index to process
    local file_mtime threshold day file
    local retained_any=""               # non-empty once a file was retained

    # Starts with the daily schedule.
    # We'll retain all files within this schedule.
    [[ -z "$DOIT" ]] && echo -e "$MSG_SCHEDULE"
    for day in $("$SEQ" 1 "$RETAIN_DAYS")
    do
        # Finishes if we've no more files.
        [[ $pointer -ge ${#class_files[@]} ]] && break
        (( threshold = TIMESTAMP - (day * SECS_DAY) ))
        file_mtime=$("$STAT" -c %Y -- "$BACKUP_FOLDER/${class_files[$pointer]}")
        # We'll retain all files of this day.
        while [[ "$file_mtime" -ge "$threshold" ]]
        do
            [[ -z "$DOIT" ]] && printf '%s\n' "DR ${class_files[$pointer]}"
            retained_any="yes"
            # Next file; finishes if we've no more files.
            (( pointer++ )); [[ $pointer -ge ${#class_files[@]} ]] && break
            file_mtime=$("$STAT" -c %Y -- "$BACKUP_FOLDER/${class_files[$pointer]}")
        done
        # This day concluded.
    done
    # The daily schedule concluded.
    # If we didn't save any file within this schedule we'll retain the next
    # file. Unlike the weekly and monthly schedules this is done even if
    # RETAIN_DAYS is zero, so the youngest file of a class is always retained.
    if [[ -z "$retained_any" && $pointer -lt ${#class_files[@]} ]]; then
        [[ -z "$DOIT" ]] && printf '%s\n' "DR ${class_files[$pointer]}"
        (( pointer++ ))
    fi

    # The weekly schedule.
    # We'll retain only the oldest file from a week within this schedule.
    _rotate_thinned_schedule "W" "$RETAIN_WEEKS" "$SECS_WEEK"

    # The monthly schedule.
    # We'll retain only the oldest file from a month within this schedule.
    _rotate_thinned_schedule "M" "$RETAIN_MONTHS" "$SECS_MONTH"

    # All the schedules have been processed.
    # The remaining files will be deleted all.
    while [[ $pointer -lt ${#class_files[@]} ]]
    do
        delete_list+=("${class_files[$pointer]}")
        [[ -z "$DOIT" ]] && printf '%s\n' "AX ${class_files[$pointer]}"
        (( pointer++ ))
    done

    # The delete_list contains the files to delete according to this class.
    if [[ ${#delete_list[@]} -gt 0 ]]; then
        if [[ -z "$DOIT" ]]; then
            # Simulated deletion.
            echo -e "\n$MSG_DELDRY"
        else
            # Actual deletion file by file; a failure is reported by rm
            # itself and doesn't stop the processing.
            for file in "${delete_list[@]}"
            do rm -- "$BACKUP_FOLDER/$file"
            done
            echo -e "\n$MSG_DELREAL"
        fi
        # The filenames are printed verbatim, followed by an empty line.
        printf '%s\n' "${delete_list[@]}"
        echo
    else
        # Uniform output formatting.
        [[ -z "$DOIT" ]] && echo
    fi
    return 0
}

# This function parses the given class pattern, recursively explores
# the classes, subclasses, sub-subclasses and so on, then calls the
# rotator function for each definite class.
# Parameters:
#   $1 - the class pattern, a regexp which may contain parenthesised
#        parts (classifiers). Nested parentheses aren't supported.
# Globals read: BACKUP_FOLDER and the EGREP, SED, SORT tool pathnames.
# In every step the first classifier is replaced by each of the texts it
# actually matches among the filenames; these texts are inserted as literals
# (regexp metacharacters escaped), then the function recurses with the
# resulting pattern.
function rotate_classes {

    local CLASSES_PATTERN="$1"; shift
    [[ -z "$CLASSES_PATTERN" ]] && return # unusable

    # Tries to validate the pattern.
    # Test calls simulate the later use; an exit status above 1 is
    # taken as a pattern error.
    echo "test" | "$EGREP" -e "$CLASSES_PATTERN" >/dev/null 2>&1
    [[ $? -gt 1 ]] && return # unusable

    # Does contain unexplored classifiers? The \1 backreference makes sed
    # fail when the pattern doesn't contain any parenthesised part.
    if ! echo "test" | "$SED" -E "s/$CLASSES_PATTERN/\1/" >/dev/null 2>&1; then
        # It is a definite classifier, let's call the rotator function.
        rotate_class "$CLASSES_PATTERN"
        return
    fi

    # Needs further exploring.
    local listing classes class escaped
    # Locates the first parenthesised expression of the pattern and the
    # texts before and after it.
    local group_re='\([^)]*\)'
    [[ "$CLASSES_PATTERN" =~ $group_re ]] || return 0
    local first_group="${BASH_REMATCH[0]}"
    local prefix="${CLASSES_PATTERN%%"$first_group"*}"
    local suffix="${CLASSES_PATTERN#*"$first_group"}"

    # Non-hidden files (but no subfolders, symlinks, etc.) matching to the pattern.
    listing=$(cd "$BACKUP_FOLDER" \
              && ls -1 -t --file-type | "$EGREP" -v '[/=>@|$]$' \
                                      | "$EGREP" -e "$CLASSES_PATTERN")
    # Selects the qualifier substrings which actually have matching files.
    classes=$(printf '%s\n' "$listing" \
              | "$SED" -E "s/$CLASSES_PATTERN/\1/" | "$SORT" -u)
    # Collects the qualifiers line by line (they may contain spaces).
    local -a class_list=()
    while IFS= read -r class
    do
        [[ -n "$class" ]] && class_list+=("$class")
    done <<< "$classes"

    # Enumerates these qualifiers.
    for class in "${class_list[@]}"
    do
        # The qualifier has to match literally, so its regexp
        # metacharacters get escaped.
        escaped=$(printf '%s\n' "$class" | "$SED" -e 's/[][\.(){}*+?^$|]/\\&/g')
        # This is same as the CLASSES_PATTERN but contains the definite
        # qualifier instead of the first parenthesised expression - e.g.
        # one of tgz and log instead of (tgz|log).
        # Recurses for further exploring.
        rotate_classes "$prefix$escaped$suffix"
    done
    return 0
}

# Rotates the classes, subclasses and so on with a recursive function call.
if [[ -z "$CLASSES_PATTERN" ]]; then
    # All files considered within the same class.
    rotate_class
else
    # Tries to validate the pattern (loosely): an exit status above 1
    # of the test call is taken as a pattern error.
    echo "test" | "$EGREP" -e "$CLASSES_PATTERN" >/dev/null 2>&1
    if [[ $? -gt 1 ]]; then
        echo -e "$MSG_BADPATTERN $CLASSES_PATTERN" >&2
        exit 1
    fi
    # Seems to be valid, go on!
    rotate_classes "$CLASSES_PATTERN"
fi
# A final thought about the dry run.
# An "if" is used instead of an "&&" list, so the script finishes with
# exit status 0 after a real (--doit) run as well.
if [[ -z "$DOIT" ]]; then echo -e "$MSG_TODOIT"; fi

# That's all, Folks :).
