2
0

432 lines
17 KiB
Bash
Executable File

#!/bin/bash
#
# Helper script to rotate contents of a folder with a daily-weekly-monthly plan.
# TL;DR: rotate_folder -f path/to/my/folder # Shows what would be done.
# rotate_folder -f path/to/my/folder --doit # Makes the job.
#
# By default the script makes a dry run - it doesn't delete anything, only
# lists the operations that would be done. You may force the execution with
# the --doit command line parameter. The folder to be rotated is given with
# the -f option, as the first command line parameter or in BACKUP_FOLDER.
# There is no default: without a usable folder the script stops with an error.
#
# You may configure the script by environment variables and/or by a
# configuration textfile. This file should be placed into the folder intended
# to be rotated. It's name should be a dot followed by the script's name and a
# .conf extension (.rotate_folder.conf by default). The script will create a
# default config file automatically on first (dry) run, if it doesn't exist.
#
# The configurable parameters and their defaults are:
# BACKUP_FOLDER="" # pathname of the folder intended to be rotated
# CLASSES_PATTERN="" # see below
# DOIT="" # if empty the script makes a dry run
# RETAIN_DAYS=7 # retains all files created within that many days
# RETAIN_WEEKS=4 # retains one file per week/month,
# RETAIN_MONTHS=12 # created within that many weeks/months
#
# If you specify a CLASSES_PATTERN the script will classify the files in folder
# and rotate the files class by class independently. A pattern is a regexp:
# * the script considers only the filenames matching the whole regexp;
# * the regexp must contain parts in capturing parentheses (classifiers).
# A class is a set of filenames where the matching part to the all classifiers
# are the same. For example, if CLASSES_PATTERN='^(.*)-[0-9].tgz'
# then "alpha-1.tgz alpha-2.tgz ... alpha-9.tgz" are members of a class;
# "beta-1.tgz beta-2.tgz ... beta-9.tgz" are members of another class.
# "beta-10.tgz gamma-1.log" won't be processed because they don't match
# the pattern at all.
# In this example the "alpha" and "beta" files will be rotated independently.
#
# The rotating rules are:
# * all files created within RETAIN_DAYS will be retained.
# * furthermore from files created within RETAIN_WEEKS, only one file
# (the oldest) will be retained for every 7 days period.
# * furthermore from files created within RETAIN_MONTHS, only one file
# (the oldest) will be retained for every 30 days period.
#
# On dry run the script lists all the files of the class with following
# abbreviations:
# DR filename - would be retained by daily rule
# WR filename - would be retained by weekly rule
# WX filename - would be deleted by weekly rule
# MR filename - would be retained by monthly rule
# MX filename - would be deleted by monthly rule
# AX filename - would be deleted because it is too old (no rule matches it)
#
# Author: Kovács Zoltán <kovacs.zoltan@smartfront.hu>
# Kovács Zoltán <kovacsz@marcusconsulting.hu>
# License: GNU/GPL v3+ (https://www.gnu.org/licenses/gpl-3.0.en.html)
# 2023-06-18 v1.0
# new: forked from the "SMARTERP_skeleton" repository.
# 2021.02.12 v0.3
# add: Multiple classes (mainly rewritten).
# mod: Accepts the first command line parameter as a folder (doesn't
# need the -f option). But the folder doesn't defaults to the $PWD.
# 2020-11-24 v0.2
# fix: Typos.
# mod: Warnings also go to the STDERR.
# 2020-11-02 v0.1 Initial release
# Environment variables honoured by the script and their fallback values.
# A value already present in the environment (even an empty one) is kept.
#
: "${BACKUP_FOLDER=}"
: "${CLASSES_PATTERN=}"
: "${RETAIN_DAYS=7}"
: "${RETAIN_WEEKS=4}"
: "${RETAIN_MONTHS=12}"
# Other initialisations (maybe overridden by configuration).
# The dry-run switch always starts empty, whatever the environment holds.
#
DOIT=""
# User-facing messages (the configuration file may override any of them).
# Texts containing "\n" are printed with escape interpretation.
#
# Problems with the command line, the folder, the tools or the pattern.
MSG_BADOPT="Invalid option"
MSG_BADFOLDER="Doesn't exist or doesn't writable"
MSG_MISSINGDEP="Fatal: missing dependency"
MSG_BADPATTERN="The pattern given seems to be illegal"
MSG_NOCLASSES="Didn't find suitable classes according to pattern"
MSG_NOFILES="Didn't found files to rotate."
# Handling of the configuration file.
MSG_NOCONF="Didn't find the configuration file"
MSG_CREATED="A new, empty configuration has been created.\n"\
"Feel free to fill in and rerun this program!\n"\
"You may force the execution unconfigurated with --doit option."
MSG_FAILCREATE="Failed to create a new, empty configuration file.\n"\
"You may force the execution unconfigurated with --doit option."
# Progress reports of the rotation itself.
MSG_SCHEDULE="Dry run - this is the schedule:"
MSG_DELDRY="Dry run - these files would have been deleted:"
MSG_DELREAL="These files have been deleted:"
MSG_TODOIT="Dry run - you may force the execution with --doit option."
# There is nothing to configure below (I hope).
###############################################
# Getting command line options.
# The long option --doit is parsed through the "-" pseudo option: for "--doit"
# getopts reports the option "-" with the argument "doit".
while getopts ":-:f:" option
do
  case "${option}" in
    "-" )
      if [[ "$OPTARG" == "doit" ]]; then DOIT="yes"
      else echo "$MSG_BADOPT --$OPTARG" >&2; exit 1
      fi
      ;;
    "f" ) BACKUP_FOLDER="$OPTARG" ;;
    # An option which requires an argument was given without one.
    ":" ) echo "$MSG_BADOPT -$OPTARG" >&2; exit 1 ;;
    \? ) echo "$MSG_BADOPT -$OPTARG" >&2; exit 1 ;;
  esac
done
# Drops the options already parsed, so $1 becomes the first non-option
# parameter (otherwise "--doit /folder" would leave "--doit" in $1).
shift $(( OPTIND - 1 ))
# Done with options.
# Checks the dependencies.
# Every tool is looked up on the PATH with the "type -P" builtin (no need for
# the external "which") and its pathname is exported in a variable named after
# the tool in upper case (e.g. sed -> $SED). A missing tool is fatal.
TR=$(type -P tr)
if [[ -z "$TR" ]]; then echo "$MSG_MISSINGDEP tr." >&2; exit 1; fi
for item in basename date dirname egrep sed seq sort stat xargs
do
  item_path=$(type -P "$item")
  if [[ -n "$item_path" ]]
  then export "$(echo "$item" | "$TR" '[:lower:]' '[:upper:]')=$item_path"
  else echo "$MSG_MISSINGDEP $item." >&2; exit 1; fi
done
unset item_path
# All dependencies are available via "$THECOMMAND" (upper case) call.
# Checks the backup folder.
# If wasn't defined yet accepts the 1st command line parameter as well.
if [[ -z "$BACKUP_FOLDER" ]]; then
  BACKUP_FOLDER="${1-}"
  (( $# > 0 )) && shift
fi
# Removes the trailing slash (if any).
BACKUP_FOLDER=${BACKUP_FOLDER%/}
# Checks and gives up here if fails.
# [[ ]] with || is used because the multi-argument "[ ... -o ... ]" form is
# obsolescent and may misparse values which look like operators.
# The folder name is printed literally (%s), only the message may hold escapes.
if [[ -z "$BACKUP_FOLDER" || ! -d "$BACKUP_FOLDER" || ! -w "$BACKUP_FOLDER" ]]
then printf '%b %s\n' "$MSG_BADFOLDER" "$BACKUP_FOLDER" >&2; exit 1; fi
# Gets the configuration (if any).
# The file is looked for within the rotated folder; its name is a dot,
# the name of this script and a .conf extension.
BACKUP_CONF="$BACKUP_FOLDER/.$("$BASENAME" "$0").conf"
# The pathname is quoted in the tests, so folders with spaces work as well.
if [[ -r "$BACKUP_CONF" ]]; then . "$BACKUP_CONF"
else
  # Warns about failure.
  echo -e "$MSG_NOCONF $BACKUP_CONF"
  # When on dry run tries to write a new file with some help text and defaults.
  # Skipped when a pattern was given (by the environment) - then we go on
  # unconfigured.
  if [[ -z "$DOIT" && -z "$CLASSES_PATTERN" ]]; then
    # The here-document is unquoted on purpose: the script name is expanded.
    cat > "$BACKUP_CONF" 2>/dev/null << EOF
# This is a shell script excerpt for configuration purposes only.
# Handle with care! Please don't put code here, only variables.
# The configurable parameters for $("$BASENAME" "$0") script and their defaults are:
# CLASSES_PATTERN="" # see below
# DOIT="" # if empty the script makes a dry run
# RETAIN_DAYS=7 # retains all files created within that many days
# RETAIN_WEEKS=4 # retains one file per week/month,
# RETAIN_MONTHS=12 # created within that many weeks/months
# If you specify a CLASSES_PATTERN the script will classify the files in folder
# and rotates the files class by class independently. A pattern is a regexp:
# * the script considers only the filenames matching the whole regexp;
# * the regexp must contain parts in capturing parentheses (classifiers).
# A class is a set of filenames where the matching part to the all classifiers
# is the same. For example, if CLASSES_PATTERN='^(.*)-[0-9].tgz'
# then "alpha-1.tgz alpha-2.tgz ... alpha-9.tgz" are members of a class;
# "beta-1.tgz beta-2.tgz ... beta-9.tgz" are members of another class.
# "beta-10.tgz gamma-1.log" won't be processed beacuse they don't match
# the pattern at all.
# In this example the "alpha" and "beta" files will be rotated independently.
#
# The rotating rules are:
# * all files have created within RETAIN_DAYS will be retained.
# * furthermore from files created within RETAIN_WEEKS, only one file
# (the oldest) will be retained for every 7 days period.
# * furthermore from files created within RETAIN_MONTHS, only one file
# (the oldest) will be retained for every 30 days period.
#
# On dry run the script lists all the files of the class with following
# abbreviations:
# DR filename - would be retained by daily rule
# WR filename - would be retained by weekly rule
# WX filename - would be deleted by weekly rule
# MR filename - would be retained by monthly rule
# MX filename - would be deleted by monthly rule
# AX filename - would be deleted no rule match it, because is too old
EOF
    # Reports the success or failure and stops here.
    if [[ -r "$BACKUP_CONF" ]]
    then echo -e "$MSG_CREATED" >&2; exit
    else echo -e "$MSG_FAILCREATE" >&2; exit 1; fi
  fi
fi
# Configuration file has been handled.
# Time constants (in seconds) and the reference "now" of this run.
# They are set after the configuration has been read, so it can't change them.
SECS_DAY=$(( 24 * 60 * 60 ))
SECS_WEEK=$(( SECS_DAY * 7 ))
SECS_MONTH=$(( SECS_DAY * 30 ))
TIMESTAMP="$("$DATE" '+%s')"
# Helper of rotate_class: processes one thinning schedule (weekly or monthly),
# retaining only the oldest file of every period.
# It deliberately works on the caller's local variables (bash dynamic scoping):
# reads class_files and class_mtimes, advances pointer, appends to delete_files.
#   $1 - prefix of the dry-run report codes ("W" -> WR/WX, "M" -> MR/MX)
#   $2 - length of a period in seconds
#   $3 - number of periods covered by the schedule
function _rotate_thin_schedule {
  local code="$1" period_secs="$2" periods="$3"
  local period limit candidate retained=""
  for (( period = 1; period <= periods; period++ )); do
    # Finishes if we've no more files.
    (( pointer < ${#class_files[@]} )) || break
    (( limit = TIMESTAMP - (period * period_secs) ))
    candidate=""
    while (( pointer < ${#class_files[@]} )) \
          && (( class_mtimes[pointer] >= limit )); do
      if [[ -n "$candidate" ]]; then
        # An older file from the same period has been found:
        # the previously marked (younger) one should be deleted.
        delete_files+=("$candidate")
        [[ -z "$DOIT" ]] && printf '%sX %s\n' "$code" "$candidate"
      fi
      # Marks this file to retain (temporarily).
      candidate="${class_files[pointer]}"
      pointer=$(( pointer + 1 ))
    done
    # The oldest file of the period is the one retained.
    if [[ -n "$candidate" ]]; then
      retained="yes"
      [[ -z "$DOIT" ]] && printf '%sR %s\n' "$code" "$candidate"
    fi
  done
  # If no file was retained within this schedule, retains the next one anyway.
  if [[ -z "$retained" ]] && (( pointer < ${#class_files[@]} )); then
    [[ -z "$DOIT" ]] && printf '%sR %s\n' "$code" "${class_files[pointer]}"
    pointer=$(( pointer + 1 ))
  fi
  return 0
}

# This function rotates the files matching to its parameter
# which is a definite regexp (without parenthesised parts).
# Only non-hidden regular files of BACKUP_FOLDER are considered (no subfolders,
# symlinks, etc.); an empty or missing parameter selects all of them.
# Globals read: BACKUP_FOLDER, DOIT, TIMESTAMP, SECS_DAY, SECS_WEEK, SECS_MONTH,
#               RETAIN_DAYS, RETAIN_WEEKS, RETAIN_MONTHS, EGREP, STAT,
#               MSG_SCHEDULE, MSG_DELDRY, MSG_DELREAL.
# On dry run (DOIT is empty) only reports the schedule; otherwise deletes
# the files which aren't retained by any rule.
function rotate_class {
  local CLASSES_PATTERN="${1-}"
  local name path mtime day threshold
  local -a class_files=() class_mtimes=() delete_files=() removed_files=()
  local pointer=0 # class_files index to process
  local retained=""

  # Without a folder the pathnames below would point to the root folder.
  [[ -n "$BACKUP_FOLDER" ]] || return 1

  # Selection of files to rotate, ordered by modification time (youngest first).
  # The names are read line by line, so spaces and glob characters within
  # filenames are safe. The file type is checked on the file itself instead of
  # relying on indicator characters appended by ls.
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    path="$BACKUP_FOLDER/$name"
    [[ -f "$path" && ! -L "$path" ]] || continue
    mtime=$("$STAT" -c %Y -- "$path" 2>/dev/null) || continue
    class_files+=("$name")
    class_mtimes+=("$mtime")
  done < <(
    # Never lists another folder when the cd fails.
    cd "$BACKUP_FOLDER" || exit
    if [[ -z "$CLASSES_PATTERN" ]]; then ls -1 -t
    else ls -1 -t | "$EGREP" -- "$CLASSES_PATTERN"
    fi
  )
  # Lack of files gives it up here.
  (( ${#class_files[@]} > 0 )) || return 0

  # We'll start with the youngest file and step toward the oldest,
  # collecting the list of files to delete within this class.
  [[ -z "$DOIT" ]] && printf '%b\n' "$MSG_SCHEDULE"

  # The daily schedule: we'll retain all files within this schedule.
  for (( day = 1; day <= RETAIN_DAYS; day++ )); do
    # Finishes if we've no more files.
    (( pointer < ${#class_files[@]} )) || break
    (( threshold = TIMESTAMP - (day * SECS_DAY) ))
    while (( pointer < ${#class_files[@]} )) \
          && (( class_mtimes[pointer] >= threshold )); do
      [[ -z "$DOIT" ]] && printf 'DR %s\n' "${class_files[pointer]}"
      retained="yes"
      pointer=$(( pointer + 1 ))
    done
  done
  # If we didn't save any file within this schedule we'll retain the next one.
  if [[ -z "$retained" ]] && (( pointer < ${#class_files[@]} )); then
    [[ -z "$DOIT" ]] && printf 'DR %s\n' "${class_files[pointer]}"
    pointer=$(( pointer + 1 ))
  fi

  # The weekly, then the monthly schedule: only the oldest file is retained
  # from every 7 (respectively 30) days period.
  _rotate_thin_schedule "W" "$SECS_WEEK" "$RETAIN_WEEKS"
  _rotate_thin_schedule "M" "$SECS_MONTH" "$RETAIN_MONTHS"

  # All the schedules have been processed.
  # The remaining files are too old, all of them will be deleted.
  while (( pointer < ${#class_files[@]} )); do
    delete_files+=("${class_files[pointer]}")
    [[ -z "$DOIT" ]] && printf 'AX %s\n' "${class_files[pointer]}"
    pointer=$(( pointer + 1 ))
  done

  # The delete_files array contains the files to delete within this class.
  if (( ${#delete_files[@]} == 0 )); then
    # Uniform output formatting.
    if [[ -z "$DOIT" ]]; then echo; fi
    return 0
  fi
  if [[ -z "$DOIT" ]]; then
    # Simulated deletion.
    printf '\n%b\n' "$MSG_DELDRY"
    printf '%s\n' "${delete_files[@]}"
    echo
  else
    # Actual deletion file by file; a failure is reported by rm itself
    # and the file isn't listed among the deleted ones.
    for name in "${delete_files[@]}"; do
      rm -- "${BACKUP_FOLDER:?}/$name" && removed_files+=("$name")
    done
    if (( ${#removed_files[@]} > 0 )); then
      printf '\n%b\n' "$MSG_DELREAL"
      printf '%s\n' "${removed_files[@]}"
      echo
    fi
  fi
  return 0
}
# This function parses the given class pattern, recursively explores
# the classes, subclasses, sub-subclasses and so on, then calls the
# rotator function for each definite class.
#   $1 - the class pattern (an extended regexp); when it contains a
#        parenthesised classifier, the first one is replaced by every value
#        it actually matches among the filenames and the result is explored
#        further; otherwise the pattern is passed to rotate_class.
# Globals read: BACKUP_FOLDER, EGREP, SED, SORT.
# Returns silently when the pattern is empty or refused by egrep.
function rotate_classes {
  local CLASSES_PATTERN="${1-}"
  local class escaped ch i prefix rest suffix
  local -a classes=()

  [[ -z "$CLASSES_PATTERN" ]] && return 0 # unusable
  # Tries to validate the pattern. The test call simulates the later use;
  # an exit status above 1 means that egrep refused the pattern.
  printf 'test\n' | "$EGREP" -- "$CLASSES_PATTERN" >/dev/null 2>&1
  [[ $? -gt 1 ]] && return 0 # unusable

  # Does contain unexplored classifiers?
  # sed fails on the \1 reference when the pattern has no parenthesised part.
  if ! printf 'test\n' | "$SED" -E "s/$CLASSES_PATTERN/\1/" >/dev/null 2>&1
  then
    # It is a definite classifier, let's call the rotator function.
    rotate_class "$CLASSES_PATTERN"
    return
  fi

  # Needs further exploring.
  # Selects the qualifier substrings which actually have matching filenames.
  # They are collected line by line into an array before the recursion, so
  # qualifiers containing spaces or glob characters stay intact.
  while IFS= read -r class; do
    [[ -n "$class" ]] && classes+=("$class")
  done < <(
    # Never lists another folder when the cd fails.
    cd "$BACKUP_FOLDER" || exit
    ls -1 | "$EGREP" -- "$CLASSES_PATTERN" \
      | "$SED" -E "s/$CLASSES_PATTERN/\1/" | "$SORT" -u
  )

  # The text before the first "(" and after the first ")" following it.
  prefix="${CLASSES_PATTERN%%"("*}"
  rest="${CLASSES_PATTERN#*"("}"
  suffix="${rest#*")"}"

  # Enumerates these qualifiers.
  for class in "${classes[@]}"; do
    # The qualifier is a literal text, so its regexp metacharacters are
    # escaped - e.g. "c++" must not be interpreted as a repetition.
    escaped=""
    for (( i = 0; i < ${#class}; i++ )); do
      ch="${class:i:1}"
      case "$ch" in
        '\'|'.'|'*'|'^'|'$'|'+'|'?'|'('|')'|'['|'{'|'|')
          escaped+="\\$ch" ;;
        *)
          escaped+="$ch" ;;
      esac
    done
    # This is same as the CLASSES_PATTERN but contains the definite qualifier
    # instead of the parenthesised expression - e.g. one of tgz and log
    # instead of (tgz|log). Recurses for further exploring.
    rotate_classes "${prefix}${escaped}${suffix}"
  done
  return 0
}
# Rotates the classes, subclasses and so on with a recursive function call.
if [[ -z "$CLASSES_PATTERN" ]]; then
  # All files considered within the same class.
  rotate_class
else
  # Tries to validate the pattern (loosely):
  # an exit status above 1 means that egrep refused the pattern.
  echo "test" | "$EGREP" "$CLASSES_PATTERN" >/dev/null 2>&1
  if [[ $? -gt 1 ]]; then
    echo -e "$MSG_BADPATTERN $CLASSES_PATTERN" >&2
    exit 1
  fi
  # Seems to be valid, go on!
  rotate_classes "$CLASSES_PATTERN"
fi
# A final thought about the dry run.
# An "if" is used instead of "[[ ... ]] && echo" because this is the last
# command: the short-circuit form ended a real (--doit) run with status 1.
if [[ -z "$DOIT" ]]; then echo -e "$MSG_TODOIT"; fi
# That's all, Folks :).