From e6961ff41bc6661a3a300982636cb6f6bbdb097b Mon Sep 17 00:00:00 2001
From: Tamas Gerczei
Date: Wed, 14 Oct 2015 15:12:49 +0200
Subject: [PATCH] initial import of working prototype

---
 README.md      |   6 +-
 zfs-backup.cfg |  14 +++
 zfs-backup.sh  | 315 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 334 insertions(+), 1 deletion(-)
 create mode 100644 zfs-backup.cfg
 create mode 100644 zfs-backup.sh

diff --git a/README.md b/README.md
index 6613cee..03718cb 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,6 @@
 # zfs-backup
-home-brew solution for ZFS snapshot replication and rotation
+home-brew solution for ZFS snapshot replication and rotation, ideally used as a cron job
+
+usage: zfs-backup.sh [-h] [-m ADDRESS] -f CONFIG_FILE
+
+A configuration (f)ile must be supplied. An e-(m)ail address for reporting is optional. Please refer to the attached example file 'zfs-backup.cfg' for configuration.
diff --git a/zfs-backup.cfg b/zfs-backup.cfg
new file mode 100644
index 0000000..8718b06
--- /dev/null
+++ b/zfs-backup.cfg
@@ -0,0 +1,14 @@
+# required format:
+# SOURCE        TARGET                          KEEPDAYS ENABLED
+# dataset       [user@host:]dataset             number   Y or N
+
+# we are securing this elsewhere
+# user needs an authorized and restricted public key as well as sudoer rights on remotehost to execute as root the following:
+# /usr/sbin/zfs recv -Feuv datapool/backup, /usr/sbin/zfs destroy -r datapool/backup/*
+data/home       user@remotehost:datapool/backup 7        Y
+
+# this goes to another pool on this host
+data/config     backup/data                     5        Y
+
+# we no longer back this up
+data/volatile   backup/data                     2        N
diff --git a/zfs-backup.sh b/zfs-backup.sh
new file mode 100644
index 0000000..31de24d
--- /dev/null
+++ b/zfs-backup.sh
@@ -0,0 +1,315 @@
+#!/usr/bin/env bash
+# create and rotate ZFS backups | 150415 | tamas@gerczei.eu
+#
+# usage: zfs-backup.sh [-h] [-m ADDRESS] -f CONFIG_FILE
+#
+# Every enabled entry of the configuration file is snapshotted recursively and
+# replicated to its local or remote target with 'zfs send | zfs recv'. Only if
+# that replication succeeded are snapshots older than KEEPDAYS destroyed on
+# both sides. Events are reported through logger(1); the output of the zfs
+# commands is appended to a per-run session log, which is e-mailed on request.
+
+#### BEGIN FUNCTIONS ####
+
+function usage() {
+    # print synopsis
+    printf '\n-=[ ZFS back-up utility ]=-\n\n'
+    # the script name is passed as an argument so that a '%' in it is harmless
+    printf 'usage: %s [-h] [-m ADDRESS] -f CONFIG_FILE\n\n' "$(basename "$0")"
+    printf 'A configuration (f)ile must be supplied. An e-(m)ail address for reporting is optional.\n'
+}
+
+function notice() {
+    # send the message given as arguments to syslog, tagged with the script name
+    logger -t "$(basename "${0%.sh}")" -p user.notice "$*"
+}
+
+function check_dataset() {
+    # succeed if the dataset or snapshot $1 exists
+    # R_RMOD is left unquoted on purpose: it is either empty (local lookup) or
+    # an "ssh user@host" prefix that must split into separate words
+    ${R_RMOD} /usr/sbin/zfs get -pHo value creation "${1}" &>/dev/null
+}
+
+function snapuse() {
+    # print the sum of 'usedbysnapshots' (bytes) of dataset $1 and its descendants;
+    # prints 0 if the dataset cannot be queried; honours R_RMOD like check_dataset
+    local TOTAL=0 VALUE
+    while read -r VALUE
+    do
+        # only plain numbers may enter the sum, anything else is skipped
+        if [[ ${VALUE} =~ ^[0-9]+$ ]]
+        then
+            TOTAL=$(( TOTAL + VALUE ))
+        fi
+    done \
+    <<< "$(${R_RMOD} /usr/sbin/zfs get -Hpro value usedbysnapshots "${1}" 2>/dev/null)"
+
+    echo "${TOTAL}"
+}
+
+function human() {
+    # print the byte count $1 scaled to binary units with two decimals, e.g.
+    # 1048576 -> 1.00M; the smallest unit is K, so values below 1024 come out
+    # as a fraction of a kilobyte
+    # based on http://unix.stackexchange.com/a/191787
+    local AWK
+    # nawk is what the script was written for; fall back to awk where it is absent
+    AWK=$(command -v nawk || command -v awk)
+    "${AWK}" '{
+        value = $1 / 1024
+        unit = 1
+        # move to the next unit from 1000 up so that at most three integer
+        # digits are printed; stop at the last known suffix
+        while (value >= 1000 && unit < 8) {
+            value /= 1024
+            unit++
+        }
+        printf "%.2f%s\n", value, substr("KMGTPEZY", unit, 1)
+    }' <<< "${1}"
+}
+
+function report_delta() {
+    # log how much snapshot space was allocated or freed
+    # $1 - signed change in bytes, $2 - where the change happened
+    local DELTA=${1} WHAT="allocated"
+
+    if (( DELTA < 0 ))
+    then
+        DELTA=$(( -DELTA ))
+        WHAT="freed"
+    fi
+
+    if (( DELTA != 0 ))
+    then
+        notice "$(human "${DELTA}") ${WHAT} in \"${2}\" by backing up \"${DATASET}\""
+    fi
+}
+
+function backup() {
+    # process one configuration entry: snapshot DATASET, replicate it to SAVETO
+    # and expire snapshots older than KEEP days
+    # reads the globals DATASET, SAVETO, KEEP, COUNTER, NOW and LOGFILE
+    # returns 1 if the entry had to be skipped, 0 otherwise
+    #
+    # everything is local so that nothing leaks into the next entry; R_RMOD and
+    # RMOD are still seen by check_dataset and snapuse when called from here
+    local R_RMOD="" RMOD="" R_USER="" R_HOST="" DEST="${SAVETO}"
+    local NETLOC NAME LASTSNAP NEWSNAP DATE SNAPSHOT TIMESTAMP AGE THRESHOLD
+    local L_USED_BEFORE L_USED_AFTER R_USED_BEFORE R_USED_AFTER REMOTE
+    local -a SNAPSHOTS=() R_SNAPSHOTS=() SNAPMODIFIER=() R_SNAPMODIFIER=() STATUS=()
+
+    # a missing or non-numeric retention would make a broken or zero threshold
+    # for the aging step below, so refuse such entries before touching anything
+    if [[ ! ${KEEP} =~ ^[0-9]+$ ]]
+    then
+        notice "invalid retention \"${KEEP}\" in configuration entry #${COUNTER}; omitting"
+        return 1
+    fi
+
+    # check source
+    check_dataset "${DATASET}" || {
+        notice "source dataset \"${DATASET}\" in configuration entry #${COUNTER} does not exist; omitting"
+        return 1
+    }
+
+    # determine which local snapshots exist already, sorted with '-S creation'
+    # (word splitting is intended: one snapshot name per word)
+    SNAPSHOTS=( $(/usr/sbin/zfs list -rt snapshot -d1 -Ho name -S creation "${DATASET}" 2>/dev/null) )
+    LASTSNAP=${SNAPSHOTS[0]}
+    L_USED_BEFORE=$(snapuse "${DATASET}")
+
+    # check target configuration
+    if [[ ${SAVETO} =~ ^[A-Za-z0-9._-]+@ ]]
+    then
+        # remote target of the form user@host:dataset
+        NETLOC=${SAVETO%%:*}    # strip dataset name
+        R_USER=${NETLOC%%@*}    # obtain user
+        R_HOST=${NETLOC#*@}     # obtain host
+        DEST=${SAVETO#*:}       # keep the dataset name only
+
+        # check remote host availability and user validity
+        if ! /usr/bin/ssh "${R_USER}@${R_HOST}" true &>/dev/null
+        then
+            # failed to connect, bail out
+            notice "${R_HOST} is unreachable as ${R_USER}"
+            return 1
+        fi
+
+        # configure for remote access
+        R_RMOD="ssh ${R_USER}@${R_HOST}"
+        RMOD="${R_RMOD} sudo"
+    fi
+
+    # check target
+    check_dataset "${DEST}" || {
+        notice "target dataset \"${DEST}\" in configuration entry #${COUNTER} does not exist; omitting"
+        return 1
+    }
+
+    NAME=$(basename "${DATASET}")
+    R_SNAPSHOTS=( $(${R_RMOD} /usr/sbin/zfs list -rt snapshot -d1 -Ho name -S creation "${DEST}/${NAME}" 2>/dev/null) )
+    R_USED_BEFORE=$(snapuse "${DEST}/${NAME}")
+
+    # determine the name of the current snapshot to create
+    DATE=$(date +%Y-%m-%d-%H%M)
+    NEWSNAP="${DATASET}@${DATE}"
+
+    # take a snapshot; without it there is nothing to replicate
+    /usr/sbin/zfs snapshot -r "${NEWSNAP}" || {
+        notice "failed to create snapshot \"${NEWSNAP}\"; omitting"
+        return 1
+    }
+
+    # determine whether to do differential send or not
+    if [[ -n ${LASTSNAP} ]]
+    then
+        # local snapshot(s) found
+        SNAPMODIFIER=( -i "${LASTSNAP}" )
+        check_dataset "${DEST}/$(basename "${LASTSNAP}")" || {
+            # last local snapshot is not available at the destination location
+            if (( ${#R_SNAPSHOTS[@]} >= 1 ))
+            then
+                # remote snapshot(s) found
+                # NOTE(review): this picks the last entry of the remote list as
+                # the incremental source - confirm that the newest snapshot
+                # common to both sides is not what is wanted here
+                R_SNAPMODIFIER=( -I "$(dirname "${DATASET}")/$(basename "${R_SNAPSHOTS[@]:(-1)}")" )
+            fi
+            # send any previous snapshots
+            /usr/sbin/zfs send -R "${R_SNAPMODIFIER[@]}" "${LASTSNAP}" 2>> "${LOGFILE}" \
+                | ${RMOD} /usr/sbin/zfs recv -Feuv "${DEST}" >> "${LOGFILE}" 2>&1
+        }
+    fi
+
+    # send backup; the exit codes of both ends of the pipe are kept because the
+    # plain exit status only reflects 'zfs recv'
+    /usr/sbin/zfs send -R "${SNAPMODIFIER[@]}" "${NEWSNAP}" 2>> "${LOGFILE}" \
+        | ${RMOD} /usr/sbin/zfs recv -Feuv "${DEST}" >> "${LOGFILE}" 2>&1
+    STATUS=( "${PIPESTATUS[@]}" )
+
+    # if replication is unsuccessful, omit the aging check so as to prevent data loss
+    if (( STATUS[0] == 0 && STATUS[1] == 0 ))
+    then
+        THRESHOLD=$(( 10#${KEEP} * 24 * 3600 ))
+        for SNAPSHOT in "${SNAPSHOTS[@]}"
+        do
+            TIMESTAMP=$(/usr/sbin/zfs get -pHo value creation "${SNAPSHOT}")
+            # never expire a snapshot whose age cannot be determined
+            [[ ${TIMESTAMP} =~ ^[0-9]+$ ]] || continue
+            AGE=$(( NOW - TIMESTAMP ))
+            if (( AGE >= THRESHOLD ))
+            then
+                /usr/sbin/zfs destroy -r "${SNAPSHOT}" >> "${LOGFILE}" 2>&1
+                ${RMOD} /usr/sbin/zfs destroy -r "${DEST}/$(basename "${SNAPSHOT}")" >> "${LOGFILE}" 2>&1
+            fi
+        done
+    else
+        notice "failed to replicate ${NEWSNAP} to ${R_HOST:+${R_HOST}:}${DEST}, no aging"
+    fi
+
+    # re-evaluate snapshot data usage; the source is always queried locally
+    REMOTE=${R_RMOD}
+    R_RMOD=""
+    L_USED_AFTER=$(snapuse "${DATASET}")
+    R_RMOD=${REMOTE}
+    R_USED_AFTER=$(snapuse "${DEST}/${NAME}")
+
+    report_delta "$(( L_USED_AFTER - L_USED_BEFORE ))" "$(dirname "${DATASET}")"
+    report_delta "$(( R_USED_AFTER - R_USED_BEFORE ))" "${DEST}"
+
+    return 0
+}
+
+#### END FUNCTIONS ####
+
+#### BEGIN LOGIC ####
+
+while getopts hf:m: OPTION
+do
+    case "${OPTION}" in
+        f)
+            # configuration file listing the datasets to back up
+            CFGFILE="${OPTARG}"
+            ;;
+
+        m)
+            # e-mail recipient for log
+            RECIPIENT="${OPTARG}"
+            ;;
+
+        h|\?)
+            # display help
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+if [[ -z ${CFGFILE} ]]
+then
+    echo "No configuration file supplied!"
+    exit 1
+fi
+
+## sanity checks
+# verify configuration file
+if [[ ! -f ${CFGFILE} || ! -r ${CFGFILE} ]]
+then
+    # cannot proceed
+    notice "can not access configuration file \"${CFGFILE}\""
+    exit 1
+fi
+
+# determine current timestamp; GNU date (as shipped with the SmartOS GZ) knows
+# '%s', anywhere the answer is not a number the perl interpreter is asked instead
+NOW=$(date +%s 2>/dev/null)
+if [[ ! ${NOW} =~ ^[0-9]+$ ]]
+then
+    NOW=$(perl -e 'print time' 2>/dev/null)
+fi
+if [[ ! ${NOW} =~ ^[0-9]+$ ]]
+then
+    # cannot proceed
+    notice "failed to determine current time on $(uname -v)"
+    exit 1
+fi
+
+# determine current timestamp for naming the session log
+RUNDATE=$(date +%Y-%m-%d-%H%M)
+
+# determine the name of this script
+ME=$(basename "${0%.sh}")
+
+# define logging directory, defaulting to "/tmp" if omitted
+LOGDIR="/var/log"
+
+# determine session logfile
+LOGFILE="${LOGDIR:-/tmp}/${ME}_${RUNDATE}.txt"
+
+# alternate file descriptor in use because SSH might be involved and we can not
+# pass '-n' to it because we need stdin for 'zfs recv'
+COUNTER=0
+while read -r -u 4 DATASET SAVETO KEEP ENABLED
+do
+    # an empty line is all the here-string holds when nothing is configured
+    [[ -n ${DATASET} ]] || continue
+    COUNTER=$(( COUNTER + 1 ))
+    if [[ ${ENABLED} == "Y" ]]
+    then
+        backup
+    fi
+done 4<<< "$(grep -Ev '^[[:space:]]*(#|$)' "${CFGFILE}")" # graciously overlook any comments or blank lines
+
+if [[ -n ${RECIPIENT} ]]
+then
+    # report the outcome by e-mailing the session log, if one was written
+    if [[ -f ${LOGFILE} ]]
+    then
+        mailx -s "${ME}_${RUNDATE}" "${RECIPIENT}" < "${LOGFILE}"
+    else
+        notice "no session log \"${LOGFILE}\" to e-mail"
+    fi
+fi
+
+#### END LOGIC ####