A neat script to back up Ceph volumes from one cluster to another

This script is a work in progress; feel free to reach out to me if you have any improvements.

The idea is to leverage Ceph's differential snapshot export feature and copy-on-write snapshots/layers to allow for quick and easy snapshots and exports of running VM volumes from one cluster to another. The obvious disclaimer about the disk snapshotting process applies here, i.e. if your OS is doing a bunch of writes while you take a snapshot like this, the snapshot might not contain a consistent view of your on-disk data, so the data might be corrupt when you restore it. Maybe we could get fancy and expand upon this to make an API call to OpenStack or the host OS and signal the VM via the QEMU guest agent (or something similar) to flush its disk buffers, but that's a little beyond the scope of this quick and easy script, which is meant to act as a DR-type backup.

  1#!/bin/bash
  2##################################################################################
  3# This script is used for synchronizing the RBDs (Rados Block Devices) between a 
  4# production Ceph cluster and a backup Ceph cluster. It checks whether the images exist in 
  5# the remote pool and creates them if not. It then checks for the last synced ID and 
  6# performs an initial or incremental sync. It also deletes old snapshots from both the 
  7# source and destination clusters.
  8##################################################################################
  9echo "Starting"
 10mirrorPool() { 
 11    #list all images in the pool
 12    IMAGES=`$SOURCERBDCMD ls $SOURCEPOOL` 
 13
 14    for LOCAL_IMAGE in $IMAGES; do 
 15        #check whether remote host/pool has image 
 16        if [[ -z $($DESTRBDCMD ls $DESTPOOL | grep $LOCAL_IMAGE) ]];then
 17            echo "info: image does not exist in remote pool. creating new image"
 18    	    #todo: check succesful creation
 19    	    echo $DESTRBDCMD create $DESTPOOL/$LOCAL_IMAGE -s 1
 20    	    $DESTRBDCMD create $DESTPOOL/$LOCAL_IMAGE -s 1
 21        fi
 22
 23        # Retreive last synced id
 24        expr=" $SNAPNAME\([[:digit:]]\+\)"
 25        echo '$SOURCERBDCMD snap ls $SOURCEPOOL/$LOCAL_IMAGE | grep "$expr" | sed  "s/.*$expr.*/\1/g" | sort -n > /tmp/rbd-sync-snaplistlocal'
 26        $SOURCERBDCMD snap ls $SOURCEPOOL/$LOCAL_IMAGE | grep "$expr" | sed  "s/.*$expr.*/\1/g" | sort -n > /tmp/rbd-sync-snaplistlocal
 27
 28        if $DESTRBDCMD info $DESTPOOL/$LOCAL_IMAGE >/dev/null 2>&1; then
 29                echo "Image in destination pool exists $DESTPOOL/$LOCAL_IMAGE"
 30                echo "Listing snaps on destination image"
 31                echo $DESTRBDCMD snap ls $DESTPOOL/$LOCAL_IMAGE | grep "$expr" | sed "s/.*$expr.*/\1/g" | sort -n > /tmp/rbd-sync-snaplistremote
 32                $DESTRBDCMD snap ls $DESTPOOL/$LOCAL_IMAGE | grep "$expr" | sed "s/.*$expr.*/\1/g" | sort -n > /tmp/rbd-sync-snaplistremote
 33        else
 34                echo "Image does not exist in destination"
 35                echo "" > /tmp/rbd-sync-snaplistremote
 36        fi
 37        syncid=$(comm -12 /tmp/rbd-sync-snaplistlocal /tmp/rbd-sync-snaplistremote | tail -n1)
 38        lastid=$(cat /tmp/rbd-sync-snaplistlocal /tmp/rbd-sync-snaplistremote | sort -n | tail -n1)
 39        nextid=$(($lastid + 1))
 40
 41
 42
 43        # Initial sync
 44        if [ "$syncid" = "" ]; then
 45                echo "Initial sync with id $nextid"
 46                echo $SOURCERBDCMD snap create $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid
 47                $SOURCERBDCMD snap create $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid
 48
 49                echo '$SOURCERBDCMD export-diff $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid - | $DESTRBDCMD import-diff - $DESTPOOL/$LOCAL_IMAGE'
 50                $SOURCERBDCMD export-diff $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid - | $DESTRBDCMD import-diff - $DESTPOOL/$LOCAL_IMAGE
 51
 52
 53        # Incremental sync
 54        else
 55                echo "Found synced id : $syncid"
 56                echo $SOURCERBDCMD snap create $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid
 57                $SOURCERBDCMD snap create $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid
 58
 59                echo "Sync $syncid -> $nextid"
 60
 61                $SOURCERBDCMD export-diff --from-snap $SNAPNAME$syncid $SOURCEPOOL/$LOCAL_IMAGE@$SNAPNAME$nextid - \
 62                |tee >($DESTRBDCMD import-diff - $DESTPOOL/$LOCAL_IMAGE)
 63                
 64                
 65        echo "Checking for old snaps to delete on source"
 66        deleteOldSnapsOnSource
 67        echo "Checking for old snaps to delete on destination"
 68        deleteOldSnapsOnDest
 69        fi
 70
 71    done
 72}
 73
 74#Delete snapshots from source cluster
 75deleteOldSnapsOnSource() {
 76    #get all the snapshots of the image
 77    while IFS= read -r line; do
 78        # echo $line
 79        timestamp=$(echo "$line" | jq -r '.timestamp')
 80        a=$(date +%s)
 81        b=$(date -d "$timestamp" +%s)
 82        date_diff=$(((a-b)/86400))
 83
 84        if [ "$date_diff" -gt "$THRESHOLD_DAYS_SOURCE" ]; then
 85            echo "Alert: Item $(echo "$line" | jq -r '.name') with ID $(echo "$line" | jq -r '.id') is older than $THRESHOLD_DAYS_SOURCE days($date_diff days)!"
 86            echo $SOURCERBDCMD snap rm $SOURCEPOOL/$LOCAL_IMAGE@$(echo "$line" | jq -r '.name')
 87            $SOURCERBDCMD snap rm $SOURCEPOOL/$LOCAL_IMAGE@$(echo "$line" | jq -r '.name')
 88        else
 89            echo "Item $(echo "$line" | jq -r '.name') with ID $(echo "$line" | jq -r '.id') is only $date_diff days"
 90        fi
 91    done < <($SOURCERBDCMD snap ls $SOURCEPOOL/$LOCAL_IMAGE --format json |  jq -c '.[]  | select(.name|startswith("rbd-sync"))')
 92}
 93
 94
 95
 96#Delete snapshots from destination cluster
 97deleteOldSnapsOnDest() {
 98    #get all the snapshots of the image
 99    while IFS= read -r line; do
100        # echo $line
101        timestamp=$(echo "$line" | jq -r '.timestamp')
102        a=$(date +%s)
103        b=$(date -d "$timestamp" +%s)
104        date_diff=$(((a-b)/86400))
105
106        if [ "$date_diff" -gt "$THRESHOLD_DAYS_DEST" ]; then
107            echo "Alert: Item $(echo "$line" | jq -r '.name') with ID $(echo "$line" | jq -r '.id') is older than $THRESHOLD_DAYS_DEST days($date_diff days)!"
108            echo $DESTRBDCMD snap rm $DESTPOOL/$LOCAL_IMAGE@$(echo "$line" | jq -r '.name')
109            $DESTRBDCMD snap rm $DESTPOOL/$LOCAL_IMAGE@$(echo "$line" | jq -r '.name')
110        else
111            echo "Item $(echo "$line" | jq -r '.name') with ID $(echo "$line" | jq -r '.id') is only $date_diff days"
112        fi
113    done < <($DESTRBDCMD snap ls $DESTPOOL/$LOCAL_IMAGE --format json |  jq -c '.[]  | select(.name|startswith("rbd-sync"))')
114}
115
# ---------------------------------------------------------------------------
# Configuration. Every value keeps its previous default and can be overridden
# from the environment, e.g. `SOURCEPOOL=vms DESTPOOL=vms-backup ./script`.
# ---------------------------------------------------------------------------
# Sync snapshots older than this many days are deleted after a sync.
THRESHOLD_DAYS_DEST="${THRESHOLD_DAYS_DEST:-5}"
THRESHOLD_DAYS_SOURCE="${THRESHOLD_DAYS_SOURCE:-2}"

# Cluster names as passed to `rbd --cluster`.
SOURCECLUSTER="${SOURCECLUSTER:-ceph}"
DESTCLUSTER="${DESTCLUSTER:-my-backup}"
# These hold a command plus its options and are expanded unquoted on purpose.
SOURCERBDCMD="${SOURCERBDCMD:-rbd --cluster $SOURCECLUSTER}"
DESTRBDCMD="${DESTRBDCMD:-rbd --cluster $DESTCLUSTER}"
# Name prefix of the snapshots created by this script; a numeric id is appended.
SNAPNAME="${SNAPNAME:-rbd-sync-}"

SOURCEPOOL="${SOURCEPOOL:-test}"
DESTPOOL="${DESTPOOL:-backup}"

# Log file, by default created in the current working directory.
LOGFILE="${LOGFILE:-ceph_sync.log}"

# From here on stdout and stderr are shown and also written to LOGFILE.
exec > >(tee "$LOGFILE") 2>&1
date

mirrorPool
sync_status=$?

# Optional mail report, based on whether the log contains the word "error":
# if [[ -z $(grep error "$LOGFILE") ]]; then
#     cat "$LOGFILE"|mailx -s "Ceph sync- Successfull" XX@YY.com.au
# else
#     cat "$LOGFILE"|mailx -s "Ceph sync- FAILED" XX@YY.com.au
# fi
date

# Hand the result of the sync to the caller (cron, systemd, monitoring).
exit "$sync_status"