#!/usr/bin/env bash
#/ Usage: ghe-backup [-v]
#/ Take snapshots of all GitHub Enterprise data, including Git repository data,
#/ the MySQL database, instance settings, GitHub Pages data, etc.
#/
#/ With -v, enable verbose output and show more information about what's being
#/ transferred.
set -e

# Bring in the backup configuration. The path is quoted so that an install
# location containing whitespace is not word-split before being sourced.
. "$( dirname "${BASH_SOURCE[0]}" )/../share/github-backup-utils/ghe-backup-config"

# Used to record failed backup steps
failures=

# Create the timestamped snapshot directory where files for this run will live,
# change into it, and mark the snapshot as incomplete by touching the
# 'incomplete' file. If the backup succeeds, this file will be removed
# signifying that the snapshot is complete.
mkdir -p "$GHE_SNAPSHOT_DIR"
cd "$GHE_SNAPSHOT_DIR"
touch "incomplete"

# This is toggled true once we've enabled maintenance mode on the remote
# appliance, and back to false once it has been disabled again. The exit trap
# checks it so that maintenance mode is only disabled there when it is still
# on, and is not disabled a second time on successful backup runs.
GHE_MAINTENANCE_MODE_ENABLED=false

# To prevent multiple backup runs happening at the same time, we create an
# in-progress file with the timestamp and pid of the backup process,
# giving us a form of locking.
#
# Set up a trap to remove the in-progress file if we exit for any reason but
# verify that we are the same process before doing so.
#
# The cleanup trap also handles disabling maintenance mode on the appliance if
# it was automatically enabled.
# Exit handler: release the in-progress lock and leave maintenance mode.
#
# Removes ../in-progress only when its contents name this snapshot and this
# process, then disables maintenance mode if the flag says it is still on.
#
# Globals read: GHE_SNAPSHOT_TIMESTAMP, GHE_MAINTENANCE_MODE_ENABLED,
#               GHE_HOSTNAME
cleanup () {
  # Keep the parsed lock fields out of the global namespace.
  local progress snapshot pid

  if [ -f ../in-progress ]; then
    progress=$(cat ../in-progress)
    snapshot=$(echo "$progress" | cut -d ' ' -f 1)
    pid=$(echo "$progress" | cut -d ' ' -f 2)
    # Both operands are quoted: an empty or truncated in-progress file gives
    # an empty pid, which would otherwise make the test expression malformed.
    if [ "$snapshot" = "$GHE_SNAPSHOT_TIMESTAMP" ] && [ "$$" = "$pid" ]; then
      unlink ../in-progress
    fi
  fi

  if $GHE_MAINTENANCE_MODE_ENABLED; then
    ghe-maintenance-mode-disable "$GHE_HOSTNAME"
  fi
}

# Setup exit traps
trap 'cleanup' EXIT
trap 'exit $?' INT # ^C always terminate

# An in-progress symlink is reported as a lock left by a previous version of
# ghe-backup; refuse to run and ask the operator to remove it by hand.
if [ -h ../in-progress ]; then
  echo "Error: detected a backup already in progress from a previous version of ghe-backup." 1>&2
  echo "If there is no backup in progress anymore, please remove" 1>&2
  echo "the $GHE_DATA_DIR/in-progress symlink." 1>&2
  exit 1
fi

# A regular in-progress file holds "<snapshot timestamp> <pid>". If that pid
# no longer belongs to a ghe-backup process the lock is stale and is removed;
# otherwise another backup is running and we abort.
if [ -f ../in-progress ]; then
  progress=$(cat ../in-progress)
  snapshot=$(echo "$progress" | cut -d ' ' -f 1)
  pid=$(echo "$progress" | cut -d ' ' -f 2)
  # $pid is quoted so a malformed lock file cannot be word-split into extra ps
  # arguments; grep's match is discarded so only the error message is printed.
  if ! ps -p "$pid" -o command= | grep ghe-backup >/dev/null; then
    # We can safely remove in-progress, ghe-prune-snapshots
    # will clean up the failed backup.
    unlink ../in-progress
  else
    echo "Error: backup process $pid of $GHE_HOSTNAME already in progress in snapshot $snapshot. Aborting." 1>&2
    exit 1
  fi
fi

# Take the lock for this run.
echo "$GHE_SNAPSHOT_TIMESTAMP $$" > ../in-progress

echo "Starting backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP"

# Perform a host connection check and establish the remote appliance version.
# The version is available in the GHE_REMOTE_VERSION variable and also written
# to a version file in the snapshot directory itself.
ghe_remote_version_required
echo "$GHE_REMOTE_VERSION" > version

# Figure out if we're backing up a cluster: the remote cluster marker file
# exists only on clustered appliances.
cluster=false
if ghe-ssh "$GHE_HOSTNAME" -- \
  "[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/cluster' ]"; then
  cluster=true
fi

# Log backup start message in /var/log/syslog on remote instance
ghe_remote_logger "Starting backup from $(hostname) in snapshot $GHE_SNAPSHOT_TIMESTAMP ..."

# Determine whether to use the rsync or tarball backup strategy based on the
# remote appliance version. The tarball strategy must be used with GitHub
# Enterprise versions prior to 11.10.340 since rsync is not available.
# The tarball strategy may be forced for newer versions by setting
# GHE_BACKUP_STRATEGY=tarball in backup.config but this is not recommended.
: "${GHE_BACKUP_STRATEGY:=rsync}"
# Operands are quoted and the two comparisons are joined with && rather than
# the obsolescent -a, so an empty version component cannot garble the test.
if [ "$GHE_VERSION_MAJOR" -eq 1 ] && [ "$GHE_VERSION_PATCH" -lt 340 ]; then
  GHE_BACKUP_STRATEGY="tarball"
fi

# A clustered appliance always uses the cluster strategy, overriding the above.
if $cluster; then
  GHE_BACKUP_STRATEGY="cluster"
fi

# Record the strategy with the snapshot so we will know how to restore.
echo "$GHE_BACKUP_STRATEGY" > strategy
export GHE_BACKUP_STRATEGY

# If we're using the tarball backup strategy, put the appliance in maintenance
# mode and wait for all writing processes to bleed out.
if [ "$GHE_BACKUP_STRATEGY" = "tarball" ]; then
  ghe-maintenance-mode-enable "$GHE_HOSTNAME"
  GHE_MAINTENANCE_MODE_ENABLED=true
fi

# Storing the backup-utils version remotely is best-effort: a failure only
# prints a warning and is not added to $failures.
ghe-backup-store-version  ||
echo "Warning: storing backup-utils version remotely failed."

# Each step below appends its label to $failures when it fails, so the run
# continues and all failed steps can be reported together at the end.
echo "Backing up GitHub settings ..."
ghe-backup-settings ||
failures="$failures settings"

echo "Backing up SSH authorized keys ..."
bm_start "ghe-export-authorized-keys"
ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-authorized-keys' > authorized-keys.json ||
failures="$failures authorized-keys"
bm_end "ghe-export-authorized-keys"

echo "Backing up SSH host keys ..."
bm_start "ghe-export-ssh-host-keys"
ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-ssh-host-keys' > ssh-host-keys.tar ||
failures="$failures ssh-host-keys"
bm_end "ghe-export-ssh-host-keys"

echo "Backing up MySQL database ..."
bm_start "ghe-export-mysql"
# The remote script is fed to bash on stdin. It enables pipefail so that a
# failing ghe-export-mysql is not masked by gzip's exit status.
echo 'set -o pipefail; ghe-export-mysql | gzip' |
ghe-ssh "$GHE_HOSTNAME" -- /bin/bash > mysql.sql.gz ||
failures="$failures mysql"
bm_end "ghe-export-mysql"

echo "Backing up Redis database ..."
if $cluster; then
  ghe-backup-redis-cluster > redis.rdb ||
    failures="$failures redis"
else
  ghe-backup-redis > redis.rdb ||
    failures="$failures redis"
fi

# The audit log is only backed up for major version 2 and later. The operand
# is quoted to match the other version checks in this script.
if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then
  echo "Backing up audit log ..."
  ghe-backup-es-audit-log ||
  failures="$failures audit-log"
fi

echo "Backing up hookshot logs ..."
ghe-backup-es-hookshot ||
failures="$failures hookshot"

echo "Backing up Git repositories ..."
if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
  # Use the calculated-routes backup when the appliance ships the routes
  # helper; otherwise fall back to the legacy cluster backup.
  if ghe-ssh "$GHE_HOSTNAME" test -f /data/github/current/bin/dgit-cluster-backup-routes ; then
    echo "* Using calculated routes method..."
    # Append to $failures (not the undefined $failure) so that failures
    # recorded by earlier steps are not discarded.
    ghe-backup-repositories-cluster-ng || failures="$failures repositories"
  else
    echo "* Using legacy method. A faster backup method is available on enterprise 2.7 and up."
    ghe-backup-repositories-cluster || failures="$failures repositories"
  fi
else
  # The command name is derived from the strategy (rsync or tarball here).
  "ghe-backup-repositories-${GHE_BACKUP_STRATEGY}" ||
  failures="$failures repositories"
fi

echo "Backing up GitHub Pages ..."
"ghe-backup-pages-${GHE_BACKUP_STRATEGY}" ||
failures="$failures pages"

# User data is only backed up for major version 2 and later. As elsewhere, a
# failed step appends its label to $failures and the run carries on.
if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then
  # Both the cluster and the single-appliance paths start with assets.
  echo "Backing up asset attachments ..."
  if $cluster; then
    if ! ghe-backup-alambic-cluster; then
      failures="$failures alambic_assets"
    fi

    echo "Backing up custom Git hooks ..."
    if ! ghe-backup-git-hooks-cluster; then
      failures="$failures git-hooks"
    fi
  else
    if ! ghe-backup-userdata alambic_assets; then
      failures="$failures alambic_assets"
    fi

    echo "Backing up storage data ..."
    if ! ghe-backup-userdata storage; then
      failures="$failures storage"
    fi

    echo "Backing up hook deliveries ..."
    if ! ghe-backup-userdata hookshot; then
      failures="$failures hookshot"
    fi

    echo "Backing up custom Git hooks ..."
    if ! ghe-backup-userdata git-hooks/environments/tarballs; then
      failures="$failures git-hooks-environments"
    fi
    if ! ghe-backup-userdata git-hooks/repos; then
      failures="$failures git-hooks-repos"
    fi
  fi
fi

# Elasticsearch indices are skipped when backing up a cluster.
if ! $cluster; then
  echo "Backing up Elasticsearch indices ..."
  if ! ghe-backup-es-${GHE_BACKUP_STRATEGY}; then
    failures="$failures elasticsearch"
  fi
fi

# If we're using the tarball backup strategy, bring the appliance out of
# maintenance mode now instead of waiting until after pruning stale snapshots.
# The flag is cleared even if disabling fails, so the exit trap does not try
# a second time.
if $GHE_MAINTENANCE_MODE_ENABLED; then
  ghe-maintenance-mode-disable "$GHE_HOSTNAME" ||
    echo "Warning: Disabling maintenance mode on $GHE_HOSTNAME failed."
  GHE_MAINTENANCE_MODE_ENABLED=false
fi

# git fsck repositories after the backup. The snapshot path is quoted so a
# data directory containing whitespace is passed as a single argument.
if [ "$GHE_BACKUP_FSCK" = "yes" ]; then
  ghe-backup-fsck "$GHE_SNAPSHOT_DIR" ||
    failures="$failures fsck"
fi

# If everything was successful, mark the snapshot as complete, update the
# current symlink to point to the new snapshot and prune expired and failed
# snapshots.
if [ -z "$failures" ]; then
  rm "incomplete"

  rm -f "../current"
  ln -s "$GHE_SNAPSHOT_TIMESTAMP" "../current"

  ghe-prune-snapshots
fi

echo "Completed backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP at $(date +"%H:%M:%S")"

# When any step failed, log and print the failed steps, then exit non-zero.
if [ -n "$failures" ]; then
  # $failures is deliberately unquoted: word splitting drops the leading
  # space before the labels are joined with ", ".
  steps="$(echo $failures | sed 's/ /, /g')"
  ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP with failures: ${steps}."
  echo "Error: Snapshot incomplete. Some steps failed: ${steps}. "
  exit 1
fi

# Only reached when every step succeeded.
ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP successfully."

# Detect if the created backup contains any leaked ssh keys. The check's exit
# status is ignored so it cannot fail the backup.
echo "Checking for leaked ssh keys ..."
ghe-detect-leaked-ssh-keys -s "$GHE_SNAPSHOT_DIR" || true

# Make sure the script's final status is zero.
true
