This guide provides step-by-step procedures for rolling back failed deployments, recovering from infrastructure failures, and restoring data from backups.
#!/bin/bash
# Restore database from snapshot.
#
# The three invocations below are alternatives: a verify-only run, a restore
# from the latest snapshot, and a restore from an explicitly named snapshot.
# Run them from the directory that contains restore-database.sh.

# Verify snapshot exists
./restore-database.sh --snapshot latest --environment dev --verify-only

# Restore from latest snapshot
./restore-database.sh --snapshot latest --environment dev

# Restore from specific snapshot
./restore-database.sh \
  --snapshot rds:govtech-dev-postgres-2026-03-03-02-00 \
  --environment dev
3
Update Kubernetes Secrets
After restoration, update the database endpoint so that the application connects to the restored instance:
# Look up the endpoint address of the restored RDS instance, print it, then
# open the ConfigMap so DB_HOST can be updated by hand.

# Get new RDS endpoint
NEW_ENDPOINT=$(aws rds describe-db-instances \
  --db-instance-identifier govtech-dev-postgres-restored-20260303 \
  --query 'DBInstances[0].Endpoint.Address' \
  --output text)
echo "New endpoint: $NEW_ENDPOINT"

# Update ConfigMap (if DB_HOST is stored there)
kubectl edit configmap govtech-config -n govtech
# Add or update: DB_HOST: "<NEW_ENDPOINT>"
4
Restart Backend Pods
# Roll the backend deployment so its pods start again, wait for the rollout
# to finish, then watch the logs for database-related lines.

# Restart backend to use new database
kubectl rollout restart deployment/backend -n govtech

# Wait for rollout to complete
kubectl rollout status deployment/backend -n govtech

# Verify connection
# -f keeps streaming the log; interrupt with Ctrl-C once the connection
# messages have been checked.
kubectl logs -f deployment/backend -n govtech | grep -i database
5
Verify Data Integrity
# Smoke-test the API and run a row count against the database.
# ALB_URL and NEW_ENDPOINT are not set in this snippet; both must already be
# defined in the current shell.

# Test API endpoints
curl "http://${ALB_URL}/api/health"
curl "http://${ALB_URL}/api/workloads"

# Check database directly (from a pod)
# NEW_ENDPOINT is expanded by the local shell before kubectl runs; quoting it
# prevents word-splitting if the value is empty or malformed.
kubectl exec -it deployment/backend -n govtech -- \
  psql -h "$NEW_ENDPOINT" -U govtech_admin -d govtech -c "SELECT COUNT(*) FROM workloads;"
6
Clean Up Old Instance (Optional)
# Delete the old RDS instance.
# --skip-final-snapshot means no final snapshot is taken on deletion.

# Only after verifying the restored database works
aws rds delete-db-instance \
  --db-instance-identifier govtech-dev-postgres-old \
  --skip-final-snapshot
# List backups in S3
aws s3 ls s3://govtech-dev-app-storage-835960996869/backups/postgresql/ --recursive

# Output:
# backups/postgresql/govtech_20260301_0200.dump
# backups/postgresql/govtech_20260302_0200.dump
# backups/postgresql/govtech_20260303_0200.dump
# Copy a local backup file from /tmp into /tmp on the postgres pod.
# BACKUP_FILE is not set in this snippet; it must already hold the file name
# of the backup located under /tmp.

# Get postgres pod name (first pod matching the app=postgres label)
POSTGRES_POD=$(kubectl get pod -l app=postgres -n govtech -o jsonpath='{.items[0].metadata.name}')

# Copy backup to pod
kubectl cp "/tmp/${BACKUP_FILE}" "govtech/${POSTGRES_POD}:/tmp/${BACKUP_FILE}"
# Save the current Terraform state to a local file and compare it with a
# previous copy. /tmp/terraform.tfstate.previous is not created here; it must
# already exist.

# Backup current state
terraform state pull > /tmp/terraform.tfstate.backup

# Compare states
# diff exits non-zero when the two files differ.
diff /tmp/terraform.tfstate.backup /tmp/terraform.tfstate.previous
4
Restore State (if needed)
# Replace the current Terraform state with the previous copy.
# The two commands are alternatives; use one or the other.

# Push previous state (DANGEROUS - use with caution)
terraform state push /tmp/terraform.tfstate.previous

# Or restore via S3
aws s3 cp /tmp/terraform.tfstate.previous \
  s3://govtech-terraform-state-835960996869/dev/terraform.tfstate
# Run a backup, restore it into the dev environment, and run the backend's
# db:verify npm script. Paths are relative, so run from the repository root.

# Run backup
ansible-playbook ansible/playbooks/backup.yml -e "environment=dev"

# Test restore in dev environment
./disaster-recovery/scripts/restore-database.sh \
  --snapshot latest \
  --environment dev

# Verify data integrity
kubectl exec -it deployment/backend -n govtech -- npm run db:verify