RocksDB provides a powerful backup engine that allows you to create incremental backups and restore databases efficiently. This guide covers backup creation, verification, and restoration.
BackupEngine Overview
The BackupEngine enables:
Incremental Backups: Only new data is backed up, saving time and space.
Consistent Snapshots: Backups are consistent point-in-time snapshots.
Parallel Operations: Multiple backup operations can run concurrently.
Verification: Built-in tools to verify backup integrity.
Creating Backups
Opening a BackupEngine
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/backup_engine.h"

using ROCKSDB_NAMESPACE::BackupEngine;
using ROCKSDB_NAMESPACE::BackupEngineOptions;
using ROCKSDB_NAMESPACE::DB;
using ROCKSDB_NAMESPACE::Env;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::Status;

// Open the database, creating it if it does not exist yet.
DB* db = nullptr;
Options options;
options.create_if_missing = true;
Status s = DB::Open(options, "/tmp/rocksdb_example", &db);
assert(s.ok());

// Create a backup engine that stores its backups in a separate directory.
BackupEngine* backup_engine = nullptr;
s = BackupEngine::Open(Env::Default(),
                       BackupEngineOptions("/tmp/rocksdb_example_backup"),
                       &backup_engine);
assert(s.ok());
Creating a New Backup
// Write some data. Put() reports failure through its returned Status, so
// capture it instead of re-checking the result of an earlier call.
s = db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "key1", "value1");
assert(s.ok());

// Create backup
s = backup_engine->CreateNewBackup(db);
assert(s.ok());
std::cout << "Backup created successfully" << std::endl;
Backups are incremental by default. Only new or modified SST files are copied, making subsequent backups much faster.
Backup Options
BackupEngineOptions
BackupEngineOptions backup_options ( "/path/to/backup" );
// Share table files between backups (default: true)
backup_options . share_table_files = true ;
// Use checksums to ensure file identity (default: true)
backup_options . share_files_with_checksum = true ;
// Synchronous writes (default: true)
backup_options . sync = true ;
// Destroy old backups on startup (default: false)
backup_options . destroy_old_data = false ;
// Number of threads for backup operations (default: 1)
backup_options . max_background_operations = 4 ;
BackupEngine * backup_engine;
BackupEngine :: Open ( Env :: Default (), backup_options, & backup_engine);
CreateBackupOptions
#include "rocksdb/utilities/backup_engine.h"
using ROCKSDB_NAMESPACE ::CreateBackupOptions;
CreateBackupOptions create_options;
// Flush before backup (default: false)
create_options . flush_before_backup = true ;
// Progress callback
create_options . progress_callback = []() {
std ::cout << "Backup progress..." << std ::endl;
};
// Create backup with options
backup_engine -> CreateNewBackup (create_options, db);
Getting Backup Info
using ROCKSDB_NAMESPACE ::BackupInfo;
// Collect one BackupInfo entry per backup known to the engine, then print
// the summary fields of each entry.
std::vector<BackupInfo> backups;
backup_engine->GetBackupInfo(&backups);
for (const BackupInfo& backup : backups) {
  std::cout << "Backup ID: " << backup.backup_id << std::endl
            << " Timestamp: " << backup.timestamp << std::endl
            << " Size: " << backup.size << " bytes" << std::endl
            << " Number of files: " << backup.number_files << std::endl;
}
Verifying Backups
// Verify a specific backup (backup_id = 1) and report the outcome.
Status s = backup_engine->VerifyBackup(1);
if (!s.ok()) {
  std::cerr << "Backup verification failed: " << s.ToString() << std::endl;
} else {
  std::cout << "Backup verified successfully" << std::endl;
}
VerifyBackup() checks file existence and sizes by default. Pass true as the second argument to also verify checksums: VerifyBackup(backup_id, true)
Restoring from Backup
Basic Restore
using ROCKSDB_NAMESPACE ::BackupEngineReadOnly;
using ROCKSDB_NAMESPACE ::RestoreOptions;
// Close the database first
delete db;
db = nullptr ;
// Open backup engine in read-only mode
BackupEngineReadOnly * backup_engine_ro;
Status s = BackupEngineReadOnly :: Open (
Env :: Default (),
BackupEngineOptions ( "/tmp/rocksdb_example_backup" ),
& backup_engine_ro
);
assert ( s . ok ());
// Restore from backup
s = backup_engine_ro -> RestoreDBFromBackup (
1 , // backup_id
"/tmp/rocksdb_example" , // db_dir
"/tmp/rocksdb_example" // wal_dir
);
assert ( s . ok ());
delete backup_engine_ro;
// Reopen database
s = DB :: Open (options, "/tmp/rocksdb_example" , & db);
assert ( s . ok ());
Restore from Latest Backup
// Restore the most recent backup, using the same directory for data files
// and WAL files.
const char* const kDbPath = "/tmp/rocksdb_example";
RestoreOptions restore_options;
Status s = backup_engine_ro->RestoreDBFromLatestBackup(restore_options,
                                                       /*db_dir=*/kDbPath,
                                                       /*wal_dir=*/kDbPath);
assert(s.ok());
Restore Options
RestoreOptions restore_options;
// Keep existing log files (default: false)
restore_options . keep_log_files = false ;
backup_engine_ro -> RestoreDBFromBackup (
restore_options,
backup_id,
db_dir,
wal_dir
);
Managing Backups
Deleting Old Backups
// Retention by count: everything except the newest 3 backups is removed.
Status s = backup_engine->PurgeOldBackups(/*num_backups_to_keep=*/3);
assert(s.ok());

// Alternatively, remove one backup by its ID (here: backup 1).
s = backup_engine->DeleteBackup(/*backup_id=*/1);
assert(s.ok());
Garbage Collection
// Remove files left behind by incomplete or corrupted backups.
Status s = backup_engine->GarbageCollect();
assert(s.ok());
GarbageCollect() will delete files from incomplete backup operations. Make sure no backup is in progress before calling this.
Complete Example
#include <cassert>
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/backup_engine.h"

using ROCKSDB_NAMESPACE::BackupEngine;
using ROCKSDB_NAMESPACE::BackupEngineOptions;
using ROCKSDB_NAMESPACE::BackupEngineReadOnly;
using ROCKSDB_NAMESPACE::BackupInfo;
using ROCKSDB_NAMESPACE::DB;
using ROCKSDB_NAMESPACE::Env;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::ReadOptions;
using ROCKSDB_NAMESPACE::Status;
using ROCKSDB_NAMESPACE::WriteOptions;

// End-to-end example: write a key, back the database up, write a second key,
// restore backup 1, and confirm that only the first key is present afterwards.
// Every fallible call stores its Status in `s` before the following assert, so
// each assert checks the call directly above it.
int main() {
  std::unique_ptr<DB> db;
  Options options;
  options.IncreaseParallelism();
  options.OptimizeLevelStyleCompaction();
  options.create_if_missing = true;

  // Open DB
  Status s = DB::Open(options, "/tmp/rocksdb_example", &db);
  assert(s.ok());

  // Put key-value
  s = db->Put(WriteOptions(), "key1", "value1");
  assert(s.ok());

  // Create backup. Open() hands back a raw pointer; wrap it immediately so
  // the engine is released on every path out of main().
  BackupEngine* raw_backup_engine = nullptr;
  s = BackupEngine::Open(Env::Default(),
                         BackupEngineOptions("/tmp/rocksdb_example_backup"),
                         &raw_backup_engine);
  assert(s.ok());
  std::unique_ptr<BackupEngine> backup_engine(raw_backup_engine);

  s = backup_engine->CreateNewBackup(db.get());
  assert(s.ok());

  std::vector<BackupInfo> backup_info;
  backup_engine->GetBackupInfo(&backup_info);
  assert(!backup_info.empty());

  s = backup_engine->VerifyBackup(1);
  assert(s.ok());

  // Put more data
  s = db->Put(WriteOptions(), "key2", "value2");
  assert(s.ok());

  // Close the database before its files are replaced by the restore.
  db.reset();

  // Restore db to backup 1
  BackupEngineReadOnly* raw_backup_engine_ro = nullptr;
  s = BackupEngineReadOnly::Open(
      Env::Default(), BackupEngineOptions("/tmp/rocksdb_example_backup"),
      &raw_backup_engine_ro);
  assert(s.ok());
  std::unique_ptr<BackupEngineReadOnly> backup_engine_ro(raw_backup_engine_ro);

  s = backup_engine_ro->RestoreDBFromBackup(1, "/tmp/rocksdb_example",
                                            "/tmp/rocksdb_example");
  assert(s.ok());

  // Open db again
  s = DB::Open(options, "/tmp/rocksdb_example", &db);
  assert(s.ok());

  // key1 was written before the backup, so it must be readable. Checking
  // ok() rather than !IsNotFound() also catches I/O and corruption errors.
  std::string value;
  s = db->Get(ReadOptions(), "key1", &value);
  assert(s.ok());
  assert(value == "value1");

  // key2 should not exist (not in backup 1)
  s = db->Get(ReadOptions(), "key2", &value);
  assert(s.IsNotFound());

  // Release the engines and the database explicitly, in the same order as
  // the manual deletes this replaces.
  backup_engine.reset();
  backup_engine_ro.reset();
  db.reset();

  return 0;
}
Advanced Features
Rate Limiting
Limit backup/restore I/O rate to avoid impacting production:
#include "rocksdb/rate_limiter.h"
using ROCKSDB_NAMESPACE ::NewGenericRateLimiter;
BackupEngineOptions backup_options ( "/path/to/backup" );
// Limit backup to 10 MB/s
backup_options . backup_rate_limiter . reset (
NewGenericRateLimiter ( 10 << 20 ) // 10 MB/s
);
// Limit restore to 50 MB/s
backup_options . restore_rate_limiter . reset (
NewGenericRateLimiter ( 50 << 20 ) // 50 MB/s
);
BackupEngine * backup_engine;
BackupEngine :: Open ( Env :: Default (), backup_options, & backup_engine);
Custom Environment
Use a custom environment for remote backups (e.g., S3):
// Create custom Env for remote storage
Env * remote_env = CreateRemoteEnv (); // Your implementation
BackupEngineOptions backup_options ( "/remote/backup/path" );
backup_options . backup_env = remote_env;
BackupEngine * backup_engine;
BackupEngine :: Open ( Env :: Default (), backup_options, & backup_engine);
Backup Metadata
Store application metadata with backups:
using ROCKSDB_NAMESPACE ::CreateBackupOptions;
CreateBackupOptions create_options;
std ::string app_metadata = "version=1.0,timestamp=2024" ;
Status s = backup_engine -> CreateNewBackupWithMetadata (
create_options,
db,
app_metadata
);
// Later, retrieve metadata
BackupInfo info;
backup_engine -> GetBackupInfo (backup_id, & info);
std ::cout << "Metadata: " << info . app_metadata << std ::endl;
Backup Strategies
Continuous Backup: Take backups periodically (e.g., hourly) to minimize data loss.
Before Major Operations: Create backups before schema changes or major data updates.
Retention Policy: Use PurgeOldBackups() to implement retention policies. It takes the number of backups to keep (e.g., keep only the 7 most recent backups).
Off-site Backups: Copy backups to remote storage for disaster recovery.
Best Practices
Test restores regularly
Periodically test backup restoration to ensure backups are valid and complete.
Verify backups
Use VerifyBackup() with checksum verification enabled for critical backups.
Monitor backup size
Track backup sizes and growth over time to plan storage capacity.
Close database before restore
Always close the database before restoring from a backup.
Use separate backup directory
Store backups in a different location from the main database.
Implement retention policies
Don’t keep backups forever. Implement a policy based on your requirements.
Monitoring and Debugging
Backup Statistics
BackupInfo info;
backup_engine -> GetBackupInfo (backup_id, & info);
std ::cout << "Backup Statistics:" << std ::endl;
std ::cout << " ID: " << info . backup_id << std ::endl;
std ::cout << " Timestamp: " << info . timestamp << std ::endl;
std ::cout << " Size: " << info . size << " bytes" << std ::endl;
std ::cout << " Files: " << info . number_files << std ::endl;
std ::cout << " Metadata: " << info . app_metadata << std ::endl;
Handling Errors
// Attempt a backup and classify the failure, if any.
Status s = backup_engine->CreateNewBackup(db);
if (!s.ok()) {
  if (s.IsIOError()) {
    std::cerr << "I/O error during backup" << std::endl;
  } else if (s.IsCorruption()) {
    std::cerr << "Database corruption detected" << std::endl;
  } else {
    std::cerr << "Backup failed: " << s.ToString() << std::endl;
  }

  // Clean up incomplete backup. The cleanup can fail as well, so report
  // that instead of silently dropping its Status.
  Status gc_status = backup_engine->GarbageCollect();
  if (!gc_status.ok()) {
    std::cerr << "Cleanup after failed backup also failed: "
              << gc_status.ToString() << std::endl;
  }
}
Troubleshooting
Backup is slow
Enable share_table_files for incremental backups
Increase max_background_operations
Use rate limiting if I/O is affecting production
Restore fails
Verify the backup first with VerifyBackup()
Ensure database is closed before restoring
Check disk space in destination directory
Backup directory grows too large
Use PurgeOldBackups() to remove old backups
Verify share_table_files is enabled
Run GarbageCollect() to clean up orphaned files
Next Steps
Configuration: Learn about backup configuration options.
Performance Tuning: Optimize backup and restore performance.