Skip to main content

Overview

The checksum package provides fast file integrity verification using xxHash64, a high-performance non-cryptographic hash function. It includes checksum calculation and duplicate tracking for copy operations.

Interfaces

Hasher

Defines the behavior needed for checksum calculation.
// Hasher defines the behavior needed for checksum calculation.
type Hasher interface {
    // Calculate computes the checksum for the file at path.
    Calculate(path string) (uint64, error)
    // CalculateReader computes the checksum while reading from r.
    CalculateReader(r io.Reader) (uint64, error)
}
Calculate
func(string) (uint64, error)
Computes checksum for a file path
CalculateReader
func(io.Reader) (uint64, error)
Computes checksum while reading from a reader

Types

XXHash64Hasher

Implementation of Hasher using xxHash64 algorithm.
// XXHash64Hasher is the Hasher implementation that uses the xxHash64
// algorithm. It has no fields, so a value carries no per-instance state.
type XXHash64Hasher struct{}

Tracker

Tracks checksums during a conversion run to detect duplicates.
// Tracker tracks checksums during a conversion run to detect duplicates.
type Tracker struct {
    // mu synchronizes concurrent access to the tracker
    // (presumably guarding checksums — confirm against the method bodies).
    mu        sync.RWMutex
    // checksums maps each checksum to the file paths that produced it.
    checksums map[uint64][]string
}
checksums
map[uint64][]string
Maps checksums to file paths that produced them

Functions

NewXXHash64Hasher

Creates a new xxHash64 hasher instance.
func NewXXHash64Hasher() *XXHash64Hasher
hasher
*XXHash64Hasher
A ready-to-use xxHash64 hasher

Example

import "github.com/Azilone/Camera-Workflow/internal/checksum"

hasher := checksum.NewXXHash64Hasher()
sum, err := hasher.Calculate("/path/to/file.jpg")
if err != nil {
    log.Fatalf("Checksum failed: %v", err)
}
fmt.Printf("Checksum: %s\n", checksum.FormatChecksum(sum))

NewTracker

Creates a new empty tracker for duplicate detection.
func NewTracker() *Tracker
tracker
*Tracker
A new tracker instance

Example

tracker := checksum.NewTracker()

FormatChecksum

Formats a checksum as a human-friendly hexadecimal string.
func FormatChecksum(sum uint64) string
sum
uint64
required
Checksum value to format
formatted
string
16-character hexadecimal string (e.g., “a1b2c3d4e5f67890”)

Example

sum := uint64(12345678901234567890)
formatted := checksum.FormatChecksum(sum)
// Output: "ab54a98ceb1f0ad2"

Hasher Methods

Calculate

Computes the checksum for a file at the given path.
func (h *XXHash64Hasher) Calculate(path string) (uint64, error)
path
string
required
Path to file to checksum
checksum
uint64
xxHash64 checksum value
error
error
Returns an error if file cannot be opened or read

Example

hasher := checksum.NewXXHash64Hasher()
sum, err := hasher.Calculate("/path/to/photo.jpg")
if err != nil {
    log.Fatalf("Failed to calculate checksum: %v", err)
}
fmt.Printf("File checksum: %016x\n", sum)

CalculateReader

Computes checksum while reading from an io.Reader.
func (h *XXHash64Hasher) CalculateReader(r io.Reader) (uint64, error)
r
io.Reader
required
Reader to compute checksum from
checksum
uint64
xxHash64 checksum value
error
error
Returns an error if reading fails

Buffer Size

Uses a 32KB buffer for efficient reading while keeping memory usage low.

Example

// Open the file and stop on failure instead of discarding the error;
// otherwise a failed open would be handed to CalculateReader.
file, err := os.Open("/path/to/file.jpg")
if err != nil {
    log.Fatalf("Failed to open file: %v", err)
}
// Deferred only after the open is known to have succeeded.
defer file.Close()

hasher := checksum.NewXXHash64Hasher()
sum, err := hasher.CalculateReader(file)
if err != nil {
    log.Fatalf("Failed to calculate checksum: %v", err)
}
// Use the result so the snippet compiles when pasted into a function.
log.Printf("File checksum: %s", checksum.FormatChecksum(sum))

Tracker Methods

Register

Records that a checksum has been seen for a given file path.
func (t *Tracker) Register(sum uint64, path string)
sum
uint64
required
Checksum value
path
string
required
File path that produced this checksum

Thread Safety

This method is thread-safe and can be called from multiple goroutines.

Example

tracker := checksum.NewTracker()
hasher := checksum.NewXXHash64Hasher()

path := "/path/to/file.jpg"

// Register only a checksum that was actually computed: when Calculate
// fails, the returned sum is meaningless and must not be recorded.
sum, err := hasher.Calculate(path)
if err != nil {
    log.Fatalf("Failed to calculate checksum: %v", err)
}
tracker.Register(sum, path)

IsDuplicate

Checks whether a checksum has been registered before.
func (t *Tracker) IsDuplicate(sum uint64) bool
sum
uint64
required
Checksum value to check
duplicate
bool
Returns true if checksum was previously registered

Example

if tracker.IsDuplicate(sum) {
    log.Printf("Duplicate file detected")
    // Skip copying this file
}

FirstPath

Returns the first file path associated with a checksum.
func (t *Tracker) FirstPath(sum uint64) (string, bool)
sum
uint64
required
Checksum value to look up
path
string
First file path registered with this checksum
found
bool
Returns true if checksum was found

Example

if path, found := tracker.FirstPath(sum); found {
    log.Printf("Duplicate of: %s", path)
}

Stats

Returns statistics about tracked checksums.
func (t *Tracker) Stats() (unique int, duplicates int)
unique
int
Number of unique checksums tracked
duplicates
int
Number of duplicate files detected

Example

unique, duplicates := tracker.Stats()
log.Printf("Found %d unique files and %d duplicates", unique, duplicates)

Usage Example: Copy with Deduplication

package main

import (
    "fmt"
    "log"
    "path/filepath"
    
    "github.com/Azilone/Camera-Workflow/internal/checksum"
)

// main demonstrates copy-with-deduplication: every file is hashed, files
// whose checksum was already seen are skipped, and a summary is printed.
func main() {
    hasher := checksum.NewXXHash64Hasher()
    tracker := checksum.NewTracker()

    files := []string{
        "/source/photo1.jpg",
        "/source/photo2.jpg",
        "/source/photo3.jpg", // duplicate of photo1
    }

    for _, file := range files {
        sum, err := hasher.Calculate(file)
        if err != nil {
            log.Printf("Failed to checksum %s: %v", file, err)
            continue
        }

        // Look up the earlier file BEFORE registering this one, so the
        // lookup can only ever report a previously seen path. A single
        // FirstPath call also replaces the IsDuplicate + FirstPath pair.
        original, seen := tracker.FirstPath(sum)

        // Register every successfully hashed file, duplicates included.
        // The tracker only knows about paths passed to Register, so
        // skipping this for duplicates would leave Stats reporting zero.
        tracker.Register(sum, file)

        if seen {
            fmt.Printf("Skipping duplicate: %s (same as %s)\n",
                filepath.Base(file),
                filepath.Base(original))
            continue
        }

        fmt.Printf("Processing: %s [%s]\n",
            filepath.Base(file),
            checksum.FormatChecksum(sum))

        // ... copy file to destination ...
    }

    unique, duplicates := tracker.Stats()
    fmt.Printf("\nTotal: %d unique, %d duplicates\n", unique, duplicates)
}

Performance Characteristics

xxHash64 Benefits

  • Fast: 10+ GB/s on modern CPUs
  • Low memory: 32KB buffer for streaming calculation
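  • Low collision rate: 64-bit hashes make accidental collisions very unlikely, which suits duplicate detection (no protection against deliberately crafted collisions)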
  • Non-cryptographic: Optimized for speed over security

Memory Usage

  • Hasher: Minimal overhead (stateless)
  • Tracker: O(n) where n = number of registered file paths (each checksum keeps the list of paths that produced it)
  • Calculation: 32KB buffer per operation

Thread Safety

The Tracker type is thread-safe:
  • Uses sync.RWMutex for concurrent access
  • Safe to call Register, IsDuplicate, FirstPath, and Stats from multiple goroutines

When to Use Checksums

Enable checksum verification when:
  • Using --copy-only mode (automatic)
  • Need to detect duplicate files
  • Want to verify file integrity after copy
  • Working with critical data that requires validation

Related

  • Converter - Uses checksums for duplicate detection in copy mode
  • Config - Controls checksum verification via VerifyChecksum flag

Build docs developers (and LLMs) love