Practical Examples

Overview

This guide provides practical, production-ready examples for common image hashing scenarios.

Duplicate Detection

Basic Duplicate Detection

Detect if two images are duplicates or near-duplicates:

package main

import (
    "fmt"
    "github.com/ajdnik/imghash/v2"
)

// main hashes two image files with PDQ, prints the distance between the two
// hashes, and reports whether that distance is below the duplicate cutoff.
func main() {
    // PDQ is the hasher recommended for duplicate detection.
    hasher, err := imghash.NewPDQ()
    if err != nil {
        panic(err)
    }

    first, err := imghash.HashFile(hasher, "image1.jpg")
    if err != nil {
        panic(err)
    }
    second, err := imghash.HashFile(hasher, "image2.jpg")
    if err != nil {
        panic(err)
    }

    distance, err := hasher.Compare(first, second)
    if err != nil {
        panic(err)
    }
    fmt.Printf("Distance: %v\n", distance)

    // Only distances strictly below 10 count as (near-)duplicates.
    verdict := "Images are different"
    if distance < 10 {
        verdict = "Images are duplicates or near-duplicates"
    }
    fmt.Println(verdict)
}

Batch Duplicate Detection

Find all duplicates in a collection of images:

package main

import (
    "fmt"
    "path/filepath"
    "github.com/ajdnik/imghash/v2"
)

// ImageHash pairs an image file path with the hash computed for that file.
type ImageHash struct {
    // Path is the file path that was hashed.
    Path string
    // Hash is the hash computed for Path.
    Hash imghash.Hash
}

// findDuplicates hashes every image in imagePaths with PDQ and groups the
// images whose hash distance to the first member of a group is at most
// threshold.
//
// The returned map is keyed by the path of the first image of each group and
// holds the whole group, with the key itself as element 0. Images without any
// duplicate are omitted. Images that cannot be hashed, and pairs that cannot
// be compared, are reported on stdout and skipped. If the hasher cannot be
// created the error is reported and an empty map is returned.
func findDuplicates(imagePaths []string, threshold float64) map[string][]string {
    duplicates := make(map[string][]string)

    pdq, err := imghash.NewPDQ()
    if err != nil {
        fmt.Printf("Error creating hasher: %v\n", err)
        return duplicates
    }

    // Compute all hashes
    imageHashes := make([]ImageHash, 0, len(imagePaths))
    for _, path := range imagePaths {
        hash, err := imghash.HashFile(pdq, path)
        if err != nil {
            fmt.Printf("Error hashing %s: %v\n", path, err)
            continue
        }
        imageHashes = append(imageHashes, ImageHash{Path: path, Hash: hash})
    }

    // Find duplicate groups. visited marks images already assigned to a group
    // so that each image ends up in at most one group.
    visited := make([]bool, len(imageHashes))

    for i := range imageHashes {
        if visited[i] {
            continue
        }

        group := []string{imageHashes[i].Path}
        visited[i] = true

        for j := i + 1; j < len(imageHashes); j++ {
            if visited[j] {
                continue
            }

            dist, err := pdq.Compare(imageHashes[i].Hash, imageHashes[j].Hash)
            if err != nil {
                // The distance returned alongside an error is not meaningful;
                // comparing it to threshold could wrongly report a duplicate.
                fmt.Printf("Error comparing %s and %s: %v\n",
                    imageHashes[i].Path, imageHashes[j].Path, err)
                continue
            }
            if float64(dist) <= threshold {
                group = append(group, imageHashes[j].Path)
                visited[j] = true
            }
        }

        if len(group) > 1 {
            duplicates[imageHashes[i].Path] = group
        }
    }

    return duplicates
}

// main runs findDuplicates over a fixed list of sample files with a threshold
// of 10.0 and prints every duplicate group under the base name of its key.
func main() {
    samplePaths := []string{
        "photo1.jpg",
        "photo1_edited.jpg",
        "photo2.jpg",
        "photo2_compressed.jpg",
        "unique.jpg",
    }

    groups := findDuplicates(samplePaths, 10.0)

    fmt.Println("Duplicate groups found:")
    for first, members := range groups {
        fmt.Printf("\n%s:\n", filepath.Base(first))
        // Element 0 of a group is the key itself, so only the rest is listed.
        others := members[1:]
        for _, other := range others {
            fmt.Printf("  - %s\n", filepath.Base(other))
        }
    }
}

Deduplication Pipeline

Remove duplicates from a directory:

package main

import (
    "fmt"
    "os"
    "path/filepath"
    "github.com/ajdnik/imghash/v2"
)

// deduplicateDirectory walks dir, hashes every file whose extension is
// exactly ".jpg", ".jpeg" or ".png" (the match is case-sensitive) with PDQ
// and, for each group of images whose hash distance to the group's first
// image is at most threshold, keeps the largest file and removes the others.
//
// When dryRun is true nothing is deleted; the files that a real run would
// remove are only reported and counted. Files that cannot be hashed or
// stat'ed, and pairs that cannot be compared, are reported on stdout and
// skipped. An error is returned when the hasher cannot be created or the
// directory walk fails.
func deduplicateDirectory(dir string, threshold float64, dryRun bool) error {
    pdq, err := imghash.NewPDQ()
    if err != nil {
        return fmt.Errorf("creating PDQ hasher: %w", err)
    }

    // Find all image files
    var imagePaths []string
    err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }
        if info.IsDir() {
            return nil
        }
        switch filepath.Ext(path) {
        case ".jpg", ".jpeg", ".png":
            imagePaths = append(imagePaths, path)
        }
        return nil
    })
    if err != nil {
        return err
    }

    fmt.Printf("Found %d images\n", len(imagePaths))

    // Compute hashes
    type HashInfo struct {
        path string
        hash imghash.Hash
        size int64
    }

    hashes := make([]HashInfo, 0, len(imagePaths))
    for _, path := range imagePaths {
        hash, err := imghash.HashFile(pdq, path)
        if err != nil {
            fmt.Printf("Skipping %s: %v\n", path, err)
            continue
        }

        // Stat can fail (for example if the file disappeared after it was
        // hashed); without a size the file cannot take part in the
        // keep-the-largest decision, so it is skipped.
        info, err := os.Stat(path)
        if err != nil {
            fmt.Printf("Skipping %s: %v\n", path, err)
            continue
        }
        hashes = append(hashes, HashInfo{
            path: path,
            hash: hash,
            size: info.Size(),
        })
    }

    // Find and remove duplicates (keep largest file)
    visited := make([]bool, len(hashes))
    removed := 0

    for i := range hashes {
        if visited[i] {
            continue
        }

        visited[i] = true
        keeper := hashes[i]

        for j := i + 1; j < len(hashes); j++ {
            if visited[j] {
                continue
            }

            dist, err := pdq.Compare(hashes[i].hash, hashes[j].hash)
            if err != nil {
                // Never delete a file on the strength of a failed comparison.
                fmt.Printf("Error comparing %s and %s: %v\n",
                    hashes[i].path, hashes[j].path, err)
                continue
            }
            if float64(dist) > threshold {
                continue
            }
            visited[j] = true

            // Keep the larger file
            toRemove := hashes[j]
            if hashes[j].size > keeper.size {
                toRemove = keeper
                keeper = hashes[j]
            }

            fmt.Printf("Duplicate: %s (keeping %s)\n",
                filepath.Base(toRemove.path),
                filepath.Base(keeper.path))

            if dryRun {
                // Count what a real run would delete so the summary is useful.
                removed++
                continue
            }
            if err := os.Remove(toRemove.path); err != nil {
                fmt.Printf("Error removing %s: %v\n", toRemove.path, err)
                continue
            }
            removed++
        }
    }

    if dryRun {
        fmt.Printf("\nDry run: would remove %d duplicates\n", removed)
    } else {
        fmt.Printf("\nRemoved %d duplicates\n", removed)
    }

    return nil
}

// main previews the deduplication of ./photos with a dry run and exits with
// status 1 if the run fails.
func main() {
    // Dry run first to preview
    if err := deduplicateDirectory("./photos", 10.0, true); err != nil {
        fmt.Printf("Error: %v\n", err)
        os.Exit(1)
    }

    // Uncomment to actually remove duplicates
    // if err := deduplicateDirectory("./photos", 10.0, false); err != nil {
    //     fmt.Printf("Error: %v\n", err)
    //     os.Exit(1)
    // }
}

Image Similarity Search

Find Similar Images

Find the most similar images to a query image:

package main

import (
    "fmt"
    "sort"
    "github.com/ajdnik/imghash/v2"
)

// SearchResult is one hit of a similarity search: an image path together
// with its distance to the query image.
type SearchResult struct {
    // Path is the path of the matched image.
    Path     string
    // Distance is the hash distance to the query, converted to float64.
    Distance float64
}

// searchSimilar hashes queryPath and every path in databasePaths with GIST
// and returns at most topK database images ordered by ascending distance to
// the query (most similar first). Images at equal distance keep their
// databasePaths order. A topK of zero or less yields an empty result.
//
// Database images that cannot be hashed or compared are reported on stdout
// and left out of the ranking. The function panics if the hasher cannot be
// created or the query image cannot be hashed.
func searchSimilar(queryPath string, databasePaths []string, topK int) []SearchResult {
    // Use GIST for scene-level similarity
    gist, err := imghash.NewGIST()
    if err != nil {
        panic(err)
    }

    // Hash query image
    queryHash, err := imghash.HashFile(gist, queryPath)
    if err != nil {
        panic(err)
    }

    // Compute distances to all database images
    results := make([]SearchResult, 0, len(databasePaths))
    for _, path := range databasePaths {
        hash, err := imghash.HashFile(gist, path)
        if err != nil {
            fmt.Printf("Skipping %s: %v\n", path, err)
            continue
        }

        dist, err := gist.Compare(queryHash, hash)
        if err != nil {
            // The distance returned alongside an error is not meaningful and
            // must not be ranked as if it were a real match.
            fmt.Printf("Skipping %s: %v\n", path, err)
            continue
        }
        results = append(results, SearchResult{
            Path:     path,
            Distance: float64(dist),
        })
    }

    // Sort by distance (ascending = most similar first); stable so that ties
    // come out in a deterministic order.
    sort.SliceStable(results, func(i, j int) bool {
        return results[i].Distance < results[j].Distance
    })

    // Return top K results. A negative topK would make the slice expression
    // below panic, so it is treated like zero.
    if topK < 0 {
        topK = 0
    }
    if len(results) > topK {
        results = results[:topK]
    }

    return results
}

// main searches a small fixed image list for the three images most similar
// to query.jpg and prints them in ranked order.
func main() {
    candidates := []string{
        "images/beach1.jpg",
        "images/beach2.jpg",
        "images/mountain1.jpg",
        "images/city1.jpg",
        "images/sunset.jpg",
    }

    hits := searchSimilar("query.jpg", candidates, 3)

    fmt.Println("Top 3 similar images:")
    for rank, hit := range hits {
        // Ranks are printed 1-based.
        fmt.Printf("%d. %s (distance: %.4f)\n", rank+1, hit.Path, hit.Distance)
    }
}

Reverse Image Search

Build an in-memory index for fast similarity search:

package main

import (
    "fmt"
    "sort"

    "github.com/ajdnik/imghash/v2"
)

// ImageIndex is an in-memory index of image hashes keyed by file path.
type ImageIndex struct {
    // hasher computes the hashes stored in index and compares them.
    hasher imghash.HasherComparer
    // index maps an image path to the hash stored for that path.
    index  map[string]imghash.Hash
}

// NewImageIndex returns an empty ImageIndex that hashes and compares images
// with GIST.
//
// The signature has no error result, so a failure to create the hasher
// panics instead of handing back an index that cannot hash anything.
func NewImageIndex() *ImageIndex {
    gist, err := imghash.NewGIST()
    if err != nil {
        panic(fmt.Errorf("creating GIST hasher: %w", err))
    }
    return &ImageIndex{
        hasher: gist,
        index:  make(map[string]imghash.Hash),
    }
}

// Add hashes the image at path with the index's hasher and stores the hash
// under path, overwriting any hash already stored for the same path. When
// hashing fails the index is left untouched and the error is returned.
func (idx *ImageIndex) Add(path string) error {
    computed, err := imghash.HashFile(idx.hasher, path)
    if err == nil {
        idx.index[path] = computed
    }
    return err
}

// AddBatch adds every path in paths to the index. A path that fails to index
// is reported as a warning on stdout and does not stop the batch; the
// returned error is always nil.
func (idx *ImageIndex) AddBatch(paths []string) error {
    for i := range paths {
        err := idx.Add(paths[i])
        if err == nil {
            continue
        }
        fmt.Printf("Warning: failed to index %s: %v\n", paths[i], err)
    }
    return nil
}

// Search hashes the image at queryPath and returns the indexed images whose
// distance to it is at most maxDistance, closest first, truncated to at most
// maxResults entries. Entries at equal distance are ordered by path so the
// result does not depend on map iteration order. A maxResults of zero or
// less yields an empty result.
//
// It returns nil when the query image cannot be hashed. Indexed entries
// whose comparison with the query fails are left out of the result.
//
// SearchResult is not declared in this example; it is the type declared in
// the "Find Similar Images" example.
func (idx *ImageIndex) Search(queryPath string, maxResults int, maxDistance float64) []SearchResult {
    queryHash, err := imghash.HashFile(idx.hasher, queryPath)
    if err != nil {
        return nil
    }

    results := make([]SearchResult, 0, len(idx.index))
    for path, hash := range idx.index {
        dist, err := idx.hasher.Compare(queryHash, hash)
        if err != nil {
            // The distance returned alongside an error is not meaningful and
            // must not be reported as a match.
            continue
        }
        distFloat := float64(dist)

        if distFloat <= maxDistance {
            results = append(results, SearchResult{
                Path:     path,
                Distance: distFloat,
            })
        }
    }

    sort.Slice(results, func(i, j int) bool {
        if results[i].Distance != results[j].Distance {
            return results[i].Distance < results[j].Distance
        }
        return results[i].Path < results[j].Path
    })

    // A negative maxResults would make the slice expression below panic, so
    // it is treated like zero.
    if maxResults < 0 {
        maxResults = 0
    }
    if len(results) > maxResults {
        results = results[:maxResults]
    }

    return results
}

// main builds an index from a fixed list of images, reports how many were
// indexed, and prints up to five matches for query_beach.jpg that lie within
// a distance of 0.5.
func main() {
    // Build index
    index := NewImageIndex()

    fmt.Println("Indexing images...")
    index.AddBatch([]string{
        "db/beach1.jpg", "db/beach2.jpg", "db/mountain1.jpg",
        "db/city1.jpg", "db/sunset1.jpg", "db/forest1.jpg",
    })
    fmt.Printf("Indexed %d images\n\n", len(index.index))

    // Search
    matches := index.Search("query_beach.jpg", 5, 0.5)

    fmt.Println("Search results:")
    for rank, match := range matches {
        // Ranks are printed 1-based.
        fmt.Printf("%d. %s (distance: %.4f)\n", rank+1, match.Path, match.Distance)
    }
}

Content Moderation

Blocklist Matching

Check if an uploaded image matches known inappropriate content:

package main

import (
    "fmt"
    "github.com/ajdnik/imghash/v2"
)

// ContentModerator checks images against a blocklist of known hashes.
type ContentModerator struct {
    // hasher computes image hashes and compares them with blocklist entries.
    hasher    imghash.HasherComparer
    // blocklist maps a caller-chosen entry ID to the hash of a blocked image.
    blocklist map[string]imghash.Hash
    // threshold is the largest distance that still counts as a match.
    threshold float64
}

// NewContentModerator returns a ContentModerator with an empty blocklist
// that hashes images with PDQ and treats any distance up to and including
// threshold as a match.
//
// The signature has no error result, so a failure to create the hasher
// panics instead of handing back a moderator that cannot check anything.
func NewContentModerator(threshold float64) *ContentModerator {
    pdq, err := imghash.NewPDQ()
    if err != nil {
        panic(fmt.Errorf("creating PDQ hasher: %w", err))
    }
    return &ContentModerator{
        hasher:    pdq,
        blocklist: make(map[string]imghash.Hash),
        threshold: threshold,
    }
}

// AddToBlocklist hashes the image at path and stores the hash under id,
// overwriting any entry already stored for the same id. When hashing fails
// the blocklist is left untouched and the error is returned.
func (cm *ContentModerator) AddToBlocklist(id, path string) error {
    blocked, err := imghash.HashFile(cm.hasher, path)
    if err == nil {
        cm.blocklist[id] = blocked
    }
    return err
}

// Check hashes the image at imagePath and compares it with every blocklist
// entry. It returns true and the entry's ID as soon as one entry lies within
// the moderator's threshold; when several entries match, which ID is
// returned depends on map iteration order.
//
// If no entry matches, it returns false and an empty ID. In that case a
// non-nil error means at least one blocklist entry could not be compared, so
// the negative result is incomplete and should not be read as a clean pass.
// An error is also returned when the image itself cannot be hashed.
func (cm *ContentModerator) Check(imagePath string) (bool, string, error) {
    hash, err := imghash.HashFile(cm.hasher, imagePath)
    if err != nil {
        return false, "", err
    }

    // firstErr remembers the first failed comparison; the remaining entries
    // are still checked because one of them may match.
    var firstErr error
    for id, blockHash := range cm.blocklist {
        dist, err := cm.hasher.Compare(hash, blockHash)
        if err != nil {
            if firstErr == nil {
                firstErr = fmt.Errorf("comparing against blocklist entry %q: %w", id, err)
            }
            continue
        }

        if float64(dist) <= cm.threshold {
            return true, id, nil
        }
    }

    return false, "", firstErr
}

// main builds a two-entry blocklist and checks one uploaded image against
// it. It panics if a blocklist image cannot be added: continuing with an
// incomplete blocklist would approve content that should have been blocked.
func main() {
    moderator := NewContentModerator(10.0)

    // Add known bad content to blocklist
    known := []struct{ id, path string }{
        {"harmful-001", "blocklist/image1.jpg"},
        {"harmful-002", "blocklist/image2.jpg"},
    }
    for _, entry := range known {
        if err := moderator.AddToBlocklist(entry.id, entry.path); err != nil {
            panic(err)
        }
    }

    // Check new upload
    blocked, matchID, err := moderator.Check("uploads/new_image.jpg")
    if err != nil {
        panic(err)
    }

    if blocked {
        fmt.Printf("⛔ Content blocked - matches %s\n", matchID)
    } else {
        fmt.Println("✅ Content approved")
    }
}

Working with Different Image Sources

Reading from File

package main

import (
    "fmt"
    "github.com/ajdnik/imghash/v2"
)

// main hashes image.jpg straight from disk with the Average hasher and
// prints the resulting hash.
func main() {
    hasher, err := imghash.NewAverage()
    if err != nil {
        panic(err)
    }

    digest, err := imghash.HashFile(hasher, "image.jpg")
    if err != nil {
        panic(err)
    }
    fmt.Println(digest)
}

Reading from io.Reader

package main

import (
    "fmt"
    "os"
    "github.com/ajdnik/imghash/v2"
)

// main opens image.jpg, hashes it through the io.Reader entry point with the
// Average hasher, and prints the resulting hash.
func main() {
    f, err := os.Open("image.jpg")
    if err != nil {
        panic(err)
    }
    defer f.Close()

    avg, err := imghash.NewAverage()
    if err != nil {
        panic(err)
    }

    hash, err := imghash.HashReader(avg, f)
    if err != nil {
        panic(err)
    }

    fmt.Println(hash)
}

Hashing HTTP Response

package main

import (
    "fmt"
    "net/http"
    "github.com/ajdnik/imghash/v2"
)

// main downloads an image over HTTP, hashes the response body with the
// Average hasher, and prints the resulting hash.
//
// NOTE: http.Get uses http.DefaultClient, which has no timeout; production
// code should use an http.Client with Timeout set.
func main() {
    resp, err := http.Get("https://example.com/image.jpg")
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    // A non-200 response carries an error page rather than image data, so
    // fail here with the real cause instead of a confusing decode error.
    if resp.StatusCode != http.StatusOK {
        panic(fmt.Errorf("unexpected HTTP status: %s", resp.Status))
    }

    avg, err := imghash.NewAverage()
    if err != nil {
        panic(err)
    }

    hash, err := imghash.HashReader(avg, resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(hash)
}

Hashing image.Image

package main

import (
    "fmt"
    "image"
    _ "image/jpeg"
    "os"
    "github.com/ajdnik/imghash/v2"
)

// main decodes image.jpg into an image.Image, hashes the decoded image with
// the Average hasher, and prints the resulting hash.
func main() {
    // Open and decode image
    f, err := os.Open("image.jpg")
    if err != nil {
        panic(err)
    }
    defer f.Close()

    img, _, err := image.Decode(f)
    if err != nil {
        panic(err)
    }

    // Hash the image.Image directly
    avg, err := imghash.NewAverage()
    if err != nil {
        panic(err)
    }

    hash, err := avg.Calculate(img)
    if err != nil {
        panic(err)
    }

    fmt.Println(hash)
}

Advanced Techniques

Multi-Algorithm Consensus

Use multiple algorithms and require agreement:

package main

import (
    "fmt"
    "github.com/ajdnik/imghash/v2"
)

// robustDuplicateCheck compares img1 and img2 with three algorithms
// (Average, PDQ and ColorMoment), each with its own distance threshold, and
// reports true when at least two of the three consider the images
// duplicates.
//
// Each algorithm's verdict is printed. An algorithm that fails at any step
// (creating the hasher, hashing either file, or comparing) has its error
// printed and casts no vote; the distance returned alongside an error is
// never compared with the threshold.
func robustDuplicateCheck(img1, img2 string) bool {
    type AlgorithmCheck struct {
        name      string
        threshold float64
        // distance hashes both images with one algorithm and returns the
        // distance between the two hashes.
        distance func() (float64, error)
    }

    checks := []AlgorithmCheck{
        {name: "Average", threshold: 5.0, distance: func() (float64, error) {
            avg, err := imghash.NewAverage()
            if err != nil {
                return 0, err
            }
            h1, err := imghash.HashFile(avg, img1)
            if err != nil {
                return 0, err
            }
            h2, err := imghash.HashFile(avg, img2)
            if err != nil {
                return 0, err
            }
            dist, err := avg.Compare(h1, h2)
            return float64(dist), err
        }},
        {name: "PDQ", threshold: 10.0, distance: func() (float64, error) {
            pdq, err := imghash.NewPDQ()
            if err != nil {
                return 0, err
            }
            h1, err := imghash.HashFile(pdq, img1)
            if err != nil {
                return 0, err
            }
            h2, err := imghash.HashFile(pdq, img2)
            if err != nil {
                return 0, err
            }
            dist, err := pdq.Compare(h1, h2)
            return float64(dist), err
        }},
        {name: "ColorMoment", threshold: 15.0, distance: func() (float64, error) {
            cm, err := imghash.NewColorMoment()
            if err != nil {
                return 0, err
            }
            h1, err := imghash.HashFile(cm, img1)
            if err != nil {
                return 0, err
            }
            h2, err := imghash.HashFile(cm, img2)
            if err != nil {
                return 0, err
            }
            dist, err := cm.Compare(h1, h2)
            return float64(dist), err
        }},
    }

    // Require at least 2 out of 3 to agree
    votes := 0
    for _, check := range checks {
        dist, err := check.distance()
        if err != nil {
            fmt.Printf("%s: error: %v\n", check.name, err)
            continue
        }

        match := dist <= check.threshold
        fmt.Printf("%s: %v\n", check.name, match)
        if match {
            votes++
        }
    }

    return votes >= 2
}

// main runs the multi-algorithm check on img1.jpg and img2.jpg and prints
// the consensus verdict.
func main() {
    verdict := "\n❌ Consensus: Images are different"
    if robustDuplicateCheck("img1.jpg", "img2.jpg") {
        verdict = "\n✅ Consensus: Images are duplicates"
    }
    fmt.Println(verdict)
}

Hash Persistence

Save and load hashes for faster subsequent comparisons:

package main

import (
    "encoding/json"
    "fmt"
    "os"
    "github.com/ajdnik/imghash/v2"
)

// HashDatabase is the JSON-serializable form of a set of image hashes.
type HashDatabase struct {
    // Hashes maps a key (the image path in this example) to raw hash bytes.
    // encoding/json writes []byte values as base64-encoded strings.
    Hashes map[string][]byte `json:"hashes"`
}

// Save writes db as JSON to the file at path, creating the file or
// truncating an existing one. It returns the first error encountered while
// creating, encoding, or closing the file.
func (db *HashDatabase) Save(path string) error {
    f, err := os.Create(path)
    if err != nil {
        return err
    }

    if err := json.NewEncoder(f).Encode(db); err != nil {
        // The encode error is the one worth reporting; the close error is
        // secondary once the write has already failed.
        f.Close()
        return err
    }

    // Close can report a write error that only surfaces when buffered data
    // is flushed, so its result must not be dropped for a file being written.
    return f.Close()
}

// LoadHashDatabase reads a JSON-encoded HashDatabase from the file at path.
// If the file cannot be opened it returns nil and the error. If decoding
// fails it still returns the (possibly partially filled) database together
// with the decode error.
func LoadHashDatabase(path string) (*HashDatabase, error) {
    file, openErr := os.Open(path)
    if openErr != nil {
        return nil, openErr
    }
    defer file.Close()

    db := new(HashDatabase)
    decodeErr := json.NewDecoder(file).Decode(db)
    return db, decodeErr
}

// main hashes three images with PDQ, stores the hashes in a HashDatabase,
// saves it to hashes.json, loads it back, and prints the loaded hashes.
// Images that cannot be hashed, or whose hash is not an imghash.Binary, are
// reported and left out of the database. Failing to create the hasher or to
// save or load the database panics.
func main() {
    pdq, err := imghash.NewPDQ()
    if err != nil {
        panic(err)
    }

    // Create and populate database
    db := &HashDatabase{
        Hashes: make(map[string][]byte),
    }

    images := []string{"img1.jpg", "img2.jpg", "img3.jpg"}
    for _, img := range images {
        hash, err := imghash.HashFile(pdq, img)
        if err != nil {
            fmt.Printf("Skipping %s: %v\n", img, err)
            continue
        }

        // Binary hashes can be cast to []byte for storage
        binHash, ok := hash.(imghash.Binary)
        if !ok {
            fmt.Printf("Skipping %s: hash type %T is not imghash.Binary\n", img, hash)
            continue
        }
        db.Hashes[img] = []byte(binHash)
    }

    // Save to disk
    if err := db.Save("hashes.json"); err != nil {
        panic(err)
    }
    fmt.Println("Saved hash database")

    // Load from disk
    loaded, err := LoadHashDatabase("hashes.json")
    if err != nil {
        panic(err)
    }
    fmt.Printf("Loaded %d hashes\n", len(loaded.Hashes))

    // Use loaded hashes
    for path, hashBytes := range loaded.Hashes {
        fmt.Printf("%s: %v\n", path, imghash.Binary(hashBytes))
    }
}

Error Handling

Comprehensive Error Handling

package main

import (
    "errors"
    "fmt"
    "os"
    "github.com/ajdnik/imghash/v2"
)

// hashImageSafely computes the PDQ hash of the image at path and wraps every
// failure with context.
//
// A missing file is reported with an error that wraps os.ErrNotExist, so
// callers can detect it with errors.Is. Other stat failures, a failure to
// create the hasher, and a failure to hash the image each wrap the
// underlying error.
func hashImageSafely(path string) (imghash.Hash, error) {
    // Check file exists
    if _, err := os.Stat(path); err != nil {
        if errors.Is(err, os.ErrNotExist) {
            return nil, fmt.Errorf("image file not found: %s: %w", path, os.ErrNotExist)
        }
        return nil, fmt.Errorf("error accessing file: %w", err)
    }

    // Create hasher
    pdq, err := imghash.NewPDQ()
    if err != nil {
        return nil, fmt.Errorf("failed to create hasher: %w", err)
    }

    // Compute hash
    hash, err := imghash.HashFile(pdq, path)
    if err != nil {
        return nil, fmt.Errorf("failed to hash image: %w", err)
    }

    return hash, nil
}

// compareImagesSafely hashes the images at path1 and path2, compares the two
// hashes with PDQ, and prints the distance. It returns an error, wrapped
// with context, if the hasher cannot be created, either image cannot be
// hashed, or the comparison fails. An incompatible-hash failure gets its own
// message and still wraps imghash.ErrIncompatibleHash for errors.Is.
func compareImagesSafely(path1, path2 string) error {
    pdq, err := imghash.NewPDQ()
    if err != nil {
        return fmt.Errorf("failed to create hasher: %w", err)
    }

    h1, err := hashImageSafely(path1)
    if err != nil {
        return err
    }

    h2, err := hashImageSafely(path2)
    if err != nil {
        return err
    }

    dist, err := pdq.Compare(h1, h2)
    if err != nil {
        if errors.Is(err, imghash.ErrIncompatibleHash) {
            return fmt.Errorf("hashes are incompatible types: %w", err)
        }
        return fmt.Errorf("comparison failed: %w", err)
    }

    fmt.Printf("Distance: %v\n", dist)
    return nil
}

// main compares img1.jpg with img2.jpg and, if the comparison fails, prints
// the error and exits with status 1.
func main() {
    err := compareImagesSafely("img1.jpg", "img2.jpg")
    if err == nil {
        return
    }
    fmt.Printf("Error: %v\n", err)
    os.Exit(1)
}

Get Started

Core Concepts

Binary Algorithms

Float64 Algorithms

UInt8 Algorithms

Guides

Practical Examples

Overview

Duplicate Detection

Basic Duplicate Detection

Batch Duplicate Detection

Deduplication Pipeline

Image Similarity Search

Find Similar Images

Reverse Image Search

Content Moderation

Blocklist Matching

Working with Different Image Sources

Reading from File

Reading from io.Reader

Hashing HTTP Response

Hashing image.Image

Advanced Techniques

Multi-Algorithm Consensus

Hash Persistence

Error Handling

Comprehensive Error Handling

Next Steps

API Reference

Algorithm Guide

Build docs developers (and LLMs) love

Get Started

Core Concepts

Binary Algorithms

Float64 Algorithms

UInt8 Algorithms

Guides

Overview

Duplicate Detection

Basic Duplicate Detection

Batch Duplicate Detection

Deduplication Pipeline

Image Similarity Search

Find Similar Images

Reverse Image Search

Content Moderation

Blocklist Matching

Working with Different Image Sources

Reading from File

Reading from io.Reader

Hashing HTTP Response

Hashing image.Image

Advanced Techniques

Multi-Algorithm Consensus

Hash Persistence

Error Handling

Comprehensive Error Handling

Next Steps

API Reference

Algorithm Guide

Build docs developers (and LLMs) love

Overview

Duplicate Detection

Basic Duplicate Detection

Batch Duplicate Detection

Deduplication Pipeline

Image Similarity Search

Find Similar Images

Reverse Image Search

Content Moderation

Blocklist Matching

Working with Different Image Sources

Reading from File

Reading from io.Reader

Hashing HTTP Response

Hashing image.Image

Advanced Techniques

Multi-Algorithm Consensus

Hash Persistence

Error Handling

Comprehensive Error Handling

Next Steps