Overview
This guide provides practical, production-ready examples for common image hashing scenarios.

Duplicate Detection
Basic Duplicate Detection
Detect if two images are duplicates or near-duplicates:

package main
import (
"fmt"
"github.com/ajdnik/imghash/v2"
)
// main hashes two image files with PDQ, prints the distance between the two
// hashes, and reports whether that distance falls under the duplicate cutoff.
// Any failure along the way aborts the program with a panic.
func main() {
	// PDQ is the recommended hasher for duplicate detection.
	hasher, err := imghash.NewPDQ()
	if err != nil {
		panic(err)
	}

	first, err := imghash.HashFile(hasher, "image1.jpg")
	if err != nil {
		panic(err)
	}

	second, err := imghash.HashFile(hasher, "image2.jpg")
	if err != nil {
		panic(err)
	}

	distance, err := hasher.Compare(first, second)
	if err != nil {
		panic(err)
	}
	fmt.Printf("Distance: %v\n", distance)

	// Distances below 10 are reported as duplicates or near-duplicates.
	verdict := "Images are different"
	if distance < 10 {
		verdict = "Images are duplicates or near-duplicates"
	}
	fmt.Println(verdict)
}
Batch Duplicate Detection
Find all duplicates in a collection of images:

package main
import (
"fmt"
"path/filepath"
"github.com/ajdnik/imghash/v2"
)
// ImageHash pairs an image file path with the hash computed for that file.
type ImageHash struct {
// Path is the file path the hash was computed from.
Path string
// Hash is the value returned by imghash.HashFile for Path.
Hash imghash.Hash
}
// findDuplicates hashes every path in imagePaths with PDQ and groups images
// whose hash distance to the first image of a group is at most threshold.
//
// The returned map is keyed by the first path of each group; the value holds
// every path in that group, starting with the key itself. Images that match
// nothing are omitted. Files that cannot be hashed and pairs that cannot be
// compared are reported on standard output and skipped. If the hasher cannot
// be created, the error is reported and an empty map is returned.
func findDuplicates(imagePaths []string, threshold float64) map[string][]string {
	duplicates := make(map[string][]string)

	pdq, err := imghash.NewPDQ()
	if err != nil {
		fmt.Printf("Error creating hasher: %v\n", err)
		return duplicates
	}

	// Compute all hashes
	imageHashes := make([]ImageHash, 0, len(imagePaths))
	for _, path := range imagePaths {
		hash, err := imghash.HashFile(pdq, path)
		if err != nil {
			fmt.Printf("Error hashing %s: %v\n", path, err)
			continue
		}
		imageHashes = append(imageHashes, ImageHash{Path: path, Hash: hash})
	}

	// Find duplicate groups. visited[i] is set once image i belongs to a
	// group, so every image is reported at most once.
	visited := make([]bool, len(imageHashes))
	for i := range imageHashes {
		if visited[i] {
			continue
		}
		visited[i] = true

		group := []string{imageHashes[i].Path}
		for j := i + 1; j < len(imageHashes); j++ {
			if visited[j] {
				continue
			}
			dist, err := pdq.Compare(imageHashes[i].Hash, imageHashes[j].Hash)
			if err != nil {
				// Without this check the zero-value distance would pass the
				// threshold test and produce a false duplicate.
				fmt.Printf("Error comparing %s and %s: %v\n",
					imageHashes[i].Path, imageHashes[j].Path, err)
				continue
			}
			if float64(dist) <= threshold {
				group = append(group, imageHashes[j].Path)
				visited[j] = true
			}
		}

		if len(group) > 1 {
			duplicates[imageHashes[i].Path] = group
		}
	}
	return duplicates
}
// main runs findDuplicates over a fixed list of photos with a threshold of
// 10.0 and prints each group: the group's key first, then the remaining
// members as a bulleted list of base names.
func main() {
	duplicates := findDuplicates([]string{
		"photo1.jpg",
		"photo1_edited.jpg",
		"photo2.jpg",
		"photo2_compressed.jpg",
		"unique.jpg",
	}, 10.0)

	fmt.Println("Duplicate groups found:")
	for original, group := range duplicates {
		fmt.Printf("\n%s:\n", filepath.Base(original))
		// The first entry of a group is the key itself, so skip it.
		rest := group[1:]
		for k := range rest {
			fmt.Printf(" - %s\n", filepath.Base(rest[k]))
		}
	}
}
Deduplication Pipeline
Remove duplicates from a directory:

package main
import (
"fmt"
"os"
"path/filepath"
"github.com/ajdnik/imghash/v2"
)
// deduplicateDirectory walks dir, hashes every file whose extension is .jpg,
// .jpeg or .png with PDQ, and removes files whose hash distance to an earlier
// file is at most threshold. Within a group of duplicates the largest file
// seen so far is kept.
//
// When dryRun is true nothing is deleted; the duplicates that would be
// removed are only printed and counted. Files that cannot be hashed or
// stat'ed, and pairs that cannot be compared, are reported on standard output
// and skipped, so a failed comparison never causes a deletion.
//
// It returns an error if the hasher cannot be created or the directory walk
// fails. Failures to remove an individual file are printed, not returned.
func deduplicateDirectory(dir string, threshold float64, dryRun bool) error {
	pdq, err := imghash.NewPDQ()
	if err != nil {
		return fmt.Errorf("creating hasher: %w", err)
	}

	// Find all image files
	var imagePaths []string
	err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		switch filepath.Ext(path) {
		case ".jpg", ".jpeg", ".png":
			imagePaths = append(imagePaths, path)
		}
		return nil
	})
	if err != nil {
		return err
	}
	fmt.Printf("Found %d images\n", len(imagePaths))

	// Compute hashes
	type hashInfo struct {
		path string
		hash imghash.Hash
		size int64
	}
	hashes := make([]hashInfo, 0, len(imagePaths))
	for _, path := range imagePaths {
		hash, err := imghash.HashFile(pdq, path)
		if err != nil {
			fmt.Printf("Skipping %s: %v\n", path, err)
			continue
		}
		// The file may have disappeared since the walk; skip it rather than
		// dereference a nil FileInfo.
		info, err := os.Stat(path)
		if err != nil {
			fmt.Printf("Skipping %s: %v\n", path, err)
			continue
		}
		hashes = append(hashes, hashInfo{path: path, hash: hash, size: info.Size()})
	}

	// Find and remove duplicates (keep largest file)
	visited := make([]bool, len(hashes))
	removed := 0
	for i := range hashes {
		if visited[i] {
			continue
		}
		visited[i] = true
		keeper := hashes[i]

		for j := i + 1; j < len(hashes); j++ {
			if visited[j] {
				continue
			}
			dist, err := pdq.Compare(hashes[i].hash, hashes[j].hash)
			if err != nil {
				// A failed comparison leaves dist at its zero value, which
				// would otherwise look like a perfect match and delete a file.
				fmt.Printf("Skipping comparison of %s and %s: %v\n",
					hashes[i].path, hashes[j].path, err)
				continue
			}
			if float64(dist) > threshold {
				continue
			}
			visited[j] = true

			// Keep the larger file
			toRemove := hashes[j]
			if hashes[j].size > keeper.size {
				toRemove, keeper = keeper, hashes[j]
			}
			fmt.Printf("Duplicate: %s (keeping %s)\n",
				filepath.Base(toRemove.path),
				filepath.Base(keeper.path))

			if dryRun {
				// Count the file so the dry-run summary reflects what a real
				// run would remove.
				removed++
				continue
			}
			if err := os.Remove(toRemove.path); err != nil {
				fmt.Printf("Error removing %s: %v\n", toRemove.path, err)
				continue
			}
			removed++
		}
	}

	if dryRun {
		fmt.Printf("\nDry run: would remove %d duplicates\n", removed)
	} else {
		fmt.Printf("\nRemoved %d duplicates\n", removed)
	}
	return nil
}
// main previews the deduplication of ./photos with a dry run. A failure is
// printed and the program exits with status 1 instead of being ignored.
func main() {
	// Dry run first to preview
	if err := deduplicateDirectory("./photos", 10.0, true); err != nil {
		fmt.Printf("Error: %v\n", err)
		os.Exit(1)
	}
	// Uncomment to actually remove duplicates
	// if err := deduplicateDirectory("./photos", 10.0, false); err != nil {
	// 	fmt.Printf("Error: %v\n", err)
	// 	os.Exit(1)
	// }
}
Image Similarity Search
Find Similar Images
Find the most similar images to a query image:

package main
import (
"fmt"
"sort"
"github.com/ajdnik/imghash/v2"
)
// SearchResult is a single hit produced by a similarity search.
type SearchResult struct {
// Path is the path of the matched image file.
Path string
// Distance is the hash distance between the query image and Path,
// converted to float64; smaller values are sorted first by the searches.
Distance float64
}
// searchSimilar hashes queryPath and every path in databasePaths with GIST
// and returns up to topK results ordered by ascending distance (most similar
// first). Results with equal distance keep their databasePaths order.
//
// Database images that cannot be hashed or compared are reported on standard
// output and skipped. A negative topK is treated as zero. The function panics
// if the hasher cannot be created or the query image cannot be hashed.
func searchSimilar(queryPath string, databasePaths []string, topK int) []SearchResult {
	// Use GIST for scene-level similarity
	gist, err := imghash.NewGIST()
	if err != nil {
		panic(err)
	}

	// Hash query image
	queryHash, err := imghash.HashFile(gist, queryPath)
	if err != nil {
		panic(err)
	}

	// Compute distances to all database images
	results := make([]SearchResult, 0, len(databasePaths))
	for _, path := range databasePaths {
		hash, err := imghash.HashFile(gist, path)
		if err != nil {
			fmt.Printf("Skipping %s: %v\n", path, err)
			continue
		}
		dist, err := gist.Compare(queryHash, hash)
		if err != nil {
			// A failed comparison would otherwise rank first with the
			// zero-value distance.
			fmt.Printf("Skipping %s: %v\n", path, err)
			continue
		}
		results = append(results, SearchResult{
			Path:     path,
			Distance: float64(dist),
		})
	}

	// Sort by distance (ascending = most similar first). The stable sort
	// keeps the output deterministic when distances tie.
	sort.SliceStable(results, func(i, j int) bool {
		return results[i].Distance < results[j].Distance
	})

	// Return top K results; clamp so a negative topK cannot panic the slice
	// expression below.
	if topK < 0 {
		topK = 0
	}
	if len(results) > topK {
		results = results[:topK]
	}
	return results
}
// main searches a small fixed image list for the three images most similar
// to query.jpg and prints them as a numbered list with their distances.
func main() {
	hits := searchSimilar("query.jpg", []string{
		"images/beach1.jpg",
		"images/beach2.jpg",
		"images/mountain1.jpg",
		"images/city1.jpg",
		"images/sunset.jpg",
	}, 3)

	fmt.Println("Top 3 similar images:")
	for rank := 1; rank <= len(hits); rank++ {
		hit := hits[rank-1]
		fmt.Printf("%d. %s (distance: %.4f)\n", rank, hit.Path, hit.Distance)
	}
}
Reverse Image Search
Build an in-memory index for fast similarity search (this example reuses the SearchResult type from the previous example):

package main
import (
	"fmt"
	"sort"

	"github.com/ajdnik/imghash/v2"
)
// ImageIndex is an in-memory index of image hashes used for similarity
// search.
type ImageIndex struct {
// hasher computes the hashes and compares them.
hasher imghash.HasherComparer
// index maps an image path to the hash computed for it.
index map[string]imghash.Hash
}
// NewImageIndex returns an empty index that hashes images with GIST.
//
// It panics if the GIST hasher cannot be created, instead of silently
// returning an index whose hasher is unusable.
func NewImageIndex() *ImageIndex {
	gist, err := imghash.NewGIST()
	if err != nil {
		panic(fmt.Errorf("creating GIST hasher: %w", err))
	}
	return &ImageIndex{
		hasher: gist,
		index:  make(map[string]imghash.Hash),
	}
}
// Add hashes the image at path and stores the hash in the index under that
// path, replacing any previous entry. If hashing fails the index is left
// untouched and the hashing error is returned.
func (idx *ImageIndex) Add(path string) error {
	hash, hashErr := imghash.HashFile(idx.hasher, path)
	if hashErr == nil {
		idx.index[path] = hash
	}
	return hashErr
}
// AddBatch adds every path in paths to the index. Indexing is best-effort: a
// path that fails is reported as a warning on standard output and the
// remaining paths are still processed. The returned error is always nil.
func (idx *ImageIndex) AddBatch(paths []string) error {
	for i := 0; i < len(paths); i++ {
		err := idx.Add(paths[i])
		if err == nil {
			continue
		}
		fmt.Printf("Warning: failed to index %s: %v\n", paths[i], err)
	}
	return nil
}
// Search hashes the image at queryPath and returns the indexed images whose
// distance to it is at most maxDistance, ordered by ascending distance and
// truncated to maxResults entries. Results with equal distance are ordered by
// path so the output does not depend on map iteration order.
//
// It returns nil if the query image cannot be hashed (the failure is reported
// as a warning on standard output) and an empty, non-nil slice when nothing
// matches. Index entries that cannot be compared are reported and skipped. A
// negative maxResults is treated as zero.
func (idx *ImageIndex) Search(queryPath string, maxResults int, maxDistance float64) []SearchResult {
	queryHash, err := imghash.HashFile(idx.hasher, queryPath)
	if err != nil {
		fmt.Printf("Warning: failed to hash query %s: %v\n", queryPath, err)
		return nil
	}

	// Start from an empty, non-nil slice so callers can tell "no matches"
	// apart from the nil returned on a query failure.
	results := make([]SearchResult, 0)
	for path, hash := range idx.index {
		dist, err := idx.hasher.Compare(queryHash, hash)
		if err != nil {
			// A failed comparison would otherwise count as a match with the
			// zero-value distance.
			fmt.Printf("Warning: failed to compare %s: %v\n", path, err)
			continue
		}
		if distFloat := float64(dist); distFloat <= maxDistance {
			results = append(results, SearchResult{
				Path:     path,
				Distance: distFloat,
			})
		}
	}

	sort.Slice(results, func(i, j int) bool {
		if results[i].Distance != results[j].Distance {
			return results[i].Distance < results[j].Distance
		}
		return results[i].Path < results[j].Path
	})

	// Clamp so a negative maxResults cannot panic the slice expression.
	if maxResults < 0 {
		maxResults = 0
	}
	if len(results) > maxResults {
		results = results[:maxResults]
	}
	return results
}
// main builds an index from a fixed list of images, reports how many were
// indexed, then searches for up to five images within distance 0.5 of
// query_beach.jpg and prints them as a numbered list.
func main() {
	// Build index
	images := []string{
		"db/beach1.jpg", "db/beach2.jpg", "db/mountain1.jpg",
		"db/city1.jpg", "db/sunset1.jpg", "db/forest1.jpg",
	}
	index := NewImageIndex()

	fmt.Println("Indexing images...")
	index.AddBatch(images)
	fmt.Printf("Indexed %d images\n\n", len(index.index))

	// Search
	hits := index.Search("query_beach.jpg", 5, 0.5)
	fmt.Println("Search results:")
	for rank := 1; rank <= len(hits); rank++ {
		hit := hits[rank-1]
		fmt.Printf("%d. %s (distance: %.4f)\n", rank, hit.Path, hit.Distance)
	}
}
Content Moderation
Blocklist Matching
Check if an uploaded image matches known inappropriate content:

package main
import (
"fmt"
"github.com/ajdnik/imghash/v2"
)
// ContentModerator checks images against a blocklist of known hashes.
type ContentModerator struct {
// hasher computes the hashes and compares them.
hasher imghash.HasherComparer
// blocklist maps a caller-chosen ID to the hash of a blocked image.
blocklist map[string]imghash.Hash
// threshold is the largest distance that still counts as a match.
threshold float64
}
// NewContentModerator returns a moderator with an empty blocklist that hashes
// images with PDQ and treats distances up to threshold as matches.
//
// It panics if the PDQ hasher cannot be created, instead of silently
// returning a moderator whose hasher is unusable.
func NewContentModerator(threshold float64) *ContentModerator {
	pdq, err := imghash.NewPDQ()
	if err != nil {
		panic(fmt.Errorf("creating PDQ hasher: %w", err))
	}
	return &ContentModerator{
		hasher:    pdq,
		blocklist: make(map[string]imghash.Hash),
		threshold: threshold,
	}
}
// AddToBlocklist hashes the image at path and registers the hash under id,
// replacing any hash previously stored for that id. If hashing fails the
// blocklist is left untouched and the hashing error is returned.
func (cm *ContentModerator) AddToBlocklist(id, path string) error {
	hash, hashErr := imghash.HashFile(cm.hasher, path)
	if hashErr == nil {
		cm.blocklist[id] = hash
	}
	return hashErr
}
// Check hashes the image at imagePath and compares it with every blocklist
// entry. It returns true and the ID of a matching entry as soon as one is
// found within the moderator's threshold; when several entries match, which
// ID is returned depends on map iteration order.
//
// An error is returned if the image cannot be hashed. If no entry matches but
// at least one comparison failed, the first comparison error is returned so
// that a broken comparison is never reported as a clean approval.
func (cm *ContentModerator) Check(imagePath string) (bool, string, error) {
	hash, err := imghash.HashFile(cm.hasher, imagePath)
	if err != nil {
		return false, "", err
	}

	var compareErr error
	for id, blockHash := range cm.blocklist {
		dist, err := cm.hasher.Compare(hash, blockHash)
		if err != nil {
			// Keep checking the remaining entries, but remember the failure:
			// the image has not been verified against this entry.
			if compareErr == nil {
				compareErr = fmt.Errorf("comparing against blocklist entry %q: %w", id, err)
			}
			continue
		}
		if float64(dist) <= cm.threshold {
			return true, id, nil
		}
	}
	return false, "", compareErr
}
// main builds a two-entry blocklist, checks one uploaded image against it,
// and prints whether the upload is blocked or approved. It panics if a
// blocklist entry cannot be added or the upload cannot be checked, so an
// incomplete blocklist never silently approves content.
func main() {
	moderator := NewContentModerator(10.0)

	// Add known bad content to blocklist
	known := []struct{ id, path string }{
		{"harmful-001", "blocklist/image1.jpg"},
		{"harmful-002", "blocklist/image2.jpg"},
	}
	for _, entry := range known {
		if err := moderator.AddToBlocklist(entry.id, entry.path); err != nil {
			panic(err)
		}
	}

	// Check new upload
	blocked, matchID, err := moderator.Check("uploads/new_image.jpg")
	if err != nil {
		panic(err)
	}
	if blocked {
		fmt.Printf("⛔ Content blocked - matches %s\n", matchID)
	} else {
		fmt.Println("✅ Content approved")
	}
}
Working with Different Image Sources
Reading from File
package main
import (
"fmt"
"github.com/ajdnik/imghash/v2"
)
// main hashes image.jpg with the Average hasher and prints the hash. Any
// failure aborts the program with a panic.
func main() {
	// check panics with err when it is non-nil.
	check := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	hasher, err := imghash.NewAverage()
	check(err)

	digest, err := imghash.HashFile(hasher, "image.jpg")
	check(err)

	fmt.Println(digest)
}
Reading from io.Reader
package main
import (
"fmt"
"os"
"github.com/ajdnik/imghash/v2"
)
// main opens image.jpg, hashes its contents through the io.Reader interface
// with the Average hasher, and prints the hash. Any failure, including
// failure to create the hasher, aborts the program with a panic.
func main() {
	f, err := os.Open("image.jpg")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	avg, err := imghash.NewAverage()
	if err != nil {
		panic(err)
	}

	hash, err := imghash.HashReader(avg, f)
	if err != nil {
		panic(err)
	}
	fmt.Println(hash)
}
Hashing HTTP Response
package main
import (
"fmt"
"net/http"
"github.com/ajdnik/imghash/v2"
)
// main downloads an image over HTTP, hashes the response body with the
// Average hasher, and prints the hash. It panics on a request failure, on any
// response status other than 200 OK, and on hasher or hashing errors.
func main() {
	resp, err := http.Get("https://example.com/image.jpg")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// An error page is not an image; fail with the status instead of a
	// confusing decode error.
	if resp.StatusCode != http.StatusOK {
		panic(fmt.Errorf("unexpected HTTP status: %s", resp.Status))
	}

	avg, err := imghash.NewAverage()
	if err != nil {
		panic(err)
	}

	hash, err := imghash.HashReader(avg, resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(hash)
}
Hashing image.Image
package main
import (
"fmt"
"image"
_ "image/jpeg"
"os"
"github.com/ajdnik/imghash/v2"
)
// main decodes image.jpg into an image.Image, hashes it directly with the
// Average hasher's Calculate method, and prints the hash. Any failure,
// including failure to open the file, aborts the program with a panic.
func main() {
	// Open and decode image
	f, err := os.Open("image.jpg")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	img, _, err := image.Decode(f)
	if err != nil {
		panic(err)
	}

	// Hash the image.Image directly
	avg, err := imghash.NewAverage()
	if err != nil {
		panic(err)
	}
	hash, err := avg.Calculate(img)
	if err != nil {
		panic(err)
	}
	fmt.Println(hash)
}
Advanced Techniques
Multi-Algorithm Consensus
Use multiple algorithms and require agreement:

package main
import (
"fmt"
"github.com/ajdnik/imghash/v2"
)
// robustDuplicateCheck compares img1 and img2 with three algorithms (Average,
// PDQ and ColorMoment), prints each algorithm's verdict, and returns true
// when at least two of the three consider the images a match.
//
// An algorithm that fails at any step (creating the hasher, hashing either
// file, or comparing) is reported on standard output and casts no vote, so
// unreadable files can never produce a false "duplicate" consensus.
func robustDuplicateCheck(img1, img2 string) bool {
	checks := []struct {
		name      string
		threshold float64
		distance  func(img1, img2 string) (float64, error)
	}{
		{name: "Average", threshold: 5.0, distance: averageDistance},
		{name: "PDQ", threshold: 10.0, distance: pdqDistance},
		{name: "ColorMoment", threshold: 15.0, distance: colorMomentDistance},
	}

	// Require at least 2 out of 3 to agree
	votes := 0
	for _, check := range checks {
		dist, err := check.distance(img1, img2)
		if err != nil {
			fmt.Printf("%s: error: %v\n", check.name, err)
			continue
		}
		match := dist <= check.threshold
		fmt.Printf("%s: %v\n", check.name, match)
		if match {
			votes++
		}
	}
	return votes >= 2
}

// averageDistance returns the Average-hash distance between two image files.
func averageDistance(img1, img2 string) (float64, error) {
	hasher, err := imghash.NewAverage()
	if err != nil {
		return 0, err
	}
	h1, err := imghash.HashFile(hasher, img1)
	if err != nil {
		return 0, err
	}
	h2, err := imghash.HashFile(hasher, img2)
	if err != nil {
		return 0, err
	}
	dist, err := hasher.Compare(h1, h2)
	if err != nil {
		return 0, err
	}
	return float64(dist), nil
}

// pdqDistance returns the PDQ-hash distance between two image files.
func pdqDistance(img1, img2 string) (float64, error) {
	hasher, err := imghash.NewPDQ()
	if err != nil {
		return 0, err
	}
	h1, err := imghash.HashFile(hasher, img1)
	if err != nil {
		return 0, err
	}
	h2, err := imghash.HashFile(hasher, img2)
	if err != nil {
		return 0, err
	}
	dist, err := hasher.Compare(h1, h2)
	if err != nil {
		return 0, err
	}
	return float64(dist), nil
}

// colorMomentDistance returns the ColorMoment-hash distance between two
// image files.
func colorMomentDistance(img1, img2 string) (float64, error) {
	hasher, err := imghash.NewColorMoment()
	if err != nil {
		return 0, err
	}
	h1, err := imghash.HashFile(hasher, img1)
	if err != nil {
		return 0, err
	}
	h2, err := imghash.HashFile(hasher, img2)
	if err != nil {
		return 0, err
	}
	dist, err := hasher.Compare(h1, h2)
	if err != nil {
		return 0, err
	}
	return float64(dist), nil
}
// main runs the multi-algorithm check on img1.jpg and img2.jpg and prints
// the consensus verdict after the per-algorithm output.
func main() {
	verdict := "\n❌ Consensus: Images are different"
	if robustDuplicateCheck("img1.jpg", "img2.jpg") {
		verdict = "\n✅ Consensus: Images are duplicates"
	}
	fmt.Println(verdict)
}
Hash Persistence
Save and load hashes for faster subsequent comparisons:

package main
import (
"encoding/json"
"fmt"
"os"
"github.com/ajdnik/imghash/v2"
)
// HashDatabase is a JSON-serializable collection of image hashes.
type HashDatabase struct {
// Hashes maps an image path to the raw bytes of its hash. It is stored
// under the "hashes" key; encoding/json writes each []byte value as a
// base64-encoded string.
Hashes map[string][]byte `json:"hashes"`
}
// Save writes the database as JSON to the file at path, creating the file or
// truncating an existing one.
//
// It returns the first error from creating, encoding to, or closing the file.
// The close error is checked because a failed close can mean the data never
// reached the disk.
func (db *HashDatabase) Save(path string) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Report a close failure unless an earlier error is already pending.
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()
	return json.NewEncoder(f).Encode(db)
}
// LoadHashDatabase reads a JSON hash database from the file at path.
//
// On any failure (the file cannot be opened, or its contents cannot be
// decoded) it returns a nil database together with the error, so callers
// never receive a partially populated value.
func LoadHashDatabase(path string) (*HashDatabase, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var db HashDatabase
	if err := json.NewDecoder(f).Decode(&db); err != nil {
		return nil, err
	}
	return &db, nil
}
// main hashes three images with PDQ, stores the binary hashes in a
// HashDatabase, saves it to hashes.json, loads it back, and prints every
// loaded entry.
//
// Images that cannot be hashed, or whose hash is not an imghash.Binary, are
// reported on standard output and left out. Failure to create the hasher or
// to save or load the database aborts the program with a panic.
func main() {
	pdq, err := imghash.NewPDQ()
	if err != nil {
		panic(err)
	}

	// Create and populate database
	db := &HashDatabase{
		Hashes: make(map[string][]byte),
	}
	images := []string{"img1.jpg", "img2.jpg", "img3.jpg"}
	for _, img := range images {
		hash, err := imghash.HashFile(pdq, img)
		if err != nil {
			fmt.Printf("Skipping %s: %v\n", img, err)
			continue
		}
		// Binary hashes can be cast to []byte for storage
		binHash, ok := hash.(imghash.Binary)
		if !ok {
			fmt.Printf("Skipping %s: hash is not binary\n", img)
			continue
		}
		db.Hashes[img] = []byte(binHash)
	}

	// Save to disk; only announce success when the write succeeded.
	if err := db.Save("hashes.json"); err != nil {
		panic(err)
	}
	fmt.Println("Saved hash database")

	// Load from disk
	loaded, err := LoadHashDatabase("hashes.json")
	if err != nil {
		panic(err)
	}
	fmt.Printf("Loaded %d hashes\n", len(loaded.Hashes))

	// Use loaded hashes
	for path, hashBytes := range loaded.Hashes {
		fmt.Printf("%s: %v\n", path, imghash.Binary(hashBytes))
	}
}
Error Handling
Comprehensive Error Handling
package main
import (
"errors"
"fmt"
"os"
"github.com/ajdnik/imghash/v2"
)
// hashImageSafely computes the PDQ hash of the image file at path and wraps
// every failure with context describing the step that failed.
//
// All returned errors wrap their cause with %w. In particular, a missing file
// yields an error for which errors.Is(err, os.ErrNotExist) reports true.
func hashImageSafely(path string) (imghash.Hash, error) {
	// Check file exists
	if _, err := os.Stat(path); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// The wrapped error already names the path.
			return nil, fmt.Errorf("image file not found: %w", err)
		}
		return nil, fmt.Errorf("error accessing file: %w", err)
	}

	// Create hasher
	pdq, err := imghash.NewPDQ()
	if err != nil {
		return nil, fmt.Errorf("failed to create hasher: %w", err)
	}

	// Compute hash
	hash, err := imghash.HashFile(pdq, path)
	if err != nil {
		return nil, fmt.Errorf("failed to hash image: %w", err)
	}
	return hash, nil
}
// compareImagesSafely hashes both image files with hashImageSafely, compares
// the hashes with PDQ, and prints the distance.
//
// It returns an error if the hasher cannot be created, if either image cannot
// be hashed, or if the comparison fails. Comparison errors wrap their cause,
// so errors.Is(err, imghash.ErrIncompatibleHash) keeps working for callers.
func compareImagesSafely(path1, path2 string) error {
	pdq, err := imghash.NewPDQ()
	if err != nil {
		return fmt.Errorf("failed to create hasher: %w", err)
	}

	h1, err := hashImageSafely(path1)
	if err != nil {
		return err
	}
	h2, err := hashImageSafely(path2)
	if err != nil {
		return err
	}

	dist, err := pdq.Compare(h1, h2)
	if err != nil {
		if errors.Is(err, imghash.ErrIncompatibleHash) {
			return fmt.Errorf("hashes are incompatible types: %w", err)
		}
		return fmt.Errorf("comparison failed: %w", err)
	}
	fmt.Printf("Distance: %v\n", dist)
	return nil
}
// main compares img1.jpg with img2.jpg; on failure it prints the error and
// exits with status 1.
func main() {
	err := compareImagesSafely("img1.jpg", "img2.jpg")
	if err == nil {
		return
	}
	fmt.Printf("Error: %v\n", err)
	os.Exit(1)
}
Next Steps
API Reference
Explore the complete API documentation
Algorithm Guide
Learn how to choose the right algorithm