import re
from langsmith.anonymizer import create_anonymizer
# Common PII / secret redaction rules.
#
# Each rule is a dict holding a compiled "pattern" and the literal "replace"
# text substituted for every match.
#
# Order matters when the rules are applied one after another in list order:
# the URL-credentials rule must come before the email rule. Otherwise
# "user:pass@host.tld" is partially consumed as the email "pass@host.tld",
# the "@" disappears, the URL rule can no longer match, and the username is
# left in clear text.
common_patterns = [
    # URLs with embedded credentials (scheme://user:password@host).
    # The character classes exclude whitespace, "/" and "@" so a match cannot
    # run past the URL's authority part into unrelated text that merely
    # contains a ":" and an "@" somewhere later on.
    # NOTE: the replacement always emits "https://", even for http:// URLs.
    {
        "pattern": re.compile(r"https?://[^\s:/@]+:[^\s/@]+@"),
        "replace": "https://[credentials]@",
    },
    # Email addresses
    {
        "pattern": re.compile(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", re.I),
        "replace": "[email]",
    },
    # Phone numbers (US format: 10 digits, optional "-" or "." separators)
    {
        "pattern": re.compile(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b"),
        "replace": "[phone]",
    },
    # Social Security Numbers (NNN-NN-NNNN)
    {
        "pattern": re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),
        "replace": "[ssn]",
    },
    # Credit card numbers (basic: 4 groups of 4 digits, optional space/dash)
    {
        "pattern": re.compile(r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b"),
        "replace": "[credit-card]",
    },
    # API keys (common formats). The specific "sk-" rule runs before the
    # generic long-token rule so such keys are labelled "[api-key]".
    {
        "pattern": re.compile(r"sk-[a-zA-Z0-9]{32,}"),
        "replace": "[api-key]",
    },
    # Any run of 32+ URL-safe characters is treated as an opaque token.
    {
        "pattern": re.compile(r"[a-zA-Z0-9_-]{32,}"),
        "replace": "[token]",
    },
    # IP addresses (dotted quad; octet ranges are not validated)
    {
        "pattern": re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
        "replace": "[ip]",
    },
]
# Build the anonymizer from the rule list via langsmith's create_anonymizer.
# NOTE(review): presumably this object is handed to a LangSmith client to
# scrub traced data — confirm against the caller.
anonymizer = create_anonymizer(common_patterns)