String Hashing

Overview

String hashing converts strings to integers for O(1) substring comparison. It’s based on treating strings as polynomials and evaluating them at a chosen base.

Static Hashing

Build: O(n)
Query: O(1)
Update: Not supported

Dynamic Hashing

Build: O(n log n)
Query: O(1)
Update: O(log n)

Hash Function

The polynomial rolling hash function:

hash(s) = (s[0] * BASE^(n-1) + s[1] * BASE^(n-2) + ... + s[n-1]) % MOD

Key Properties:

Fast computation using prefix sums
Efficient rolling hash for sliding windows
Double hashing reduces collision probability

Static Hashing

For immutable strings with O(1) substring queries.

Implementation

string_hashing_static_compress.cpp

const int MODS[] = {1001864327, 1001265673};

inline int add(int a, int b, const int &mod) {
    return (a + b >= mod) ? a + b - mod : a + b;
}
inline int sub(int a, int b, const int &mod) {
    return (a - b < 0) ? a - b + mod : a - b;
}
inline int mul(int a, int b, const int &mod) { return (1LL * a * b) % mod; }

class HashInt {
  public:
    int first;
    int second;
    HashInt(int a, int b) : first(a), second(b) {}
    HashInt(int a) : first(a), second(a) {}
    
    inline friend bool operator==(const HashInt &a, const HashInt &b) {
        return a.first == b.first && a.second == b.second;
    }
    inline friend bool operator!=(const HashInt &a, const HashInt &b) {
        return a.first != b.first && a.second != b.second;
    }
    inline friend HashInt operator+(const HashInt &a, const HashInt &b) {
        return HashInt{add(a.first, b.first, MODS[0]),
                       add(a.second, b.second, MODS[1])};
    }
    inline friend HashInt operator-(const HashInt &a, const HashInt &b) {
        return HashInt{sub(a.first, b.first, MODS[0]),
                       sub(a.second, b.second, MODS[1])};
    }
    inline friend HashInt operator*(const HashInt &a, const HashInt &b) {
        return HashInt{mul(a.first, b.first, MODS[0]),
                       mul(a.second, b.second, MODS[1])};
    }
    operator int64_t() { return (int64_t(first) << 32) | int64_t(second); }
};
using hash_int = HashInt;

const HashInt BASE(256), ZERO(0), ONE(1);

template <class T> struct RollingHashing {
    int n;
    vector<HashInt> code;
    vector<HashInt> base;

    RollingHashing(const T &s) {
        n = (int)s.size();
        base.resize(n + 1, HashInt(0));
        base[0] = ONE;
        for (int i = 1; i <= n; i++)
            base[i] = base[i - 1] * BASE;

        code.resize(n + 1, HashInt(0));
        code[0] = ZERO;
        for (int i = 1; i < (int)code.size(); ++i)
            code[i] = code[i - 1] * BASE + HashInt(s[i - 1]);
    }
    
    inline int64_t query(int left, int right) {
        assert(0 <= left && left <= right && right < n);
        return int64_t(code[right + 1] - code[left] * base[right - left + 1]);
    }
};

template <class T> using hashing = RollingHashing<T>;

Static Hashing Usage

Substring Comparison
Find Pattern
Palindrome Check

string s = "abcabcabc";
hashing<string> h(s);

// Compare substrings in O(1)
if (h.query(0, 2) == h.query(3, 5)) {
    cout << "s[0:2] == s[3:5]" << endl;  // "abc" == "abc"
}

// Check if string is periodic
int n = s.size();
for (int period = 1; period <= n / 2; period++) {
    if (n % period == 0) {
        bool is_periodic = true;
        for (int i = period; i < n; i += period) {
            if (h.query(0, period - 1) != h.query(i, i + period - 1)) {
                is_periodic = false;
                break;
            }
        }
        if (is_periodic) {
            cout << "Period: " << period << endl;
            break;
        }
    }
}

string text = "the quick brown fox jumps";
string pattern = "brown";

hashing<string> h_text(text);
hashing<string> h_pat(pattern);

auto target = h_pat.query(0, pattern.size() - 1);

vector<int> matches;
for (int i = 0; i <= text.size() - pattern.size(); i++) {
    if (h_text.query(i, i + pattern.size() - 1) == target) {
        matches.push_back(i);
    }
}

cout << "Pattern found at positions: ";
for (int pos : matches) {
    cout << pos << " ";
}

string s = "racecar";
hashing<string> h(s);

string rev(s.rbegin(), s.rend());
hashing<string> h_rev(rev);

// Check if entire string is palindrome
int n = s.size();
if (h.query(0, n - 1) == h_rev.query(0, n - 1)) {
    cout << "Is palindrome" << endl;
}

// Check if substring [l, r] is palindrome
auto is_palindrome = [&](int l, int r) {
    int len = r - l + 1;
    return h.query(l, r) == h_rev.query(n - r - 1, n - l - 1);
};

// Find all palindromic substrings
for (int i = 0; i < n; i++) {
    for (int j = i; j < n; j++) {
        if (is_palindrome(i, j)) {
            cout << "Palindrome: " << s.substr(i, j - i + 1) << endl;
        }
    }
}

Dynamic Hashing

Supports string modification with O(log n) updates using Fenwick tree.

Implementation

string_hashing_dynamic_compress.cpp

template <typename T, typename U> T fastpow(T a, U b, T mod) {
    assert(0 <= b);
    T answer = static_cast<T>(1);
    while (b > 0) {
        if (b & 1)
            answer = mul(answer, a, mod);
        a = mul(a, a, mod);
        b >>= 1;
    }
    return answer;
}

class HashInt {
  public:
    int first, second;
    HashInt(int a, int b) : first(a), second(b) {}
    HashInt(int a) : first(a), second(a) {}
    
    // ... operator overloads same as static version ...
    
    inline HashInt inverse() {
        return HashInt{fastpow(first, MODS[0] - 2, MODS[0]),
                       fastpow(second, MODS[1] - 2, MODS[1])};
    }
    operator int64_t() { return (int64_t(first) << 32) | int64_t(second); }
};

const int BASE_F = 256, BASE_S = 256;
const HashInt BASE(BASE_F, BASE_S), ZERO(0), ONE(1);
const HashInt INV_BASE = HashInt(BASE_F, BASE_S).inverse();

template <class T> struct RollingHashing {
    int n;
    string S;
    vector<HashInt> base;
    vector<HashInt> inv_base;
    vector<HashInt> tree;  // Fenwick tree

    RollingHashing(const T &s) : S(s) {
        n = (int)s.size();
        tree.resize(n + 1, HashInt(0));
        base.resize(n + 1, HashInt(0));
        inv_base.resize(n + 1, HashInt(0));

        base[0] = ONE;
        inv_base[0] = ONE;
        for (int i = 1; i <= n; i++) {
            base[i] = base[i - 1] * BASE;
            inv_base[i] = inv_base[i - 1] * INV_BASE;
        }

        for (int i = 0; i < n; ++i)
            modify(i, S[i]);
    }

  private:
    inline void modify(int index, HashInt value) {
        HashInt val = base[index] * value;
        index += 1;
        while (index <= n) {
            tree[index] = tree[index] + val;
            index += index & (-index);
        }
    }

    inline HashInt query(int index) {
        index += 1;
        HashInt ans(0);
        while (index > 0) {
            ans = ans + tree[index];
            index -= index & (-index);
        }
        return ans;
    }

  public:
    inline void set(int index, char value) {
        int delta1 = (MODS[0] + value - S[index]) % MODS[0];
        int delta2 = (MODS[1] + value - S[index]) % MODS[1];
        S[index] = value;
        modify(index, HashInt(delta1, delta2));
    }

    inline int64_t query(int left, int right) {
        assert(0 <= left && left <= right && right < n);
        return int64_t((query(right) - query(left - 1)) * inv_base[left]);
    }
};

template <class T> using hashing = RollingHashing<T>;

Dynamic Hashing Usage

String Modification
Dynamic String Matching
Text Editor Operations

string s = "hello world";
hashing<string> h(s);

// Query initial hash
auto hash1 = h.query(0, 4);  // "hello"

// Modify character
h.set(1, 'a');  // "hallo world"

// Query again
auto hash2 = h.query(0, 4);  // "hallo"

cout << (hash1 == hash2 ? "Same" : "Different") << endl;
// Output: Different

// Multiple updates
h.set(6, 'W');
h.set(7, 'O');
h.set(8, 'R');
// Now: "hallo WORld"

string text = "programming";
hashing<string> h(text);

vector<pair<int, char>> updates = {
    {0, 'P'},  // Programming
    {5, 'R'},  // PRogRamming
};

for (auto [idx, ch] : updates) {
    h.set(idx, ch);
    
    // Find all occurrences of "PR"
    string pattern = "PR";
    hashing<string> hp(pattern);
    auto target = hp.query(0, 1);
    
    for (int i = 0; i <= text.size() - 2; i++) {
        if (h.query(i, i + 1) == target) {
            cout << "Found at " << i << " after update" << endl;
        }
    }
}

// Simulate text editor with hash-based undo/redo
class TextEditor {
    string text;
    hashing<string>* h;
    stack<tuple<int, char, char>> history;  // pos, old, new
    
public:
    TextEditor(const string& s) : text(s) {
        h = new hashing<string>(text);
    }
    
    void update(int pos, char ch) {
        history.push({pos, text[pos], ch});
        text[pos] = ch;
        h->set(pos, ch);
    }
    
    void undo() {
        if (history.empty()) return;
        auto [pos, old_ch, new_ch] = history.top();
        history.pop();
        text[pos] = old_ch;
        h->set(pos, old_ch);
    }
    
    bool matches(int start, const string& pattern) {
        if (start + pattern.size() > text.size()) return false;
        hashing<string> hp(pattern);
        auto target = hp.query(0, pattern.size() - 1);
        return h->query(start, start + pattern.size() - 1) == target;
    }
};

TextEditor editor("hello world");
editor.update(0, 'H');  // "Hello world"
cout << editor.matches(0, "Hello") << endl;  // true
editor.undo();
cout << editor.matches(0, "hello") << endl;  // true

Advanced Applications

Longest Common Substring

string s1 = "abcdefgh";
string s2 = "xyzabcpqr";

hashing<string> h1(s1), h2(s2);

// Binary search on length
auto has_common = [&](int len) {
    set<int64_t> hashes;
    for (int i = 0; i <= s1.size() - len; i++) {
        hashes.insert(h1.query(i, i + len - 1));
    }
    for (int i = 0; i <= s2.size() - len; i++) {
        if (hashes.count(h2.query(i, i + len - 1))) {
            return true;
        }
    }
    return false;
};

int lo = 0, hi = min(s1.size(), s2.size());
while (lo < hi) {
    int mid = (lo + hi + 1) / 2;
    if (has_common(mid)) lo = mid;
    else hi = mid - 1;
}

cout << "Longest common substring length: " << lo << endl;

Distinct Substrings

string s = "banana";
hashing<string> h(s);

set<int64_t> distinct;
for (int len = 1; len <= s.size(); len++) {
    for (int i = 0; i <= s.size() - len; i++) {
        distinct.insert(h.query(i, i + len - 1));
    }
}

cout << "Number of distinct substrings: " << distinct.size() << endl;

Rolling Hash for Sliding Window

string text = "abcdefghijk";
int window = 3;

hashing<string> h(text);
vector<int64_t> hashes;

for (int i = 0; i <= text.size() - window; i++) {
    hashes.push_back(h.query(i, i + window - 1));
}

// Find duplicate windows
map<int64_t, vector<int>> positions;
for (int i = 0; i < hashes.size(); i++) {
    positions[hashes[i]].push_back(i);
}

for (auto [hash, pos] : positions) {
    if (pos.size() > 1) {
        cout << "Duplicate window at positions: ";
        for (int p : pos) cout << p << " ";
        cout << endl;
    }
}

Collision Handling

Reducing Collisions:

Use two large prime moduli (double hashing)
Choose base > maximum character value
Use moduli ~10^9 for 32-bit, ~10^18 for 64-bit
Verify matches with direct comparison in critical cases

// Triple hashing for extra security
const int MODS[] = {1001864327, 1001265673, 1000000007};

class TripleHash {
    int h1, h2, h3;
public:
    TripleHash(int a, int b, int c) : h1(a), h2(b), h3(c) {}
    
    bool operator==(const TripleHash& other) const {
        return h1 == other.h1 && h2 == other.h2 && h3 == other.h3;
    }
    
    // Hash for use in unordered_map
    size_t hash() const {
        return (size_t(h1) << 40) | (size_t(h2) << 20) | size_t(h3);
    }
};

Best Practices

Always use double hashing for competitive programming
Precompute powers of base for efficiency
Use 64-bit integers for hash values to avoid overflow
Test with worst-case inputs to check for collisions
Combine with binary search for optimization problems
Cache frequently used hashes in maps/sets

Practice Problems

Longest Duplicate Substring (LeetCode 1044)
Distinct Echo Substrings (LeetCode 1316)
Find K-Length Substrings With No Repeated Characters (LeetCode 1100)
Substring with Concatenation of All Words (LeetCode 30)
Palindrome Pairs (LeetCode 336)

Graph Algorithms

Data Structures

Range Query

String Algorithms

Math & Number Theory

Numeric

Search Algorithms

Techniques

Geometry

Utilities

Overview

Hash Function

Static Hashing

Implementation

Static Hashing Usage

Dynamic Hashing

Implementation

Dynamic Hashing Usage

Advanced Applications

Longest Common Substring

Distinct Substrings

Rolling Hash for Sliding Window

Collision Handling

Best Practices

Practice Problems

See Also

Build docs developers (and LLMs) love

Graph Algorithms

Data Structures

Range Query

String Algorithms

Math & Number Theory

Numeric

Search Algorithms

Techniques

Geometry

Utilities

​Overview

​Hash Function

​Static Hashing

​Implementation

​Static Hashing Usage

​Dynamic Hashing

​Implementation

​Dynamic Hashing Usage

​Advanced Applications

​Longest Common Substring

​Distinct Substrings

​Rolling Hash for Sliding Window

​Collision Handling

​Best Practices

​Practice Problems

​See Also

Build docs developers (and LLMs) love

Overview

Hash Function

Static Hashing

Implementation

Static Hashing Usage

Dynamic Hashing

Implementation

Dynamic Hashing Usage

Advanced Applications

Longest Common Substring

Distinct Substrings

Rolling Hash for Sliding Window

Collision Handling

Best Practices

Practice Problems

See Also