Skip to main content

Overview

String hashing converts strings to integers for O(1) substring comparison. It’s based on treating strings as polynomials and evaluating them at a chosen base.
Static Hashing
  • Build: O(n)
  • Query: O(1)
  • Update: Not supported
Dynamic Hashing
  • Build: O(n log n)
  • Query: O(1)
  • Update: O(log n)

Hash Function

The polynomial rolling hash function:
hash(s) = (s[0] * BASE^(n-1) + s[1] * BASE^(n-2) + ... + s[n-1]) % MOD
Key Properties:
  • Fast computation using prefix sums
  • Efficient rolling hash for sliding windows
  • Double hashing reduces collision probability

Static Hashing

For immutable strings with O(1) substring queries.

Implementation

string_hashing_static_compress.cpp
const int MODS[] = {1001864327, 1001265673};

inline int add(int a, int b, const int &mod) {
    return (a + b >= mod) ? a + b - mod : a + b;
}
inline int sub(int a, int b, const int &mod) {
    return (a - b < 0) ? a - b + mod : a - b;
}
inline int mul(int a, int b, const int &mod) { return (1LL * a * b) % mod; }

class HashInt {
  public:
    int first;
    int second;
    HashInt(int a, int b) : first(a), second(b) {}
    HashInt(int a) : first(a), second(a) {}
    
    inline friend bool operator==(const HashInt &a, const HashInt &b) {
        return a.first == b.first && a.second == b.second;
    }
    inline friend bool operator!=(const HashInt &a, const HashInt &b) {
        return a.first != b.first && a.second != b.second;
    }
    inline friend HashInt operator+(const HashInt &a, const HashInt &b) {
        return HashInt{add(a.first, b.first, MODS[0]),
                       add(a.second, b.second, MODS[1])};
    }
    inline friend HashInt operator-(const HashInt &a, const HashInt &b) {
        return HashInt{sub(a.first, b.first, MODS[0]),
                       sub(a.second, b.second, MODS[1])};
    }
    inline friend HashInt operator*(const HashInt &a, const HashInt &b) {
        return HashInt{mul(a.first, b.first, MODS[0]),
                       mul(a.second, b.second, MODS[1])};
    }
    operator int64_t() { return (int64_t(first) << 32) | int64_t(second); }
};
using hash_int = HashInt;

const HashInt BASE(256), ZERO(0), ONE(1);

template <class T> struct RollingHashing {
    int n;
    vector<HashInt> code;
    vector<HashInt> base;

    RollingHashing(const T &s) {
        n = (int)s.size();
        base.resize(n + 1, HashInt(0));
        base[0] = ONE;
        for (int i = 1; i <= n; i++)
            base[i] = base[i - 1] * BASE;

        code.resize(n + 1, HashInt(0));
        code[0] = ZERO;
        for (int i = 1; i < (int)code.size(); ++i)
            code[i] = code[i - 1] * BASE + HashInt(s[i - 1]);
    }
    
    inline int64_t query(int left, int right) {
        assert(0 <= left && left <= right && right < n);
        return int64_t(code[right + 1] - code[left] * base[right - left + 1]);
    }
};

template <class T> using hashing = RollingHashing<T>;

Static Hashing Usage

string s = "abcabcabc";
hashing<string> h(s);

// Compare substrings in O(1)
if (h.query(0, 2) == h.query(3, 5)) {
    cout << "s[0:2] == s[3:5]" << endl;  // "abc" == "abc"
}

// Check if string is periodic
int n = s.size();
for (int period = 1; period <= n / 2; period++) {
    if (n % period == 0) {
        bool is_periodic = true;
        for (int i = period; i < n; i += period) {
            if (h.query(0, period - 1) != h.query(i, i + period - 1)) {
                is_periodic = false;
                break;
            }
        }
        if (is_periodic) {
            cout << "Period: " << period << endl;
            break;
        }
    }
}

Dynamic Hashing

Supports string modification with O(log n) updates using Fenwick tree.

Implementation

string_hashing_dynamic_compress.cpp
template <typename T, typename U> T fastpow(T a, U b, T mod) {
    assert(0 <= b);
    T answer = static_cast<T>(1);
    while (b > 0) {
        if (b & 1)
            answer = mul(answer, a, mod);
        a = mul(a, a, mod);
        b >>= 1;
    }
    return answer;
}

class HashInt {
  public:
    int first, second;
    HashInt(int a, int b) : first(a), second(b) {}
    HashInt(int a) : first(a), second(a) {}
    
    // ... operator overloads same as static version ...
    
    inline HashInt inverse() {
        return HashInt{fastpow(first, MODS[0] - 2, MODS[0]),
                       fastpow(second, MODS[1] - 2, MODS[1])};
    }
    operator int64_t() { return (int64_t(first) << 32) | int64_t(second); }
};

const int BASE_F = 256, BASE_S = 256;
const HashInt BASE(BASE_F, BASE_S), ZERO(0), ONE(1);
const HashInt INV_BASE = HashInt(BASE_F, BASE_S).inverse();

template <class T> struct RollingHashing {
    int n;
    string S;
    vector<HashInt> base;
    vector<HashInt> inv_base;
    vector<HashInt> tree;  // Fenwick tree

    RollingHashing(const T &s) : S(s) {
        n = (int)s.size();
        tree.resize(n + 1, HashInt(0));
        base.resize(n + 1, HashInt(0));
        inv_base.resize(n + 1, HashInt(0));

        base[0] = ONE;
        inv_base[0] = ONE;
        for (int i = 1; i <= n; i++) {
            base[i] = base[i - 1] * BASE;
            inv_base[i] = inv_base[i - 1] * INV_BASE;
        }

        for (int i = 0; i < n; ++i)
            modify(i, S[i]);
    }

  private:
    inline void modify(int index, HashInt value) {
        HashInt val = base[index] * value;
        index += 1;
        while (index <= n) {
            tree[index] = tree[index] + val;
            index += index & (-index);
        }
    }

    inline HashInt query(int index) {
        index += 1;
        HashInt ans(0);
        while (index > 0) {
            ans = ans + tree[index];
            index -= index & (-index);
        }
        return ans;
    }

  public:
    inline void set(int index, char value) {
        int delta1 = (MODS[0] + value - S[index]) % MODS[0];
        int delta2 = (MODS[1] + value - S[index]) % MODS[1];
        S[index] = value;
        modify(index, HashInt(delta1, delta2));
    }

    inline int64_t query(int left, int right) {
        assert(0 <= left && left <= right && right < n);
        return int64_t((query(right) - query(left - 1)) * inv_base[left]);
    }
};

template <class T> using hashing = RollingHashing<T>;

Dynamic Hashing Usage

string s = "hello world";
hashing<string> h(s);

// Query initial hash
auto hash1 = h.query(0, 4);  // "hello"

// Modify character
h.set(1, 'a');  // "hallo world"

// Query again
auto hash2 = h.query(0, 4);  // "hallo"

cout << (hash1 == hash2 ? "Same" : "Different") << endl;
// Output: Different

// Multiple updates
h.set(6, 'W');
h.set(7, 'O');
h.set(8, 'R');
// Now: "hallo WORld"

Advanced Applications

Longest Common Substring

string s1 = "abcdefgh";
string s2 = "xyzabcpqr";

hashing<string> h1(s1), h2(s2);

// Binary search on length
auto has_common = [&](int len) {
    set<int64_t> hashes;
    for (int i = 0; i <= s1.size() - len; i++) {
        hashes.insert(h1.query(i, i + len - 1));
    }
    for (int i = 0; i <= s2.size() - len; i++) {
        if (hashes.count(h2.query(i, i + len - 1))) {
            return true;
        }
    }
    return false;
};

int lo = 0, hi = min(s1.size(), s2.size());
while (lo < hi) {
    int mid = (lo + hi + 1) / 2;
    if (has_common(mid)) lo = mid;
    else hi = mid - 1;
}

cout << "Longest common substring length: " << lo << endl;

Distinct Substrings

string s = "banana";
hashing<string> h(s);

set<int64_t> distinct;
for (int len = 1; len <= s.size(); len++) {
    for (int i = 0; i <= s.size() - len; i++) {
        distinct.insert(h.query(i, i + len - 1));
    }
}

cout << "Number of distinct substrings: " << distinct.size() << endl;

Rolling Hash for Sliding Window

string text = "abcdefghijk";
int window = 3;

hashing<string> h(text);
vector<int64_t> hashes;

for (int i = 0; i <= text.size() - window; i++) {
    hashes.push_back(h.query(i, i + window - 1));
}

// Find duplicate windows
map<int64_t, vector<int>> positions;
for (int i = 0; i < hashes.size(); i++) {
    positions[hashes[i]].push_back(i);
}

for (auto [hash, pos] : positions) {
    if (pos.size() > 1) {
        cout << "Duplicate window at positions: ";
        for (int p : pos) cout << p << " ";
        cout << endl;
    }
}

Collision Handling

Reducing Collisions:
  1. Use two large prime moduli (double hashing)
  2. Choose base > maximum character value
  3. Use moduli ~10^9 for 32-bit, ~10^18 for 64-bit
  4. Verify matches with direct comparison in critical cases
// Triple hashing for extra security
const int MODS[] = {1001864327, 1001265673, 1000000007};

class TripleHash {
    int h1, h2, h3;
public:
    TripleHash(int a, int b, int c) : h1(a), h2(b), h3(c) {}
    
    bool operator==(const TripleHash& other) const {
        return h1 == other.h1 && h2 == other.h2 && h3 == other.h3;
    }
    
    // Hash for use in unordered_map
    size_t hash() const {
        return (size_t(h1) << 40) | (size_t(h2) << 20) | size_t(h3);
    }
};

Best Practices

  1. Always use double hashing for competitive programming
  2. Precompute powers of base for efficiency
  3. Use 64-bit integers for hash values to avoid overflow
  4. Test with worst-case inputs to check for collisions
  5. Combine with binary search for optimization problems
  6. Cache frequently used hashes in maps/sets

Practice Problems

  1. Longest Duplicate Substring (LeetCode 1044)
  2. Distinct Echo Substrings (LeetCode 1316)
  3. Find K-Length Substrings With No Repeated Characters (LeetCode 1100)
  4. Substring with Concatenation of All Words (LeetCode 30)
  5. Palindrome Pairs (LeetCode 336)

See Also

Build docs developers (and LLMs) love