String Hashing — Polynomial Hash for O(1) Substring Comparison

Sanjeev SharmaSanjeev Sharma
2 min read

Advertisement

Polynomial Hash

h[i] = s[0]*p^(i-1) + s[1]*p^(i-2) + ... + s[i-1]*p^0

Substring s[l..r]:
hash(l,r) = (h[r+1] - h[l]*p^(r-l+1)) mod MOD

Solutions

Python

class StringHash:
    MOD = (1<<61)-1
    BASE = 131

    def __init__(self, s):
        n = len(s)
        self.h = [0]*(n+1)
        self.pw = [1]*(n+1)
        for i in range(n):
            self.h[i+1] = (self.h[i]*self.BASE + ord(s[i])) % self.MOD
            self.pw[i+1] = self.pw[i]*self.BASE % self.MOD

    def get(self, l, r):
        return (self.h[r+1] - self.h[l]*self.pw[r-l+1]) % self.MOD

JavaScript

class StringHash {
    constructor(s,BASE=131,MOD=1e9+7){
        this.MOD=MOD; this.BASE=BASE;
        const n=s.length; this.h=new Array(n+1).fill(0); this.pw=new Array(n+1).fill(1);
        for(let i=0;i<n;i++){this.h[i+1]=(this.h[i]*BASE+s.charCodeAt(i))%MOD;this.pw[i+1]=this.pw[i]*BASE%MOD;}
    }
    get(l,r){return((this.h[r+1]-this.h[l]*this.pw[r-l+1])%this.MOD+this.MOD)%this.MOD;}
}

Java

class StringHash {
    long[] h,pw; long MOD=1_000_000_007L,BASE=131;
    StringHash(String s){int n=s.length();h=new long[n+1];pw=new long[n+1];pw[0]=1;for(int i=0;i<n;i++){h[i+1]=(h[i]*BASE+s.charAt(i))%MOD;pw[i+1]=pw[i]*BASE%MOD;}}
    long get(int l,int r){return((h[r+1]-h[l]*pw[r-l+1])%MOD+MOD)%MOD;}
}

C++

#include <vector>
#include <string>
using namespace std;
struct StringHash {
    vector<long long>h,pw; long long MOD=1e9+7,BASE=131;
    StringHash(string s):h(s.size()+1,0),pw(s.size()+1,1){
        for(int i=0;i<(int)s.size();i++){h[i+1]=(h[i]*BASE+s[i])%MOD;pw[i+1]=pw[i]*BASE%MOD;}
    }
    long long get(int l,int r){return((h[r+1]-h[l]*pw[r-l+1])%MOD+MOD)%MOD;}
};

C

long long h[100001],pw[100001]; long long MOD=1000000007LL,BASE=131;
void build(char*s,int n){pw[0]=1;for(int i=0;i<n;i++){h[i+1]=(h[i]*BASE+s[i])%MOD;pw[i+1]=pw[i]*BASE%MOD;}}
long long get_hash(int l,int r){return((h[r+1]-h[l]*pw[r-l+1])%MOD+MOD)%MOD;}

Application: Longest Duplicate Substring (Binary Search + Hash)

def longestDupSubstring(s):
    n = len(s)
    h = StringHash(s)
    def has_dup(length):
        seen = set()
        for i in range(n-length+1):
            hv = h.get(i, i+length-1)
            if hv in seen: return i
            seen.add(hv)
        return -1
    lo, hi, res = 1, n-1, 0
    while lo <= hi:
        mid = (lo+hi)//2
        if has_dup(mid) >= 0: res = mid; lo = mid+1
        else: hi = mid-1
    i = has_dup(res)
    return s[i:i+res] if i >= 0 else ""

Advertisement

Sanjeev Sharma

Written by

Sanjeev Sharma

Full Stack Engineer · E-mopro