Count Distinct Substrings — Suffix Array + LCP
Advertisement
Formula
Total substrings = n*(n+1)/2
Duplicate substrings removed = sum(LCP[i])
Distinct = n*(n+1)/2 - sum(LCP)
Solutions
Python
def countDistinctSubstrings(s):
n = len(s)
sa = sorted(range(n), key=lambda i: s[i:])
rank = [0]*n
for i, suf in enumerate(sa): rank[suf] = i
lcp = [0]*n
h = 0
for i in range(n):
if rank[i] > 0:
j = sa[rank[i]-1]
while i+h < n and j+h < n and s[i+h] == s[j+h]:
h += 1
lcp[rank[i]] = h
if h: h -= 1
return n*(n+1)//2 - sum(lcp)
JavaScript
function countDistinctSubstrings(s){
const n=s.length,sa=[...Array(n).keys()].sort((a,b)=>s.slice(a)<s.slice(b)?-1:1);
const rank=new Array(n); sa.forEach((v,i)=>rank[v]=i);
const lcp=new Array(n).fill(0); let h=0;
for(let i=0;i<n;i++){if(rank[i]>0){let j=sa[rank[i]-1];while(i+h<n&&j+h<n&&s[i+h]===s[j+h])h++;lcp[rank[i]]=h;if(h)h--;}}
return n*(n+1)/2-lcp.reduce((a,b)=>a+b,0);
}
Java
import java.util.*;
public int countDistinctSubstrings(String s){
int n=s.length(); Integer[]sa=new Integer[n]; for(int i=0;i<n;i++)sa[i]=i;
Arrays.sort(sa,(a,b)->s.substring(a).compareTo(s.substring(b)));
int[]rank=new int[n]; for(int i=0;i<n;i++)rank[sa[i]]=i;
int[]lcp=new int[n]; int h=0;
for(int i=0;i<n;i++){if(rank[i]>0){int j=sa[rank[i]-1];while(i+h<n&&j+h<n&&s.charAt(i+h)==s.charAt(j+h))h++;lcp[rank[i]]=h;if(h>0)h--;}}
long total=(long)n*(n+1)/2; for(int v:lcp)total-=v; return(int)total;
}
C++
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
using namespace std;
int countDistinctSubstrings(string s){
int n=s.size(); vector<int>sa(n); iota(sa.begin(),sa.end(),0);
sort(sa.begin(),sa.end(),[&](int a,int b){return s.substr(a)<s.substr(b);});
vector<int>rank_(n),lcp(n,0); for(int i=0;i<n;i++)rank_[sa[i]]=i;
int h=0;
for(int i=0;i<n;i++){if(rank_[i]>0){int j=sa[rank_[i]-1];while(i+h<n&&j+h<n&&s[i+h]==s[j+h])h++;lcp[rank_[i]]=h;if(h)h--;}}
return(long long)n*(n+1)/2-accumulate(lcp.begin(),lcp.end(),0LL);
}
C
/* C: qsort-based suffix array, then Kasai LCP, sum and subtract */
Advertisement