Resources:
CP_Algorithms
Suffix Array
stanford.edu
CommonLounge
Youtube:
Suffix array playlist
Complexity: O(Nlog^2(n))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<bits/stdc++.h> | |
using namespace std; | |
const int maxn = 400004; | |
void naiveApproach() // O(N^2logN) | |
{ | |
vector<pair<string, int> > v; | |
string s; //mississippi | |
//cin >> s; | |
s = "mississippi$"; | |
for(int i = 0; i < s.size(); i++){ | |
v.push_back({s.substr(i), i}); | |
} | |
sort(v.begin(), v.end()); | |
for(int i = 0; i < v.size(); i++){ | |
cout << v[i].second << " "<< v[i].first << endl; | |
} | |
return; | |
} | |
struct info | |
{ | |
int tup[2], idx; // tup[0] = prev rank, tup[1] = new rank | |
bool operator < (const info &a) const | |
{ | |
return tup[0] != a.tup[0] ? tup[0] < a.tup[0] : tup[1] < a.tup[1]; | |
} | |
} ar[maxn]; | |
int rnk[18][maxn], LCP[maxn], step; | |
string text; | |
void build_suffix_array() | |
{ | |
int n = text.size(), jump; | |
for(int i = 0; i < n; i++){ | |
rnk[0][i] = text[i]; // rank suffixes according to the 1st char | |
memset(ar[i].tup, 0, sizeof(ar[i].tup)); | |
} | |
for(step = 1, jump = 1; jump <= n; step++, jump <<= 1){ | |
for(int i = 0; i <= n; i++){ | |
ar[i].idx = i; | |
ar[i].tup[0] = rnk[step - 1][i]; // what i was in prev step | |
ar[i].tup[1] = i + jump < n ? rnk[step - 1][i + jump] : -1; | |
} | |
sort(ar, ar+n); | |
rnk[step][ar[0].idx] = 0; | |
for(int i = 1; i < n; i++) | |
rnk[step][ar[i].idx] = ar[i].tup[0] == ar[i - 1].tup[0] && ar[i].tup[1] == ar[i - 1].tup[1] ? rnk[step][ar[i - 1].idx] : i; | |
} | |
cout << "Suffix Array: \n\n"; | |
cout << n << endl; | |
for(int i = 0; i < n; i++){ | |
cout << ar[i].idx << " "<<text.substr(ar[i].idx) << endl; | |
} | |
} | |
void build_LCP_array() | |
{ | |
LCP[0] = 0; | |
int n = text.size(), i,j,id1,id2; | |
for( i = 0; i < n; i++){ | |
id1 = ar[i - 1].idx; | |
id2 = ar[i].idx; | |
LCP[i] = 0; | |
for(j = step - 1; j >= 0; j--) | |
if(rnk[j][id1] == rnk[j][id2] && rnk[j][id2]){ | |
LCP[i] += (1 << j); | |
id1 += (1 << j); | |
id2 += (1 << j); | |
} | |
cout << ar[i - 1].idx << " "<< ar[i].idx << " "<<LCP[i] << endl; | |
} | |
for(int i = 0; i < n; i++) | |
cout << i << " "<< LCP[i] << endl; | |
} | |
int main() | |
{ | |
///naiveApproach(); | |
//cin >> text; | |
text = "mississippi$"; | |
build_suffix_array(); | |
build_LCP_array(); | |
return 0; | |
} |
No comments:
Post a Comment