// find the suffix array SA of s[0..n-1] in {1..K}^n // require s[n]=s[n+1]=s[n+2]=0, n>=2 void suffixArray(int* s, int* SA, uint32_t n, uint32_t K) { uint32_t n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2; int* s12 = new int[n02 + 3]; s12[n02]= s12[n02+1]= s12[n02+2]=0; int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0; int* s0 = new int[n0]; int* SA0 = new int[n0]; // generate positions of mod 1 and mod 2 suffixes // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1 for (uint32_t i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i; // lsb radix sort the mod 1 and mod 2 triples radixPass(s12 , SA12, s+2, n02, K); radixPass(SA12, s12 , s+1, n02, K); radixPass(s12 , SA12, s , n02, K); // find lexicographic names of triples int name = 0, c0 = -1, c1 = -1, c2 = -1; for (uint32_t i = 0; i < n02; i++) { if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) { name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2]; } if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half else { s12[SA12[i]/3 + n0] = name; } // right half } // recurse if names are not yet unique if (name < n02) { suffixArray(s12, SA12, n02, name); // store unique names in s12 using the suffix array for (uint32_t i = 0; i < n02; i++) s12[SA12[i]] = i + 1; } else // generate the suffix array of s12 directly for (uint32_t i = 0; i < n02; i++) SA12[s12[i] - 1] = i; // stably sort the mod 0 suffixes from SA12 by their first character for (uint32_t i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i]; radixPass(s0, SA0, s, n0, K); // merge sorted SA0 suffixes and sorted SA12 suffixes for (uint32_t p=0, t=n0-n1, k=0; k < n; k++) { #define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2) int i = GetI(); // pos of current offset 12 suffix int j = SA0[p]; // pos of current offset 0 suffix if (SA12[t] < n0 ? 
leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) : leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0])) { // suffix from SA12 is smaller SA[k] = i; t++; if (t == n02) { // done --- only SA0 suffixes left for (k++; p < n0; p++, k++) SA[k] = SA0[p]; } } else { SA[k] = j; p++; if (p == n0) { // done --- only SA12 suffixes left for (k++; t < n02; t++, k++) SA[k] = GetI(); } } } delete [] s12; delete [] SA12; delete [] SA0; delete [] s0; }
void suffixArray(VI &T, VI &SA, int n, int K) { int n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2; VI R(n02+3), SA12(n02+3), R0(n0), SA0(n0); for (int i = 0, j = 0; i < n + (n0 - n1); i++) if (i % 3 != 0) R[j++] = i; radixPass(R, SA12, T.begin() + 2, n02, K); radixPass(SA12, R, T.begin() + 1, n02, K); radixPass(R, SA12, T.begin(), n02, K); int name = 0, c0 = -1, c1 = -1, c2 = -1; for (int i = 0; i < n02; i++) { if (T[SA12[i]] != c0 || T[SA12[i] + 1] != c1 || T[SA12[i] + 2] != c2) { name++; c0 = T[SA12[i]]; c1 = T[SA12[i] + 1]; c2 = T[SA12[i] + 2]; } if (SA12[i] % 3 == 1) { R[SA12[i] / 3] = name; } else { R[SA12[i] / 3 + n0] = name; } } if (name < n02) { suffixArray(R, SA12, n02, name); for (int i = 0; i < n02; i++) R[SA12[i]] = i + 1; } else for (int i = 0; i < n02; i++) SA12[R[i] - 1] = i; for (int i = 0, j = 0; i < n02; i++) if (SA12[i] < n0) R0[j++] = 3 * SA12[i]; radixPass(R0, SA0, T.begin(), n0, K); for (int p = 0, t = n0 - n1, k = 0; k < n; k++) { #define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2) int i = GetI(); // pos of current offset 12 suffix int j = SA0[p]; // pos of current offset 0 suffix if (SA12[t] < n0 ? // different compares for mod 1 and mod 2 suffixes leq(T[i], R[SA12[t] + n0], T[j], R[j / 3]) : leq(T[i], T[i + 1], R[SA12[t] - n0 + 1], T[j], T[j + 1], R[j / 3 + n0])) { // suffix from SA12 is smaller SA[k] = i; t++; if (t == n02) // done --- only SA0 suffixes left for (k++; p < n0; p++, k++) SA[k] = SA0[p]; } else { // suffix from SA0 is smaller SA[k] = j; p++; if (p == n0) // done --- only SA12 suffixes left for (k++; t < n02; t++, k++) SA[k] = GetI(); } } }
// Benchmark driver: fills an int buffer of num_gb * 2^30 elements from the
// given input files (one element per byte read), tops it up with random
// values in 1..256, then runs suffixArray() on it between start_clock() and
// stop_clock().
//
// Usage: <prog> <num_gb> [input_file] [input_file] ...
// Returns 0 on success, -1 on a usage, parse, allocation or file error.
int main(int argc, char** argv)
{
  double one_gb = 1024*1024*1024;
  // Largest value an int can hold, derived without pulling in <limits.h>.
  double max_int = (double) ((unsigned int) -1 / 2);
  double num_gb = 0.0;
  double requested;
  int size;
  int* T;
  int* SA;
  FILE* f;
  int c, j;
  error_t err;

  if( argc < 2 ) {
    printf("Usage: %s <num_gb> [input_file] [input_file] ...\n", argv[0]);
    return -1;
  }

  if( 1 != sscanf(argv[1], "%lf", &num_gb) ) {
    fprintf(stderr, "Could not parse <num_gb> from '%s'\n", argv[1]);
    return -1;
  }
  printf("Will run test with %lf GB\n", num_gb);

  // Reject sizes that cannot be represented as an int (converting an
  // out-of-range double to int is undefined); size+4 must fit as well.
  requested = num_gb * one_gb;
  if( !(requested >= 0.0) || requested + 4.0 > max_int ) {
    fprintf(stderr, "Requested size %lf GB is out of range\n", num_gb);
    return -1;
  }
  size = (int) requested;

  printf("Allocating memory (size=%i)\n", size);
  T = (int*) malloc((size+4)*sizeof(int));
  SA = (int*) malloc(size*sizeof(int));
  // malloc(0) may legitimately return NULL, so only treat NULL as an
  // error for T (always >= 4 elements) and for a non-empty SA.
  if( !T || (!SA && size > 0) ) {
    fprintf(stderr, "Out of memory\n");
    free(T);
    free(SA);
    return -1;
  }

  // read in some input files.
  // NOTE(review): a 0 byte in an input file is stored as symbol 0, the same
  // value used for the padding below -- confirm suffixArray tolerates that.
  j = 0;
  for( int i = 2; i < argc && j < size; i++ ) {
    printf("Reading %s\n", argv[i]);
    f = fopen(argv[i], "r");
    if( !f ) {
      fprintf(stderr, "Could not open %s\n", argv[i]);
      free(T);
      free(SA);
      return -1;
    }
    while( j < size && EOF != (c = fgetc(f)) ) {
      T[j++] = c;
    }
    fclose(f);
  }

  if( j < size ) {
    // make some random data
    printf("Making random data\n");
    for( int i = j; i < size; i++ ) {
      T[i] = 1 + (rand() & 0xff);
    }
  }

  // The four extra slots were allocated but never written; zero them so the
  // sorter never reads uninitialised memory past the end of the text.
  for( int i = 0; i < 4; i++ ) {
    T[size + i] = 0;
  }

  printf("Suffix sorting\n");
  // sort it.
  start_clock();
  err = suffixArray(T, SA, size, 256);
  die_if_err(err);
  stop_clock();

  print_timings("Suffix Sort Bytes", size);

  free(T);
  free(SA);
  return 0;
}
void buildSA2(){ if (N == 1) { this->SA[0] = 0, this->RA[0] = 0; return; } VI T(N+3), SA(N+3); for(int i = 0; i < A.size(); ++i) T[i] = A[i]; suffixArray(T, SA, N, 256); for(int i = 0; i < N; ++i) RA[ SA[i] ] = i; for(int i = 0; i < N; ++i) this->SA[i] = SA[i]; }
// Example int main() { int n = 14; // Convert yabbadabbado to int array // Substitution sigma = [a -> 1, b -> 2, d -> 3, o -> 4, y -> 5] in alphabetical order // missippi sigma = 512213122134 int s[] = {5, 1, 2, 2, 1, 3, 1, 2, 2, 1, 3, 4}; // yabbadabbado int b = 5; int* SA = new int[n + 3]; s[n] = s[n + 1] = s[n + 2] = SA[n] = SA[n + 1] = SA[n + 2] = 0; suffixArray(s, SA, n, b); }
// Builds a suffix array over `data` and returns it as a newly allocated
// vector of start indexes; the caller owns the returned vector.
//
// With sortAlgorithm == STLSORT the indexes 0..size-1 are sorted with the
// STL sort using d_compareFun. Otherwise the ssort routine from
// errormining::util is used.
vector<size_t> const *SuffixArray<int>::genSuffixArray(
    vector<int> const &data, SortAlgorithm sortAlgorithm) const
{
    if (sortAlgorithm == STLSORT) {
        vector<size_t> *suffixArray = new std::vector<size_t>();
        suffixArray->reserve(data.size());

        // Initially fill a vector where the i-th element represents the
        // suffix starting at index i. This is an unsorted suffix array.
        for (size_t i = 0; i < data.size(); ++i)
            suffixArray->push_back(i);

        // Sort the suffix array.
        sort(suffixArray->begin(), suffixArray->end(), d_compareFun);

        return suffixArray;
    }

    // The caller does not want us to use STL sort, so use the suffix
    // sort algorithm by McIlroy and McIlroy.

    // ssort initially requires the original sequence of suffixes.
    // The hash automaton returns [0..k-1] for k different words. ssort
    // uses 0 as an end of sequence marker and thus expects hashcodes
    // [1..k], so every code is copied with 1 added. Space for the
    // trailing delimiter is reserved up front to avoid a reallocation.
    QSharedPointer<vector<int> > suffixArray(new vector<int>());
    suffixArray->reserve(data.size() + 1);
    for (vector<int>::const_iterator iter = data.begin();
            iter != data.end(); ++iter)
        suffixArray->push_back(*iter + 1);

    // Add 0 to delimit the sequence.
    suffixArray->push_back(0);

    // Create the suffix array. While we'll pass the data array, ssort
    // will modify it to be the suffix array.
    errormining::util::ssort(suffixArray.data());
    suffixArray->pop_back();

    // ssort works on a vector of ints (amongst others because the algorithm
    // internally uses the sign bit), while the suffix array class uses a
    // vector of size_t as indexes into the data array. So, we'll convert
    // the vector.
    vector<size_t> *sizeTSuffixArray =
        new vector<size_t>(suffixArray->begin(), suffixArray->end());

    return sizeTSuffixArray;
}
int main() { int t,length,sum; while(EOF!=scanf("%d",&t)) { if(t==0) break; memset(str,0,sizeof(str)); getchar(); gets(str); len1=strlen(str); length=len1; str[len1++]=2; gets(str+len1); len2=strlen(str+len1); n=len1+len2; str[n++]=1; suffixArray(); lcp(); init(); for(i=2;i<n;i++) { stack[i-1].index=SA[i]+1; if(stack[i-1].index>=9) stack[i-1].flag=true; else stack[i-1].flag=false; } length=2*length;sum=0; for(i=1;i<length;i++) { j=i+1; while(RMQ(stack[i].index,stack[j].index)>=t) { if(stack[i].flag!=stack[j].flag) sum+=RMQ(stack[i].index,stack[j].index)-t+1; j++; } } printf("%d\n",sum); } return 0; }
int main() { #ifndef ONLINE_JUDGE freopen("input.txt", "rt", stdin); #endif scanf("%d\n%s", &N, &A); memcpy(A + N, A, N); N = 2 * N; A[N] = A[N + 1] = A[N + 2] = 0; int i; for (i = 0; i < N; i++) S[i] = A[i] - 'A'; suffixArray(S, SA, N, 26); for (i = 0; i < N; i++) printf("%d ", SA[i]); printf("\n"); LCP = lcp(SA, A, N); double answer = 0; if (LCP) { for (i = 0; i < N; i++) { printf("%d ", LCP[i]); answer += LCP[i]; } printf("\n"); answer /= N - 1; printf("%.3lf\n", answer); } return 0; }
// find the suffix array SA of T[0..n-1] in {1..K}^n // require T[n] = T[n+1] = T[n+2] = 0, n >= 2 void suffixArray(int* T, int* SA, int n, int K) { int n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2; int* R = new int[n02 + 3]; R[n02] = R[n02 + 1] = R[n02 + 2] = 0; int* SA12 = new int[n02 + 3]; SA12[n02] = SA12[n02 + 1] = SA12[n02 + 2] = 0; int* R0 = new int[n0]; int* SA0 = new int[n0]; //******* Step 0: Construct sample ******** // generate positions of mod 1 and mod 2 suffixes // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1 for (int i = 0, j = 0; i < n + (n0 - n1); i++) if (i%3 != 0) R[j++] = i; //******* Step 1: Sort sample suffixes ******** // lsb radix sort the mod 1 and mod 2 triples radixPass(R, SA12, T + 2, n02, K); radixPass(SA12, R, T + 1, n02, K); radixPass(R, SA12, T, n02, K); // find lexicographic names of triples and // write them to correct places in R int name = 0, c0 = -1, c1 = -1, c2 = -1; for (int i = 0; i < n02; i++) { if (T[SA12[i]] != c0 || T[SA12[i] + 1] != c1 || T[SA12[i] + 2] != c2) { name++; c0 = T[SA12[i]]; c1 = T[SA12[i] + 1]; c2 = T[SA12[i] + 2]; } if (SA12[i] % 3 == 1) { R[SA12[i] / 3] = name; // write to R1 } else { R[SA12[i] / 3 + n0] = name; // write to R2 } } // recurse if names are not yet unique if (name < n02) { suffixArray(R, SA12, n02, name); // store unique names in R using the suffix array for (int i = 0; i < n02; i++) R[SA12[i]] = i + 1; } else // generate the suffix array of R directly for (int i = 0; i < n02; i++) SA12[R[i] - 1] = i; //******* Step 2: Sort nonsample suffixes ******** // stably sort the mod 0 suffixes from SA12 by their first character for (int i = 0, j = 0; i < n02; i++) if (SA12[i] < n0) R0[j++] = 3 * SA12[i]; radixPass(R0, SA0, T, n0, K); //******* Step 3: Merge ******** // merge sorted SA0 suffixes and sorted SA12 suffixes for (int p = 0, t = n0 - n1, k = 0; k < n; k++) { #define GetI() (SA12[t] < n0 ? 
SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2) int i = GetI(); // pos of current offset 12 suffix int j = SA0[p]; // pos of current offset 0 suffix if (SA12[t] < n0 ? // different compares for mod 1 and mod 2 suffixes leq(T[i], R[SA12[t] + n0], T[j], R[j / 3]) : leq(T[i], T[i+1], R[SA12[t] - n0 + 1], T[j], T[j + 1], R[j / 3 + n0])) { // suffix from SA12 is smaller SA[k] = i; t++; if (t == n02) // done --- only SA0 suffixes left for (k++; p < n0; p++, k++) SA[k] = SA0[p]; } else { // suffix from SA0 is smaller SA[k] = j; p++; if (p == n0) // done --- only SA12 suffixes left for (k++; t < n02; t++, k++) SA[k] = GetI(); } } delete [] R; delete [] SA12; delete [] SA0; delete [] R0; }