/*
 * Command-line driver: passes the first argument to findLongestSubstring()
 * and prints the returned length followed by the characters of the input
 * between the reported start and end indices (both inclusive).
 *
 * Returns 0 on success, 1 when no input string was supplied.
 */
int main(int argc, char** argv) {
  /* Without this guard argv[1] is NULL and would be handed to the matcher. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s <string>\n", argc > 0 ? argv[0] : "program");
    return 1;
  }

  char* inp = argv[1];

  /* Default to an empty range so nothing is printed (and nothing
   * uninitialised is read) if the callee leaves the bounds untouched. */
  int start = 0;
  int end = -1;
  int max = findLongestSubstring(inp, &start, &end);

  printf("%d -- Substring: ", max);
  for (int i = start; i <= end; i++) {
    printf("%c", inp[i]);
  }
  printf("\n");

  return 0;
}
int test_ESA() { void load( const char* filename, int* n, int* m, int* k, int* pos, char** text, char** pattern ); printf("Testing ESA\n"); srand( time(NULL) ); int n = 1e4; int m = 200; int repeats = 1e6; // The text and pattern strings. char t[n]; char p[m]; p[m-1] = '\0'; t[n-1] = '\0'; ESA esa; _randomStrings(t,p,n-1,m-1); constructESA(t,n, &esa, NO_RMQ /*| NO_CHILD_TAB*/ | NO_INV); int LOOKUP[ ALPHABET_SIZE ]; for (int i=0; i<n; i++) { // only when the LCP is 0 do we have a new character. if (esa.LCP[i] == 0) LOOKUP[ (unsigned char)(t[esa.SA[i]]) ] = i; } for (int i=0; i<repeats; i++) { // Throw away p and set it to a substring of t. memcpy(p,t + (rand() % (n-m+1)) ,sizeof(char)*m); int l=1; int i0 = LOOKUP[ (unsigned char)p[0] ]; int i1 = LI_NEXT_CHILD(i0, (&esa)) -1; if (i1==0) i1 = n-1; findLongestSubstring(p, m, &l, i0, i1, &esa); assert(l == m); if (l != m) return 1; findLongestSubstring_simple(p, m, &l, i0, i1, &esa); // A full match should be found. assert(l == m); if (l != m) return 1; } return 0; }
/*
 * Fills P with the p-representation of `text` relative to `pattern`.
 *
 * The text is consumed left to right. At each position one triple is written:
 *   - if the current character has no entry in the lookup table built from
 *     the ESA, the triple gets j = -1, l = 1 and the text advances by one;
 *   - otherwise findLongestSubstring() is called on the remaining text, its
 *     return value is stored in j, the length it reports in l, and the text
 *     advances by that length.
 *
 * P       output array of triples; must have room for every triple written
 *         (at most one per text character).
 * text    input text; positions 0 .. n-2 are processed.
 * pattern string the ESA refers to; used to find each suffix's first char.
 * esa     suffix array (SA) and LCP table, read for indices 0 .. m-1.
 * n, m    lengths as used by the loops below.
 */
void construct_pRepresentation( pTriple* P, const char* text, const char* pattern, const ESA* esa, int n, int m ) {
  // Look up table for each character, giving first occurrence in the SA.
  int LOOKUP[ALPHABET_SIZE];

  // Set every item to -1.
  for (int c = 0; c < ALPHABET_SIZE; c++) {
    LOOKUP[c] = -1;
  }

  // Now, set those characters that appear to the correct value.
  for (int i = 0; i < m; i++) {
    // only when the LCP is 0 do we have a new character.
    if (esa->LCP[i] == 0) {
      LOOKUP[ (unsigned char)pattern[esa->SA[i]] ] = i;
    }
  }

  // Position in the text.
  int t = 0;
  // Position in the p-Representation.
  int x = 0;

  // Go through every value in the text.
  while (t < n-1) {
    int i0 = LOOKUP[ (unsigned char)text[t] ];

    if (i0 == -1) {
      // The symbol is not in the pattern: emit a single-character triple.
      P[x].j = -1;
      P[x].l = 1;
      ++t;
    } else {
      int l = 0;
      int i1 = LI_NEXT_CHILD(i0, (esa)) - 1;
      if (i1 == 0) i1 = esa->n-1;

      P[x].j = findLongestSubstring(text+t, m, &l, i0, i1, esa);
      P[x].l = l;
      // NOTE(review): progress relies on the matcher reporting l >= 1 here.
      t += l;
    }
    ++x;
  }

  // Terminate the p-representation if it is shorter than the worst case.
  // The original `if (x<n);` had a stray semicolon, so the write was
  // unconditional and hit P[-1] when no triple had been produced (x == 0).
  // NOTE(review): the sentinel replaces the last written triple (slot x-1),
  // as in the original code; confirm slot x was not intended.
  if (x > 0 && x < n) {
    P[x-1].l = 0;
    P[x-1].j = -2;
  }
}