/*
 * Length of the common prefix of two suffixes, with diagnostic output.
 *
 * Prints both suffixes via print_suffix, then counts the leading characters
 * of first->ar and second->ar that are equal.  If the last matching character
 * is a '$', it is not counted.  The result is printed and returned.
 *
 * The scan is bounded by the smaller of the two `size` fields so that it can
 * never run past the stored characters -- this matters when both arguments
 * are the same suffix or when one suffix is a prefix of the other.
 * NOTE(review): this relies on `size` holding the number of characters stored
 * in `ar`, which is how main() in this file populates it -- confirm for any
 * other caller.
 */
int lcp_two(struct suffix* first, struct suffix* second)
{
    int i = 0;
    int limit = first->size < second->size ? first->size : second->size;

    printf("Checking lcp between\n");
    print_suffix(first);
    print_suffix(second);

    while (i < limit && first->ar[i] == second->ar[i]) {
        i++;
    }

    /* A trailing '$' that matched is not part of the reported prefix. */
    if (i > 0 && first->ar[i - 1] == '$') {
        i--;
    }

    printf("LCP value is %d\n", i);
    printf("\n");
    return i;
}
/* Operand print handler: forwards DIS_INFO to print_suffix with the
   character 'q'.  CD, VALUE, ATTRS, PC and LENGTH are accepted but not
   used.  */
static void
print_Q (CGEN_CPU_DESC cd ATTRIBUTE_UNUSED,
         void * dis_info,
         long value ATTRIBUTE_UNUSED,
         unsigned int attrs ATTRIBUTE_UNUSED,
         bfd_vma pc ATTRIBUTE_UNUSED,
         int length ATTRIBUTE_UNUSED)
{
  print_suffix (dis_info, 'q');
}
/*
 * Smoke test that runs the string "mississippi" through the lookup-array,
 * suffix-array, BWT, alphabetical-sort, dictionary and wavelet-tree helpers,
 * printing each intermediate result, then searches for five needles and
 * finally maps the last search result to locations.
 *
 * The whole body is compiled only when DEBUG_TEST is defined; otherwise the
 * function does nothing.
 *
 * The buffers are released only after occurances_to_locations() has run,
 * because that call still reads test_sa.
 * NOTE(review): test_bwt_wlt is never released here; no matching destroy
 * routine is visible from this block -- confirm whether one exists.
 */
void unit_test1()
{
#ifdef DEBUG_TEST
    char test[] = "mississippi";
    int test_len = strlen(test);
    int* test_array = (int*)malloc(sizeof(int) * test_len);
    int i;

    if (test_array == NULL) {
        fprintf(stderr, "unit_test1: out of memory\n");
        return;
    }

    /* Widen the characters to ints, which is what the helpers take. */
    for (i = 0; i < test_len; i++)
        test_array[i] = test[i];

    int* test_lkp = sort_012(test_array, test_len);
    int* test_sa = get_SA(test_lkp, test_len);
    int* test_bwt = get_BWT(test_array, test_sa, test_len);
    int* test_alpha = alphabetical_sort(test_array, test_len);

    printf("\nThe lookup array:\n");
    for (i = 0; i < test_len; i++) {
        printf("%d ", i);
        print_suffix(i);
        printf(" -> %d\n", test_lkp[i]);
    }

    printf("\nThe suffix array:\n");
    for (i = 0; i < test_len; i++) {
        printf("SA[%d]=\t%d ", i, test_sa[i]);
        print_suffix(test_sa[i]);
        printf("\n");
    }

    printf("\nThe BWT'ed string:\n");
    for (i = 0; i < test_len; i++)
        printf("%c", test_bwt[i]);
    printf("\n");

    printf("\nThe alphabetically sorted string: \n");
    for (i = 0; i < test_len; i++)
        printf("%c", test_alpha[i]);
    printf("\n");

    struct int2 fl;
    int result1 = findinarray_ff(test_alpha, test_len, 0, 10, 's', &fl);
    printf("(%d)s in sorted array from 0 to 11, first is %d last is %d\n", result1, fl.i1, fl.i2);

    struct intarray2 dictc; /* dict and c */
    get_dicandc(test_alpha, test_len, &dictc);
    printf("dict size=%d\n", dictc.len1);
    for (i = 0; i < dictc.len1; i++)
        printf("%c %d\n", dictc.ia1[i], dictc.ia2[i]);

    struct wavelettree* test_bwt_wlt = wavelettree_build(test_bwt, test_len, &dictc);
    printf("]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\n");
    for (i = 0; i < test_len; i++)
        printf("%c", test_bwt[i]);
    printf("\n");
    wavelettree_print(test_bwt_wlt);
    wavelettree_rank(test_bwt_wlt, 3, 's');
    wavelettree_rank(test_bwt_wlt, 4, 'p');
    wavelettree_rank(test_bwt_wlt, test_len-1, 'i');
    printf("[[[[[[[[[[[[[[[[[[[[[[[[[[[[\n");

    int needle1[] = {'s', 's', 'i'};
    int needle1len = 3;
    find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle1, needle1len, &fl);
    printf("\n");

    int needle2[] = {'i'};
    int needle2len = 1;
    find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle2, needle2len, &fl);
    printf("\n");

    int needle3[] = {'p', 'i'};
    int needle3len = 2;
    find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle3, needle3len, &fl);
    printf("\n");

    int needle4[] = {'m', 'i', 's', 's', 'i', 's', 's', 'i', 'p', 'p', 'i'};
    int needle4len = 11;
    find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle4, needle4len, &fl);
    printf("\n");

    int needle5[] = {'s'};
    int needle5len = 1;
    find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle5, needle5len, &fl);

    printf(">> Unit test 1 over.\n");
    occurances_to_locations(test_sa, &fl, test_len);

    /* Release the buffers last: test_sa is still read by the call above. */
    free(test_array);
    free(test_lkp);
    free(test_sa);
    free(test_alpha);
    free(test_bwt);
#endif
}
/* Print val by handing it to print_suffix together with time_suffix. */
void print_time(long long val)
{
    print_suffix(val, time_suffix);
}
/* Print val by handing it to print_suffix together with size_suffix. */
void print_size(long long val)
{
    print_suffix(val, size_suffix);
}
/* Print val by handing it to print_suffix together with int_suffix. */
void print_int(long long val)
{
    print_suffix(val, int_suffix);
}
int main() { struct suffix suffixes[1005]; char A[1005]; int pos[1005]; int lcp[1005]; int nw[1005]; int *group, *group_bool; int start,end,min; int ncases; int nwords,max_words; int global_max; int global_index; char c; int i=0,len=0; int j=0,k=0; int sum=0; i=0;len=0;j=0;k=0; sum=0;nwords=1; c=getchar_unlocked(); while(c!='\n') { suffixes[i].size=i+1; if(c=='$') nwords++; A[i]=c; c=getchar_unlocked(); i++; } max_words=nwords; printf("There are %d words\n",max_words); suffixes[i].size=i+1; A[i]='$'; strcpy(suffixes[i].ar,A); len=i; nwords=1; for(j=0;j<=len;j++) { printf("%c",A[j]); } printf("\n"); for(j=0;j<=len;j++) { for(k=j;k<=len;k++) { suffixes[i].ar[k-j]=A[k]; suffixes[i].nwords=max_words+1-nwords; } if(suffixes[i].ar[0]=='$') nwords++; i--; } //len+1 because len is 7 qsort(suffixes,len+1,sizeof(struct suffix),cmp); for(i=0;i<=len;i++) { pos[i]=len+1-suffixes[i].size; nw[i]=suffixes[i].nwords; } for(i=0;i<=len;i++) { print_suffix(&suffixes[i]); } // lcp_construct(A,pos,lcp,len); // for(i=max_words;i<=len;i++) { // printf("%d ",lcp[i]); // } printf("\n"); for(i=max_words;i<=len;i++) { printf("%d ",nw[i]); } printf("\n"); printf("Length is %d\n",len); //start indexing from1 to max_word group=(int *)malloc((max_words+1)*sizeof(int)); group_bool=(int *)malloc((max_words+1)*sizeof(int)); for(i=0;i<=max_words;i++) { group[i]=0; group_bool[i]=0; } start=max_words; //substring cannot be length global_max=0; global_index=0; start=max_words; end=start; while(start<=len && end<=len) { // printf("Starting search at %d\n",start); sum=0; for(i=1;i<=max_words;i++) { sum+=group_bool[i]; } // printf("Boolean values:"); // for(i=1;i<=max_words;i++) { // printf("%d ",group_bool[i]); // } // printf("\n"); while(sum!=max_words) { // printf("Checking end %d\n",end); //check if seen if(group_bool[nw[end]]==0) { group_bool[nw[end]]=1; sum++; } group[nw[end]]++; // printf("Sum: %d\n",sum); // for(i=1;i<=max_words;i++) { // printf("%d ",group_bool[i]); // } // printf("\n"); // 
printf("%d %d\n",start,end); // printf("Group/Boolean values ending at %d\n",end); // for(i=1;i<=max_words;i++) { // printf("%d ",group[i]); // } // printf("\n"); // for(i=1;i<=max_words;i++) { // printf("%d ",group_bool[i]); // } // printf("\n"); end++; } end--; printf("Occurence at indexes %d %d\n",start,end); min=lcp_two(&suffixes[start],&suffixes[end]); if(min>global_max) { global_max=min; global_index=start; } while(sum==max_words) { //remove first entry group[nw[start]]--; if(group[nw[start]]==0) { group_bool[nw[start]]=0; sum--; } start++; // printf("Group/boolean values beginning at %d ending at %d\n",start,end); // for(i=1;i<=max_words;i++) { // printf("%d ",group[i]); // } // printf("\n"); // for(i=1;i<=max_words;i++) { // printf("%d ",group_bool[i]); // } // printf("\n"); if(sum==max_words) { printf("Occurence at indexes %d %d\n",start,end); min=lcp_two(&suffixes[start],&suffixes[end]); if(min>global_max) { global_max=min; global_index=start; } } } end++; } // global_max=0; printf("%d %d\n",global_max,global_index); printf("The longest common substring (not unique)\n"); for(i=0;i<global_max;i++) { printf("%c",suffixes[global_index].ar[i]); } printf("\n"); return 0; }