Пример #1
0
/*
 * Returns the number of leading characters that `first` and `second` have in
 * common, printing both suffixes and the result as a debug trace.
 *
 * The comparison never looks past the `size` of either suffix.  `size` is
 * taken to be the number of valid characters in `ar` (that is how the
 * caller that builds the suffixes fills it in); the buffers are not
 * guaranteed to be NUL-terminated, so without this bound the loop would read
 * indeterminate bytes when one suffix is a prefix of the other, or run off
 * the buffer when both arguments refer to the same suffix.
 *
 * If the last matching character is the '$' separator it is not counted.
 */
int lcp_two(struct suffix* first, struct suffix* second) {
	int i=0;
	int limit=first->size;

	if(second->size<limit) {
		limit=second->size;
	}

	printf("Checking lcp between\n");
	print_suffix(first);
	print_suffix(second);

	while(i<limit && first->ar[i]==second->ar[i]) {
		i++;
	}

	/* A match that ends on the separator does not include the separator. */
	if(i>0) {
		if(first->ar[i-1]=='$') i--;
	}

	printf("LCP value is %d\n",i);
	printf("\n");
	return i;
}
Пример #2
0
/* Calls print_suffix with DIS_INFO and the character 'q'.

   CD, VALUE, ATTRS, PC and LENGTH are accepted but not used (each is
   marked ATTRIBUTE_UNUSED).  NOTE(review): presumably they exist so that
   this function matches a handler signature shared with sibling
   printers -- confirm against the caller.  */
static void
print_Q (CGEN_CPU_DESC cd ATTRIBUTE_UNUSED,
	 void * dis_info,
	 long value ATTRIBUTE_UNUSED,
	 unsigned int attrs ATTRIBUTE_UNUSED,
	 bfd_vma pc ATTRIBUTE_UNUSED,
	 int length ATTRIBUTE_UNUSED)
{
  print_suffix (dis_info, 'q');
}
Пример #3
0
/*
 * Debug-only smoke test: runs the indexing helpers (all defined elsewhere)
 * on the fixed string "mississippi" and prints what they return.
 *
 * The body is compiled only when DEBUG_TEST is defined; otherwise the
 * function does nothing.  Nothing is asserted -- the output is meant to be
 * inspected by eye.
 */
void unit_test1() {

#ifdef DEBUG_TEST
	char test[]="mississippi";
	int test_len=strlen(test);
	int i;

	/* The helpers take the text as an array of int, one element per character. */
	int* test_array=malloc(sizeof *test_array * test_len);
	if(test_array==NULL) {
		fprintf(stderr, "unit_test1: out of memory\n");
		return;
	}
	for(i=0;i<test_len;i++) test_array[i]=test[i];

	int* test_lkp=sort_012(test_array, test_len);
	int* test_sa =get_SA(test_lkp, test_len);
	int* test_bwt=get_BWT(test_array, test_sa, test_len);
	int* test_alpha=alphabetical_sort(test_array, test_len);

	printf("\nThe lookup array:\n");
	for(i=0;i<test_len;i++) {
		printf("%d ", i); print_suffix(i);
		printf(" -> %d\n", test_lkp[i]);
	}
	printf("\nThe suffix array:\n");
	for(i=0;i<test_len;i++) {
		printf("SA[%d]=\t%d   ", i, test_sa[i]);
		print_suffix(test_sa[i]);
		printf("\n");
	}
	printf("\nThe BWT'ed string:\n");
	for(i=0;i<test_len;i++) printf("%c", test_bwt[i]);
	printf("\n");

	printf("\nThe alphabetically sorted string: \n");
	for(i=0;i<test_len;i++) printf("%c", test_alpha[i]);
	printf("\n");

	/* NOTE(review): the call passes the range 0..10 while the message says
	 * "0 to 11"; which of the two is intended cannot be told from here. */
	struct int2 fl;
	int result1=findinarray_ff(test_alpha, test_len, 0, 10, 's', &fl);
	printf("(%d)s in sorted array from 0 to 11, first is %d last is %d\n",
		result1, fl.i1, fl.i2);

	struct intarray2 dictc; /* dict and c */
	get_dicandc(test_alpha, test_len, &dictc);
	printf("dict size=%d\n", dictc.len1);
	for(i=0; i<dictc.len1; i++) printf("%c %d\n", dictc.ia1[i], dictc.ia2[i]);

	struct wavelettree* test_bwt_wlt = wavelettree_build(test_bwt, test_len, &dictc);

	printf("]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\n");
	for(i=0; i<test_len; i++) printf("%c", test_bwt[i]);
	printf("\n");
	wavelettree_print(test_bwt_wlt);
	wavelettree_rank(test_bwt_wlt, 3, 's');
	wavelettree_rank(test_bwt_wlt, 4, 'p');
	wavelettree_rank(test_bwt_wlt, test_len-1, 'i');
	printf("[[[[[[[[[[[[[[[[[[[[[[[[[[[[\n");

	/* Pattern searches; each call reuses fl for its result. */
	int needle1[]={'s', 's', 'i'};
	int needle1len = 3;
	find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle1, needle1len, &fl);

	printf("\n");
	int needle2[]={'i'};
	int needle2len = 1;
	find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle2, needle2len, &fl);

	printf("\n");
	int needle3[]={'p', 'i'};
	int needle3len = 2;
	find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle3, needle3len, &fl);

	printf("\n");
	int needle4[]={'m', 'i', 's', 's', 'i', 's', 's', 'i', 'p', 'p', 'i'};
	int needle4len = 11;
	find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle4, needle4len, &fl);

	printf("\n");
	int needle5[]={'s'};
	int needle5len = 1;
	find_num_of_occurances(test_alpha, test_bwt, test_bwt_wlt, test_len, &dictc, needle5, needle5len, &fl);

	printf(">> Unit test 1 over.\n");

	occurances_to_locations(test_sa, &fl, test_len);

	/* Free only after the last use: test_sa is still read by
	 * occurances_to_locations() above, so releasing it earlier would be a
	 * use-after-free.
	 * NOTE(review): test_bwt_wlt and whatever get_dicandc() stored in dictc
	 * are not released; no matching destroy routine is visible here. */
	free(test_array); free(test_lkp); free(test_sa); free(test_alpha); free(test_bwt);
#endif
}
Пример #4
0
/* Passes val to print_suffix together with time_suffix (defined elsewhere). */
void print_time(long long val)
{
    print_suffix(val, time_suffix);
}
Пример #5
0
/* Passes val to print_suffix together with size_suffix (defined elsewhere). */
void print_size(long long val)
{
    print_suffix(val, size_suffix);
}
Пример #6
0
/* Passes val to print_suffix together with int_suffix (defined elsewhere). */
void print_int(long long val)
{
    print_suffix(val, int_suffix);
}
Пример #7
0
int main() {
	struct suffix suffixes[1005];
	char A[1005];
	int pos[1005];
	int lcp[1005];
	int nw[1005];
	int *group, *group_bool;
	int start,end,min;
	int ncases;
	int nwords,max_words;
	int global_max;
	int global_index;
	char c;
	int i=0,len=0;
	int j=0,k=0;
	int sum=0;
	i=0;len=0;j=0;k=0;
	sum=0;nwords=1;
	c=getchar_unlocked();
	while(c!='\n') {
		suffixes[i].size=i+1;
		if(c=='$') nwords++;
		A[i]=c;
		c=getchar_unlocked();
		i++;
	}
	max_words=nwords;
	printf("There are %d words\n",max_words);
	suffixes[i].size=i+1;
	A[i]='$';
	strcpy(suffixes[i].ar,A); 
	len=i; 
	nwords=1;
	for(j=0;j<=len;j++) {
		printf("%c",A[j]);
	}
	printf("\n");
	for(j=0;j<=len;j++) {
		for(k=j;k<=len;k++) {
			suffixes[i].ar[k-j]=A[k];
			suffixes[i].nwords=max_words+1-nwords;
		}
		if(suffixes[i].ar[0]=='$') nwords++;
		i--;
	}

	//len+1 because len is 7 
	qsort(suffixes,len+1,sizeof(struct suffix),cmp);
	for(i=0;i<=len;i++) {
		pos[i]=len+1-suffixes[i].size;
		nw[i]=suffixes[i].nwords;
	}
	for(i=0;i<=len;i++) {
		print_suffix(&suffixes[i]);
	}
//	lcp_construct(A,pos,lcp,len);
//	for(i=max_words;i<=len;i++) {
//		printf("%d ",lcp[i]);
//	}
	printf("\n");
	for(i=max_words;i<=len;i++) {
		printf("%d ",nw[i]);
	}
	printf("\n");
	printf("Length is %d\n",len);
	//start indexing from1 to max_word
	group=(int *)malloc((max_words+1)*sizeof(int));
	group_bool=(int *)malloc((max_words+1)*sizeof(int));
	for(i=0;i<=max_words;i++) {
		group[i]=0;
		group_bool[i]=0;
	}
	start=max_words;
	//substring cannot be length
	global_max=0;
	global_index=0;
	start=max_words;
	end=start;		
	while(start<=len && end<=len) {
//		printf("Starting search at %d\n",start);
		sum=0;
		for(i=1;i<=max_words;i++) {
			sum+=group_bool[i];
		}
//		printf("Boolean values:");
//		for(i=1;i<=max_words;i++) {
//			printf("%d ",group_bool[i]);
//		}
//		printf("\n");
		while(sum!=max_words) {
		//	printf("Checking end %d\n",end);
			//check if seen
			if(group_bool[nw[end]]==0) {
				group_bool[nw[end]]=1;
				sum++;
			}
			group[nw[end]]++;
			//			printf("Sum: %d\n",sum);
			//			for(i=1;i<=max_words;i++) {
			//				printf("%d ",group_bool[i]);
			//			}
			//			printf("\n");
			//			printf("%d %d\n",start,end);
//		printf("Group/Boolean values ending at %d\n",end);
//		for(i=1;i<=max_words;i++) {
//			printf("%d ",group[i]);
//		}
//		printf("\n");
//		for(i=1;i<=max_words;i++) {
//			printf("%d ",group_bool[i]);
//		}
//		printf("\n");
			end++;
		}	
		end--;
		printf("Occurence at indexes %d %d\n",start,end);
		min=lcp_two(&suffixes[start],&suffixes[end]);
		if(min>global_max) {
			global_max=min;
			global_index=start;
		}
		while(sum==max_words) {
			//remove first entry
			group[nw[start]]--;
			if(group[nw[start]]==0) {
				group_bool[nw[start]]=0;
				sum--;
			}
			start++;
//			printf("Group/boolean values beginning at %d ending at %d\n",start,end);	
//			for(i=1;i<=max_words;i++) {
//				printf("%d ",group[i]);
//			}
//			printf("\n");
//			for(i=1;i<=max_words;i++) {
//				printf("%d ",group_bool[i]);
//			}
//			printf("\n");
			if(sum==max_words) {
				printf("Occurence at indexes %d %d\n",start,end);
				min=lcp_two(&suffixes[start],&suffixes[end]);
				if(min>global_max) {
					global_max=min;
					global_index=start;
				}
			}
		}
		end++;
	}	
//	global_max=0;
	printf("%d %d\n",global_max,global_index);
	printf("The longest common substring (not unique)\n");
	for(i=0;i<global_max;i++) {
		printf("%c",suffixes[global_index].ar[i]);
	}
	printf("\n");
	return 0;
}