示例#1
0
文件: 1567.cpp 项目: scPointer/OI
/*
 * Entry point: reads the expected letter count n, then collects n lowercase
 * letters from stdin (every other character is skipped), mapping 'a'..'z' to
 * 1..26 in s[]. A 0 sentinel is appended, the suffix array and the h[] array
 * are built via getSuffixArray()/getH(), and the sum of n - sa[i] - h[i]
 * is printed.
 *
 * Returns 0 on success, 1 if the input is malformed or ends too early.
 */
int main()
{
	/* Without a valid n the read loop below has no meaningful bound. */
	if(scanf("%d",&n)!=1)
	{
		fprintf(stderr,"failed to read n\n");
		return 1;
	}
	while(len<n)
	{
		/* int, not char: EOF must stay distinguishable from real characters,
		 * otherwise a short input makes this loop spin forever. */
		int c=getchar();
		if(c==EOF)
		{
			fprintf(stderr,"unexpected end of input\n");
			return 1;
		}
		if(c>='a' && c<='z')
			s[len++]=c-'a'+1;
	}
	/* The 0 sentinel is smaller than every mapped letter (1..26). */
	s[len++]=0;
	getSuffixArray(len);
	getH(len);
	long long ans=0;
	/* NOTE(review): the loop stops at i == n-1 although len is n+1 here;
	 * whether index n must be included depends on how sa[]/h[] are indexed
	 * by getSuffixArray()/getH() -- confirm against those helpers. */
	for(int i=1;i<n;i++)
		ans+=n-sa[i]-h[i];
	/* %lld is the standard conversion for long long; %I64d is MSVC-only. */
	printf("%lld",ans);
	return 0;
}
示例#2
0
/* Entry point: reads one line from stdin into str, writes a 0 at str[n]
 * (post-incrementing n), then calls getSuffixArray().
 */
int main() {
	// NOTE(review): gets() cannot bound the read and was removed in C11.
	// Replacing it with fgets() needs the capacity of str, which is declared
	// outside this view; fgets() also keeps the trailing newline.
	gets(str);
	// NOTE(review): n is not derived from the input in this block --
	// presumably it is set elsewhere; confirm str[n] is the intended slot.
	str[n++] = 0;
	getSuffixArray();
}
示例#3
0
文件: lcpTree.c 项目: 713/project
/* getLcpTreeShulens: compute intervals for each query;
 * This is the only entry point to the functions in this file.
 *
 * Steps performed:
 *   1. compute the left border and the forward-strand border of every sequence;
 *   2. build the suffix array and the lcp array of the sequence union;
 *   3. compute, per query, the max shulen threshold and the minimal window sum;
 *   4. traverse the lcp tree to collect the query intervals;
 *   5. if BSEARCH is set, correct the per-query binary trees and, when fpout
 *      is non-NULL, print them to fpout;
 *   6. run the window analysis and release all memory allocated here.
 *
 * fpout    - output stream for the interval analysis; may be NULL, in which
 *            case the per-query interval listing is suppressed. It is also
 *            stored in the file-level variable f1.
 * a        - program arguments; the fields D, s, t, P, M, w and m are read here.
 * seqUnion - union of query and subject sequences with borders and gc-content.
 * fwout    - output stream handed on to windowAnalysis().
 *
 * Timing information is printed to stdout when a->t is set.
 */
void getLcpTreeShulens(FILE *fpout, Args *a, SequenceUnion *seqUnion, FILE *fwout) {

	Int64 *sa = NULL, *lcpTab = NULL;
	Int64 i, j, ns;
	Int64 maxDepth;
	Int64 *leftBorders = NULL, lb;
	Int64 *strandBorders = NULL;
	Int64 *maxShulens = NULL, maxs = 0, lS0 = 0;
	Int64 *minSumWin = NULL; // minimal sum (threshold) for which winner-sequences are considered to have strong signal

	clock_t end, start, end2, end3;
	double elapsed_time1, elapsed_time2, elapsed_time3;
	queryInterval **listQueryIntervalsFwd = NULL; // lists of query intervals, there are |Q| lists
	queryInterval **listQueryIntervalsRev = NULL; // lists of query intervals, there are |Q| lists

	/* The fast-search path is disabled (its set-up code was commented out),
	 * so both of these stay NULL and are passed on to traverseLcpTree() as such. */
	queryInterval ***fastSearch = NULL; /* matrix of pointers for fast searching */
	Int64 *lastIndex = NULL; /* array of last indices of each query - goes together with fastSearch */

	qNode **root = NULL; // binary tree root; initially is NULL
	qNode ***l = NULL;  // list of lists of binary tree nodes

	/* publish the output stream and the headers through the file-level variables */
	f1 = fpout;
	headers1 = seqUnion->seqUnion->headers;

	maxDepth = a->D;
	if (a->s) {
		onlyStrongSignal = 1;
	}
	// array of left borders of each sequence
	leftBorders = emalloc(sizeof(Int64) * (seqUnion->numOfSubjects + seqUnion->numOfQueries));
	// array of fwd strand borders of each sequence
	strandBorders = emalloc(sizeof(Int64) * (seqUnion->numOfSubjects + seqUnion->numOfQueries));
	lb = 0;
	for (i = 0; i < seqUnion->numOfSubjects + seqUnion->numOfQueries; i++) {
		leftBorders[i] = lb;
		lb = seqUnion->seqBorders[i] + 1; // the next sequence starts right after this one's border
		// midpoint between the left border and the sequence border
		strandBorders[i] = leftBorders[i] + (seqUnion->seqBorders[i] - leftBorders[i]) / 2;
	}

	// compute suffix array
	start = clock();
	sa = getSuffixArray(seqUnion->seqUnion);
	end = clock();
	elapsed_time1 = (double)(end - start) / CLOCKS_PER_SEC;
	if (!sa) {
		eprintf("sa: out of memory!\n");
	}

	// compute lcp array
	lcpTab = getLcp(seqUnion->seqUnion, sa);
	if (!lcpTab) {
		eprintf("lcp: out of memory!\n");
	}
	end2 = clock();
	elapsed_time2 = (double)(end2 - end) / CLOCKS_PER_SEC;
	// print sa, lcp
#if DEBUG
	printSA_LCP(sa, lcpTab, seqUnion->len);
#endif

	// print run-time
	if (a->t) {
		printf( "\nSA calculation: %.2f seconds.\n", elapsed_time1);
		printf( "\nLCP calculation: %.2f seconds.\n", elapsed_time2);
	}

	/* calculate max shulens expected only by chance for each query */
	/* using both subject's and query's gc-content */
	maxShulens = emalloc(seqUnion->numOfQueries * sizeof(Int64));
	/* Plain malloc is used here, so the result has to be checked explicitly
	 * before the loop below writes into it. A NULL result for a zero-sized
	 * request is not an error.
	 * NOTE(review): eprintf() is used the same way as in the sa/lcpTab checks
	 * above and presumably does not return -- confirm. */
	minSumWin = malloc(seqUnion->numOfQueries * sizeof(Int64));
	if (!minSumWin && seqUnion->numOfQueries > 0) {
		eprintf("minSumWin: out of memory!\n");
	}
	lS0 = seqUnion->seqBorders[seqUnion->numOfQueries] - leftBorders[seqUnion->numOfQueries] + 1; // length of subject = S0
	for (i = 0; i < seqUnion->numOfQueries; i++) {
		//arguments: args->P, lS, gcQ, gcS for query=Qi and subject=S0
		maxShulens[i] = maxShulenNew(a->P, lS0, seqUnion->gc[i], seqUnion->gc[seqUnion->numOfQueries]);
		// keep the largest max shulen over all remaining subjects
		for (j = 1; j < seqUnion->numOfSubjects; j++) {
			maxs = maxShulenNew(a->P, seqUnion->seqBorders[j + seqUnion->numOfQueries] - leftBorders[j + seqUnion->numOfQueries] + 1
															, seqUnion->gc[i], seqUnion->gc[seqUnion->numOfQueries + j]);
			if (maxs > maxShulens[i]) {
				/* when smallest or greatest of all max shulens is used, then there is no effect; for hiv max shulen is 8 in most of combinations */
				maxShulens[i] = maxs;
			}
		}

		// threshold sum for a window; below this value, the "winners" are not considered to have strong signal over a window
		if (a->M == 0) {
			minSumWin[i] = 0;
		}
		else {
			minSumWin[i] = maxShulens[i] * a->w;
		}
		// scale only after minSumWin has been derived from the unscaled value
		maxShulens[i] = (Int64)(a->m * maxShulens[i]);
	}

	// compute lists of query intervals
	traverseLcpTree(lcpTab, sa, seqUnion->seqUnion, seqUnion->numOfSubjects, seqUnion->numOfQueries, seqUnion->seqBorders, leftBorders, strandBorders
		, &listQueryIntervalsFwd, &listQueryIntervalsRev, maxDepth, maxShulens, fastSearch, lastIndex, &root);

	end3 = clock();
	elapsed_time3 = (double)(end3 - end2) / CLOCKS_PER_SEC;

	// print run-time
	if (a->t) {
		printf( "\nLCP-tree traversal calculation: %.2f seconds.\n", elapsed_time3);
	}

	// sa, lcpTab and maxShulens are not used below this point
	free(sa);
	free(lcpTab);
	free(maxShulens);

	// print lists of intervals for each query
	ns = seqUnion->numOfSubjects;

	if (BSEARCH) {
		for (i = 0; i < seqUnion->numOfQueries; i++) {
			correctBT(root[i], -1, strandBorders[i] - leftBorders[i]);
			if (fpout) { // suppress printing of interval analysis on stdout as default action
				fprintf(fpout, "%s\n", seqUnion->seqUnion->headers[i]);
				binTreeTraverse(root[i], seqUnion->seqUnion->headers, strandBorders[i] - leftBorders[i], ns, i, seqUnion->numOfQueries, fpout);
				fprintf(fpout, "\n");
			}
		}
	}
	// (the list-search alternative to BSEARCH is disabled)

	/* windows analysis */
	l = windowAnalysis(a, seqUnion, fwout, listQueryIntervalsFwd, strandBorders, leftBorders, root, BSEARCH, minSumWin, fpout);

	if (BSEARCH) {
		freeBTQueryIntervals(root, seqUnion->numOfQueries);
		if (l) { // windows analysis
			for (i = 0; i < seqUnion->numOfQueries; i++) {
				free(l[i]);
			}
			free(l);
		}
	}

	free(leftBorders);
	free(strandBorders);
	free(minSumWin);
}