Пример #1
0
/*
 * Benchmark and self-check driver.
 *
 * Parses the reads file named by argv[1] with parse_reads_readsim(), builds
 * the reads sequence with generate_reads_sequence(), constructs the suffix
 * array (sa_is) and the LCP array (lcp) over that sequence, prints the
 * processor time spent in each stage, and finally cross-checks SA and LCP
 * against the sequence.
 *
 * Returns 0 after a completed run (verification problems are only reported
 * on stdout) and 1 on a usage error or an allocation failure.
 */
int main(int argc, char **argv) {
	/* argv[1] is a null pointer when no argument is given; refuse early. */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s <reads-file>\n", argc > 0 ? argv[0] : "program");
		return 1;
	}

	clock_t start, finish;

	read_data *reads;

	start = clock();
	int reads_cnt = parse_reads_readsim(&reads, argv[1]);
	finish = clock();
	printf("Parsing reads: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC);

	reads_sequence reads_seq;

	start = clock();
	int reads_seq_len = generate_reads_sequence(&reads_seq, reads, reads_cnt);
	finish = clock();
	printf("Generating reads sequence: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC);

	int *SA = (int*) malloc(reads_seq_len * sizeof(int));
	/* malloc(0) may legitimately return NULL, so only a non-empty request counts as a failure. */
	if (SA == NULL && reads_seq_len > 0) {
		fprintf(stderr, "Failed to allocate memory for SA.\n");
		free_read_data(&reads, reads_cnt);
		free_reads_sequence(&reads_seq);
		return 1;
	}

	start = clock();
	sa_is(SA, reads_seq.sequence, reads_seq_len, reads_cnt + 4);
	finish = clock();
	printf("Generating SA: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC);

	int *LCP = (int*) malloc(reads_seq_len * sizeof(int));
	if (LCP == NULL && reads_seq_len > 0) {
		fprintf(stderr, "Failed to allocate memory for LCP.\n");
		free_read_data(&reads, reads_cnt);
		free_reads_sequence(&reads_seq);
		free(SA);
		return 1;
	}

	start = clock();
	lcp(LCP, SA, reads_seq.sequence, reads_seq_len);
	finish = clock();
	printf("Generating LCP: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC);

	printf("Checking SA and LCP...\n");

	for (int i = 1; i < reads_seq_len; i++) {
		/*
		 * The first LCP[i] symbols of the two neighbouring suffixes must match.
		 * A wrong LCP value could point past the end of the sequence, so running
		 * off the end is reported as an error instead of being read. One message
		 * per position is enough, hence the break.
		 */
		for (int j = 0; j < LCP[i]; j++) {
			if (SA[i-1] + j >= reads_seq_len || SA[i] + j >= reads_seq_len
					|| reads_seq.sequence[SA[i-1]+j] != reads_seq.sequence[SA[i]+j]) {
				printf("LCP error at position %d.\n", i);
				break;
			}
		}

		/*
		 * The first symbol after the common prefix must be strictly smaller in
		 * the suffix that comes first; skipped when either suffix ends there.
		 */
		if (SA[i-1] + LCP[i] < reads_seq_len && SA[i] + LCP[i] < reads_seq_len) {
			if (reads_seq.sequence[SA[i-1]+LCP[i]] >= reads_seq.sequence[SA[i]+LCP[i]]) {
				printf("SA error at positions %d-%d.\n", i - 1, i);
			}
		}
	}

	printf("Done!\n");

	free_read_data(&reads, reads_cnt);

	free_reads_sequence(&reads_seq);

	free(SA);

	free(LCP);

	return 0;
}
	// Container front end for the pointer-based sa_is().
	//
	// Copies the symbols of `s` into an int buffer that is one element longer
	// than `s` (the extra last element keeps its value-initialised 0), runs the
	// pointer-based sa_is() on that buffer and returns the resulting array of
	// s.size() + 1 entries. An empty `s` yields a single-element vector {0}.
	std::vector<int> sa_is(const T &s) const {
		if(s.size() == 0){
			return std::vector<int>(1);
		}

		const int total = s.size() + 1;
		const int last = total - 1;

		// text[last] is deliberately left at 0.
		std::vector<int> text(total);
		for(int pos = 0; pos < last; ++pos){
			text[pos] = s[pos];
		}

		std::vector<int> result(total);
		sa_is(result.data(), text.data(), total);
		return result;
	}
	// Builds the suffix array of s[0..n) into sa[0..n) following the SA-IS
	// (induced sorting) outline: seed the LMS positions, induce, name the LMS
	// substrings, recurse on the reduced string when names repeat, then seed
	// the sorted LMS suffixes and induce once more.
	//
	// sa : output buffer of n ints; it is also used as scratch space (the
	//      reduced string lives in its tail during the computation).
	// s  : input symbols; each symbol is used as an index into the bucket
	//      table, so symbols must be non-negative.
	// n  : number of symbols; must be >= 1 because types[n - 1] is written.
	//
	// NOTE(review): the naming loop appears to rely on s[n - 1] being a unique
	// smallest sentinel so that comparisons stop before running past the end;
	// confirm that every caller appends one.
	void sa_is(int *sa, const T *s, int n) const {
		// types[i] == true : suffix i is S-type (smaller than suffix i + 1),
		// types[i] == false: suffix i is L-type. The last position is forced
		// to S-type and the rest is filled right to left.
		std::vector<bool> types(n);
		types[n - 1] = true;
		for(int i = n - 2; i >= 0; --i){
			types[i] = (s[i] != s[i + 1] ? s[i] < s[i + 1] : types[i + 1]);
		}
		// Alphabet size: largest symbol value plus one.
		const int k = static_cast<int>(*std::max_element(s, s + n)) + 1;
		// Presumably the end offset of every symbol's bucket (it is consumed
		// with a pre-decrement below) -- compute_buckets is not visible here.
		std::vector<int> buckets(compute_buckets<true>(s, n, k));
		// Step 1: empty sa, then drop every position accepted by is_lms() at
		// the current end of its symbol's bucket.
		for(int i = 0; i < n; ++i){ sa[i] = -1; }
		for(int i = 1; i < n; ++i){
			if(is_lms(types, i)){ sa[--buckets[s[i]]] = i; }
		}
		// Induce the remaining entries from the seeded LMS positions
		// (helpers defined elsewhere).
		induce_sa_l(sa, s, n, k, types);
		induce_sa_s(sa, s, n, k, types);
		// Step 2: keep only the LMS positions, in their induced order, packed
		// into sa[0..m); clear the rest.
		int m = 0;
		for(int i = 0; i < n; ++i){
			if(is_lms(types, sa[i])){ sa[m++] = sa[i]; }
		}
		for(int i = m; i < n; ++i){ sa[i] = -1; }
		// Step 3: give each LMS substring a name. `prev` is the start of the
		// last substring that received a new name; the current substring gets
		// a new name when it differs from it in a symbol or a type.
		int num_names = 0, prev = -1;
		for(int i = 0; i < m; ++i){
			int p = sa[i];
			bool diff = false;
			for(int d = 0; d < n; ++d){
				const int l = p + d, r = prev + d;
				if(prev < 0 || s[l] != s[r] || types[l] != types[r]){
					diff = true;
					break;
				}else if(d > 0 && (is_lms(types, l) || is_lms(types, r))){
					// Reached the next LMS position without a mismatch:
					// the two substrings are equal.
					break;
				}
			}
			if(diff){
				++num_names;
				prev = p;
			}
			// Store the name at slot m + p / 2 (p rounded down to even, then
			// halved). Relies on LMS positions never being adjacent so that
			// distinct positions map to distinct slots.
			p = (p - (p & 1)) / 2;
			sa[m + p] = num_names - 1;
		}
		// Pack the stored names (the non-negative entries) to the tail of sa,
		// preserving their order; sa[n - m..n) then holds the reduced string.
		for(int i = n - 1, j = n - 1; i >= m; --i){
			if(sa[i] >= 0){ sa[j--] = sa[i]; }
		}
		int *reduced_s = sa + n - m;
		// Step 4: sort the suffixes of the reduced string into sa[0..m).
		if(num_names < m){
			// Some names repeat: sort recursively.
			sa_is(sa, reduced_s, m);
		}else{
			// All names are distinct, so each name is directly its own rank.
			for(int i = 0; i < m; ++i){ sa[reduced_s[i]] = i; }
		}
		// Step 5: translate reduced-string indices back to text positions.
		// reduced_s is overwritten with the LMS positions in text order.
		buckets = compute_buckets<true>(s, n, k);
		for(int i = 1, j = 0; i < n; ++i){
			if(is_lms(types, i)){ reduced_s[j++] = i; }
		}
		for(int i = 0; i < m; ++i){ sa[i] = reduced_s[sa[i]]; }
		for(int i = m; i < n; ++i){ sa[i] = -1; }
		// Move the sorted LMS suffixes, last to first, to the ends of their
		// buckets. The source slot is cleared before the write, so the entry
		// survives when source and destination are the same slot.
		for(int i = m - 1; i >= 0; --i){
			const int j = sa[i];
			sa[i] = -1;
			sa[--buckets[s[j]]] = j;
		}
		// Step 6: final induction passes over the correctly ordered LMS seeds.
		induce_sa_l(sa, s, n, k, types);
		induce_sa_s(sa, s, n, k, types);
	}
	// Constructs the object by running sa_is() on `s` and storing the
	// returned array in m_suffix_array.
	explicit SuffixArray(const T &s) : m_suffix_array(sa_is(s)) {}