int main(int argc, char **argv) { clock_t start, finish; read_data *reads; start = clock(); int reads_cnt = parse_reads_readsim(&reads, argv[1]); finish = clock(); printf("Parsing reads: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC); reads_sequence reads_seq; start = clock(); int reads_seq_len = generate_reads_sequence(&reads_seq, reads, reads_cnt); finish = clock(); printf("Generating reads sequence: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC); int *SA = (int*) malloc(reads_seq_len * sizeof(int)); start = clock(); sa_is(SA, reads_seq.sequence, reads_seq_len, reads_cnt + 4); finish = clock(); printf("Generating SA: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC); int *LCP = (int*) malloc(reads_seq_len * sizeof(int)); start = clock(); lcp(LCP, SA, reads_seq.sequence, reads_seq_len); finish = clock(); printf("Generating LCP: %.4f sec\n", (double) (finish - start) / (double) CLOCKS_PER_SEC); printf("Checking SA and LCP...\n"); for (int i = 1; i < reads_seq_len; i++) { for (int j = 0; j < LCP[i]; j++) { if (reads_seq.sequence[SA[i-1]+j] != reads_seq.sequence[SA[i]+j]) { printf("LCP error at position %d.\n", i); } } if (SA[i-1] + LCP[i] < reads_seq_len && SA[i] + LCP[i] < reads_seq_len) { if (reads_seq.sequence[SA[i-1]+LCP[i]] >= reads_seq.sequence[SA[i]+LCP[i]]) { printf("SA error at positions %d-%d.\n", i - 1, i); } } } printf("Done!\n"); free_read_data(&reads, reads_cnt); free_reads_sequence(&reads_seq); free(SA); free(LCP); return 0; }
/// Computes the suffix array of `s` extended by one trailing terminator.
///
/// The input is copied into an int buffer that is one element longer than
/// `s`; the extra last element keeps its value-initialized 0 and acts as the
/// terminator handed to the pointer-based sa_is overload.
///
/// @param s  Indexable sequence providing size() and operator[].
/// @return   A vector of s.size() + 1 entries filled by the pointer-based
///           overload; for an empty `s`, a single-element vector holding 0.
std::vector<int> sa_is(const T &s) const {
    if(s.size() == 0){
        // Only the terminator exists: its suffix array is just {0}.
        return std::vector<int>(1);
    }

    const int total = s.size() + 1;

    // Widen every symbol to int; symbols[total - 1] stays 0.
    std::vector<int> symbols(total);
    for(int pos = 0; pos < total - 1; ++pos){
        symbols[pos] = s[pos];
    }

    std::vector<int> result(total);
    sa_is(result.data(), symbols.data(), total);
    return result;
}
/// Core suffix-array construction by induced sorting (SA-IS style), operating
/// on raw buffers and recursing on a reduced string stored inside `sa`.
///
/// @param sa  Output buffer of n ints; also used as scratch space for the
///            reduced string and its names during construction.
/// @param s   Input symbols, n elements. Symbols are used as bucket indices,
///            so they are presumably non-negative -- confirm with caller.
/// @param n   Number of symbols. s[n - 1] is accessed unconditionally, so
///            n must be at least 1.
///
/// NOTE(review): the recursive call passes an int* as `s`, so T is presumably
/// a template parameter of this function (declaration not visible here).
void sa_is(int *sa, const T *s, int n) const {
    // Classify each position: types[i] is true when suffix i compares smaller
    // than suffix i + 1 (S-type), decided by the first differing symbol; equal
    // symbols inherit the type of the next position. The last position is
    // always marked true.
    std::vector<bool> types(n);
    types[n - 1] = true;
    for(int i = n - 2; i >= 0; --i){
        types[i] = (s[i] != s[i + 1] ? s[i] < s[i + 1] : types[i + 1]);
    }
    // Alphabet size: largest symbol + 1.
    const int k = static_cast<int>(*std::max_element(s, s + n)) + 1;
    // Bucket cursors, decremented before each store below -- presumably the
    // end offset of each symbol's bucket (compute_buckets is defined elsewhere).
    std::vector<int> buckets(compute_buckets<true>(s, n, k));
    for(int i = 0; i < n; ++i){ sa[i] = -1; }
    // Step 1: drop every LMS position (as reported by is_lms) at the tail of
    // its symbol's bucket, then let the two induce passes place the rest.
    for(int i = 1; i < n; ++i){
        if(is_lms(types, i)){ sa[--buckets[s[i]]] = i; }
    }
    induce_sa_l(sa, s, n, k, types);
    induce_sa_s(sa, s, n, k, types);
    // Step 2: compact the LMS positions, in their current sorted order, into
    // sa[0..m) and clear the remainder of the buffer.
    int m = 0;
    for(int i = 0; i < n; ++i){
        if(is_lms(types, sa[i])){ sa[m++] = sa[i]; }
    }
    for(int i = m; i < n; ++i){ sa[i] = -1; }
    // Step 3: name the LMS substrings. Consecutive entries get the same name
    // when their symbols and types match up to (and including) the next LMS
    // position; otherwise a new name is issued.
    int num_names = 0, prev = -1;
    for(int i = 0; i < m; ++i){
        int p = sa[i];
        bool diff = false;
        for(int d = 0; d < n; ++d){
            const int l = p + d, r = prev + d;
            // prev < 0 only for the first entry; the short-circuit keeps
            // s[r] / types[r] from being read in that case.
            if(prev < 0 || s[l] != s[r] || types[l] != types[r]){
                diff = true;
                break;
            }else if(d > 0 && (is_lms(types, l) || is_lms(types, r))){
                break;
            }
        }
        if(diff){
            ++num_names;
            prev = p;
        }
        // Store the name at slot m + floor(p / 2). This assumes distinct LMS
        // positions never share a slot (i.e. they are never adjacent) -- TODO
        // confirm against is_lms.
        p = (p - (p & 1)) / 2;
        sa[m + p] = num_names - 1;
    }
    // Step 4: pack the stored names (entries >= 0) to the end of sa, keeping
    // their text order; the last m entries then form the reduced string.
    for(int i = n - 1, j = n - 1; i >= m; --i){
        if(sa[i] >= 0){ sa[j--] = sa[i]; }
    }
    int *reduced_s = sa + n - m;
    // Step 5: sort the reduced string. If some names repeat, recurse, writing
    // the result into sa[0..m); if all names are unique, each name is already
    // its own rank, so invert the mapping directly.
    if(num_names < m){
        sa_is(sa, reduced_s, m);
    }else{
        for(int i = 0; i < m; ++i){ sa[reduced_s[i]] = i; }
    }
    // Step 6: translate reduced-string indices back to text positions.
    // reduced_s is overwritten with the LMS positions in text order first.
    buckets = compute_buckets<true>(s, n, k);
    for(int i = 1, j = 0; i < n; ++i){
        if(is_lms(types, i)){ reduced_s[j++] = i; }
    }
    for(int i = 0; i < m; ++i){ sa[i] = reduced_s[sa[i]]; }
    for(int i = m; i < n; ++i){ sa[i] = -1; }
    // Step 7: scatter the now-sorted LMS positions into their buckets. Walking
    // from the back while filling each bucket from its tail keeps their
    // relative order. Then induce the final order of all other suffixes.
    for(int i = m - 1; i >= 0; --i){
        const int j = sa[i];
        sa[i] = -1;
        sa[--buckets[s[j]]] = j;
    }
    induce_sa_l(sa, s, n, k, types);
    induce_sa_s(sa, s, n, k, types);
}
explicit SuffixArray(const T &s) : m_suffix_array(sa_is(s)) { }