/*
 * Program entry point.
 *
 * Reads three integers (n, m, k) from standard input, followed by n integers
 * that are stored in A[0..n-1] and mirrored into A[n..2n-1].  n is then
 * doubled, three zero sentinels are written at A[n], A[n+1] and A[n+2], and
 * the suffix array and LCP array of the doubled sequence are computed with
 * suffix_array() and compute_lcp().
 *
 * k is read here but not used anywhere in this function.
 *
 * Returns 0 on success, 1 when the input is malformed or truncated.
 *
 * NOTE(review): A is written up to index 2*n + 2 (with the n read from the
 * input); its capacity is declared outside this view -- confirm that it is
 * large enough for the biggest accepted n.
 */
int main(void)
{
    /* A failed or partial read would leave n/m/k unset; a negative n would
     * make the sentinel writes below land outside the array. */
    if (scanf("%d %d %d", &n, &m, &k) != 3 || n < 0) {
        fprintf(stderr, "invalid input header\n");
        return 1;
    }

    for (int i = 0; i < n; i++) {
        if (scanf("%d", &A[i]) != 1) {
            fprintf(stderr, "invalid input element\n");
            return 1;
        }
        /* Keep a second copy of the sequence right after the first one. */
        A[n + i] = A[i];
    }

    /* From here on n is the length of the doubled sequence. */
    n *= 2;
    A[n] = A[n + 1] = A[n + 2] = 0;

    suffix_array(A, sa, n, m - 1);
    compute_lcp(A, sa, lcp, n);

    return 0;
}
/*
 * Fills `result` for the input `string` of length `len`.
 *
 * Steps, in order:
 *   1. allocate a temporary buffer of len + 1 bytes;
 *   2. bwt_transform(string, bwt, len) fills that buffer;
 *   3. wtree_generate(bwt, len) builds a Wtree from it, after which the
 *      temporary buffer is released;
 *   4. compute_lcp(wtree, result, len) writes into `result`;
 *   5. the Wtree is released with free().
 *
 * Terminates the program with EXIT_FAILURE if the temporary buffer cannot be
 * allocated (previously a NULL buffer was handed straight to bwt_transform).
 *
 * NOTE(review): the Wtree is released with a single free(); if
 * wtree_generate() allocates nested storage, a dedicated destructor would be
 * needed -- confirm against its definition.
 */
void process(char* string, int* result, int len)
{
    /* Widen before adding so that len + 1 is not evaluated in int. */
    char *bwt = malloc((size_t)len + 1);
    if (bwt == NULL) {
        fprintf(stderr, "error in process (malloc): out of memory.\n");
        exit(EXIT_FAILURE);
    }

    bwt_transform(string, bwt, len);

    Wtree *wtree = wtree_generate(bwt, len);
    /* The transformed text is no longer needed once the Wtree exists. */
    free(bwt);

    compute_lcp(wtree, result, len);
    free(wtree);
}
int write_index ( char * filename ) { // Output files. char * sarfile = malloc(strlen(filename)+5); strcpy(sarfile, filename); strcpy(sarfile + strlen(filename), ".sar"); char * occfile = malloc(strlen(filename)+5); strcpy(occfile, filename); strcpy(occfile + strlen(filename), ".occ"); char * genfile = malloc(strlen(filename)+5); strcpy(genfile, filename); strcpy(genfile + strlen(filename), ".gen"); // char * lutfile = malloc(strlen(filename)+5); // strcpy(lutfile, filename); // strcpy(lutfile + strlen(filename), ".lut"); char * lcpfile = malloc(strlen(filename)+5); strcpy(lcpfile, filename); strcpy(lcpfile + strlen(filename), ".lcp"); // Open files. int fsa = open(sarfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); int foc = open(occfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); int fgn = open(genfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); // int flt = open(lutfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); int flc = open(lcpfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); // Error control. if (fsa == -1) { fprintf(stderr, "error in write_index (open %s): %s.\n", sarfile, strerror(errno)); exit(EXIT_FAILURE); } if (foc == -1) { fprintf(stderr, "error in write_index (open %s): %s.\n", occfile, strerror(errno)); exit(EXIT_FAILURE); } if (fgn == -1) { fprintf(stderr, "error in write_index (open %s): %s.\n", genfile, strerror(errno)); exit(EXIT_FAILURE); } /* if (flt == -1) { fprintf(stderr, "error in write_index (open %s): %s.\n", lutfile, strerror(errno)); exit(EXIT_FAILURE); } */ if (flc == -1) { fprintf(stderr, "error in write_index (open %s): %s.\n", lcpfile, strerror(errno)); exit(EXIT_FAILURE); } free(sarfile); free(occfile); free(genfile); // free(lutfile); free(lcpfile); clock_t tstart; size_t s = 0, stot = 0, bytes; // Parse genome and reverse complement. uint64_t gsize; fprintf(stderr, "reading genome file ..."); tstart = clock(); char * genome = compact_genome(filename, &gsize); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Compute suffix array. 
fprintf(stderr, "computing suffix array ..."); tstart = clock(); uint64_t * sa = (uint64_t *)compute_sa(genome, gsize); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Compute OCC. uint64_t occ_size; fprintf(stderr, "computing occ table ..."); tstart = clock(); uint64_t * occ = compute_occ(genome, sa, gsize, &occ_size); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Compress suffix array. fprintf(stderr, "compressing suffix array ..."); tstart = clock(); uint64_t sa_bits = 0; while (gsize > ((uint64_t)1 << sa_bits)) sa_bits++; uint64_t sa_size = compact_array(sa, gsize, sa_bits); sa = realloc(sa, sa_size*sizeof(uint64_t)); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Compute C. fprintf(stderr, "computing C table ..."); tstart = clock(); uint64_t * c = compute_c(occ + occ_size - NUM_BASES); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Compute LUT /* fprintf(stderr, "computing lookup table ..."); tstart = clock(); uint64_t * lut = compute_lut(c, occ, LUT_KMER_SIZE); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Compress LUT. 
fprintf(stderr, "compressing lookup table ..."); tstart = clock(); uint64_t lut_kmers = 1 << (2*LUT_KMER_SIZE); uint64_t lut_size = compact_array(lut, lut_kmers, sa_bits); lut = realloc(lut, lut_size*sizeof(uint64_t)); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Write .LUT file fprintf(stderr, "writing lut..."); bytes = s = 0; uint64_t kmer_size = LUT_KMER_SIZE; while (s < sizeof(uint64_t)) s += write(flt, &kmer_size, sizeof(uint64_t)); bytes += s; s = 0; while (s < lut_size*sizeof(uint64_t)) s += write(flt, lut + s/sizeof(uint64_t), lut_size*sizeof(uint64_t) - s); bytes += s; stot += bytes; fprintf(stderr, " %ld bytes written.\n",bytes); free(lut); */ // Write .OCC file fprintf(stderr, "writing occ..."); // mark interval s = 0; uint64_t mark_int = OCC_MARK_INTERVAL; while (s < sizeof(uint64_t)) s += write(foc, &mark_int, sizeof(uint64_t)); stot += s; // Write C s = 0; while (s < (NUM_BASES+1)*sizeof(uint64_t)) s += write(foc, c + s/sizeof(uint64_t), (NUM_BASES+1)*sizeof(uint64_t) - s); stot += s; // Write OCC. s = 0; while (s < occ_size * sizeof(uint64_t)) s += write(foc,occ + s/sizeof(uint64_t), occ_size*sizeof(uint64_t) - s); stot += s; fprintf(stderr, " %ld bytes written.\n",stot); free(c); free(occ); // Compute LCP. fprintf(stderr, "computing LCP intervals..."); tstart = clock(); lcp_t lcp = compute_lcp(gsize, LCP_MIN_DEPTH, sa_bits, sa, genome); fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC); // Write .GEN FILE fprintf(stderr, "writing gen..."); // Write genome bases (forward and reverse strand). s = 0; while (s < gsize*sizeof(char)) s += write(fgn, genome + s/sizeof(char), gsize*sizeof(char) - s); stot += s; fprintf(stderr, " %ld bytes written.\n",s); // .SAR FILE fprintf(stderr, "writing sar..."); // Write sa word width. 
bytes = s = 0; while (s < sizeof(uint64_t)) s += write(fsa, &sa_bits, sizeof(uint64_t)); bytes += s; s = 0; while (s < sa_size*sizeof(uint64_t)) s += write(fsa, sa + s/sizeof(uint64_t), sa_size*sizeof(uint64_t) - s); bytes += s; stot += bytes; fprintf(stderr, " %ld bytes written.\n",bytes); // .LCP FILE fprintf(stderr, "writing lcp..."); // LCP index. bytes = s = 0; mark_int = LCP_MARK_INTERVAL; while (s < sizeof(uint64_t)) s += write(flc, &mark_int, sizeof(uint64_t)); bytes += s; s = 0; uint64_t min_depth = LCP_MIN_DEPTH; while (s < sizeof(uint64_t)) s += write(flc, &min_depth, sizeof(uint64_t)); bytes += s; // Write sample index szie. s = 0; while(s < sizeof(uint64_t)) s += write(flc, &(lcp.lcpidx_size), sizeof(uint64_t)); bytes += s; // Write sample index. s = 0; while(s < lcp.lcpidx_size*sizeof(uint64_t)) s += write(flc, lcp.idx_sample + s/sizeof(uint64_t), lcp.lcpidx_size*sizeof(uint64_t) - s); bytes += s; // Write extended index size. s = 0; while(s < sizeof(uint64_t)) s += write(flc, &(lcp.extidx_size), sizeof(uint64_t)); bytes += s; // Write extended index. s = 0; while(s < lcp.extidx_size*sizeof(uint64_t)) s += write(flc, lcp.idx_extend + s/sizeof(uint64_t), lcp.extidx_size*sizeof(uint64_t) - s); bytes += s; // Write LCP samples. s = 0; uint64_t nsamples = (lcp.lcp_sample)->pos; while(s < sizeof(uint64_t)) s+= write(flc, &nsamples, sizeof(uint64_t)); bytes += s; s = 0; while(s < nsamples*sizeof(int8_t)) s += write(flc, (lcp.lcp_sample)->val + s/sizeof(int8_t), nsamples * sizeof(int8_t) - s); bytes += s; // Write ext samples. s = 0; nsamples = (lcp.lcp_extend)->pos; while(s < sizeof(uint64_t)) s+= write(flc, &nsamples, sizeof(uint64_t)); bytes += s; s = 0; while(s < nsamples*sizeof(int64_t)) s += write(flc, (lcp.lcp_extend)->val + s/sizeof(int64_t), nsamples * sizeof(int64_t) - s); bytes += s; stot += bytes; fprintf(stderr, " %ld bytes written.\n",bytes); fprintf(stderr, "done. %ld bytes written.\n", stot); return 0; }