Ejemplo n.º 1
0
/*
 * Reads three integers (n, m, k) and then n sequence values from standard
 * input. Each value is stored twice, at A[i] and A[n + i], so that A holds
 * the sequence followed by a copy of itself. The doubled length is then
 * used to build the suffix array and the LCP table.
 *
 * Returns 0 on success and 1 when the input is malformed or n is negative.
 *
 * NOTE(review): the capacity of A is declared elsewhere; this function
 * assumes it holds at least 2*n + 3 elements -- confirm against the
 * declaration.
 */
int main() {
    if (scanf("%d %d %d", &n, &m, &k) != 3 || n < 0) {
        return 1;
    }

    for (int i = 0; i < n; i++) {
        if (scanf("%d", &A[i]) != 1) {
            return 1;
        }
        A[n + i] = A[i];
    }

    n *= 2;
    /* Three zero entries are appended right after the doubled sequence. */
    A[n] = A[n + 1] = A[n + 2] = 0;

    suffix_array(A, sa, n, m - 1);
    compute_lcp(A, sa, lcp, n);
    return 0;
}
Ejemplo n.º 2
0
Archivo: lcp.c Proyecto: giavjeko/bio
/*
 * Runs bwt_transform on `string`, builds a wavelet tree from the transformed
 * text with wtree_generate, and passes that tree to compute_lcp together
 * with `result` and `len`.
 *
 * The scratch buffer for the transform is len + 1 bytes and is released as
 * soon as the tree has been generated; the tree itself is released with
 * free() after compute_lcp returns.
 *
 * NOTE(review): the malloc result is used without a NULL check -- confirm
 * whether callers can tolerate an allocation failure here.
 */
void process(char* string, int* result, int len) {
  char* transformed = malloc((len + 1) * sizeof *transformed);
  Wtree* tree;

  bwt_transform(string, transformed, len);
  tree = wtree_generate(transformed, len);
  free(transformed);

  compute_lcp(tree, result, len);
  free(tree);
}
Ejemplo n.º 3
0
// Writes exactly `len` bytes starting at `buf` to descriptor `fd`.
// A short write resumes from where the previous call stopped, and a write
// interrupted by a signal (EINTR) is retried. Any other failure is reported
// on stderr using `path` and terminates the process.
// Returns `len`, so the caller can add it to its byte counters.
static size_t
index_write_all
(
 int          fd,
 const void * buf,
 size_t       len,
 const char * path
)
{
   const char * cursor = buf;
   size_t done = 0;
   while (done < len) {
      ssize_t w = write(fd, cursor + done, len - done);
      if (w < 0 && errno == EINTR) {
         continue;
      }
      if (w <= 0) {
         // A zero-byte result for a non-empty request would loop forever.
         if (w == 0) errno = EIO;
         fprintf(stderr, "error in write_index (write %s): %s.\n", path, strerror(errno));
         exit(EXIT_FAILURE);
      }
      done += (size_t) w;
   }
   return done;
}

// Returns a newly allocated string holding `filename` followed by `ext`.
// The caller owns the result and must free() it. Exits on allocation failure.
static char *
index_make_path
(
 const char * filename,
 const char * ext
)
{
   size_t base_len = strlen(filename);
   size_t ext_len  = strlen(ext);
   char * path = malloc(base_len + ext_len + 1);
   if (path == NULL) {
      fprintf(stderr, "error in write_index (malloc): %s.\n", strerror(errno));
      exit(EXIT_FAILURE);
   }
   memcpy(path, filename, base_len);
   memcpy(path + base_len, ext, ext_len + 1);
   return path;
}

// Opens `path` for writing (created if missing, truncated, mode 0644) and
// returns the descriptor. Reports the error and exits if open() fails.
static int
index_open_output
(
 const char * path
)
{
   int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
   if (fd == -1) {
      fprintf(stderr, "error in write_index (open %s): %s.\n", path, strerror(errno));
      exit(EXIT_FAILURE);
   }
   return fd;
}

// Closes `fd`; reports the error and exits if close() fails, since a failed
// close can mean that previously written data did not reach the file.
static void
index_close_output
(
 int          fd,
 const char * path
)
{
   if (close(fd) == -1) {
      fprintf(stderr, "error in write_index (close %s): %s.\n", path, strerror(errno));
      exit(EXIT_FAILURE);
   }
}

// Builds the index for the genome in `filename` and writes it to four files
// named after it: `<filename>.occ`, `<filename>.gen`, `<filename>.sar` and
// `<filename>.lcp`. Progress and timing are printed on stderr.
//
// File layouts, in write order:
//   .occ  mark interval (uint64), C table (NUM_BASES+1 words), OCC table.
//   .gen  the gsize genome bytes returned by compact_genome.
//   .sar  suffix-array word width (uint64), compacted suffix array.
//   .lcp  mark interval, min depth, sample index size + data, extended index
//         size + data, sample count + 8-bit values, extended count + 64-bit
//         values.
//
// Returns 0 on success. Any open/write/close/allocation failure is reported
// on stderr and terminates the process with EXIT_FAILURE.
//
// The lookup-table (.lut) output is currently disabled and not produced.
int
write_index
(
 char * filename
)
{
   // Output files.
   char * sarfile = index_make_path(filename, ".sar");
   char * occfile = index_make_path(filename, ".occ");
   char * genfile = index_make_path(filename, ".gen");
   char * lcpfile = index_make_path(filename, ".lcp");

   // Open files.
   int fsa = index_open_output(sarfile);
   int foc = index_open_output(occfile);
   int fgn = index_open_output(genfile);
   int flc = index_open_output(lcpfile);

   clock_t tstart;
   size_t stot = 0, bytes;

   // Parse genome and reverse complement.
   uint64_t gsize;
   fprintf(stderr, "reading      genome file  ..."); tstart = clock();
   char * genome = compact_genome(filename, &gsize);
   fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC);

   // Compute suffix array.
   fprintf(stderr, "computing    suffix array ..."); tstart = clock();
   uint64_t * sa = (uint64_t *)compute_sa(genome, gsize);
   fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC);

   // Compute OCC.
   uint64_t occ_size;
   fprintf(stderr, "computing    occ table    ..."); tstart = clock();
   uint64_t * occ = compute_occ(genome, sa, gsize, &occ_size);
   fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC);

   // Compress suffix array.
   fprintf(stderr, "compressing  suffix array ..."); tstart = clock();
   // Smallest word width such that 2^sa_bits >= gsize.
   uint64_t sa_bits = 0;
   while (gsize > ((uint64_t)1 << sa_bits)) {
      sa_bits++;
   }
   uint64_t sa_size = compact_array(sa, gsize, sa_bits);
   if (sa_size > 0) {
      // Shrink to the compacted size. If realloc fails the original (larger)
      // buffer is still valid, so keep using it instead of losing the pointer.
      uint64_t * shrunk = realloc(sa, sa_size*sizeof(uint64_t));
      if (shrunk != NULL) {
         sa = shrunk;
      }
   }
   fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC);

   // Compute C.
   fprintf(stderr, "computing    C table      ..."); tstart = clock();
   uint64_t * c = compute_c(occ + occ_size - NUM_BASES);
   fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC);

   // Write .OCC file
   fprintf(stderr, "writing occ...");
   // Mark interval.
   uint64_t mark_int = OCC_MARK_INTERVAL;
   stot += index_write_all(foc, &mark_int, sizeof(uint64_t), occfile);
   // Write C.
   stot += index_write_all(foc, c, (NUM_BASES+1)*sizeof(uint64_t), occfile);
   // Write OCC.
   stot += index_write_all(foc, occ, occ_size*sizeof(uint64_t), occfile);
   fprintf(stderr, " %zu bytes written.\n", stot);
   index_close_output(foc, occfile);
   // Released before the LCP computation starts.
   free(c);
   free(occ);

   // Compute LCP.
   fprintf(stderr, "computing    LCP intervals..."); tstart = clock();
   lcp_t lcp = compute_lcp(gsize, LCP_MIN_DEPTH, sa_bits, sa, genome);
   fprintf(stderr, " done [%.3fs]\n", (clock()-tstart)*1.0/CLOCKS_PER_SEC);

   // Write .GEN FILE
   fprintf(stderr, "writing gen...");
   // Write genome bases (forward and reverse strand).
   bytes = index_write_all(fgn, genome, gsize*sizeof(char), genfile);
   stot += bytes;
   fprintf(stderr, " %zu bytes written.\n", bytes);
   index_close_output(fgn, genfile);

   // .SAR FILE
   fprintf(stderr, "writing sar...");
   // Write sa word width.
   bytes  = index_write_all(fsa, &sa_bits, sizeof(uint64_t), sarfile);
   // Write compacted suffix array.
   bytes += index_write_all(fsa, sa, sa_size*sizeof(uint64_t), sarfile);
   stot += bytes;
   fprintf(stderr, " %zu bytes written.\n", bytes);
   index_close_output(fsa, sarfile);

   // .LCP FILE
   fprintf(stderr, "writing lcp...");
   // LCP index header: mark interval, then minimum depth.
   mark_int = LCP_MARK_INTERVAL;
   bytes  = index_write_all(flc, &mark_int, sizeof(uint64_t), lcpfile);
   uint64_t min_depth = LCP_MIN_DEPTH;
   bytes += index_write_all(flc, &min_depth, sizeof(uint64_t), lcpfile);
   // Write sample index size.
   bytes += index_write_all(flc, &(lcp.lcpidx_size), sizeof(uint64_t), lcpfile);
   // Write sample index.
   bytes += index_write_all(flc, lcp.idx_sample, lcp.lcpidx_size*sizeof(uint64_t), lcpfile);
   // Write extended index size.
   bytes += index_write_all(flc, &(lcp.extidx_size), sizeof(uint64_t), lcpfile);
   // Write extended index.
   bytes += index_write_all(flc, lcp.idx_extend, lcp.extidx_size*sizeof(uint64_t), lcpfile);
   // Write LCP samples (count, then one int8_t per sample).
   uint64_t nsamples = (lcp.lcp_sample)->pos;
   bytes += index_write_all(flc, &nsamples, sizeof(uint64_t), lcpfile);
   bytes += index_write_all(flc, (lcp.lcp_sample)->val, nsamples*sizeof(int8_t), lcpfile);
   // Write ext samples (count, then one int64_t per sample).
   nsamples = (lcp.lcp_extend)->pos;
   bytes += index_write_all(flc, &nsamples, sizeof(uint64_t), lcpfile);
   bytes += index_write_all(flc, (lcp.lcp_extend)->val, nsamples*sizeof(int64_t), lcpfile);
   stot += bytes;
   fprintf(stderr, " %zu bytes written.\n", bytes);
   index_close_output(flc, lcpfile);

   fprintf(stderr, "done. %zu bytes written.\n", stot);

   free(sarfile);
   free(occfile);
   free(genfile);
   free(lcpfile);
   // sa was (re)allocated with realloc above, so it is released with free().
   free(sa);
   // NOTE(review): `genome` and the buffers inside `lcp` are not released
   // here because their allocators are not visible in this function --
   // confirm the matching release routines and add them.

   return 0;
}