Esempio n. 1
0
/*
 * Builds a reversed copy of a packed reference and writes it out.
 *
 * The reference is loaded with tmap_refseq_read(fn_fasta, 0).  Each base at
 * position p is then stored at position (len - p - 1) of a freshly allocated
 * sequence buffer; the bases are only reversed, their values are stored
 * unchanged.  The result is passed to tmap_refseq_write(..., fn_fasta, 1).
 *
 * fn_fasta: file name handed unchanged to both the reader and the writer.
 *
 * NOTE(review): the trailing 0 / 1 arguments presumably select the forward
 * and the reverse packed files -- confirm against tmap_refseq_read() and
 * tmap_refseq_write().
 */
void
tmap_refseq_pac2revpac(const char *fn_fasta)
{
  tmap_refseq_t *refseq = NULL;
  tmap_refseq_t *refseq_rev = NULL;
  uint32_t src, dst, base;

  tmap_progress_print("reversing the packed reference FASTA");

  refseq = tmap_refseq_read(fn_fasta, 0);

  // Struct assignment gives a member-wise copy: any pointer members of the
  // copy still refer to the data owned by the original.
  refseq_rev = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq_rev");
  *refseq_rev = *refseq;

  // The copy gets its own sequence buffer, which is filled back to front.
  refseq_rev->seq = tmap_calloc(tmap_refseq_seq_memory(refseq->len), sizeof(uint8_t), "refseq_rev->seq");
  for(src = 0; src < refseq->len; src++) {
      dst = refseq->len - src - 1;
      base = tmap_refseq_seq_i(refseq, src);
      tmap_refseq_seq_store_i(refseq_rev, dst, base);
  }

  // Write the reversed sequence.
  tmap_refseq_write(refseq_rev, fn_fasta, 1);

  // Only the buffer and the struct allocated here are freed directly;
  // everything else is released through the original reference.
  free(refseq_rev->seq);
  free(refseq_rev);
  tmap_refseq_destroy(refseq);

  tmap_progress_print2("reversed the packed reference FASTA");
}
Esempio n. 2
0
/*
 * Allocates a tmap_index_t and attaches the reference sequence, the BWT and
 * the SA to it.
 *
 * The three structures are obtained from one of these sources:
 *   - mm == 1:       the tmap_*_mm_read() loaders;
 *   - shm_key != 0:  unpacked from the shared memory segment obtained with
 *                    tmap_shm_init(shm_key, 0, 0);
 *   - otherwise:     the tmap_*_read() loaders.
 * The memory-map choice is tested first and therefore wins over shared memory.
 *
 * fn_fasta: file name handed to the loaders (not used on the shared memory
 *           path).
 * shm_key:  shared memory key; 0 means shared memory is not used.
 * mm:       1 selects the memory-map loaders.
 *
 * Returns the newly allocated index.  tmap_error(..., Exit, ...) is invoked
 * when a shared memory listing yields NULL, or when the BWT or SA sequence
 * length differs from twice the reference length.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key, int32_t mm)
{
  tmap_index_t *index = tmap_calloc(1, sizeof(tmap_index_t), "index");

  index->mm      = mm;
  index->shm_key = shm_key;

  // get the reference information
  if(1 == index->mm) {
      tmap_progress_print("Retrieving reference data from memory map");
      index->refseq = tmap_refseq_mm_read(fn_fasta);
      index->bwt = tmap_bwt_mm_read(fn_fasta);
      index->sa = tmap_sa_mm_read(fn_fasta);
      tmap_progress_print2("Reference data retrieved from memory map");
  }
  else if(0 != index->shm_key) {
      tmap_progress_print("retrieving reference data from shared memory");
      index->shm = tmap_shm_init(index->shm_key, 0, 0);

      // each listing is unpacked and then verified before moving on
      index->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == index->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == index->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_SA));
      if(NULL == index->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }

      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      tmap_progress_print("reading in reference data");
      index->refseq = tmap_refseq_read(fn_fasta);
      index->bwt = tmap_bwt_read(fn_fasta);
      index->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // the BWT and the SA must both cover twice the reference length
  if(index->bwt->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(index->sa->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return index;
}
Esempio n. 3
0
/*
 * Allocates a tmap_index_t and attaches the reference sequence, the BWT and
 * the SA to it.
 *
 * With shm_key == 0 the three structures are read with the tmap_*_read()
 * loaders; with any other key they are unpacked from the shared memory
 * segment obtained with tmap_shm_init(shm_key, 0, 0).
 *
 * fn_fasta: file name handed to the loaders (not used on the shared memory
 *           path).
 * shm_key:  shared memory key; 0 means shared memory is not used.
 *
 * Returns the newly allocated index.  tmap_error(..., Exit, ...) is invoked
 * when a shared memory listing yields NULL, or when the BWT or SA sequence
 * length differs from twice the reference length.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key)
{
  tmap_index_t *index = tmap_calloc(1, sizeof(tmap_index_t), "index");

  index->shm_key = shm_key;

  // get the reference information
  if(0 != index->shm_key) {
      tmap_progress_print("retrieving reference data from shared memory");
      index->shm = tmap_shm_init(index->shm_key, 0, 0);

      // each listing is unpacked and then verified before moving on
      index->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == index->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == index->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_SA));
      if(NULL == index->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }

      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      tmap_progress_print("reading in reference data");
      index->refseq = tmap_refseq_read(fn_fasta);
      index->bwt = tmap_bwt_read(fn_fasta);
      index->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // the BWT and the SA must both cover twice the reference length
  if(index->bwt->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(index->sa->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return index;
}
Esempio n. 4
0
/*
 * Command-line entry point: prints a packed reference as FASTA on standard
 * output.
 *
 * Options:
 *   -a  print the IUPAC character of an ambiguity region (looked up through
 *       amb_bases) for positions that fall inside one, instead of the packed
 *       base;
 *   -v  calls tmap_progress_set_verbosity(1);
 *   -h  print the usage line.
 * Exactly one positional argument, the FASTA file name, is required.
 *
 * Returns 0 after the sequences were written, 1 on an unknown option, on -h,
 * or when the number of positional arguments is not one.
 *
 * NOTE(review): the ambiguity cursor only moves forward within a contig, so
 * amb_positions_end is presumably sorted in ascending order, and positions
 * are compared as pos+1, i.e. presumably 1-based -- confirm with the code
 * that fills these arrays.
 */
int
tmap_refseq_pac2fasta_main(int argc, char *argv[])
{
  int opt, show_help = 0, use_amb = 0;
  uint32_t anno_idx, pos, amb_idx;
  char *fn_fasta = NULL;
  tmap_refseq_t *refseq = NULL;

  for(;;) {
      opt = getopt(argc, argv, "avh");
      if(opt < 0) {
          break;
      }
      switch(opt) {
        case 'a': use_amb = 1; break;
        case 'v': tmap_progress_set_verbosity(1); break;
        case 'h': show_help = 1; break;
        default: return 1;
      }
  }
  if(1 == show_help || 1 != argc - optind) {
      tmap_file_fprintf(tmap_file_stderr, "Usage: %s %s [-avh] <in.fasta>\n", PACKAGE, argv[0]);
      return 1;
  }

  fn_fasta = argv[optind];

  // Note: 'tmap_file_stdout' should not have been previously modified
  tmap_file_stdout = tmap_file_fdopen(fileno(stdout), "wb", TMAP_FILE_NO_COMPRESSION);

  // read in the reference sequence
  refseq = tmap_refseq_read(fn_fasta, 0);

  for(anno_idx = 0; anno_idx < refseq->num_annos; anno_idx++) {
      // header line; its line break is emitted before the first base, or
      // after the loop when the contig is empty
      tmap_file_fprintf(tmap_file_stdout, ">%s", refseq->annos[anno_idx].name->s);

      amb_idx = 0;
      for(pos = 0; pos < refseq->annos[anno_idx].len; pos++) {
          int in_amb = 0; // set when this position lies in an ambiguity region

          // start a new output line every TMAP_REFSEQ_FASTA_LINE_LENGTH bases
          if(0 == (pos % TMAP_REFSEQ_FASTA_LINE_LENGTH)) {
              tmap_file_fprintf(tmap_file_stdout, "\n");
          }

          if(1 == use_amb && 0 < refseq->annos[anno_idx].num_amb) {
              // skip the regions that end before this position
              while(amb_idx < refseq->annos[anno_idx].num_amb
                    && refseq->annos[anno_idx].amb_positions_end[amb_idx] < pos+1) {
                  amb_idx++;
              }
              // a zero result from tmap_interval_overlap is treated as "inside"
              if(amb_idx < refseq->annos[anno_idx].num_amb
                 && 0 == tmap_interval_overlap(pos+1, pos+1, refseq->annos[anno_idx].amb_positions_start[amb_idx], refseq->annos[anno_idx].amb_positions_end[amb_idx])) {
                  in_amb = 1;
              }
          }

          if(in_amb) {
              tmap_file_fprintf(tmap_file_stdout, "%c", tmap_iupac_int_to_char[refseq->annos[anno_idx].amb_bases[amb_idx]]);
          }
          else {
              tmap_file_fprintf(tmap_file_stdout, "%c", "ACGTN"[(int)tmap_refseq_seq_i(refseq, pos + refseq->annos[anno_idx].offset)]);
          }
      }
      tmap_file_fprintf(tmap_file_stdout, "\n");
  }

  // destroy
  tmap_refseq_destroy(refseq);

  // close the output
  tmap_file_fclose(tmap_file_stdout);

  return 0;
}