Beispiel #1
0
tmap_sa_t *
tmap_sa_read(const char *fn_fasta)
{
  char *fn_sa = NULL;
  tmap_file_t *fp_sa = NULL;
  tmap_sa_t *sa = NULL;

  fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE);
  fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION);

  sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");

  if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv;
  sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa");
  sa->sa[0] = -1;

  if(sa->n_sa-1 != tmap_file_fread(sa->sa + 1, sizeof(tmap_bwt_int_t), sa->n_sa - 1, fp_sa)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  sa->sa_intv_log2 = tmap_log2(sa->sa_intv);

  tmap_file_fclose(fp_sa);
  free(fn_sa);

  sa->is_shm = 0;

  return sa;
}
Beispiel #2
0
// Creates a reader over one or more sequence inputs.
//
// fns: input file names, or NULL to open a single input named "-".
// fn_num: number of entries in fns (not used when fns is NULL).
// seq_type: stored as the reader type and passed to every tmap_seq_io_init call.
// compression: passed through to tmap_seq_io_init.
// bam_start_vfo, bam_end_vfo: handed to tmap_sam_io_set_vfo when there is
//   exactly one input and the type is TMAP_SEQ_TYPE_BAM.
// Returns the newly allocated reader.  More than one SAM/BAM input is
// reported through tmap_error.
inline tmap_seqs_io_t*
tmap_seqs_io_init(char **fns, int32_t fn_num, int8_t seq_type, int32_t compression, int64_t bam_start_vfo, int64_t bam_end_vfo)
{
  tmap_seqs_io_t *io= NULL;
  int32_t i, n_inputs;

  // number of inputs that will be opened below
  n_inputs = (NULL == fns) ? 1 : fn_num;

  io = tmap_calloc(1, sizeof(tmap_seqs_io_t), "io");
  io->type = seq_type;

  // NB: this must use the input count directly; io->n has not been set yet
  if(1 < n_inputs && (TMAP_SEQ_TYPE_SAM == io->type || TMAP_SEQ_TYPE_BAM == io->type)) {
      tmap_error("Multi-SAM/BAM not supported", Exit, OutOfRange);
  }

  if(NULL == fns) { // stdin
      io->n = 1;
      io->seqios = tmap_calloc(1, sizeof(tmap_seq_io_t*), "io->seqios");
      io->seqios[0] = tmap_seq_io_init("-", seq_type, 0, compression); // NB: always reading
  }
  else { // from file(s)
      io->n = fn_num;
      io->seqios = tmap_calloc(fn_num, sizeof(tmap_seq_io_t*), "io->seqios");
      for(i=0;i<io->n;i++) {
          io->seqios[i] = tmap_seq_io_init(fns[i], seq_type, 0, compression); // NB: always reading
      }
  }

  // restrict the single BAM input to the requested virtual file offsets
  if (io->n == 1 && io->seqios[0] && io->type == TMAP_SEQ_TYPE_BAM) {
      tmap_sam_io_set_vfo(io->seqios[0]->io.samio, bam_start_vfo, bam_end_vfo);
  }

  return io;
}
Beispiel #3
0
// Writes a reversed copy of the packed reference for the given FASTA.
//
// The packed reference is read with tmap_refseq_read(fn_fasta, 0), the base at
// position i is stored at position len-i-1 of a fresh buffer, and the result
// is written with tmap_refseq_write(..., fn_fasta, 1).
void
tmap_refseq_pac2revpac(const char *fn_fasta)
{
  tmap_refseq_t *fwd = NULL, *rev = NULL;
  uint32_t src, dst, base;

  tmap_progress_print("reversing the packed reference FASTA");

  fwd = tmap_refseq_read(fn_fasta, 0);

  // the reversed reference shares every field with the forward one (shallow
  // copy) except the sequence buffer, which gets its own zeroed storage
  rev = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq_rev");
  *rev = *fwd;
  rev->seq = tmap_calloc(tmap_refseq_seq_memory(fwd->len), sizeof(uint8_t), "refseq_rev->seq");

  // mirror every base around the centre of the sequence
  for(src = 0; src < fwd->len; src++) {
      base = tmap_refseq_seq_i(fwd, src);
      dst = fwd->len - src - 1;
      tmap_refseq_seq_store_i(rev, dst, base);
  }

  tmap_refseq_write(rev, fn_fasta, 1);

  // only the buffer and the struct of the copy are owned here; everything
  // else still belongs to the forward reference and is released with it
  free(rev->seq);
  free(rev);
  tmap_refseq_destroy(fwd);

  tmap_progress_print2("reversed the packed reference FASTA");
}
Beispiel #4
0
// Allocates a sequence wrapper of the given type and initializes the matching
// payload (FASTQ, SFF, or SAM/BAM).
//
// type: one of the TMAP_SEQ_TYPE_* values; any other value is reported through
//   tmap_error.
// Returns the newly allocated sequence.
tmap_seq_t *
tmap_seq_init(int8_t type)
{
  tmap_seq_t *s = tmap_calloc(1, sizeof(tmap_seq_t), "seq");

  s->type = type;

  if(TMAP_SEQ_TYPE_FQ == s->type) {
      s->data.fq = tmap_fq_init();
  }
  else if(TMAP_SEQ_TYPE_SFF == s->type) {
      s->data.sff = tmap_sff_init();
  }
  else if(TMAP_SEQ_TYPE_SAM == s->type || TMAP_SEQ_TYPE_BAM == s->type) {
      // SAM and BAM share the same in-memory record
      s->data.sam = tmap_sam_init();
  }
  else {
      tmap_error("type is unrecognized", Exit, OutOfRange);
  }

  return s;
}
Beispiel #5
0
// Creates a copy of a sequence wrapper, cloning the payload that matches its
// type (FASTQ, SFF, or SAM/BAM).
//
// seq: the sequence to copy.
// Returns the newly allocated copy; an unknown type is reported through
// tmap_error.
tmap_seq_t *
tmap_seq_clone(tmap_seq_t *seq)
{
  tmap_seq_t *copy = tmap_calloc(1, sizeof(tmap_seq_t), "ret");

  copy->type = seq->type;

  if(TMAP_SEQ_TYPE_FQ == seq->type) {
      copy->data.fq = tmap_fq_clone(seq->data.fq);
  }
  else if(TMAP_SEQ_TYPE_SFF == seq->type) {
      copy->data.sff = tmap_sff_clone(seq->data.sff);
  }
  else if(TMAP_SEQ_TYPE_SAM == seq->type || TMAP_SEQ_TYPE_BAM == seq->type) {
      // SAM and BAM share the same in-memory record
      copy->data.sam = tmap_sam_clone(seq->data.sam);
  }
  else {
      tmap_error("type is unrecognized", Exit, OutOfRange);
  }

  return copy;
}
Beispiel #6
0
// Opens a SAM or BAM file for reading.
//
// fn: the file name handed to samopen.
// is_bam: zero opens with mode "r", any other value with mode "rb".
// Returns the newly allocated reader with bam_end_vfo set to 0.  A failed
// open is reported through tmap_error.
// NB: the header is not checked for @SQ lines here (that check is disabled).
static inline tmap_sam_io_t *
tmap_sam_io_init_helper(const char *fn, int32_t is_bam)
{
  tmap_sam_io_t *io = tmap_calloc(1, sizeof(tmap_sam_io_t), "samio");

  io->fp = samopen(fn, (0 == is_bam) ? "r" : "rb", NULL);
  if(NULL == io->fp) {
      tmap_error(fn, Exit, OpenFileError);
  }
  io->bam_end_vfo = 0;

  return io;
}
Beispiel #7
0
// Builds a suffix array view over a packed buffer.
//
// The buffer is read as four consecutive tmap_bwt_int_t values (primary,
// sa_intv, seq_len, n_sa) followed by the SA entries.  The entries are not
// copied: sa->sa points into buf.
//
// buf: the packed buffer, or NULL.
// Returns NULL when buf is NULL, otherwise a newly allocated struct with
// is_shm = 1 and is_mm = 0.
tmap_sa_t *
tmap_sa_shm_unpack(uint8_t *buf)
{
    tmap_sa_t *sa = NULL;
    uint8_t *cursor = buf;
    void *fixed[4];
    int32_t k;

    if(NULL == buf) {
        return NULL;
    }

    sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");

    // fixed length data, in the order it is laid out in the buffer
    fixed[0] = &sa->primary;
    fixed[1] = &sa->sa_intv;
    fixed[2] = &sa->seq_len;
    fixed[3] = &sa->n_sa;
    for(k = 0; k < 4; k++) {
        memcpy(fixed[k], cursor, sizeof(tmap_bwt_int_t));
        cursor += sizeof(tmap_bwt_int_t);
    }

    // variable length data: referenced in place
    sa->sa = (tmap_bwt_int_t*)cursor;

    sa->sa_intv_log2 = tmap_log2(sa->sa_intv);

    sa->is_mm  = 0;
    sa->is_shm = 1;

    return sa;
}
Beispiel #8
0
// Returns the value of tmap_refseq_shm_num_bytes for the reference belonging
// to the given FASTA, reading only the annotation file (the packed sequence
// is not loaded).
//
// fn_fasta: FASTA file name from which the annotation file name is derived.
// is_rev: stored in the temporary reference before it is measured.
size_t
tmap_refseq_shm_read_num_bytes(const char *fn_fasta, uint32_t is_rev)
{
  tmap_refseq_t *ref = NULL;
  tmap_file_t *anno_fp = NULL;
  char *anno_name = NULL;
  size_t num_bytes = 0;

  // temporary reference that only carries the annotations
  ref = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");
  ref->is_shm = 0;
  ref->is_rev = is_rev;

  // load the annotations
  anno_name = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
  anno_fp = tmap_file_fopen(anno_name, "rb", TMAP_ANNO_COMPRESSION);
  tmap_refseq_read_anno(anno_fp, ref);
  tmap_file_fclose(anno_fp);
  free(anno_name);

  // the pac is deliberately left unread
  ref->seq = NULL;

  num_bytes = tmap_refseq_shm_num_bytes(ref);

  tmap_refseq_destroy(ref);

  return num_bytes;
}
Beispiel #9
0
// Reads a packed reference (annotations plus sequence) for the given FASTA.
//
// fn_fasta: FASTA file name from which the annotation and pac file names are
//   derived.
// is_rev: zero reads TMAP_PAC_FILE, any other value reads TMAP_REV_PAC_FILE.
// Returns the newly allocated reference with is_shm = 0.  A short read of the
// sequence is reported through tmap_error.
tmap_refseq_t *
tmap_refseq_read(const char *fn_fasta, uint32_t is_rev)
{
  tmap_refseq_t *ref = NULL;
  tmap_file_t *anno_fp = NULL, *pac_fp = NULL;
  char *anno_name = NULL, *pac_name = NULL;

  ref = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");
  ref->is_shm = 0;
  ref->is_rev = is_rev;

  // annotations first: they provide the sequence length used below
  anno_name = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
  anno_fp = tmap_file_fopen(anno_name, "rb", TMAP_ANNO_COMPRESSION);
  tmap_refseq_read_anno(anno_fp, ref);
  tmap_file_fclose(anno_fp);
  free(anno_name);

  // pick the forward or the reversed packed sequence
  if(0 == is_rev) {
      pac_name = tmap_get_file_name(fn_fasta, TMAP_PAC_FILE);
      pac_fp = tmap_file_fopen(pac_name, "rb", TMAP_PAC_COMPRESSION);
  }
  else {
      pac_name = tmap_get_file_name(fn_fasta, TMAP_REV_PAC_FILE);
      pac_fp = tmap_file_fopen(pac_name, "rb", TMAP_REV_PAC_COMPRESSION);
  }

  // read the whole packed sequence in one go
  ref->seq = tmap_malloc(sizeof(uint8_t)*tmap_refseq_seq_memory(ref->len), "refseq->seq");
  if(tmap_refseq_seq_memory(ref->len)
     != tmap_file_fread(ref->seq, sizeof(uint8_t), tmap_refseq_seq_memory(ref->len), pac_fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  tmap_file_fclose(pac_fp);
  free(pac_name);

  return ref;
}
Beispiel #10
0
// Reads a single SFF read header from fp.
//
// The fixed-width fields are read in file order and converted from big-endian
// to host order; then the read name (name_length bytes) is read and
// tmap_sff_read_padding is called.  The running byte count must equal
// rheader_length.
//
// fp: the stream to read from.
// early_eof_ok: when non-zero, a failure to read the fixed-width fields
//   returns NULL instead of raising an error.
// Returns the newly allocated read header, or NULL as described above.
tmap_sff_read_header_t *
tmap_sff_read_header_read(tmap_file_t *fp, int32_t early_eof_ok)
{
  tmap_sff_read_header_t *hdr = NULL;
  uint32_t n_bytes = 0;
  int32_t fixed_ok;

  hdr = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");

  // the chain stops at the first short read, so fields are consumed strictly
  // in file order
  fixed_ok = (1 == tmap_file_fread(&hdr->rheader_length, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->name_length, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->n_bases, sizeof(uint32_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_qual_left, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_qual_right, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_adapter_left, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_adapter_right, sizeof(uint16_t), 1, fp));
  if(!fixed_ok) {
      if(0 != early_eof_ok) {
          free(hdr);
          return NULL;
      }
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += 6*sizeof(uint16_t) + sizeof(uint32_t);

  // big-endian on disk -> host order
  hdr->n_bases = ntohl(hdr->n_bases);
  hdr->rheader_length = ntohs(hdr->rheader_length);
  hdr->name_length = ntohs(hdr->name_length);
  hdr->clip_qual_left = ntohs(hdr->clip_qual_left);
  hdr->clip_qual_right = ntohs(hdr->clip_qual_right);
  hdr->clip_adapter_left = ntohs(hdr->clip_adapter_left);
  hdr->clip_adapter_right = ntohs(hdr->clip_adapter_right);

  // read name, with room for the terminator
  hdr->name = tmap_string_init(hdr->name_length+1);
  if(hdr->name_length != tmap_file_fread(hdr->name->s, sizeof(char), hdr->name_length, fp)) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(char)*hdr->name_length;

  hdr->name->l = hdr->name_length;
  hdr->name->s[hdr->name->l]='\0';

  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_header_print(stderr, hdr);
#endif

  // everything consumed must add up to the length recorded in the header
  if(hdr->rheader_length != n_bytes) {
      tmap_error("SFF read header length did not match", Exit, ReadFileError);
  }

  return hdr;
}
Beispiel #11
0
// Allocates a zeroed search stack and hands it to
// tmap_map1_aux_stack_init_helper for initialization.
// Returns the newly allocated stack.
tmap_map1_aux_stack_t *
tmap_map1_aux_stack_init()
{
  tmap_map1_aux_stack_t *s = tmap_calloc(1, sizeof(tmap_map1_aux_stack_t), "stack");

  tmap_map1_aux_stack_init_helper(s);
  return s;
}
Beispiel #12
0
// Reads the annotation file into refseq: first the header, then
// refseq->num_annos annotation records into a freshly allocated array.
//
// fp: the annotation stream.
// refseq: the reference to fill in.
// NOTE(review): num_annos is presumably set by tmap_refseq_read_header; confirm.
static inline void
tmap_refseq_read_anno(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  uint32_t k = 0;

  tmap_refseq_read_header(fp, refseq);

  refseq->annos = tmap_calloc(refseq->num_annos, sizeof(tmap_anno_t), "refseq->annos");
  while(k < refseq->num_annos) {
      tmap_refseq_read_annos(fp, refseq->annos + k);
      k++;
  }
}
Beispiel #13
0
// Allocates a FASTQ record whose name, comment, sequence and quality strings
// are each created with tmap_string_init(0); is_int starts at 0.
// Returns the newly allocated record.
inline tmap_fq_t *
tmap_fq_init()
{
  tmap_fq_t *fq = NULL;

  fq = tmap_calloc(1, sizeof(tmap_fq_t), "s");
  fq->is_int = 0;
  fq->name = tmap_string_init(0);
  fq->comment = tmap_string_init(0);
  fq->seq = tmap_string_init(0);
  fq->qual = tmap_string_init(0);

  return fq;
}
Beispiel #14
0
// Allocates an SFF record with no global header, read header or read attached.
// Returns the newly allocated record.
tmap_sff_t *
tmap_sff_init()
{
  tmap_sff_t *rec = tmap_calloc(1, sizeof(tmap_sff_t), "sff");

  // nothing is attached yet
  rec->read = NULL;
  rec->rheader = NULL;
  rec->gheader = NULL;

  return rec;
}
Beispiel #15
0
// Initializes a search stack: allocates an entry pool of
// TMAP_MAP1_AUX_STACK_INIT_SIZE zeroed entries, leaves the bins empty and
// resets the counters.
//
// stack: the stack to initialize.
static void
tmap_map1_aux_stack_init_helper(tmap_map1_aux_stack_t *stack)
{
  int32_t k = 0;

  // counters start from scratch
  stack->n_entries = 0;
  stack->best_score = 0;
  stack->entry_pool_i = 0;

  // no bins yet
  stack->bins = NULL;
  stack->n_bins = 0;

  // small memory pool: one separately allocated entry per slot
  stack->entry_pool_length = TMAP_MAP1_AUX_STACK_INIT_SIZE;
  stack->entry_pool = tmap_calloc(stack->entry_pool_length, sizeof(tmap_map1_aux_stack_entry_t*), "stack->entry_pool");
  while(k < stack->entry_pool_length) {
      stack->entry_pool[k] = tmap_calloc(1, sizeof(tmap_map1_aux_stack_entry_t), "stack->entry_pool[i]");
      k++;
  }
}
Beispiel #16
0
// Creates a copy of a SAM record: the name, sequence and quality strings are
// cloned and is_int is carried over.  No other field is copied.
//
// sam: the record to copy.
// Returns the newly allocated copy.
inline tmap_sam_t*
tmap_sam_clone(tmap_sam_t *sam)
{
  tmap_sam_t *copy = NULL;

  copy = tmap_calloc(1, sizeof(tmap_sam_t), "ret");
  copy->is_int = sam->is_int;
  copy->name = tmap_string_clone(sam->name);
  copy->seq = tmap_string_clone(sam->seq);
  copy->qual = tmap_string_clone(sam->qual);

  return copy;
}
Beispiel #17
0
// Allocates an empty sequence container of the given type (no sequences,
// n = m = 0).
//
// type: stored as the container type.
// Returns the newly allocated container.
tmap_seqs_t *
tmap_seqs_init(int8_t type)
{
  tmap_seqs_t *container = tmap_calloc(1, sizeof(tmap_seqs_t), "seqs");

  container->m = 0;
  container->n = 0;
  container->seqs = NULL;
  container->type = type;

  return container;
}
Beispiel #18
0
void
tmap_sa_bwt2sa(const char *fn_fasta, uint32_t intv)
{
    int64_t isa, s; // S(isa) = sa
    uint64_t i;
    tmap_bwt_t *bwt = NULL;
    tmap_sa_t *sa = NULL;

    tmap_progress_print("constructing the SA from the BWT string");

    bwt = tmap_bwt_read(fn_fasta);

    sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");

    sa->primary = bwt->primary;
    sa->sa_intv = intv;
    sa->seq_len = bwt->seq_len;
    sa->n_sa = (bwt->seq_len + intv) / intv;

    // calculate SA value
    sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa");
    isa = 0;
    s = bwt->seq_len;
    for(i = 0; i < bwt->seq_len; ++i) {
        if(isa % intv == 0) sa->sa[isa/intv] = s;
        --s;
        isa = tmap_bwt_invPsi(bwt, isa);
    }
    if(isa % intv == 0) sa->sa[isa/intv] = s;
    sa->sa[0] = (tmap_bwt_int_t)-1; // before this line, bwt->sa[0] = bwt->seq_len

    tmap_sa_write(fn_fasta, sa);

    tmap_bwt_destroy(bwt);
    tmap_sa_destroy(sa);
    sa=NULL;
    bwt=NULL;

    tmap_progress_print2("constructed the SA from the BWT string");
}
Beispiel #19
0
// Loads the reference index (packed reference, BWT and SA) for a FASTA.
//
// The source depends on the arguments: a memory map when mm is 1, otherwise
// the index files when shm_key is 0, otherwise shared memory identified by
// shm_key.  After loading, twice the reference length must equal both the BWT
// and the SA sequence length.
//
// fn_fasta: FASTA file name passed to the readers.
// shm_key: shared memory key, or 0.
// mm: 1 to read from a memory map.
// Returns the newly allocated index; missing shared memory listings and
// length mismatches are reported through tmap_error.
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key, int32_t mm)
{
  tmap_index_t *idx = tmap_calloc(1, sizeof(tmap_index_t), "index");

  idx->mm      = mm;
  idx->shm_key = shm_key;

  if(1 == idx->mm) {
      // memory-mapped index
      tmap_progress_print("Retrieving reference data from memory map");
      idx->refseq = tmap_refseq_mm_read(fn_fasta);
      idx->bwt = tmap_bwt_mm_read(fn_fasta);
      idx->sa = tmap_sa_mm_read(fn_fasta);
      tmap_progress_print2("Reference data retrieved from memory map");
  }
  else if(0 == idx->shm_key) {
      // index files on disk
      tmap_progress_print("reading in reference data");
      idx->refseq = tmap_refseq_read(fn_fasta);
      idx->bwt = tmap_bwt_read(fn_fasta);
      idx->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }
  else {
      // shared memory: every listing must be present
      tmap_progress_print("retrieving reference data from shared memory");
      idx->shm = tmap_shm_init(idx->shm_key, 0, 0);

      idx->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == idx->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == idx->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_SA));
      if(NULL == idx->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }
      tmap_progress_print2("reference data retrieved from shared memory");
  }

  // consistency checks between the three structures
  if(index_len_mismatch_guard: 0) {}
  if(idx->bwt->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(idx->sa->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return idx;
}
Beispiel #20
0
// Opens a SAM/BAM file with a caller-supplied mode and header.
//
// fn: the file name handed to samopen.
// mode: the samopen mode string.
// header: the header handed to samopen.
// Returns the newly allocated handle.  A failed open is reported through
// tmap_error, as tmap_sam_io_init_helper does.
inline tmap_sam_io_t *
tmap_sam_io_init2(const char *fn, const char *mode,
                  bam_header_t *header)
{
  tmap_sam_io_t *io = NULL;

  io = tmap_calloc(1, sizeof(tmap_sam_io_t), "io");

  // Open the file with the requested mode
  io->fp = samopen(fn, mode, header);
  if(NULL == io->fp) {
      // fail here rather than on the first use of a NULL file handle
      tmap_error(fn, Exit, OpenFileError);
  }

  return io;
}
Beispiel #21
0
// Creates a copy of a FASTQ record: the name, comment, sequence and quality
// strings are cloned and is_int is carried over.
//
// fq: the record to copy.
// Returns the newly allocated copy.
inline tmap_fq_t*
tmap_fq_clone(tmap_fq_t *fq)
{
  tmap_fq_t *copy = NULL;

  copy = tmap_calloc(1, sizeof(tmap_fq_t), "ret");
  copy->is_int = fq->is_int;
  copy->name = tmap_string_clone(fq->name);
  copy->comment = tmap_string_clone(fq->comment);
  copy->seq = tmap_string_clone(fq->seq);
  copy->qual = tmap_string_clone(fq->qual);

  return copy;
}
Beispiel #22
0
// Creates an SFF reader on top of an open stream.  The header returned by
// tmap_sff_header_read is stored as gheader and the read counter starts at 0.
//
// fp: the stream to read from; it is stored in the reader.
// Returns the newly allocated reader.
inline tmap_sff_io_t *
tmap_sff_io_init(tmap_file_t *fp)
{
  tmap_sff_io_t *reader = tmap_calloc(1, sizeof(tmap_sff_io_t), "sffio");

  reader->n_read = 0;
  reader->fp = fp;
  reader->gheader = tmap_sff_header_read(reader->fp);

  return reader;
}
Beispiel #23
0
// Command entry point: prints the reference header for a FASTA's index.
//
// Options: -v calls tmap_progress_set_verbosity(1), -h prints the usage.
// Exactly one positional argument (the FASTA file name) is expected.  Only the
// annotation file is read; the packed sequence is not loaded.
//
// Returns 0 on success, 1 on an unknown option, when -h is given or when the
// positional argument count is wrong.
int
tmap_refseq_refinfo_main(int argc, char *argv[])
{
  int opt, show_help = 0;
  char *fasta_name = NULL;
  char *anno_name = NULL;
  tmap_file_t *anno_fp = NULL;
  tmap_refseq_t *ref = NULL;

  while(0 <= (opt = getopt(argc, argv, "vh"))) {
      if('v' == opt) {
          tmap_progress_set_verbosity(1);
      }
      else if('h' == opt) {
          show_help = 1;
      }
      else {
          return 1;
      }
  }
  if(1 == show_help || 1 != argc - optind) {
      tmap_file_fprintf(tmap_file_stderr, "Usage: %s %s [-vh] <in.fasta>\n", PACKAGE, argv[0]);
      return 1;
  }
  fasta_name = argv[optind];

  // Note: 'tmap_file_stdout' should not have been previously modified
  tmap_file_stdout = tmap_file_fdopen(fileno(stdout), "wb", TMAP_FILE_NO_COMPRESSION);

  // temporary reference that only carries the annotations
  ref = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");
  ref->is_shm = 0;
  ref->is_rev = 0;

  // load the annotations
  anno_name = tmap_get_file_name(fasta_name, TMAP_ANNO_FILE);
  anno_fp = tmap_file_fopen(anno_name, "rb", TMAP_ANNO_COMPRESSION);
  tmap_refseq_read_anno(anno_fp, ref);
  tmap_file_fclose(anno_fp);
  free(anno_name);

  // the pac is deliberately left unread
  ref->seq = NULL;

  tmap_refseq_print_header(tmap_file_stdout, ref);

  tmap_refseq_destroy(ref);

  tmap_file_fclose(tmap_file_stdout);

  return 0;
}
Beispiel #24
0
// Creates an SFF reader on top of an open stream and records whether an early
// end of file is acceptable.  The header returned by tmap_sff_header_read is
// stored as gheader and the read counter starts at 0.
//
// fp: the stream to read from; it is stored in the reader.
// early_eof_ok: stored in the reader as-is.
// Returns the newly allocated reader.
inline tmap_sff_io_t *
tmap_sff_io_init2(tmap_file_t *fp, int32_t early_eof_ok)
{
  tmap_sff_io_t *reader = tmap_calloc(1, sizeof(tmap_sff_io_t), "sffio");

  reader->early_eof_ok = early_eof_ok;
  reader->n_read = 0;
  reader->fp = fp;
  reader->gheader = tmap_sff_header_read(reader->fp);

  return reader;
}
Beispiel #25
0
// Allocates the per-thread data for map1 and stores it in *data.
//
// The seed width array gets 1+opt->seed_length entries and a fresh search
// stack is created.  Each of max_mm, max_gapo and max_gape is taken from opt
// when non-negative; otherwise it is derived from the matching fraction times
// opt->seed2_length, rounded up by adding 0.99 before truncation.
//
// data: receives the newly allocated thread data.
// opt: the mapping options.
// Returns 0.
int32_t
tmap_map1_thread_init(void **data, 
                      tmap_map_opt_t *opt)
{
  tmap_map1_thread_data_t *td = tmap_calloc(1, sizeof(tmap_map1_thread_data_t), "d");

  td->stack = NULL;
  td->width_length = 0;
  td->width = NULL;

  td->seed_width = tmap_calloc(1+opt->seed_length, sizeof(tmap_bwt_match_width_t), "seed_width");

  td->stack = tmap_map1_aux_stack_init();

  // remember to round up
  if(opt->max_mm < 0) {
      td->max_mm = (int)(0.99 + opt->max_mm_frac * opt->seed2_length);
  }
  else {
      td->max_mm = opt->max_mm;
  }
  if(opt->max_gapo < 0) {
      td->max_gapo = (int)(0.99 + opt->max_gapo_frac * opt->seed2_length);
  }
  else {
      td->max_gapo = opt->max_gapo;
  }
  if(opt->max_gape < 0) {
      td->max_gape = (int)(0.99 + opt->max_gape_frac * opt->seed2_length);
  }
  else {
      td->max_gape = opt->max_gape;
  }

  *data = (void*)td;

  return 0;
}
Beispiel #26
0
// Reads the data section of one SFF read from fp.
//
// In file order: gh->flow_length 16-bit flowgram values, then rh->n_bases
// flow index bytes, base characters and quality values.  Flowgram values are
// converted to host order and qualities are converted with QUAL2CHAR.
// tmap_sff_read_padding is called afterwards with the byte count read.
//
// fp: the stream to read from.
// gh: the global header (supplies flow_length).
// rh: the read header (supplies n_bases).
// Returns the newly allocated read; a short read is reported through
// tmap_error.
tmap_sff_read_t *
tmap_sff_read_read(tmap_file_t *fp, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  tmap_sff_read_t *rd = NULL;
  uint32_t k, n_bytes = 0;
  int32_t all_read;

  rd = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");

  rd->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "r->flowgram");
  rd->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "r->flow_index");

  // strings get one extra byte for the terminator
  rd->bases = tmap_string_init(rh->n_bases+1);
  rd->quality = tmap_string_init(rh->n_bases+1);

  // the chain stops at the first short read, so sections are consumed
  // strictly in file order
  all_read = (gh->flow_length == tmap_file_fread(rd->flowgram, sizeof(uint16_t), gh->flow_length, fp)
              && rh->n_bases == tmap_file_fread(rd->flow_index, sizeof(uint8_t), rh->n_bases, fp)
              && rh->n_bases == tmap_file_fread(rd->bases->s, sizeof(char), rh->n_bases, fp)
              && rh->n_bases == tmap_file_fread(rd->quality->s, sizeof(char), rh->n_bases, fp));
  if(!all_read) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint16_t)*gh->flow_length + 3*sizeof(uint8_t)*rh->n_bases;

  // lengths and terminators
  rd->bases->l = rh->n_bases;
  rd->bases->s[rd->bases->l]='\0';
  rd->quality->l = rh->n_bases;
  rd->quality->s[rd->quality->l]='\0';

  // qualities: int -> char
  k = 0;
  while(k < rd->quality->l) {
      rd->quality->s[k] = QUAL2CHAR(rd->quality->s[k]);
      k++;
  }

  // flowgram: to host order
  k = 0;
  while(k < gh->flow_length) {
      rd->flowgram[k] = ntohs(rd->flowgram[k]);
      k++;
  }

  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_print(stderr, rd, gh, rh);
#endif

  return rd;
}
Beispiel #27
0
// Creates a copy of a sequence container.  Every contained sequence is cloned
// with tmap_seq_clone; the copy's capacity m equals its count n.
//
// seqs: the container to copy.
// Returns the newly allocated copy; when seqs->n is not positive no sequence
// array is allocated.
tmap_seqs_t *
tmap_seqs_clone(tmap_seqs_t *seqs)
{
  tmap_seqs_t *copy = tmap_calloc(1, sizeof(tmap_seqs_t), "ret");
  int32_t k;

  copy->type = seqs->type;
  copy->m = seqs->n; // do not expand memory
  copy->n = seqs->n;

  if(seqs->n <= 0) {
      return copy;
  }

  copy->seqs = tmap_malloc(seqs->n * sizeof(tmap_seq_t*), "ret->seqs");
  k = 0;
  while(k < copy->n) {
      copy->seqs[k] = tmap_seq_clone(seqs->seqs[k]);
      k++;
  }

  return copy;
}
Beispiel #28
0
// Creates a copy of an SFF read header: the lengths, base count and clip
// fields are copied field by field and the name string is cloned.
//
// rh: the read header to copy.
// Returns the newly allocated copy.
static tmap_sff_read_header_t *
tmap_sff_read_header_clone(tmap_sff_read_header_t *rh)
{
  tmap_sff_read_header_t *copy = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");

  // the name gets its own storage
  copy->name = tmap_string_clone(rh->name);

  // lengths and counts
  copy->n_bases = rh->n_bases;
  copy->name_length = rh->name_length;
  copy->rheader_length = rh->rheader_length;

  // clipping
  copy->clip_left = rh->clip_left;
  copy->clip_right = rh->clip_right;
  copy->clip_qual_left = rh->clip_qual_left;
  copy->clip_qual_right = rh->clip_qual_right;
  copy->clip_adapter_left = rh->clip_adapter_left;
  copy->clip_adapter_right = rh->clip_adapter_right;

  return copy;
}
Beispiel #29
0
// Loads the reference index (packed reference, BWT and SA) for a FASTA.
//
// When shm_key is 0 the index files are read; otherwise the structures are
// unpacked from the shared memory identified by shm_key.  After loading,
// twice the reference length must equal both the BWT and the SA sequence
// length.
//
// fn_fasta: FASTA file name passed to the readers.
// shm_key: shared memory key, or 0.
// Returns the newly allocated index; missing shared memory listings and
// length mismatches are reported through tmap_error.
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key)
{
  tmap_index_t *idx = tmap_calloc(1, sizeof(tmap_index_t), "index");

  idx->shm_key = shm_key;

  if(0 != idx->shm_key) {
      // shared memory: every listing must be present
      tmap_progress_print("retrieving reference data from shared memory");
      idx->shm = tmap_shm_init(idx->shm_key, 0, 0);

      idx->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == idx->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == idx->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_SA));
      if(NULL == idx->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }
      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      // index files on disk
      tmap_progress_print("reading in reference data");
      idx->refseq = tmap_refseq_read(fn_fasta);
      idx->bwt = tmap_bwt_read(fn_fasta);
      idx->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // consistency checks between the three structures
  if(idx->bwt->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(idx->sa->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return idx;
}
Beispiel #30
0
// Creates a copy of an SFF read: gh->flow_length flowgram values and
// rh->n_bases flow index values are copied into fresh arrays, and the bases
// and quality strings are cloned.
//
// r: the read to copy.
// gh: the global header (supplies flow_length).
// rh: the read header (supplies n_bases).
// Returns the newly allocated copy.
static tmap_sff_read_t *
tmap_sff_read_clone(tmap_sff_read_t *r, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  tmap_sff_read_t *copy = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");
  uint32_t k;

  // flowgram: one value per flow
  copy->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "ret->flowgram");
  k = 0;
  while(k < gh->flow_length) {
      copy->flowgram[k] = r->flowgram[k];
      k++;
  }

  // flow index: one value per base
  copy->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "ret->flow_index");
  k = 0;
  while(k < rh->n_bases) {
      copy->flow_index[k] = r->flow_index[k];
      k++;
  }

  copy->bases = tmap_string_clone(r->bases);
  copy->quality = tmap_string_clone(r->quality);

  return copy;
}