Ejemplo n.º 1
0
// Store `len` bytes in the string held by `*dst`, creating or replacing the
// string when it is missing or its capacity is below `len + 1`.
//
// dst  in/out string handle; `*dst` may be NULL on entry
// src  bytes to copy; when NULL nothing is copied and only the capacity,
//      terminator and length are set up
// len  number of bytes to store (terminator not counted)
static void
tmap_sam_io_update_string(tmap_string_t **dst, char *src, int32_t len)
{
  if(NULL == (*dst)) (*dst) = tmap_string_init(len+1);
  else if((*dst)->m < len + 1) {
      tmap_string_destroy((*dst));
      (*dst) = tmap_string_init(len+1);
  }
  if(NULL != src) memcpy((*dst)->s, src, len);
  // memcpy writes no terminator, and a reused buffer may still hold bytes of
  // a longer, earlier value past `len`; the capacity check above leaves room
  // for this byte
  if(0 <= len) (*dst)->s[len] = '\0';
  (*dst)->l = len;
}
Ejemplo n.º 2
0
// Allocate one tmap_fq_t through tmap_calloc, give its name, comment, seq and
// qual members a string created with tmap_string_init(0), and set is_int to 0.
// Returns the new record.
inline tmap_fq_t *
tmap_fq_init()
{
    tmap_fq_t *fq = NULL;

    fq = tmap_calloc(1, sizeof(tmap_fq_t), "s");
    fq->is_int = 0;

    // one string per text field
    fq->name = tmap_string_init(0);
    fq->comment = tmap_string_init(0);
    fq->seq = tmap_string_init(0);
    fq->qual = tmap_string_init(0);

    return fq;
}
Ejemplo n.º 3
0
// Read one annotation record from `fp` into `anno`.
//
// Fields are consumed in this order: the name length (uint32_t, counting the
// null-terminator), the name bytes, len, offset, num_amb and, when num_amb is
// non-zero, three arrays of num_amb entries each (amb_positions_start,
// amb_positions_end, amb_bases).  Every short read is reported through
// tmap_error with ReadFileError.
//
// fp    file to read from
// anno  record to fill; its name string and amb_* arrays are allocated here,
//       and the amb_* pointers are set to NULL when num_amb is zero
static inline void
tmap_refseq_read_annos(tmap_file_t *fp, tmap_anno_t *anno) 
{
  uint32_t len = 0; // includes the null-terminator
  
  if(1 != tmap_file_fread(&len, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // a stored length of zero cannot include the terminator and would make the
  // `len-1` at the end of this function wrap around
  if(0 == len) {
      tmap_error("annotation name length was zero", Exit, ReadFileError);
  }

  anno->name = tmap_string_init(len);

  if(len != tmap_file_fread(anno->name->s, sizeof(char), len, fp)
     || 1 != tmap_file_fread(&anno->len, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->offset, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->num_amb, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(0 < anno->num_amb) {
      anno->amb_positions_start = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_start");
      anno->amb_positions_end = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_end");
      anno->amb_bases = tmap_malloc(sizeof(uint8_t) * anno->num_amb, "anno->amb_bases");
      if(anno->num_amb != tmap_file_fread(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp)) {
          // this is a failed read, so report it as one
          tmap_error(NULL, Exit, ReadFileError);
      }
  }
  else {
      anno->amb_positions_start = NULL;
      anno->amb_positions_end = NULL;
      anno->amb_bases = NULL;
  }
  // set name length (the stored length counted the terminator)
  anno->name->l = len-1;
}
Ejemplo n.º 4
0
// Read the reference header from `fp` into `refseq`.
//
// Consumed in order: version_id (uint64_t), the package version length
// (size_t, terminator not counted), the package version bytes plus their
// terminator, num_annos (uint32_t) and len (uint64_t).  A short read, a
// version_id different from TMAP_VERSION_ID, or a reference rejected by
// tmap_refseq_supported is reported through tmap_error with ReadFileError.
//
// fp      file to read from
// refseq  structure to fill; package_version is allocated here
static inline void 
tmap_refseq_read_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  size_t version_len; // characters in the package version, terminator excluded

  if(!(1 == tmap_file_fread(&refseq->version_id, sizeof(uint64_t), 1, fp)
       && 1 == tmap_file_fread(&version_len, sizeof(size_t), 1, fp))) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(TMAP_VERSION_ID != refseq->version_id) {
      tmap_error("version id did not match", Exit, ReadFileError);
  }

  // one extra byte: the stored string carries its null terminator
  refseq->package_version = tmap_string_init(version_len+1);
  refseq->package_version->l = version_len;
  if(tmap_file_fread(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp)
     != refseq->package_version->l+1) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  if(!tmap_refseq_supported(refseq)) {
      // show both versions before reporting the mismatch
      fprintf(stderr, "reference version: %s\n", refseq->package_version->s);
      fprintf(stderr, "package version: %s\n", PACKAGE_VERSION);
      tmap_error("the reference index is not supported", Exit, ReadFileError);
  }

  if(!(1 == tmap_file_fread(&refseq->num_annos, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fread(&refseq->len, sizeof(uint64_t), 1, fp))) {
      tmap_error(NULL, Exit, ReadFileError);
  }
}
Ejemplo n.º 5
0
// Read the data section of one SFF read from `fp`.
//
// Four arrays are read back to back: gh->flow_length 16-bit flowgram values,
// then rh->n_bases flow index bytes, rh->n_bases base characters and
// rh->n_bases quality bytes.  Flowgram values are passed through ntohs and
// every quality byte through QUAL2CHAR.  The bases and quality strings get
// length rh->n_bases and a null terminator.  tmap_sff_read_padding is then
// called with the number of bytes consumed so far.
//
// fp  file to read from
// gh  global header; supplies flow_length
// rh  read header; supplies n_bases
// Returns the newly allocated read.  A short read is reported through
// tmap_error with ReadFileError.
tmap_sff_read_t *
tmap_sff_read_read(tmap_file_t *fp, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  uint32_t idx, n_bytes = 0;
  tmap_sff_read_t *r = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");

  // storage for the four arrays; the strings get one spare byte for '\0'
  r->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "r->flowgram");
  r->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "r->flow_index");
  r->bases = tmap_string_init(rh->n_bases+1);
  r->quality = tmap_string_init(rh->n_bases+1);

  if(!(gh->flow_length == tmap_file_fread(r->flowgram, sizeof(uint16_t), gh->flow_length, fp)
       && rh->n_bases == tmap_file_fread(r->flow_index, sizeof(uint8_t), rh->n_bases, fp)
       && rh->n_bases == tmap_file_fread(r->bases->s, sizeof(char), rh->n_bases, fp)
       && rh->n_bases == tmap_file_fread(r->quality->s, sizeof(char), rh->n_bases, fp))) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint16_t)*gh->flow_length;
  n_bytes += 3*sizeof(uint8_t)*rh->n_bases;

  // length and terminator for each string
  r->bases->l = rh->n_bases;
  r->bases->s[r->bases->l]='\0';
  r->quality->l = rh->n_bases;
  r->quality->s[r->quality->l]='\0';

  // qualities: integer values to characters
  idx = 0;
  while(idx < r->quality->l) {
      r->quality->s[idx] = QUAL2CHAR(r->quality->s[idx]);
      idx++;
  }

  // flowgram: to host byte order
  idx = 0;
  while(idx < gh->flow_length) {
      r->flowgram[idx] = ntohs(r->flowgram[idx]);
      idx++;
  }

  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_print(stderr, r, gh, rh);
#endif

  return r;
}
Ejemplo n.º 6
0
// Read one SFF read header from `fp`.
//
// The fixed part is rheader_length, name_length (uint16_t each), n_bases
// (uint32_t) and the four clip values (uint16_t each); all are passed through
// ntohs/ntohl after reading.  The read name (name_length bytes) follows and
// is stored null-terminated.  After tmap_sff_read_padding has been called,
// the number of bytes accounted for must equal rheader_length.
//
// fp            file to read from
// early_eof_ok  when non-zero, a short read of the fixed part frees the
//               header and returns NULL instead of calling tmap_error
// Returns the newly allocated header, or NULL as described above.
tmap_sff_read_header_t *
tmap_sff_read_header_read(tmap_file_t *fp, int32_t early_eof_ok)
{
  uint32_t n_bytes = 0;
  tmap_sff_read_header_t *rh = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");

  if(!(1 == tmap_file_fread(&rh->rheader_length, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&rh->name_length, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&rh->n_bases, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fread(&rh->clip_qual_left, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&rh->clip_qual_right, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&rh->clip_adapter_left, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&rh->clip_adapter_right, sizeof(uint16_t), 1, fp))) {
      if(0 != early_eof_ok) {
          // the caller tolerates running out of input here
          free(rh);
          return NULL;
      }
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += 6*sizeof(uint16_t) + sizeof(uint32_t);

  // the file stores these big-endian
  rh->rheader_length = ntohs(rh->rheader_length);
  rh->name_length = ntohs(rh->name_length);
  rh->n_bases = ntohl(rh->n_bases);
  rh->clip_qual_left = ntohs(rh->clip_qual_left);
  rh->clip_qual_right = ntohs(rh->clip_qual_right);
  rh->clip_adapter_left = ntohs(rh->clip_adapter_left);
  rh->clip_adapter_right = ntohs(rh->clip_adapter_right);

  // read name, with one spare byte for the terminator
  rh->name = tmap_string_init(rh->name_length+1);
  if(tmap_file_fread(rh->name->s, sizeof(char), rh->name_length, fp) != rh->name_length) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(char)*rh->name_length;

  rh->name->l = rh->name_length;
  rh->name->s[rh->name->l]='\0';

  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_header_print(stderr, rh);
#endif

  // the header states its own size; it must agree with what was consumed
  if(n_bytes != rh->rheader_length) {
      tmap_error("SFF read header length did not match", Exit, ReadFileError);
  }

  return rh;
}
Ejemplo n.º 7
0
// Read the SFF global header from `fp`.
//
// The fixed part is magic, version (uint32_t each), index_offset (uint64_t),
// index_length, n_reads (uint32_t each), gheader_length, key_length,
// flow_length (uint16_t each) and flowgram_format (uint8_t).  The multi-byte
// values are passed through ntohl/ntohll/ntohs.  The flow string (flow_length
// bytes) and key string (key_length bytes) follow and are stored
// null-terminated.  After tmap_sff_read_padding has been called, the number
// of bytes accounted for must equal gheader_length.
//
// fp  file to read from
// Returns the newly allocated header.  A short read, a magic other than
// TMAP_SFF_MAGIC, a version other than TMAP_SFF_VERSION, or a length mismatch
// is reported through tmap_error with ReadFileError.
tmap_sff_header_t *
tmap_sff_header_read(tmap_file_t *fp)
{
  uint32_t n_bytes = 0;
  tmap_sff_header_t *h = tmap_calloc(1, sizeof(tmap_sff_header_t), "h");

  if(!(1 == tmap_file_fread(&h->magic, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fread(&h->version, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fread(&h->index_offset, sizeof(uint64_t), 1, fp)
       && 1 == tmap_file_fread(&h->index_length, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fread(&h->n_reads, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fread(&h->gheader_length, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&h->key_length, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&h->flow_length, sizeof(uint16_t), 1, fp)
       && 1 == tmap_file_fread(&h->flowgram_format, sizeof(uint8_t), 1, fp))) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint64_t) + 4*sizeof(uint32_t) + 3*sizeof(uint16_t) + sizeof(uint8_t);

  // the file stores these big-endian
  h->magic = ntohl(h->magic);
  h->version = ntohl(h->version);
  h->index_offset = ntohll(h->index_offset);
  h->index_length = ntohl(h->index_length);
  h->n_reads = ntohl(h->n_reads);
  h->gheader_length = ntohs(h->gheader_length);
  h->key_length = ntohs(h->key_length);
  h->flow_length = ntohs(h->flow_length);

  if(h->magic != TMAP_SFF_MAGIC) {
      tmap_error("SFF magic number did not match", Exit, ReadFileError);
  }
  if(TMAP_SFF_VERSION != h->version) {
      tmap_error("SFF version number did not match", Exit, ReadFileError);
  }

  // flow and key strings, each with one spare byte for the terminator
  h->flow = tmap_string_init(h->flow_length+1);
  h->key = tmap_string_init(h->key_length+1);

  if(!(h->flow_length == tmap_file_fread(h->flow->s, sizeof(char), h->flow_length, fp)
       && h->key_length == tmap_file_fread(h->key->s, sizeof(char), h->key_length, fp))) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(char)*(h->flow_length + h->key_length);

  h->flow->l = h->flow_length;
  h->flow->s[h->flow->l]='\0';
  h->key->l = h->key_length;
  h->key->s[h->key->l]='\0';

  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_header_print(stderr, h);
#endif

  // the header states its own size; it must agree with what was consumed
  if(n_bytes != h->gheader_length) {
      tmap_error("SFF global header length did not match", Exit, ReadFileError);
  }

  return h;
}
Ejemplo n.º 8
0
tmap_refseq_t *
tmap_refseq_shm_unpack(uint8_t *buf)
{
  int32_t i;
  tmap_refseq_t *refseq = NULL;
  
  if(NULL == buf) return NULL;

  refseq = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");

  // fixed length data
  memcpy(&refseq->version_id, buf, sizeof(uint64_t)) ; buf += sizeof(uint64_t);
  if(refseq->version_id != TMAP_VERSION_ID) {
      tmap_error("version id did not match", Exit, ReadFileError);
  }
      
  refseq->package_version = tmap_string_init(0);
  memcpy(&refseq->package_version->l, buf, sizeof(size_t)); buf += sizeof(size_t);
  memcpy(&refseq->num_annos, buf, sizeof(uint32_t)) ; buf += sizeof(uint32_t);
  memcpy(&refseq->len, buf, sizeof(uint64_t)) ; buf += sizeof(uint64_t);
  memcpy(&refseq->is_rev, buf, sizeof(uint32_t)) ; buf += sizeof(uint32_t);

  // variable length data
  refseq->package_version->s = (char*)buf;
  refseq->package_version->m = refseq->package_version->l+1;
  buf += sizeof(char)*(refseq->package_version->l+1); 
  if(0 == tmap_refseq_supported(refseq)) {
      tmap_error("the reference index is not supported", Exit, ReadFileError);
  }
  refseq->seq = (uint8_t*)buf;
  buf += tmap_refseq_seq_memory(refseq->len)*sizeof(uint8_t);
  refseq->annos = tmap_calloc(refseq->num_annos, sizeof(tmap_anno_t), "refseq->annos");
  for(i=0;i<refseq->num_annos;i++) {
      // fixed length data
      memcpy(&refseq->annos[i].len, buf, sizeof(uint64_t)); buf += sizeof(uint64_t); 
      memcpy(&refseq->annos[i].offset, buf, sizeof(uint64_t)); buf += sizeof(uint64_t); 
      refseq->annos[i].name = tmap_string_init(0);
      memcpy(&refseq->annos[i].name->l, buf, sizeof(size_t)); buf += sizeof(size_t);
      refseq->annos[i].name->m = refseq->annos[i].name->l+1;
      memcpy(&refseq->annos[i].num_amb, buf, sizeof(uint32_t)); buf += sizeof(uint32_t);
      // variable length data
      refseq->annos[i].name->s = (char*)buf;
      buf += sizeof(char)*refseq->annos[i].name->l+1;
      if(0 < refseq->annos[i].num_amb) {
          refseq->annos[i].amb_positions_start = (uint32_t*)buf;
          buf += sizeof(uint32_t)*refseq->annos[i].num_amb;
          refseq->annos[i].amb_positions_end = (uint32_t*)buf;
          buf += sizeof(uint32_t)*refseq->annos[i].num_amb;
          refseq->annos[i].amb_bases = (uint8_t*)buf;
          buf += sizeof(uint8_t)*refseq->annos[i].num_amb;
      }
      else {
          refseq->annos[i].amb_positions_start = NULL;
          refseq->annos[i].amb_positions_end = NULL;
          refseq->annos[i].amb_bases = NULL;
      }
  }

  refseq->is_shm = 1;

  return refseq;
}
Ejemplo n.º 9
0
// Build the MD string for an alignment and count its edit distance.
//
// The reference bases covered by the alignment are fetched once with
// tmap_refseq_subseq2, then the cigar is walked operator by operator:
//   - match:      each read base is compared with the reference base; a run
//                 of equal bases is counted, a differing base emits the run
//                 length followed by the reference base and adds 1 to *nm
//   - insertion:  advances the read and adds the operator length to *nm
//   - deletion:   emits the run length, '^' and the deleted reference bases,
//                 and adds the operator length to *nm
//   - ref skip:   advances the reference only
//   - soft clip:  advances the read only
//   - hard clip and pad are ignored
// Any other operator is reported through tmap_error with OutOfRange.  The
// walk stops early if the position would pass the end of the reference.
//
// refseq         reference to compare against
// read_bases     read bases as characters
// seqid, pos     0-based sequence index and position of the alignment start
// cigar/n_cigar  cigar operators (length in the upper bits, operator in the
//                low 4 bits) and their count; n_cigar must not be 0
// nm             out: number of mismatched, inserted and deleted bases
// read_bases_eq  optional out buffer (may be NULL): receives '=' where the
//                read matches the reference and the read base elsewhere,
//                null-terminated after the last read base visited
// Returns the newly created MD string; this function does not free it.
static inline tmap_string_t *
tmap_sam_md(tmap_refseq_t *refseq, char *read_bases, // read bases are characters
            uint32_t seqid, uint32_t pos, // seqid and pos are 0-based
            uint32_t *cigar, int32_t n_cigar, int32_t *nm, char *read_bases_eq)
{
  int32_t i, j;
  uint32_t ref_i, read_i, ref_start, ref_end;
  int32_t l = 0; // the length of the last md op
  uint8_t read_base, ref_base;
  tmap_string_t *md=NULL;
  uint8_t *target = NULL;

  // an empty cigar describes no alignment; check before asking the reference
  // for an empty range
  if(0 == n_cigar) {
      tmap_bug();
  }

  md = tmap_string_init(32);
  (*nm) = 0;

  ref_start = ref_end = pos + 1; // make one-based
  for(i=0;i<n_cigar;i++) { // go through each cigar operator
      int32_t op_len;
      op_len = cigar[i] >> 4;
      switch(cigar[i]&0xf) {
        // only these operators consume reference bases
        case BAM_CMATCH:
        case BAM_CDEL:
        case BAM_CREF_SKIP:
          ref_end += op_len; break;
        default:
          break;
      }
  }
  ref_end--;
      
  target = tmap_refseq_subseq2(refseq, seqid+1, ref_start, ref_end, NULL, 0, NULL);
  if(NULL == target) {
      tmap_bug();
  }

  read_i = ref_i = 0;
  for(i=0;i<n_cigar;i++) { // go through each cigar operator
      int32_t op_len, op;

      op_len = cigar[i] >> 4;
      op = cigar[i] & 0xf;

      if(BAM_CMATCH == op) {
          for(j=0;j<op_len;j++) {
              if(refseq->len <= refseq->annos[seqid].offset + pos + ref_i) break; // out of boundary

              // index through an unsigned byte: a plain char may be signed,
              // which would turn a high-bit byte into a negative index
              read_base = tmap_nt_char_to_int[(uint8_t)read_bases[read_i]]; 
              ref_base = target[ref_i];

              if(read_base == ref_base) { // a match
                  if(NULL != read_bases_eq) read_bases_eq[read_i] = '=';
                  l++;
              }
              else {
                  if(NULL != read_bases_eq) read_bases_eq[read_i] = read_bases[read_i];
                  tmap_string_lsprintf(md, md->l, "%d%c", l, tmap_iupac_int_to_char[ref_base]);
                  l = 0;
                  (*nm)++;
              }
              read_i++;
              ref_i++; 
          }
          if(j < op_len) break; // ran off the reference; stop the walk
      }
      else if(BAM_CINS == op) {
          if(NULL != read_bases_eq) {
              for(j=0;j<op_len;j++) {
                  read_bases_eq[read_i+j] = read_bases[read_i+j];
              }
          }
          read_i += op_len;
          (*nm) += op_len;
      }
      else if(BAM_CDEL == op) {
          tmap_string_lsprintf(md, md->l, "%d^", l);
          for(j=0;j<op_len;j++) {
              if(refseq->len <= refseq->annos[seqid].offset + pos + ref_i) break; // out of boundary
              ref_base = target[ref_i];
              tmap_string_lsprintf(md, md->l, "%c", tmap_iupac_int_to_char[ref_base]);
              ref_i++;
          }
          if(j < op_len) break; // ran off the reference; stop the walk
          (*nm) += op_len;
          l=0;
      }
      else if(BAM_CREF_SKIP == op) {
          ref_i += op_len;
      }
      else if(BAM_CSOFT_CLIP == op) {
          if(NULL != read_bases_eq) {
              for(j=0;j<op_len;j++) {
                  read_bases_eq[read_i+j] = read_bases[read_i+j];
              }
          }
          read_i += op_len;
      }
      else if(BAM_CHARD_CLIP == op) {
          // ignore
      }
      else if(BAM_CPAD == op) {
          // ignore
      }
      else {
          tmap_error("could not understand the cigar operator", Exit, OutOfRange);
      }
  }
  // the MD string always ends with the length of the final matching run
  tmap_string_lsprintf(md, md->l, "%d", l);
  if(NULL != read_bases_eq) read_bases_eq[read_i] = '\0';

  free(target);

  return md;
}