Beispiel #1
0
/*
 * Reads a suffix array from the TMAP_SA_FILE file derived from fn_fasta.
 *
 * The file starts with three tmap_bwt_int_t values (primary, sa_intv and
 * seq_len) followed by n_sa-1 sampled entries, where
 * n_sa = (seq_len + sa_intv) / sa_intv.  Entry zero is not stored in the
 * file and is set to -1 here.
 *
 * fn_fasta: the name passed to tmap_get_file_name to build the file name.
 * Returns a newly allocated suffix array with is_shm set to zero.  Any short
 * read, or a zero interval, is reported through tmap_error with Exit.
 */
tmap_sa_t *
tmap_sa_read(const char *fn_fasta)
{
  char *fn_sa = NULL;
  tmap_file_t *fp_sa = NULL;
  tmap_sa_t *sa = NULL;

  fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE);
  fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION);

  sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");

  // fixed-size header
  if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  // the interval comes from the file and is used as a divisor below, so a
  // zero value must be rejected before the division
  if(0 == sa->sa_intv) {
      tmap_error("the suffix array interval was zero", Exit, ReadFileError);
  }

  sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv;
  sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa");
  sa->sa[0] = -1; // the first entry is not stored in the file

  // the remaining n_sa-1 entries are read in one call
  if(sa->n_sa-1 != tmap_file_fread(sa->sa + 1, sizeof(tmap_bwt_int_t), sa->n_sa - 1, fp_sa)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  sa->sa_intv_log2 = tmap_log2(sa->sa_intv);

  tmap_file_fclose(fp_sa);
  free(fn_sa);

  sa->is_shm = 0;

  return sa;
}
Beispiel #2
0
/*
 * Reads the reference sequence header from fp into refseq.
 *
 * Fields are read in this order: version_id (uint64_t), the package version
 * length (size_t, not counting the terminator), the package version bytes
 * plus one terminator byte, num_annos (uint32_t) and len (uint64_t).
 *
 * A short read, a version_id different from TMAP_VERSION_ID, or an index
 * that tmap_refseq_supported rejects is reported through tmap_error with Exit.
 */
static inline void 
tmap_refseq_read_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  size_t package_version_l = 0;
  if(1 != tmap_file_fread(&refseq->version_id, sizeof(uint64_t), 1, fp) 
     || 1 != tmap_file_fread(&package_version_l, sizeof(size_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(refseq->version_id != TMAP_VERSION_ID) {
      tmap_error("version id did not match", Exit, ReadFileError);
  }

  // the length comes from the file; reject a value whose "+1" below would
  // wrap around to zero
  if(0 == package_version_l + 1) {
      tmap_error("package version length was out of range", Exit, ReadFileError);
  }

  refseq->package_version = tmap_string_init(package_version_l+1); // add one for the null terminator
  refseq->package_version->l = package_version_l;
  if(refseq->package_version->l+1 != tmap_file_fread(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // the string is used with "%s" below, so do not rely on the terminator
  // byte that was stored in the file
  refseq->package_version->s[refseq->package_version->l] = '\0';

  if(0 == tmap_refseq_supported(refseq)) {
      fprintf(stderr, "reference version: %s\n", refseq->package_version->s);
      fprintf(stderr, "package version: %s\n", PACKAGE_VERSION);
      tmap_error("the reference index is not supported", Exit, ReadFileError);
  }
     
  if(1 != tmap_file_fread(&refseq->num_annos, sizeof(uint32_t), 1, fp)
     || 1 != tmap_file_fread(&refseq->len, sizeof(uint64_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

}
Beispiel #3
0
/*
 * Reads one annotation record from fp into anno.
 *
 * Record layout, in read order: the name length (uint32_t, counting the null
 * terminator), the name bytes, len (uint64_t), offset (uint64_t) and num_amb
 * (uint32_t).  When num_amb is positive, three arrays of num_amb entries
 * follow: amb_positions_start (uint32_t), amb_positions_end (uint32_t) and
 * amb_bases (uint8_t).  Otherwise the three array pointers are set to NULL.
 *
 * A short read or a zero name length is reported through tmap_error with Exit.
 */
static inline void
tmap_refseq_read_annos(tmap_file_t *fp, tmap_anno_t *anno) 
{
  uint32_t len = 0; // includes the null-terminator
  
  if(1 != tmap_file_fread(&len, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // the stored length counts the terminator, so zero is never valid; without
  // this check "len-1" below would wrap around to a huge name length
  if(0 == len) {
      tmap_error("annotation name length was zero", Exit, ReadFileError);
  }

  anno->name = tmap_string_init(len);

  if(len != tmap_file_fread(anno->name->s, sizeof(char), len, fp)
     || 1 != tmap_file_fread(&anno->len, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->offset, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->num_amb, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(0 < anno->num_amb) {
      anno->amb_positions_start = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_start");
      anno->amb_positions_end = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_end");
      anno->amb_bases = tmap_malloc(sizeof(uint8_t) * anno->num_amb, "anno->amb_bases");
      if(anno->num_amb != tmap_file_fread(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp)) {
          // this is a failed read, so report it as a read error
          tmap_error(NULL, Exit, ReadFileError);
      }
  }
  else {
      anno->amb_positions_start = NULL;
      anno->amb_positions_end = NULL;
      anno->amb_bases = NULL;
  }
  // set name length (the stored length minus the terminator)
  anno->name->l = len-1;
}
Beispiel #4
0
/*
 * Loads a reference sequence: first the annotation file, then the packed
 * sequence file.
 *
 * fn_fasta: the name passed to tmap_get_file_name to build both file names.
 * is_rev:   zero selects TMAP_PAC_FILE / TMAP_PAC_COMPRESSION, any other
 *           value selects TMAP_REV_PAC_FILE / TMAP_REV_PAC_COMPRESSION.
 *
 * Returns a newly allocated tmap_refseq_t with is_rev set from the argument
 * and is_shm set to zero.  A short read of the sequence is reported through
 * tmap_error with Exit.
 */
tmap_refseq_t *
tmap_refseq_read(const char *fn_fasta, uint32_t is_rev)
{
  tmap_refseq_t *refseq = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");
  refseq->is_rev = is_rev;
  refseq->is_shm = 0;

  // Annotations come first.
  // NOTE(review): presumably this call fills refseq->len, which sizes the
  // sequence buffer below -- confirm against tmap_refseq_read_anno.
  char *anno_path = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
  tmap_file_t *anno_fp = tmap_file_fopen(anno_path, "rb", TMAP_ANNO_COMPRESSION);
  tmap_refseq_read_anno(anno_fp, refseq); 
  tmap_file_fclose(anno_fp);
  free(anno_path);

  // Pick the forward or the reverse packed sequence file.
  char *pac_path = NULL;
  tmap_file_t *pac_fp = NULL;
  if(0 == is_rev) {
      pac_path = tmap_get_file_name(fn_fasta, TMAP_PAC_FILE);
      pac_fp = tmap_file_fopen(pac_path, "rb", TMAP_PAC_COMPRESSION);
  }
  else {
      pac_path = tmap_get_file_name(fn_fasta, TMAP_REV_PAC_FILE);
      pac_fp = tmap_file_fopen(pac_path, "rb", TMAP_REV_PAC_COMPRESSION);
  }

  // Allocate the buffer and fill it with a single read.
  refseq->seq = tmap_malloc(sizeof(uint8_t)*tmap_refseq_seq_memory(refseq->len), "refseq->seq");
  if(tmap_file_fread(refseq->seq, sizeof(uint8_t), tmap_refseq_seq_memory(refseq->len), pac_fp)
     != tmap_refseq_seq_memory(refseq->len)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  tmap_file_fclose(pac_fp);
  free(pac_path);

  return refseq;
}
Beispiel #5
0
/*
 * Reads one SFF read header from fp.
 *
 * The fixed part holds rheader_length, name_length (uint16_t), n_bases
 * (uint32_t) and four uint16_t clip values, all stored big-endian.  The read
 * name follows, then padding as consumed by tmap_sff_read_padding.
 *
 * early_eof_ok: when non-zero, a short read inside the fixed part frees the
 *               header and returns NULL; when zero it is reported through
 *               tmap_error with Exit.
 *
 * Returns the newly allocated header.  A short read of the name, or a total
 * byte count different from rheader_length, is reported through tmap_error
 * with Exit.
 */
tmap_sff_read_header_t *
tmap_sff_read_header_read(tmap_file_t *fp, int32_t early_eof_ok)
{
  tmap_sff_read_header_t *header = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");
  uint32_t consumed = 0; // bytes of this header read so far

  // fixed-width fields in file order; evaluation stops at the first short read
  int got_fixed =
    1 == tmap_file_fread(&header->rheader_length, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->name_length, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->n_bases, sizeof(uint32_t), 1, fp)
    && 1 == tmap_file_fread(&header->clip_qual_left, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->clip_qual_right, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->clip_adapter_left, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->clip_adapter_right, sizeof(uint16_t), 1, fp);
  if(!got_fixed) {
      if(0 != early_eof_ok) {
          free(header);
          return NULL;
      }
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  consumed += sizeof(uint32_t) + 6*sizeof(uint16_t);

  // big-endian to host order
  header->n_bases = ntohl(header->n_bases);
  header->rheader_length = ntohs(header->rheader_length);
  header->name_length = ntohs(header->name_length);
  header->clip_qual_left = ntohs(header->clip_qual_left);
  header->clip_qual_right = ntohs(header->clip_qual_right);
  header->clip_adapter_left = ntohs(header->clip_adapter_left);
  header->clip_adapter_right = ntohs(header->clip_adapter_right);

  // read name: one extra byte is reserved for the terminator written below
  header->name = tmap_string_init(header->name_length+1);
  if(tmap_file_fread(header->name->s, sizeof(char), header->name_length, fp) != header->name_length) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  header->name->l = header->name_length;
  header->name->s[header->name->l]='\0';
  consumed += sizeof(char)*header->name_length;

  // trailing padding counts towards the header length
  consumed += tmap_sff_read_padding(fp, consumed);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_header_print(stderr, header);
#endif

  if(consumed != header->rheader_length) {
      tmap_error("SFF read header length did not match", Exit, ReadFileError);
  }

  return header;
}
Beispiel #6
0
/*
 * Reads the data section of one SFF read from fp.
 *
 * gh: global header; its flow_length gives the number of uint16_t flowgram
 *     values.
 * rh: read header; its n_bases gives the length of the flow index, bases and
 *     quality arrays.
 *
 * The flowgram values are converted to host order with ntohs and each quality
 * value is passed through QUAL2CHAR.  Padding after the section is consumed
 * with tmap_sff_read_padding.  Returns the newly allocated read; a short read
 * is reported through tmap_error with Exit.
 */
tmap_sff_read_t *
tmap_sff_read_read(tmap_file_t *fp, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  tmap_sff_read_t *read = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");
  uint32_t idx;

  read->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "r->flowgram");
  read->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "r->flow_index");

  // one extra byte each for the terminators written below
  read->bases = tmap_string_init(rh->n_bases+1);
  read->quality = tmap_string_init(rh->n_bases+1);

  // the four arrays in file order; evaluation stops at the first short read
  int got_all =
    gh->flow_length == tmap_file_fread(read->flowgram, sizeof(uint16_t), gh->flow_length, fp)
    && rh->n_bases == tmap_file_fread(read->flow_index, sizeof(uint8_t), rh->n_bases, fp)
    && rh->n_bases == tmap_file_fread(read->bases->s, sizeof(char), rh->n_bases, fp)
    && rh->n_bases == tmap_file_fread(read->quality->s, sizeof(char), rh->n_bases, fp);
  if(!got_all) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  // bytes read in this section, used to work out the padding
  uint32_t section_bytes = sizeof(uint16_t)*gh->flow_length + 3*sizeof(uint8_t)*rh->n_bases;

  // flowgram values to host order
  for(idx=0;idx<gh->flow_length;idx++) {
      read->flowgram[idx] = ntohs(read->flowgram[idx]);
  }

  // lengths and terminators
  read->bases->l = rh->n_bases;
  read->bases->s[read->bases->l]='\0';
  read->quality->l = rh->n_bases;
  read->quality->s[read->quality->l]='\0';

  // quality values to characters
  for(idx=0;idx<read->quality->l;idx++) {
      read->quality->s[idx] = QUAL2CHAR(read->quality->s[idx]);
  }

  // consume the padding; the number of padding bytes is not needed afterwards
  (void)tmap_sff_read_padding(fp, section_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_print(stderr, read, gh, rh);
#endif

  return read;
}
Beispiel #7
0
/*
 * Consumes the padding that brings a section of n bytes up to a multiple of
 * eight bytes.
 *
 * fp: the file to read the padding bytes from.
 * n:  the number of bytes of the section read so far.
 *
 * Returns the number of padding bytes read (0 to 7).  Nothing is read when n
 * is already a multiple of eight.  A short read is reported through
 * tmap_error with Exit.
 */
static inline uint32_t
tmap_sff_read_padding(tmap_file_t *fp, uint32_t n)
{
  char scratch[8]="\0"; // the padding bytes are read and discarded
  uint32_t remainder = n % 8;
  uint32_t pad;

  if(0 == remainder) {
      return 0;
  }

  pad = 8 - remainder;
  if(tmap_file_fread(scratch, sizeof(char), pad, fp) != pad) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  return pad;
}
Beispiel #8
0
/*
 * Returns the value of tmap_sa_shm_num_bytes for the suffix array stored in
 * the TMAP_SA_FILE file derived from fn_fasta.
 *
 * Only the three tmap_bwt_int_t header values (primary, sa_intv and seq_len)
 * are read; the sampled entries are not loaded and sa->sa stays NULL.
 *
 * fn_fasta: the name passed to tmap_get_file_name to build the file name.
 * A short read, or a zero interval, is reported through tmap_error with Exit.
 */
size_t
tmap_sa_shm_read_num_bytes(const char *fn_fasta)
{
    size_t n = 0;
    char *fn_sa = NULL;
    tmap_file_t *fp_sa = NULL;
    tmap_sa_t *sa = NULL;

    fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE);
    fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION);

    sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");

    if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa)
            || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa)
            || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) {
        tmap_error(NULL, Exit, ReadFileError);
    }

    // the interval comes from the file and is used as a divisor below, so a
    // zero value must be rejected before the division
    if(0 == sa->sa_intv) {
        tmap_error("the suffix array interval was zero", Exit, ReadFileError);
    }

    sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv;

    // No need to read in sa->sa
    sa->sa = NULL;

    tmap_file_fclose(fp_sa);
    free(fn_sa);

    sa->is_shm = 0;
    sa->is_mm  = 0;

    // get the number of bytes
    n = tmap_sa_shm_num_bytes(sa);

    tmap_sa_destroy(sa);

    return n;
}
Beispiel #9
0
/*
 * Reads the SFF global header from fp.
 *
 * The fixed part holds magic, version (uint32_t), index_offset (uint64_t),
 * index_length, n_reads (uint32_t), gheader_length, key_length, flow_length
 * (uint16_t) and flowgram_format (uint8_t).  The multi-byte fields are stored
 * big-endian and converted to host order.  The flow string and the key string
 * follow, then padding as consumed by tmap_sff_read_padding.
 *
 * Returns the newly allocated header.  A short read, a magic number different
 * from TMAP_SFF_MAGIC, a version different from TMAP_SFF_VERSION, or a total
 * byte count different from gheader_length is reported through tmap_error
 * with Exit.
 */
tmap_sff_header_t *
tmap_sff_header_read(tmap_file_t *fp)
{
  tmap_sff_header_t *header = tmap_calloc(1, sizeof(tmap_sff_header_t), "h");
  uint32_t consumed = 0; // bytes of the header read so far

  // fixed-width fields in file order; evaluation stops at the first short read
  int got_fixed =
    1 == tmap_file_fread(&header->magic, sizeof(uint32_t), 1, fp)
    && 1 == tmap_file_fread(&header->version, sizeof(uint32_t), 1, fp)
    && 1 == tmap_file_fread(&header->index_offset, sizeof(uint64_t), 1, fp)
    && 1 == tmap_file_fread(&header->index_length, sizeof(uint32_t), 1, fp)
    && 1 == tmap_file_fread(&header->n_reads, sizeof(uint32_t), 1, fp)
    && 1 == tmap_file_fread(&header->gheader_length, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->key_length, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->flow_length, sizeof(uint16_t), 1, fp)
    && 1 == tmap_file_fread(&header->flowgram_format, sizeof(uint8_t), 1, fp);
  if(!got_fixed) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  consumed += 4*sizeof(uint32_t) + sizeof(uint64_t) + 3*sizeof(uint16_t) + sizeof(uint8_t);

  // big-endian to host order; flowgram_format is a single byte and is left as read
  header->index_offset = ntohll(header->index_offset);
  header->magic = ntohl(header->magic);
  header->version = ntohl(header->version);
  header->index_length = ntohl(header->index_length);
  header->n_reads = ntohl(header->n_reads);
  header->gheader_length = ntohs(header->gheader_length);
  header->key_length = ntohs(header->key_length);
  header->flow_length = ntohs(header->flow_length);

  if(header->magic != TMAP_SFF_MAGIC) {
      tmap_error("SFF magic number did not match", Exit, ReadFileError);
  }
  if(TMAP_SFF_VERSION != header->version) {
      tmap_error("SFF version number did not match", Exit, ReadFileError);
  }

  // flow and key strings: one extra byte each for the terminators written below
  header->flow = tmap_string_init(header->flow_length+1);
  header->key = tmap_string_init(header->key_length+1);

  int got_strings =
    header->flow_length == tmap_file_fread(header->flow->s, sizeof(char), header->flow_length, fp)
    && header->key_length == tmap_file_fread(header->key->s, sizeof(char), header->key_length, fp);
  if(!got_strings) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  consumed += sizeof(char)*(header->flow_length + header->key_length);

  // lengths and terminators
  header->flow->l = header->flow_length;
  header->flow->s[header->flow->l]='\0';
  header->key->l = header->key_length;
  header->key->s[header->key->l]='\0';

  // trailing padding counts towards the header length
  consumed += tmap_sff_read_padding(fp, consumed);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_header_print(stderr, header);
#endif

  if(consumed != header->gheader_length) {
      tmap_error("SFF global header length did not match", Exit, ReadFileError);
  }

  return header;
}