Пример #1
0
// Writes the packed reference sequence to disk and, for the forward
// orientation only, the annotation file as well.
//   refseq   - reference whose packed `seq` bytes and `len` are written
//   fn_fasta - base name handed to tmap_get_file_name to derive the output names
//   is_rev   - 0 selects the forward file name/compression, anything else the reverse ones
// Every failed write is reported through tmap_error with the Exit action.
void 
tmap_refseq_write(tmap_refseq_t *refseq, const char *fn_fasta, uint32_t is_rev)
{
  tmap_file_t *fp_pac = NULL, *fp_anno = NULL;
  char *fn_pac = NULL, *fn_anno = NULL;
  uint8_t x = 0;

  // write annotation file
  if(0 == is_rev) {
      fn_anno = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
      fp_anno = tmap_file_fopen(fn_anno, "wb", TMAP_ANNO_COMPRESSION);
      tmap_refseq_write_anno(fp_anno, refseq); 
      tmap_file_fclose(fp_anno);
      free(fn_anno);
  }

  // write the sequence
  fn_pac = tmap_get_file_name(fn_fasta, (0 == is_rev) ? TMAP_PAC_FILE : TMAP_REV_PAC_FILE);
  fp_pac = tmap_file_fopen(fn_pac, "wb", (0 == is_rev) ? TMAP_PAC_COMPRESSION : TMAP_REV_PAC_COMPRESSION);
  if(tmap_refseq_seq_memory(refseq->len) != tmap_file_fwrite(refseq->seq, sizeof(uint8_t), tmap_refseq_seq_memory(refseq->len), fp_pac)) {
      // name the file in the report, exactly as the two write checks below do
      tmap_error(fn_pac, Exit, WriteFileError);
  }
  if(refseq->len % 4 == 0) { // add an extra byte if we completely filled all bits
      if(1 != tmap_file_fwrite(&x, sizeof(uint8_t), 1, fp_pac)) {
          tmap_error(fn_pac, Exit, WriteFileError);
      }
  }
  // store number of unused bits at the last byte
  x = refseq->len % 4;
  if(1 != tmap_file_fwrite(&x, sizeof(uint8_t), 1, fp_pac)) {
      tmap_error(fn_pac, Exit, WriteFileError);
  }
  tmap_file_fclose(fp_pac);
  free(fn_pac);
}
Пример #2
0
// Reads one annotation record from fp into anno.
// Fields are read in this order: name length (uint32, counts the null
// terminator), the name bytes, len (uint64), offset (uint64), num_amb
// (uint32) and, only when num_amb is positive, the amb_positions_start,
// amb_positions_end and amb_bases arrays (otherwise those pointers are set
// to NULL).  Any short read is reported through tmap_error with ReadFileError.
static inline void
tmap_refseq_read_annos(tmap_file_t *fp, tmap_anno_t *anno) 
{
  uint32_t len = 0; // includes the null-terminator
  
  if(1 != tmap_file_fread(&len, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // the stored length counts the terminator, so zero can only come from a
  // damaged file; without this check `len-1` below would wrap around
  if(0 == len) {
      tmap_error("annotation name length was zero", Exit, ReadFileError);
  }

  anno->name = tmap_string_init(len);

  if(len != tmap_file_fread(anno->name->s, sizeof(char), len, fp)
     || 1 != tmap_file_fread(&anno->len, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->offset, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->num_amb, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(0 < anno->num_amb) {
      anno->amb_positions_start = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_start");
      anno->amb_positions_end = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_end");
      anno->amb_bases = tmap_malloc(sizeof(uint8_t) * anno->num_amb, "anno->amb_bases");
      if(anno->num_amb != tmap_file_fread(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp)) {
          // these are reads, so report a read failure rather than a write failure
          tmap_error(NULL, Exit, ReadFileError);
      }
  }
  else {
      anno->amb_positions_start = NULL;
      anno->amb_positions_end = NULL;
      anno->amb_bases = NULL;
  }
  // set name length
  anno->name->l = len-1;
}
Пример #3
0
// Loads a suffix array from the SA file derived from fn_fasta.
// The file supplies primary, sa_intv and seq_len followed by n_sa-1 entries;
// entry 0 is not stored and is set to -1 here.  Returns a newly allocated
// tmap_sa_t with is_shm cleared.  Read problems are reported through
// tmap_error with the Exit action.
tmap_sa_t *
tmap_sa_read(const char *fn_fasta)
{
  char *fn_sa = NULL;
  tmap_file_t *fp_sa = NULL;
  tmap_sa_t *sa = NULL;

  fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE);
  fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION);

  sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");

  if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  // sa_intv comes straight from the file and is used as a divisor below
  if(0 == sa->sa_intv) {
      tmap_error("suffix array interval was zero", Exit, ReadFileError);
  }

  sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv;
  sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa");
  sa->sa[0] = -1;

  // the first entry is never stored on disk, hence the +1 / -1
  if(sa->n_sa-1 != tmap_file_fread(sa->sa + 1, sizeof(tmap_bwt_int_t), sa->n_sa - 1, fp_sa)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  sa->sa_intv_log2 = tmap_log2(sa->sa_intv);

  tmap_file_fclose(fp_sa);
  free(fn_sa);

  sa->is_shm = 0;

  return sa;
}
Пример #4
0
// Completes a read-group header record and adds it to the header.
// For SFF input the KS and FO tags are taken from the SFF reader whose index
// equals the number of records already present for record->tag.  A missing
// SM tag is filled with "NOSM" and a missing PG tag with PACKAGE_NAME; a
// missing ID tag is treated as a bug.
static void
tmap_seqs_io_init2_fs_and_add(tmap_seqs_io_t *io_in,
                              sam_header_t *header,
                              sam_header_record_t *record)
{
  // add @RG.KS and @RG.FO
  if(io_in->type == TMAP_SEQ_TYPE_SFF) {
      sam_header_records_t *records = sam_header_get_records(header, record->tag); // get the header line
      if(io_in->n <= records->n) tmap_error("Too many read groups specified", Exit, OutOfRange);
      // Tag keys are passed as string literals, as the SM/PG calls below do,
      // so the callee always receives a null-terminated key.
      // @RG.KS
      if(0 == sam_header_record_add(record, "KS", tmap_sff_io_get_rg_ks(io_in->seqios[records->n]->io.sffio))) {
          tmap_error("Could not add the KS tag; most likely it is already present", Exit, OutOfRange);
      }
      // @RG.FO
      if(0 == sam_header_record_add(record, "FO", tmap_sff_io_get_rg_fo(io_in->seqios[records->n]->io.sffio))) {
          tmap_error("Could not add the FO tag; most likely it is already present", Exit, OutOfRange);
      }
  }
  // check for the @RG.ID and @RG.SM tags
  if(NULL == sam_header_record_get(record, "ID")) tmap_bug(); // should not happen
  if(NULL == sam_header_record_get(record, "SM")) {
      if(0 == sam_header_record_add(record, "SM", "NOSM")) tmap_bug(); // dummy SM, for Picard validation
  }
  if(NULL == sam_header_record_get(record, "PG")) {
      if(0 == sam_header_record_add(record, "PG", PACKAGE_NAME)) tmap_bug(); // dummy PG
  }
  // add the read group
  if(0 == sam_header_add_record(header, record)) tmap_bug(); 
}
Пример #5
0
// Reports whether the index stored with refseq can be used by this build.
// Both the index's package version string and PACKAGE_VERSION must contain
// exactly two '.' characters, otherwise tmap_error is raised.  Returns 1 when
// tmap_refseq_get_version_format yields equal strings for both, 0 otherwise.
static inline int32_t
tmap_refseq_supported(tmap_refseq_t *refseq)
{
  int32_t i, j;
  char *refseq_v = refseq->package_version->s;
  char *tmap_v = PACKAGE_VERSION;

  // sanity check on version names
  // (walk to the terminator instead of calling strlen on every iteration)
  for(i=j=0;'\0' != refseq_v[i];i++) {
      if('.' == refseq_v[i]) j++;
  }
  if(2 != j) {
      tmap_error("did not find three version numbers", Exit, OutOfRange);
  }
  for(i=j=0;'\0' != tmap_v[i];i++) {
      if('.' == tmap_v[i]) j++;
  }
  if(2 != j) {
      tmap_error("did not find three version numbers", Exit, OutOfRange);
  }
  
  // get the format ids
  if(0 == strcmp(tmap_refseq_get_version_format(refseq_v), tmap_refseq_get_version_format(tmap_v))) {
      return 1;
  }
  return 0;
}
Пример #6
0
// Reads the reference header from fp into refseq: version_id, the package
// version string (its length is stored first; the string is stored with one
// extra trailing byte), num_annos and len.  Raises tmap_error when a read
// comes up short, when version_id differs from TMAP_VERSION_ID, or when
// tmap_refseq_supported rejects the stored package version.
static inline void 
tmap_refseq_read_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  size_t package_version_l;
  if(1 != tmap_file_fread(&refseq->version_id, sizeof(uint64_t), 1, fp) 
     || 1 != tmap_file_fread(&package_version_l, sizeof(size_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(refseq->version_id != TMAP_VERSION_ID) {
      tmap_error("version id did not match", Exit, ReadFileError);
  }

  refseq->package_version = tmap_string_init(package_version_l+1); // add one for the null terminator
  refseq->package_version->l = package_version_l;
  if(refseq->package_version->l+1 != tmap_file_fread(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // the string is handed to strcmp and "%s" below, so do not rely on the
  // file having stored a terminator in the final byte
  refseq->package_version->s[refseq->package_version->l] = '\0';
  if(0 == tmap_refseq_supported(refseq)) {
      fprintf(stderr, "reference version: %s\n", refseq->package_version->s);
      fprintf(stderr, "package version: %s\n", PACKAGE_VERSION);
      tmap_error("the reference index is not supported", Exit, ReadFileError);
  }
     
  if(1 != tmap_file_fread(&refseq->num_annos, sizeof(uint32_t), 1, fp)
     || 1 != tmap_file_fread(&refseq->len, sizeof(uint64_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

}
Пример #7
0
// Reads one SFF read header from fp.
//   fp           - input positioned at the start of a read header
//   early_eof_ok - when non-zero, a failure to read the fixed-size fields
//                  returns NULL instead of raising an error
// Returns a newly allocated header, or NULL as described above.  The number
// of bytes consumed (including padding) must equal the header's own
// rheader_length, otherwise tmap_error is raised.
tmap_sff_read_header_t *
tmap_sff_read_header_read(tmap_file_t *fp, int32_t early_eof_ok)
{
  tmap_sff_read_header_t *rh = NULL;
  uint32_t n = 0; // running count of header bytes consumed

  rh = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");

  // fixed-size fields: six uint16 values and one uint32
  if(1 != tmap_file_fread(&rh->rheader_length, sizeof(uint16_t), 1, fp)
     || 1 != tmap_file_fread(&rh->name_length, sizeof(uint16_t), 1, fp)
     || 1 != tmap_file_fread(&rh->n_bases, sizeof(uint32_t), 1, fp)
     || 1 != tmap_file_fread(&rh->clip_qual_left, sizeof(uint16_t), 1, fp)
     || 1 != tmap_file_fread(&rh->clip_qual_right, sizeof(uint16_t), 1, fp)
     || 1 != tmap_file_fread(&rh->clip_adapter_left, sizeof(uint16_t), 1, fp)
     || 1 != tmap_file_fread(&rh->clip_adapter_right, sizeof(uint16_t), 1, fp)) {
      if(0 == early_eof_ok) {
          tmap_error("tmap_file_fread", Exit, ReadFileError);
      }
      else {
          // caller tolerates running out of input here: release and signal with NULL
          free(rh);
          return NULL;
      }
  }
  n += sizeof(uint32_t) + 6*sizeof(uint16_t);

  // convert values from big-endian
  rh->rheader_length = ntohs(rh->rheader_length);
  rh->name_length = ntohs(rh->name_length);
  rh->n_bases = ntohl(rh->n_bases);
  rh->clip_qual_left = ntohs(rh->clip_qual_left);
  rh->clip_qual_right = ntohs(rh->clip_qual_right);
  rh->clip_adapter_left = ntohs(rh->clip_adapter_left);
  rh->clip_adapter_right = ntohs(rh->clip_adapter_right);

  // one extra byte so the name can be null-terminated below
  rh->name = tmap_string_init(rh->name_length+1);

  // unlike the fixed-size fields, a short name read is always an error
  if(rh->name_length != tmap_file_fread(rh->name->s, sizeof(char), rh->name_length, fp)) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n += sizeof(char)*rh->name_length;

  // set read name length and null-terminator
  rh->name->l = rh->name_length;
  rh->name->s[rh->name->l]='\0';

  // consume the padding that follows the name and count it too
  n += tmap_sff_read_padding(fp, n);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_header_print(stderr, rh);
#endif

  // the header declares its own length; it must match what was consumed
  if(rh->rheader_length != n) {
      tmap_error("SFF read header length did not match", Exit, ReadFileError);
  }

  return rh;
}
Пример #8
0
// Builds a tmap_index_t holding the reference sequence, BWT and suffix array.
//   fn_fasta - base file name used when loading from files or a memory map
//   shm_key  - shared-memory key; consulted only when mm is not 1
//   mm       - 1 selects the memory-map readers
// Source selection: mm == 1 -> memory map; otherwise shm_key == 0 -> files;
// otherwise shared memory.  The BWT and SA sequence lengths must both equal
// twice the reference length, else tmap_error is raised.
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key, int32_t mm)
{
  tmap_index_t *index = tmap_calloc(1, sizeof(tmap_index_t), "index");

  index->shm_key = shm_key;
  index->mm      = mm;

  if(1 != index->mm && 0 != index->shm_key) {
      // shared memory: each structure must be present in the listing
      tmap_progress_print("retrieving reference data from shared memory");
      index->shm = tmap_shm_init(index->shm_key, 0, 0);

      index->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == index->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == index->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_SA));
      if(NULL == index->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }
      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else if(1 == index->mm) {
      // memory map
      tmap_progress_print("Retrieving reference data from memory map");
      index->refseq = tmap_refseq_mm_read(fn_fasta);
      index->bwt = tmap_bwt_mm_read(fn_fasta);
      index->sa = tmap_sa_mm_read(fn_fasta);
      tmap_progress_print2("Reference data retrieved from memory map");
  }
  else {
      // plain files
      tmap_progress_print("reading in reference data");
      index->refseq = tmap_refseq_read(fn_fasta);
      index->bwt = tmap_bwt_read(fn_fasta);
      index->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // both derived structures are expected to span twice the reference length
  if(index->bwt->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(index->sa->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return index;
}
Пример #9
0
// Returns a newly allocated copy of seq.  The payload is cloned with the
// clone routine that matches seq->type (FQ, SFF, or SAM/BAM); any other type
// raises tmap_error.
tmap_seq_t *
tmap_seq_clone(tmap_seq_t *seq)
{
  tmap_seq_t *copy = tmap_calloc(1, sizeof(tmap_seq_t), "ret");

  copy->type = seq->type;

  if(TMAP_SEQ_TYPE_FQ == seq->type) {
      copy->data.fq = tmap_fq_clone(seq->data.fq);
  }
  else if(TMAP_SEQ_TYPE_SFF == seq->type) {
      copy->data.sff = tmap_sff_clone(seq->data.sff);
  }
  else if(TMAP_SEQ_TYPE_SAM == seq->type || TMAP_SEQ_TYPE_BAM == seq->type) {
      // SAM and BAM share the same in-memory payload
      copy->data.sam = tmap_sam_clone(seq->data.sam);
  }
  else {
      tmap_error("type is unrecognized", Exit, OutOfRange);
  }

  return copy;
}
Пример #10
0
// Allocates a tmap_sam_io_t and opens fn with samopen, using mode "r" when
// is_bam is 0 and "rb" otherwise.  A failed open raises tmap_error with fn
// as the message.  bam_end_vfo starts at 0.
static inline tmap_sam_io_t *
tmap_sam_io_init_helper(const char *fn, int32_t is_bam)
{
  tmap_sam_io_t *reader = tmap_calloc(1, sizeof(tmap_sam_io_t), "samio");

  reader->fp = samopen(fn, (0 == is_bam) ? "r" : "rb", NULL);
  if(NULL == reader->fp) {
      tmap_error(fn, Exit, OpenFileError);
  }
  reader->bam_end_vfo = 0;

  // NB: a check that the header contains @SQ lines
  // (fp->header->n_targets != 0) is currently disabled here.

  return reader;
}
Пример #11
0
// Wrapper around shmget that raises tmap_error on failure.
// When create is 0, up to TMAP_SHMGET_RETRIES-1 attempts are made first,
// sleeping TMAP_SHMGET_SLEEP seconds after each failed one, before the final
// attempt whose failure is reported.  Returns the shared-memory id.
static int32_t
tmap_shmget(key_t key, size_t size, int32_t shmflg, int32_t create)
{
  int32_t id = -1, i = 0;

  if(0 == create) {
      // try a number of times before failing
      while(i < TMAP_SHMGET_RETRIES-1) {
          id = shmget(key, size, shmflg);
          if(0 <= id) {
              return id;
          }
          tmap_progress_print("could not get shared memory, %d more %s", 
                              TMAP_SHMGET_RETRIES-i-1,
                              (1 != TMAP_SHMGET_RETRIES-i-1) ? "retries" : "retry");
          tmap_progress_print("retrying in %d seconds", TMAP_SHMGET_SLEEP);
          // wait before the next attempt
          sleep(TMAP_SHMGET_SLEEP);
          i++;
      }
  }

  // last (or only) attempt: failure here is fatal
  id = shmget(key, size, shmflg);
  if(id < 0) {
      tmap_error(NULL, Exit, SharedMemoryGet);
  }

  return id;
}
Пример #12
0
// Creates a multi-file sequence reader.
//   fns           - input file names, or NULL to read a single stream from "-"
//   fn_num        - number of entries in fns (ignored when fns is NULL)
//   seq_type      - TMAP_SEQ_TYPE_* of every input
//   compression   - passed through to tmap_seq_io_init
//   bam_start_vfo, bam_end_vfo - handed to tmap_sam_io_set_vfo when there is
//                   exactly one BAM input
// More than one SAM/BAM input raises tmap_error.
inline tmap_seqs_io_t*
tmap_seqs_io_init(char **fns, int32_t fn_num, int8_t seq_type, int32_t compression, int64_t bam_start_vfo, int64_t bam_end_vfo)
{
  tmap_seqs_io_t *io= NULL;
  int32_t i;

  io = tmap_calloc(1, sizeof(tmap_seqs_io_t), "io");
  io->type = seq_type;
      
  // io->n is only assigned further down (it is still zero after calloc), so
  // the number of requested files has to be tested directly for this guard
  // to have any effect.
  if(NULL != fns && 1 < fn_num && (TMAP_SEQ_TYPE_SAM == io->type || TMAP_SEQ_TYPE_BAM == io->type)) {
      tmap_error("Multi-SAM/BAM not supported", Exit, OutOfRange);
  }

  if(NULL == fns) { // stdin
      io->n = 1;
      io->seqios = tmap_calloc(1, sizeof(tmap_seq_io_t*), "io->seqios");
      io->seqios[0] = tmap_seq_io_init("-", seq_type, 0, compression); // NB: always reading
  }
  else { // from file(s)
      io->n = fn_num;
      io->seqios = tmap_calloc(fn_num, sizeof(tmap_seq_io_t*), "io->seqios");
      for(i=0;i<io->n;i++) {
          io->seqios[i] = tmap_seq_io_init(fns[i], seq_type, 0, compression); // NB: always reading
      }
  }

  if (io->n == 1 && io->seqios[0] && io->type == TMAP_SEQ_TYPE_BAM)
    tmap_sam_io_set_vfo(io->seqios[0]->io.samio, bam_start_vfo, bam_end_vfo);

  return io;
}
Пример #13
0
// Binary-searches refseq->annos by offset for the entry associated with
// pacpos and returns its zero-based index.  The search stops on an entry whose
// offset is below pacpos and which is either the last entry or is followed
// by an entry whose offset is at least pacpos.  A pacpos beyond refseq->len
// raises tmap_error.
static inline int32_t
tmap_refseq_get_seqid1(const tmap_refseq_t *refseq, uint32_t pacpos)
{
  int32_t lo = 0, hi, cur = 0;

  if(refseq->len < pacpos) {
      tmap_error("Coordinate was larger than the reference", Exit, OutOfRange);
  }

  hi = refseq->num_annos;
  while (lo < hi) {
      cur = (lo + hi) >> 1;
      if(!(refseq->annos[cur].offset < pacpos)) {
          // this entry starts at or after pacpos: look to the left
          hi = cur;
          continue;
      }
      // this entry starts before pacpos: stop if nothing later can contain it
      if(cur == refseq->num_annos - 1) break;
      if(pacpos <= refseq->annos[cur+1].offset) break;
      lo = cur + 1;
  }

  // clamp to the number of annotations
  if(refseq->num_annos < cur) {
      return refseq->num_annos;
  }
  return cur;
}
Пример #14
0
// Loads a reference from the index files derived from fn_fasta.
// The annotation file is read first (via tmap_refseq_read_anno), then the
// packed sequence is read from the forward file when is_rev is 0 and from the
// reverse file otherwise.  Returns a newly allocated tmap_refseq_t with
// is_shm cleared; a short sequence read raises tmap_error.
tmap_refseq_t *
tmap_refseq_read(const char *fn_fasta, uint32_t is_rev)
{
  tmap_refseq_t *ref = NULL;
  tmap_file_t *anno_fp = NULL, *pac_fp = NULL;
  char *anno_name = NULL, *pac_name = NULL;

  ref = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");
  ref->is_rev = is_rev;
  ref->is_shm = 0;

  // annotations
  anno_name = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
  anno_fp = tmap_file_fopen(anno_name, "rb", TMAP_ANNO_COMPRESSION);
  tmap_refseq_read_anno(anno_fp, ref); 
  tmap_file_fclose(anno_fp);
  free(anno_name);

  // packed sequence; the buffer size follows from the length read above
  pac_name = tmap_get_file_name(fn_fasta, (0 != is_rev) ? TMAP_REV_PAC_FILE : TMAP_PAC_FILE);
  pac_fp = tmap_file_fopen(pac_name, "rb", (0 != is_rev) ? TMAP_REV_PAC_COMPRESSION : TMAP_PAC_COMPRESSION);
  ref->seq = tmap_malloc(sizeof(uint8_t)*tmap_refseq_seq_memory(ref->len), "refseq->seq");
  if(tmap_file_fread(ref->seq, sizeof(uint8_t), tmap_refseq_seq_memory(ref->len), pac_fp)
     != tmap_refseq_seq_memory(ref->len)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  tmap_file_fclose(pac_fp);
  free(pac_name);

  return ref;
}
Пример #15
0
// Allocates a tmap_seq_t of the given type and initializes the payload with
// the init routine that matches it (FQ, SFF, or SAM/BAM).  Any other type
// raises tmap_error.
tmap_seq_t *
tmap_seq_init(int8_t type)
{
  tmap_seq_t *fresh = tmap_calloc(1, sizeof(tmap_seq_t), "seq");

  fresh->type = type;

  if(TMAP_SEQ_TYPE_FQ == fresh->type) {
      fresh->data.fq = tmap_fq_init();
  }
  else if(TMAP_SEQ_TYPE_SFF == fresh->type) {
      fresh->data.sff = tmap_sff_init();
  }
  else if(TMAP_SEQ_TYPE_SAM == fresh->type || TMAP_SEQ_TYPE_BAM == fresh->type) {
      // SAM and BAM share the same in-memory payload
      fresh->data.sam = tmap_sam_init();
  }
  else {
      tmap_error("type is unrecognized", Exit, OutOfRange);
  }

  return fresh;
}
Пример #16
0
// Runs the index build for opt->fn_fasta: pack the reference (third argument
// 0), build the BWT, build the suffix array, then pack the reference again
// with third argument 1.  When opt->is_large is negative it is set from the
// packed length: 1 ("bwtsw") at or above TMAP_INDEX_LARGE_GENOME, else 0
// ("is").  A length at or above TMAP_INDEX_TOO_BIG_GENOME raises tmap_error.
static void 
tmap_index_core(tmap_index_opt_t *opt)
{
  uint64_t packed_len = 0;

  // first packing pass
  packed_len = tmap_refseq_fasta2pac(opt->fn_fasta, TMAP_FILE_NO_COMPRESSION, 0);

  if(TMAP_INDEX_TOO_BIG_GENOME <= packed_len) { // too big (2^32 - 1)!
      tmap_error("Reference sequence too large", Exit, OutOfRange);
  }

  // pick a construction algorithm when the caller left it unset
  if(opt->is_large < 0) {
      if(packed_len < TMAP_INDEX_LARGE_GENOME) {
          opt->is_large = 0;
          tmap_progress_print("defaulting to \"is\" BWT construction algorithm");
      }
      else {
          opt->is_large = 1;
          tmap_progress_print("defaulting to \"bwtsw\" BWT construction algorithm");
      }
  }

  // BWT, then suffix array
  tmap_bwt_pac2bwt(opt->fn_fasta, opt->is_large, opt->occ_interval, opt->hash_width, opt->check_hash);
  tmap_sa_bwt2sa(opt->fn_fasta, opt->sa_interval);

  // second packing pass; its returned length is not needed
  (void)tmap_refseq_fasta2pac(opt->fn_fasta, TMAP_FILE_NO_COMPRESSION, 1);
}
Пример #17
0
// Wrapper around shmdt: raises tmap_error when the detach fails and
// otherwise returns 0.
static int32_t
tmap_shmdt(const void *shmaddr)
{
  const int rc = shmdt(shmaddr);

  if(rc < 0) {
      tmap_error(NULL, Exit, SharedMemoryDetach);
  }
  return 0;
}
Пример #18
0
// Wrapper around shmctl: raises tmap_error when the call fails and
// otherwise returns 0.
static int32_t
tmap_shmctl(int32_t shmid, int32_t cmd, struct shmid_ds *buf)
{
  const int rc = shmctl(shmid, cmd, buf);

  if(rc < 0) {
      tmap_error(NULL, Exit, SharedMemoryControl);
  }
  return 0;
}
Пример #19
0
// Writes the reference header to fp in this order: version_id, the package
// version length, the package version string plus one trailing byte,
// num_annos and len.  Writing stops at the first short write, which raises
// tmap_error.
static inline void 
tmap_refseq_write_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  int wrote_all;

  wrote_all = (1 == tmap_file_fwrite(&refseq->version_id, sizeof(uint64_t), 1, fp))
    && (1 == tmap_file_fwrite(&refseq->package_version->l, sizeof(size_t), 1, fp))
    && (refseq->package_version->l+1 == tmap_file_fwrite(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp))
    && (1 == tmap_file_fwrite(&refseq->num_annos, sizeof(uint32_t), 1, fp))
    && (1 == tmap_file_fwrite(&refseq->len, sizeof(uint64_t), 1, fp));

  if(!wrote_all) {
      tmap_error(NULL, Exit, WriteFileError);
  }
}
Пример #20
0
// Writes one annotation record to fp: name length (counting the null
// terminator), the name bytes, len, offset and num_amb, followed, only when
// num_amb is positive, by the amb_positions_start, amb_positions_end and
// amb_bases arrays.  The first short write raises tmap_error.
static inline void
tmap_refseq_write_annos(tmap_file_t *fp, tmap_anno_t *anno) 
{
  uint32_t name_bytes = anno->name->l+1; // terminator included
  int wrote_all;

  wrote_all = (1 == tmap_file_fwrite(&name_bytes, sizeof(uint32_t), 1, fp))
    && (name_bytes == tmap_file_fwrite(anno->name->s, sizeof(char), name_bytes, fp))
    && (1 == tmap_file_fwrite(&anno->len, sizeof(uint64_t), 1, fp))
    && (1 == tmap_file_fwrite(&anno->offset, sizeof(uint64_t), 1, fp))
    && (1 == tmap_file_fwrite(&anno->num_amb, sizeof(uint32_t), 1, fp));
  if(!wrote_all) {
      tmap_error(NULL, Exit, WriteFileError);
  }

  // nothing more to store when there are no ambiguity entries
  if(!(0 < anno->num_amb)) {
      return;
  }

  wrote_all = (anno->num_amb == tmap_file_fwrite(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp))
    && (anno->num_amb == tmap_file_fwrite(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp))
    && (anno->num_amb == tmap_file_fwrite(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp));
  if(!wrote_all) {
      tmap_error(NULL, Exit, WriteFileError);
  }
}
Пример #21
0
// Wrapper around shmat: raises tmap_error when shmat returns its failure
// value of -1 cast to a pointer, and otherwise returns the attached address.
static void *
tmap_shmat(int32_t shmid, const void *shmaddr, int32_t shmflg)
{
  void *attached = shmat(shmid, shmaddr, shmflg);

  if((void *)-1 == attached) {
      tmap_error(NULL, Exit, SharedMemoryAttach);
  }

  return attached;
}
Пример #22
0
// Raises a CommandLineArgument error naming `option` when val lies outside
// the inclusive range [lower, upper]; does nothing otherwise.
// The message is "option <option> out of range".  An option name too long
// for the message buffer is truncated rather than written past its end.
void
tmap_error_cmd_check_int(int32_t val, int32_t lower, int32_t upper, char *option)
{
  static const char prefix[] = "option ";
  static const char suffix[] = " out of range";

  if(val < lower || upper < val) {
      char str[1024] = "\0";
      // room left for the option name once the prefix, the suffix and the
      // terminator are accounted for
      size_t room = sizeof(str) - (sizeof(prefix) - 1) - (sizeof(suffix) - 1) - 1;

      strcpy(str, prefix);
      strncat(str, option, room); // appends at most `room` chars, then a terminator
      strcat(str, suffix);
      tmap_error(str, Exit, CommandLineArgument);
  }
} 
Пример #23
0
// Builds a tmap_index_t holding the reference sequence, BWT and suffix array.
//   fn_fasta - base file name used when loading from files
//   shm_key  - 0 loads from files; any other value loads from shared memory
// The BWT and SA sequence lengths must both equal twice the reference
// length, else tmap_error is raised.
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key)
{
  tmap_index_t *index = tmap_calloc(1, sizeof(tmap_index_t), "index");

  index->shm_key = shm_key;

  if(0 != index->shm_key) {
      // shared memory: each structure must be present in the listing
      tmap_progress_print("retrieving reference data from shared memory");
      index->shm = tmap_shm_init(index->shm_key, 0, 0);

      index->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == index->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == index->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_SA));
      if(NULL == index->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }
      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      // plain files
      tmap_progress_print("reading in reference data");
      index->refseq = tmap_refseq_read(fn_fasta);
      index->bwt = tmap_bwt_read(fn_fasta);
      index->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // both derived structures are expected to span twice the reference length
  if(index->bwt->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(index->sa->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return index;
}
Пример #24
0
// Consumes the bytes needed to bring a section of n bytes up to the next
// multiple of 8 and returns how many were consumed (0 when n is already a
// multiple of 8).  A short read raises tmap_error.
static inline uint32_t
tmap_sff_read_padding(tmap_file_t *fp, uint32_t n)
{
  char scratch[8]="\0";
  uint32_t pad;

  n &= 7; // n modulo 8
  if(0 == n) {
      return 0;
  }

  pad = 8 - n; // bytes still missing to reach the boundary
  if(pad != tmap_file_fread(scratch, sizeof(char), pad, fp)) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  return pad;
}
Пример #25
0
// Prints the RG:Z optional field to fp.
// When tmap_sam_rg_id_use is 1 the value is tmap_sam_rg_id; when it is 0 the
// value comes from tmap_seq_get_rg_id(seq), and a NULL result raises
// tmap_error.  Any other setting prints nothing.
static inline void
tmap_sam_print_rg(tmap_file_t *fp, tmap_seq_t *seq)
{
  char *rg_id = NULL;

  if(1 == tmap_sam_rg_id_use) {
      tmap_file_fprintf(fp, "\tRG:Z:%s", tmap_sam_rg_id);
      return;
  }
  if(0 != tmap_sam_rg_id_use) {
      return;
  }

  // take the read group from the record itself
  rg_id = tmap_seq_get_rg_id(seq);
  if(NULL == rg_id) {
      tmap_error("Missing Record RG.ID in the input file", Exit, OutOfRange);
  }
  tmap_file_fprintf(fp, "\tRG:Z:%s", rg_id);
}
Пример #26
0
// Reads the data section of one SFF read from fp.
//   gh - global header; supplies flow_length
//   rh - read header; supplies n_bases
// Reads the flowgram, flow index, bases and quality values in that order,
// null-terminates bases and quality, maps each quality value through
// QUAL2CHAR, converts the flowgram with ntohs, then consumes the trailing
// padding.  Returns the newly allocated read; a short read raises tmap_error.
tmap_sff_read_t *
tmap_sff_read_read(tmap_file_t *fp, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  tmap_sff_read_t *read = NULL;
  uint32_t k, consumed = 0;
  int got_all;

  read = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");

  read->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "r->flowgram");
  read->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "r->flow_index");

  // one extra byte each for the terminators written below
  read->bases = tmap_string_init(rh->n_bases+1);
  read->quality = tmap_string_init(rh->n_bases+1);

  got_all = (gh->flow_length == tmap_file_fread(read->flowgram, sizeof(uint16_t), gh->flow_length, fp))
    && (rh->n_bases == tmap_file_fread(read->flow_index, sizeof(uint8_t), rh->n_bases, fp))
    && (rh->n_bases == tmap_file_fread(read->bases->s, sizeof(char), rh->n_bases, fp))
    && (rh->n_bases == tmap_file_fread(read->quality->s, sizeof(char), rh->n_bases, fp));
  if(!got_all) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  consumed += sizeof(uint16_t)*gh->flow_length + 3*sizeof(uint8_t)*rh->n_bases;

  // lengths and terminators
  read->bases->l = rh->n_bases;
  read->bases->s[read->bases->l]='\0';
  read->quality->l = rh->n_bases;
  read->quality->s[read->quality->l]='\0';

  // stored quality values become characters
  for(k=0;k<read->quality->l;k++) {
      read->quality->s[k] = QUAL2CHAR(read->quality->s[k]);
  }

  // flowgram values to host byte order
  for(k=0;k<gh->flow_length;k++) {
      read->flowgram[k] = ntohs(read->flowgram[k]);
  }

  consumed += tmap_sff_read_padding(fp, consumed);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_print(stderr, read, gh, rh);
#endif

  return read;
}
Пример #27
0
// Reverses seq in place by delegating to the reverse routine that matches
// seq->type (FQ, SFF, or SAM/BAM).  Any other type raises tmap_error.
void
tmap_seq_reverse(tmap_seq_t *seq)
{
  if(TMAP_SEQ_TYPE_FQ == seq->type) {
      tmap_fq_reverse(seq->data.fq);
  }
  else if(TMAP_SEQ_TYPE_SFF == seq->type) {
      tmap_sff_reverse(seq->data.sff);
  }
  else if(TMAP_SEQ_TYPE_SAM == seq->type || TMAP_SEQ_TYPE_BAM == seq->type) {
      // SAM and BAM share the same in-memory payload
      tmap_sam_reverse(seq->data.sam);
  }
  else {
      tmap_error("type is unrecognized", Exit, OutOfRange);
  }
}
Пример #28
0
// Fetches the flowgram of seq through *flowgram and returns whatever the
// type-specific getter returns (SFF, or SAM/BAM).  FQ input has no flowgram
// and yields -1; an unrecognized type raises tmap_error and also yields -1.
// NB: includes key bases if present
static int32_t
tmap_seq_get_flowgram(tmap_seq_t *seq, uint16_t **flowgram)
{
  if(TMAP_SEQ_TYPE_SFF == seq->type) {
      return tmap_sff_get_flowgram(seq->data.sff, flowgram);
  }
  if(TMAP_SEQ_TYPE_SAM == seq->type || TMAP_SEQ_TYPE_BAM == seq->type) {
      return tmap_sam_get_flowgram(seq->data.sam, flowgram);
  }
  if(TMAP_SEQ_TYPE_FQ != seq->type) {
      tmap_error("type is unrecognized", Exit, OutOfRange);
  }
  return -1;
}
Пример #29
0
// Writes sa to the SA file derived from fn_fasta: primary, sa_intv and
// seq_len, followed by entries 1 .. n_sa-1 (entry 0 is not stored).
// Writing stops at the first short write, which raises tmap_error.
void
tmap_sa_write(const char *fn_fasta, tmap_sa_t *sa)
{
    char *out_name = tmap_get_file_name(fn_fasta, TMAP_SA_FILE);
    tmap_file_t *out = tmap_file_fopen(out_name, "wb", TMAP_SA_COMPRESSION);
    int wrote_all;

    wrote_all = (1 == tmap_file_fwrite(&sa->primary, sizeof(tmap_bwt_int_t), 1, out))
        && (1 == tmap_file_fwrite(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, out))
        && (1 == tmap_file_fwrite(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, out))
        && (sa->n_sa-1 == tmap_file_fwrite(sa->sa+1, sizeof(tmap_bwt_int_t), sa->n_sa-1, out));
    if(!wrote_all) {
        tmap_error(NULL, Exit, WriteFileError);
    }

    tmap_file_fclose(out);
    free(out_name);
}
Пример #30
0
// Reads the next record set from io into seqs.
//   SAM/BAM: reads from io->seqios[0] only; a second record is read when the
//            first one has the BAM_FPAIRED flag set.
//   SFF/FQ:  reads one record from each of the io->n inputs.
// Every record read is added to seqs and passed to tmap_seq_update together
// with its index and header.  Returns EOF as soon as any read returns a
// negative value, 0 otherwise.  A type mismatch between io and seqs raises
// tmap_error.
inline int
tmap_seqs_io_read(tmap_seqs_io_t *io, tmap_seqs_t *seqs, sam_header_t *header)
{
  int32_t idx;
  tmap_seq_t *cur = NULL;

  if(io->type != seqs->type) {
      tmap_error("type mismatch", Exit, OutOfRange);
  }

  // start from an empty set
  seqs->n = 0;

  if(TMAP_SEQ_TYPE_SAM != io->type && TMAP_SEQ_TYPE_BAM != io->type) {
      // one record per input file
      for(idx=0;idx<io->n;idx++) {
          cur = tmap_seqs_get(seqs, idx);
          if(tmap_seq_io_read(io->seqios[idx], cur) < 0) return EOF; // TODO: better error checking
          tmap_seqs_add(seqs, cur); 
          tmap_seq_update(cur, idx, header);
      }
      return 0;
  }

  // single SAM/BAM input: at most two records, the second only for pairs
  idx = 0;
  while(idx < 2) {
      cur = tmap_seqs_get(seqs, idx);
      if(tmap_seq_io_read(io->seqios[0], cur) < 0) return EOF; // TODO: better error checking
      tmap_seqs_add(seqs, cur); 
      tmap_seq_update(cur, idx, header);
      // stop after the first record unless it is flagged as paired
      if(0 == (cur->data.sam->b->core.flag & BAM_FPAIRED)) break;
      idx++;
  }

  return 0;
}