Ejemplo n.º 1
0
/*
 * Allocate a tmap_index_t and populate its reference sequence, BWT and SA
 * for the reference named by fn_fasta.
 *
 * The data source is selected from the arguments:
 *   - mm == 1      : the tmap_*_mm_read() readers are used (reported as
 *                    "memory map" in the progress messages);
 *   - shm_key == 0 : the tmap_*_read() readers are used;
 *   - otherwise    : the structures are unpacked from the shared memory
 *                    obtained with tmap_shm_init(shm_key, 0, 0).
 *
 * Once loaded, twice the reference length must equal the seq_len stored in
 * both the BWT and the SA; a mismatch is reported through tmap_error() with
 * the Exit action.
 *
 * Returns the newly allocated index.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key, int32_t mm)
{
  tmap_index_t *idx = tmap_calloc(1, sizeof(tmap_index_t), "index");

  idx->shm_key = shm_key;
  idx->mm      = mm;

  if(1 == idx->mm) {
      // memory-mapped readers
      tmap_progress_print("Retrieving reference data from memory map");
      idx->refseq = tmap_refseq_mm_read(fn_fasta);
      idx->bwt = tmap_bwt_mm_read(fn_fasta);
      idx->sa = tmap_sa_mm_read(fn_fasta);
      tmap_progress_print2("Reference data retrieved from memory map");
  }
  else if(0 != idx->shm_key) {
      // a non-zero key means the data lives in shared memory
      tmap_progress_print("retrieving reference data from shared memory");
      idx->shm = tmap_shm_init(idx->shm_key, 0, 0);

      idx->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == idx->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == idx->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_SA));
      if(NULL == idx->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }

      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      // plain readers
      tmap_progress_print("reading in reference data");
      idx->refseq = tmap_refseq_read(fn_fasta);
      idx->bwt = tmap_bwt_read(fn_fasta);
      idx->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // sanity checks: BWT and SA must both cover twice the reference length
  if(index_len_mismatch_guard: 0) { }
  if(index->bwt->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(idx->sa->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return idx;
}
Ejemplo n.º 2
0
/*
 * Command-line entry point for the MAPVSW mapping algorithm.
 *
 * Creates a map driver for TMAP_MAP_ALGO_MAPVSW, parses the command line
 * into the driver's options, runs tmap_map_vsw_core() and destroys the
 * driver.
 *
 * Returns 0 after a completed run; when the command line is unusable the
 * return value of tmap_map_opt_usage() is returned instead.
 */
int 
tmap_map_vsw_main(int argc, char *argv[])
{
  tmap_map_driver_t *drv = tmap_map_driver_init(TMAP_MAP_ALGO_MAPVSW, tmap_map_vsw_mapq);

  drv->opt->algo_stage = 1;

  // the options must parse successfully ...
  if(1 != tmap_map_opt_parse(argc, argv, drv->opt)) {
      return tmap_map_opt_usage(drv->opt);
  }
  // ... every argument must have been consumed, and at least one given
  if(argc != optind || 1 == argc) {
      return tmap_map_opt_usage(drv->opt);
  }

  // validate the parsed options
  tmap_map_opt_check(drv->opt);

  // do the mapping, then release the driver
  tmap_map_vsw_core(drv);
  tmap_map_driver_destroy(drv);

  tmap_progress_print2("terminating successfully");

  return 0;
}
Ejemplo n.º 3
0
/*
 * Read the packed reference for fn_fasta, build a copy whose bases are
 * stored in reverse order, and write that copy back out.
 *
 * The reversed structure is a shallow copy of the one read in: only its
 * `seq` buffer is freshly allocated, so only that buffer and the structure
 * itself are freed here; everything else is released through
 * tmap_refseq_destroy() on the original.
 *
 * NOTE(review): the trailing 0 / 1 arguments to tmap_refseq_read() and
 * tmap_refseq_write() presumably select the forward / reverse packed file;
 * confirm against their declarations.
 */
void
tmap_refseq_pac2revpac(const char *fn_fasta)
{
  tmap_refseq_t *fwd = NULL, *rev = NULL;
  uint32_t src, dst, base;

  tmap_progress_print("reversing the packed reference FASTA");

  fwd = tmap_refseq_read(fn_fasta, 0);

  // start from a field-by-field copy, then give it its own sequence buffer
  rev = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq_rev");
  *rev = *fwd;
  rev->seq = tmap_calloc(tmap_refseq_seq_memory(fwd->len), sizeof(uint8_t), "refseq_rev->seq");

  // base at position src goes to the mirrored position
  for(src = 0; src < fwd->len; src++) {
      dst = fwd->len - src - 1;
      base = tmap_refseq_seq_i(fwd, src);
      tmap_refseq_seq_store_i(rev, dst, base);
  }

  tmap_refseq_write(rev, fn_fasta, 1);

  // release only what the copy owns, then the original
  free(rev->seq);
  free(rev);
  tmap_refseq_destroy(fwd);

  tmap_progress_print2("reversed the packed reference FASTA");
}
Ejemplo n.º 4
0
/*
 * Command-line entry point for the VSW benchmark.
 *
 * Options (each takes an integer argument, parsed with atoi):
 *   -q  seq_len     (default 150)
 *   -t  tlen        (default 256)
 *   -n  n_iter      (default 1000)
 *   -N  n_sub_iter  (default 1)
 *   -H  vsw_type    (default 0)
 *   -h  print usage
 *
 * Usage is also printed when an unknown option is seen, when arguments are
 * left over after option parsing, or when seq_len exceeds tlen.
 *
 * Returns 0 after the benchmark has run, otherwise the value of usage().
 */
int
tmap_vswbm_main(int argc, char *argv[])
{
  int32_t seq_len = 150, tlen = 256;
  int32_t n_iter = 1000, n_sub_iter = 1;
  int32_t vsw_type = 0;
  int opt_char;

  while(0 <= (opt_char = getopt(argc, argv, "q:t:n:N:H:h"))) {
      if('q' == opt_char) {
          seq_len = atoi(optarg);
      }
      else if('t' == opt_char) {
          tlen = atoi(optarg);
      }
      else if('n' == opt_char) {
          n_iter = atoi(optarg);
      }
      else if('N' == opt_char) {
          n_sub_iter = atoi(optarg);
      }
      else if('H' == opt_char) {
          vsw_type = atoi(optarg);
      }
      else {
          // -h and anything unrecognised
          return usage(seq_len, tlen, n_iter, n_sub_iter, vsw_type);
      }
  }

  // no positional arguments are accepted, and seq_len may not exceed tlen
  if(seq_len > tlen || argc != optind) {
      return usage(seq_len, tlen, n_iter, n_sub_iter, vsw_type);
  }

  tmap_progress_set_verbosity(1);

  tmap_progress_print2("starting benchmark");
  tmap_vsw_bm_core(seq_len, tlen, n_iter, n_sub_iter, vsw_type);
  tmap_progress_print2("ending benchmark");

  return 0;
}
Ejemplo n.º 5
0
/*
 * Allocate a tmap_index_t and populate its reference sequence, BWT and SA
 * for the reference named by fn_fasta.
 *
 * When shm_key is 0 the three structures are loaded with the tmap_*_read()
 * readers; otherwise they are unpacked from the shared memory obtained with
 * tmap_shm_init(shm_key, 0, 0), and a missing listing is reported through
 * tmap_error() with the Exit action.
 *
 * Once loaded, twice the reference length must equal the seq_len stored in
 * both the BWT and the SA; a mismatch is likewise reported through
 * tmap_error() with the Exit action.
 *
 * Returns the newly allocated index.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key)
{
  tmap_index_t *idx = tmap_calloc(1, sizeof(tmap_index_t), "index");

  idx->shm_key = shm_key;

  if(0 != idx->shm_key) {
      // a non-zero key means the data lives in shared memory
      tmap_progress_print("retrieving reference data from shared memory");
      idx->shm = tmap_shm_init(idx->shm_key, 0, 0);

      idx->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_REFSEQ));
      if(NULL == idx->refseq) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_BWT));
      if(NULL == idx->bwt) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      idx->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_SA));
      if(NULL == idx->sa) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }

      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      // plain readers
      tmap_progress_print("reading in reference data");
      idx->refseq = tmap_refseq_read(fn_fasta);
      idx->bwt = tmap_bwt_read(fn_fasta);
      idx->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // sanity checks: BWT and SA must both cover twice the reference length
  if(idx->bwt->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(idx->sa->seq_len != (idx->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return idx;
}
Ejemplo n.º 6
0
/*
 * Build a sampled suffix array from the BWT stored for fn_fasta and write
 * it out with tmap_sa_write().
 *
 * fn_fasta: reference file name handed to tmap_bwt_read() / tmap_sa_write()
 * intv:     sampling interval; only SA indices that are multiples of intv
 *           are stored (the code divides by intv, so it must be non-zero)
 *
 * The walk starts at SA index 0 with value seq_len, and on every step the
 * value is decremented while the index is advanced through
 * tmap_bwt_invPsi(). After the walk, entry 0 is overwritten with
 * (tmap_bwt_int_t)-1.
 */
void
tmap_sa_bwt2sa(const char *fn_fasta, uint32_t intv)
{
    tmap_bwt_t *bwt = NULL;
    tmap_sa_t *sa = NULL;
    int64_t sa_idx, sa_val; // sa[sa_idx] = sa_val
    uint64_t step;

    tmap_progress_print("constructing the SA from the BWT string");

    bwt = tmap_bwt_read(fn_fasta);

    // header fields are taken from the BWT and the requested interval
    sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa");
    sa->primary = bwt->primary;
    sa->sa_intv = intv;
    sa->seq_len = bwt->seq_len;
    sa->n_sa = (bwt->seq_len + intv) / intv;

    sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa");

    // walk the whole text once, sampling every intv-th SA index
    sa_idx = 0;
    sa_val = bwt->seq_len;
    step = 0;
    while(step < bwt->seq_len) {
        if(0 == sa_idx % intv) {
            sa->sa[sa_idx/intv] = sa_val;
        }
        --sa_val;
        sa_idx = tmap_bwt_invPsi(bwt, sa_idx);
        ++step;
    }
    // the final index reached by the walk is sampled as well
    if(0 == sa_idx % intv) {
        sa->sa[sa_idx/intv] = sa_val;
    }
    // entry 0 held seq_len up to this point; replace it with the all-ones value
    sa->sa[0] = (tmap_bwt_int_t)-1;

    tmap_sa_write(fn_fasta, sa);

    tmap_bwt_destroy(bwt);
    tmap_sa_destroy(sa);

    tmap_progress_print2("constructed the SA from the BWT string");
}
Ejemplo n.º 7
0
int 
tmap_index(int argc, char *argv[])
{
  int c;
  tmap_index_opt_t opt;

  opt.fn_fasta = NULL;
  opt.occ_interval = TMAP_BWT_OCC_INTERVAL; 
  opt.hash_width = INT32_MAX;
  opt.sa_interval = TMAP_SA_INTERVAL; 
  opt.is_large = -1;
  opt.check_hash = 1;
      
  if(2 == argc && 0 == strcmp("--version", argv[1])) {
      tmap_file_stdout = tmap_file_fdopen(fileno(stdout), "wb", TMAP_FILE_NO_COMPRESSION);
      tmap_file_fprintf(tmap_file_stdout, "%s\n", tmap_refseq_get_version_format(PACKAGE_VERSION));
      tmap_file_fclose(tmap_file_stdout);
      return 0;
  }

  while((c = getopt(argc, argv, "f:o:i:w:a:hvH")) >= 0) {
      switch(c) {
        case 'f':
          opt.fn_fasta = tmap_strdup(optarg); break;
        case 'o':
          opt.occ_interval = atoi(optarg); break;
        case 'i':
          opt.sa_interval = atoi(optarg); break;
        case 'w':
          opt.hash_width = atoi(optarg); break;
        case 'a':
          if(0 == strcmp("is", optarg)) opt.is_large = 0;
          else if(0 == strcmp("bwtsw", optarg)) opt.is_large = 1;
          else tmap_error("Option -a value not correct", Exit, CommandLineArgument); 
          break; 
        case 'v':
          tmap_progress_set_verbosity(1); break;
        case 'h':
          return usage(&opt);
        case 'H':
          opt.check_hash = 0; break;
        default:
          return usage(&opt);
      }
  }

  if(argc != optind || 1 == argc) {
      return usage(&opt);
  }
  if(NULL == opt.fn_fasta) {
      tmap_error("required option -f", Exit, CommandLineArgument);
  }
  if(opt.occ_interval < TMAP_BWT_OCC_MOD || 0 != (opt.occ_interval % 2) || 0 != (opt.occ_interval % TMAP_BWT_OCC_MOD)) {
      tmap_error("option -o out of range", Exit, CommandLineArgument);
  }
  if(opt.hash_width < 0) {
      tmap_error("option -w out of range", Exit, CommandLineArgument);
  }
  if(opt.sa_interval <= 0 || (1 < opt.sa_interval && 0 != (opt.sa_interval % 2))) {
      tmap_error("option -i out of range", Exit, CommandLineArgument);
  }

  tmap_index_core(&opt);

  free(opt.fn_fasta);
  
  tmap_progress_print2("terminating successfully");

  return 0;
}
Ejemplo n.º 8
0
/*
 * Compare two BWTs by exhaustive k-mer lookup.
 *
 * For every k from 1 to `length`, every sequence of k symbols with values
 * 0..3 is enumerated. Each sequence, and the sequence obtained by reversing
 * it and replacing every value v with 3 - v, is looked up in both BWTs with
 * tmap_bwt_match_exact_reverse(); the returned counts and the k/l/hi/offset
 * fields of the match structures must agree between the two BWTs, otherwise
 * the differences are printed and tmap_bug() is called.
 *
 * In addition, the counts from bwt[0] for the enumerated (non-transformed)
 * sequences are summed and compared with bwt[0]->seq_len - k + 1.
 *
 * bwt:       the two BWTs to compare
 * length:    largest k to test
 * print_msg: when 1, a pass/fail line is printed for every k
 * warn:      when 0, a failed sum check raises tmap_error() with Exit
 *
 * NOTE(review): the closing brace of this function is not present in this
 * excerpt; the body below ends with the end of the outer for loop.
 */
void 
tmap_bwt_compare_core2(tmap_bwt_t *bwt[2], int32_t length, int32_t print_msg, int32_t warn)
{
  uint8_t *seqs[2] = {NULL,NULL};
  char *str = NULL;
  int32_t i, asymmetric, k, m;
  uint64_t hash_j;
  int64_t sum, j;
  tmap_bwt_match_occ_t sa[2];
  tmap_bwt_int_t n[2][2];

  // i is the current k-mer length
  for(i=1;i<=length;i++) {
      // seqs[0] is the k-mer being enumerated (starts as all zeros),
      // seqs[1] receives its reversed, 3 - v transformed counterpart
      seqs[0] = tmap_calloc(i, sizeof(uint8_t), "seqs[0]");
      seqs[1] = tmap_calloc(i, sizeof(uint8_t), "seqs[1]");
      // str is allocated and freed here but not otherwise used in this function
      str = tmap_calloc(i+1, sizeof(char), "str");
      for(j=0;j<i;j++) {
          seqs[1][j] = 3;
      }

      asymmetric = 0;
      j = 0;
      // hash_j is updated alongside j but never read in this function
      hash_j = sum = 0;
      // j is the number of positions of seqs[0] currently fixed; a full
      // k-mer is ready whenever j reaches i
      while(1) {
          if(i == j) {
              // build the reversed, 3 - v counterpart of seqs[0]
              for(k=0;k<i;k++) {
                  seqs[1][k] = 3 - seqs[0][i-k-1];
              }
              // k selects the sequence, m selects the BWT
              for(k=0;k<2;k++) {
                  for(m=0;m<2;m++) {
                      n[m][k] = tmap_bwt_match_exact_reverse(bwt[m], i, seqs[k], &sa[m]);
                  }
                  // both BWTs must report identical results for this sequence
                  if(n[0][k] != n[1][k] || sa[0].k != sa[1].k || sa[0].l != sa[1].l || sa[0].hi != sa[1].hi || sa[0].offset != sa[1].offset) {
                      tmap_progress_print2("BWTs did not match");
                      tmap_progress_print2("n=[%llu,%llu]", n[0][k], n[1][k]);
                      tmap_progress_print2("k=[%llu,%llu]", sa[0].k, sa[1].k);
                      tmap_progress_print2("l=[%llu,%llu]", sa[0].l, sa[1].l);
                      tmap_progress_print2("hi=[%llu,%llu]", sa[0].hi, sa[1].hi);
                      tmap_progress_print2("offset=[%llu,%llu]", sa[0].offset, sa[1].offset);
                      tmap_bug();
                  }
              }
              // NOTE(review): sa[0] at this point holds the result of the last
              // lookup (k == 1), while n[0][0] is the count for k == 0 - confirm
              // this mix is intended
              for(k=0;k<2;k++) {
                  // use m == 0 && k = 0
                  if(0 == k) {
                      if(0 < n[0][k] && TMAP_BWT_INT_MAX != sa[0].k && sa[0].k <= sa[0].l) {
                          sum += n[0][k];
                      }
                  }
              }
              // warn once per length if the two sequences have different counts
              if(0 == asymmetric && n[0][0] != n[0][1]) {
                  asymmetric = 1;
                  //fprintf(stderr, "n[0][0]=%u n[0][1]=%u\n", n[0][0], n[0][1]);
                  tmap_error("Asymmetry found", Warn, OutOfRange);
              }

              // advance to the next k-mer: reset trailing 3s to 0 and
              // increment the first position (from the right) that is not 3
              j--;
              while(0 <= j && 3 == seqs[0][j]) {
                  seqs[0][j] = 0;
                  hash_j >>= 2;
                  j--;
              }
              // every position was 3: all k-mers of this length are done
              if(j < 0) break;
              seqs[0][j]++;
              hash_j++;
              j++;
          }
          else {
              // extend the prefix by one more position (its value is already 0)
              hash_j <<= 2;
              j++;
          }
      }

      free(seqs[0]);
      free(seqs[1]);
      free(str);

      // the summed counts must equal seq_len - i + 1
      j = (sum == (bwt[0]->seq_len - i + 1)) ? 0 : 1; // j==1 on fail
      if(1 == print_msg) {
          if(0 == j) tmap_progress_print2("%d-mer validation passed", i);
          else tmap_progress_print2("%d-mer validation failed: observed (%llu) != expected (%llu)\n", i, sum, bwt[0]->seq_len - i + 1);
      }
      if(0 == warn && 1 == j) {
          tmap_error("inconsistency found in the BWT", Exit, OutOfRange);
      }
  }
Ejemplo n.º 9
0
/*
 * Validate a BWT by exhaustive k-mer lookup.
 *
 * For every k from 1 to `length`, every sequence of k symbols with values
 * 0..3 is enumerated. Each sequence, and the sequence obtained by reversing
 * it and replacing every value v with 3 - v, is looked up with
 * tmap_bwt_match_exact_reverse(). The counts for the enumerated sequences
 * are summed and compared with bwt->seq_len - k + 1.
 *
 * bwt:       the BWT to check
 * length:    largest k to test
 * print_msg: when 1, a pass/fail line is printed for every k
 * print_sa:  when 1 (and print_msg is 1), one line per enumerated sequence
 *            is printed with its k, l and count, or NA when there is no
 *            valid match
 * warn:      when 0, a failed sum check raises tmap_error() with Exit
 *
 * NOTE(review): the closing brace of this function is not present in this
 * excerpt; the body below ends with the end of the outer for loop.
 */
void 
tmap_bwt_check_core2(tmap_bwt_t *bwt, int32_t length, int32_t print_msg, int32_t print_sa, int32_t warn)
{
  uint8_t *seqs[2] = {NULL,NULL};
  char *str = NULL;
  int32_t i, asymmetric, k, l;
  uint64_t hash_j;
  int64_t sum, j;
  tmap_bwt_match_occ_t sa;
  tmap_bwt_int_t n[2];

  // i is the current k-mer length
  for(i=1;i<=length;i++) {
      // seqs[0] is the k-mer being enumerated (starts as all zeros),
      // seqs[1] receives its reversed, 3 - v transformed counterpart
      seqs[0] = tmap_calloc(i, sizeof(uint8_t), "seqs[0]");
      seqs[1] = tmap_calloc(i, sizeof(uint8_t), "seqs[1]");
      // str holds the printable form of the k-mer; calloc leaves the final
      // byte zero so it is always terminated
      str = tmap_calloc(i+1, sizeof(char), "str");
      for(j=0;j<i;j++) {
          seqs[1][j] = 3;
      }

      asymmetric = 0;
      j = 0;
      // hash_j is updated alongside j but never read in this function
      hash_j = sum = 0;
      // j is the number of positions of seqs[0] currently fixed; a full
      // k-mer is ready whenever j reaches i
      while(1) {
          if(i == j) {
              // build the reversed, 3 - v counterpart of seqs[0]
              for(k=0;k<i;k++) {
                  seqs[1][k] = 3 - seqs[0][i-k-1];
              }
              // k == 0: the enumerated sequence, k == 1: its counterpart
              for(k=0;k<2;k++) {
                  //n[k] = tmap_bwt_match_exact(bwt, i, seqs[k], &sa);
                  n[k] = tmap_bwt_match_exact_reverse(bwt, i, seqs[k], &sa);
                  // only the enumerated sequence contributes to the sum and the printout
                  if(0 == k) {
                      if(0 < n[k] && TMAP_BWT_INT_MAX != sa.k && sa.k <= sa.l) {
                          sum += n[k];
                      }
                      if(1 == print_msg && 1 == print_sa) {
                          // translate values 0..3 to the letters A, C, G, T
                          for(l=0;l<i;l++) {
                              str[l] = "ACGTN"[seqs[k][l]];
                          }
                          if(0 < n[k] && TMAP_BWT_INT_MAX != sa.k && sa.k <= sa.l) {
                              tmap_progress_print2("%s\t%llu\t%llu\t%llu", str, sa.k, sa.l, n[k]);
#ifdef TMAP_BWT_CHECK_DEBUG 
                              // debug build only: print the location of every hit in [sa.k, sa.l]
                              while(sa.k <= sa.l) {
                                  uint32_t seqid, pos;
                                  uint8_t strand;
                                  tmap_bwt_int_t pacpos = tmap_sa_pac_pos(tmap_bwt_index->sa, bwt, sa.k);
                                  if(0 < tmap_refseq_pac2real(tmap_bwt_index->refseq, pacpos, 1, &seqid, &pos, &strand)) {
                                      tmap_progress_print2("%s\t%llu\t%llu\t%llu\t%c%u:%u", str, sa.k, sa.l, n[k],
                                                           "+-"[strand], seqid, pos);
                                  }
                                  else {
                                      tmap_progress_print2("%s\t%llu\t%llu\t%llu\t%c%u:%u", str, sa.k, sa.l, n[k],
                                                           '?', 0, 0);
                                  }
                                  sa.k++;
                              }
#endif
                          }
                          else {
                              tmap_progress_print2("%s\tNA\tNA\tNA", str);
                          }
                      }
                  }
              }
              // warn once per length if the two sequences have different counts
              if(0 == asymmetric && n[0] != n[1]) {
                  asymmetric = 1;
                  //fprintf(stderr, "n[0]=%u n[1]=%u\n", n[0], n[1]);
                  tmap_error("Asymmetry found", Warn, OutOfRange);
              }

              // advance to the next k-mer: reset trailing 3s to 0 and
              // increment the first position (from the right) that is not 3
              j--;
              while(0 <= j && 3 == seqs[0][j]) {
                  seqs[0][j] = 0;
                  hash_j >>= 2;
                  j--;
              }
              // every position was 3: all k-mers of this length are done
              if(j < 0) break;
              seqs[0][j]++;
              hash_j++;
              j++;
          }
          else {
              // extend the prefix by one more position (its value is already 0)
              hash_j <<= 2;
              j++;
          }
      }

      free(seqs[0]);
      free(seqs[1]);
      free(str);

      // the summed counts must equal seq_len - i + 1
      j = (sum == (bwt->seq_len - i + 1)) ? 0 : 1; // j==1 on fail
      if(1 == print_msg) {
          if(0 == j) tmap_progress_print2("%d-mer validation passed", i);
          else tmap_progress_print2("%d-mer validation failed: observed (%llu) != expected (%llu)\n", i, sum, bwt->seq_len - i + 1);
      }
      if(0 == warn && 1 == j) {
          tmap_error("inconsistency found in the BWT", Exit, OutOfRange);
      }
  }
Ejemplo n.º 10
0
/*
 * Pack a reference FASTA into a 2-bit-per-base file plus an annotation file.
 *
 * Every sequence read from fn_fasta becomes one annotation entry (name,
 * length, offset). Bases whose code from tmap_nt_char_to_int is 4 or more
 * are treated as IUPAC codes: their positions are recorded as one-based
 * ranges in the annotation, and in the packed data they are replaced by the
 * lowest-numbered base (0..3) whose bit is not set in the code's bit string.
 *
 * The packed bases are buffered and flushed to the TMAP_PAC_FILE output;
 * after the last flush a trailer is appended (see the comments near the end
 * of the function). The annotations are written to the TMAP_ANNO_FILE
 * output, contig names are checked for uniqueness, and finally
 * tmap_refseq_pac2revpac() is called on the same fn_fasta.
 *
 * fn_fasta:    input FASTA file name (also the base for the output names)
 * compression: compression type passed to tmap_seq_io_init()
 *
 * Returns the total number of bases packed.
 */
uint64_t
tmap_refseq_fasta2pac(const char *fn_fasta, int32_t compression)
{
  tmap_file_t *fp_pac = NULL, *fp_anno = NULL;
  tmap_seq_io_t *seqio = NULL;
  tmap_seq_t *seq = NULL;
  tmap_refseq_t *refseq = NULL;
  char *fn_pac = NULL, *fn_anno = NULL;
  uint8_t buffer[TMAP_REFSEQ_BUFFER_SIZE];
  int32_t i, j, l, buffer_length;
  uint32_t num_IUPAC_found= 0, amb_bases_mem = 0;
  uint8_t x = 0;
  uint64_t ref_len;

  tmap_progress_print("packing the reference FASTA");

  refseq = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq");

  refseq->version_id = TMAP_VERSION_ID; 
  refseq->package_version = tmap_string_clone2(PACKAGE_VERSION);
  // the stack buffer stands in for the sequence storage while packing, so
  // that tmap_refseq_seq_store_i() can be used to write into it
  refseq->seq = buffer; // IMPORTANT: must nullify later
  refseq->annos = NULL;
  refseq->num_annos = 0;
  refseq->len = 0;
  refseq->is_rev = 0;
  refseq->is_shm = 0;
  memset(buffer, 0, TMAP_REFSEQ_BUFFER_SIZE);
  // buffer_length counts bases (not bytes) currently held in the buffer
  buffer_length = 0;

  // input files
  seqio = tmap_seq_io_init(fn_fasta, TMAP_SEQ_TYPE_FQ, 0, compression);
  seq = tmap_seq_init(TMAP_SEQ_TYPE_FQ);

  // output files
  fn_pac = tmap_get_file_name(fn_fasta, TMAP_PAC_FILE);
  fp_pac = tmap_file_fopen(fn_pac, "wb", TMAP_PAC_COMPRESSION);

  // read in sequences
  // l is the value returned by tmap_seq_io_read(); it is used as the contig length
  while(0 <= (l = tmap_seq_io_read(seqio, seq))) {
      tmap_anno_t *anno = NULL;
      tmap_progress_print2("packing contig [%s:1-%d]", seq->data.fq->name->s, l);

      // append one annotation entry for this contig
      refseq->num_annos++;
      refseq->annos = tmap_realloc(refseq->annos, sizeof(tmap_anno_t)*refseq->num_annos, "refseq->annos");
      anno = &refseq->annos[refseq->num_annos-1];
      
      anno->name = tmap_string_clone(seq->data.fq->name); 
      anno->len = l;
      // the offset is the end of the previous contig (0 for the first one)
      anno->offset = (1 == refseq->num_annos) ? 0 : refseq->annos[refseq->num_annos-2].offset + refseq->annos[refseq->num_annos-2].len;
      anno->amb_positions_start = NULL;
      anno->amb_positions_end = NULL;
      anno->amb_bases = NULL;
      anno->num_amb = 0;
      // capacity of the three amb_* arrays of this contig
      amb_bases_mem = 0;

      // fill the buffer
      for(i=0;i<l;i++) {
          uint8_t c = tmap_nt_char_to_int[(int)seq->data.fq->seq->s[i]];
          // handle IUPAC codes 
          if(4 <= c) {
              int32_t k;
              // warn users about IUPAC codes
              // (only on the first one found: print the full conversion table once)
              if(0 == num_IUPAC_found) { 
                  tmap_error("IUPAC codes were found and will be converted to non-matching DNA bases", Warn, OutOfRange);
                  for(j=4;j<15;j++) {
                      c = tmap_iupac_char_to_bit_string[(int)tmap_iupac_int_to_char[j]];
                      // get the lexicographically smallest base not compatible with this code
                      for(k=0;k<4;k++) {
                          if(!(c & (0x1 << k))) {
                              break;
                          }
                      } 
                      tmap_progress_print2("IUPAC code %c will be converted to %c", tmap_iupac_int_to_char[j], "ACGTN"[k & 3]);
                  }
              }
              num_IUPAC_found++;
              
              // change it to a mismatched base than the IUPAC code
              // (c now holds the bit string of the code at position i)
              c = tmap_iupac_char_to_bit_string[(int)seq->data.fq->seq->s[i]];

              // store IUPAC bases
              if(amb_bases_mem <= anno->num_amb) { // allocate more memory if necessary
                  amb_bases_mem = anno->num_amb + 1;
                  tmap_roundup32(amb_bases_mem);
                  anno->amb_positions_start = tmap_realloc(anno->amb_positions_start, sizeof(uint32_t) * amb_bases_mem, "anno->amb_positions_start");
                  anno->amb_positions_end = tmap_realloc(anno->amb_positions_end, sizeof(uint32_t) * amb_bases_mem, "anno->amb_positions_end");
                  anno->amb_bases = tmap_realloc(anno->amb_bases, sizeof(uint8_t) * amb_bases_mem, "anno->amb_bases");
              }
              // encode stretches of the same base
              // (the stored end is one-based, so an end equal to the zero-based i
              // means the previous range stops at the base just before this one)
              if(0 < anno->num_amb
                 && anno->amb_positions_end[anno->num_amb-1] == i
                 && anno->amb_bases[anno->num_amb-1] == tmap_iupac_char_to_int[(int)seq->data.fq->seq->s[i]]) {
                 anno->amb_positions_end[anno->num_amb-1]++; // expand the range 
              }
              else {
                  // new ambiguous base and range
                  anno->num_amb++;
                  anno->amb_positions_start[anno->num_amb-1] = i+1; // one-based
                  anno->amb_positions_end[anno->num_amb-1] = i+1; // one-based
                  anno->amb_bases[anno->num_amb-1] = tmap_iupac_char_to_int[(int)seq->data.fq->seq->s[i]];
              }
              
              // get the lexicographically smallest base not compatible with
              // this code
              for(j=0;j<4;j++) {
                  if(!(c & (0x1 << j))) {
                      break;
                  }
              } 
              // when all four bits are set the loop ends with j == 4, and 4 & 3 == 0
              c = j & 3; // Note: Ns will go to As
          }
          if(3 < c) {
              tmap_error("bug encountered", Exit, OutOfRange);
          }
          // four bases fit in each buffer byte; flush once the buffer is full
          if(buffer_length == (TMAP_REFSEQ_BUFFER_SIZE << 2)) { // 2-bit
              if(tmap_refseq_seq_memory(buffer_length) != tmap_file_fwrite(buffer, sizeof(uint8_t), tmap_refseq_seq_memory(buffer_length), fp_pac)) {
                  tmap_error(fn_pac, Exit, WriteFileError);
              }
              memset(buffer, 0, TMAP_REFSEQ_BUFFER_SIZE);
              buffer_length = 0;
          }
          tmap_refseq_seq_store_i(refseq, buffer_length, c);
          buffer_length++;
      }
      refseq->len += l;
      // re-size the ambiguous bases
      // (shrink the arrays to the number of ranges actually recorded)
      if(anno->num_amb < amb_bases_mem) {
          amb_bases_mem = anno->num_amb;
          anno->amb_positions_start = tmap_realloc(anno->amb_positions_start, sizeof(uint32_t) * amb_bases_mem, "anno->amb_positions_start");
          anno->amb_positions_end = tmap_realloc(anno->amb_positions_end, sizeof(uint32_t) * amb_bases_mem, "anno->amb_positions_end");
          anno->amb_bases = tmap_realloc(anno->amb_bases, sizeof(uint8_t) * amb_bases_mem, "anno->amb_bases");
      }
  }
  // write out the buffer
  if(tmap_refseq_seq_memory(buffer_length) != tmap_file_fwrite(buffer, sizeof(uint8_t), tmap_refseq_seq_memory(buffer_length), fp_pac)) {
      tmap_error(fn_pac, Exit, WriteFileError);
  }
  // x is still 0 here, so the extra byte written is a zero byte
  if(refseq->len % 4 == 0) { // add an extra byte if we completely filled all bits
      if(1 != tmap_file_fwrite(&x, sizeof(uint8_t), 1, fp_pac)) {
          tmap_error(fn_pac, Exit, WriteFileError);
      }
  }
  // store number of unused bits at the last byte
  // NOTE(review): the value actually written is refseq->len % 4 - confirm
  // how the reader interprets it
  x = refseq->len % 4;
  if(1 != tmap_file_fwrite(&x, sizeof(uint8_t), 1, fp_pac)) {
      tmap_error(fn_pac, Exit, WriteFileError);
  }
  refseq->seq = NULL; // IMPORTANT: nullify this
  ref_len = refseq->len; // save for return
      
  tmap_progress_print2("total genome length [%u]", refseq->len);
  if(0 < num_IUPAC_found) {
      if(1 == num_IUPAC_found) {
          tmap_progress_print("%u IUPAC base was found and converted to a DNA base", num_IUPAC_found);
      }
      else {
          tmap_progress_print("%u IUPAC bases were found and converted to DNA bases", num_IUPAC_found);
      }
  }

  // write annotation file
  fn_anno = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
  fp_anno = tmap_file_fopen(fn_anno, "wb", TMAP_ANNO_COMPRESSION);
  tmap_refseq_write_anno(fp_anno, refseq); 

  // close files
  tmap_file_fclose(fp_pac);
  tmap_file_fclose(fp_anno);

  // check sequence name uniqueness
  // (pairwise comparison; note that both output files have already been written)
  for(i=0;i<refseq->num_annos;i++) {
      for(j=i+1;j<refseq->num_annos;j++) {
          if(0 == strcmp(refseq->annos[i].name->s, refseq->annos[j].name->s)) {
              tmap_file_fprintf(tmap_file_stderr, "Contigs have the same name: #%d [%s] and #%d [%s]\n",
                                i+1, refseq->annos[i].name->s, 
                                j+1, refseq->annos[j].name->s); 
              tmap_error("Contig names must be unique", Exit, OutOfRange);
          }
      }
  }

  // refseq->seq was reset to NULL above, so the stack buffer is not handed
  // to tmap_refseq_destroy()
  tmap_refseq_destroy(refseq); 
  tmap_seq_io_destroy(seqio);
  tmap_seq_destroy(seq);
  free(fn_pac);
  free(fn_anno);

  tmap_progress_print2("packed the reference FASTA");

  // also produce the reversed packed reference
  tmap_refseq_pac2revpac(fn_fasta);

  return ref_len;
}