Пример #1
0
// reverse and reverse compliment
tmap_map_sams_t*
tmap_map_vsw_thread_map_core(void **data, tmap_seq_t *seqs[2], int32_t seq_len,
                             tmap_index_t *index, tmap_map_opt_t *opt)
{
  int32_t i;
  tmap_map_sams_t *sams = NULL;

  if((0 < opt->min_seq_len && seq_len < opt->min_seq_len)
     || (0 < opt->max_seq_len && opt->max_seq_len < seq_len)) {
      // go to the next loop
      return tmap_map_sams_init(NULL);
  }

  sams = tmap_map_sams_init(NULL);
  tmap_map_sams_realloc(sams, index->refseq->num_annos<<1); // one for each contig and strand

  for(i=0;i<index->refseq->num_annos<<1;i++) {
      tmap_map_sam_t *s;

      // save
      s = &sams->sams[i];

      // save the hit
      s->algo_id = TMAP_MAP_ALGO_MAPVSW;
      s->algo_stage = opt->algo_stage;
      s->strand = i & 1;
      s->seqid = i >> 1;
      s->pos = 0;
      s->target_len = index->refseq->annos[s->seqid].len;
      s->score_subo = INT32_MIN;
      
      // mapvswaux data
      tmap_map_sam_malloc_aux(s, TMAP_MAP_ALGO_MAPVSW);
  }

  return sams;
}
Пример #2
0
static tmap_map_sams_t *
tmap_map2_aux_store_hits(tmap_refseq_t *refseq, tmap_map_opt_t *opt, 
                         tmap_map2_aln_t *aln, int32_t seq_len)
{
  int32_t i, j;
  tmap_map_sams_t *sams = NULL;

  if(NULL == aln) return NULL;

  sams = tmap_map_sams_init(NULL);
  tmap_map_sams_realloc(sams, aln->n);

  for(i=j=0;i<aln->n;i++) {
      tmap_map2_hit_t *p = aln->hits + i;
      uint32_t seqid = 0, pos = 0;
      uint8_t strand;
      int32_t beg;
      tmap_map_sam_t *sam = &sams->sams[j];
      tmap_map_sam_init(sam);

      //strand = (p->flag & 0x10) ? 1 : 0;

      // skip over duplicate hits, or sub-optimal hits to the same location
      if(0 < i) {
          tmap_map2_hit_t *q = aln->hits + i - 1;
          if(q->flag == p->flag && q->k == p->k && q->G <= p->G && q->tlen <= p->tlen) continue;
      }
      /*
      if(i < aln->n) {
          tmap_map2_hit_t *q = aln->hits + i + 1;
          if(p->flag == q->flag && p->k == q->k && p->G <= q->G && p->tlen <= q->tlen) continue;
      }
      */

      // adjust for contig boundaries
      if(tmap_refseq_pac2real(refseq, p->k, p->tlen, &seqid, &pos, &strand) <= 0) {
          if(1 == strand) { // reverse
              if(tmap_refseq_pac2real(refseq, p->k + p->tlen - 1, 1, &seqid, &pos, &strand) <= 0) {
                  continue;
              }
              else {
                  // move to the contig and position
                  p->k = refseq->annos[seqid].offset+1;
                  p->tlen = (p->tlen < refseq->annos[seqid].len) ? p->tlen : refseq->annos[seqid].len;
              }
          }
          else {
              if(tmap_refseq_pac2real(refseq, p->k, 1, &seqid, &pos, &strand) <= 0) {
                  continue;
              }
              else {
                  // move to the contig and position
                  p->k = refseq->annos[seqid].offset+1;
                  p->tlen = (p->tlen < refseq->annos[seqid].len) ? p->tlen : refseq->annos[seqid].len;
              }
          }
      }

      // adjust based on where the hit was in the read
      beg = (1 == strand) ? (seq_len - p->end) : p->beg;
      pos = (pos <= beg) ? 1 : (pos - beg); // adjust pos

      if((p->flag & 0x1)) {
          p->G2 = p->G; // Note: the flag indicates a repetitive match, so we need to update the sub-optimal score
      }

      sam->strand = strand;
      sam->seqid = seqid;
      sam->pos = pos-1; // make it zero-based
      sam->algo_id = TMAP_MAP_ALGO_MAP2;
      sam->algo_stage = opt->algo_stage;
      sam->score = p->G;
      sam->score_subo = p->G2;
      sam->target_len = (seq_len < p->tlen) ? p->tlen : seq_len;

      // auxiliary data
      tmap_map_sam_malloc_aux(sam);
      sam->aux.map2_aux->XE = p->n_seeds;
      sam->aux.map2_aux->XF = p->flag >> 16;
      if(p->l) {
          sam->aux.map2_aux->XI = p->l - p->k + 1;
      }
      else {
          sam->aux.map2_aux->XI = 0;
      }
      sam->aux.map2_aux->flag = (p->flag & 0x1) ? 1 : 0;
      j++;
  }
  if(j != aln->n) {
      tmap_map_sams_realloc(sams, j);
  }

  return sams;
}
Пример #3
0
tmap_map_sams_t *
tmap_map1_aux_core(tmap_seq_t *seq, tmap_index_t *index,
                   tmap_bwt_match_hash_t *hash,
                   tmap_bwt_match_width_t *width, tmap_bwt_match_width_t *seed_width, tmap_map_opt_t *opt,
                   tmap_map1_aux_stack_t *stack, int32_t seed2_len)
{
  int32_t max_mm = opt->max_mm, max_gapo = opt->max_gapo, max_gape = opt->max_gape, seed_max_diff = opt->seed_max_diff;
  int32_t best_score, next_best_score;
  int32_t best_cnt = 0;
  int32_t i, j, num_n = 0;
  int32_t max_edit_score;
  tmap_bwt_match_occ_t match_sa_start;
  tmap_string_t *bases=NULL;
  tmap_map_sams_t *sams = NULL;
  int32_t max_diff, best_diff;
  tmap_bwt_int_t k, l;
  tmap_refseq_t *refseq = index->refseq;
  tmap_bwt_t *bwt = index->bwt;
  tmap_sa_t *sa = index->sa;
  tmap_map1_aux_occ_t *occs = NULL;


  max_edit_score = opt->pen_mm;
  //if(max_edit_score < opt->pen_gapo + opt->pen_gape) max_edit_score = opt->pen_gapo + opt->pen_gape;
  //if(max_edit_score < opt->pen_gape) max_edit_score = opt->pen_gape;

  bases = tmap_seq_get_bases(seq);
  /*
  fputc('\n', stderr);
  for(i=0;i<bases->l;i++) {
      fputc("ACGTN"[(int)bases->s[i]], stderr);
  }
  fputc('\n', stderr);
  */
  
  // the maximum # of differences
  if(bases->l <= TMAP_MAP_OPT_MAX_DIFF_READ_LENGTH) {
      best_diff = max_diff = opt->max_diff_table[bases->l];
  }
  else {
      best_diff = max_diff = opt->max_diff_table[TMAP_MAP_OPT_MAX_DIFF_READ_LENGTH];
  }
  
  // bound differenes by the maximum # of differences
  if(max_diff < max_mm) max_mm = max_diff;
  if(max_diff < max_gapo) max_gapo = max_diff;
  //if(max_diff < max_gape) max_gape = max_diff;
  
  best_score = next_best_score = aln_score(max_mm+1, max_gapo+1, max_gape+1, opt);

  // check whether there are too many N
  for(j=bases->l-seed2_len,num_n=0;j<bases->l;j++) {
      if(3 < bases->s[j]) {
          num_n++;
      }
  }
  if(max_mm < num_n || max_diff < num_n) {
      return tmap_map_sams_init(NULL);
  }

  // initialize
  sams = tmap_map_sams_init(NULL);
  occs = NULL;

  match_sa_start.offset = 0;
  match_sa_start.hi = 0;
  match_sa_start.k = 0;
  match_sa_start.l = bwt->seq_len;

  stack = tmap_map1_aux_stack_reset(stack, max_mm, max_gapo, max_gape, opt); // reset stack
  tmap_map1_aux_stack_push(stack, bases->l, &match_sa_start, 0, 0, 0, STATE_M, 0, NULL, opt);

  while(0 < tmap_map1_aux_stack_size(stack) && tmap_map1_aux_stack_size(stack) < opt->max_entries) {
      tmap_map1_aux_stack_entry_t *e = NULL;
      int32_t len=-1; 
      int32_t n_seed_mm=0, offset, width_cur_i;
      const uint8_t *str=NULL;
      int32_t sam_found, m;
      tmap_bwt_match_width_t *width_cur = NULL;
      const tmap_bwt_match_width_t *seed_width_cur = NULL;
      tmap_bwt_match_occ_t match_sa_cur, match_sa_next[4];
      
      // get the best entry
      e = tmap_map1_aux_stack_pop(stack); 

      // bound with best score
      if(best_score + max_edit_score < e->score) {
          break; // no need to continue
      }

      // some more information
      match_sa_cur = e->match_sa; 

      // check if we have too many edits
      m = max_diff - (e->n_mm + e->n_gapo + e->n_gape);
      if(m < 0) {
          continue; // too many edits
      }

      // get the rest of the information
      offset = e->offset; // zero-based
      str = (uint8_t*)bases->s;
      len = bases->l;
      width_cur = width;
      width_cur_i = seed2_len - (len - offset);

      if(NULL != seed_width) {
          seed_width_cur = seed_width;
          n_seed_mm = seed_max_diff - (e->n_mm + e->n_gapo + e->n_gape); // consider only mismatches in the seed
      }
      else {
          seed_width_cur = NULL;
      }
      if(0 < width_cur_i && m < width_cur[width_cur_i-1].bid) { // too many edits
          continue;
      }

      // check whether a sam is found
      sam_found = 0;
      if(len - seed2_len == offset) {
          sam_found = 1;
      }
      else if(max_mm == e->n_mm // no mismatches from any state
              && ((e->state == STATE_M && max_gapo == e->n_gapo) // in STATE_M but no more gap opens
                  || (e->state != STATE_M && max_gape == e->n_gape))) { // in STATE_I/STATE_D but no more extensions
          if(0 < tmap_bwt_match_hash_exact_alt_reverse(bwt, offset, str, &match_sa_cur, hash)) { // the alignment must match exactly to sam
              sam_found = 2;
          }
          else {
              continue; // no sam, skip
          }
      }

      if(0 < sam_found) { // alignment found
          // check for duplicates
          if(0 < sams->n) {
              for(i=0;i<sams->n;i++) {
                  // check contained
                  if(match_sa_cur.k <= occs[i].k
                     && occs[i].k <= match_sa_cur.l) { // MK <= SK <= ML
                      if(occs[i].l <= match_sa_cur.l) { // MK <= SK <= SL <= ML
                          // Want (SK - MK) + (ML - SL)
                          k = occs[i].k - match_sa_cur.k; // (SK - MK)
                          k += match_sa_cur.l - occs[i].l; // (ML - SL)
                          occs[i].l = match_sa_cur.l; // Make SL = ML
                      }
                      else { // MK <= SK <= ML <= SL
                          k = occs[i].k - match_sa_cur.k; // (SK - MK)
                      }
                      occs[i].k = match_sa_cur.k; // Make SK = MK
                      break;
                  }
                  else if(match_sa_cur.k <= occs[i].l
                          && occs[i].l <= match_sa_cur.l) { // MK <= SL <= ML
                      if(match_sa_cur.k <= occs[i].k) { // MK <= SK <= SL <= ML
                          // Want (SK - MK) + (ML - SL)
                          k = occs[i].k - match_sa_cur.k; // (SK - MK)
                          k += match_sa_cur.l - occs[i].l; // (ML - SL)
                          occs[i].k = match_sa_cur.k; // Make SK = MK
                      }
                      else { // SK <= MK <= SL <= ML
                          k = match_sa_cur.l - occs[i].l; // (ML - SL)
                      }
                      occs[i].l = match_sa_cur.l; // Make SL = ML
                      break;
                  }
              }
              if(i < sams->n) {
                  // shadow
                  if(0 < k) {
                      //tmap_map1_aux_stack_shadow(k, bwt->seq_len, e->last_diff_offset, width_cur);
                      width_cur_i = seed2_len - (len - e->last_diff_offset);
                      tmap_map1_aux_stack_shadow(k, seed2_len, width_cur_i, width_cur);
                  }
                  sam_found = 0;
                  continue;
              }
          }

          int32_t score = aln_score(e->n_mm, e->n_gapo, e->n_gape, opt);
          int32_t do_add = 1;
          if(sams->n == 0) {
              best_score = score;
              best_cnt = 0;
              best_diff = e->n_mm + e->n_gapo + e->n_gape;
          }
          if(score == best_score) {
              best_cnt += match_sa_cur.l - match_sa_cur.k + 1;
          }
          else {
              if(best_diff + 1 <= max_diff) {
                  max_diff = best_diff + 1;
              }
              if(score < next_best_score) {
                  next_best_score = score;
              }
              else if(next_best_score < score) {
                  // no need to examine further
                  break;
              }
          }
          if(do_add) { // append
              uint32_t op, op_len, cigar_i;
              tmap_map_sam_t *sam = NULL;
              tmap_map1_aux_stack_entry_t *cur = NULL;
  
              tmap_map_sams_realloc(sams, sams->n+1);
              occs = tmap_realloc(occs, sizeof(tmap_map1_aux_occ_t) * sams->n, "occs");

              sam = &sams->sams[sams->n-1];

              sam->algo_id = TMAP_MAP_ALGO_MAP1;
              sam->algo_stage = 0;
              sam->score = e->score;

              // aux data
              tmap_map_sam_malloc_aux(sam);
              k = occs[sams->n-1].k = match_sa_cur.k;
              l = occs[sams->n-1].l= match_sa_cur.l;
              sam->aux.map1_aux->n_mm = e->n_mm;
              sam->aux.map1_aux->n_gapo = e->n_gapo;
              sam->aux.map1_aux->n_gape = e->n_gape;

              // aux data: reference length
              cur = e;
              i = e->i;
              sam->aux.map1_aux->aln_ref = 0;
              cigar_i = 0;
              if(2 == sam_found) { // we used 'tmap_bwt_match_exact_alt_reverse' 
                  op = STATE_M;
                  op_len = offset;
              }
              else {
                  op = -1;
                  op_len = 0;
              }
              while(0 <= i) {
                  cur = stack->entry_pool[i];
                  if(len == cur->offset) break;
                  if(op != cur->state) {
                      if(STATE_M == op || STATE_D == op) {
                          sam->aux.map1_aux->aln_ref += op_len;
                      }
                      op = cur->state;
                      op_len = 1;
                  }
                  else {
                      op_len++;
                  }
                  //fprintf(stderr, "cur->state=%c op_len=%d cur->prev_i=%d k=%u l=%u\n", "MIDS"[cur->state], op_len, cur->prev_i, cur->match_sa.k, cur->match_sa.l);
                  i = cur->prev_i;
              }
              if(STATE_M == op || STATE_D == op) {
                  sam->aux.map1_aux->aln_ref += op_len;
              }

              /*
              fprintf(stderr, "shadow 2 k=%u l=%u len=%d offset=%d last_diff_offset=%d\n",
                      k, l, len, offset, e->last_diff_offset);
              fprintf(stderr, "e->n_mm=%d e->n_gapo=%d e->n_gape=%d\n",
                      e->n_mm, e->n_gapo, e->n_gape);
              */
              //tmap_map1_aux_stack_shadow(l - k + 1, bwt->seq_len, e->last_diff_offset, width_cur);
              width_cur_i = seed2_len - (len - e->last_diff_offset);
              tmap_map1_aux_stack_shadow(l - k + 1, seed2_len, width_cur_i, width_cur);
              if(opt->max_best_cals < best_cnt) {
                  // ignore if too many "best" have been found
                  occs[sams->n-1].l -= (best_cnt - opt->max_best_cals); // only save the maximum
                  break;
              }
          }
      }
      else {
          int32_t allow_diff = 1, allow_mm = (e->n_mm < max_mm) ? 1 : 0;

          // decrement the offset
          offset--;

          // use a bound for mismatches
          if(0 < offset) {
              int32_t seed_width_cur_i = offset - (len - opt->seed_length);
              width_cur_i = seed2_len - (len - offset);
              if(0 < width_cur_i) {
                  if(m-1 < width_cur[width_cur_i-1].bid) { 
                      allow_diff = 0;
                  }
                  else if(width_cur[width_cur_i-1].bid == m-1
                          && width_cur[width_cur_i].bid == m-1
                          && width_cur[width_cur_i-1].w == width_cur[width_cur_i].w) {
                      allow_mm = 0;
                  }
              }
              if(0 < seed_width_cur_i) {
                  if(NULL != seed_width_cur && 0 < seed_width_cur_i) {
                      if(n_seed_mm-1 < seed_width_cur[seed_width_cur_i-1].bid) {
                          allow_diff = 0;
                      }
                      else if(seed_width_cur[seed_width_cur_i-1].bid == n_seed_mm-1
                              && seed_width_cur[seed_width_cur_i].bid == n_seed_mm-1
                              && seed_width_cur[seed_width_cur_i-1].w == seed_width_cur[seed_width_cur_i].w) {
                          allow_mm = 0;
                      }
                  }
              }
          }

          // retrieve the next SA interval
          tmap_bwt_match_hash_2occ4(bwt, &e->match_sa, match_sa_next, hash); 

          // insertions/deletions
          if(allow_diff 
             && opt->indel_ends_bound + e->n_gapo + e->n_gape <= offset
             && opt->indel_ends_bound + e->n_gapo + e->n_gape <= len - offset) { // check to add gaps
              if(STATE_M == e->state) { // gap open
                  if(e->n_gapo < max_gapo) { // gap open is allowed
                      // insertion
                      tmap_map1_aux_stack_push(stack, offset, &match_sa_cur, e->n_mm, e->n_gapo + 1, e->n_gape, STATE_I, 1, e, opt);

                      // deletion
                      for(j = 0; j != 4; ++j) {
                          if(match_sa_next[j].k <= match_sa_next[j].l) {
                              //   remember that a gap deletion does not consume a
                              //   read base, so use 'offset+1'
                              tmap_map1_aux_stack_push(stack, offset+1, &match_sa_next[j], e->n_mm, e->n_gapo + 1, e->n_gape, STATE_D, 1, e, opt);
                          }
                      }
                  }
              }
              else if(STATE_I == e->state) { // extension of an insertion
                  if(e->n_gape < max_gape) { // gap extension is allowed
                      tmap_map1_aux_stack_push(stack, offset, &match_sa_cur, e->n_mm, e->n_gapo, e->n_gape + 1, STATE_I, 1, e, opt);
                  }
              }
              else if(STATE_D == e->state) { // extension of a deletion
                  if(e->n_gape < max_gape) {
                      if(e->n_gape + e->n_gapo < max_diff || e->match_sa.l - e->match_sa.k + 1 < opt->max_cals_del) { // gap extension is allowed
                          for(j = 0; j != 4; ++j) {
                              if(match_sa_next[j].k <= match_sa_next[j].l) {
                                  //   remember that a gap deletion does not consume a
                                  //   read base, so use 'offset+1'
                                  tmap_map1_aux_stack_push(stack, offset+1, &match_sa_next[j], e->n_mm, e->n_gapo, e->n_gape + 1, STATE_D, 1, e, opt);
                              }
                          }
                      }
                  }
              }
          }

          // mismatches
          if(1 == allow_mm && 1 == allow_diff) { // mismatches allowed
              for(j=0;j<4;j++) {
                  int32_t c = (str[offset] + j) & 3;
                  int32_t is_mm = (0 < j || 3 < str[offset]);
                  if(match_sa_next[c].k <= match_sa_next[c].l) {
                      tmap_map1_aux_stack_push(stack, offset, &match_sa_next[c], e->n_mm + is_mm, e->n_gapo, e->n_gape, STATE_M, is_mm, e, opt);
                  }
              }
          } 
          else if(str[offset] < 4) { // try exact match only
              int32_t c = str[offset] & 3;
              if(match_sa_next[c].k <= match_sa_next[c].l) {
                  tmap_map1_aux_stack_push(stack, offset, &match_sa_next[c], e->n_mm, e->n_gapo, e->n_gape, STATE_M, 0, e, opt);
              }
          }
      }
  }

  return tmap_map1_sam_to_real(sams, occs, bases, seed2_len, refseq, bwt, sa, hash, opt);
}
Пример #4
0
static tmap_map_sams_t *
tmap_map1_sam_to_real(tmap_map_sams_t *sams, tmap_map1_aux_occ_t *occs, tmap_string_t *bases, int32_t seed2_len,
                       tmap_refseq_t *refseq, tmap_bwt_t *bwt, tmap_sa_t *sa, tmap_bwt_match_hash_t *hash, tmap_map_opt_t *opt) 
{
  tmap_map_sams_t *sams_tmp = NULL;
  tmap_map_sam_t *sam_cur = NULL;
  uint32_t i, j, aln_ref;
  tmap_bwt_int_t k, n, num_all_sa;

  // max # of entries
  for(i=n=0;i<sams->n;i++) {
      n += occs[i].l - occs[i].k + 1;
  }
  num_all_sa = n;

  // bound the # of returned hits
  if(opt->max_best_cals < n) {
      n = opt->max_best_cals;
  }

  // alloc
  sams_tmp = tmap_map_sams_init(sams);
  tmap_map_sams_realloc(sams_tmp, n);
            
  // copy over
  for(i=j=0;i<sams->n && j<n;i++) {
      tmap_map_sam_t *sam;

      sam = &sams->sams[i];

      // go through SA interval
      for(k=occs[i].k;k<=occs[i].l;k++) {
          uint32_t pos = 0, seqid = 0;
          tmap_bwt_int_t pacpos = 0;
          uint8_t strand;

          sam_cur = &sams_tmp->sams[j];
          tmap_map_sam_init(sam_cur);

          strand = sams->sams[i].strand;
          aln_ref = sam->aux.map1_aux->aln_ref;
              
          pacpos = bwt->seq_len - tmap_sa_pac_pos_hash(sa, bwt, k, hash);
          pacpos = (pacpos < aln_ref) ? 0 : (pacpos - aln_ref);
          pacpos++; // make one-based
          
          // NB: addressing the symptom, not the problem
          // This happens when we are at the end of the reference on the reverse
          // strand
          if(pacpos <= refseq->len && refseq->len < pacpos + aln_ref - 1) {
              aln_ref = refseq->len - pacpos + 1;
          }
          else if(refseq->len * 2 < pacpos + aln_ref - 1) {
              aln_ref = (refseq->len * 2) - pacpos + 1;
          }
          
          // save the hit
          if(0 < tmap_refseq_pac2real(refseq, pacpos, aln_ref, &seqid, &pos, &strand)) {
              // copy over previous parameters
              sam_cur->algo_id = TMAP_MAP_ALGO_MAP1;
              sam_cur->algo_stage = opt->algo_stage;
              sam_cur->strand = strand;
              sam_cur->seqid = seqid;
              sam_cur->pos = pos-1; // adjust to zero-based
              sam_cur->target_len = aln_ref;
              sam_cur->score_subo = INT32_MIN;
              if(0 < opt->seed2_length && seed2_len < bases->l) { // adjust if we used a secondary seed
                  // adjust both the target length and position
                  if(0 == strand) { // forward
                      sam_cur->target_len += (bases->l - seed2_len);
                      // NB: sam_cur->pos is zero based
                      if(refseq->annos[sam_cur->seqid].len < sam_cur->pos + sam_cur->target_len) {
                          sam_cur->target_len = refseq->annos[sam_cur->seqid].len - sam_cur->pos;
                      }
                  }
                  else { // reverse
                      if(sam_cur->pos < (bases->l - seed2_len)) { // before the start of the chromosome
                          sam_cur->target_len += sam_cur->pos;
                          sam_cur->pos = 0;
                      }
                      else { // move to the end of the read
                          sam_cur->target_len += (bases->l - seed2_len);
                          sam_cur->pos -= (bases->l - seed2_len);
                      }
                  }
              }
              else if(sam_cur->target_len < bases->l) { // do not adjust, we used the full read
                  sam_cur->target_len = bases->l;
              }

              // aux
              tmap_map_sam_malloc_aux(sam_cur);
              sam_cur->aux.map1_aux->n_mm = sam->aux.map1_aux->n_mm;
              sam_cur->aux.map1_aux->n_gapo = sam->aux.map1_aux->n_gapo;
              sam_cur->aux.map1_aux->n_gape = sam->aux.map1_aux->n_gape;
              sam_cur->aux.map1_aux->aln_ref = 0;
              sam_cur->aux.map1_aux->num_all_sa = num_all_sa;
              j++;

              // only save the top n hits
              if(n <= j) {  // equivalent to opt->max_bset_cals <= j
                  break;
              }
          }
      }
  }
  
  // destroy
  tmap_map_sams_destroy(sams);

  // realloc
  tmap_map_sams_realloc(sams_tmp, j);

  //NB: do not use occs later
  free(occs);
  occs = NULL;
  
  return sams_tmp;
}