Example #1
0
 /* Function:  FM_backtrackSeed()
  *
  * Synopsis:  Resolve a single BWT position against the sampled suffix array.
  *
  * Details:   Starting at <i>, repeatedly takes one backward step through the
  *            BWT/FM-index (the character at the current position, its
  *            occurrence count up to the previous position, plus the absolute
  *            value of its C[] entry) until the current position either
  *            equals <fmf->term_loc> or is a multiple of
  *            <fm_cfg->meta->freq_SA>, i.e. a position whose suffix array
  *            entry was sampled and is stored in <fmf->SA>.
  *
  * Args:      fmf     - FM index data; BWT, C[], SA and term_loc are read
  *            fm_cfg  - FM-index meta data; meta->alph_type and meta->freq_SA are read
  *            i       - single position in the BWT at which the walk starts
  *
  * Returns:   The number of backward steps taken plus the sampled SA entry at
  *            the position where the walk stopped; only the step count when
  *            the walk stopped at <fmf->term_loc>.
  */
static uint32_t
FM_backtrackSeed(const FM_DATA *fmf, const FM_CFG *fm_cfg, int i) {
  int pos;          /* current position in the BWT           */
  int steps = 0;    /* backward steps taken so far           */
  int sym;          /* character found at the current position */

  /* Walk backward until we land on the terminal location or on a position
   * whose suffix array entry was kept when the index was built. */
  for (pos = i; pos != fmf->term_loc && (pos % fm_cfg->meta->freq_SA) != 0; steps++) {
    sym  = fm_getChar(fm_cfg->meta->alph_type, pos, fmf->BWT);
    pos  = fm_getOccCount(fmf, fm_cfg, pos - 1, sym);
    pos += abs(fmf->C[sym]);
  }

  /* At the terminal location no SA entry is looked up; only the step count counts. */
  if (pos == fmf->term_loc) {
    return steps;
  }

  /* Otherwise add the steps taken to the sampled SA entry we reached. */
  return steps + fmf->SA[pos / fm_cfg->meta->freq_SA];
}
/* Function:  getFMHits()
 * Synopsis:  For every position of an FM interval, record a hit whose start is
 *            expressed as a position in the original (unreversed) text.
 * Purpose:   Implement Algorithm 3.7 (p17) of Firth paper (A Comparison of BWT
 *            Approaches to String Pattern Matching). Most of the work is the
 *            character counting done by fm_getOccCount(), whose implementation
 *            reportedly depends on compilation choices.
 *
 *            For each position in [interval->lower .. interval->upper] the
 *            BWT is walked backward until the terminal location or a sampled
 *            suffix array position is reached. One entry of <hits_ptr> is
 *            then filled in, at index <hit_offset> plus the position's offset
 *            within the interval.
 *
 * Args:      fm           - FM index data; BWT, C[], SA, term_loc and N are read
 *            cfg          - FM-index meta data; meta->alph_type and meta->freq_SA are read
 *            interval     - inclusive range of BWT positions to resolve
 *            block_id     - value stored in each hit's <block> field
 *            hit_offset   - index in <hits_ptr> of the first hit to write
 *            hit_length   - value stored in each hit's <length> field; also
 *                           added to the distance for forward hits
 *            hits_ptr     - output array; caller must provide room for
 *                           hit_offset + (upper - lower + 1) entries
 *            fm_direction - value stored in each hit's <direction> field;
 *                           compared against <fm_forward>
 *
 * Returns:   <eslOK> always.
 */
int
getFMHits( FM_DATA *fm, FM_CFG *cfg, FM_INTERVAL *interval, int block_id, int hit_offset, int hit_length, FM_HIT *hits_ptr, int fm_direction) {
  int row;

  for (row = interval->lower; row <= interval->upper; row++) {
    FM_HIT *hit   = &hits_ptr[hit_offset + row - interval->lower];  /* output slot for this row */
    int     pos   = row;   /* current position in the BWT */
    int     steps = 0;     /* backward steps needed to reach a sampled SA position */
    int     dist_from_end;

    /* Step backward until we hit the terminal location or a position in the
     * full SA that was sampled during FM index construction. */
    while (pos != fm->term_loc && (pos % cfg->meta->freq_SA) != 0) {
      uint8_t sym = fm_getChar(cfg->meta->alph_type, pos, fm->BWT);
      pos  = fm_getOccCount(fm, cfg, pos - 1, sym);
      pos += abs(fm->C[sym]);
      steps++;
    }

    hit->block     = block_id;
    hit->direction = fm_direction;
    hit->length    = hit_length;

    /* Steps taken, plus the sampled SA entry unless we stopped at the terminal location. */
    dist_from_end = 1 + steps;
    if (pos != fm->term_loc) {
      dist_from_end += fm->SA[pos / cfg->meta->freq_SA];
    }

    /* Forward hits are shifted by the hit length, all others by one. */
    dist_from_end += (fm_direction == fm_forward) ? hit_length : 1;

    /* The SA is on the reversed string; convert to a position in the unreversed string. */
    hit->start = fm->N - dist_from_end;
  }

  return eslOK;
}