Ejemplo n.º 1
0
/**
 * Process one token from the text being scanned.
 *
 * The token is offered to the match object (_mo) through the current match
 * iterator; when it matches, an occurrence is recorded with the token's
 * original byte position, word position and byte length. Regardless of the
 * match result, _endpos is advanced to the end of this token.
 *
 * @param token the token to process; passed on to Match() by reference.
 */
void Matcher::handle_token(Token& token)
{
    if (LOG_WOULD_LOG(debug)) {
        char utf8token[1024];
        // Hand utf8ncopy one byte less than the buffer and terminate the last
        // byte ourselves, so the "%s" read below is bounded whether or not
        // utf8ncopy NUL-terminates a completely filled destination.
        Fast_UnicodeUtil::utf8ncopy(utf8token, token.token,
                                    static_cast<int>(sizeof(utf8token) - 1),
                                    (token.token != NULL ? token.curlen : 0));
        utf8token[sizeof(utf8token) - 1] = '\0';
        LOG(debug, "handle_token(%s)", utf8token);
    }

    unsigned options = 0;
    if (_mo->Match(_match_iter, token, options)) {
        // Found a match. Record it with original pos and length
        add_occurrence(token.bytepos, token.wordpos, token.bytelen);
    }
    // Keep track of end of the text
    _endpos = token.bytepos + token.bytelen;
}
Ejemplo n.º 2
0
/***********************************************************************
 * Record one motif occurrence.
 *
 * Updates the transition-count matrix, the spacer-length matrix and the
 * order list for a transition from the previously recorded motif to the
 * given motif, then makes the given motif the new "previous" motif.
 *
 * An occurrence is skipped (nothing is recorded and the previous-motif
 * state is left untouched) when the motif is not among the given motifs,
 * or when p_threshold is positive and motif_p exceeds it.  Transitions
 * to the end state (motif_id equal to END_TRANSITION) are never skipped.
 *
 *   sequence_id     ID of the sequence; used only in verbose messages.
 *   motif_id        ID of the motif that occurred, or END_TRANSITION.
 *   p_threshold     Skip threshold; values <= 0.0 disable the check.
 *   motif_p         Value compared against p_threshold.
 *   prev_motif      IN/OUT: ID of the previously recorded motif.  It is
 *                   overwritten with motif_id via strcpy, so the caller
 *                   must supply a buffer large enough to hold it (the
 *                   size is not checked here).
 *   prev_position   IN/OUT: position of the previously recorded
 *                   occurrence; set to motif_position on success.
 *   motif_position  Position of this occurrence.
 *   transp_freq     Matrix of transition counts (incremented by 1).
 *   spacer_ave      Matrix of spacer lengths (incremented by the
 *                   distance from the previous occurrence).
 *   new_order       Order list the occurrence is appended to.
 *   num_motifs      Number of entries in motifs.
 *   motifs          The motifs that were given.
 ***********************************************************************/
static void record_occurrence(
  char*     sequence_id,
  char*     motif_id,
  double    p_threshold,
  double    motif_p,
  char*     prev_motif,
  int*      prev_position,
  int       motif_position,
  MATRIX_T* transp_freq,
  MATRIX_T* spacer_ave,
  ORDER_T*  new_order,
  int       num_motifs,
  MOTIF_T*  motifs
) {

  /* Evaluate the end-state test once; it drives three decisions below. */
  const int is_end_transition = (strcmp(motif_id, END_TRANSITION) == 0);
  int prev_motif_location;
  int motif_location;
  int spacer_length;

  /* Always include transitions to the end state; filter everything else. */
  if (!is_end_transition) {

    /* Check to see if this motif was not given. */
    if (!have_motif(motif_id, num_motifs, motifs)) {
      if (verbosity > NORMAL_VERBOSE) {
        fprintf(stderr, "Skipping motif %s in sequence %s.\n", motif_id,
          sequence_id);
      }
      return;
    }

    /* Check to see if the threshold was exceeded. */
    if ((p_threshold > 0.0) && (motif_p > p_threshold)) {
      if (verbosity > NORMAL_VERBOSE) {
        fprintf(stderr, "Skipping motif %s in sequence %s (%g > %g).\n",
          motif_id, sequence_id, motif_p, p_threshold);
      }
      return;
    }
  }

  if (verbosity > NORMAL_VERBOSE) {
    if (is_end_transition) {
      fprintf(stderr, "Adding transition to end in sequence %s.\n",
        sequence_id);
    } else {
      fprintf(stderr, "Adding motif %s in sequence %s.\n", motif_id,
        sequence_id);
    }
  }

  // If we're at the end, store in the last column of the matrix.
  if (is_end_transition) {
    motif_location = num_motifs + 1;
  } else {
    // It's somewhat tricky to figure out where this motif goes in the matrix.
    motif_location = find_matrix_location(motifs, motif_id, num_motifs);
  }
  prev_motif_location = find_matrix_location(motifs, prev_motif, num_motifs);

  // Distance from the previously recorded occurrence to this one.
  spacer_length = motif_position - *prev_position;

  // Increment the transition count and spacer length matrices.
  incr_matrix_cell(prev_motif_location, motif_location, 1, transp_freq);
  incr_matrix_cell(prev_motif_location, motif_location,
       spacer_length, spacer_ave);

  // Add the occurrence to the order list.
  add_occurrence(motif_id, spacer_length, new_order);

  // Advance BOTH pieces of previous-occurrence state.  Without the position
  // update, every later spacer would be measured from a stale position.
  strcpy(prev_motif, motif_id);
  *prev_position = motif_position;
}