/**
 * Process a single token from the text being scanned.
 *
 * The token is handed to the match object together with the current match
 * iterator. If the match object reports a hit, an occurrence is recorded
 * using the token's byte position, word position and byte length.
 * Regardless of the outcome, _endpos is moved to the byte offset just
 * past this token.
 *
 * @param token the token to process
 */
void Matcher::handle_token(Token& token)
{
    if (LOG_WOULD_LOG(debug)) {
        // Only pay for the UTF-8 conversion when debug logging is enabled.
        const int utf8_capacity = 1024;
        char utf8_text[utf8_capacity];
        // A token without text is copied with a source length of zero.
        Fast_UnicodeUtil::utf8ncopy(utf8_text, token.token, utf8_capacity,
                                    (token.token == NULL ? 0 : token.curlen));
        LOG(debug, "handle_token(%s)", utf8_text);
    }

    unsigned match_options = 0;
    const bool is_hit = _mo->Match(_match_iter, token, match_options);
    if (is_hit) {
        // Remember the hit with the token's original position and length.
        add_occurrence(token.bytepos, token.wordpos, token.bytelen);
    }

    // The text seen so far ends where this token ends.
    _endpos = token.bytepos + token.bytelen;
}
/*********************************************************************** * Record one motif occurrence. ***********************************************************************/ static void record_occurrence( char* sequence_id, char* motif_id, double p_threshold, double motif_p, char* prev_motif, int* prev_position, int motif_position, MATRIX_T* transp_freq, MATRIX_T* spacer_ave, ORDER_T* new_order, int num_motifs, MOTIF_T* motifs ) { int prev_motif_location; int motif_location; /* Always include transitions to the end state. */ if (strcmp(motif_id, END_TRANSITION) == 0) { } /* Check to see if this motif was not given. */ else if (!have_motif(motif_id, num_motifs, motifs)) { if (verbosity > NORMAL_VERBOSE) { fprintf(stderr, "Skipping motif %s in sequence %s.\n", motif_id, sequence_id); } return; } /* Check to see if the threshold was exceeded. */ else if ((p_threshold > 0.0) && (motif_p > p_threshold)) { if (verbosity > NORMAL_VERBOSE) { fprintf(stderr, "Skipping motif %s in sequence %s (%g > %g).\n", motif_id, sequence_id, motif_p, p_threshold); } return; } if (verbosity > NORMAL_VERBOSE) { if (strcmp(motif_id, END_TRANSITION) == 0) { fprintf(stderr, "Adding transition to end in sequence %s.\n", sequence_id); } else { fprintf(stderr, "Adding motif %s in sequence %s.\n", motif_id, sequence_id); } } // If we're at the end, store in the last column of the matrix. if (strcmp(motif_id, END_TRANSITION) == 0) { motif_location = num_motifs + 1; } else { // It's somewhat tricky to figure out where this motif goes in the matrix. motif_location = find_matrix_location(motifs, motif_id, num_motifs); } prev_motif_location = find_matrix_location(motifs, prev_motif, num_motifs); // Increment the transition count and spacer length matrices. incr_matrix_cell(prev_motif_location, motif_location, 1, transp_freq); incr_matrix_cell(prev_motif_location, motif_location, motif_position - *prev_position, spacer_ave); // Add the occurrence to the order list. 
add_occurrence(motif_id, motif_position - *prev_position, new_order); strcpy(prev_motif, motif_id); }