// main search loop int MainCaitra::prefix_matching_search( float max_time, float threshold ) { double start_time = get_wall_time(); // intialize search - initial back transition (state in prefix matching search) BackTransition initialBack( 0.0, 0, 0, -1, 0, NULL); // ... associated with initial hypothesis states[0].back.push_back( initialBack ); // start search with maximum error 0, then increase maximum error one by one int errorAllowed = 0; while( errorAllowed <= prefix.size() * error_unit ){ // printf("error level %d\n",errorAllowed); // process decoder search graph, it is ordered, so we can just sequentially loop through states int valid_states = 0; int back_count = 0; int transition_count = 0; int match_count = 0; for( int state = 0; state < states.size(); state++ ) { // ignore state if it is too bad if (threshold > 0 && states[state].best_score < states[0].best_score+threshold) { continue; } valid_states++; // abort search if maximum time exceeded if (state % 100 == 0 && max_time > 0 && (get_wall_time()-start_time) > max_time) { return -1; } // if it has back transitions, it is reachable, so we have to process each for ( backIter back = states[state].back.begin(); back != states[state].back.end(); back++ ) { // only need to process back transitions with current error level // the ones with lower error have been processed in previous iteration /*************************/ if (back->error == errorAllowed) { back_count++; // loop through transitions out of this state for ( transIter transition = states[state].transitions.begin(); transition != states[state].transitions.end(); transition++ ) { if (threshold > 0 && states[transition->to_state].best_score < states[0].best_score+threshold) { continue; } transition_count++; // try to match this transition's phrase // starting at end word prefix position of previous back transition vector< Match > matches = string_edit_distance( back->prefix_matched, transition->output ); // process all matches for ( matchIter match = matches.begin(); match != matches.end(); match++ ) { match_count++; // check if match leads to valid new back transition process_match( state, *back, *match, *transition ); } } } } } TRACE_ERR("explored " << valid_states << " valid states, " << back_count << " backs, " << transition_count << " transitions, " << match_count << " matches at error level " << errorAllowed << endl); // found a completion -> we are done if (best[errorAllowed].from_state != -1) { cerr << "search took " << (get_wall_time()-start_time) << " seconds.\n"; return errorAllowed; } errorAllowed++; } // cout << discover_Error << endl; return errorAllowed; }
void main() { char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC", *struct1,* struct2,* xstruc; float e1, e2, tree_dist, string_dist, profile_dist, kT; Tree *T1, *T2; swString *S1, *S2; float *pf1, *pf2; FLT_OR_DBL *bppm; /* fold at 30C instead of the default 37C */ temperature = 30.; /* must be set *before* initializing */ /* allocate memory for structure and fold */ struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1)); e1 = fold(seq1, struct1); struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1)); e2 = fold(seq2, struct2); free_arrays(); /* free arrays used in fold() */ /* produce tree and string representations for comparison */ xstruc = expand_Full(struct1); T1 = make_tree(xstruc); S1 = Make_swString(xstruc); free(xstruc); xstruc = expand_Full(struct2); T2 = make_tree(xstruc); S2 = Make_swString(xstruc); free(xstruc); /* calculate tree edit distance and aligned structures with gaps */ edit_backtrack = 1; tree_dist = tree_edit_distance(T1, T2); free_tree(T1); free_tree(T2); unexpand_aligned_F(aligned_line); printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist); /* same thing using string edit (alignment) distance */ string_dist = string_edit_distance(S1, S2); free(S1); free(S2); printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n", aligned_line[0], e1, aligned_line[1], e2, string_dist); /* for longer sequences one should also set a scaling factor for partition function folding, e.g: */ kT = (temperature+273.15)*1.98717/1000.; /* kT in kcal/mol */ pf_scale = exp(-e1/kT/strlen(seq1)); /* calculate partition function and base pair probabilities */ e1 = pf_fold(seq1, struct1); /* get the base pair probability matrix for the previous run of pf_fold() */ bppm = export_bppm(); pf1 = Make_bp_profile_bppm(bppm, strlen(seq1)); e2 = pf_fold(seq2, struct2); /* get the base pair probability matrix for the previous run of pf_fold() */ bppm = export_bppm(); pf2 = Make_bp_profile_bppm(bppm, strlen(seq2)); free_pf_arrays(); /* free space allocated for pf_fold() */ profile_dist = profile_edit_distance(pf1, pf2); printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n", aligned_line[0], e1, aligned_line[1], e2, profile_dist); free_profile(pf1); free_profile(pf2); }