Example #1
0
// main search loop
int MainCaitra::prefix_matching_search( float max_time, float threshold ) {
    double start_time = get_wall_time();

    // intialize search - initial back transition (state in prefix matching search)
    BackTransition initialBack( 0.0, 0, 0, -1, 0, NULL);
    // ... associated with initial hypothesis
    states[0].back.push_back( initialBack );
    // start search with maximum error 0, then increase maximum error one by one
    int errorAllowed = 0;

    while( errorAllowed <= prefix.size() * error_unit ){
        // printf("error level %d\n",errorAllowed);
        // process decoder search graph, it is ordered, so we can just sequentially loop through states
        int valid_states = 0;
        int back_count = 0;
        int transition_count = 0;
        int match_count = 0;
        for( int state = 0; state < states.size(); state++ ) {

            // ignore state if it is too bad
            if (threshold > 0 && states[state].best_score < states[0].best_score+threshold) {
                continue;
            }
            valid_states++;
            // abort search if maximum time exceeded
            if (state % 100 == 0 && max_time > 0 && (get_wall_time()-start_time) > max_time) {
                return -1;
            }

            // if it has back transitions, it is reachable, so we have to process each
            for ( backIter back = states[state].back.begin(); back != states[state].back.end(); back++ ) {
                // only need to process back transitions with current error level
                // the ones with lower error have been processed in previous iteration

                /*************************/
                if (back->error == errorAllowed) {
                    back_count++;
                    // loop through transitions out of this state
                    for ( transIter transition = states[state].transitions.begin(); transition != states[state].transitions.end(); transition++ ) {

                        if (threshold > 0 && states[transition->to_state].best_score < states[0].best_score+threshold) {
                            continue;
                        }
                        transition_count++;

                        // try to match this transition's phrase
                        // starting at end word prefix position of previous back transition
                        vector< Match > matches = string_edit_distance( back->prefix_matched, transition->output );
                        // process all matches
                        for ( matchIter match = matches.begin(); match != matches.end(); match++ ) {
                            match_count++;
                            // check if match leads to valid new back transition
                            process_match( state, *back, *match, *transition );

                        }
                    }
                }
            }
        }
        TRACE_ERR("explored " << valid_states << " valid states, " << back_count << " backs, " <<  transition_count << " transitions, " << match_count << " matches at error level " << errorAllowed << endl);
          // found a completion -> we are done
        if (best[errorAllowed].from_state != -1) {
            cerr << "search took " << (get_wall_time()-start_time) << " seconds.\n";
            return errorAllowed;
        }
        errorAllowed++;
    }

    // cout << discover_Error << endl;
    return errorAllowed;
}
Example #2
0
/*
 * Demo program: folds two hard-coded sequences, then compares the resulting
 * structures three ways (tree edit distance, string edit distance, and
 * base pair profile distance) and prints each alignment with its distance.
 *
 * Returns 0 on completion.
 */
int main(void)
{
   char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
        *struct1,* struct2,* xstruc;
   float e1, e2, tree_dist, string_dist, profile_dist, kT;
   Tree *T1, *T2;
   swString *S1, *S2;
   float *pf1, *pf2;
   FLT_OR_DBL *bppm;
   /* fold at 30C instead of the default 37C */
   temperature = 30.;      /* must be set *before* initializing  */

   /* allocate memory for structure and fold */
   struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
   e1 =  fold(seq1, struct1);

   struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
   e2 =  fold(seq2, struct2);

   free_arrays();     /* free arrays used in fold() */

   /* produce tree and string representations for comparison */
   xstruc = expand_Full(struct1);
   T1 = make_tree(xstruc);
   S1 = Make_swString(xstruc);
   free(xstruc);

   xstruc = expand_Full(struct2);
   T2 = make_tree(xstruc);
   S2 = Make_swString(xstruc);
   free(xstruc);

   /* calculate tree edit distance and aligned structures with gaps */
   edit_backtrack = 1;
   tree_dist = tree_edit_distance(T1, T2);
   free_tree(T1); free_tree(T2);
   unexpand_aligned_F(aligned_line);
   printf("%s\n%s  %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);

   /* same thing using string edit (alignment) distance */
   string_dist = string_edit_distance(S1, S2);
   free(S1); free(S2);
   printf("%s  mfe=%5.2f\n%s  mfe=%5.2f  dist=%3.2f\n",
          aligned_line[0], e1, aligned_line[1], e2, string_dist);

   /* for longer sequences one should also set a scaling factor for
      partition function folding, e.g: */
   kT = (temperature+273.15)*1.98717/1000.;  /* kT in kcal/mol */
   pf_scale = exp(-e1/kT/strlen(seq1));

   /* calculate partition function and base pair probabilities */
   e1 = pf_fold(seq1, struct1);
   /* get the base pair probability matrix for the previous run of pf_fold() */
   bppm = export_bppm();
   pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));

   e2 = pf_fold(seq2, struct2);
   /* get the base pair probability matrix for the previous run of pf_fold() */
   bppm = export_bppm();
   pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));

   free_pf_arrays();  /* free space allocated for pf_fold() */

   profile_dist = profile_edit_distance(pf1, pf2);
   printf("%s  free energy=%5.2f\n%s  free energy=%5.2f  dist=%3.2f\n",
          aligned_line[0], e1, aligned_line[1], e2, profile_dist);

   free_profile(pf1); free_profile(pf2);

   /* the structure buffers are no longer used after the pf_fold() calls;
      release them (assumes space() memory is released with free() --
      confirm against the library's utils documentation) */
   free(struct1); free(struct2);

   return 0;
}