/// Parses one derivation-note string into a freshly allocated deriv_note.
///
/// The input is matched first against `synre`; only if that fails is it
/// matched against `virtre`. In both patterns capture groups 1-4 are read as
/// total score, inside score, span start and span end; group 5 is read as an
/// integer id for a `synre` match and as a raw token string for a `virtre`
/// match.
///
/// @param str  text to parse.
/// @return     pointer owning the newly constructed deriv_note.
/// @throws runtime_error if neither pattern matches the whole string.
///         Numeric conversions go through boost::lexical_cast, which reports
///         failures with its own exception type.
deriv_note_ptr parse(string const& str)
{
    boost::smatch m;

    // Try the integer-id form first; fall back to the token form.
    bool const has_syntax_id = regex_match(str, m, synre);
    if (!has_syntax_id && !regex_match(str, m, virtre)) {
        throw runtime_error("in parse_error module: could not parse "+str);
    }

    // Groups 1-4 have the same meaning for both patterns.
    score_t const total(boost::lexical_cast<float>(m.str(1)), as_neglog10());
    score_t const inside(boost::lexical_cast<float>(m.str(2)), as_neglog10());
    span_t const span(boost::lexical_cast<int>(m.str(3)),
                      boost::lexical_cast<int>(m.str(4)));

    if (has_syntax_id) {
        int const synid = boost::lexical_cast<int>(m.str(5));
        return deriv_note_ptr(new deriv_note(span, synid, total, inside));
    }

    string const tok = m.str(5);
    return deriv_note_ptr(new deriv_note(span, tok, total, inside));
}
virtual void finalize() { indexed_token_factory &tf = base_t::gram.dict(); filt->finalize(); while (not filt->empty()) { typename base_t::edge_equiv_type eq = filt->top(); insert(kept_edges, eq.representative(), true); edge_q.push_back(eq); filt->pop(); } test_func.reset(all_edges.begin(), all_edges.end()); edge_iterator itr = all_edges.begin(); edge_iterator end = all_edges.end(); for (; itr != all_edges.end(); ++itr) { edge_iterator pos = kept_edges.find(*itr); bool should_keep = test_func(*itr); bool did_keep = (pos != kept_edges.end()); if (did_keep) { assert(max(pos->score(),itr->score())/min(pos->score(),itr->score()) < score_t(1.0 + 1e-5)); } if (should_keep != did_keep) { std::cout << "ERROR: filtering mistake: edge: " << print(*itr,tf) << "expected to be kept? " << (should_keep?"true":"false") << std::endl; } } }
/// Computes the best cell score of a dynamic-programming alignment of `a`
/// against `b`.
///
/// Every cell is clamped at zero and the maximum over all cells is returned,
/// with separate gap-open (-2) and gap-extend (-1) penalties. This looks like
/// a local alignment with affine gap costs -- NOTE(review): confirm against
/// the callers' expectations.
///
/// Only two rows of state are kept, each with one entry per character of
/// `a`: the cell scores of the previous row, and the running vertical-gap
/// scores.
///
/// @param a  first sequence (indexes the columns).
/// @param b  second sequence (indexes the rows).
/// @param m  substitution scores, queried as m.get_score(a[col], b[row]).
/// @return   the largest cell score; -32000 if either string is empty,
///           because no cell is ever evaluated in that case.
score_t align( std::string &a, std::string &b, scoring_matrix &m )
{
    const score_t SMALL = -32000;
    const score_t GAP_EXT = -1;
    const score_t GAP_OPEN = -2;

    const std::size_t width = a.size();
    std::vector<score_t> cell_row( width, score_t(0) );   // previous row's cell scores
    std::vector<score_t> vgap_row( width, score_t(0) );   // vertical-gap score per column

    score_t best = SMALL;

    for( std::size_t row = 0; row < b.size(); ++row ) {
        char bc = b[row];

        // Per-row state: horizontal gap, the cell to the left, and the
        // previous row's cell one column to the left (the diagonal).
        score_t hgap = SMALL;
        score_t left_cell = 0;
        score_t diag_cell = 0;

        for( std::size_t col = 0; col < width; ++col ) {
            char ac = a[col];
            const score_t match = m.get_score( ac, bc );
            const score_t via_diag = diag_cell + match;

            // Horizontal gap: extend the running one or open from the left cell.
            hgap = std::max( score_t(hgap + GAP_EXT), score_t(left_cell + GAP_OPEN) );

            // cell_row[col] still holds the previous row's value here; it is
            // both the "up" neighbour now and the diagonal for the next column.
            diag_cell = cell_row[col];

            // Vertical gap: extend the stored one or open from the cell above.
            const score_t vgap = std::max( score_t(vgap_row[col] + GAP_EXT),
                                           score_t(diag_cell + GAP_OPEN) );
            vgap_row[col] = vgap;

            const score_t cell = std::max( via_diag,
                                           std::max( std::max( hgap, vgap ), score_t(0) ) );
            cell_row[col] = cell;
            left_cell = cell;
            best = std::max( cell, best );
        }
    }

    return best;
}