/**
 * Builds the Viterbi trellis for `seq`.
 *
 * Column 0 holds the state scores of the first position. Every later cell
 * (pos, cur) holds the best "previous cell + transition" score plus the
 * state score at (pos, cur); the predecessor label that achieved that best
 * score is recorded through previous_tag().
 *
 * @param seq the sequence to score
 * @return the filled trellis
 */
viterbi_trellis crf::viterbi_scorer::viterbi(const sequence& seq)
{
    // Only the state scores are recomputed here. Per the original author's
    // note, the transition scores are set up during construction and never
    // change between sequences.
    scorer_.state_scores(*model_, seq);

    viterbi_trellis trellis{seq.size(), model_->num_labels()};

    // Seed the first column. The unmodified state()/trans() matrices are
    // used because the computation is done in the log domain.
    for (label_id cur{0}; cur < model_->num_labels(); ++cur)
        trellis.probability(0, cur, scorer_.state(0, cur));

    // Fill the remaining columns with the usual recursion.
    for (uint64_t pos = 1; pos < seq.size(); ++pos)
    {
        for (label_id cur{0}; cur < model_->num_labels(); ++cur)
        {
            double best = std::numeric_limits<double>::lowest();
            for (label_id prev{0}; prev < model_->num_labels(); ++prev)
            {
                const auto candidate = trellis.probability(pos - 1, prev)
                                       + scorer_.trans(prev, cur);
                if (!(candidate > best))
                    continue;
                best = candidate;
                trellis.previous_tag(pos, cur, prev);
            }
            trellis.probability(pos, cur, best + scorer_.state(pos, cur));
        }
    }

    return trellis;
}
// is end of a chunk (IOB1)? int evaluation::is_end_of_chunk_iob1(int human_model, int i, sequence & seq, string b_tag, string i_tag) { if (human_model == 1) { if (seq[i].label == atoi(b_tag.c_str())) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1].label != atoi(i_tag.c_str())) { return 1; } else { return 0; } } } else if (seq[i].label == atoi(i_tag.c_str())) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1].label != atoi(i_tag.c_str())) { return 1; } else { return 0; } } } else { return 0; } } else if (human_model == 2) { if (seq[i].model_label == atoi(b_tag.c_str())) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1].model_label != atoi(i_tag.c_str())) { return 1; } else { return 0; } } } else if (seq[i].model_label == atoi(i_tag.c_str())) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1].model_label != atoi(i_tag.c_str())) { return 1; } else { return 0; } } } else { return 0; } } else { return 0; } }
// is end of a chunk (IOB1)? int is_end_of_chunk_iob1(int human_model, int i, sequence & seq, string b_tag, string i_tag) { if (human_model == 1) { if (seq[i][seq[i].size() - 2] == b_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 2] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 2] == i_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 2] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else if (human_model == 2) { if (seq[i][seq[i].size() - 1] == b_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 1] == i_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else { return 0; } }
// Product of two sequences, computed by applying conv() to their buffers.
// The result starts at the sum of the operands' start indices (t1).
// If either operand is empty, an empty sequence is returned.
sequence<R> operator*( const sequence<T1> &X, const sequence<T2> &Y )
{
    const bool any_empty = ( X.size() == 0 || Y.size() == 0 );
    if( any_empty )
        return sequence<R>();

    vec<R> samples = conv( X.buffer(), Y.buffer() );
    int start = X.t1() + Y.t1();
    return sequence<R>( samples, start );
}
// Fills `a` in place with values drawn through rdm().
//   a[0] = rdm(0, MAX_VAL)
//   a[1] = rdm(0, a[0])
//   a[k] = rdm(0, (k - 1) / k) * a[k - 1]   for k >= 2
// NOTE(review): rdm(lo, hi) presumably returns a random value between lo
// and hi - confirm against its definition.
// An empty sequence is left untouched.
void generate_vietoris_sequence(sequence& a) {
    if (a.empty()) {
        return;
    }
    a[0] = rdm(0, MAX_VAL);

    if (a.size() < 2) {
        return;
    }
    a[1] = rdm(0, a[0]);

    int k = 2;
    while (k < a.size()) {
        // Upper bound (k - 1) / k, evaluated in floating point.
        const double upper = (1.0 * k - 1) / k;
        a[k] = rdm(0, upper) * a[k - 1];
        k++;
    }
}
// Fills `a` in place with a fixed sequence:
//   a[0] = 1, a[1] = 0.5, a[k] = a[k - 1] * (k - 1) / k   for k >= 2.
// An empty sequence is left untouched.
void generate_vietoris_sequence(sequence& a) {
    if (a.empty()) {
        return;
    }
    a[0] = 1;

    if (a.size() >= 2) {
        a[1] = .5;

        int k = 2;
        while (k < a.size()) {
            a[k] = a[k - 1] * (k - 1) / k;
            k++;
        }
    }
}
/*
 * Helper for element and document constructors: inserts a sequence of
 * atomic values as a single text node.
 *
 * Each value is cast to xs_string and appended to the shared temporary
 * string buffer. If the buffer ends up non-empty, a text node is inserted
 * and `left` is set to the last inserted indirection.
 *
 * Returns true if a node was actually inserted.
 * at_vals is cleared whenever it was non-empty on entry.
 */
static inline bool process_atomic_values(xptr& left, const xptr& parent, sequence& at_vals)
{
    if (at_vals.size() == 0)
    {
        return false;
    }

    executor_globals::tmp_op_str_buf.clear();

    for (sequence::iterator it = at_vals.begin(); it != at_vals.end(); ++it)
    {
        tuple_cell atom = tuple_cell::make_sure_light_atomic((*it).cells[0]);
        atom = cast(atom, xs_string);
        executor_globals::tmp_op_str_buf.append(atom);
    }

    at_vals.clear();

    // Nothing to insert when all values cast to empty strings.
    if (!(executor_globals::tmp_op_str_buf.get_size() > 0))
    {
        return false;
    }

    insert_text(indirectionDereferenceCP(left),
                XNULL,
                indirectionDereferenceCP(parent),
                text_source_strbuf(&(executor_globals::tmp_op_str_buf)));
    left = get_last_mo_inderection();
    return true;
}
/**
 * Labels every position of `seq` with the best label path found by Viterbi
 * decoding.
 *
 * The last position receives the label with the highest trellis score in
 * the final column; the remaining labels are recovered by following the
 * trellis back-pointers from right to left.
 *
 * @param seq the sequence to label in place; an empty sequence is left
 * untouched
 */
void crf::tagger::tag(sequence& seq)
{
    // seq.size() - 1 is computed on an unsigned type below; for an empty
    // sequence it would wrap around and index far out of range.
    if (seq.size() == 0)
        return;

    auto trellis = scorer_.viterbi(seq);

    const auto last = seq.size() - 1;

    auto lbls = util::range(label_id{0},
                            label_id(static_cast<uint32_t>(num_labels_ - 1)));
    auto last_lbl = functional::argmax(
        lbls.begin(), lbls.end(),
        [&](label_id lbl) { return trellis.probability(last, lbl); });

    seq[last].label(*last_lbl);

    // Walk the back-pointers: the label at t - 1 is the predecessor stored
    // for the label already chosen at t.
    for (uint64_t t = last; t > 0; t--)
        seq[t - 1].label(trellis.previous_tag(t, seq[t].label()));
}
// Stores copies of the three sequences. n is taken from the size of the
// `c` argument minus one, i.e. before any padding is appended.
orthonomial(const sequence &a, const sequence &b, const sequence &c)
    : n(c.size() - 1), a(a), b(b), c(c)
{
    /* For safety reasons: pad the stored copies with trailing zeros
       (two for c, one each for b and a). */
    this->c.push_back(0);
    this->c.push_back(0);
    this->b.push_back(0);
    this->a.push_back(0);
}
// is matching chunk (IOE2)? int evaluation::is_matching_chunk_ioe2(int i, sequence & seq, string i_tag, string e_tag) { if (!is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag) || !is_start_of_chunk_ioe2(2, i, seq, i_tag, e_tag)) { return 0; } int len = seq.size(); int j = i, k = i; while (j < len) { if (is_end_of_chunk_ioe2(1, j, seq, i_tag, e_tag)) { break; } else { j++; } } while (k < len) { if (is_end_of_chunk_ioe2(2, k, seq, i_tag, e_tag)) { break; } else { k++; } } return (j == k); }
/**
 * Tags `seq` in place, left to right.
 *
 * For each position the analyzer is run first, then the label is set to the
 * model's best class for that position's features, and finally the tag is
 * set to the analyzer's tag for that label.
 */
void perceptron::tag(sequence& seq) const
{
    uint64_t t = 0;
    while (t < seq.size())
    {
        analyzer_.analyze(seq, t);

        auto& obs = seq[t];
        obs.label(model_.best_class(obs.features()));
        obs.tag(analyzer_.tag(obs.label()));

        ++t;
    }
}
// Resolves an object key to an object.
// The key bytes are wrapped in a String and parsed as an ObjectKey. A local
// key yields its local object; any other key currently yields Nil.
RObject AORB::object_key_to_object(sequence<octet>& object_key)
{
  const char* raw = (const char*)object_key.data();
  RString keyText = new String(raw, object_key.size(), NormalSST | CCAscii);
  ObjectKey key(keyText);

  if (!key.isLocal())
  {
    // return Skelleton here
    return Nil;
  }
  return key.getLocalObject();
}
// Inner product of two sequences over the interval where they overlap.
// Sums inner_prod( X(t), Y(t) ) for every t from max(t1) to min(t2).
// Returns 0 when either sequence is empty or the intervals do not overlap.
complex inner_prod(const sequence<T> &X, const sequence<T> &Y )
{
    if( X.size() == 0 || Y.size() == 0 )
        return 0;

    // Bounds of the overlapping interval (inclusive).
    const int first = max( X.t1(), Y.t1() );
    const int last  = min( X.t2(), Y.t2() );

    // When first > last the loop body never runs and the sum stays 0.
    complex total = 0;
    for( int t = first; t <= last; t++ )
        total += inner_prod( X(t), Y(t) );
    return total;
}
// compares the given two instances of ss double cm_assembly_ssq3::compare(ss const &__first, ss const &__second) const { sequence<cchb_dssp> const seq1(__first.get_sequence()), seq2(__second.get_sequence()); if(seq1.size() != seq2.size()) { throw math::compare_error(get_identifier() + ": Sequence length differ, sequence1.length=" + std::to_string(seq1.size()) + ", sequence2.length=" + std::to_string(seq2.size())); } // if size_t c_correct(0), h_correct(0), e_correct(0); // initialize for(size_t pos(0); pos < seq1.size(); ++pos) { // works for both sequences b/c of same length char const sequence1_ss(seq1[pos].get_identifier_char()); char const sequence2_ss(seq2[pos].get_identifier_char()); if(sequence1_ss == sequence2_ss) { // actually it's only important that they are the same, not which one if(sequence1_ss == 'C') { ++c_correct; } // if else if(sequence1_ss == 'H') { ++h_correct; } // else if else if(sequence1_ss == 'E') { ++e_correct; } // else if } // if } // for DEBUG << get_identifier() << ": c_correct=" << c_correct << " h_correct=" << h_correct << " e_correct=" << e_correct << " seq_len=" << seq1.size(); return ((double)c_correct + h_correct + e_correct) / seq1.size(); } // compare()
// counting matching chunks (IOE2) int count_matching_chunks_ioe2(sequence & seq, string i_tag, string e_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag)) { if (is_matching_chunk_ioe2(i, seq, i_tag, e_tag)) { count++; } } } return count; }
// Element-wise product of two sequences, restricted to the interval where
// they overlap. The result starts at the first overlapping index.
// Returns an empty sequence when either operand is empty or when the
// intervals do not overlap.
sequence<decltype(T()*S())> element_prod( const sequence<T>& X, const sequence<S>& Y )
{
    typedef decltype(T()*S()) R;

    const bool any_empty = ( X.size() == 0 || Y.size() == 0 );
    if( any_empty )
        return sequence<R>();

    // Bounds of the overlapping interval (inclusive).
    const int first = max( X.t1(), Y.t1() );
    const int last  = min( X.t2(), Y.t2() );
    if( first > last )
        return sequence<R>();

    // Slice both buffers to the overlap; the ranges are relative to each
    // sequence's own start index.
    vec<R> product = element_prod(
        X.buffer()( range( first-X.t1(), last-X.t1()+1 ) ),
        Y.buffer()( range( first-Y.t1(), last-Y.t1()+1 ) ) );

    return sequence<R>( product, first );
}
// counting number of chunks (IOE2) int count_chunks_ioe2(int human_model, sequence & seq, string i_tag, string e_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (human_model == 1 && is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag)) { count++; } if (human_model == 2 && is_start_of_chunk_ioe2(2, i, seq, i_tag, e_tag)) { count++; } } return count; }
void convert_IOB2_IOB1(int is_cap, int is_last, sequence & seq) { map<int, int> fixedlabels; map<int, int>::iterator obsrit; char begin = is_cap ? 'B' : 'b'; char inside = is_cap ? 'I' : 'i'; string INSIDE = is_cap ? "I" : "i"; string str1, str2, istr, newlabel; int i, len = seq.size(); for (i = 1; i < len; i++) { int col1 = is_last ? seq[i - 1].size() - 1 : seq[i - 1].size() - 2; int col2 = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str1 = seq[i - 1][col1]; str2 = seq[i][col2]; if (str2[0] != begin) { continue; } istr = INSIDE; istr += strtail(str2); if (str1 == str2 || str1 == istr) { fixedlabels.insert(pair<int, int>(i, i)); } } for (i = 0; i < len; i++) { obsrit = fixedlabels.find(i); if (obsrit != fixedlabels.end()) { continue; } int col = is_last ? seq[i].size() - 1 : seq[i].size() - 2; if (seq[i][col][0] == begin) { // B- or b- => I- or i- newlabel = INSIDE; newlabel += strtail(seq[i][col]); seq[i][col] = newlabel; } } }
void convert_IOE1_IOE2(int is_cap, int is_last, sequence & seq) { vector<int> endlabels; char end = is_cap ? 'E' : 'e'; string END = is_cap ? "E" : "e"; char inside = is_cap ? 'I' : 'i'; string str1, str2, estr, newlabel; int i, len = seq.size(); for (i = 0; i < len; i++) { int col1 = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str1 = seq[i][col1]; if (str1[0] != inside) { continue; } if (i == len - 1) { endlabels.push_back(i); } else { int col2 = is_last ? seq[i + 1].size() - 1 : seq[i + 1].size() - 2; str2 = seq[i + 1][col2]; estr = END; estr += strtail(str1); if (str2 != str1 && str2 != estr) { endlabels.push_back(i); } } } for (i = 0; i < endlabels.size(); i++) { int col = is_last ? seq[endlabels[i]].size() - 1 : seq[endlabels[i]].size() - 2; newlabel = END; newlabel += strtail(seq[endlabels[i]][col]); seq[endlabels[i]][col] = newlabel; } }
// ************************************************************************** // bool test_basic(const sequence& test, size_t s, bool has_cursor) // Postcondition: A return value of true indicates: // a. test.size() is s, and // b. test.is_item() is has_cursor. // Otherwise the return value is false. // In either case, a description of the test result is printed to cout. // ************************************************************************** bool test_basic(const sequence& test, size_t s, bool has_cursor) { bool answer; cout << "Testing that size() returns " << s << " ... "; cout.flush( ); answer = (test.size( ) == s); cout << (answer ? "Passed." : "Failed.") << endl; if (answer) { cout << "Testing that is_item() returns "; cout << (has_cursor ? "true" : "false") << " ... "; cout.flush( ); answer = (test.is_item( ) == has_cursor); cout << (answer ? "Passed." : "Failed.") << endl; } return answer; }
void convert_IOB1_IOB2(int is_cap, int is_last, sequence & seq) { vector<int> firstlabels; char begin = is_cap ? 'B' : 'b'; string BEGIN = is_cap ? "B" : "b"; char inside = is_cap ? 'I' : 'i'; string str1, str2, bstr, newlabel; int i, len = seq.size(); for (i = 0; i < len; i++) { int col2 = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str2 = seq[i][col2]; if (str2[0] != inside) { continue; } if (i == 0) { firstlabels.push_back(i); } else { int col1 = is_last ? seq[i - 1].size() - 1 : seq[i - 1].size() - 2; str1 = seq[i - 1][col1]; bstr = BEGIN; bstr += strtail(str2); if (str2 != str1 && str1 != bstr) { firstlabels.push_back(i); } } } for (i = 0; i < firstlabels.size(); i++) { int col = is_last ? seq[firstlabels[i]].size() - 1 : seq[firstlabels[i]].size() - 2; newlabel = BEGIN; newlabel += strtail(seq[firstlabels[i]][col]); seq[firstlabels[i]][col] = newlabel; } }
// Prints every maximal run of elements with value < 4 in hidden_sequence
// that passes Check_Sequence(), one run per line, as
//   <chromosome_name> TAB <start> TAB <end>
// where start/end are the run's begin index and one-past-the-end index,
// each offset by shift + 1.
// NOTE(review): values below 4 presumably mark CpG-island states - confirm
// against the model that produces hidden_sequence.
//
// Fix: a run that was still open at the end of the sequence used to be
// handed to Check_Sequence() with the shifted output coordinates instead of
// sequence indices, unlike runs closed inside the loop. Both cases now go
// through the same code path and always pass sequence indices.
void Print_Cpg(sequence& hidden_sequence, uint shift, string& chromosome_name) {
    uint start_cpg = 0;
    bool is_open_cpg = false;
    const ull length = hidden_sequence.size();

    // i == length acts as a sentinel that closes a run reaching the end.
    for (ull i = 0; i <= length; ++i) {
        const bool in_run = (i < length) && (hidden_sequence[i] < 4);

        if (in_run) {
            if (!is_open_cpg) {
                start_cpg = i + shift + 1;
                is_open_cpg = true;
            }
            continue;
        }

        if (!is_open_cpg) {
            continue;
        }

        uint end_cpg = i + shift + 1;
        // Check_Sequence() works on indices into hidden_sequence, so the
        // output offset is removed again here.
        if (Check_Sequence(hidden_sequence, start_cpg - shift - 1, end_cpg - shift - 1)) {
            cout << chromosome_name << "\t" << start_cpg << "\t" << end_cpg << endl;
        }
        is_open_cpg = false;
    }
}
// Sum of two sequences that may cover different index intervals.
// The result covers [min(t1), max(t2)] and is assembled from three pieces:
//   head   - the part before the later start, taken from the sequence that
//            starts first;
//   middle - the element-wise sum over the overlap, or zeros filling the
//            gap when the intervals do not overlap;
//   tail   - the part after the earlier end, taken from the sequence that
//            ends last.
// If one operand is empty, the other one is returned.
sequence<R> operator+( sequence<T1> X, sequence<T2> Y )
{
    if( X.size() == 0 )
        return Y;
    if( Y.size() == 0 )
        return X;

    // Interval bounds.
    int first     = min( X.t1(), Y.t1() );   // start of the result
    int ovl_begin = max( X.t1(), Y.t1() );   // later of the two starts
    int ovl_end   = min( X.t2(), Y.t2() );   // earlier of the two ends
    int last      = max( X.t2(), Y.t2() );   // end of the result
    int len_x = X.size();
    int len_y = Y.size();

    // Head: stays empty when both sequences start at the same index.
    vec<R> head(0);
    if( first == X.t1() && first != Y.t1() )
        head = X.buffer()( range( 0, min( ovl_begin-X.t1(), len_x ) ) );
    else if( first != X.t1() && first == Y.t1() )
        head = Y.buffer()( range( 0, min( ovl_begin-Y.t1(), len_y ) ) );

    // Middle: overlap sum, or a zero-filled gap between the two intervals.
    vec<R> middle;
    if( ovl_begin <= ovl_end )
    {
        middle = X.buffer()( range( ovl_begin-X.t1(), ovl_end-X.t1()+1 ) )
               + Y.buffer()( range( ovl_begin-Y.t1(), ovl_end-Y.t1()+1 ) );
    }
    else
    {
        int gap = ovl_begin-ovl_end-1;
        middle.resize(gap);
        for( int i = 0; i < gap; i++ )
            middle(i) = 0*X.buffer()(0);   // a zero derived from an element of X
    }

    // Tail: stays empty when both sequences end at the same index.
    vec<R> tail(0);
    if( last == X.t2() && last != Y.t2() )
        tail = X.buffer()( range( max( ovl_end-X.t1()+1, 0 ), X.size() ) );
    else if( last != X.t2() && last == Y.t2() )
        tail = Y.buffer()( range( max( ovl_end-Y.t1()+1, 0 ), Y.size() ) );

    return sequence<R>( vec<R>{head,middle,tail}, first );
}
// Evaluates the cosine sum  a[0]*cos(0*x) + a[1]*cos(1*x) + ...  at x.
// Returns 0 for an empty sequence.
//
// Fix: the function computed the sum but never returned it; flowing off
// the end of a non-void function is undefined behaviour.
double f2(double x, sequence a) {
    double sum = 0;
    for (int i = 0; i < a.size(); i++) {
        sum += a[i] * cos(i * x);
    }
    return sum;
}
// Evaluates the sine sum  a[1]*sin(1*x) + a[2]*sin(2*x) + ...  at x.
// The loop starts at index 1, so a[0] is not used.
// Returns 0 when the sequence has fewer than two elements.
//
// Fix: the function computed the sum but never returned it; flowing off
// the end of a non-void function is undefined behaviour.
double f1(double x, sequence a) {
    double sum = 0;
    for (int i = 1; i < a.size(); i++) {
        sum += a[i] * sin(i * x);
    }
    return sum;
}
int main(int argc, char **argv) { using namespace std; unsigned j; /* cout << "Input number of tests (for each pattern size): " << flush; cin >> Number_Of_Tests; cout << "Input number of pattern sizes: " << flush; cin >> Number_Of_Pattern_Sizes; cout << "Input pattern sizes: " << flush; */ if (argc < 4) return 1; Number_Of_Tests = strtoul(argv[1], NULL, 10); Number_Of_Pattern_Sizes = strtoul(argv[2], NULL, 10); vector<unsigned> Pattern_Size(Number_Of_Pattern_Sizes); for (j = 0; j < Number_Of_Pattern_Sizes; ++j) Pattern_Size[j] = strtoul(argv[j + 3], NULL, 10); cout << "\nNumber of tests: " << Number_Of_Tests << endl; cout << "Pattern sizes: "; for (j = 0; j < Number_Of_Pattern_Sizes; ++j) cout << Pattern_Size[j] << " "; cout << endl; ifstream ifs(textFileName); char C; while (ifs.get(C)) S1.push_back(C); cout << S1.size() << " characters read." << endl; ifstream dictfile(wordFileName); typedef istream_iterator<string> string_input; typedef map<int, vector<sequence>, less<int> > map_type; map_type dictionary; sequence S; string S0; string_input si(dictfile); while (si != string_input()) { S0 = *si++; S.erase(S.begin(), S.end()); copy(S0.begin(), S0.end() - 1, back_inserter(S)); dictionary[S.size()].push_back(S); } for (j = 0; j < Number_Of_Pattern_Sizes; ++j) { vector<sequence>& diction = dictionary[Pattern_Size[j]]; if (diction.size() > Number_Of_Tests) { vector<sequence> temp; unsigned Skip_Amount = diction.size() / Number_Of_Tests; for (unsigned T = 0; T < Number_Of_Tests; ++T) { temp.push_back(diction[T * Skip_Amount]); } diction = temp; } Increment = (S1.size() - Pattern_Size[j]) / Number_Of_Tests; cout << "\n\n-----------------------------------------------------------\n" << "Searching for patterns of size " << Pattern_Size[j] << "..." 
<< endl; cout << "(" << Number_Of_Tests << " patterns from the text, " << dictionary[Pattern_Size[j]].size() << " from the dictionary)" << endl; cerr << Pattern_Size[j] << " " << flush; Base_Time = 0.0; for (int k = 0; k < number_of_algorithms; ++k) { if (k != 0) cout << "Timing " << algorithm_names[k] << ":" << endl; Run(k, S1, dictionary[Pattern_Size[j]], Pattern_Size[j]); } cout << endl; } cerr << endl; }
/**
 * Runs the per-position analyze(sequence, t) overload for every position of
 * the sequence, in increasing order of t.
 */
void sequence_analyzer::analyze(sequence& sequence)
{
    uint64_t t = 0;
    while (t < sequence.size())
    {
        analyze(sequence, t);
        ++t;
    }
}
void convert_IOE2_IOB2(int is_cap, int is_last, sequence & seq) { map<int, int> beginlabels; map<int, int>::iterator blbit; char begin = is_cap ? 'B' : 'b'; string BEGIN = is_cap ? "B" : "b"; char inside = is_cap ? 'I' : 'i'; string INSIDE = is_cap ? "I" : "i"; char end = is_cap ? 'E' : 'e'; string str1, str2, istr, str, newlabel; int i, len = seq.size(); for (i = 0; i < len; i++) { int col2 = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str2 = seq[i][col2]; if (str2[0] != end && str2[0] != inside) { continue; } if (i == 0) { beginlabels.insert(pair<int, int>(i, i)); } else { int col1 = is_last ? seq[i - 1].size() - 1 : seq[i - 1].size() - 2; str1 = seq[i - 1][col1]; istr = INSIDE; istr += strtail(str2); if (str2[0] == end && str1 != istr) { beginlabels.insert(pair<int, int>(i, i)); } else if (str2[0] == inside && str1 != str2) { beginlabels.insert(pair<int, int>(i, i)); } } } for (i = 0; i < len; i++) { int col = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str = seq[i][col]; if (str[0] != end && str[0] != inside) { continue; } blbit = beginlabels.find(i); if (blbit != beginlabels.end()) { newlabel = BEGIN; newlabel += strtail(str); seq[i][col] = newlabel; } else { newlabel = INSIDE; newlabel += strtail(str); seq[i][col] = newlabel; } } }
// Appends every element of src to dst, preserving order.
static void copy_sequence_to_list(list &dst, const sequence &src)
{
    size_t idx = 0;
    while(idx < src.size())
    {
        dst.append(src[idx]);
        ++idx;
    }
}
void convert_IOB2_IOE2(int is_cap, int is_last, sequence & seq) { map<int, int> endlabels; map<int, int>::iterator elbit; char begin = is_cap ? 'B' : 'b'; char inside = is_cap ? 'I' : 'i'; string INSIDE = is_cap ? "I" : "i"; char end = is_cap ? 'E' : 'e'; string END = is_cap ? "E" : "e"; string str1, str2, istr, str, newlabel; int i, len = seq.size(); for (i = 0; i < len; i++) { int col1 = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str1 = seq[i][col1]; if (str1[0] != begin && str1[0] != inside) { continue; } if (i == len - 1) { endlabels.insert(pair<int, int>(i, i)); } else { int col2 = is_last ? seq[i + 1].size() - 1 : seq[i + 1].size() - 2; str2 = seq[i + 1][col2]; istr = INSIDE; istr += strtail(str1); if (str1[0] == begin && str2 != istr) { endlabels.insert(pair<int, int>(i, i)); } else if (str1[0] == inside && str2 != str1) { endlabels.insert(pair<int, int>(i, i)); } } } for (i = 0; i < len; i++) { int col = is_last ? seq[i].size() - 1 : seq[i].size() - 2; str = seq[i][col]; if (str[0] != begin && str[0] != inside) { continue; } elbit = endlabels.find(i); if (elbit != endlabels.end()) { newlabel = END; newlabel += strtail(str); seq[i][col] = newlabel; } else { newlabel = INSIDE; newlabel += strtail(str); seq[i][col] = newlabel; } } }