Esempio n. 1
0
// Fills and returns the Viterbi trellis for `seq`: for every position and
// label it stores the best path score ending there and the predecessor
// label that achieved it.
viterbi_trellis crf::viterbi_scorer::viterbi(const sequence& seq)
{
    // Transition scores were set up at construction and are shared by all
    // sequences, so only the per-position state scores are refreshed here.
    scorer_.state_scores(*model_, seq);

    viterbi_trellis table{seq.size(), model_->num_labels()};

    // Column 0 has no predecessor: every label starts from its own state
    // score. The raw state()/trans() values are used because the
    // computation is carried out in the log domain.
    for (label_id first{0}; first < model_->num_labels(); ++first)
        table.probability(0, first, scorer_.state(0, first));

    // Every later column extends the best-scoring path of the previous one.
    for (uint64_t pos = 1; pos < seq.size(); ++pos)
    {
        for (label_id cur{0}; cur < model_->num_labels(); ++cur)
        {
            double best = std::numeric_limits<double>::lowest();
            for (label_id prev{0}; prev < model_->num_labels(); ++prev)
            {
                auto candidate = table.probability(pos - 1, prev)
                                 + scorer_.trans(prev, cur);

                // Only a strictly better candidate replaces the current best.
                if (!(candidate > best))
                    continue;

                best = candidate;
                table.previous_tag(pos, cur, prev);
            }
            table.probability(pos, cur, best + scorer_.state(pos, cur));
        }
    }
    return table;
}
Esempio n. 2
0
// IOB1: does position i close a chunk?
//   human_model == 1 inspects the `label` field, human_model == 2 the
//   `model_label` field; any other value yields 0.
// b_tag / i_tag hold the numeric ids of the B and I labels as decimal text.
// Returns 1 when seq[i] carries a B or I label and is either the last
// element or followed by something other than the I label; otherwise 0.
int evaluation::is_end_of_chunk_iob1(int human_model, int i, sequence & seq, 
					string b_tag, string i_tag) {
    if (human_model != 1 && human_model != 2) {
        return 0;
    }

    const int b_label = atoi(b_tag.c_str());
    const int i_label = atoi(i_tag.c_str());

    if (human_model == 1) {
        if (seq[i].label == b_label || seq[i].label == i_label) {
            if (i >= seq.size() - 1) {
                return 1;
            }
            return (seq[i + 1].label != i_label) ? 1 : 0;
        }
        return 0;
    }

    // human_model == 2
    if (seq[i].model_label == b_label || seq[i].model_label == i_label) {
        if (i >= seq.size() - 1) {
            return 1;
        }
        return (seq[i + 1].model_label != i_label) ? 1 : 0;
    }
    return 0;
}
Esempio n. 3
0
// IOB1: does row i close a chunk?
//   human_model == 1 reads the second-to-last column of each row,
//   human_model == 2 reads the last column; any other value yields 0.
// Returns 1 when the tag at row i equals b_tag or i_tag and the row is
// either the final one or followed by a tag other than i_tag; otherwise 0.
int is_end_of_chunk_iob1(int human_model, int i, sequence & seq, 
					string b_tag, string i_tag) {
    // Distance of the inspected column from the end of a row.
    int from_end;
    if (human_model == 1) {
        from_end = 2;
    } else if (human_model == 2) {
        from_end = 1;
    } else {
        return 0;
    }

    if (seq[i][seq[i].size() - from_end] == b_tag
            || seq[i][seq[i].size() - from_end] == i_tag) {
        if (i >= seq.size() - 1) {
            return 1;
        }
        return (seq[i + 1][seq[i + 1].size() - from_end] != i_tag) ? 1 : 0;
    }

    return 0;
}
Esempio n. 4
0
// Product of two sequences: the buffers are combined with conv() and the
// result starts at the sum of the two start indices. An empty operand
// gives an empty result.
sequence<R> operator*( const sequence<T1> &X, const sequence<T2> &Y )
{
	if( X.size()==0 || Y.size()==0 )
		return sequence<R>();

	vec<R> product = conv( X.buffer(), Y.buffer() );
	int start      = X.t1() + Y.t1();
	return sequence<R>( product, start );
}
Esempio n. 5
0
File: w3e4.cpp Progetto: lybeck/NuMe
// Fills `a` in place with random values drawn in index order:
//   a[0] = rdm(0, MAX_VAL), a[1] = rdm(0, a[0]),
//   a[k] = rdm(0, (k - 1) / k) * a[k - 1] for k >= 2.
// An empty sequence is left untouched.
void generate_vietoris_sequence(sequence& a) {
    for (int idx = 0; idx < a.size(); idx++) {
        if (idx == 0) {
            a[0] = rdm(0, MAX_VAL);
        } else if (idx == 1) {
            a[1] = rdm(0, a[0]);
        } else {
            a[idx] = rdm(0, (1.0 * idx - 1) / idx) * a[idx - 1];
        }
    }
}
Esempio n. 6
0
File: w3e4.cpp Progetto: lybeck/NuMe
// Fills `a` in place with the deterministic sequence
//   a[0] = 1, a[1] = 0.5, a[k] = a[k - 1] * (k - 1) / k for k >= 2.
// An empty sequence is left untouched.
void generate_vietoris_sequence(sequence& a) {
    for (int idx = 0; idx < a.size(); idx++) {
        if (idx == 0) {
            a[0] = 1;
        } else if (idx == 1) {
            a[1] = .5;
        } else {
            a[idx] = a[idx - 1] * (idx - 1) / idx;
        }
    }
}
Esempio n. 7
0
/*
 * Helper for element and document constructors: inserts a sequence of
 * atomic values as one text node. Returns true if a node was actually
 * inserted; in that case `left` is updated to the last inserted
 * indirection. A non-empty at_vals is always cleared, whether or not a
 * node was inserted.
 */
static inline bool
process_atomic_values(xptr& left, const xptr& parent, sequence& at_vals) {
    if (!(at_vals.size() > 0))
        return false;

    // Concatenate the string form of every atomic value into the shared buffer.
    executor_globals::tmp_op_str_buf.clear();
    sequence::iterator cursor = at_vals.begin();
    do {
        tuple_cell atom = tuple_cell::make_sure_light_atomic((*cursor).cells[0]);
        atom = cast(atom, xs_string);
        executor_globals::tmp_op_str_buf.append(atom);
        cursor++;
    }
    while (cursor != at_vals.end());

    at_vals.clear();

    // Nothing to insert if all values were empty strings.
    if (!(executor_globals::tmp_op_str_buf.get_size() > 0))
        return false;

    insert_text(indirectionDereferenceCP(left),
                XNULL,
                indirectionDereferenceCP(parent),
                text_source_strbuf(&(executor_globals::tmp_op_str_buf)));
    left = get_last_mo_inderection();
    return true;
}
Esempio n. 8
0
// Assigns the highest-scoring label path to `seq` in place.
//
// Runs Viterbi over the sequence, picks the best-scoring label for the
// final position, then walks the trellis back-pointers from right to left.
// An empty sequence is left untouched.
void crf::tagger::tag(sequence& seq)
{
    // Without this guard the `seq.size() - 1` arithmetic below wraps
    // around and indexes far outside the sequence and the trellis.
    if (seq.size() == 0)
        return;

    auto trellis = scorer_.viterbi(seq);

    auto lbls = util::range(label_id{0},
                            label_id(static_cast<uint32_t>(num_labels_ - 1)));
    auto last_lbl = functional::argmax(
        lbls.begin(), lbls.end(), [&](label_id lbl)
        {
            return trellis.probability(seq.size() - 1, lbl);
        });

    // Fix the final label, then recover each earlier label from the
    // back-pointer stored for its successor.
    seq[seq.size() - 1].label(*last_lbl);
    for (uint64_t t = seq.size() - 1; t > 0; t--)
        seq[t - 1].label(trellis.previous_tag(t, seq[t].label()));
}
Esempio n. 9
0
 // Builds the object from three coefficient sequences; n is set to
 // c.size() - 1, computed from the argument before any padding.
 orthonomial(const sequence &a, const sequence &b, const sequence &c)
     : n(c.size() - 1),
       a(a),
       b(b),
       c(c)
 {
   /* For safety: pad the stored copies with trailing zeros
      (one each for a and b, two for c). */
   this->a.push_back(0);
   this->b.push_back(0);
   for (int pad = 0; pad < 2; ++pad) {
     this->c.push_back(0);
   }
 }
Esempio n. 10
0
// IOE2: do the two annotations (1 and 2) agree on the chunk starting at i?
// Returns 0 unless both annotations start a chunk at i. Otherwise each
// annotation is scanned forward from i to its first chunk end (or to the
// end of the sequence), and the result is 1 when both scans stop at the
// same position.
int evaluation::is_matching_chunk_ioe2(int i, sequence & seq, string i_tag, string e_tag) {
    const bool starts_in_both = is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag)
                                && is_start_of_chunk_ioe2(2, i, seq, i_tag, e_tag);
    if (!starts_in_both) {
        return 0;
    }

    const int len = seq.size();

    int first_end = i;
    for (; first_end < len; first_end++) {
        if (is_end_of_chunk_ioe2(1, first_end, seq, i_tag, e_tag)) {
            break;
        }
    }

    int second_end = i;
    for (; second_end < len; second_end++) {
        if (is_end_of_chunk_ioe2(2, second_end, seq, i_tag, e_tag)) {
            break;
        }
    }

    return (first_end == second_end);
}
Esempio n. 11
0
// Labels `seq` in place, one position at a time from left to right: each
// observation is analyzed, given the model's best class for its features,
// and then given the tag the analyzer associates with that label.
void perceptron::tag(sequence& seq) const
{
    uint64_t pos = 0;
    while (pos < seq.size())
    {
        analyzer_.analyze(seq, pos);

        auto&& obs = seq[pos];
        obs.label(model_.best_class(obs.features()));
        obs.tag(analyzer_.tag(obs.label()));

        ++pos;
    }
}
Esempio n. 12
0
// Resolves a raw object key to an object: the key bytes are wrapped in a
// String, parsed as an ObjectKey, and the local object is returned when
// the key is local. Non-local keys yield Nil.
RObject 
AORB::object_key_to_object(sequence<octet>& object_key)
{
  RString keyText = new String((const char*)object_key.data(), object_key.size(), NormalSST | CCAscii);
  ObjectKey objkey(keyText);
  if (objkey.isLocal() != true)
  {
    // return Skeleton here
    return Nil;
  }
  return objkey.getLocalObject();
}
Esempio n. 13
0
// Inner product of two sequences: sums inner_prod(X(t), Y(t)) over the
// index range where both are defined. Returns 0 when either sequence is
// empty or the index ranges do not intersect.
complex inner_prod(const sequence<T> &X, const sequence<T> &Y )
{
	// Nothing to multiply if either side is empty
	if( X.size() == 0 || Y.size() == 0 )
		return 0;

	// Common index range [first, last]
	int	first = max(X.t1(),Y.t1());
	int	last  = min(X.t2(),Y.t2());

	// Disjoint ranges
	if( ta_after_tb( first, last ) )
		return 0;

	// Accumulate over the common range
	complex total = 0;
	int t = first;
	while( t <= last ) {
		total += inner_prod( X(t), Y(t) );
		t++;
	}
	return total;
}
Esempio n. 14
0
      // Compares two ss instances position by position and returns the fraction of
      // positions at which both carry the same state and that state is 'C', 'H' or 'E'.
      // Throws math::compare_error when the two sequences differ in length.
      double cm_assembly_ssq3::compare(ss const &__first, ss const &__second) const {
        sequence<cchb_dssp> const seq1(__first.get_sequence());
        sequence<cchb_dssp> const seq2(__second.get_sequence());

        if(seq1.size() != seq2.size()) {
          throw math::compare_error(get_identifier() + ": Sequence length differ, sequence1.length=" +
                                    std::to_string(seq1.size()) + ", sequence2.length=" + std::to_string(seq2.size()));
        }

        // per-state counts of agreeing positions
        size_t c_correct(0), h_correct(0), e_correct(0);
        for(size_t pos(0); pos < seq1.size(); ++pos) { // one index serves both, lengths are equal
          char const state(seq1[pos].get_identifier_char());
          char const other(seq2[pos].get_identifier_char());
          if(state != other) {
            continue;
          }

          // agreement on any other state character is not counted
          switch(state) {
            case 'C':
              ++c_correct;
              break;
            case 'H':
              ++h_correct;
              break;
            case 'E':
              ++e_correct;
              break;
            default:
              break;
          }
        }

        DEBUG << get_identifier() << ": c_correct=" << c_correct << " h_correct=" << h_correct
              << " e_correct=" << e_correct << " seq_len=" << seq1.size();

        return (static_cast<double>(c_correct) + h_correct + e_correct) / seq1.size();
      } // compare()
Esempio n. 15
0
// IOE2: counts the positions where annotation 1 starts a chunk and
// is_matching_chunk_ioe2() reports that chunk as matching.
int count_matching_chunks_ioe2(sequence & seq, string i_tag, string e_tag) {
    int matches = 0;

    for (int pos = 0; pos < seq.size(); pos++) {
        if (is_start_of_chunk_ioe2(1, pos, seq, i_tag, e_tag)
                && is_matching_chunk_ioe2(pos, seq, i_tag, e_tag)) {
            matches++;
        }
    }

    return matches;
}
Esempio n. 16
0
// Element-wise product of two sequences over the index range where both
// are defined. The result starts at the first common index; it is empty
// when either input is empty or the ranges do not intersect.
sequence<decltype(T()*S())> element_prod( const sequence<T>& X, const sequence<S>& Y )
{
	typedef decltype(T()*S()) R;

	// Nothing to multiply if either side is empty
	if( X.size() == 0 || Y.size() == 0 )
		return sequence<R>();

	// Common index range [first, last]
	int	first = max(X.t1(),Y.t1());
	int	last  = min(X.t2(),Y.t2());

	// Disjoint ranges
	if( first > last )
		return sequence<R>();

	// Slice each buffer to the common range (offsets are relative to
	// each sequence's own start) and multiply element by element
	vec<R> product = element_prod(
			X.buffer()( range( first-X.t1(), last-X.t1()+1 ) ),
			Y.buffer()( range( first-Y.t1(), last-Y.t1()+1 ) ) );
	return sequence<R>( product, first );
}
Esempio n. 17
0
// IOE2: counts the chunk starts in the annotation selected by human_model
// (1 or 2). Any other selector yields 0.
int count_chunks_ioe2(int human_model, sequence & seq, string i_tag, string e_tag) {
    if (human_model != 1 && human_model != 2) {
        return 0;
    }

    int total = 0;
    for (int pos = 0; pos < seq.size(); pos++) {
        if (is_start_of_chunk_ioe2(human_model, pos, seq, i_tag, e_tag)) {
            total++;
        }
    }

    return total;
}
Esempio n. 18
0
void convert_IOB2_IOB1(int is_cap, int is_last, sequence & seq) {
    map<int, int> fixedlabels;
    map<int, int>::iterator obsrit;

    char begin = is_cap ? 'B' : 'b';
    char inside = is_cap ? 'I' : 'i';
    string INSIDE = is_cap ? "I" : "i";

    string str1, str2, istr, newlabel;
    int i, len = seq.size();        

    for (i = 1; i < len; i++) {
	int col1 = is_last ? seq[i - 1].size() - 1 : seq[i - 1].size() - 2;
	int col2 = is_last ? seq[i].size() - 1 : seq[i].size() - 2;
	
	str1 = seq[i - 1][col1];
	str2 = seq[i][col2];
	
	if (str2[0] != begin) {
	    continue;
	}
	
	istr = INSIDE;
	istr += strtail(str2);
	
	if (str1 == str2 || str1 == istr) {
	    fixedlabels.insert(pair<int, int>(i, i));
	}
    }
    
    for (i = 0; i < len; i++) {
	obsrit = fixedlabels.find(i);
	if (obsrit != fixedlabels.end()) {
	    continue;
	}
	
	int col = is_last ? seq[i].size() - 1 : seq[i].size() - 2;
	if (seq[i][col][0] == begin) {
	    // B- or b- => I- or i-
	    newlabel = INSIDE;
	    newlabel += strtail(seq[i][col]);
	    seq[i][col] = newlabel;
	}		
    }
}
Esempio n. 19
0
void convert_IOE1_IOE2(int is_cap, int is_last, sequence & seq) {
    vector<int> endlabels;
    
    char end = is_cap ? 'E' : 'e';
    string END = is_cap ? "E" : "e";
    char inside = is_cap ? 'I' : 'i';

    string str1, str2, estr, newlabel;
    int i, len = seq.size();        
    
    for (i = 0; i < len; i++) {
	int col1 = is_last ? seq[i].size() - 1 : seq[i].size() - 2;	
	str1 = seq[i][col1];
	
	if (str1[0] != inside) {
	    continue;
	}
	
	if (i == len - 1) {
	    endlabels.push_back(i);
	
	} else {
	    int col2 = is_last ? seq[i + 1].size() - 1 : seq[i + 1].size() - 2;
	    str2 = seq[i + 1][col2];
	
	    estr = END;
	    estr += strtail(str1);
	    
	    if (str2 != str1 && str2 != estr) {
		endlabels.push_back(i);
	    }
	}
    }

    for (i = 0; i < endlabels.size(); i++) {
	int col = is_last ? seq[endlabels[i]].size() - 1 : seq[endlabels[i]].size() - 2;
	
	newlabel = END;
	newlabel += strtail(seq[endlabels[i]][col]);
	seq[endlabels[i]][col] = newlabel;
    }
}
Esempio n. 20
0
// **************************************************************************
// bool test_basic(const sequence& test, size_t s, bool has_cursor)
//   Postcondition: A return value of true indicates:
//     a. test.size() is s, and
//     b. test.is_item() is has_cursor.
//   Otherwise the return value is false.
//   In either case, a description of the test result is printed to cout.
// **************************************************************************
bool test_basic(const sequence& test, size_t s, bool has_cursor)
{
    // Step 1: the size must match.
    cout << "Testing that size() returns " << s << " ... ";
    cout.flush( );
    const bool size_ok = (test.size( ) == s);
    cout << (size_ok ? "Passed." : "Failed.") << endl;

    // The cursor check is only run (and reported) when the size check passed.
    if (!size_ok)
        return false;

    // Step 2: the cursor state must match.
    cout << "Testing that is_item() returns ";
    cout << (has_cursor ? "true" : "false") << " ... ";
    cout.flush( );
    const bool cursor_ok = (test.is_item( ) == has_cursor);
    cout << (cursor_ok ? "Passed." : "Failed.") << endl;

    return cursor_ok;
}
Esempio n. 21
0
void convert_IOB1_IOB2(int is_cap, int is_last, sequence & seq) {
    vector<int> firstlabels;
    char begin = is_cap ? 'B' : 'b';
    string BEGIN = is_cap ? "B" : "b";
    char inside = is_cap ? 'I' : 'i';

    string str1, str2, bstr, newlabel;
    int i, len = seq.size();        
    
    for (i = 0; i < len; i++) {
	int col2 = is_last ? seq[i].size() - 1 : seq[i].size() - 2;	
	str2 = seq[i][col2];
	
	if (str2[0] != inside) {
	    continue;
	}
	
	if (i == 0) {
	    firstlabels.push_back(i);
	
	} else {
	    int col1 = is_last ? seq[i - 1].size() - 1 : seq[i - 1].size() - 2;
	    str1 = seq[i - 1][col1];
	
	    bstr = BEGIN;
	    bstr += strtail(str2);
	    
	    if (str2 != str1 && str1 != bstr) {
		firstlabels.push_back(i);
	    }
	}
    }

    for (i = 0; i < firstlabels.size(); i++) {
	int col = is_last ? seq[firstlabels[i]].size() - 1 : seq[firstlabels[i]].size() - 2;
	
	newlabel = BEGIN;
	newlabel += strtail(seq[firstlabels[i]][col]);
	seq[firstlabels[i]][col] = newlabel;
    }
}
Esempio n. 22
0
// Prints every maximal run of hidden states below 4 that passes
// Check_Sequence(), one per line, as
//   <chromosome_name> TAB <start> TAB <end>
// where start/end are the run's boundary indices offset by `shift + 1`
// (end refers to the first position after the run).
//
// NOTE(review): states < 4 are presumably the "inside a CpG island" states
// of the model - confirm against the state encoding.
//
// Check_Sequence() is always handed sequence-local indices. The trailing
// run used to be checked with the shifted output coordinates instead,
// unlike runs closed inside the loop; both cases now share one code path.
void Print_Cpg(sequence& hidden_sequence, uint shift, string& chromosome_name)
{
    uint start_cpg = 0;
    bool is_open_cpg = false;

    // Iterate one step past the end so that a run still open at the end of
    // the sequence is closed by the same code as any other run.
    for (ull i = 0; i <= hidden_sequence.size(); ++i)
    {
        const bool in_island = (i < hidden_sequence.size()) && (hidden_sequence[i] < 4);

        if (in_island)
        {
            if (!is_open_cpg)
            {
                start_cpg = i + shift + 1;
                is_open_cpg = true;
            }
            continue;
        }

        if (!is_open_cpg)
        {
            continue;
        }

        uint end_cpg = i + shift + 1;
        // Undo the output offset: Check_Sequence works on local indices.
        if (Check_Sequence(hidden_sequence, start_cpg - shift - 1, end_cpg - shift - 1))
        {
            cout << chromosome_name << "\t" << start_cpg << "\t" << end_cpg << endl;
        }
        is_open_cpg = false;
    }
}
Esempio n. 23
0
// Adds two sequences that may cover different index ranges.
// The result starts at the smaller of the two start indices and is built by
// concatenating three pieces: the part before the common range, the common
// range (element-wise sum) or the gap between the sequences (zeros), and
// the part after it. If one operand is empty the other one is returned.
// NOTE(review): range(a, b) looks like a half-open [a, b) slice, judging by
// the "+1" on the upper bounds - confirm against its definition.
sequence<R> operator+( sequence<T1> X, sequence<T2> Y )
{
	// An empty operand contributes nothing: return the other one
	if( X.size() == 0 )
		return Y;
	if( Y.size() == 0 )
		return X;

	// Intervals
	// t1 / t2: earliest start and latest end over both sequences
	// ta / tb: later start and earlier end (the overlap when ta <= tb)
	int	t1  = min(X.t1(),Y.t1());
	int	ta  = max(X.t1(),Y.t1());
	int	tb  = min(X.t2(),Y.t2());
	int	t2  = max(X.t2(),Y.t2());
	int sx  = X.size();
	int sy  = Y.size();

	// First interval
	// Leading part of whichever sequence starts first; stays empty when
	// both start at the same index. min(..., size) caps the slice at the
	// whole buffer when the sequences do not overlap.
	vec<R> v1(0);
	if( t1 == X.t1() && t1 != Y.t1() )
		v1 = X.buffer()( range( 0, min(ta-X.t1(),sx) ) );
	else if( t1 != X.t1() && t1 == Y.t1() )
		v1 = Y.buffer()( range( 0, min(ta-Y.t1(),sy) ) );

	// Second interval
	// Either the element-wise sum over the overlap, or ta-tb-1 zeros that
	// fill the gap between two disjoint sequences.
	vec<R> v2;
	if( ta <= tb )
		v2 = X.buffer()( range( ta-X.t1(), tb-X.t1()+1 ) ) + Y.buffer()( range( ta-Y.t1(), tb-Y.t1()+1 ) );
	else {
		int I = ta-tb-1;
		v2.resize(I);
		// A zero of the element type is obtained by scaling X's first element by 0
		for( int i = 0; i < I; i++ )
			v2(i) = 0*X.buffer()(0);
	}

	// Third interval
	// Trailing part of whichever sequence ends last; stays empty when both
	// end at the same index. max(..., 0) clamps the slice start to the
	// beginning of the buffer when the sequences do not overlap.
	vec<R> v3(0);
	if( t2 == X.t2() && t2 != Y.t2() )
		v3 = X.buffer()( range( max(tb-X.t1()+1,0), X.size() ) );
	else if( t2 != X.t2() && t2 == Y.t2() )
		v3 = Y.buffer()( range( max(tb-Y.t1()+1,0), Y.size() ) );

	// Sum
	// Concatenate the three pieces; the result starts at t1
	return sequence<R>( vec<R>{v1,v2,v3}, t1 );
}
Esempio n. 24
0
File: w3e4.cpp Progetto: lybeck/NuMe
// Evaluates the cosine sum  a[0]*cos(0*x) + a[1]*cos(1*x) + ... + a[n-1]*cos((n-1)*x)
// at x, where n = a.size(). Returns 0 for an empty sequence.
double f2(double x, sequence a) {
    const int count = static_cast<int>(a.size());
    double sum = 0;
    for (int i = 0; i < count; i++) {
        sum += a[i] * cos(i * x);
    }
    // The accumulated value was never returned, so every call ran off the
    // end of a value-returning function (undefined behaviour).
    return sum;
}
Esempio n. 25
0
File: w3e4.cpp Progetto: lybeck/NuMe
// Evaluates the sine sum  a[1]*sin(1*x) + a[2]*sin(2*x) + ... + a[n-1]*sin((n-1)*x)
// at x, where n = a.size(). The index-0 term is skipped (sin(0) is 0).
// Returns 0 when the sequence has fewer than two elements.
double f1(double x, sequence a) {
    const int count = static_cast<int>(a.size());
    double sum = 0;
    for (int i = 1; i < count; i++) {
        sum += a[i] * sin(i * x);
    }
    // The accumulated value was never returned, so every call ran off the
    // end of a value-returning function (undefined behaviour).
    return sum;
}
Esempio n. 26
0
// Benchmark driver.
//
// Command line: <number of tests> <number of pattern sizes> <size>...
// Reads the text from textFileName and the dictionary words from
// wordFileName, groups the words by length, and for every requested pattern
// size calls Run() once per algorithm.
//
// Returns 1 without doing any work when the arguments are unusable: too
// few of them, a test count of zero, or fewer sizes than announced.
int main(int argc, char **argv)
{ 
    using namespace std;
  unsigned j;
  /*
  cout << "Input number of tests (for each pattern size): " << flush;
  cin >> Number_Of_Tests;
  cout << "Input number of pattern sizes: " << flush;
  cin >> Number_Of_Pattern_Sizes;
  cout << "Input pattern sizes: " << flush;
  */
  if (argc < 4)
      return 1;
  Number_Of_Tests = strtoul(argv[1], NULL, 10);
  Number_Of_Pattern_Sizes = strtoul(argv[2], NULL, 10);

  // The test count is used as a divisor further down.
  if (Number_Of_Tests == 0)
      return 1;
  // Every announced pattern size must be present on the command line;
  // otherwise the loop below would read argv past argc.
  if (static_cast<unsigned long>(argc - 3)
          < static_cast<unsigned long>(Number_Of_Pattern_Sizes))
      return 1;

  vector<unsigned> Pattern_Size(Number_Of_Pattern_Sizes);
  for (j = 0; j < Number_Of_Pattern_Sizes; ++j)
      Pattern_Size[j] = strtoul(argv[j + 3], NULL, 10);
  cout << "\nNumber of tests: " << Number_Of_Tests << endl;
  cout << "Pattern sizes: ";
  for (j = 0; j < Number_Of_Pattern_Sizes; ++j) 
    cout << Pattern_Size[j] << " ";
  cout << endl;
  
  // Load the whole text, character by character, into S1.
  ifstream ifs(textFileName);
  char C;
  while (ifs.get(C))
    S1.push_back(C);
  cout << S1.size() << " characters read." << endl;
  

  
  // Load the dictionary, bucketed by stored word length.
  ifstream dictfile(wordFileName);
  typedef istream_iterator<string> string_input;
  typedef map<int, vector<sequence>, less<int> > map_type;
  map_type dictionary;
  sequence S;
  string S0;
  string_input si(dictfile);
  while (si != string_input()) {
    S0 = *si++;
    S.erase(S.begin(), S.end());
    // NOTE(review): the last character of every word is dropped here;
    // presumably deliberate, confirm against the word file format.
    copy(S0.begin(), S0.end() - 1, back_inserter(S));
    dictionary[S.size()].push_back(S);
  }
  

  for (j = 0; j < Number_Of_Pattern_Sizes; ++j) {
    
    // Thin the bucket to Number_Of_Tests evenly spaced words.
    vector<sequence>& diction = dictionary[Pattern_Size[j]];
    if (diction.size() > Number_Of_Tests) {
      vector<sequence> temp;
      unsigned Skip_Amount = diction.size() / Number_Of_Tests;
      for (unsigned T = 0; T < Number_Of_Tests; ++T) {
         temp.push_back(diction[T * Skip_Amount]);
      }
      diction = temp;
    }
    

    // NOTE(review): this wraps around if a pattern size exceeds the text
    // length; how Run() uses Increment is not visible here.
    Increment = (S1.size() - Pattern_Size[j]) / Number_Of_Tests;
    
    cout << "\n\n-----------------------------------------------------------\n"
         << "Searching for patterns of size " << Pattern_Size[j] 
         << "..." << endl;
    cout << "(" << Number_Of_Tests << " patterns from the text, "
         << dictionary[Pattern_Size[j]].size() << "  from the dictionary)" << endl;
    

    // Progress indicator on stderr.
    cerr << Pattern_Size[j] << " " << flush;
    
    Base_Time = 0.0;
    for (int k = 0; k < number_of_algorithms; ++k) {
      // No "Timing" header is printed for algorithm 0.
      if (k != 0) 
        cout << "Timing " << algorithm_names[k] << ":" << endl;
      Run(k, S1, dictionary[Pattern_Size[j]], Pattern_Size[j]);
    }
    cout << endl;
    

  }
  cerr << endl;
}
Esempio n. 27
0
// Analyzes every position of the sequence, in order, by delegating to the
// per-position overload.
void sequence_analyzer::analyze(sequence& sequence)
{
    uint64_t pos = 0;
    while (pos < sequence.size())
    {
        analyze(sequence, pos);
        ++pos;
    }
}
Esempio n. 28
0
void convert_IOE2_IOB2(int is_cap, int is_last, sequence & seq) {
    map<int, int> beginlabels;
    map<int, int>::iterator blbit;
    
    char begin = is_cap ? 'B' : 'b';
    string BEGIN = is_cap ? "B" : "b";
    char inside = is_cap ? 'I' : 'i';
    string INSIDE = is_cap ? "I" : "i";
    char end = is_cap ? 'E' : 'e';
    
    string str1, str2, istr, str, newlabel;
    int i, len = seq.size();
    
    for (i = 0; i < len; i++) {
	int col2 = is_last ? seq[i].size() - 1 : seq[i].size() - 2;
	str2 = seq[i][col2];
	
	if (str2[0] != end && str2[0] != inside) {
	    continue;
	}
	
	if (i == 0) {
	    beginlabels.insert(pair<int, int>(i, i));

	} else {
	    int col1 = is_last ? seq[i - 1].size() - 1 : seq[i - 1].size() - 2;
	    str1 = seq[i - 1][col1];
	    
	    istr = INSIDE;
	    istr += strtail(str2);
	    
	    if (str2[0] == end && str1 != istr) {
		beginlabels.insert(pair<int, int>(i, i));
	    
	    } else if (str2[0] == inside && str1 != str2) {
		beginlabels.insert(pair<int, int>(i, i));	    
	    }
	}
    }
    
    for (i = 0; i < len; i++) {
	int col = is_last ? seq[i].size() - 1 : seq[i].size() - 2;
	str = seq[i][col];
	
	if (str[0] != end && str[0] != inside) {
	    continue;
	}
	
	blbit = beginlabels.find(i);
	
	if (blbit != beginlabels.end()) {
	    newlabel = BEGIN;
	    newlabel += strtail(str);
	    seq[i][col] = newlabel;
	
	} else {
	    newlabel = INSIDE;
	    newlabel += strtail(str);
	    seq[i][col] = newlabel;
	}	
    }
} 
// Appends every element of src to dst, front to back.
static void copy_sequence_to_list(list &dst, const sequence &src) {
  size_t idx = 0;
  while (idx < src.size()) {
    dst.append(src[idx]);
    ++idx;
  }
}
Esempio n. 30
0
void convert_IOB2_IOE2(int is_cap, int is_last, sequence & seq) {
    map<int, int> endlabels;
    map<int, int>::iterator elbit;
    
    char begin = is_cap ? 'B' : 'b';
    char inside = is_cap ? 'I' : 'i';
    string INSIDE = is_cap ? "I" : "i";
    char end = is_cap ? 'E' : 'e';
    string END = is_cap ? "E" : "e";
    
    string str1, str2, istr, str, newlabel;
    int i, len = seq.size();
    
    for (i = 0; i < len; i++) {
	int col1 = is_last ? seq[i].size() - 1 : seq[i].size() - 2;
	str1 = seq[i][col1];
	
	if (str1[0] != begin && str1[0] != inside) {
	    continue;
	}
	
	if (i == len - 1) {
	    endlabels.insert(pair<int, int>(i, i));

	} else {
	    int col2 = is_last ? seq[i + 1].size() - 1 : seq[i + 1].size() - 2;
	    str2 = seq[i + 1][col2];
	    
	    istr = INSIDE;
	    istr += strtail(str1);
	    
	    if (str1[0] == begin && str2 != istr) {
		endlabels.insert(pair<int, int>(i, i));
	    
	    } else if (str1[0] == inside && str2 != str1) {
		endlabels.insert(pair<int, int>(i, i));	    
	    }
	}
    }
    
    for (i = 0; i < len; i++) {
	int col = is_last ? seq[i].size() - 1 : seq[i].size() - 2;
	str = seq[i][col];
	
	if (str[0] != begin && str[0] != inside) {
	    continue;
	}
	
	elbit = endlabels.find(i);
	if (elbit != endlabels.end()) {
	    newlabel = END;
	    newlabel += strtail(str);
	    seq[i][col] = newlabel;
	
	} else {
	    newlabel = INSIDE;
	    newlabel += strtail(str);
	    seq[i][col] = newlabel;
	}	
    }
}