int pad_answer( const string &mode, const V2_STR &one_sent, V2_STR &sent_feats ) { vector<string> answer_tag(1, "DUMMY"); if( mode == "tag" ) { // no answer tag for( V2_STR_citr i_row = one_sent.begin(); i_row != one_sent.end(); ++i_row ) { sent_feats.push_back( answer_tag ); } }else if( mode == "learn" ) { for( V2_STR_citr i_row = one_sent.begin(); i_row != one_sent.end(); ++i_row ) { answer_tag[ 0 ] = (*i_row)[ 0 ]; sent_feats.push_back( answer_tag ); } }else { cerr << "invalid mode option! " << endl; exit(1); } return 0; }
int Tokenizer::splitter( const string &trimmed_sent, V2_STR &data ) { size_t beg = 0, end = find_token_end( trimmed_sent, 0 ), n_tokens = 0; V1_STR one_row; one_row.push_back( "token" ); while( end != string::npos ) { string token = trimmed_sent.substr( beg, end - beg ); if( token != " " ) { // Put all tokens except a space token into the 'data' container one_row.back() = token; data.push_back( one_row ); ++n_tokens; } beg = end; end = find_token_end( trimmed_sent, beg ); } return n_tokens; }