コード例 #1
0
ファイル: nersuite.cpp プロジェクト: saetre/biocreative_data
/**
 * Append one single-column "answer" row to sent_feats for every row of one_sent.
 *
 * @param mode        "tag"   : every appended row holds the placeholder "DUMMY".
 *                    "learn" : every appended row holds column 0 of the matching input row.
 * @param one_sent    Input rows. Only column 0 is read, and only in "learn" mode.
 * @param sent_feats  Output container; exactly one row is appended per input row.
 * @return Always 0. The process is terminated with exit(1) when mode is neither
 *         "tag" nor "learn", or when a row is empty in "learn" mode.
 */
int pad_answer( const string &mode, const V2_STR &one_sent, V2_STR &sent_feats )
{
        const bool      is_learn = ( mode == "learn" );

        // Reject an unknown mode up front so nothing is appended before bailing out.
        if( !is_learn && mode != "tag" ) {
                cerr << "invalid mode option! " << endl;
                exit(1);
        }

        vector<string>  answer_tag(1, "DUMMY");

        for( V2_STR_citr i_row = one_sent.begin(); i_row != one_sent.end(); ++i_row ) {
                if( is_learn ) {
                        // Indexing column 0 of an empty row would be undefined behaviour,
                        // so fail loudly in the same way as for an invalid mode.
                        if( i_row->empty() ) {
                                cerr << "empty row in learn mode: no answer tag column! " << endl;
                                exit(1);
                        }
                        answer_tag[ 0 ] = (*i_row)[ 0 ];
                }
                sent_feats.push_back( answer_tag );
        }

        return 0;
}
コード例 #2
0
ファイル: tokenizer.cpp プロジェクト: atante/nersuite
	int Tokenizer::splitter( const string &trimmed_sent, V2_STR &data )
	{
		size_t	beg = 0, end = find_token_end( trimmed_sent, 0 ), n_tokens = 0;

		V1_STR	one_row;
		one_row.push_back( "token" );		

		while( end != string::npos ) {
			string	token = trimmed_sent.substr( beg, end - beg );

			if( token != " " ) {				// Put all tokens except a space token into the 'data' container
				one_row.back() = token;
				data.push_back( one_row );
				++n_tokens;
			}

			beg = end;
			end = find_token_end( trimmed_sent, beg );
		}

		return n_tokens;
	}