// Rough throughput benchmark for the postagger C interface.
//
// Reads the pre-segmented corpus line by line, splits each line on
// whitespace into words, tags it, and finally prints the number of tokens
// processed per millisecond of processor time (as reported by clock()) to
// stderr. The test makes no assertion on the measured speed.
TEST(postag_interface_unittest, test_speed) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; stop here instead of handing a null handle
    // to postagger_postag / postagger_release_postagger.
    return;
  }

  std::ifstream ifs("./test_data/unittest/test_data.segmented");
  std::string line;
  std::string word;
  std::vector<std::string> words;
  std::vector<std::string> tags;
  int nr_tokens = 0;

  const std::clock_t start_time = std::clock();
  while (std::getline(ifs, line, '\n')) {
    std::stringstream S(line);
    words.clear();
    tags.clear();
    while (S >> word) {
      words.push_back(word);
    }
    postagger_postag(engine, words, tags);
    nr_tokens += static_cast<int>(words.size());
  }

  // Convert clock ticks to milliseconds through CLOCKS_PER_SEC rather than a
  // hard-coded divisor, so the unit is correct whatever the tick rate is.
  const double elapsed_ms =
      1000. * static_cast<double>(std::clock() - start_time) / CLOCKS_PER_SEC;
  // NOTE: if no measurable time elapsed this prints inf/nan; the value is
  // informational only.
  const double throughput_per_millisecond = nr_tokens / elapsed_ms;
  std::cerr << throughput_per_millisecond << std::endl;

  postagger_release_postagger(engine);
}
// Tag a list of words with the loaded model.
//
// @param words  the words to tag.
// @return the tags written by postagger_postag, or an empty vector when
//         `model` is NULL (in which case an error is printed to stderr).
std::vector<std::string> postag(const std::vector<std::string>& words) {
  std::vector<std::string> tags;

  // Without a model there is nothing to tag with: report and bail out.
  if (NULL == model) {
    std::cerr << "Postagger: Model not loaded!" << std::endl;
    return tags;
  }

  postagger_postag(model, words, tags);
  return tags;
}
// Tagging an empty word list is expected to report zero tagged words.
TEST(postag_interface_unittest, test_empty_list) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; do not pass a null handle to the tagger.
    return;
  }

  std::vector<std::string> words;  // intentionally left empty
  std::vector<std::string> tags;

  const int nr_words = postagger_postag(engine, words, tags);
  EXPECT_EQ(0, nr_words);

  postagger_release_postagger(engine);
}
// integrate postagger into LTP int LTP::postag(XML4NLP & xml) { if ( xml.QueryNote(NOTE_POS) ) { return 0; } // dependency int ret = wordseg(xml); if (0 != ret) { ERROR_LOG("in LTP::postag, failed to perform word segment preprocess"); return ret; } void * postagger = _resource.GetPostagger(); if (0 == postagger) { ERROR_LOG("in LTP::postag, failed to init a postagger"); return kPostagError; } int stnsNum = xml.CountSentenceInDocument(); if (0 == stnsNum) { ERROR_LOG("in LTP::postag, number of sentence equals 0"); return kEmptyStringError; } for (int i = 0; i < stnsNum; ++i) { vector<string> vecWord; vector<string> vecPOS; xml.GetWordsFromSentence(vecWord, i); if (0 == vecWord.size()) { ERROR_LOG("Input sentence is empty."); return kEmptyStringError; } if (vecWord.size() > MAX_WORDS_NUM) { ERROR_LOG("Input sentence is too long."); return kSentenceTooLongError; } if (0 == postagger_postag(postagger, vecWord, vecPOS)) { ERROR_LOG("in LTP::postag, failed to perform postag on sent. #%d", i+1); return kPostagError; } if (xml.SetPOSsToSentence(vecPOS, i) != 0) { ERROR_LOG("in LTP::postag, failed to write postag result to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_POS); return 0; }
// Tagging a normal sentence (the kNormalWords fixture) is expected to
// report a positive number of tagged words.
TEST(postag_interface_unittest, test_normal) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; do not pass a null handle to the tagger.
    return;
  }

  std::vector<std::string> words;
  std::vector<std::string> tags;
  for (int i = 0; i < kNumNormalWords; ++ i) {
    words.push_back(kNormalWords[i]);
  }

  const int nr_words = postagger_postag(engine, words, tags);
  // at least one word should have been tagged
  EXPECT_GT(nr_words, 0);

  postagger_release_postagger(engine);
}
// A sentence in which one word is the empty string is expected to be
// rejected: the tagger should report zero tagged words.
TEST(postag_interface_unittest, test_empty_word) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; do not pass a null handle to the tagger.
    return;
  }

  // Position of the fixture word that is replaced by an empty string.
  const int kEmptyWordIndex = 2;

  std::vector<std::string> words;
  std::vector<std::string> tags;
  for (int i = 0; i < kNumNormalWords; ++ i) {
    if (i == kEmptyWordIndex) {
      words.push_back("");
    } else {
      words.push_back(kNormalWords[i]);
    }
  }

  const int nr_words = postagger_postag(engine, words, tags);
  EXPECT_EQ(0, nr_words);

  postagger_release_postagger(engine);
}