// Throughput smoke test: runs the postagger over every line of the segmented
// test corpus and prints the number of tokens handled per millisecond of
// processor time to stderr. No assertion is made about the measured speed.
TEST(postag_interface_unittest, test_speed) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE only records the failure and keeps going; stop here so the
    // null handle is never handed to postagger_postag().
    return;
  }

  std::ifstream ifs("./test_data/unittest/test_data.segmented");
  std::string line;
  std::string word;
  std::vector<std::string> words;
  std::vector<std::string> tags;
  int nr_tokens = 0;

  const clock_t start_time = clock();
  while (std::getline(ifs, line, '\n')) {
    // Split the line on whitespace; every token becomes one input word.
    std::stringstream S(line);
    words.clear();
    tags.clear();
    while (S >> word) {
      words.push_back(word);
    }
    postagger_postag(engine, words, tags);
    nr_tokens += static_cast<int>(words.size());
  }

  // clock() counts in units of 1/CLOCKS_PER_SEC seconds. Convert through
  // CLOCKS_PER_SEC rather than assuming one tick is a microsecond, which does
  // not hold on every platform.
  const double elapsed_ms = (clock() - start_time) * 1000. / CLOCKS_PER_SEC;
  double throughput_per_millisecond = nr_tokens / elapsed_ms;
  std::cerr << throughput_per_millisecond << std::endl;
  postagger_release_postagger(engine);
}
Beispiel #2
0
 // Tags `words` with the loaded postagger model and returns the tags that
 // postagger_postag() produced. When no model is loaded, a diagnostic is
 // written to stderr and an empty vector is returned.
 std::vector<std::string> postag(const std::vector<std::string>& words) {
   std::vector<std::string> tags;
   if (NULL == model) {
     std::cerr << "Postagger: Model not loaded!" << std::endl;
     return tags;
   }
   postagger_postag(model, words, tags);
   return tags;
 }
// Tagging an empty word list is expected to return 0.
TEST(postag_interface_unittest, test_empty_list) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; do not continue with a null handle.
    return;
  }
  std::vector<std::string> words;
  std::vector<std::string> tags;
  int nr_words = postagger_postag(engine, words, tags);
  EXPECT_EQ(0, nr_words);
  postagger_release_postagger(engine);
}
Beispiel #4
0
// Part-of-speech tagging stage of the LTP pipeline.
//
// Returns 0 right away when the document is already marked with NOTE_POS.
// Otherwise it runs wordseg() first, tags every sentence with the postagger
// obtained from _resource, writes the tags back into `xml` and sets NOTE_POS.
//
// Return value: 0 on success; the wordseg() status when segmentation fails;
// kPostagError when no postagger is available or tagging a sentence fails;
// kEmptyStringError when the document has no sentence or a sentence has no
// words; kSentenceTooLongError when a sentence exceeds MAX_WORDS_NUM words;
// kWriteXmlError when the tags cannot be stored in the document.
int LTP::postag(XML4NLP & xml) {
  // Already tagged: nothing left to do.
  if (xml.QueryNote(NOTE_POS)) {
    return 0;
  }

  // Word segmentation is a prerequisite of tagging.
  const int segStatus = wordseg(xml);
  if (segStatus != 0) {
    ERROR_LOG("in LTP::postag, failed to perform word segment preprocess");
    return segStatus;
  }

  void * tagger = _resource.GetPostagger();
  if (tagger == 0) {
    ERROR_LOG("in LTP::postag, failed to init a postagger");
    return kPostagError;
  }

  const int sentenceCount = xml.CountSentenceInDocument();
  if (sentenceCount == 0) {
    ERROR_LOG("in LTP::postag, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int sentIdx = 0; sentIdx < sentenceCount; ++sentIdx) {
    // Fresh containers per sentence so nothing carries over between rounds.
    vector<string> sentWords;
    vector<string> sentTags;

    xml.GetWordsFromSentence(sentWords, sentIdx);

    if (sentWords.empty()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (sentWords.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    // A zero return from the tagger is treated as failure.
    const int tagged = postagger_postag(tagger, sentWords, sentTags);
    if (tagged == 0) {
      // Sentences are reported 1-based in the log.
      ERROR_LOG("in LTP::postag, failed to perform postag on sent. #%d", sentIdx + 1);
      return kPostagError;
    }

    if (xml.SetPOSsToSentence(sentTags, sentIdx) != 0) {
      ERROR_LOG("in LTP::postag, failed to write postag result to xml");
      return kWriteXmlError;
    }
  }

  // Record that this document now carries POS tags.
  xml.SetNote(NOTE_POS);

  return 0;
}
// Tagging the kNormalWords fixture is expected to return a positive count.
TEST(postag_interface_unittest, test_normal) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; do not continue with a null handle.
    return;
  }
  std::vector<std::string> words;
  std::vector<std::string> tags;
  for (int i = 0; i < kNumNormalWords; ++ i) {
    words.push_back(kNormalWords[i]);
  }
  int nr_words = postagger_postag(engine, words, tags);
  // the return value should be greater than 0
  EXPECT_GT(nr_words, 0);
  postagger_release_postagger(engine);
}
// A word list containing an empty string is expected to make
// postagger_postag() return 0.
TEST(postag_interface_unittest, test_empty_word) {
  void * engine = postagger_create_postagger("./ltp_data/pos.model");
  EXPECT_TRUE(NULL != engine);
  if (NULL == engine) {
    // EXPECT_TRUE is non-fatal; do not continue with a null handle.
    return;
  }
  std::vector<std::string> words;
  std::vector<std::string> tags;
  for (int i = 0; i < kNumNormalWords; ++ i) {
    if (i == 2) {
      // Replace the third fixture word with an empty string.
      words.push_back("");
    } else {
      words.push_back(kNormalWords[i]);
    }
  }
  int nr_words = postagger_postag(engine, words, tags);
  EXPECT_EQ(0, nr_words);
  postagger_release_postagger(engine);
}