Exemplo n.º 1
0
// Demo driver for the word segmentor.
//
// Creates a segmentor from the model path in argv[1] (plus the optional
// lexicon file in argv[2]), segments two built-in sample sentences and prints
// each result on its own line with the words separated by '|'.
//
// Usage: cws [model path] [lexicon_file]
// Returns 0 on success, 1 on a usage error and -1 when the segmentor could
// not be created.
int main(int argc, char * argv[]) {
  // Only one or two arguments are meaningful. Any other count used to leave
  // the engine uncreated and exit silently, so treat it as a usage error.
  if (argc < 2 || argc > 3) {
    std::cerr << "cws [model path] [lexicon_file]" << std::endl;
    return 1;
  }

  void * engine = 0;
  if (argc == 2) {
    engine = segmentor_create_segmentor(argv[1]);
  } else {
    engine = segmentor_create_segmentor(argv[1], argv[2]);
  }

  if (!engine) {
    // Tell the user why the program is exiting instead of failing silently.
    std::cerr << "cws: failed to create segmentor from \"" << argv[1] << "\""
              << std::endl;
    return -1;
  }
  std::vector<std::string> words;

  const char * suite[2] = {
    "What's wrong with you? 别灰心! http://t.cn/zQz0Rn",
    "台北真的是天子骄子吗?",};

  // Derive the count from the array so adding a sample cannot desynchronise
  // the loop bound.
  const int num_sentences = static_cast<int>(sizeof(suite) / sizeof(suite[0]));

  for (int i = 0; i < num_sentences; ++ i) {
    words.clear();
    const int len = segmentor_segment(engine, suite[i], words);
    // Distinct index name: the original inner loop shadowed the outer `i`.
    for (int j = 0; j < len; ++ j) {
      std::cout << words[j];
      if (j+1 == len) std::cout <<std::endl;
      else std::cout<< "|";
    }
  }

  segmentor_release_segmentor(engine);
  return 0;
}
Exemplo n.º 2
0
 // Segments `sentence` with the loaded model and returns the resulting words.
 // When no model is loaded, logs a message to stderr and returns an empty
 // vector. The return value of segmentor_segment is not inspected.
 std::vector<std::string> segment(const std::string& sentence) {
   std::vector<std::string> words;

   // Guard: nothing can be segmented without a model.
   if (model == NULL) {
     std::cerr << "Segmentor: Model not loaded!" << std::endl;
     return words;
   }

   segmentor_segment(model, sentence.c_str(), words);
   return words;
 }
Exemplo n.º 3
0
Arquivo: Ltp.cpp Projeto: rudaoshi/ltp
// Word-segmentation stage of LTP.
//
// Does nothing when `xml` already carries NOTE_WORD. Otherwise it runs
// splitSentence_dummy, segments every sentence of the document with the
// segmentor obtained from _resource, writes the words back into `xml` and
// finally sets NOTE_WORD.
//
// Returns 0 on success (or when the note was already present). On failure it
// returns the non-zero code from splitSentence_dummy, or one of
// kWordsegError, kEmptyStringError, kSentenceTooLongError, kWriteXmlError.
int LTP::wordseg(XML4NLP & xml) {
  // Already segmented: nothing to do.
  if (xml.QueryNote(NOTE_WORD)) {
    return 0;
  }

  // Sentence splitting is a prerequisite for segmentation.
  const int splitStatus = splitSentence_dummy(xml);
  if (splitStatus != 0) {
    ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess.");
    return splitStatus;
  }

  // Fetch the segmentor handle from the shared resources.
  void * engine = _resource.GetSegmentor();
  if (!engine) {
    ERROR_LOG("in LTP::wordseg, failed to init a segmentor");
    return kWordsegError;
  }

  const int sentenceCount = xml.CountSentenceInDocument();
  if (sentenceCount == 0) {
    ERROR_LOG("in LTP::wordseg, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int idx = 0; idx < sentenceCount; ++idx) {
    std::string sentence = xml.GetSentence(idx);

    // Reject over-long input before handing it to the segmentor.
    if (ltp::strutils::codecs::length(sentence) > MAX_SENTENCE_LEN) {
      ERROR_LOG("in LTP::wordseg, input sentence is too long");
      return kSentenceTooLongError;
    }

    std::vector<std::string> tokens;
    const int tokenCount = segmentor_segment(engine, sentence, tokens);
    // A zero word count is treated as a segmentation failure.
    if (tokenCount == 0) {
      ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"",
          sentence.c_str());
      return kWordsegError;
    }

    const int writeStatus = xml.SetWordsToSentence(tokens, idx);
    if (writeStatus != 0) {
      ERROR_LOG("in LTP::wordseg, failed to write segment result to xml");
      return kWriteXmlError;
    }
  }

  xml.SetNote(NOTE_WORD);
  return 0;
}
Exemplo n.º 4
0
// JNI entry point backing edu.hit.ir.ltp4j.Segmentor.segment.
//
// Segments the Java string `sent` with the file-level `segmentor` handle and
// appends each resulting word to `array_words` through its
// add(Object) method.
//
// Returns the word count reported by segmentor_segment, or -1 when an
// argument is null, the add(Object) method cannot be resolved, or the string
// contents cannot be obtained (in the latter two cases a Java exception is
// normally already pending).
// NOTE(review): -1 as the failure value is a choice made here; the original
// code crashed in these situations. Confirm it suits the Java callers.
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_segment
  (JNIEnv* env, jobject obj, jstring sent, jobject array_words) {

  // Passing a null reference to the JNI calls below would crash the JVM.
  if (!sent || !array_words) {
    return -1;
  }

  jclass array_list = env->GetObjectClass(array_words);
  jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z");
  if (!list_add) {
    return -1;
  }

  const char* str_sent = env->GetStringUTFChars( sent , 0);
  // GetStringUTFChars returns NULL on failure; constructing a std::string
  // from NULL is undefined behaviour.
  if (!str_sent) {
    return -1;
  }
  std::string sentence(str_sent);
  // The contents are copied, so release the JVM buffer right away instead of
  // holding it across the segmentation.
  env->ReleaseStringUTFChars(sent, str_sent);

  std::vector<std::string> words;

  int len = segmentor_segment(segmentor, sentence, words);

  for(int i = 0; i < len; i++) {
    jobject tmp = stringToJstring(env,words[i].c_str());
    env->CallBooleanMethod(array_words,list_add,tmp);
    // Drop the per-word reference so a long sentence cannot exhaust the
    // local reference table.
    // NOTE(review): assumes stringToJstring returns a local reference; confirm.
    if (tmp) {
      env->DeleteLocalRef(tmp);
    }
  }
  return len;
}
Exemplo n.º 5
0
// Command-line word segmentor.
//
// Creates a segmentor from the model path in argv[1] (plus the optional
// lexicon file in argv[2]), then reads sentences from stdin one per line and
// writes the words of each to stdout separated by tabs. Empty input lines are
// skipped. The time taken between model load and release is reported on
// stderr.
//
// Usage: cws [model path] [lexicon_file]
// Returns 0 on success, 1 on a usage error and -1 when the segmentor could
// not be created.
int main(int argc, char * argv[]) {
  // Only one or two arguments are meaningful. Any other count used to leave
  // the engine uncreated and exit silently, so treat it as a usage error.
  if (argc < 2 || argc > 3) {
    std::cerr << "cws [model path] [lexicon_file]" << std::endl;
    return 1;
  }

  void * engine = 0;
  if (argc == 2) {
    engine = segmentor_create_segmentor(argv[1]);
  } else {
    engine = segmentor_create_segmentor(argv[1], argv[2]);
  }

  if (!engine) {
    // Tell the user why the program is exiting instead of failing silently.
    std::cerr << "cws: failed to create segmentor from \"" << argv[1] << "\""
              << std::endl;
    return -1;
  }
  std::vector<std::string> words;
  std::string sentence;

  std::cerr << "TRACE: Model is loaded" << std::endl;
  double tm = get_time();

  while (std::getline(std::cin, sentence, '\n')) {
    words.clear();
    if (sentence.size() == 0) { continue; }
    const int len = segmentor_segment(engine, sentence, words);
    for (int i = 0; i < len; ++ i) {
      std::cout << words[i];
      // std::endl is kept so each result line is flushed as soon as it is
      // complete, as before.
      if (i+1 == len) std::cout <<std::endl;
      else std::cout<< "\t";
    }
  }

  segmentor_release_segmentor(engine);

  tm = get_time() - tm;
  std::cerr << "TRACE: cws-tm-consume "
            << tm
            << " seconds."
            << std::endl;

  return 0;
}