// Command-line smoke test for the word segmentor.
//
// Usage: cws [model path] [lexicon_file]
//
// Creates a segmentor from the model path (and the optional lexicon file),
// segments a small built-in suite of sentences and prints each result on one
// line with the words separated by '|'.
//
// Returns 1 when no model path is given, -1 when no segmentor could be
// created, 0 otherwise.
int main(int argc, char * argv[]) {
  if (argc < 2) {
    std::cerr << "cws [model path] [lexicon_file]" << std::endl;
    return 1;
  }

  void * engine = 0;
  if (argc == 2) {
    engine = segmentor_create_segmentor(argv[1]);
  } else if (argc == 3) {
    engine = segmentor_create_segmentor(argv[1], argv[2]);
  }

  if (!engine) {
    // Reached when the create call returned a null handle, and also when more
    // than two arguments were supplied (no create call is made in that case).
    // Report it instead of exiting silently.
    std::cerr << "cws: failed to create segmentor" << std::endl;
    return -1;
  }

  std::vector<std::string> words;

  const char * suite[] = {
    "What's wrong with you? 别灰心! http://t.cn/zQz0Rn",
    "台北真的是天子骄子吗?",
  };
  // Derive the case count from the array so adding a sentence needs one edit.
  const int num_cases = static_cast<int>(sizeof(suite) / sizeof(suite[0]));

  for (int i = 0; i < num_cases; ++ i) {
    words.clear();
    int len = segmentor_segment(engine, suite[i], words);

    // Distinct index name: the original reused `i` and shadowed the outer loop.
    for (int j = 0; j < len; ++ j) {
      std::cout << words[j];
      if (j + 1 == len) {
        std::cout << std::endl;
      } else {
        std::cout << "|";
      }
    }
  }

  segmentor_release_segmentor(engine);
  return 0;
}
// Segment `sentence` with the loaded model and return the resulting words.
//
// When `model` (declared outside this block) is NULL, a message is written to
// std::cerr and an empty vector is returned. The value returned by
// segmentor_segment itself is not inspected; only the filled vector is used.
std::vector<std::string> segment(const std::string& sentence) {
  std::vector<std::string> words;

  // Guard: nothing can be segmented without a model.
  if (model == NULL) {
    std::cerr << "Segmentor: Model not loaded!" << std::endl;
    return words;
  }

  segmentor_segment(model, sentence.c_str(), words);
  return words;
}
// integrate word segmentor into LTP int LTP::wordseg(XML4NLP & xml) { if (xml.QueryNote(NOTE_WORD)) { return 0; } // int ret = splitSentence_dummy(xml); if (0 != ret) { ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess."); return ret; } // get the segmentor pointer void * segmentor = _resource.GetSegmentor(); if (0 == segmentor) { ERROR_LOG("in LTP::wordseg, failed to init a segmentor"); return kWordsegError; } int stnsNum = xml.CountSentenceInDocument(); if (0 == stnsNum) { ERROR_LOG("in LTP::wordseg, number of sentence equals 0"); return kEmptyStringError; } for (int i = 0; i < stnsNum; ++ i) { std::string strStn = xml.GetSentence(i); std::vector<std::string> vctWords; if (ltp::strutils::codecs::length(strStn) > MAX_SENTENCE_LEN) { ERROR_LOG("in LTP::wordseg, input sentence is too long"); return kSentenceTooLongError; } if (0 == segmentor_segment(segmentor, strStn, vctWords)) { ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"", strStn.c_str()); return kWordsegError; } if (0 != xml.SetWordsToSentence(vctWords, i)) { ERROR_LOG("in LTP::wordseg, failed to write segment result to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_WORD); return 0; }
// JNI bridge for edu.hit.ir.ltp4j.Segmentor.segment.
//
// Segments the Java string `sent` with the file-level `segmentor` handle
// (declared outside this block) and appends each word to `array_words` by
// calling its `add(Object)` method.
//
// Returns the count reported by segmentor_segment, or -1 when an argument is
// null or a JNI call fails (in the latter case a Java exception is pending and
// will be raised once control returns to the JVM).
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_segment
(JNIEnv* env, jobject obj, jstring sent, jobject array_words) {
  // Passing null references to the JNI calls below would crash the VM.
  if (!sent || !array_words) {
    return -1;
  }

  jclass array_list = env->GetObjectClass(array_words);
  jmethodID list_add = env->GetMethodID(array_list,
                                        "add",
                                        "(Ljava/lang/Object;)Z");
  if (!list_add) {
    // Method lookup failed; an exception is already pending.
    return -1;
  }

  const char* str_sent = env->GetStringUTFChars(sent, 0);
  if (!str_sent) {
    // GetStringUTFChars returns NULL on failure; constructing a std::string
    // from a null pointer is undefined behaviour.
    return -1;
  }
  std::string sentence(str_sent);
  // The bytes are copied into `sentence`, so the JNI buffer can be released
  // right away rather than being held across segmentation.
  env->ReleaseStringUTFChars(sent, str_sent);

  std::vector<std::string> words;
  int len = segmentor_segment(segmentor, sentence, words);

  for (int i = 0; i < len; i++) {
    jobject tmp = stringToJstring(env, words[i].c_str());
    env->CallBooleanMethod(array_words, list_add, tmp);

    // Free the per-word local reference: otherwise one reference per word
    // accumulates until this native method returns, which can exhaust the
    // local reference table on long inputs.
    if (tmp) {
      env->DeleteLocalRef(tmp);
    }

    // Stop if `add` threw; further JNI calls with a pending exception are
    // not permitted.
    if (env->ExceptionCheck()) {
      return -1;
    }
  }

  return len;
}
// Command-line word segmentor reading sentences from standard input.
//
// Usage: cws [model path] [lexicon_file]
//
// Reads one sentence per line from stdin, skips empty lines, and prints the
// words of each sentence on one line separated by tabs. Trace messages and the
// elapsed time (difference of two get_time() readings, reported as seconds)
// go to stderr.
//
// Returns 1 when no model path is given, -1 when no segmentor could be
// created, 0 otherwise.
int main(int argc, char * argv[]) {
  if (argc < 2) {
    std::cerr << "cws [model path] [lexicon_file]" << std::endl;
    return 1;
  }

  void * engine = 0;
  if (argc == 2) {
    engine = segmentor_create_segmentor(argv[1]);
  } else if (argc == 3) {
    engine = segmentor_create_segmentor(argv[1], argv[2]);
  }

  if (!engine) {
    // Reached when the create call returned a null handle, and also when more
    // than two arguments were supplied (no create call is made in that case).
    // Report it instead of exiting silently.
    std::cerr << "cws: failed to create segmentor" << std::endl;
    return -1;
  }

  std::vector<std::string> words;
  std::string sentence;

  std::cerr << "TRACE: Model is loaded" << std::endl;
  double tm = get_time();

  while (std::getline(std::cin, sentence, '\n')) {
    words.clear();
    if (sentence.empty()) {
      continue;
    }

    int len = segmentor_segment(engine, sentence, words);
    for (int i = 0; i < len; ++ i) {
      std::cout << words[i];
      if (i + 1 == len) {
        // std::endl is kept on purpose: flushing per sentence lets a consumer
        // on the other end of a pipe see each result as soon as it is ready.
        std::cout << std::endl;
      } else {
        std::cout << "\t";
      }
    }
  }

  segmentor_release_segmentor(engine);

  tm = get_time() - tm;
  std::cerr << "TRACE: cws-tm-consume "
            << tm
            << " seconds."
            << std::endl;

  return 0;
}