// integrate word segmentor into LTP int LTP::wordseg(XML4NLP & xml) { if (xml.QueryNote(NOTE_WORD)) { return 0; } // int ret = splitSentence_dummy(xml); if (0 != ret) { ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess."); return ret; } // get the segmentor pointer void * segmentor = _resource.GetSegmentor(); if (0 == segmentor) { ERROR_LOG("in LTP::wordseg, failed to init a segmentor"); return kWordsegError; } int stnsNum = xml.CountSentenceInDocument(); if (0 == stnsNum) { ERROR_LOG("in LTP::wordseg, number of sentence equals 0"); return kEmptyStringError; } for (int i = 0; i < stnsNum; ++ i) { std::string strStn = xml.GetSentence(i); std::vector<std::string> vctWords; if (ltp::strutils::codecs::length(strStn) > MAX_SENTENCE_LEN) { ERROR_LOG("in LTP::wordseg, input sentence is too long"); return kSentenceTooLongError; } if (0 == segmentor_segment(segmentor, strStn, vctWords)) { ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"", strStn.c_str()); return kWordsegError; } if (0 != xml.SetWordsToSentence(vctWords, i)) { ERROR_LOG("in LTP::wordseg, failed to write segment result to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_WORD); return 0; }