virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const { vector<Unicode> words; words.reserve(end - begin); if(!_mpSeg.cut(begin, end, words)) { Rcout<<"mpSeg cutDAG failed."<<std::endl; return false; } vector<Unicode> hmmRes; hmmRes.reserve(end - begin); Unicode piece; piece.reserve(end - begin); for (size_t i = 0, j = 0; i < words.size(); i++) { //if mp get a word, it's ok, put it into result if (1 != words[i].size() || (words[i].size() == 1 && _mpSeg.isUserDictSingleChineseWord(words[i][0]))) { res.push_back(words[i]); continue; } // if mp get a single one and it is not in userdict, collect it in sequence j = i; while (j < words.size() && 1 == words[j].size() && !_mpSeg.isUserDictSingleChineseWord(words[j][0])) { piece.push_back(words[j][0]); j++; } // cut the sequence with hmm if (!_hmmSeg.cut(piece.begin(), piece.end(), hmmRes)) { Rcout<<"_hmmSeg cut failed."<<std::endl; return false; } //put hmm result to result for (size_t k = 0; k < hmmRes.size(); k++) { res.push_back(hmmRes[k]); } //clear tmp vars piece.clear(); hmmRes.clear(); //let i jump over this piece i = j - 1; } return true; }
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) { unicode.clear(); RuneStrArray runes; if (!DecodeRunesInString(s, len, runes)) { return false; } unicode.reserve(runes.size()); for (size_t i = 0; i < runes.size(); i++) { unicode.push_back(runes[i].rune); } return true; }
bool MixSegment::cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const { if(!_getInitFlag()) { LogError("not inited."); return false; } if(begin == end) { return false; } vector<TrieNodeInfo> infos; if(!_mpSeg.cut(begin, end, infos)) { LogError("mpSeg cutDAG failed."); return false; } Unicode unico; vector<Unicode> hmmRes; string tmp; for(uint i= 0; i < infos.size(); i++) { TransCode::encode(infos[i].word,tmp); if(1 == infos[i].word.size()) { unico.push_back(infos[i].word[0]); } else { if(!unico.empty()) { hmmRes.clear(); if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes)) { LogError("_hmmSeg cut failed."); return false; } for(uint j = 0; j < hmmRes.size(); j++) { TransCode::encode(hmmRes[j], tmp); res.push_back(tmp); } } unico.clear(); TransCode::encode(infos[i].word, tmp); res.push_back(tmp); } } if(!unico.empty()) { hmmRes.clear(); if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes)) { LogError("_hmmSeg cut failed."); return false; } for(uint j = 0; j < hmmRes.size(); j++) { TransCode::encode(hmmRes[j], tmp); res.push_back(tmp); } } return true; }