コード例 #1
0
ファイル: MixSegment.hpp プロジェクト: hoyoung2015/jiebaR
            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
            {
                vector<Unicode> words;
                words.reserve(end - begin);
                if(!_mpSeg.cut(begin, end, words))
                {
                  Rcout<<"mpSeg cutDAG failed."<<std::endl;
                
                    return false;
                }

                vector<Unicode> hmmRes;
                hmmRes.reserve(end - begin);
                Unicode piece;
                piece.reserve(end - begin);
                for (size_t i = 0, j = 0; i < words.size(); i++)
                {
                    //if mp get a word, it's ok, put it into result
                    if (1 != words[i].size() || (words[i].size() == 1 && _mpSeg.isUserDictSingleChineseWord(words[i][0])))
                    {
                        res.push_back(words[i]);
                        continue;
                    }

                    // if mp get a single one and it is not in userdict, collect it in sequence
                    j = i;
                    while (j < words.size() && 1 == words[j].size() && !_mpSeg.isUserDictSingleChineseWord(words[j][0]))
                    {
                        piece.push_back(words[j][0]);
                        j++;
                    }

                    // cut the sequence with hmm
                    if (!_hmmSeg.cut(piece.begin(), piece.end(), hmmRes))
                    {
                      Rcout<<"_hmmSeg cut failed."<<std::endl;
            
                        return false;
                    }

                    //put hmm result to result
                    for (size_t k = 0; k < hmmRes.size(); k++)
                    {
                        res.push_back(hmmRes[k]);
                    }

                    //clear tmp vars
                    piece.clear();
                    hmmRes.clear();

                    //let i jump over this piece
                    i = j - 1;
                }
                return true;
            }
コード例 #2
0
ファイル: Unicode.hpp プロジェクト: yanyiwu/cppjieba
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
  unicode.clear();
  RuneStrArray runes;
  if (!DecodeRunesInString(s, len, runes)) {
    return false;
  }
  unicode.reserve(runes.size());
  for (size_t i = 0; i < runes.size(); i++) {
    unicode.push_back(runes[i].rune);
  }
  return true;
}
コード例 #3
0
    bool MixSegment::cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
    {
        if(!_getInitFlag())
        {
            LogError("not inited.");
            return false;
        }
		if(begin == end)
		{
			return false;
		}
        vector<TrieNodeInfo> infos;
        if(!_mpSeg.cut(begin, end, infos))
        {
            LogError("mpSeg cutDAG failed.");
            return false;
        }
        Unicode unico;
        vector<Unicode> hmmRes;
        string tmp;
        for(uint i= 0; i < infos.size(); i++)
        {
            TransCode::encode(infos[i].word,tmp);
            if(1 == infos[i].word.size())
            {
                unico.push_back(infos[i].word[0]);
            }
            else
            {
                if(!unico.empty())
                {
                    hmmRes.clear();
                    if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
                    {
                        LogError("_hmmSeg cut failed.");
                        return false;
                    }
                    for(uint j = 0; j < hmmRes.size(); j++)
                    {
                        TransCode::encode(hmmRes[j], tmp);
                        res.push_back(tmp);
                    }
                }
                unico.clear();
                TransCode::encode(infos[i].word, tmp);
                res.push_back(tmp);
            }
        }
        if(!unico.empty())
        {
            hmmRes.clear();
            if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
            {
                LogError("_hmmSeg cut failed.");
                return false;
            }
            for(uint j = 0; j < hmmRes.size(); j++)
            {
                TransCode::encode(hmmRes[j], tmp);
                res.push_back(tmp);
            }
        }
        
        return true;
    }