Exemple #1
0
        // Two-pass segmentation of [begin, end).
        //
        // The range is first cut with _hmmSeg. Every resulting piece longer
        // than _maxWordLen is cut again with _fullSeg and replaced by the
        // pieces that second cut produces; shorter pieces are kept as they are.
        //
        // begin, end : input range to segment
        // res        : output; results are appended, existing content is kept
        //
        // Returns false when a sanity check fails or when _hmmSeg.cut fails,
        // true otherwise. A long piece for which _fullSeg.cut returns false is
        // skipped and does not affect the return value.
        bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
        {
#ifndef NO_CODING_LOG
            // Sanity checks; compiled out when NO_CODING_LOG is defined.
            if (!_getInitFlag())
            {
                LogError("not inited.");
                return false;
            }
            if (begin > end)
            {
                LogError("begin > end");
                return false;
            }
#endif
            // use hmm cut first
            vector<Unicode> hmmRes;
            if (!_hmmSeg.cut(begin, end, hmmRes))
            {
                LogError("_hmmSeg cut failed.");
                return false;
            }

            // Scratch buffer shared by all iterations to avoid reallocating it.
            vector<Unicode> fullRes;
            for (vector<Unicode>::const_iterator hmmResItr = hmmRes.begin(); hmmResItr != hmmRes.end(); ++hmmResItr)
            {
                // if it's too long, cut with _fullSeg, put fullRes in res
                if (hmmResItr->size() > _maxWordLen)
                {
                    // Start from an empty buffer so that pieces produced for an
                    // earlier long word are not emitted a second time.
                    // NOTE(review): presumably _fullSeg.cut appends to its
                    // output; if it already resets it, this clear is harmless.
                    fullRes.clear();
                    if (_fullSeg.cut(hmmResItr->begin(), hmmResItr->end(), fullRes))
                    {
                        res.insert(res.end(), fullRes.begin(), fullRes.end());
                    }
                }
                else // just use the hmm result
                {
                    res.push_back(*hmmResItr);
                }
            }

            return true;
        }
Exemple #2
0
        // Two-pass segmentation of the non-empty range [begin, end).
        //
        // Pass one cuts the range with _mixSeg. In pass two every word longer
        // than _maxWordLen is re-cut with _fullSeg and its pieces are appended
        // to res in place of the word; all other words are appended unchanged.
        //
        // begin, end : input range; begin >= end is rejected with an error log
        // res        : output; results are appended, existing content is kept
        //
        // Returns false for an empty/inverted range or when _mixSeg.cut fails,
        // true otherwise. A long word for which _fullSeg.cut returns false is
        // skipped without changing the return value.
        bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
        {
            assert(_getInitFlag());
            if (begin >= end)
            {
                LogError("begin >= end");
                return false;
            }

            // Pass one: coarse segmentation of the whole range.
            vector<Unicode> words;
            if (!_mixSeg.cut(begin, end, words))
            {
                LogError("_mixSeg cut failed.");
                return false;
            }

            // Pass two: refine over-long words, forward the rest.
            typedef vector<Unicode>::const_iterator WordIter;
            vector<Unicode> pieces; // scratch buffer reused across words
            for (WordIter word = words.begin(); word != words.end(); ++word)
            {
                // Short enough: keep the mix result as it is.
                if (word->size() <= _maxWordLen)
                {
                    res.push_back(*word);
                    continue;
                }

                // Too long: try the full cut; on failure the word is skipped.
                if (!_fullSeg.cut(word->begin(), word->end(), pieces))
                {
                    continue;
                }

                res.insert(res.end(), pieces.begin(), pieces.end());

                // Empty the scratch buffer for the next long word.
                pieces.clear();
            }

            return true;
        }