Beispiel #1
0
            // Runs the three segmentation stages (_calcDAG, _calcDP, _cut) over
            // [begin, end) using a single SegmentContext. segWordInfos is handed
            // to the final _cut stage to receive the result.
            // Returns true only when every stage succeeds; otherwise logs which
            // step failed (or that the object is not initialised) and returns false.
            bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<TrieNodeInfo>& segWordInfos)const
            {
                if(!_getInitFlag())
                {
                    LogError("not inited.");
                    return false;
                }

                SegmentContext ctx;
                bool succeeded = false;

                // Stages run strictly in order; the first failure ends the chain.
                if(!_calcDAG(begin, end, ctx))
                {
                    LogError("_calcDAG failed.");
                }
                else if(!_calcDP(ctx))
                {
                    LogError("_calcDP failed.");
                }
                else if(!_cut(ctx, segWordInfos))
                {
                    LogError("_cut failed.");
                }
                else
                {
                    succeeded = true;
                }

                return succeeded;
            }
Beispiel #2
0
 // Segments [begin, end) through the vector<TrieNodeInfo> overload of cut()
 // and appends the encoded form of each resulting word to res.
 // A word whose TransCode::encode call fails is logged and skipped; the
 // function still returns true in that case.
 // Returns false when the object is not initialised or the inner cut() fails.
 virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
 {
     if(!_getInitFlag())
     {
         LogError("not inited.");
         return false;
     }

     vector<TrieNodeInfo> wordInfos;
     if(!cut(begin, end, wordInfos))
     {
         return false;
     }

     string encoded;
     for(vector<TrieNodeInfo>::iterator it = wordInfos.begin(); it != wordInfos.end(); ++it)
     {
         if(!TransCode::encode(it->word, encoded))
         {
             LogError("encode failed.");
             continue;
         }
         res.push_back(encoded);
     }
     return true;
 }
Beispiel #3
0
	bool Trie::find(const Unicode& unico, vector<pair<uint, const TrieNodeInfo*> >& res)const
	{
        if(!_getInitFlag())
        {
            LogFatal("trie not initted!");
            return false;
        }
        TrieNode* p = _root;
        //for(Unicode::const_iterator it = begin; it != end; it++)
        for(uint i = 0; i < unico.size(); i++)
        {
            if(p->hmap.find(unico[i]) == p-> hmap.end())
            {
				break;
            }
			p = p->hmap[unico[i]];
			if(p->isLeaf)
			{
				uint pos = p->nodeInfoVecPos;
				if(pos < _nodeInfoVec.size())
				{
					res.push_back(make_pair(i, &_nodeInfoVec[pos]));
				}
				else
				{
					LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
					return false;
				}
			}
        }
		return !res.empty();
	}
Beispiel #4
0
    // Loads the dictionary at filePath: checks that the trie is initialised
    // and that the file exists, then runs _trieInsert(filePath) followed by
    // _countWeight(). Each failing step is logged and makes the call return
    // false; true is returned only when all steps succeed.
    bool Trie::loadDict(const char * const filePath)
    {
        if(!_getInitFlag())
        {
            LogError("not initted.");
            return false;
        }
        if(!checkFileExist(filePath))
        {
            LogError("cann't find fiel[%s].",filePath);
            return false;
        }

        // Populate the trie first, then compute weights over what was inserted.
        const bool inserted = _trieInsert(filePath);
        if(!inserted)
        {
            LogError("_trieInsert failed.");
            return false;
        }
        const bool weighted = _countWeight();
        if(!weighted)
        {
            LogError("_countWeight failed.");
            return false;
        }
        return true;
    }
Beispiel #5
0
 // Calls _trie.dispose() and clears the init flag when the object is
 // currently initialised; does nothing otherwise. Always returns true.
 virtual bool dispose()
 {
     if(_getInitFlag())
     {
         _trie.dispose();
         _setInitFlag(false);
     }
     return true;
 }
 // Disposes both underlying segmenters (_mpSeg, then _hmmSeg) and clears
 // the init flag when the object is currently initialised; does nothing
 // otherwise. Always returns true.
 bool MixSegment::dispose()
 {
     if(_getInitFlag())
     {
         _mpSeg.dispose();
         _hmmSeg.dispose();
         _setInitFlag(false);
     }
     return true;
 }
Beispiel #7
0
    bool Trie::insert(const TrieNodeInfo& nodeInfo)
    {
        if(!_getInitFlag())
        {
            LogFatal("not initted!");
            return false;
        }

        
        const Unicode& uintVec = nodeInfo.word;
        TrieNode* p = _root;
        for(uint i = 0; i < uintVec.size(); i++)
        {
            uint16_t cu = uintVec[i];
            if(NULL == p)
            {
                return false;
            }
            if(p->hmap.end() == p->hmap.find(cu))
            {
                TrieNode * next = NULL;
                try
                {
                    next = new TrieNode;
                }
                catch(const bad_alloc& e)
                {
                    return false;
                }
                p->hmap[cu] = next;
                p = next;
            }
            else
            {
                p = p->hmap[cu];
            }
        }
        if(NULL == p)
        {
            return false;
        }
        if(p->isLeaf)
        {
            LogError("this node already inserted");
            return false;
        }

        p->isLeaf = true;
        _nodeInfoVec.push_back(nodeInfo);
        p->nodeInfoVecPos = _nodeInfoVec.size() - 1;

        return true;
    }
Beispiel #8
0
    // Decodes str with TransCode::decode and walks the trie along the decoded
    // characters starting at _root. Before each character is consumed, a
    // current node that is a leaf becomes the result, so the info of the last
    // such leaf is returned. The walk stops at the first character that has
    // no child in the current node.
    // Returns NULL when the trie is not initialised, decoding fails, a leaf
    // carries an out-of-range nodeInfoVecPos, or no leaf was recorded.
    // NOTE(review): the node reached after the final character is never
    // tested for isLeaf, so a dictionary word equal to the whole of str is
    // not reported - confirm whether that is intended.
    const TrieNodeInfo* Trie::findPrefix(const string& str)const
    {
        if(!_getInitFlag())
        {
            LogFatal("trie not initted!");
            return NULL;
        }

        Unicode decoded;
        if(!TransCode::decode(str, decoded))
        {
            LogError("TransCode::decode failed.");
            return NULL;
        }

        const TrieNodeInfo * best = NULL;
        TrieNode* node = _root;
        for(uint idx = 0; idx < decoded.size(); ++idx)
        {
            if(node->isLeaf)
            {
                const uint infoPos = node->nodeInfoVecPos;
                if(infoPos >= _nodeInfoVec.size())
                {
                    LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
                    return NULL;
                }
                best = &(_nodeInfoVec[infoPos]);
            }

            const uint16_t ch = decoded[idx];
            if(node->hmap.find(ch) == node->hmap.end())
            {
                break;
            }
            node = node->hmap[ch];
        }
        return best;
    }
Beispiel #9
0
    // Tears the trie down: deletes the node tree via _deleteNode(_root),
    // resets _root to NULL, clears _nodeInfoVec and clears the init flag.
    // Returns false when the trie is not initialised or _deleteNode fails
    // (the latter is logged and leaves the remaining state untouched);
    // returns true otherwise.
    bool Trie::dispose()
    {
        if(!_getInitFlag())
        {
            return false;
        }

        if(!_deleteNode(_root))
        {
            LogFatal("_deleteNode failed!");
            return false;
        }

        _root = NULL;
        _nodeInfoVec.clear();
        _setInitFlag(false);
        return true;
    }
 // Initialises both underlying segmenters: _mpSeg with mpSegDict, then
 // _hmmSeg with hmmSegDict, and finally sets the init flag.
 // Returns false (after logging) when already initialised or when either
 // segmenter fails to initialise; otherwise returns the result of
 // _setInitFlag(true).
 bool MixSegment::init(const char* const mpSegDict, const char* const hmmSegDict)
 {
     if(_getInitFlag())
     {
         LogError("inited.");
         return false;
     }
     if(!_mpSeg.init(mpSegDict))
     {
         LogError("_mpSeg init");
         return false;
     }
     if(!_hmmSeg.init(hmmSegDict))
     {
         LogError("_hmmSeg init");
         // Roll back the half-finished initialisation so _mpSeg is not left
         // initialised while this object reports itself as uninitialised.
         _mpSeg.dispose();
         return false;
     }
     return _setInitFlag(true);
 }
Beispiel #11
0
            // Decodes str with TransCode::decode and forwards the decoded
            // sequence to the iterator-based cut() overload, which receives
            // segWordInfos.
            // Returns false when the object is not initialised (logged), when
            // str is empty, or when decoding fails (logged); otherwise returns
            // the result of the forwarded call.
            bool cut(const string& str, vector<TrieNodeInfo>& segWordInfos)const
            {
                if(!_getInitFlag())
                {
                    LogError("not inited.");
                    return false;
                }
                if(str.empty())
                {
                    return false;
                }

                Unicode sentence;
                if(TransCode::decode(str, sentence))
                {
                    return cut(sentence.begin(), sentence.end(), segWordInfos);
                }

                LogError("TransCode::decode failed.");
                return false;
            }
Beispiel #12
0
 // Initialises the segmenter: initialises _trie, loads the dictionary at
 // _dictPath into it, then sets the init flag.
 // Returns false (after logging) when already initialised, when _trie.init()
 // fails, or when loading the dictionary fails; otherwise returns the result
 // of _setInitFlag(true).
 virtual bool init()
 {
     if(_getInitFlag())
     {
         LogError("already inited before now.");
         return false;
     }
     if(!_trie.init())
     {
         LogError("_trie.init failed.");
         return false;
     }
     LogInfo("_trie.loadDict(%s) start...", _dictPath.c_str());
     if(!_trie.loadDict(_dictPath.c_str()))
     {
         LogError("_trie.loadDict faield.");
         // Roll back the trie initialisation so a later init() attempt does
         // not find _trie still initialised from this failed run.
         _trie.dispose();
         return false;
     }
     LogInfo("_trie.loadDict end.");
     return _setInitFlag(true);
 }
Beispiel #13
0
 // Looks up the exact sequence [begin, end) in the trie, starting at _root.
 // Returns a pointer to the matching _nodeInfoVec entry when the walk
 // consumes every character and ends on a leaf with a valid nodeInfoVecPos.
 // Returns NULL when the trie is not initialised, begin >= end, a character
 // has no child in the current node, the final node is not a leaf, or the
 // leaf's nodeInfoVecPos is out of range (logged).
 const TrieNodeInfo* Trie::find(Unicode::const_iterator begin, Unicode::const_iterator end)const
 {
     if(!_getInitFlag())
     {
         LogFatal("trie not initted!");
         return NULL;
     }
     if(begin >= end)
     {
         return NULL;
     }

     TrieNode* node = _root;
     for(Unicode::const_iterator cur = begin; cur != end; ++cur)
     {
         const uint16_t ch = *cur;
         if(node->hmap.find(ch) == node->hmap.end())
         {
             return NULL;
         }
         node = node->hmap[ch];
     }

     if(!node->isLeaf)
     {
         return NULL;
     }

     const uint infoPos = node->nodeInfoVecPos;
     if(infoPos >= _nodeInfoVec.size())
     {
         LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
         return NULL;
     }
     return &(_nodeInfoVec[infoPos]);
 }
Beispiel #14
0
    bool Trie::init()
    {
        if(_getInitFlag())
        {
            LogError("already initted!");
            return false;
        }

        try
        {
            _root = new TrieNode;
        }
        catch(const bad_alloc& e)
        {
            return false;
        }
        if(NULL == _root)
        {
            return false;
        }
        _setInitFlag(true);
        return true;
    }
Beispiel #15
0
 // Boolean conversion: yields the value returned by _getInitFlag().
 operator bool() const
 {
     return _getInitFlag();
 }
    // Segments [begin, end) with _mpSeg, then re-segments every run of
    // consecutive single-character words with _hmmSeg. Multi-character words
    // and the _hmmSeg results are encoded with TransCode::encode and appended
    // to res in their original order.
    // Returns false when the object is not initialised (logged), when the
    // range is empty, or when either underlying segmenter fails (logged).
    // A word whose encoding fails is logged and skipped, matching the
    // behaviour of the vector<string> cut() overload of the plain segmenter.
    bool MixSegment::cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
    {
        if(!_getInitFlag())
        {
            LogError("not inited.");
            return false;
        }
        if(begin == end)
        {
            return false;
        }
        vector<TrieNodeInfo> infos;
        if(!_mpSeg.cut(begin, end, infos))
        {
            LogError("mpSeg cutDAG failed.");
            return false;
        }

        Unicode unico; // pending run of single-character words
        vector<Unicode> hmmRes;
        string tmp;
        // The extra iteration at i == infos.size() flushes a run that reaches
        // the end of the input, so the flush logic is written only once.
        for(uint i = 0; i <= infos.size(); i++)
        {
            const bool atEnd = (i == infos.size());
            if(!atEnd && 1 == infos[i].word.size())
            {
                unico.push_back(infos[i].word[0]);
                continue;
            }

            // A multi-character word (or the end of input) closes the run.
            if(!unico.empty())
            {
                hmmRes.clear();
                if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
                {
                    LogError("_hmmSeg cut failed.");
                    return false;
                }
                for(uint j = 0; j < hmmRes.size(); j++)
                {
                    if(TransCode::encode(hmmRes[j], tmp))
                    {
                        res.push_back(tmp);
                    }
                    else
                    {
                        LogError("encode failed.");
                    }
                }
                unico.clear();
            }

            if(!atEnd)
            {
                if(TransCode::encode(infos[i].word, tmp))
                {
                    res.push_back(tmp);
                }
                else
                {
                    LogError("encode failed.");
                }
            }
        }

        return true;
    }