bool CSegGraph::GenerateWordNet(char *sSentence,CDictionary &dictCore,bool bOriginalFreq)
{//Generate the word net from sSentence, i.e. list all the possible words
    unsigned int i=0,j,nLen=strlen(sSentence);
    char sWord[WORD_MAXLENGTH]="",sTempWord[WORD_MAXLENGTH]="",sWordMatch[WORD_MAXLENGTH];
    int nWordIndex=0,nHandleTemp,k,nPOS;
    int nMatchFreq[20],nMatchHandle[20],nTotalFreq,nMatchCount;
    double dValue=0;
    m_nAtomCount=0;
    m_segGraph.SetEmpty();//Set the segmentation graph empty
    AtomSegment(sSentence);//Atomic segmentation
    for(i=0;i<m_nAtomCount;i++)//Init the cost array
    {
        if(m_nAtomPOS[i]==CT_CHINESE)//The atom is a Chinese character
        {
            if(!bOriginalFreq)//Not the original frequency
                m_segGraph.SetElement(i,i+1,log(MAX_FREQUENCE),0);//init the link with the maximum cost
            else
                m_segGraph.SetElement(i,i+1,0,0,m_sAtom[i]);//keep the original frequency and store the atom itself
        }
        else//Other atom types
        {
            strcpy(sWord,m_sAtom[i]);//init the word
            dValue=MAX_FREQUENCE;
            switch(m_nAtomPOS[i])
            {
            case CT_INDEX:
            case CT_NUM:
                nPOS=-27904;//'m'*256
                strcpy(sWord,"未##数");
                dValue=0;
                break;
            case CT_DELIMITER:
                nPOS=30464;//'w'*256
                break;
            case CT_LETTER:
                nPOS=-'n'*256-'x';
                dValue=0;
                strcpy(sWord,"未##串");
                break;
            case CT_SINGLE://12021-2129-3121
                if(GetCharCount("+-1234567890",m_sAtom[i])==(int)strlen(m_sAtom[i]))
                {
                    nPOS=-27904;//'m'*256
                    strcpy(sWord,"未##数");
                }
                else
                {
                    nPOS=-'n'*256-'x';
                    strcpy(sWord,"未##串");
                }
                dValue=0;
                break;
            default:
                nPOS=m_nAtomPOS[i];
                break;
            }
            if(!bOriginalFreq)//Not the original frequency
                m_segGraph.SetElement(i,i+1,0,nPOS);//init the link with the minimum cost
            else
                m_segGraph.SetElement(i,i+1,dValue,nPOS,sWord);//store the original value and the recognized word
        }
    }
    i=0;
    while(i<m_nAtomCount)//Enumerate all possible words
    {
        strcpy(sWord,m_sAtom[i]);//Get the current atom
        j=i+1;
        if(strcmp(sWord,"月")==0&&strcmp(m_sAtom[i+1],"份")==0)//Don't split 月份
            j+=1;
        while(j<=m_nAtomCount&&dictCore.GetMaxMatch(sWord,sWordMatch,&nHandleTemp))
        {//The condition j<=m_nAtomCount controls the end of the string
            //Look the word up in the core dictionary
            if(strcmp(sWordMatch,sWord)==0)//The current word is in the dictionary
            {
                nTotalFreq=0;
                dictCore.GetHandle(sWord,&nMatchCount,nMatchHandle,nMatchFreq);
                for(k=0;k<nMatchCount;k++)//Add up the frequencies
                {
                    nTotalFreq+=nMatchFreq[k];
                }
                //A rule to exclude certain words from being formed
                if(strlen(sWord)==4&&i>=1&&(IsAllNum((unsigned char *)m_sAtom[i-1])||IsAllChineseNum(m_sAtom[i-1]))&&(strncmp(sWord,"年",2)==0||strncmp(sWord,"月",2)==0))
                {//e.g. 1年内, 1999年末
                    if(CC_Find("末内中底前间初",sWord+2))
                        break;
                }
                if(nMatchCount==1)//The possible word has only one POS, store it
                {
                    if(!bOriginalFreq)//Not the original frequency
                        m_segGraph.SetElement(i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),nMatchHandle[0]);
                    else
                        m_segGraph.SetElement(i,j,nTotalFreq,nMatchHandle[0],sWord);
                }
                else//The possible word has multiple POSs
                {
                    if(!bOriginalFreq)//Not the original frequency
                        m_segGraph.SetElement(i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),0);
                    else
                        m_segGraph.SetElement(i,j,nTotalFreq,0,sWord);
                }
            }
            strcat(sWord,m_sAtom[j++]);
        }
        i+=1;//Move to the next atom
    }
    return true;
}
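//Illustrative sketch (not part of the original source): for a dictionary word with
//total frequency nTotalFreq, GenerateWordNet above weights the corresponding edge
//with a smoothed negative log frequency, -log(nTotalFreq+1)+log(MAX_FREQUENCE),
//so more frequent words get cheaper edges in the segmentation graph. The helper
//below only restates that formula in isolation; its name is hypothetical and it
//assumes MAX_FREQUENCE is the frequency ceiling defined elsewhere in this project.
static double IllustrateSegEdgeCost(int nTotalFreq)
{
    return -log((double)(nTotalFreq+1))+log((double)MAX_FREQUENCE);
}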
int CSpan::GetFrom(PWORD_RESULT pWordItems,int nIndex,CDictionary &dictCore,CDictionary &dictUnknown)
{
    int nCount,aPOS[MAX_POS_PER_WORD],aFreq[MAX_POS_PER_WORD];
    int nFreq=0,j,nRetPos=0,nWordsIndex=0;
    bool bSplit=false;//Whether the word needs to be split during transliteration recognition
    int i=1;
    nWordsIndex=i+nIndex-1;
    for(;i<MAX_WORDS_PER_SENTENCE&&pWordItems[nWordsIndex].sWord[0]!=0;i++)
    {
        if(m_tagType==TT_NORMAL||!dictUnknown.IsExist(pWordItems[nWordsIndex].sWord,44))
        {
            strcpy(m_sWords[i],pWordItems[nWordsIndex].sWord);//store the current word
            m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(m_sWords[i]);
        }
        else
        {
            if(!bSplit)
            {
                strncpy(m_sWords[i],pWordItems[nWordsIndex].sWord,2);//store the first part of the current word
                m_sWords[i][2]=0;
                bSplit=true;
            }
            else
            {
                unsigned int nLen=strlen(pWordItems[nWordsIndex].sWord+2);
                strncpy(m_sWords[i],pWordItems[nWordsIndex].sWord+2,nLen);//store the remainder of the current word
                m_sWords[i][nLen]=0;
                bSplit=false;
            }
            m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(m_sWords[i]);
        }
        //Record the position of the current word: move the start position to its ending
        m_nStartPos=m_nWordPosition[i+1];
        if(m_tagType!=TT_NORMAL)
        {
            //Get the POSs from the unknown-word recognition dictionary
            dictUnknown.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
            for(j=0;j<nCount;j++)
            {//The POS set of the current word in the unknown-word dictionary
                m_nTags[i][j]=aPOS[j];
                m_dFrequency[i][j]=-log((double)(1+aFreq[j]))+log((double)(m_context.GetFrequency(0,aPOS[j])+1));
            }
            //Get the POS set of the current word in the core dictionary.
            //We ignore the POSs in the core dictionary, treat them all as "other" (0)
            //and add up their frequencies to estimate the probability of POS 0.
            dictCore.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
            nFreq=0;
            for(int k=0;k<nCount;k++)
            {
                nFreq+=aFreq[k];
            }
            if(nCount>0)
            {
                m_nTags[i][j]=0;
                //m_dFrequency[i][j]=(double)(1+nFreq)/(double)(m_context.GetFrequency(0,0)+1);
                m_dFrequency[i][j]=-log((double)(1+nFreq))+log((double)(m_context.GetFrequency(0,0)+1));
                j++;
            }
        }
        else//Normal POS tagging
        {
            j=0;
            if(pWordItems[nWordsIndex].nHandle>0)
            {//The word has only one POS value;
             //its POS and frequency were already recorded in the word item
                m_nTags[i][j]=pWordItems[nWordsIndex].nHandle;
                m_dFrequency[i][j]=pWordItems[nWordsIndex].dValue-LOG_MAX_FRQUENCE+log((double)(m_context.GetFrequency(0,m_nTags[i][j])+1));
                if(m_dFrequency[i][j]<0)//Do not permit values less than 0
                    m_dFrequency[i][j]=0;
                j++;
            }
            else
            {//nHandle<=0: an unknown-word candidate, or a word with several POSs in the core dictionary
                if(pWordItems[nWordsIndex].nHandle<0)
                {//Unknown-word candidate: its POS and frequency were recorded in the word item
                    if(pWordItems[nWordsIndex].nHandle==-'t'*256-'t')//tt: transliteration
                    {
                        char sWordOrg[100],sPostfix[10];
                        double dRatio=0.6925;//The ratio of transliterations that are person names
                        PostfixSplit(pWordItems[nWordsIndex].sWord,sWordOrg,sPostfix);
                        if(sPostfix[0]!=0)
                            dRatio=0.01;
                        m_nTags[i][j]='n'*256+'r';
                        m_dFrequency[i][j]=-log(dRatio)+pWordItems[nWordsIndex].dValue;
                        //m_dFrequency[i][j]=log(dRatio)+pWordItems[nWordsIndex].dValue-log(m_context.GetFrequency(0,m_nTags[i][j]))+log(MAX_FREQUENCE);
                        //P(W|R)=P(WRT)/P(RT)=P(R)*P(W|T)/P(R|T)
                        j++;
                        m_nTags[i][j]='n'*256+'s';
                        m_dFrequency[i][j]=-log(1-dRatio)+pWordItems[nWordsIndex].dValue;
                        //m_dFrequency[i][j]=log(1-dRatio)+pWordItems[nWordsIndex].dValue-log(m_context.GetFrequency(0,m_nTags[i][j]))+log(MAX_FREQUENCE);
                        j++;
                    }
                    else//Other unknown words such as Chinese person names or place names
                    {
                        m_nTags[i][j]=-pWordItems[nWordsIndex].nHandle;
                        //m_dFrequency[i][j++]=(double)(1+pWordItems[nWordsIndex].nFrequency)/(double)(m_context.GetFrequency(0,aPOS[j])+1);
                        m_dFrequency[i][j++]=pWordItems[nWordsIndex].dValue;
                    }
                }
                dictCore.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
                for(;j<nCount;j++)
                {//The POS set of the current word in the core dictionary
                    m_nTags[i][j]=aPOS[j];
                    m_dFrequency[i][j]=-log((double)1+aFreq[j])+log((double)m_context.GetFrequency(0,m_nTags[i][j])+1);
                }
            }
        }
        if(j==0)
        {//The POS is unknown, so guess it according to lexical knowledge
            GuessPOS(i,&j);//Guess the POS of the current word
        }
        m_nTags[i][j]=-1;//Set the ending POS
        if(j==1)//No ambiguity, so we can break out of the loop
        {
            i++;
            m_sWords[i][0]=0;
            break;
        }
        if(!bSplit)
        {
            nWordsIndex++;
        }
    }
    if(pWordItems[nWordsIndex].sWord[0]==0)
        nRetPos=-1;//Reached the end
    if(m_nTags[i-1][1]!=-1)//||m_sWords[i][0]==0
    {//Set the end for words like "张/华/平"
        if(m_tagType!=TT_NORMAL)
            m_nTags[i][0]=101;
        else
            m_nTags[i][0]=1;
        m_dFrequency[i][0]=0;
        m_sWords[i][0]=0;//Set the virtual ending
        m_nTags[i++][1]=-1;
    }
    m_nCurLength=i;//The current word count
    if(nRetPos!=-1)
        return nWordsIndex+1;//Next start position
    return -1;//Reached the end
}
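//Illustrative sketch (not part of the original source): the tag costs that GetFrom
//fills into m_dFrequency approximate -log P(word|tag) with add-one smoothing, i.e.
//-log(freq(word,tag)+1)+log(freq(tag)+1), where the tag frequency comes from the
//context model via m_context.GetFrequency(0,nPOS). The helper below restates the
//formula with hypothetical parameter names; it is not part of CSpan.
static double IllustrateTagCost(int nWordTagFreq,int nTagFreq)
{
    return -log((double)(nWordTagFreq+1))+log((double)(nTagFreq+1));
}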