C++ (Cpp) CDictionary::GetFrequencyの例

プログラミング言語: C++ (Cpp)

クラス/型: CDictionary

メソッド/関数: GetFrequency

hotexamples.comのコード掲載数: 5

C++ (Cpp) CDictionary::GetFrequency - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のCDictionary::GetFrequencyの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

IsExist(5)

GetFrequency(5)

GetTranslate(3)

GetHandle(2)

Load(2)

AddWord(1)

setValueForKey(1)

SaveTranslationForNewWordOnUserDemand(1)

SaveNewWordsOnUserDemand(1)

Save(1)

ReadUnitComments(1)

PrintTranslation(1)

LoadOnlyConstants(1)

LoadDictionary(1)

IsNewWordsListEmpty(1)

IsKnowWord(1)

GetUnitTextHeader(1)

ImportFromText(1)

BeginTransaction(1)

GetString(1)

GetRGBA(1)

GetMaxMatch(1)

GetBool(1)

FindWord(1)

FillDictionary(1)

Exists(1)

EndTransaction(1)

DictionarySave(1)

ClearAll(1)

valueAsUInt32ForKey(1)

コード例 #1

ファイルを表示

ファイル: Segment.cpp プロジェクト: firememory/dfwbi

//Builds the bigram transition net between each candidate word and the words that follow it.
//CDynamicArray &aWord: the words array (read; each element supplies row, col, value, nPOS and sWord)
//CDynamicArray &aBinaryWordNet: the net between words (filled through SetElement)
//double dSmoothingPara: the parameter of data smoothing ("a" in the formula below, 0<a<1)
//CDictionary &DictBinary: the binary dictionary, queried for the frequency of "word1"+WORD_SEGMENTER+"word2"
//CDictionary &DictCore: the core dictionary, queried for the frequency of unknown words
//Returns: always true
bool CSegment::BiGraphGenerate(CDynamicArray &aWord, CDynamicArray &aBinaryWordNet,double dSmoothingPara,CDictionary &DictBinary,CDictionary &DictCore)
{
	PARRAY_CHAIN pTail,pCur,pNextWords;//Temp buffer
	unsigned int nWordIndex=0,nTwoWordsFreq=0,nCurWordIndex,nNextWordIndex;
	//nWordIndex: the index number of current word
	double dCurFreqency,dValue;
	const double dTemp=(double)1/MAX_FREQUENCE;//Loop invariant, so computed once
	char sTwoWords[WORD_MAXLENGTH];
	m_nWordCount=aWord.GetTail(&pTail);//Get tail element and return the words count
	if(m_npWordPosMapTable)
	{//free the table left over from the previous call
		delete [] m_npWordPosMapTable;
		m_npWordPosMapTable=0;
	}
	if(m_nWordCount<=0)//No words: there is nothing to map or to link
		return true;
	m_npWordPosMapTable=new int[m_nWordCount];//Record the position of possible words
	memset(m_npWordPosMapTable,0,m_nWordCount*sizeof(int));

	//Set the position map of words: each word is keyed by row*MAX_SENTENCE_LEN+col.
	//The index bound keeps the writes inside the table even if the chain is
	//longer than the count reported by GetTail.
	pCur=aWord.GetHead();
	while(pCur!=NULL&&nWordIndex<(unsigned int)m_nWordCount)
	{
		m_npWordPosMapTable[nWordIndex++]=pCur->row*MAX_SENTENCE_LEN+pCur->col;
		pCur=pCur->next;
	}

	pCur=aWord.GetHead();
	while(pCur!=NULL)
	{
		if(pCur->nPOS>=0)//It's not an unknown word
			dCurFreqency=pCur->value;
		else//Unknown word
			dCurFreqency=DictCore.GetFrequency(pCur->sWord,2);
		aWord.GetElement(pCur->col,-1,pCur,&pNextWords);//Get next words which begin with pCur->col
		while(pNextWords&&pNextWords->row==pCur->col)//Next words
		{
			//Build the lookup key "current"+WORD_SEGMENTER+"next".
			//strncat with the remaining capacity never writes past sTwoWords;
			//an over-long key is truncated instead of overflowing the buffer.
			sTwoWords[0]=0;
			strncat(sTwoWords,pCur->sWord,sizeof(sTwoWords)-1);
			strncat(sTwoWords,WORD_SEGMENTER,sizeof(sTwoWords)-1-strlen(sTwoWords));
			strncat(sTwoWords,pNextWords->sWord,sizeof(sTwoWords)-1-strlen(sTwoWords));
			//Two linked words frequency
			nTwoWordsFreq=DictBinary.GetFrequency(sTwoWords,3);
			//Smoothing
			dValue=-log(dSmoothingPara*(1+dCurFreqency)/(MAX_FREQUENCE+80000)+(1-dSmoothingPara)*((1-dTemp)*nTwoWordsFreq/(1+dCurFreqency)+dTemp));
			//-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
			if(pCur->nPOS<0)//Unknown words: P(Wi|Ci);while known words:1
				dValue+=pCur->value;

			//Get the position index of both words in the position map table
			nCurWordIndex=BinarySearch(pCur->row*MAX_SENTENCE_LEN+pCur->col,m_npWordPosMapTable,m_nWordCount);
			nNextWordIndex=BinarySearch(pNextWords->row*MAX_SENTENCE_LEN+pNextWords->col,m_npWordPosMapTable,m_nWordCount);
			aBinaryWordNet.SetElement(nCurWordIndex,nNextWordIndex,dValue,pCur->nPOS);
			pNextWords=pNextWords->next;//Get next word
		}
		pCur=pCur->next;
	}
	return true;
}

コード例 #2

ファイルを表示

ファイル: Result.cpp プロジェクト: Dashboard-X/WebSearch3.1

//Splits sPersonName into surname, optional second surname and given name,
//and decides whether the result looks like a Chinese person name.
//sPersonName: the candidate name (read as 2-byte characters)
//sSurname:    receives the first 4 or 2 bytes when personDict knows them under handle 1, else ""
//sSurname2:   receives a second 2-byte surname when one is found, else ""
//sGivenName:  receives the remainder of the name
//personDict:  dictionary queried with IsExist/GetFrequency
//Returns false when the name is rejected by any of the checks below, true otherwise.
bool CResult::ChineseNameSplit(char *sPersonName, char *sSurname, char *sSurname2, char *sGivenName, CDictionary &personDict)
{
	int nSurNameLen=4;
	int nLen=strlen(sPersonName);
	int nFreq,nFreqGiven,nCharType;
	char sTemp[3];

	//Length outside 3..8 bytes: not a traditional Chinese person name
	if(nLen<3||nLen>8)
		return false;

	//Every 2-byte step must be of type CT_CHINESE or CT_OTHER
	for(int i=0;i<nLen;i+=2)
	{
		nCharType=charType((unsigned char*)sPersonName+i);
		if(!(nCharType==CT_CHINESE||nCharType==CT_OTHER))
			return false;
	}

	sSurname2[0]=0;//init

	//Try a 4-byte surname first, then a 2-byte one, else no surname at all
	strncpy(sSurname,sPersonName,nSurNameLen);
	sSurname[nSurNameLen]=0;
	if(!personDict.IsExist(sSurname,1))
	{
		sSurname[2]=0;
		if(personDict.IsExist(sSurname,1))
			nSurNameLen=2;
		else
		{
			nSurNameLen=0;
			sSurname[0]=0;
		}
	}

	strcpy(sGivenName,sPersonName+nSurNameLen);
	if(nLen>6)
	{//Look for a second possible surname right after the first one
		strncpy(sTemp,sPersonName+nSurNameLen,2);
		sTemp[2]=0;
		if(personDict.IsExist(sTemp,1))
		{//Hongkong women's name: Surname+surname+given name
			strcpy(sSurname2,sTemp);
			strcpy(sGivenName,sPersonName+nSurNameLen+2);
		}
	}

	nFreq=personDict.GetFrequency(sSurname,1);
	strncpy(sTemp,sGivenName,2);
	sTemp[2]=0;
	nFreqGiven=personDict.GetFrequency(sTemp,2);

	//Rejection rules; they apply only when the surname is not a 4-byte one.
	//They are evaluated in this order, stopping at the first one that fires.
	if(nSurNameLen!=4)
	{
		if(nSurNameLen==0&&nLen>4)
			return false;
		if(strlen(sGivenName)>4)
			return false;
		if(GetForeignCharCount(sPersonName)>=3
			&&nFreq<personDict.GetFrequency("张",1)/40
			&&nFreqGiven<personDict.GetFrequency("华",2)/20)
			return false;
		if(nFreq<10&&GetForeignCharCount(sGivenName)==(nLen-nSurNameLen)/2)
			return false;
	}

	//Single Surname+given name
	if(nLen==4&&m_uPerson.IsGivenName(sPersonName))
		return false;

	return true;
}

コード例 #3

ファイルを表示

ファイル: Span.cpp プロジェクト: tedzhang/SearchMonkey

//POS tagging with Hidden Markov Model
//pWordItems:  the word items; the scan stops at the first item whose sWord is empty
//dictCore:    core dictionary
//dictUnknown: unknown-word recognition dictionary
//Returns: always true
bool CSpan::POSTagging(PWORD_RESULT pWordItems,CDictionary &dictCore,CDictionary &dictUnknown)
{
	int i=0,j,nStartPos,nItem;
	Reset(false);
	while(i>-1&&pWordItems[i].sWord[0]!=0)
	{
		nStartPos=i;//Start Position
		i=GetFrom(pWordItems,nStartPos,dictCore,dictUnknown);
		GetBestPOS();
		switch(m_tagType)
		{
		case TT_NORMAL://normal POS tagging
			//The length bound is tested before m_nBestTag[j] is read, so the
			//tag array is never indexed at or beyond m_nCurLength.
			for(j=1;j<m_nCurLength&&m_nBestTag[j]!=-1;j++)
			{//Store the best POS tagging
				nItem=j+nStartPos-1;//Tag j belongs to word item j+nStartPos-1
				pWordItems[nItem].nHandle=m_nBestTag[j];
				//Items whose dValue is not positive keep their value unchanged
				if(pWordItems[nItem].dValue>0&&dictCore.IsExist(pWordItems[nItem].sWord,-1))//Exist and update its frequency as a POS value
					pWordItems[nItem].dValue=LOG_MAX_FRQUENCE-log((double)dictCore.GetFrequency(pWordItems[nItem].sWord,m_nBestTag[j])+1);
			}
			break;
		case TT_PERSON://Person recognition
			SplitPersonPOS(dictUnknown);//Split the person POS tags
			PersonRecognize(dictUnknown);//Person recognition with the person recognition dictionary
			break;
		case TT_PLACE://Place name recognition
			PlaceRecognize(dictCore,dictUnknown);
			break;
		case TT_TRANS://Transliteration
			TransRecognize(dictCore,dictUnknown);
			break;
		default:
			break;
		}
		Reset();
	}
	return true;
}

コード例 #4

ファイルを表示

ファイル: Span.cpp プロジェクト: tedzhang/SearchMonkey

//Sums, for positions nStartPos .. nStartPos+nLength-1, the value
//  log(m_context.GetFrequency(0,tag)+1) - log(dict.GetFrequency(word,tag)+1)
//where word is m_sWords[pos] and tag is m_nBestTag[pos].
//nStartPos: first position to include
//nLength:   number of positions to include
//dict:      dictionary queried for the frequency of each word under its tag
//Returns the accumulated sum (0 when nLength is not positive).
ELEMENT_TYPE  CSpan::ComputePossibility(int nStartPos,int nLength,CDictionary &dict)
{
	ELEMENT_TYPE dTotal=0;
	const int nEnd=nStartPos+nLength;
	int nPos=nStartPos;
	while(nPos<nEnd)
	{
		//Frequency of the word being used with this POS
		int nWordFreq=dict.GetFrequency(m_sWords[nPos],m_nBestTag[nPos]);
		//Contribution of this position
		ELEMENT_TYPE dTerm=log((double)(m_context.GetFrequency(0,m_nBestTag[nPos])+1))-log((double)(nWordFreq+1));
		dTotal+=dTerm;
		//A context term based on m_context.GetContextPossibility for
		//neighbouring tags was disabled in the original and stays out.
		nPos++;
	}
	return dTotal;
}

コード例 #5

ファイルを表示

ファイル: Span.cpp プロジェクト: tedzhang/SearchMonkey

//Scans the best tag sequence (m_nBestTag) for person-name role patterns and
//records every accepted match as an unknown word.
//personDict: dictionary queried for role frequencies and passed on to ComputePossibility
//For each accepted match this writes m_nUnknownWords[m_nUnknownIndex][0..1] and
//m_dWordsPossibility[m_nUnknownIndex], then increments m_nUnknownIndex.
//Returns: always true
//NOTE(review): sPersonName is 100 bytes and is filled with strcat without a
//length check - confirm the matched words can never exceed it.
bool CSpan::PersonRecognize(CDictionary &personDict)
{
  char sPOS[MAX_WORDS_PER_SENTENCE]="z",sPersonName[100];
                          //0     1    2    3    4   5   
  char sPatterns[][5]={ "BBCD","BBC","BBE","BBZ","BCD","BEE","BE",
						 "BG",  "BXD","BZ", "CDCD","CD","EE", 
						 "FB", "Y","XD",""};
  //dFactor[k] is the weight of sPatterns[k]; log(dFactor[k]) is added to the match score
  double dFactor[]={0.0011,0.0011,0.0011,0.0011,0.7614,0.0011,0.2055,
						 0.0160,0.0011,0.0011,0,0.0160,0.0011,
						 0.0160,0.0011,0.0011,0 };
  //About the parameters (count and resulting factor):
/*
	Given Name: 486     0.0160
	Surname+postfix:484 0.0160
	m_lPerson2Num:6265   0.2055
	m_lPerson3Num: 23184 0.7614
	m_lPerson4Num:32     0.0011
  */
  //The person recognition patterns set
  //BBCD: surname + surname + given-name char 1 + given-name char 2
  //BBE:  surname + surname + single-char given name
  //BBZ:  surname + surname + two-char given name that forms a word
  //BCD:  surname + given-name char 1 + given-name char 2
  //BE:   surname + single-char given name
  //BEE:  surname + single-char name + single-char name, e.g. "韩磊磊"
  //BG:   surname + suffix
  //BXD:  surname + (surname and first given-name char forming a word) + last given-name char
  //BZ:   surname + two-char given name that forms a word
  //B:    surname
  //CD:   given-name char 1 + given-name char 2
  //EE:   single-char name + single-char name
  //FB:   prefix + surname
  //XD:   (surname and first given-name char forming a word) + last given-name char
  //Y:    surname and single-char given name forming a word
  //nPatternLen[k] is the length of sPatterns[k]; the trailing 0 ends the pattern loop
  int nPatternLen[]={4,3,3,3,3,3,2,2,3,2,4,2,2,2,1,2,0};

  int i;
  for(i=1;m_nBestTag[i]>-1;i++)//Convert the tags to a string: tag n becomes the letter 'A'+n
	sPOS[i]=m_nBestTag[i]+'A';
  sPOS[i]=0;
  int j=1,k,nPos;//Find the proper pattern from the first POS
  int nLittleFreqCount;//Counter for the person name roles with little frequency
  bool bMatched=false;   
  while(j<i)
  {
	bMatched=false;   
	//Patterns are tried in table order. A "continue" below moves on to the
	//next pattern k, using whatever value j has at that moment.
	for(k=0;!bMatched&&nPatternLen[k]>0;k++)
	{
		//The pattern must match at j, and neither the word before the match
		//nor the word right after it may be "·"
		if(strncmp(sPatterns[k],sPOS+j,nPatternLen[k])==0&&strcmp(m_sWords[j-1],"·")!=0&&strcmp(m_sWords[j+nPatternLen[k]],"·")!=0)
		{//Find the proper pattern k
			if(strcmp(sPatterns[k],"FB")==0&&(sPOS[j+2]=='E'||sPOS[j+2]=='C'||sPOS[j+2]=='G'))
			{//Rule 1 for exclusion: prefix + surname followed by a given-name role: the (prefix + surname) rule does not apply
				continue;
			}
/*			if((strcmp(sPatterns[k],"BEE")==0||strcmp(sPatterns[k],"EE")==0)&&strcmp(m_sWords[j+nPatternLen[k]-1],m_sWords[j+nPatternLen[k]-2])!=0)
			{//Rule 2 for exclusion (disabled): surname + single + single / single + single: the rule fails when the two E characters differ, e.g. "韩磊磊"
				continue;
			}

			if(strcmp(sPatterns[k],"B")==0&&m_nBestTag[j+1]!=12)
			{//Rule 3 for exclusion (disabled): the rule fails when the surname is not followed by a suffix, e.g. "江主席", "刘大娘"
				continue;
			}
*/			//Get the possible name
			nPos=j;//Record the person position in the tag sequence
			sPersonName[0]=0;
			nLittleFreqCount=0;//Record the number of roles with little frequency
			while(nPos<j+nPatternLen[k])
			{//Get the possible person name
			 //Roles with a tag below 4 are counted when their frequency is below LITTLE_FREQUENCY
				if(m_nBestTag[nPos]<4&&personDict.GetFrequency(m_sWords[nPos],m_nBestTag[nPos])<LITTLE_FREQUENCY)
					nLittleFreqCount++;//The counter increase
				strcat(sPersonName,m_sWords[nPos]);
				nPos+=1;
			}
			if(IsAllForeign(sPersonName)&&personDict.GetFrequency(m_sWords[j],1)<LITTLE_FREQUENCY)
			{//Exclusion of foreign names
			 //Rule 2 for exclusion: if all characters are foreign-name characters, the (given 1 + given 2) rule does not apply
				j+=nPatternLen[k]-1;
				continue;
			}
			if(strcmp(sPatterns[k],"CDCD")==0)
			{//Rule for exclusion
			 //The (given 1 + given 2 + given 1 + given 2) rule is itself an exclusion rule, e.g. "女高音歌唱家迪里拜尔演唱"
			 //Rule 3 for exclusion: it applies when the name contains foreign-name characters
			 //Otherwise the exclusion does not apply, e.g. "黑妞白妞姐俩拔了头筹。"
			 //This branch always continues, so CDCD is never recorded and its dFactor of 0 is never used
				if(GetForeignCharCount(sPersonName)>0)
					j+=nPatternLen[k]-1;
				continue;
			}
			if(strcmp(sPatterns[k],"CD")==0&&IsAllForeign(sPersonName))
			{//An all-foreign (given 1 + given 2) match is skipped
				j+=nPatternLen[k]-1;
				continue;
			}
			if(nLittleFreqCount==nPatternLen[k]||nLittleFreqCount==3)
			//Examples: "马哈蒂尔"; "小扎耶德与他的中国阿姨胡彩玲受华黎明大使之邀，"
			//Ignore the match when every role, or exactly three roles, have little frequency
				continue;
			//Accept the match: store its start and end word positions and its score
			m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[j];
			m_nUnknownWords[m_nUnknownIndex][1]=m_nWordPosition[j+nPatternLen[k]];
			m_dWordsPossibility[m_nUnknownIndex]=log(dFactor[k])+ComputePossibility(j,nPatternLen[k],personDict);
			//Multiply by the factor (added here because the score is a logarithm)
			m_nUnknownIndex+=1;
			j+=nPatternLen[k];
			bMatched=true;
		}
	}
    if(!bMatched)//Not matched, add j by 1
		j+=1;
  }
  return true;
}