Ejemplo n.º 1
0
/// Stores the four time components of this object in `toDictionary`,
/// one entry per component, under the c*Key constants.
void CTime::serialize(CDictionary& toDictionary) const
{
	// Each component is read through its accessor and written straight away,
	// in the order milliseconds, seconds, minutes, hours.
	const auto msPart = milliseconds();
	toDictionary.setValueForKey(cMillisecondsKey, msPart);

	const auto secPart = seconds();
	toDictionary.setValueForKey(cSecondsKey, secPart);

	const auto minPart = minutes();
	toDictionary.setValueForKey(cMinutesKey, minPart);

	const auto hourPart = hours();
	toDictionary.setValueForKey(cHoursKey, hourPart);
}
Ejemplo n.º 2
0
// Segments s1 with the maximum-matching method. This implementation is the
// backward (right-to-left) variant: the candidate word is always taken from the
// END of the remaining input, and every emitted word is followed by Separator.
// Returns the words in their original left-to-right order.
string SegmentSentence(string s1)
{
	string segmented; // accumulated segmentation result

	while (!s1.empty())
	{
		// Never look at more than MaxWordLength trailing bytes.
		int candidateLen = static_cast<int>(s1.length());
		if (candidateLen > MaxWordLength)
		{
			candidateLen = MaxWordLength;
		}

		// Take the tail of the input as the candidate and look it up; while it is
		// not in the dictionary, drop two bytes from its left side (the original
		// comment describes this as removing one Chinese character) and retry.
		// A candidate of two bytes or fewer is accepted as-is.
		string candidate = s1.substr(s1.length() - candidateLen, candidateLen);
		for (int found = WordDic.FindWord(candidate);
			 found == 0 && candidateLen > 2;
			 found = WordDic.FindWord(candidate))
		{
			candidateLen -= 2;
			candidate = s1.substr(s1.length() - candidateLen, candidateLen);
		}

		// Words are discovered right-to-left, so each one is prepended.
		segmented.insert(0, candidate + Separator);

		// Remove the consumed tail from the input.
		s1.erase(s1.length() - candidateLen);
	}
	return segmented;
}
Ejemplo n.º 3
0
    /*
     * Builds the textual form of `num` from the digits returned by
     * Num2Digits(num, power), looking every fragment up in dictionary D and
     * appending it to the result followed by a single space.
     *
     * The logic inside the loop may look convoluted and heavy to maintain.
     * It was written this way to keep the dictionary small; it can be made
     * more maintainable if desired.
     */
    string Translate(int num,int power)
    {
        const vector<int> digits=Num2Digits(num,power);
        string result;
        char key[100];

        const int count=digits.size();
        for(int pos=0;pos<count;++pos)
        {
            // `place` counts down from count-1 to 0 as `pos` walks the digits.
            int place=count-1-pos;
            const int digit=digits[pos];
            const int slot=place%3;

            if(slot==0)
            {
                // Always emitted, even for a zero digit: key "d<place><digit>".
                sprintf(key,"d%d%d", place ,digit );
                result+=D.GetTranslate(key) + " ";
            }
            else if(slot==1)
            {
                if(digit==1)
                {
                    // A leading 1 is merged with the following digit into one
                    // number 10..19, and that following digit is consumed here.
                    ++pos;
                    --place;
                    const int teen=10+digits[pos];
                    sprintf(key,"%d", teen );
                    result+=D.GetTranslate(key) + " ";
                    // The consumed position still contributes its "d<place>0" entry.
                    sprintf(key,"d%d0", place );
                    result+=D.GetTranslate(key) + " ";
                }
                else if(digit!=0)
                {
                    sprintf(key,"%d", digit*10 );
                    result+=D.GetTranslate(key) + " ";
                }
            }
            else if(slot==2&&digit!=0)
            {
                sprintf(key,"%d", digit*100 );
                result+=D.GetTranslate(key) + " ";
            }
        }
        return result;
    }
Ejemplo n.º 4
0
//Builds the two-word ("binary") graph for the candidate words stored in aWord.
//For every word pCur and every following word pNextWords whose row equals pCur->col,
//a smoothed -log transition value is computed and written into aBinaryWordNet.
//As a side effect m_nWordCount and m_npWordPosMapTable are rebuilt.
//CDynamicArray &aWord: the words array
//CDynamicArray &aBinaryWordNet: the net between words (filled by this function)
//double dSmoothingPara: the parameter of data smoothing
//CDictionary &DictBinary: the binary dictionary
//CDictionary &DictCore: the Core dictionary
//Returns true unconditionally.
bool CSegment::BiGraphGenerate(CDynamicArray &aWord, CDynamicArray &aBinaryWordNet,double dSmoothingPara,CDictionary &DictBinary,CDictionary &DictCore)
{
	PARRAY_CHAIN pTail,pCur,pNextWords;//Temp buffer
	unsigned int nWordIndex=0,nTwoWordsFreq=0,nCurWordIndex,nNextWordIndex;
	//nWordIndex: the index number of current word
	double dCurFreqency,dValue,dTemp;
	char sTwoWords[WORD_MAXLENGTH];
	m_nWordCount=aWord.GetTail(&pTail);//Get tail element and return the words count
	if(m_npWordPosMapTable)
	{//free buffer
		delete [] m_npWordPosMapTable;
		m_npWordPosMapTable=0;
	}
	if(m_nWordCount>0)//Word count is greater than 0
        {
		m_npWordPosMapTable=new int[m_nWordCount];//Record the  position of possible words
                memset(m_npWordPosMapTable,0,m_nWordCount*sizeof(int));
        }
	//Each word is identified by the single integer row*MAX_SENTENCE_LEN+col.
	//NOTE(review): the table is left null when m_nWordCount is 0, and the loop below writes
	//one slot per list node without a bound check - this assumes the list holds exactly
	//m_nWordCount nodes; confirm against CDynamicArray::GetTail.
	pCur=aWord.GetHead();
	while(pCur!=NULL)//Set the position map of words
	{
		m_npWordPosMapTable[nWordIndex++]=pCur->row*MAX_SENTENCE_LEN+pCur->col;
		pCur=pCur->next;
	}

	pCur=aWord.GetHead();
	while(pCur!=NULL)//Visit every word as the first word of a pair
	{
		if(pCur->nPOS>=0)//It's not an unknown words
			dCurFreqency=pCur->value;
		else//Unknown words
			dCurFreqency=DictCore.GetFrequency(pCur->sWord,2);
		aWord.GetElement(pCur->col,-1,pCur,&pNextWords);//Get next words which begin with pCur->col
		while(pNextWords&&pNextWords->row==pCur->col)//Next words
		{	
			//Build the lookup key "<current word><WORD_SEGMENTER><next word>"
			//NOTE(review): strcpy/strcat are unbounded; this assumes both words plus the
			//separator fit in WORD_MAXLENGTH bytes - confirm.
			strcpy(sTwoWords,pCur->sWord);
			strcat(sTwoWords,WORD_SEGMENTER);
			strcat(sTwoWords,pNextWords->sWord);
			nTwoWordsFreq=DictBinary.GetFrequency(sTwoWords,3);
			//Two linked Words frequency
			dTemp=(double)1/MAX_FREQUENCE;
			//Smoothing
			dValue=-log(dSmoothingPara*(1+dCurFreqency)/(MAX_FREQUENCE+80000)+(1-dSmoothingPara)*((1-dTemp)*nTwoWordsFreq/(1+dCurFreqency)+dTemp));
			//-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
			if(pCur->nPOS<0)//Unknown words: P(Wi|Ci);while known words:1
			    dValue+=pCur->value;

			//Get the position index of current word in the position map table
			nCurWordIndex=BinarySearch(pCur->row*MAX_SENTENCE_LEN+pCur->col,m_npWordPosMapTable,m_nWordCount);
			nNextWordIndex=BinarySearch(pNextWords->row*MAX_SENTENCE_LEN+pNextWords->col,m_npWordPosMapTable,m_nWordCount);
			//The edge (current index -> next index) carries dValue and the current word's nPOS
			aBinaryWordNet.SetElement(nCurWordIndex,nNextWordIndex,dValue,pCur->nPOS);
			pNextWords=pNextWords->next;//Get next word
		}
		pCur=pCur->next;
	}
	return true;
}
Ejemplo n.º 5
0
void CTime::deserialize(const CDictionary& fromDictionary)
{
	UInt32 ms = fromDictionary.valueAsUInt32ForKey(cMillisecondsKey);
	UInt32 s = fromDictionary.valueAsUInt32ForKey(cSecondsKey);
	UInt32 m = fromDictionary.valueAsUInt32ForKey(cMinutesKey);
	UInt32 h = fromDictionary.valueAsUInt32ForKey(cHoursKey);
	mData = h * MILLISECONDS_IN_HOUR +
			m * MILLISECONDS_IN_MINUTE +
			s * MILLISECONDS_IN_SECOND +
			ms;
}
Ejemplo n.º 6
0
//POS tagging with Hidden Markov Model
//Processes pWordItems chunk by chunk: GetFrom() loads the next chunk into the member
//tables and returns the index to continue from (or -1 at the end), GetBestPOS() picks
//the best tags, and the chunk is then handled according to m_tagType.
//pWordItems: the items, terminated by an entry whose sWord is empty
//dictCore: core dictionary; dictUnknown: unknown-word recognition dictionary
//Returns true unconditionally.
bool CSpan::POSTagging(PWORD_RESULT pWordItems,CDictionary &dictCore,CDictionary &dictUnknown)
{
//pWordItems: Items; nItemCount: the count of items;core dictionary and unknown recognition dictionary
    int i=0,j,nStartPos;
	Reset(false);
	//Loop until GetFrom reports the end (-1) or the terminating empty item is reached
    while(i>-1&&pWordItems[i].sWord[0]!=0)
	{
		nStartPos=i;//Start Position
		i=GetFrom(pWordItems,nStartPos,dictCore,dictUnknown);
		GetBestPOS();
		switch(m_tagType)
		{
		case TT_NORMAL://normal POS tagging
			//Slot j of the member tables corresponds to item j+nStartPos-1
			//NOTE(review): m_nBestTag[j] is read before j<m_nCurLength is tested - confirm
			//the array always has a readable slot at m_nCurLength.
			j=1;
			while(m_nBestTag[j]!=-1&&j<m_nCurLength)
			{//Store the best POS tagging
				pWordItems[j+nStartPos-1].nHandle=m_nBestTag[j];
				//Let the full stop be 0
				if(pWordItems[j+nStartPos-1].dValue>0&&dictCore.IsExist(pWordItems[j+nStartPos-1].sWord,-1))//Exist and update its frequncy as a POS value
					pWordItems[j+nStartPos-1].dValue=LOG_MAX_FRQUENCE-log((double)dictCore.GetFrequency(pWordItems[j+nStartPos-1].sWord,m_nBestTag[j])+1);
				j+=1;
			}
			break;
		case TT_PERSON://Person recognition
			/*clock_t lStart,lEnd;
		    lStart=clock();
			*/
			SplitPersonPOS(dictUnknown);
			//lEnd=clock();
			//printf("SplitPersonPOS=%f\n",(double)(lEnd-lStart)*1000/CLOCKS_PER_SEC);
			//Spit Persons POS
			//lStart=clock();
			PersonRecognize(dictUnknown);
			//lEnd=clock();
			//printf("PersonRecognize=%f\n",(double)(lEnd-lStart)/CLOCKS_PER_SEC);
			//Person Recognition with the person recognition dictionary
			break;
		case TT_PLACE://Place name recognition
			PlaceRecognize(dictCore,dictUnknown);
			break;
		case TT_TRANS://Transliteration
			TransRecognize(dictCore,dictUnknown);
			break;
		default:
			break;
		}
		//Clear the per-chunk state before loading the next chunk
		Reset();
	}
	return true;
}
Ejemplo n.º 7
0
//Splits sPersonName into surname, optional second surname and given name, and decides
//whether the result is acceptable as a Chinese person name.
//sPersonName: input name, read-only here
//sSurname/sSurname2/sGivenName: output buffers supplied by the caller
//  (NOTE(review): they are filled with strcpy/strncpy without size checks - the caller must
//   provide buffers large enough for the whole name; confirm at call sites)
//personDict: dictionary queried with handle 1 for surnames and handle 2 for given-name chars
//Returns false when the name is rejected, true otherwise. The output buffers may already
//have been written when false is returned.
bool CResult::ChineseNameSplit(char *sPersonName, char *sSurname, char *sSurname2, char *sGivenName, CDictionary &personDict)
{
	int nSurNameLen=4,nLen=strlen(sPersonName),nFreq,i=0,nCharType,nFreqGiven;
	char sTemp[3];
	if(nLen<3||nLen>8)//Not a traditional Chinese person name
		return false;
	//The text is walked two bytes at a time - presumably a double-byte encoding; confirm
	while(i<nLen)//Reject names containing a char that is neither CT_CHINESE nor CT_OTHER
	{
		nCharType=charType((unsigned char*)sPersonName+i);
		if(nCharType!=CT_CHINESE&&nCharType!=CT_OTHER)
			return false;
		i+=2;
	}
	sSurname2[0]=0;//init 
	//Try a 4-byte surname first, then a 2-byte one, otherwise no surname at all
	strncpy(sSurname,sPersonName,nSurNameLen);	
	sSurname[nSurNameLen]=0;
	if(!personDict.IsExist(sSurname,1))
	{
		nSurNameLen=2;
		sSurname[nSurNameLen]=0;
		if(!personDict.IsExist(sSurname,1))
		{
			nSurNameLen=0;
			sSurname[nSurNameLen]=0;
		}
	}
	//Everything after the surname is the given name (may be refined below)
	strcpy(sGivenName,sPersonName+nSurNameLen);
	if(nLen>6)
	{
		strncpy(sTemp,sPersonName+nSurNameLen,2);
		sTemp[2]=0;//Get the second possible surname
		if(personDict.IsExist(sTemp,1))
		{//Hongkong women's name: Surname+surname+given name
			strcpy(sSurname2,sTemp);
			strcpy(sGivenName,sPersonName+nSurNameLen+2);
		}
	}
	nFreq=personDict.GetFrequency(sSurname,1);
	strncpy(sTemp,sGivenName,2);
	sTemp[2]=0;
	nFreqGiven=personDict.GetFrequency(sTemp,2);
	//Rejection test, applied only when the surname is not 4 bytes long. Reject if any of:
	// - no surname was found and the name is longer than 4 bytes
	// - the given name is longer than 4 bytes
	// - at least 3 foreign-name chars, with surname and first given-name char both rare
	//   compared with the reference entries used in the two GetFrequency calls below
	// - surname frequency below 10 and every given-name char is a foreign-name char
	if(nSurNameLen!=4&&((nSurNameLen==0&&nLen>4)||strlen(sGivenName)>4||(GetForeignCharCount(sPersonName)>=3&&nFreq<personDict.GetFrequency("张",1)/40&&nFreqGiven<personDict.GetFrequency("华",2)/20)||(nFreq<10&&GetForeignCharCount(sGivenName)==(nLen-nSurNameLen)/2)))
		return false;
	if(nLen==4&&m_uPerson.IsGivenName(sPersonName))
	{//Single Surname+given name
		return false;
	}
	return true;
}
Ejemplo n.º 8
0
// Interactive translator entry point.
// Reads lines from standard input until EXIT_LINE (or end of input), hands every
// non-empty line, lower-cased, to the translator, and finally offers to save new
// words if the dictionary reports that its new-words list is not empty.
int main(int argc, char *argv[])
{
	SetConsoleOutputCP(1251);
	SetConsoleCP(1251);

	// The program accepts no command-line arguments.
	if (argc != 1)
	{
		cout << "The command has no additional parameters" << endl;
		return 0;
	}

	CDictionary dictionary;
	CTranslator translator(dictionary);

	// Dialog phase.
	for (string line; getline(cin, line) && line != EXIT_LINE; )
	{
		if (line.empty())
		{
			continue;
		}
		ConvertToLowercase(line);
		translator.DoDialogWithUser(line);
	}

	// Nothing new was entered: no need to ask about saving.
	if (dictionary.IsNewWordsListEmpty())
	{
		return 0;
	}

	// Save phase: keep asking until the answer is exactly YES or NO, or input ends.
	cout << "Do you want to save new words? YES/NO" << endl;
	for (string reply; getline(cin, reply); )
	{
		if (reply == "NO")
		{
			break;
		}
		if (reply == "YES")
		{
			dictionary.DictionarySave();
			break;
		}
		cout << "Try again" << endl;
	}
	return 0;
}
Ejemplo n.º 9
0
//Accumulates a score over the nLength words starting at slot nStartPos.
//For each slot the term is log(context frequency of its best tag + 1) minus
//log(frequency of the word with that tag in dict + 1); the sum of all terms is returned.
//A context-transition term used to be subtracted as well; that code is disabled.
ELEMENT_TYPE  CSpan::ComputePossibility(int nStartPos,int nLength,CDictionary &dict)
{
	ELEMENT_TYPE dTotal=0;
	const int nStop=nStartPos+nLength;
	int nPos=nStartPos;
	while(nPos<nStop)
	{
		//How often the word occurs with its best tag
		const int nWordFreq=dict.GetFrequency(m_sWords[nPos],m_nBestTag[nPos]);
		//How often the tag itself occurs, relative to the word frequency
		const ELEMENT_TYPE dTerm=log((double)(m_context.GetFrequency(0,m_nBestTag[nPos])+1))-log((double)(nWordFreq+1));
		dTotal+=dTerm;
		++nPos;
	}
	return dTotal;
}
Ejemplo n.º 10
0
// Entry point of the dictionary tool.
// Usage: dictionary.exe <file name>
// Fills the dictionary from the given file (if it can be opened), then translates
// words typed on standard input until the line "..." is entered or input ends, and
// finally lets the dictionary save new words to the same file (opened for append).
// Returns 1 on a wrong number of arguments, 0 otherwise.
int main(int argc, char *argv[])
{
	SetConsoleOutputCP(1251);
	SetConsoleCP(1251);
	if (argc != 2)
	{
		cout << "Not enough parameters. The correct command line format:\ndictionary.exe <file name>" << "\n";
		return 1;
	}
	else
	{
		ifstream inputFile(argv[1]);
		string inputWord;
		CDictionary dictionary;
		if (inputFile.is_open())
		{
			dictionary.FillDictionary(inputFile);
		}
		cout << "Для перевода введите слово на английском. Для выхода введите '...'\n";
		ofstream outputFile;
		outputFile.open(argv[1], ios::app);
		// The loop is driven by the result of getline: once standard input is
		// exhausted or fails, getline no longer produces "...", so testing only the
		// string contents (as before) would spin forever on end of input.
		while (getline(cin, inputWord) && inputWord != "...")
		{
			if (dictionary.IsKnowWord(inputWord))
			{
				dictionary.PrintTranslation(inputWord);
			}
			else if (!inputWord.empty())
			{
				dictionary.SaveTranslationForNewWordOnUserDemand(inputWord);
			}
		}
		dictionary.SaveNewWordsOnUserDemand(outputFile);
	}
	return 0;

}
Ejemplo n.º 11
0
//Loads a chunk of words from pWordItems, starting at item nIndex, into the member tables
//(m_sWords, m_nWordPosition, m_nTags, m_dFrequency), using table slots 1, 2, ...
//Loading stops when a word with exactly one candidate tag has been stored, when the
//terminating empty item is reached, or when MAX_WORDS_PER_SENTENCE slots are used.
//m_nCurLength receives the number of slots in use.
//pWordItems: items terminated by an entry whose sWord is empty
//dictCore/dictUnknown: core dictionary and unknown-word recognition dictionary
//Returns the index of the next item to start from, or -1 when the end of the items
//has been reached.
int CSpan::GetFrom(PWORD_RESULT pWordItems,int nIndex,CDictionary &dictCore, CDictionary &dictUnknown)
{
	int nCount,aPOS[MAX_POS_PER_WORD],aFreq[MAX_POS_PER_WORD];
	int nFreq=0,j,nRetPos=0,nWordsIndex=0;
	bool bSplit=false;//Need to split in Transliteration recognition 
    int i=1;
	nWordsIndex=i+nIndex-1;
	for(;i<MAX_WORDS_PER_SENTENCE&&pWordItems[nWordsIndex].sWord[0]!=0;i++)
	{
		if(m_tagType==TT_NORMAL||!dictUnknown.IsExist(pWordItems[nWordsIndex].sWord,44))
        {
			strcpy(m_sWords[i],pWordItems[nWordsIndex].sWord);//store current word
   		    m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(m_sWords[i]);
		}
		else
		{
			//The item exists in dictUnknown with handle 44: it is stored as two slots,
			//its first 2 bytes on this pass and the remainder on the next pass
			//(bSplit keeps nWordsIndex from advancing in between)
			if(!bSplit)
			{
				strncpy(m_sWords[i],pWordItems[nWordsIndex].sWord,2);//store current word
				m_sWords[i][2]=0;
				bSplit=true;
			}
			else
			{
				unsigned int nLen=strlen(pWordItems[nWordsIndex].sWord+2);
				strncpy(m_sWords[i],pWordItems[nWordsIndex].sWord+2,nLen);//store current word
				m_sWords[i][nLen]=0;
				bSplit=false;
			}
   		    m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(m_sWords[i]);
		}
		//Record the position of current word
		m_nStartPos=m_nWordPosition[i+1];
		//Move the Start POS to the ending
		if(m_tagType!=TT_NORMAL)
		{
			//Get the POSs from the unknown recognition dictionary
			dictUnknown.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
			for(j=0;j<nCount;j++) 
			{//Get the POS set of sCurWord in the unknown dictionary
				m_nTags[i][j]=aPOS[j];
   				m_dFrequency[i][j]=-log((double)(1+aFreq[j]))+log((double)(m_context.GetFrequency(0,aPOS[j])+1));
			}
			//Get the POS set of sCurWord in the core dictionary
			//We ignore the POS in the core dictionary and recognize them as other (0).
			//We add their frequency to get the possibility as POS 0
			dictCore.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
			nFreq=0;
			for(int k=0;k<nCount;k++) 
			{
				nFreq+=aFreq[k];
			}
			if(nCount>0)
			{
				//j still holds the number of tags taken from dictUnknown, so the
				//POS-0 entry is appended after them
				m_nTags[i][j]=0;
				//m_dFrequency[i][j]=(double)(1+nFreq)/(double)(m_context.GetFrequency(0,0)+1);
				m_dFrequency[i][j]=-log((double)(1+nFreq))+log((double)(m_context.GetFrequency(0,0)+1));
				j++;
			}
		}
		else//For normal POS tagging
		{
			j=0;
			//Get the POSs from the unknown recognition dictionary
			if(pWordItems[nWordsIndex].nHandle>0)
			{//The word has  is only one POS value
			 //We have record its POS and nFrequncy in the items.
				m_nTags[i][j]=pWordItems[nWordsIndex].nHandle;
				m_dFrequency[i][j]=pWordItems[nWordsIndex].dValue-LOG_MAX_FRQUENCE+log((double)(m_context.GetFrequency(0,m_nTags[i][j])+1));
				if(m_dFrequency[i][j]<0)//Not permit the value less than 0
					m_dFrequency[i][j]=0;
				j++;
			}
			else
			{//The word has multiple POSs, we should retrieve the information from Core Dictionary 
				
				if(pWordItems[nWordsIndex].nHandle<0)
				{//The word has  is only one POS value
				 //We have record its POS and nFrequncy in the items.
					if(pWordItems[nWordsIndex].nHandle==-'t'*256-'t')//tt
					{
						//A "tt" item yields two candidate tags, 'n','r' weighted by dRatio
						//and 'n','s' weighted by 1-dRatio
						char sWordOrg[100],sPostfix[10];
						double dRatio=0.6925;//The ratio which transliteration as a person name 
						PostfixSplit(pWordItems[nWordsIndex].sWord,sWordOrg,sPostfix);
						if(sPostfix[0]!=0)
								dRatio=0.01;
						m_nTags[i][j]='n'*256+'r';
						m_dFrequency[i][j]=-log(dRatio)+pWordItems[nWordsIndex].dValue;
						//m_dFrequency[i][j]=log(dRatio)+pWordItems[nWordsIndex].dValue-log(m_context.GetFrequency(0,m_nTags[i][j]))+log(MAX_FREQUENCE);
						//P(W|R)=P(WRT)/P(RT)=P(R)*P(W|T)/P(R|T)
						j++;
						m_nTags[i][j]='n'*256+'s';
						m_dFrequency[i][j]=-log(1-dRatio)+pWordItems[nWordsIndex].dValue;
						//m_dFrequency[i][j]=log(1-dRatio)+pWordItems[nWordsIndex].dValue-log(m_context.GetFrequency(0,m_nTags[i][j]))+log(MAX_FREQUENCE);
						j++;
					}
					else//Unknown words such as Chinese person name or place name
					{
						m_nTags[i][j]=-pWordItems[nWordsIndex].nHandle;
   					//	m_dFrequency[i][j++]=(double)(1+pWordItems[nWordsIndex].nFrequency)/(double)(m_context.GetFrequency(0,aPOS[j])+1);
						m_dFrequency[i][j++]=pWordItems[nWordsIndex].dValue;
					}
				}
				dictCore.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
				//NOTE(review): the loop starts at the current j, so when entries were added
				//above, aPOS[0..j-1] from the core dictionary are skipped rather than
				//appended - confirm this is intended.
				for(;j<nCount;j++) 
				{//Get the POS set of sCurWord in the core dictionary
					m_nTags[i][j]=aPOS[j];
   					m_dFrequency[i][j]=-log((double)1+aFreq[j])+log((double)m_context.GetFrequency(0,m_nTags[i][j])+1);
				}
			}
		}
		if(j==0)
		{//We donot know the POS, so we have to guess them according lexical knowledge
			GuessPOS(i,&j);//Guess the POS of current word
		}
		m_nTags[i][j]=-1;//Set the ending POS 
		if(j==1)//No ambuguity
		{//No ambuguity, so we can break from the loop
			i++;
			m_sWords[i][0]=0;
			break;
		}
		if(!bSplit)
		{nWordsIndex++;}
	}
	if(pWordItems[nWordsIndex].sWord[0]==0)
		nRetPos=-1;//Reaching ending

	//If the last stored slot has more than one candidate tag, append a virtual ending
	//slot carrying a single fixed tag (101 outside TT_NORMAL, 1 for TT_NORMAL)
	if(m_nTags[i-1][1]!=-1)//||m_sWords[i][0]==0
	{//Set end for words like "张/华/平"
		if(m_tagType!=TT_NORMAL)
		       m_nTags[i][0]=101;
		else
		       m_nTags[i][0]=1;
		
		m_dFrequency[i][0]=0;
	    m_sWords[i][0]=0;//Set virtual ending
		m_nTags[i++][1]=-1;
	}
	m_nCurLength=i;//The current word count
	if(nRetPos!=-1)
		return nWordsIndex+1;//Next start position
	return -1;//Reaching ending
}
Ejemplo n.º 12
0
//Recognizes person names in the current chunk by matching role patterns.
//The best tags (m_nBestTag) are turned into a letter string (tag value + 'A') and scanned
//left to right against the patterns in sPatterns. Every accepted match is appended to
//m_nUnknownWords (start/end positions) and m_dWordsPossibility (its score).
//personDict: the person recognition dictionary
//Returns true unconditionally.
bool CSpan::PersonRecognize(CDictionary &personDict)
{
  char sPOS[MAX_WORDS_PER_SENTENCE]="z",sPersonName[100];
                          //0     1    2    3    4   5   
  //sPatterns, dFactor and nPatternLen are parallel tables; the empty pattern with
  //length 0 terminates the list
  char sPatterns[][5]={ "BBCD","BBC","BBE","BBZ","BCD","BEE","BE",
						 "BG",  "BXD","BZ", "CDCD","CD","EE", 
						 "FB", "Y","XD",""};
  double dFactor[]={0.0011,0.0011,0.0011,0.0011,0.7614,0.0011,0.2055,
						 0.0160,0.0011,0.0011,0,0.0160,0.0011,
						 0.0160,0.0011,0.0011,0 };
  //About parameter:
/*
	Given Name: 486     0.0160
	Surname+postfix:484 0.0160
	m_lPerson2Num:6265   0.2055
	m_lPerson3Num: 23184 0.7614
	m_lPerson4Num:32     0.0011
  */
  //The person recognition patterns set
  //BBCD: surname + surname + given-name char 1 + given-name char 2
  //BBE:  surname + surname + single-char given name
  //BBZ:  surname + surname + two-char given name that forms a word
  //BCD:  surname + given-name char 1 + given-name char 2
  //BE:   surname + single-char given name
  //BEE:  surname + single-char given name + single-char given name, e.g. 韩磊磊
  //BG:   surname + suffix
  //BXD:  (surname and first given-name char forming a word) + last given-name char
  //BZ:   surname + two-char given name that forms a word
  //B:    surname
  //CD:   given-name char 1 + given-name char 2
  //EE:   single-char given name + single-char given name
  //FB:   prefix + surname
  //XD:   (surname and first given-name char forming a word) + last given-name char
  //Y:    surname and single-char given name forming a word
  int nPatternLen[]={4,3,3,3,3,3,2,2,3,2,4,2,2,2,1,2,0};

  int i;
  //NOTE(review): sPOS is filled without a bound check; this assumes the tag sequence is
  //shorter than MAX_WORDS_PER_SENTENCE - confirm.
  for(i=1;m_nBestTag[i]>-1;i++)//Convert to string from POS
	sPOS[i]=m_nBestTag[i]+'A';
  sPOS[i]=0;
  int j=1,k,nPos;//Find the proper pattern from the first POS
  int nLittleFreqCount;//Counter for the person name role with little frequecy
  bool bMatched=false;   
  while(j<i)
  {
	bMatched=false;   
	//Patterns are tried in table order; note that the "continue" statements below move
	//on to the next pattern k, some of them after j has already been advanced
	for(k=0;!bMatched&&nPatternLen[k]>0;k++)
	{
		//The pattern must match at j and must not be adjacent to a "·" word on either side
		if(strncmp(sPatterns[k],sPOS+j,nPatternLen[k])==0&&strcmp(m_sWords[j-1],"·")!=0&&strcmp(m_sWords[j+nPatternLen[k]],"·")!=0)
		{//Find the proper pattern k
			if(strcmp(sPatterns[k],"FB")==0&&(sPOS[j+2]=='E'||sPOS[j+2]=='C'||sPOS[j+2]=='G'))
			{//Rule 1 for exclusion: prefix + surname + given-name char 1 (given-name char 2): the (prefix + surname) rule does not apply
				continue;
			}
/*			if((strcmp(sPatterns[k],"BEE")==0||strcmp(sPatterns[k],"EE")==0)&&strcmp(m_sWords[j+nPatternLen[k]-1],m_sWords[j+nPatternLen[k]-2])!=0)
			{//Rule 2 for exclusion: surname + single + single, or single + single: if the two E characters differ, the rule does not apply. e.g. 韩磊磊
				continue;
			}

			if(strcmp(sPatterns[k],"B")==0&&m_nBestTag[j+1]!=12)
			{//Rule 3 for exclusion: if the surname is not followed by a suffix, the rule does not apply. e.g. 江主席, 刘大娘
				continue;
			}
*/			//Get the possible name
			nPos=j;//Record the person position in the tag sequence
			sPersonName[0]=0;
			nLittleFreqCount=0;//Record the number of role with little frequency
			while(nPos<j+nPatternLen[k])
			{//Get the possible person name
			 //
				if(m_nBestTag[nPos]<4&&personDict.GetFrequency(m_sWords[nPos],m_nBestTag[nPos])<LITTLE_FREQUENCY)
					nLittleFreqCount++;//The counter increase
				strcat(sPersonName,m_sWords[nPos]);
				nPos+=1;
			}
			if(IsAllForeign(sPersonName)&&personDict.GetFrequency(m_sWords[j],1)<LITTLE_FREQUENCY)
			{//Exclusion foreign name
			 //Rule 2 for exclusion: if all characters are ones used in foreign names, the (given-name char 1 + given-name char 2) rule does not apply
				j+=nPatternLen[k]-1;
				continue;
			}
			if(strcmp(sPatterns[k],"CDCD")==0)
			{//Rule for exclusion
			 //The (given 1 + given 2 + given 1 + given 2) pattern is itself an exclusion rule, e.g. 女高音歌唱家迪里拜尔演唱
 			 //Rule 3 for exclusion: it applies when the text contains foreign-name characters
			 //Otherwise the exclusion does not apply, e.g. 黑妞白妞姐俩拔了头筹。
			 //Either way this pattern never records a name, so its dFactor of 0 is never passed to log()
				if(GetForeignCharCount(sPersonName)>0)
					j+=nPatternLen[k]-1;
				continue;
			}
			if(strcmp(sPatterns[k],"CD")==0&&IsAllForeign(sPersonName))
			{//
				j+=nPatternLen[k]-1;
				continue;
			}
			if(nLittleFreqCount==nPatternLen[k]||nLittleFreqCount==3)
			//e.g. 马哈蒂尔;小扎耶德与他的中国阿姨胡彩玲受华黎明大使之邀,
			//The all roles appear with two lower frequecy,we will ignore them
				continue;
			//Accept: record the byte span of the name and its score
			m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[j];
			m_nUnknownWords[m_nUnknownIndex][1]=m_nWordPosition[j+nPatternLen[k]];
			m_dWordsPossibility[m_nUnknownIndex]=log(dFactor[k])+ComputePossibility(j,nPatternLen[k],personDict);
			//Mutiply the factor 
			m_nUnknownIndex+=1;
			j+=nPatternLen[k];
			bMatched=true;
		}
	}
    if(!bMatched)//Not matched, add j by 1
		j+=1;
  }
  return true;
}
Ejemplo n.º 13
0
// Converts a dictionary to or from its text form.
// Expected arguments: <Action> <text file> <dictionary path>, where Action is
// "ToTxt" (export the dictionary to the text file) or "FromTxt" (import the text
// file into the dictionary and save it).
// Returns 0 on success and 1 on any failure.
int main(int argc, char **argv)
{
	// PrintUsage() presumably terminates the process; the explicit returns below make
	// sure argv is never dereferenced with a wrong argument count if it does not.
	if (argc != 4)
	{
		PrintUsage();
		return 1;
	}

	if (		!strcmp (argv[1],  "-h")
			||  !strcmp (argv[1],  "-help")
			||  !strcmp (argv[1],  "/h")
			||  !strcmp (argv[1],  "/help")
	)
	{
		PrintUsage();
		return 1;
	}
	string Action = argv[1];
	if	(		(Action != "ToTxt")  
			&&	(Action != "FromTxt")  
		)
	{
		PrintUsage();
		return 1;
	}

	string FileName = argv[2];
	CDictionary Dict;
	if (Action == "FromTxt")
	{
		// 04 asks access() for read permission on the input text file.
		if (access (FileName.c_str(), 04) != 0)
		{
			fprintf (stderr, "Cannot read %s\n",FileName.c_str());
			return 1;
		}
		if (!Dict.LoadOnlyConstants(argv[3]))
		{
			fprintf (stderr, "Cannot load an empty dictionary from %s\n",argv[3]);
			return 1;
		}
		Dict.m_bShouldSaveComments = true;

		string Messages;
		bool bResult = Dict.ImportFromText(FileName,false, iceSkip,1, Messages);
		// Messages are printed whether or not the import succeeded.
		fprintf (stderr, "%s", Messages.c_str() );
		if (bResult)
			if (Dict.Save())
				return  0;
		return 1;
	}
	else
	{
		if (!Dict.Load(argv[3]) || !Dict.ReadUnitComments())
		{
			fprintf (stderr, "Cannot load dictionary from %s\n",argv[3]);
			return 1;
		}
		FILE * fp = fopen (FileName.c_str(),"wb");
		if (!fp)
		{
			fprintf (stderr, "Cannot write to %s\n",FileName.c_str());
			return 1;
		}

		CTempArticle A;
		A.m_pRoss = &Dict;

		for (WORD i = 0;  i < Dict.m_Units.size(); i++)
		{ 
			fprintf (fp,"============\r\n");
			fprintf (fp,"%s", Dict.GetUnitTextHeader(i).c_str());
			try 
			{
				A.ReadFromDictionary(i, false, true);
				if (!A.ArticleToText())
				{
					// The error is written into the output file itself; close the
					// file before leaving so the handle is not leaked.
					fprintf (fp,"Error! Cannot get the entry No %i\r\n", i);
					fclose(fp);
					return 1;
				}
				fprintf (fp,"%s",A.GetArticleStr().c_str());
			}
			catch (...)
			{
				fprintf (fp,"Error! Cannot get the entry No %i\r\n", i);
				fclose(fp);
				return 1;
			}
			
		}
		fclose(fp);
		return 0;
	}
}
Ejemplo n.º 14
0
 // Forwards w and val to dictionary.AddWord, always passing false as the third
 // argument (the meaning of that flag is defined by AddWord, not visible here).
 void InjectWord(unsigned w, unsigned val) {
     dictionary.AddWord(w, val, false);
 }
Ejemplo n.º 15
0
//Splits, in place, every word of the current chunk whose best tag is 21 or 22 into two
//consecutive entries of m_sWords / m_nBestTag / m_nWordPosition.
//Tag 21: the word is cut into a leading part (tag 11) and a trailing part (tag 1).
//Tag 22: the word is cut into a leading part (tag 1, 4 or 3) and a trailing part (tag 12).
//The chunk is scanned from its last slot down to slot 1; m_nCurLength grows by one per split.
//unlistDict: dictionary used to choose between a 4-byte and a 2-byte part
//Returns true unconditionally.
bool CSpan::SplitPersonPOS(CDictionary &unlistDict)
{//Split the word with POS 21 and 22
    int i=m_nCurLength-1,j;
	unsigned int nLenWord,nLenPart;
	char sFirstPart[50],sLastPart[50];
	int nFirstPOS,nLastPOS;
	for(;i>0;i--)
	{
		if(m_nBestTag[i]==21||m_nBestTag[i]==22)
		{//Find the POS which need to split
			//Shift every entry after i one slot up to make room at i+1
			//NOTE(review): this writes slot m_nCurLength; it assumes the tables have spare
			//capacity beyond the current length - confirm.
			for(j=m_nCurLength-1;j>i;j--)
			{//Move the POS and words
				strcpy(m_sWords[j+1],m_sWords[j]);
				m_nBestTag[j+1]=m_nBestTag[j];
				m_nWordPosition[j+1]=m_nWordPosition[j];
			}
			m_nCurLength+=1;//The length increment 
	        
			//Generate new segment words and POS
			if(m_nBestTag[i]==21)
			{//Combination by Previous and first component
				nLenWord=strlen(m_sWords[i]);
				//Trailing part: the last 4 bytes if the dictionary knows them, else the last 2
				if(nLenWord>4)//Get first component
				{
					strcpy(sLastPart,m_sWords[i]+nLenWord-4);
					if(!unlistDict.IsExist(sLastPart,-1))
						strcpy(sLastPart,m_sWords[i]+nLenWord-2);
				}
				else
				{
					strcpy(sLastPart,m_sWords[i]+nLenWord-2);	
				}
				nLenPart=strlen(sLastPart);
				if(nLenPart<nLenWord)
				{//Get first part
					strncpy(sFirstPart,m_sWords[i],nLenWord-nLenPart);
					sFirstPart[nLenWord-nLenPart]=0;
				}
				else
				{
					//The trailing part would be the whole word: fall back to a 2-byte tail
					strncpy(sFirstPart,m_sWords[i],nLenWord-2);
					sFirstPart[nLenWord-2]=0;
					strncpy(sLastPart,m_sWords[i]+nLenWord-2,2);
					sLastPart[2]=0;
				}
				nFirstPOS=11;
				nLastPOS=1;
			}
			else
			{//Combination by Next word and last component
				nLenWord=strlen(m_sWords[i]);
				//Leading part: the first 4 bytes if the dictionary knows them, else the first 2
				if(nLenWord>4)//Get last component
				{
					strncpy(sFirstPart,m_sWords[i],4);
					sFirstPart[4]=0;
					if(!unlistDict.IsExist(sFirstPart,-1))
						sFirstPart[2]=0;
				}
				else
				{
					strncpy(sFirstPart,m_sWords[i],2);	
					sFirstPart[2]=0;
				}
				nLenPart=strlen(sFirstPart);
				if(nLenPart<nLenWord)
				{//Get last part
					strncpy(sLastPart,m_sWords[i]+nLenPart,nLenWord-nLenPart);
					sLastPart[nLenWord-nLenPart]=0;
				}
				else
				{
					//The leading part would be the whole word: fall back to a 2-byte head
					strncpy(sFirstPart,m_sWords[i],2);
					sFirstPart[2]=0;
					strncpy(sLastPart,m_sWords[i]+2,nLenWord-2);
					sLastPart[nLenWord-2]=0;
				}
				//NOTE(review): when i==1 the second test below reads m_sWords[i-2],
				//i.e. index -1 - confirm this cannot happen.
				if(unlistDict.IsExist(sFirstPart,1)&&m_nBestTag[i-1]==5)
					//e.g. 小陈说: ("Xiao Chen said:")
					nFirstPOS=1;
				else if(unlistDict.IsExist(m_sWords[i-1],1)&&!unlistDict.IsExist(m_sWords[i-2],1))
					nFirstPOS=4;
				else
					nFirstPOS=3;
				nLastPOS=12;
			}
			//Store the two parts in slots i and i+1
            strcpy(m_sWords[i],sFirstPart);
			m_nBestTag[i]=nFirstPOS;
            strcpy(m_sWords[i+1],sLastPart);
			m_nBestTag[i+1]=nLastPOS;
			m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(sFirstPart);
		}
	}
	return true;
}
Ejemplo n.º 16
0
	// Fixture constructor: writes four lines into the stream ss and loads the
	// dictionary from it (presumably alternating word / translation lines - the
	// exact format is defined by LoadDictionary).
	Dictionary_()
	{
		ss << "cat" << endl;
		ss << "кошка" << endl;
		ss << "dog" << endl;
		ss << "собака";
		dictionary.LoadDictionary(ss);
	}
Ejemplo n.º 17
0
 // Returns Translate(Num, 8) followed by the comment text stored under the key
 // "Ru comment about hex system".
 // NOTE(review): the key mentions "hex" although the base passed here is 8 - confirm
 // the key matches the intended dictionary entry.
 string TranslateIn8Power(int Num)
 {
     string text = Translate(Num,8);
     text += Comments.GetTranslate("Ru comment about hex system");
     return text;
 }
Ejemplo n.º 18
0
 // Forwards to dictionary.BeginTransaction().
 void BeginDictionaryTransaction() {
     dictionary.BeginTransaction();
 }
Ejemplo n.º 19
0
// Loads "captionmod/dictionary.csv" and rebuilds the dictionary list.
// Every row after the first whose first column is non-empty becomes a CDictionary
// entry that is appended to m_Dictionary and registered in the title hash table;
// afterwards entries naming a successor in m_szNext are linked through m_pNext.
// Returns early, without touching the tables, when the file has fewer than two
// rows or no scheme is available.
void CViewport::LoadDictionary(void)
{
	CSV::CSVDocument doc;
	// Initialised so that, should load_file throw and Sys_ErrorEx return, the row
	// check below sees a defined value and bails out instead of reading an
	// uninitialised variable.
	CSV::CSVDocument::row_index_type row_count = 0;

	//Parse from the document

	try
	{
		row_count = doc.load_file("captionmod/dictionary.csv");
	}
	catch(const std::exception &err)
	{
		Sys_ErrorEx("%s\n%s", "LoadDictionary: ", err.what());
	}

	if(row_count < 2)
		return;

	IScheme *ischeme = scheme()->GetIScheme(GetScheme());

	if(!ischeme)
		return;

	// Colour handed to every entry's Load(); "BaseText" is looked up in the scheme
	// with the given fallback.
	Color defaultColor = ischeme->GetColor("BaseText", Color(255, 255, 255, 200));
	
	//Initialize the dictionary hashtable
	m_StringsHashTable.SetSize(2048);

	for (int i = 0; i < m_StringsHashTable.Count(); i++)
		m_StringsHashTable[i].next = NULL;

	EmptyDictionaryHash();

	int nRowCount = row_count;
	
	//parse the dictionary line by line...
	// Row 0 is skipped (presumably a header row).
	for (int i = 1;i < nRowCount; ++i)
	{
		CSV::CSVDocument::row_type row = doc.get_row(i);

		if(row.size() < 1)
			continue;

		const char *title = row[0].c_str();

		if(!title || !title[0])
			continue;

		// The entry is stored in m_Dictionary as a raw pointer.
		CDictionary *Dict = new CDictionary;

		Dict->Load(row, defaultColor, ischeme);

		m_Dictionary.AddToTail(Dict);

		AddDictionaryHash(Dict, Dict->m_szTitle);
	}

	//Link the dictionaries

	for(int i = 0; i < m_Dictionary.Count(); ++i)
	{
		CDictionary *Dict = m_Dictionary[i];
		if(Dict->m_szNext[0])
		{
			Dict->m_pNext = FindDictionary(Dict->m_szNext);
		}
	}
}
Ejemplo n.º 20
0
	void HandleCreate(const CDictionary& dict)
	{
		super::HandleCreate(dict);

		CRGBA rgbaTint(255,255,255,255);

		dict.GetRGBA("tint", &rgbaTint);

		for ( uint32 iButtonImage = 0 ; iButtonImage < 9 ; ++iButtonImage )
		{
			m_apImages[iButtonImage]->SetShader(GetTheme()->GetButtonImage(iButtonImage));
			m_apImages[iButtonImage]->SetTint(rgbaTint);
		}

		if ( dict.Exists("font") )
		{
			m_pText->SetFont(dict.GetString("font"));
		}
		else
		{
			m_pText->SetFont(GetTheme()->GetButtonTextFont());
		}

		if ( dict.Exists("text") )
		{
			m_pText->SetText(dict.GetString("text").c_str());
		}
		
		if ( dict.Exists("text_shadow") )
		{
			bool bShadow = dict.GetBool("text_shadow");
			int nStyle = m_pText->GetStyle();
			m_pText->SetStyle(IText::Style(bShadow ? (nStyle | IText::eStyleShadowed) : (nStyle & ~IText::eStyleShadowed)));
		}

		if ( dict.Exists("text_tint") )
		{
			m_pText->SetTint(dict.GetRGBA("text_tint"));
		}

		if ( dict.Exists("onpress") )
		{
			m_sOnPress = dict.GetString("onpress");
		}

		if ( dict.Exists("onunpress") )
		{
			m_sOnUnpress = dict.GetString("onunpress");
		}
	}
Ejemplo n.º 21
0
 // Calls ClearAll() on the dictionary first and then on the markov object.
 void BootstrapDB() {
     dictionary.ClearAll();
     markov.ClearAll();
 }
Ejemplo n.º 22
0
//Recognizes transliterated words in the current chunk.
//Scans the best tags (m_nBestTag) from slot 1; a run is started by tag 1/11/21 or by
//tag 2/12/22 and extended over the following tags as described below. Each accepted run
//is appended to m_nUnknownWords (start/end positions) and m_dWordsPossibility (its score).
//dictCore: core dictionary, used to reject single known words
//transDict: dictionary passed to ComputePossibility for scoring
//Returns true unconditionally.
bool CSpan::TransRecognize(CDictionary &dictCore,CDictionary &transDict)
{
  char sPOS[MAX_WORDS_PER_SENTENCE]="Z";//Not used anywhere else in this function
  int nStart=1,nEnd=1,i=1;
  while(m_nBestTag[i]>-1)
  {
	  if(m_nBestTag[i]==1||m_nBestTag[i]==11||m_nBestTag[i]==21)//1,11,21 Trigger the recognition
	  {
		//Run shape: repeated start tag, then start tag+1, then start tag+2, then tag 30
		nStart=i;
		nEnd=nStart+1;
		while(m_nBestTag[nEnd]==m_nBestTag[nStart])//1,11,21
			nEnd++;
		while(m_nBestTag[nEnd]==m_nBestTag[nStart]+1)//2,12,22
			nEnd++;
		while(m_nBestTag[nEnd]==m_nBestTag[nStart]+2)//3,13,23
			nEnd++;
		while(m_nBestTag[nEnd]==30)//trailing tag 30
			nEnd++;
	  }
	  else if(m_nBestTag[i]==2||m_nBestTag[i]==12||m_nBestTag[i]==22)//2,12,22 Trigger the recognition
	  {
		//Run shape: repeated start tag, then start tag+1, then tag 30
		nStart=i;
		nEnd=nStart+1;
		while(m_nBestTag[nEnd]==m_nBestTag[nStart])//2,12,22
			nEnd++;
		while(m_nBestTag[nEnd]==m_nBestTag[nStart]+1)//3,13,23
			nEnd++;
		while(m_nBestTag[nEnd]==30)//trailing tag 30
			nEnd++;
	  }
	  //Accept the run [nStart,nEnd) if its first word is not all digits and either
	  // - it spans more than two slots, or
	  // - it spans two slots and the second is not tag 30 or the first word is longer than 2 bytes, or
	  // - it is a single word longer than 2 bytes that the core dictionary does not contain
	  if(nEnd>nStart&&!IsAllNum((unsigned char *)m_sWords[nStart])&&(nEnd>nStart+2||(nEnd==nStart+2&&(m_nBestTag[nEnd-1]!=30||strlen(m_sWords[nStart])>2))||(nEnd==nStart+1&&strlen(m_sWords[nStart])>2&&!dictCore.IsExist(m_sWords[nStart],-1))))
	  {
			m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[nStart];
			m_nUnknownWords[m_nUnknownIndex][1]=m_nWordPosition[nEnd];
			//NOTE(review): nEnd is one past the last slot of the run, yet the length passed
			//is nEnd-nStart+1, which makes the score include slot nEnd - confirm this is intended.
			m_dWordsPossibility[m_nUnknownIndex++]=ComputePossibility(nStart,nEnd-nStart+1,transDict);
			nStart=nEnd;
	  }

	  //Continue after the run, or step one slot when nothing was consumed
	  if(i<nEnd)
		  i=nEnd;
	  else
		  i=i+1;
  }
  return true;
}
Ejemplo n.º 23
0
 // Returns Translate(Num, 10) followed by the comment text stored under the key
 // "Ru comment about dec system".
 string TranslateIn10Power(int Num)
 {
     string text = Translate(Num,10);
     text += Comments.GetTranslate("Ru comment about dec system");
     return text;
 }
//Builds the word net (segmentation graph m_segGraph) for sSentence: the sentence is
//first cut into atoms by AtomSegment, every atom becomes an edge (i,i+1), and every
//atom sequence found in dictCore becomes an edge (i,j).
//sSentence: the sentence to segment
//dictCore: the core dictionary
//bOriginalFreq: when true, edges store raw frequencies together with the word text;
//               when false, they store -log based values and no word text
//Returns true unconditionally.
bool CSegGraph::GenerateWordNet(char *sSentence,CDictionary &dictCore,bool	bOriginalFreq)
{
//Gernerate the word net from the sLine, that's list all the possible word
	unsigned int i=0,j,nLen=strlen(sSentence);
	char sWord[WORD_MAXLENGTH]="",sTempWord[WORD_MAXLENGTH]="",sWordMatch[WORD_MAXLENGTH];
	int nWordIndex=0,nHandleTemp,k,nPOS;
	int nMatchFreq[20],nMatchHandle[20],nTotalFreq,nMatchCount;
	double dValue=0;
	m_nAtomCount=0;
	m_segGraph.SetEmpty();//Set segmentation graph empty

	AtomSegment(sSentence);
	//Atomic Segmentation

    for(i=0;i<m_nAtomCount;i++)//Init the cost array
    {
		if(m_nAtomPOS[i]==CT_CHINESE)//The atom is a Chinese Char
		{
			if(!bOriginalFreq)//Not original frequency
				m_segGraph.SetElement(i,i+1,log(MAX_FREQUENCE),0);//init the link with the maximum value
			else
				m_segGraph.SetElement(i,i+1,0,0,m_sAtom[i]);//init the link with the maximum value
		}
		else//Other atom
		{
			strcpy(sWord,m_sAtom[i]);//init the word 
			dValue=MAX_FREQUENCE;
			//Numbers and letter strings are replaced by placeholder words and get a
			//negative POS value; delimiters keep their own text
			switch(m_nAtomPOS[i])
			{
			case CT_INDEX:
			case CT_NUM:
				nPOS=-27904;//'m'*256
				strcpy(sWord,"未##数");
				dValue=0;
				break;
			case CT_DELIMITER:
				nPOS=30464;//'w'*256;
				break;
			case CT_LETTER:
				nPOS=-'n'*256-'x';//
				dValue=0;
				strcpy(sWord,"未##串");
				break;
			case CT_SINGLE://12021-2129-3121
				//Treated as a number only if every char is a digit, '+' or '-'
				if(GetCharCount("+-1234567890",m_sAtom[i])==(int)strlen(m_sAtom[i]))
				{
					nPOS=-27904;//'m'*256
					strcpy(sWord,"未##数");
				}
				else
				{
					nPOS=-'n'*256-'x';//
					strcpy(sWord,"未##串");
				}
				dValue=0;
				break;
			default:
				nPOS=m_nAtomPOS[i];//'?'*256;
				break;
			}
			if(!bOriginalFreq)//Not original frequency
				m_segGraph.SetElement(i,i+1,0,nPOS);//init the link with minimum
			else
				m_segGraph.SetElement(i,i+1,dValue,nPOS,sWord);//init the link with minimum
		}
    }
	i=0;
	while(i<m_nAtomCount)//All the word
	{
	  strcpy(sWord,m_sAtom[i]);//Get the current atom
	  j=i+1;
	  //NOTE(review): m_sAtom[i+1] is read even when i is the last atom, and j is advanced
	  //here without appending the second atom to sWord - confirm both are intended.
	  if(strcmp(sWord,"月")==0&&strcmp(m_sAtom[i+1],"份")==0)//Don't split the two-atom word 月份
		  j+=1;
	  while(j<=m_nAtomCount&&dictCore.GetMaxMatch(sWord,sWordMatch,&nHandleTemp))
	  {//Add a condition to control the end of string
	   //retrieve the dictionary with the word
          if(strcmp(sWordMatch,sWord)==0)//find the current word
		  {
			  nTotalFreq=0;
			  dictCore.GetHandle(sWord,&nMatchCount,nMatchHandle,nMatchFreq);
			  for(k=0;k<nMatchCount;k++)//Add the frequency
			  {
				 nTotalFreq+=nMatchFreq[k];
			  }
			  //Adding a rule to exclude some words to be formed.
			  if(strlen(sWord)==4&&i>=1&&(IsAllNum((unsigned char *)m_sAtom[i-1])||IsAllChineseNum(m_sAtom[i-1]))&&(strncmp(sWord,"年",2)==0||strncmp(sWord,"月",2)==0))
			  {//e.g. 1年内 ("within 1 year"), 1999年末 ("end of 1999")
			     if(CC_Find("末内中底前间初",sWord+2))
				     break;
			  }
			  if(nMatchCount==1)//The possible word has only one POS, store it
			  {
				if(!bOriginalFreq)//Not original frequency
					m_segGraph.SetElement(i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),nMatchHandle[0]);
				else
					m_segGraph.SetElement(i,j,nTotalFreq,nMatchHandle[0],sWord);
			  }
			  else 
			  {
					//Several POS values (or none): the edge is stored with POS 0
					if(!bOriginalFreq)//Not original frequency
						m_segGraph.SetElement(i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),0);
					else
						m_segGraph.SetElement(i,j,nTotalFreq,0,sWord);
			  }
		  }
		  //Extend the candidate by the next atom
		  //NOTE(review): strcat is unbounded and m_sAtom[j] is read when j==m_nAtomCount;
		  //this assumes sWord has room and that the slot after the last atom is a valid,
		  //terminated string - confirm.
		  strcat(sWord,m_sAtom[j++]);
	  }
	  i+=1;//Start from i++;
	}
	return true;
}
Ejemplo n.º 25
0
 // Forwards to dictionary.EndTransaction().
 void EndDictionaryTransaction() {
     dictionary.EndTransaction();
 }