/********************************************************************* * * Func Name : GetWordType * * Description: Get the type of word * * * Parameters : sWord: the word * Returns : the type * Author : Kevin Zhang * History : * 1.create 2002-1-9 *********************************************************************/ int CDictionary::GetWordType(char *sWord) { int nType=charType((unsigned char *)sWord),nLen=strlen(sWord); if(nLen>0&&nType==CT_CHINESE&&IsAllChinese((unsigned char *)sWord)) return WT_CHINESE;//Chinese word else if(nLen>0&&nType==CT_DELIMITER) return WT_DELIMITER;//Delimiter else return WT_OTHER;//other invalid }
//Guess the POS of No. nIndex word item bool CSpan::GuessPOS(int nIndex,int *pSubIndex) { int j=0,i=nIndex,nCharType; unsigned int nLen; switch(m_tagType) { case TT_NORMAL: break; case TT_PERSON: j=0; if(CC_Find("××",m_sWords[nIndex])) { m_nTags[i][j]=6; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,6)+1); } else { m_nTags[i][j]=0; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1); nLen=strlen(m_sWords[nIndex]); if(nLen>=4) { m_nTags[i][j]=0; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1); m_nTags[i][j]=11; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8); m_nTags[i][j]=12; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8); m_nTags[i][j]=13; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8); } else if(nLen==2) { m_nTags[i][j]=0; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1); nCharType=charType((unsigned char *)m_sWords[nIndex]); if(nCharType==CT_OTHER||nCharType==CT_CHINESE) { m_nTags[i][j]=1; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)+1); m_nTags[i][j]=2; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)+1); m_nTags[i][j]=3; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)+1); m_nTags[i][j]=4; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,4)+1); } m_nTags[i][j]=11; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8); m_nTags[i][j]=12; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8); m_nTags[i][j]=13; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8); } } break; case TT_PLACE: j=0; m_nTags[i][j]=0; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1); nLen=strlen(m_sWords[nIndex]); if(nLen>=4) { m_nTags[i][j]=11; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8); m_nTags[i][j]=12; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8); m_nTags[i][j]=13; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8); } else if(nLen==2) { m_nTags[i][j]=0; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1); nCharType=charType((unsigned char *)m_sWords[nIndex]); if(nCharType==CT_OTHER||nCharType==CT_CHINESE) { m_nTags[i][j]=1; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)+1); m_nTags[i][j]=2; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)+1); m_nTags[i][j]=3; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)+1); m_nTags[i][j]=4; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,4)+1); } m_nTags[i][j]=11; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8); m_nTags[i][j]=12; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8); m_nTags[i][j]=13; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8); } break; case TT_TRANS: j=0; nLen=strlen(m_sWords[nIndex]); m_nTags[i][j]=0; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1); if(!IsAllChinese((unsigned char *)m_sWords[nIndex])) { if(IsAllLetter((unsigned char *)m_sWords[nIndex])) { m_nTags[i][j]=1; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)+1); m_nTags[i][j]=11; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)+1); /* } if(IsAllNum((unsigned char *)m_sWords[nIndex])||IsAllLetter((unsigned char *)m_sWords[nIndex])) { */ m_nTags[i][j]=2; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)*2+1); m_nTags[i][j]=3; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)*2+1); m_nTags[i][j]=12; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*2+1); m_nTags[i][j]=13; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*2+1); } m_nTags[i][j]=41; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,41)*8); m_nTags[i][j]=42; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,42)*8); m_nTags[i][j]=43; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,43)*8); } else if(nLen>=4) { m_nTags[i][j]=41; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,41)*8); m_nTags[i][j]=42; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,42)*8); m_nTags[i][j]=43; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,43)*8); } else if(nLen==2) { nCharType=charType((unsigned char *)m_sWords[nIndex]); if(nCharType==CT_OTHER||nCharType==CT_CHINESE) { m_nTags[i][j]=1; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)*2+1); m_nTags[i][j]=2; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)*2+1); m_nTags[i][j]=3; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)*2+1); m_nTags[i][j]=30; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,30)*8+1); m_nTags[i][j]=11; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*4+1); m_nTags[i][j]=12; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*4+1); m_nTags[i][j]=13; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*4+1); m_nTags[i][j]=21; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,21)*2+1); m_nTags[i][j]=22; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,22)*2+1); m_nTags[i][j]=23; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,23)*2+1); } m_nTags[i][j]=41; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,41)*8); m_nTags[i][j]=42; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,42)*8); m_nTags[i][j]=43; m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,43)*8); } break; default: break; } *pSubIndex=j; return true; }