/*
 * "int isValidVariableAndNotReserved(char *str)"
 * Decide whether a token may be used as a variable name.
 *
 * returns 0 when str is NULL, is a reserved word, is numerical, or is a
 *           single character of class 3 (punctuation/symbol according to
 *           the original comments)
 * returns 1 when the token is accepted
 *
 * A token whose first character is not of class 2 (a letter, according to
 * the original comments), or that is longer than MAX_VAR_LEN, is reported
 * with printError(err31, ...) and the program exits with EXIT_FAILURE.
 * Characters after the first one are not inspected here.
 */
int isValidVariableAndNotReserved(char *str){

    int nameLen;

    // nothing to validate
    if (str == NULL) {
        return 0;
    }

    // reserved words and numbers can never be variables
    if (isReserverdWord(str)) {
        return 0;
    }
    if (stringIsNumber(str)) {
        return 0;
    }

    nameLen = strlen(str);

    // a lone punctuation/symbol character is a token of its own, not a variable
    if (nameLen == 1 && charType(str[0]) == 3) {
        return 0;
    }

    // wrong first character or too long: fatal error
    if (charType(str[0]) != 2 || nameLen > MAX_VAR_LEN) {
        printError(err31, str);
        //printf("Error, variable %s is invalid\n", str);
        exit(EXIT_FAILURE);
    }

    // every rejection case has been handled above
    return 1;
}
/*
 * "void splitInputTokens(char cleanSrc[], char *caCleanInputTokens[])"
 * Split the cleaned source text into tokens.
 *
 * cleanSrc            : cleaned source characters, read from index 0 up to
 *                       and including index m_nCleanCount
 * caCleanInputTokens  : receives one newly allocated string per token,
 *                       stored at index m_nCleanInputTokens, which is
 *                       incremented for every token stored
 *
 * A token ends when the next character is a special character, when the
 * next character is empty (charType() == 0, i.e. space/new line according
 * to the original comments), or when the character just stored was itself
 * a special character.
 *
 * NOTE(review): nothing bounds the token length against MAX_STR or the
 * number of tokens against the capacity of caCleanInputTokens - confirm
 * that callers guarantee both.
 */
void splitInputTokens(char cleanSrc[], char *caCleanInputTokens[]){

    char token[MAX_STR] = " ";
    int srcPos = 0;
    int tokenLen = 0;
    int lastWasSpecial = 0;

    while (srcPos <= m_nCleanCount) {

        // non empty character: remember whether it is special, then store it
        // and advance both the source index and the token index
        if (charType(cleanSrc[srcPos])) {
            lastWasSpecial = isSpecialChar(cleanSrc[srcPos]);
            token[tokenLen++] = cleanSrc[srcPos++];
        }

        // token continues: go straight to the next character
        if (!(isSpecialChar(cleanSrc[srcPos])
              || (charType(cleanSrc[srcPos]) == 0)
              || lastWasSpecial)) {
            continue;
        }

        // token boundary: store what was collected, if anything
        if (tokenLen) {
            caCleanInputTokens[m_nCleanInputTokens] = cleanInputTokenCalloc(tokenLen);
            strcpy(caCleanInputTokens[m_nCleanInputTokens], token);
            m_nCleanInputTokens++;
        }

        // start a fresh token
        memset(token, 0, sizeof(token));
        tokenLen = 0;

        // an empty character is consumed here; anything else is handled
        // by the next iteration
        if (charType(cleanSrc[srcPos]) == 0) {
            srcPos++;
        }
    }
}
// ----------------------------------------------------------------------------- // CWPRoot::Visit // ----------------------------------------------------------------------------- // void CWPRoot::VisitL( CWPParameter& aParameter ) { FLOG( _L( "[Provisioning] CWPRoot::VisitL Parameter:" ) ); if( iCharStack ) { TInt charType( iCharStack->Type() ); TInt paramID( aParameter.ID() ); // If the current characteristic is a logical proxy or access point, // add it to the list of potential targets of a link if( (charType == KWPPxLogical && paramID == EWPParameterProxyID ) || (charType == KWPNapDef && paramID == EWPParameterNapID) ) { iProviderIDs->AppendL( aParameter.Value() ); } // Handle internet-capable NAPDEF here else if( charType == KWPNapDef && paramID == EWPParameterInternet ) { User::LeaveIfError( iProviders.Append( iCharStack ) ); iProviderIDs->AppendL( KInternet ); } // If the parameter can link, append to the list of sources of links else if( paramID == EWPParameterToNapID || paramID == EWPParameterToProxy || (charType == KWPBootstrap && paramID == EWPParameterProxyID) ) { iNeededIDs->AppendL( aParameter.Value() ); User::LeaveIfError( iNeeders.Append( iCharStack ) ); } } }
// ----------------------------------------------------------------------------- // CWPRoot::Visit // ----------------------------------------------------------------------------- // void CWPRoot::VisitL( CWPCharacteristic& aCharacteristic ) { FLOG( _L( "[Provisioning] CWPRoot::VisitL Char:" ) ); TInt charType( aCharacteristic.Type() ); // Add the characteristic to the stack as current CWPCharacteristic* charStack = iCharStack; iCharStack = &aCharacteristic; // Logical proxy and access points as treated as potential targets of links if( charType == KWPPxLogical || charType == KWPNapDef ) { // First get the characteristic and then use a visitor to find out the // id aCharacteristic.AcceptL( *this ); // If no id was found, there's something wrong with the document if( iProviders.Count() == iProviderIDs->Count()-1 ) { User::LeaveIfError( iProviders.Append( &aCharacteristic ) ); } } else { // Just enter other characteristics aCharacteristic.AcceptL( *this ); } // Remove the current from stack iCharStack = charStack; }
/*
 * eatSpace: move mark over whitespace until it rests on a character that
 *           is not whitespace. Line ends are crossed (forwards, or
 *           backwards when reverse is set) unless
 *           EditFlags.OperatorWantsMove is set. Returns a pointer to the
 *           character found, or NULL if there is none.
 */
static char *eatSpace( i_mark *mark, bool reverse )
{
    char    *p;

    p = ptrFromMark( mark );
    if( p == NULL ) {
        return( NULL );
    }

    while( charType( *p, true ) == BLOCK_WHITESPACE ) {
        p = reverse ? decrementMark( mark ) : incrementMark( mark );
        if( p != NULL ) {
            continue;
        }

        /* ran off the line: stop here, or carry on with the adjacent line */
        if( EditFlags.OperatorWantsMove ) {
            return( NULL );
        }
        p = reverse ? prevLine( mark ) : nextLine( mark );
        if( p == NULL ) {
            return( NULL );
        }
    }
    return( p );

} /* eatSpace */
bool ScriptTokeniser::tokeniseSolidus (char c) { switch (charType(c)) { case eNewline: case eWhitespace: case eCharQuote: case eCharSpecial: pop(); add('/'); m_emit = true; // emit single slash break; case eCharToken: pop(); add('/'); add(c); break; case eCharSolidus: pop(); push(Tokenise(&ScriptTokeniser::tokeniseComment)); break; // don't emit single slash case eCharStar: pop(); push(Tokenise(&ScriptTokeniser::tokeniseBlockComment)); break; // don't emit single slash default: break; } return true; }
bool ScriptTokeniser::tokeniseDefault (char c) { switch (charType(c)) { case eNewline: break; case eCharToken: case eCharStar: push(Tokenise(&ScriptTokeniser::tokeniseToken)); add(c); break; case eCharSpecial: push(Tokenise(&ScriptTokeniser::tokeniseSpecial)); add(c); break; case eCharQuote: push(Tokenise(&ScriptTokeniser::tokeniseQuotedToken)); break; case eCharSolidus: push(Tokenise(&ScriptTokeniser::tokeniseSolidus)); break; default: break; } return true; }
/*
 * "int stringIsNumber(char *str)"
 * Check whether a string is numerical, i.e. every character is of class 1
 * according to charType().
 *
 * returns 0 when str is NULL or contains a character that is not numerical
 * returns 1 when all characters are numerical (this includes the empty
 *           string, for which there is nothing to reject)
 *
 * A numerical string of more than 5 digits is reported with
 * printError(err25, ...) and the program exits with EXIT_FAILURE.
 */
int stringIsNumber(char *str){

    int digitCount;
    int pos;

    // nothing to inspect
    if (str == NULL) {
        return 0;
    }

    digitCount = strlen(str);

    // one character that is not numerical is enough to reject the string
    pos = 0;
    while (pos < digitCount) {
        if (charType(str[pos]) != 1) {
            return 0;
        }
        pos++;
    }

    // numerical, but too long: fatal error
    if (digitCount > 5) {
        printError(err25, str);
        //printf("Error, string: %s is numerical, but it has more than 5 digits\n", str);
        exit(EXIT_FAILURE);
    }

    //strtol(str, (char **)NULL, 10)
    return 1;
}
/*
 * MarkEndOfNextWordForward - set result to the end of the next word in the
 *                            forwards direction, starting from curr.
 *                            The big flag is handed to charType() unchanged.
 *                            Returns ERR_NO_ERR, or ERR_NOT_THAT_MANY_WORDS
 *                            when there is no further word.
 */
vi_rc MarkEndOfNextWordForward( i_mark *result, i_mark *curr, bool big )
{
    char    *p;
    btype   here;
    btype   after;

    noWrap = false;
    *result = *curr;

    /*
     * For a change-word operation the flag is consumed, and the mark stays
     * where it is when it already sits on the last character of its block
     * (or on the end of the line).
     */
    if( EditFlags.IsChangeWord ) {
        EditFlags.IsChangeWord = false;
        p = ptrFromMark( result );
        if( p == NULL ) {
            return( ERR_NO_ERR );
        }
        here = charType( p[0], big );
        after = charType( p[1], big );
        if( here == BLOCK_ENDOFLINE || here != after ) {
            return( ERR_NO_ERR );
        }
    }

    /* step off the current character, moving to the next line if needed */
    p = incrementMark( result );
    if( p == NULL ) {
        p = nextLine( result );
    }
    if( p == NULL ) {
        if( !EditFlags.OperatorWantsMove ) {
            return( ERR_NOT_THAT_MANY_WORDS );
        }
        *result = *curr;
        return( ERR_NO_ERR );
    }

    p = eatSpace( result, false );
    if( p == NULL ) {
        return( ERR_NOT_THAT_MANY_WORDS );
    }

    /* advance while the following character belongs to the same block */
    here = charType( *p, big );
    while( charType( p[1], big ) == here ) {
        p = incrementMark( result );
        if( p == NULL ) {
            break;
        }
    }
    return( ERR_NO_ERR );

} /* MarkEndOfNextWordForward */
/*
 * GimmeCurrentEntireWordDim - scan forwards and backwards from CurrentPos
 *                             to find the extent of the word (or run of
 *                             delimiters) under it. On success *sc and *ec
 *                             receive the column after the backward scan
 *                             stop and the column before the forward scan
 *                             stop, and ERR_NO_ERR is returned; otherwise
 *                             ERR_NO_WORD_TO_FIND is returned and *sc/*ec
 *                             are left untouched.
 */
vi_rc GimmeCurrentEntireWordDim( int *sc, int *ec, bool big )
{
    i_mark  origin;
    i_mark  first;
    i_mark  last;
    char    *p;
    int     line_len;
    btype   kind;

    noWrap = false;

    origin = CurrentPos;
    p = ptrFromMark( &origin );
    if( p == NULL ) {
        return( ERR_NO_WORD_TO_FIND );
    }

    kind = charType( *p, big );
    if( kind != BLOCK_WORD && kind != BLOCK_DELIM ) {
        return( ERR_NO_WORD_TO_FIND );
    }

    /* forwards: stop at the first character of another kind */
    last = origin;
    line_len = LineLength( last.line );
    while( charType( *p, big ) == kind && last.column <= line_len ) {
        p = incrementMark( &last );
        if( p == NULL ) {
            break;
        }
    }

    /* backwards: same again from the original position */
    first = origin;
    p = ptrFromMark( &first );
    while( charType( *p, big ) == kind && first.column >= 1 ) {
        p = decrementMark( &first );
        if( p == NULL ) {
            break;
        }
    }

    /* both marks now sit one past the word, so step back inside */
    *sc = first.column + 1;
    *ec = last.column - 1;
    return( ERR_NO_ERR );

} /* GimmeCurrentEntireWordDim */
/********************************************************************* * * Func Name : GetWordType * * Description: Get the type of word * * * Parameters : sWord: the word * Returns : the type * Author : Kevin Zhang * History : * 1.create 2002-1-9 *********************************************************************/ int CDictionary::GetWordType(char *sWord) { int nType=charType((unsigned char *)sWord),nLen=strlen(sWord); if(nLen>0&&nType==CT_CHINESE&&IsAllChinese((unsigned char *)sWord)) return WT_CHINESE;//Chinese word else if(nLen>0&&nType==CT_DELIMITER) return WT_DELIMITER;//Delimiter else return WT_OTHER;//other invalid }
/*
 * printHashTable: print every entry of the hash table to stdout, one line
 * per entry, in the form "name = type, scope, storage".
 *
 * hashtab : array of HASHSIZE bucket heads, each the start of a list
 *           linked through ->next; a NULL table prints nothing.
 *
 * The type text returned by charType() is printed directly. It used to be
 * copied with strcpy() into a local buffer of MAX_ID_LEN + 1 bytes, which
 * overflowed whenever the text was longer than MAX_ID_LEN.
 */
void printHashTable(struct nlist** hashtab)
{
    struct nlist* np;
    const char* typeName;
    int i;

    if (hashtab == NULL) {
        return;
    }

    for (i = 0; i < HASHSIZE; i++) {
        for (np = hashtab[i]; np != NULL; np = np->next) {
            typeName = charType(np->type);
            printf("%s = %s, %s, %s\n", np->name, typeName, np->scope, np->storage);
        }
    }
}
/*
 * MarkStartOfNextWordBackward - set result to the start of the next word
 *                               in the backwards direction, starting from
 *                               curr. The big flag is handed to charType()
 *                               unchanged. Returns ERR_NO_ERR, or
 *                               ERR_NOT_THAT_MANY_WORDS when there is no
 *                               earlier word.
 */
vi_rc MarkStartOfNextWordBackward( i_mark *result, i_mark *curr, bool big )
{
    char    *p;
    btype   kind;

    noWrap = false;
    *result = *curr;

    /* step off the current character, moving to the previous line if needed */
    p = decrementMark( result );
    if( p == NULL ) {
        p = prevLine( result );
        if( p == NULL ) {
            return( ERR_NOT_THAT_MANY_WORDS );
        }
        if( EditFlags.Modeless ) {
            incrementMark( result );
            return ERR_NO_ERR;
        }
    }

    p = eatSpace( result, true );
    if( p == NULL ) {
        return( ERR_NOT_THAT_MANY_WORDS );
    }

    /*
     * The loop below looks at the previous character, so it must not run
     * when the mark is already sitting at the start of a line.
     */
    if( result->column <= 1 ) {
        return( ERR_NO_ERR );
    }

    kind = charType( *p, big );
    while( charType( p[-1], big ) == kind ) {
        p = decrementMark( result );
        if( p == NULL || result->column == 1 ) {
            break;
        }
    }
    return( ERR_NO_ERR );

} /* MarkStartOfNextWordBackward */
/*
 * MarkStartOfNextWordForward: given a mark curr which denotes a logical
 * current position, store the location of the next logical word in the
 * result mark and return ERR_NO_ERR, or return ERR_NOT_THAT_MANY_WORDS if
 * there are no more logical words in the file. This will span lines.
 */
vi_rc MarkStartOfNextWordForward( i_mark *result, i_mark *curr, bool big )
{
    char    *p;
    btype   kind;

    noWrap = false;
    *result = *curr;

    /* nothing under the mark: continue from the next line */
    p = ptrFromMark( result );
    if( p == NULL ) {
        p = nextLine( result );
        if( p == NULL ) {
            return( ERR_NOT_THAT_MANY_WORDS );
        }
        if( EditFlags.Modeless ) {
            return( ERR_NO_ERR );
        }
    }

    /* skip the rest of the block the mark is currently in */
    kind = charType( *p, big );
    while( charType( *p, big ) == kind ) {
        p = incrementMark( result );
        if( p != NULL ) {
            continue;
        }

        /* the block ran to the end of the line */
        if( EditFlags.OperatorWantsMove || EditFlags.Modeless ) {
            return( ERR_NO_ERR );
        }
        if( nextLine( result ) == NULL ) {
            return( ERR_NOT_THAT_MANY_WORDS );
        }
        break;
    }

    /* then skip the whitespace in front of the next word */
    eatSpace( result, false );
    return( ERR_NO_ERR );

} /* MarkStartOfNextWordForward */
bool CResult::ChineseNameSplit(char *sPersonName, char *sSurname, char *sSurname2, char *sGivenName, CDictionary &personDict) { int nSurNameLen=4,nLen=strlen(sPersonName),nFreq,i=0,nCharType,nFreqGiven; char sTemp[3]; if(nLen<3||nLen>8)//Not a traditional Chinese person name return false; while(i<nLen)//No Including non-CHinese char { nCharType=charType((unsigned char*)sPersonName+i); if(nCharType!=CT_CHINESE&&nCharType!=CT_OTHER) return false; i+=2; } sSurname2[0]=0;//init strncpy(sSurname,sPersonName,nSurNameLen); sSurname[nSurNameLen]=0; if(!personDict.IsExist(sSurname,1)) { nSurNameLen=2; sSurname[nSurNameLen]=0; if(!personDict.IsExist(sSurname,1)) { nSurNameLen=0; sSurname[nSurNameLen]=0; } } strcpy(sGivenName,sPersonName+nSurNameLen); if(nLen>6) { strncpy(sTemp,sPersonName+nSurNameLen,2); sTemp[2]=0;//Get the second possible surname if(personDict.IsExist(sTemp,1)) {//Hongkong women's name: Surname+surname+given name strcpy(sSurname2,sTemp); strcpy(sGivenName,sPersonName+nSurNameLen+2); } } nFreq=personDict.GetFrequency(sSurname,1); strncpy(sTemp,sGivenName,2); sTemp[2]=0; nFreqGiven=personDict.GetFrequency(sTemp,2); if(nSurNameLen!=4&&((nSurNameLen==0&&nLen>4)||strlen(sGivenName)>4||(GetForeignCharCount(sPersonName)>=3&&nFreq<personDict.GetFrequency("张",1)/40&&nFreqGiven<personDict.GetFrequency("华",2)/20)||(nFreq<10&&GetForeignCharCount(sGivenName)==(nLen-nSurNameLen)/2))) return false; if(nLen==4&&m_uPerson.IsGivenName(sPersonName)) {//Single Surname+given name return false; } return true; }
bool ScriptTokeniser::tokeniseToken (char c) { switch (charType(c)) { case eNewline: case eWhitespace: case eCharQuote: case eCharSpecial: pop(); m_emit = true; // emit token break; case eCharSolidus: case eCharToken: case eCharStar: add(c); break; default: break; } return true; }
/**
 * State handler used while inside a quoted token: whitespace, token,
 * solidus, star and special characters are passed to add(); newlines are
 * skipped; a quote replaces this state with the end-quote state.
 * @return always true
 */
bool ScriptTokeniser::tokeniseQuotedToken (char c)
{
	switch (charType(c)) {
	case eCharQuote:
		// closing quote: hand over to the end-quote state
		pop();
		push(Tokenise(&ScriptTokeniser::tokeniseEndQuote));
		break;
	case eCharSpecial:
	case eCharStar:
	case eCharSolidus:
	case eCharToken:
	case eWhitespace:
		// everything printable belongs to the quoted token
		add(c);
		break;
	case eNewline:
	default:
		break;
	}
	return true;
}
//Guess the POS of No. nIndex word item
//
//Fills row nIndex of m_nTags / m_dFrequency with candidate tag ids and the
//value 1/denominator for each of them, where the denominator is derived from
//m_context.GetFrequency(0,tag). Which candidates are written depends on
//m_tagType and on the byte length / character type of m_sWords[nIndex].
//
//  nIndex    : index of the word item; selects m_sWords[nIndex] and the row
//              written in m_nTags / m_dFrequency
//  pSubIndex : out, number of candidates written (0 for TT_NORMAL and for
//              unknown tag types)
//  returns   : always true
//
//NOTE(review): denominators of the form GetFrequency(...)*8 have no "+1",
//so they are zero whenever GetFrequency returns 0 - confirm that this cannot
//happen or that an infinite value is acceptable.
//NOTE(review): nothing here bounds j against the row size of m_nTags /
//m_dFrequency; up to 14 candidates are written (TT_TRANS, nLen==2).
bool CSpan::GuessPOS(int nIndex,int *pSubIndex)
{
	int j=0,i=nIndex,nCharType;
	unsigned int nLen;
	switch(m_tagType)
	{
	case TT_NORMAL:
		//no guessing for normal tagging
		break;
	case TT_PERSON:
		j=0;
		if(CC_Find("××",m_sWords[nIndex]))
		{
			//word contains the "××" marker: single candidate, tag 6
			m_nTags[i][j]=6;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,6)+1);
		}
		else
		{
			m_nTags[i][j]=0;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1);
			nLen=strlen(m_sWords[nIndex]);
			if(nLen>=4)
			{
				//NOTE(review): tag 0 was already added just above, so it
				//appears twice in this row - confirm this is intended
				m_nTags[i][j]=0;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1);
				m_nTags[i][j]=11;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8);
				m_nTags[i][j]=12;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8);
				m_nTags[i][j]=13;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8);
			}
			else if(nLen==2)
			{
				//NOTE(review): tag 0 is added a second time here as well
				m_nTags[i][j]=0;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1);
				nCharType=charType((unsigned char *)m_sWords[nIndex]);
				if(nCharType==CT_OTHER||nCharType==CT_CHINESE)
				{
					//tags 1-4 only for a CT_OTHER / CT_CHINESE character
					m_nTags[i][j]=1;
					m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)+1);
					m_nTags[i][j]=2;
					m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)+1);
					m_nTags[i][j]=3;
					m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)+1);
					m_nTags[i][j]=4;
					m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,4)+1);
				}
				m_nTags[i][j]=11;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8);
				m_nTags[i][j]=12;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8);
				m_nTags[i][j]=13;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8);
			}
		}
		break;
	case TT_PLACE:
		j=0;
		//tag 0 is always a candidate
		m_nTags[i][j]=0;
		m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1);
		nLen=strlen(m_sWords[nIndex]);
		if(nLen>=4)
		{
			m_nTags[i][j]=11;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8);
			m_nTags[i][j]=12;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8);
			m_nTags[i][j]=13;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8);
		}
		else if(nLen==2)
		{
			//NOTE(review): tag 0 is added a second time here
			m_nTags[i][j]=0;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1);
			nCharType=charType((unsigned char *)m_sWords[nIndex]);
			if(nCharType==CT_OTHER||nCharType==CT_CHINESE)
			{
				//tags 1-4 only for a CT_OTHER / CT_CHINESE character
				m_nTags[i][j]=1;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)+1);
				m_nTags[i][j]=2;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)+1);
				m_nTags[i][j]=3;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)+1);
				m_nTags[i][j]=4;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,4)+1);
			}
			m_nTags[i][j]=11;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*8);
			m_nTags[i][j]=12;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*8);
			m_nTags[i][j]=13;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*8);
		}
		break;
	case TT_TRANS:
		j=0;
		nLen=strlen(m_sWords[nIndex]);
		//tag 0 is always a candidate
		m_nTags[i][j]=0;
		m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,0)+1);
		if(!IsAllChinese((unsigned char *)m_sWords[nIndex]))
		{
			//word is not purely Chinese
			if(IsAllLetter((unsigned char *)m_sWords[nIndex]))
			{
				//letters only: tags 1, 11, 2, 3, 12, 13
				m_nTags[i][j]=1;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)+1);
				m_nTags[i][j]=11;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)+1);
				//Disabled in the original: the tags below used to sit in a
				//separate branch that also accepted all-number words.
/*			}
			if(IsAllNum((unsigned char *)m_sWords[nIndex])||IsAllLetter((unsigned char *)m_sWords[nIndex]))
			{
*/
				m_nTags[i][j]=2;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)*2+1);
				m_nTags[i][j]=3;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)*2+1);
				m_nTags[i][j]=12;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*2+1);
				m_nTags[i][j]=13;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*2+1);
			}
			m_nTags[i][j]=41;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,41)*8);
			m_nTags[i][j]=42;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,42)*8);
			m_nTags[i][j]=43;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,43)*8);
		}
		else if(nLen>=4)
		{
			//purely Chinese, at least 4 bytes
			m_nTags[i][j]=41;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,41)*8);
			m_nTags[i][j]=42;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,42)*8);
			m_nTags[i][j]=43;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,43)*8);
		}
		else if(nLen==2)
		{
			//purely Chinese, exactly 2 bytes
			nCharType=charType((unsigned char *)m_sWords[nIndex]);
			if(nCharType==CT_OTHER||nCharType==CT_CHINESE)
			{
				m_nTags[i][j]=1;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,1)*2+1);
				m_nTags[i][j]=2;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,2)*2+1);
				m_nTags[i][j]=3;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,3)*2+1);
				m_nTags[i][j]=30;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,30)*8+1);
				m_nTags[i][j]=11;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,11)*4+1);
				m_nTags[i][j]=12;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,12)*4+1);
				m_nTags[i][j]=13;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,13)*4+1);
				m_nTags[i][j]=21;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,21)*2+1);
				m_nTags[i][j]=22;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,22)*2+1);
				m_nTags[i][j]=23;
				m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,23)*2+1);
			}
			m_nTags[i][j]=41;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,41)*8);
			m_nTags[i][j]=42;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,42)*8);
			m_nTags[i][j]=43;
			m_dFrequency[i][j++]=(double)1/(double)(m_context.GetFrequency(0,43)*8);
		}
		break;
	default:
		break;
	}
	//report how many candidates were written
	*pSubIndex=j;
	return true;
}
bool CSegGraph::AtomSegment(char *sSentence) { unsigned int i=0,j=0,nCurType,nNextType; //i is the pointer of sentence string //j is the pointer of pAtoms char sChar[3]; sChar[2]=0;//Set the char ending m_sAtom[j][0]=0;//Set the first word as null m_nAtomLength[j]=0; if(strncmp(sSentence,SENTENCE_BEGIN,strlen(SENTENCE_BEGIN))==0) { strcpy(m_sAtom[j],SENTENCE_BEGIN);//Set the first word as sentence begining m_nAtomLength[j]=strlen(SENTENCE_BEGIN); m_nAtomPOS[j]=CT_SENTENCE_BEGIN;//init i+=m_nAtomLength[j]; j+=1; m_sAtom[j][0]=0;//Set the first word as null m_nAtomLength[j]=0; } while(i<strlen(sSentence)) { if(strncmp(sSentence+i,SENTENCE_END,strlen(SENTENCE_END))==0) { strcpy(m_sAtom[j],SENTENCE_END);//Set the first word as null m_nAtomLength[j]=strlen(SENTENCE_END); m_nAtomPOS[j]=CT_SENTENCE_END;//init i+=m_nAtomLength[j]; j+=1; m_sAtom[j][0]=0;//Set the first word as null m_nAtomLength[j]=0; continue; } sChar[0]=*(sSentence+i);//Get the char with first byte sChar[1]=0;// i+=1; if(sChar[0]<0)//Two byte char { sChar[1]=*(sSentence+i);//Get the char with second byte i+=1;//i increased by 1 } strcat(m_sAtom[j],sChar); nCurType=charType((unsigned char *)sChar); if(sChar[0]=='.'&&(charType((unsigned char *)sSentence+i)==CT_NUM||(*(sSentence+i)>='0'&&*(sSentence+i)<='9'))) nCurType=CT_NUM;//Digit after . indicate . 
as a point in the numeric m_nAtomPOS[j]=nCurType; //Record its property, just convience for continuous processing if(nCurType==CT_CHINESE||nCurType==CT_INDEX||nCurType==CT_DELIMITER||nCurType==CT_OTHER) {//Chinese char, index number,delimiter and other is treated as atom m_nAtomLength[j]=strlen(m_sAtom[j]);//Save its length j+=1;//Skip to next atom m_sAtom[j][0]=0;//init } else {//Number,single char, letter nNextType=255; if(i<strlen(sSentence)) nNextType=charType((unsigned char *)(sSentence+i)); if(nNextType!=nCurType||i==strlen(sSentence)) //Reaching end or next char type is different from current char { m_nAtomLength[j]=strlen(m_sAtom[j]);//Save its length j+=1; m_sAtom[j][0]=0;//init } } } m_nAtomCount=j;//The count of segmentation atoms return true; }
/********************************************************************* * * Func Name : PreProcessing * * Description: Get the type of word * * * Parameters : sWord: the word * Returns : the type * Author : Kevin Zhang * History : * 1.create 2002-1-9 *********************************************************************/ bool CDictionary::PreProcessing(char *sWord, int *nId, char *sWordRet,bool bAdd) { //Position for the delimeters int nType=charType((unsigned char *)sWord),nLen=strlen(sWord); int nEnd=nLen-1,nBegin=0; if(nLen==0) return false; while(nEnd>=0&&sWord[nEnd]==' ') nEnd-=1; while(nBegin<=nEnd&&sWord[nBegin]==' ') nBegin+=1; if(nBegin>nEnd) return false; if(nEnd!=nLen-1||nBegin!=0) { strncpy(sWord,sWord+nBegin,nEnd-nBegin+1); sWord[nEnd-nBegin+1]=0; } /* if((bAdd||strlen(sWord)>4)&&IsAllChineseNum(sWord)) { //Only convert the Chinese Num to 3755 while //Get the inner code of the first Chinese Char strcpy(sWord,"五十八"); } */ if(nType==CT_CHINESE)//&&IsAllChinese((unsigned char *)sWord) {//Chinese word *nId=CC_ID(sWord[0],sWord[1]); //Get the inner code of the first Chinese Char strcpy(sWordRet,&sWord[2]);//store the word,not store the first Chinese Char return true; } /* if(nType==CT_NUM&&IsAllNum((unsigned char *)sWord)) { *nId=3756; //Get the inner code of the first Chinese Char sWordRet[0]=0;//store the word,not store the first Chinese Char return true; } */ if(nType==CT_DELIMITER) {//Delimiter *nId=3755; //Get the inner code of the first Chinese Char strcpy(sWordRet,sWord);//store the word,not store the first Chinese Char return true; } /* if(nType==CT_LETTER&&IsAllLetter((unsigned char *)sWord)) { *nId=3757; //Get the inner code of the first Chinese Char sWordRet[0]=0;//store the word,not store the first Chinese Char return true; } if(nType==CT_SINGLE&&IsAllSingleByte((unsigned char *)sWord)) { *nId=3758; //Get the inner code of the first Chinese Char sWordRet[0]=0;//store the word,not store the first Chinese Char return true; } 
if(nType==CT_INDEX&&IsAllIndex((unsigned char *)sWord)) { *nId=3759; //Get the inner code of the first Chinese Char sWordRet[0]=0;//store the word,not store the first Chinese Char return true; } */ return false;//other invalid }