/*
 * Computes which native encodings could have produced the string s.
 *
 * The result starts as kEncodingAll and, for every non-ASCII character found,
 * is ANDed with findPossibleEncodings() of the reconstructed two-byte native
 * character. ASCII bytes do not narrow the result.
 *
 * s must be a NUL-terminated string. Scanning never advances past the
 * terminator: if the string ends in the middle of a sequence, the missing
 * bytes are treated as 0 instead of being read from beyond the string.
 */
static uint32_t possibleEncodings(const char* s)
{
    uint32_t result = kEncodingAll;
    // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1
    // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back
    uint8_t ch1, ch2;
    const uint8_t* chp = (const uint8_t *)s;

    while ((ch1 = *chp++)) {
        if (!(ch1 & 0x80))
            continue;   // ASCII character, which could be anything

        // Continuation byte of the first utf8 pair; stay on the terminator if the
        // string ends here.
        ch2 = *chp;
        if (ch2)
            chp++;
        // Low 2 bits of the lead byte + low 6 bits of the continuation byte.
        ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
        // ch1 is now the first byte of the potential native char

        ch2 = *chp;
        if (ch2)
            chp++;
        if (ch2 & 0x80) {
            // The second native byte was itself expanded into a utf8 pair.
            uint8_t ch3 = *chp;
            if (ch3)
                chp++;
            ch2 = ((ch2 << 6) & 0xC0) | (ch3 & 0x3F);
        }
        // ch2 is now the second byte of the potential native char
        int ch = (int)ch1 << 8 | (int)ch2;
        result &= findPossibleEncodings(ch);
    }

    return result;
}
// Example 2
/*
 * Computes which native encodings could have produced the string s.
 *
 * The input string is logged on entry. The result starts as kEncodingAll and
 * is ANDed with findPossibleEncodings() for every non-ASCII character found;
 * ASCII bytes do not narrow the result.
 *
 * Vendor build (ANDROID_DEFAULT_CODE not defined):
 *   - A leading 0xFF byte is skipped and bypasses the ISUTF8() shortcut.
 *     NOTE(review): the meaning of that byte is not visible here; presumably a
 *     marker added by the caller -- confirm.
 *   - Otherwise, if ISUTF8(s) is true, 0xFFFFFFFF is returned immediately
 *     (the exit log line is not emitted on this path).
 *   - Each high byte is paired with the byte that follows it to form the
 *     candidate character. kEncodingISO8859 is ORed into the result only if
 *     every byte of every such pair satisfied charMatchISO8859().
 *
 * Stock build (ANDROID_DEFAULT_CODE defined):
 *   - The string is assumed to be native bytes that were expanded to utf8 as
 *     if they were latin-1; that expansion is reversed before the lookup.
 *
 * s must be a NUL-terminated string. Scanning never advances past the
 * terminator: if the string ends in the middle of a sequence, the missing
 * bytes are treated as 0 instead of being read from beyond the string.
 */
static uint32_t possibleEncodings(const char* s)
{

    ALOGI("+possibleEncodings %s \n",s);

    uint32_t result = kEncodingAll;

    uint8_t ch1, ch2;
    const uint8_t* chp = (const uint8_t *)s;

#ifndef ANDROID_DEFAULT_CODE
    // added by xu.lai for enhance
    // The first byte is tested through the uint8_t pointer: comparing a plain
    // char with 0xFF can never be equal on platforms where char is signed.
    if (*chp == 0xFF) {
        chp++;
    } else if (ISUTF8(s)) {
        return 0xFFFFFFFF;
    }

    // Stays at kEncodingISO8859 until a byte fails charMatchISO8859(); once
    // cleared it is never set again and the matcher is no longer called.
    uint32_t uiISO8859 = kEncodingISO8859;
    // Only consumed by the disabled BIG5/GBK heuristic further down.
    int GBK_count = 0;
    int BIG5_count = 0;

    while ((ch1 = *chp++)) {
        if (!(ch1 & 0x80))
            continue;   // ASCII character, which could be anything

        if (!(uiISO8859 && charMatchISO8859(ch1)))
            uiISO8859 = 0;

        // Second byte of the pair; stay on the terminator if the string ends here.
        ch2 = *chp;
        if (ch2)
            chp++;
        if (!(uiISO8859 && charMatchISO8859(ch2)))
            uiISO8859 = 0;

        int ch = (int)ch1 << 8 | (int)ch2;
        result &= findPossibleEncodings(ch);
        if (charMatchestest(ch, kGBKSWRanges, ARRAY_SIZE(kGBKSWRanges)))
            GBK_count++;
        if (charMatchestest(ch, kBig5SWRanges, ARRAY_SIZE(kBig5SWRanges)))
            BIG5_count++;
    }

    result |= uiISO8859; // contain the iso8859 info in the result

    /*
     * Disabled BIG5 & GBK enhancement, kept for reference:
     *
     *   if (GBK_count | BIG5_count) {
     *       if (GBK_count > BIG5_count)
     *           result &= 0xFFFB;
     *       else if (GBK_count < BIG5_count)
     *           result &= 0xFFFD;
     *   }
     */
    (void)GBK_count;
    (void)BIG5_count;

    // Cast so the argument type matches the %d conversion.
    ALOGI("-possibleEncodings %d \n",(int)result);
#else
    // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1
    // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back
    while ((ch1 = *chp++)) {
        if (!(ch1 & 0x80))
            continue;   // ASCII character, which could be anything

        // Continuation byte of the first utf8 pair; stay on the terminator if the
        // string ends here.
        ch2 = *chp;
        if (ch2)
            chp++;
        // Low 2 bits of the lead byte + low 6 bits of the continuation byte.
        ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
        // ch1 is now the first byte of the potential native char

        ch2 = *chp;
        if (ch2)
            chp++;
        if (ch2 & 0x80) {
            // The second native byte was itself expanded into a utf8 pair.
            uint8_t ch3 = *chp;
            if (ch3)
                chp++;
            ch2 = ((ch2 << 6) & 0xC0) | (ch3 & 0x3F);
        }
        // ch2 is now the second byte of the potential native char
        int ch = (int)ch1 << 8 | (int)ch2;
        result &= findPossibleEncodings(ch);
    }

#endif

    return result;
}