/*
 * Returns the set of native encodings that could have produced the string s,
 * as a mask that starts at kEncodingAll and is AND-ed with
 * findPossibleEncodings() for every non-ASCII character found.
 *
 * If s contains a native encoding, then it was mistakenly encoded in UTF-8 as
 * if it were Latin-1, so the Latin-1 -> UTF-8 conversion is reversed here to
 * get the native bytes back before they are classified.
 *
 * s must be NUL-terminated. The scan never advances past the terminator: a
 * byte that is missing because the string ends in the middle of a sequence is
 * treated as 0.
 */
static uint32_t possibleEncodings(const char* s)
{
    uint32_t result = kEncodingAll;
    const uint8_t* chp = (const uint8_t*)s;
    uint8_t ch1, ch2;

    while ((ch1 = *chp) != 0) {
        chp++;
        if (!(ch1 & 0x80)) {
            // ASCII character, which could be anything
            continue;
        }

        // Fold the lead byte and its continuation byte back into one byte.
        ch2 = *chp;
        if (ch2 != 0) {
            chp++;
        }
        ch1 = (uint8_t)(((ch1 << 6) & 0xC0) | (ch2 & 0x3F));
        // ch1 is now the first byte of the potential native char

        ch2 = *chp;
        if (ch2 != 0) {
            chp++;
        }
        if (ch2 & 0x80) {
            // The second native byte was itself expanded to two bytes.
            uint8_t low = *chp;
            if (low != 0) {
                chp++;
            }
            ch2 = (uint8_t)(((ch2 << 6) & 0xC0) | (low & 0x3F));
        }
        // ch2 is now the second byte of the potential native char

        int ch = (int)ch1 << 8 | (int)ch2;
        result &= findPossibleEncodings(ch);
    }

    return result;
}
static uint32_t possibleEncodings(const char* s) { ALOGI("+possibleEncodings %s \n",s); uint32_t result = kEncodingAll; uint8_t ch1, ch2; uint8_t* chp = (uint8_t *)s; #ifndef ANDROID_DEFAULT_CODE //added by xu.lai for enhance if(*s!=0xFF){ if(ISUTF8(s)) return 0xFFFFFFFF; } else s++; if(*chp==0xFF) chp++; //added by xu.lai for enhance uint32_t uiISO8859 = kEncodingISO8859; int GBK_count=0; int BIG5_count=0; while ((ch1 = *chp++)) { if(ch1&0x80){ if(uiISO8859 && charMatchISO8859(ch1)){ uiISO8859 &= kEncodingISO8859; } else uiISO8859 = 0; ch2=*chp++; if(uiISO8859 && charMatchISO8859(ch2)){ uiISO8859 &= kEncodingISO8859; } else uiISO8859 = 0; int ch = (int)ch1 << 8 | (int)ch2; // ALOGD("ch %x \n",ch); result &= findPossibleEncodings(ch); // ALOGD("result %x \n",result); if(charMatchestest(ch,kGBKSWRanges,ARRAY_SIZE(kGBKSWRanges))) GBK_count++; if(charMatchestest(ch,kBig5SWRanges,ARRAY_SIZE(kBig5SWRanges))) BIG5_count++; } } // else ASCII character, which could be anything result |= uiISO8859; //contain the iso8859 info in the result // ALOGD("result 0x%x,GBK_count %d,BIG5_count %d \n",result,GBK_count,BIG5_count); /* //BIG5 & GBK enhance if(GBK_count|BIG5_count){ if(GBK_count>BIG5_count) result&=0xFFFB; else if(GBK_count<BIG5_count) result&=0xFFFD; } */ ALOGI("-possibleEncodings %d \n",result); #else while ((ch1 = *chp++)) { if (ch1 & 0x80) { ch2 = *chp++; ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F); // ch1 is now the first byte of the potential native char ch2 = *chp++; if (ch2 & 0x80) ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F); // ch2 is now the second byte of the potential native char int ch = (int)ch1 << 8 | (int)ch2; result &= findPossibleEncodings(ch); } // else ASCII character, which could be anything } #endif return result; }