bool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, char16_t* aOut) { NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]), "illegal first byte"); NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]), "illegal second byte"); NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]), "illegal third byte"); NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]), "illegal forth byte"); if(! FIRST_BYTE_IS_SURROGATE(aSrc[0])) return false; if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1])) return false; if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2])) return false; if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3])) return false; uint8_t a1 = (uint8_t) aSrc[0]; uint8_t a2 = (uint8_t) aSrc[1]; uint8_t a3 = (uint8_t) aSrc[2]; uint8_t a4 = (uint8_t) aSrc[3]; a1 -= (uint8_t)0x90; a2 -= (uint8_t)0x30; a3 -= (uint8_t)0x81; a4 -= (uint8_t)0x30; uint32_t idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4; // idx == ucs4Codepoint - 0x10000 if (idx > 0x000FFFFF) return false; *aOut++ = 0xD800 | (idx >> 10); *aOut = 0xDC00 | (0x000003FF & idx); return true; }
PRBool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut) { NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]), "illegal first byte"); NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]), "illegal second byte"); NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]), "illegal third byte"); NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]), "illegal forth byte"); if(! FIRST_BYTE_IS_SURROGATE(aSrc[0])) return PR_FALSE; if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1])) return PR_FALSE; if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2])) return PR_FALSE; if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3])) return PR_FALSE; PRUint8 a1 = (PRUint8) aSrc[0]; PRUint8 a2 = (PRUint8) aSrc[1]; PRUint8 a3 = (PRUint8) aSrc[2]; PRUint8 a4 = (PRUint8) aSrc[3]; a1 -= (PRUint8)0x90; a2 -= (PRUint8)0x30; a3 -= (PRUint8)0x81; a4 -= (PRUint8)0x30; PRUint32 idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4; *aOut++ = 0xD800 | (0x000003FF & (idx >> 10)); *aOut = 0xDC00 | (0x000003FF & idx); return PR_TRUE; }
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc, int32_t * aSrcLength, char16_t *aDest, int32_t * aDestLength) { int32_t i=0; int32_t iSrcLength = (*aSrcLength); int32_t iDestlen = 0; nsresult rv=NS_OK; *aSrcLength = 0; for (i=0;i<iSrcLength;i++) { if ( iDestlen >= (*aDestLength) ) { rv = NS_OK_UDEC_MOREOUTPUT; break; } // The valid range for the 1st byte is [0x81,0xFE] if(LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc)) { if(i+1 >= iSrcLength) { rv = NS_OK_UDEC_MOREINPUT; break; } // To make sure, the second byte has to be checked as well. // In GBK, the second byte range is [0x40,0x7E] and [0x80,0XFE] if(LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1])) { // Valid GBK code *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]); if(UCS2_NO_MAPPING == *aDest) { // We cannot map in the common mapping, let's call the // delegate 2 byte decoder to decode the gbk or gb18030 unique // 2 byte mapping if(! TryExtensionDecoder(aSrc, aDest)) { *aDest = UCS2_NO_MAPPING; } } aSrc += 2; i++; } else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1])) { // from the first 2 bytes, it looks like a 4 byte GB18030 if(i+3 >= iSrcLength) // make sure we got 4 bytes { rv = NS_OK_UDEC_MOREINPUT; break; } // 4 bytes patten // [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39] // preset the if (LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) && LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3])) { if ( ! FIRST_BYTE_IS_SURROGATE(aSrc[0])) { // let's call the delegated 4 byte gb18030 converter to convert it if(! Try4BytesDecoder(aSrc, aDest)) *aDest = UCS2_NO_MAPPING; } else { // let's try supplement mapping if ( (iDestlen+1) < (*aDestLength) ) { if(DecodeToSurrogate(aSrc, aDest)) { // surrogte two char16_t iDestlen++; aDest++; } else { *aDest = UCS2_NO_MAPPING; } } else { if (*aDestLength < 2) { NS_ERROR("insufficient space in output buffer"); *aDest = UCS2_NO_MAPPING; } else { rv = NS_OK_UDEC_MOREOUTPUT; break; } } } aSrc += 4; i += 3; } else { *aDest = UCS2_NO_MAPPING; // If the third and fourth bytes are not in the legal ranges for // a four-byte sequnce, resynchronize on the second byte // (which we know is in the range of LEGAL_GBK_4BYTE_SECOND_BYTE, // 0x30-0x39) aSrc++; } } else if ((uint8_t) aSrc[0] == (uint8_t)0xA0 ) { // stand-alone (not followed by a valid second byte) 0xA0 ! // treat it as valid a la Netscape 4.x *aDest = CAST_CHAR_TO_UNICHAR(*aSrc); aSrc++; } else { // Invalid GBK code point (second byte should be 0x40 or higher) *aDest = UCS2_NO_MAPPING; aSrc++; } } else { if(IS_ASCII(*aSrc)) { // The source is an ASCII *aDest = CAST_CHAR_TO_UNICHAR(*aSrc); aSrc++; } else { if(IS_GBK_EURO(*aSrc)) { *aDest = UCS2_EURO; } else { *aDest = UCS2_NO_MAPPING; } aSrc++; } } iDestlen++; aDest++; *aSrcLength = i+1; } *aDestLength = iDestlen; return rv; }