/**
 * Decode a chunk of ISO-2022-JP / ISO-2022-JP-2 bytes into UTF-16 code units.
 *
 * The decoder is a byte-at-a-time state machine.  All state (mState,
 * mLastLegalState, mData, mRunLength, G2charset and the lazily created
 * delegate decoders) lives in members, so an escape sequence or a two-byte
 * character may be split across successive calls.
 *
 * @param aSrc      input bytes.
 * @param aSrcLen   in: number of bytes available at aSrc.  On every
 *                  non-NS_OK return it is overwritten with the number of
 *                  bytes consumed; on NS_OK it is left untouched (everything
 *                  was consumed).
 * @param aDest     output buffer.
 * @param aDestLen  in: capacity of aDest in code units; out: code units
 *                  written.
 *
 * @return NS_OK                   all input consumed.
 *         NS_OK_UDEC_MOREOUTPUT   aDest is full; call again with the
 *                                 remaining input.
 *         NS_ERROR_UNEXPECTED     a delegate decoder could not be created.
 *         NS_ERROR_ILLEGAL_INPUT  malformed input and
 *                                 mErrBehavior == kOnError_Signal.
 *
 * Invariant relied on by callers that retry after NS_OK_UDEC_MOREOUTPUT:
 * the byte at the reported offset has NOT been consumed, so the state
 * machine must still be in the state in which that byte has to be
 * interpreted.  Every output-space check is therefore done before mState is
 * changed.
 */
NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
   const char * aSrc, int32_t * aSrcLen,
   PRUnichar * aDest, int32_t * aDestLen)
{
  // Lead-byte table for the 94x94 sets handled through delegate decoders
  // (GB 2312, KS C 5601): maps (lead & 0x7F) to row * 94, or 0xFFFD when the
  // byte is not a valid lead byte.  The row labels show the byte with the
  // high bit set; the table itself is indexed with the 7-bit value.
  static const uint16_t fbIdx[128] =
  {
/* 0x8X */
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
/* 0x9X */
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
/* 0xAX */
     0xFFFD, 0,      94,     94* 2,  94* 3,  94* 4,  94* 5,  94* 6,
     94* 7,  94* 8,  94* 9,  94*10,  94*11,  94*12,  94*13,  94*14,
/* 0xBX */
     94*15,  94*16,  94*17,  94*18,  94*19,  94*20,  94*21,  94*22,
     94*23,  94*24,  94*25,  94*26,  94*27,  94*28,  94*29,  94*30,
/* 0xCX */
     94*31,  94*32,  94*33,  94*34,  94*35,  94*36,  94*37,  94*38,
     94*39,  94*40,  94*41,  94*42,  94*43,  94*44,  94*45,  94*46,
/* 0xDX */
     94*47,  94*48,  94*49,  94*50,  94*51,  94*52,  94*53,  94*54,
     94*55,  94*56,  94*57,  94*58,  94*59,  94*60,  94*61,  94*62,
/* 0xEX */
     94*63,  94*64,  94*65,  94*66,  94*67,  94*68,  94*69,  94*70,
     94*71,  94*72,  94*73,  94*74,  94*75,  94*76,  94*77,  94*78,
/* 0xFX */
     94*79,  94*80,  94*81,  94*82,  94*83,  94*84,  94*85,  94*86,
     94*87,  94*88,  94*89,  94*90,  94*91,  94*92,  94*93,  0xFFFD,
  };

  // Trail-byte table: maps a byte to its column (0..93) inside a 94-cell
  // row, or 0xFF when the byte is not a valid trail byte (anything outside
  // 0x21..0x7E).
  static const uint8_t sbIdx[256] =
  {
/* 0x0X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0x1X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0x2X */
     0xFF, 0,    1,    2,    3,    4,    5,    6,
     7,    8,    9,    10,   11,   12,   13,   14,
/* 0x3X */
     15,   16,   17,   18,   19,   20,   21,   22,
     23,   24,   25,   26,   27,   28,   29,   30,
/* 0x4X */
     31,   32,   33,   34,   35,   36,   37,   38,
     39,   40,   41,   42,   43,   44,   45,   46,
/* 0x5X */
     47,   48,   49,   50,   51,   52,   53,   54,
     55,   56,   57,   58,   59,   60,   61,   62,
/* 0x6X */
     63,   64,   65,   66,   67,   68,   69,   70,
     71,   72,   73,   74,   75,   76,   77,   78,
/* 0x7X */
     79,   80,   81,   82,   83,   84,   85,   86,
     87,   88,   89,   90,   91,   92,   93,   0xFF,
/* 0x8X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0x9X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xAX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xBX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xCX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xDX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xEX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xFX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  };

  const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
  const unsigned char* src =(unsigned char*) aSrc;
  PRUnichar* destEnd = aDest + *aDestLen;
  PRUnichar* dest = aDest;

  while((src < srcEnd))
  {
    switch(mState)
    {
      case mState_ASCII:
        if(0x1b == *src)
        {
          // Remember where to come back to if the escape sequence turns
          // out to be unrecognised.
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (PRUnichar) *src;
        }
        break;

      case mState_ESC:
        if( '(' == *src) {
          mState = mState_ESC_28;
        } else if ('$' == *src)  {
          mState = mState_ESC_24;
        } else if ('.' == *src)  { // for ISO-2022-JP-2
          mState = mState_ESC_2e;
        } else if ('N' == *src)  { // for ISO-2022-JP-2
          mState = mState_ESC_4e;
        } else  {
          // Unrecognised sequence: pass the swallowed ESC and this byte
          // through and resume in the state that preceded the ESC.
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          if (0x80 & *src) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          } else {
            *dest++ = (PRUnichar) *src;
          }
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_28: // ESC (
        if( 'B' == *src) {
          // An escape sequence that was followed by no characters at all
          // is flagged with U+FFFD.  Check for room BEFORE leaving this
          // state so that a retry sees the 'B' in mState_ESC_28 again.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mState = mState_ASCII;
          mRunLength = 0;
        } else if ('J' == *src)  {
          if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            if (mErrBehavior == kOnError_Signal) {
              // The designation itself has been accepted at this point;
              // only the empty run is being reported.
              mState = mState_JISX0201_1976Roman;
              goto error3;
            }
            *dest++ = 0xFFFD;
          }
          mState = mState_JISX0201_1976Roman;
          mRunLength = 0;
        } else if ('I' == *src)  {
          mState = mState_JISX0201_1976Kana;
          mRunLength = 0;
        } else  {
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '(';
          if (0x80 & *src) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          } else {
            *dest++ = (PRUnichar) *src;
          }
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24: // ESC $
        if( '@' == *src) {
          mState = mState_JISX0208_1978;
          mRunLength = 0;
        } else if ('A' == *src)  {
          mState = mState_GB2312_1980;
          mRunLength = 0;
        } else if ('B' == *src)  {
          mState = mState_JISX0208_1983;
          mRunLength = 0;
        } else if ('(' == *src)  {
          mState = mState_ESC_24_28;
        } else  {
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          if (0x80 & *src) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          } else {
            *dest++ = (PRUnichar) *src;
          }
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24_28: // ESC $ (
        if( 'C' == *src) {
          mState = mState_KSC5601_1987;
          mRunLength = 0;
        } else if ('D' == *src) {
          mState = mState_JISX0212_1990;
          mRunLength = 0;
        } else  {
          if (CHECK_OVERRUN(dest, destEnd, 4))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (PRUnichar) '(';
          if (0x80 & *src) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          } else {
            *dest++ = (PRUnichar) *src;
          }
          mState = mLastLegalState;
        }
        break;

      case mState_JISX0201_1976Roman:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          ++mRunLength;
        } else {
          // XXX We need to decide how to handle \ and ~ here
          // we may need a if statement here for '\' and '~'
          // to map them to Yen and Overbar
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (PRUnichar) *src;
          ++mRunLength;
        }
        break;

      case mState_JISX0201_1976Kana:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          if((0x21 <= *src) && (*src <= 0x5F)) {
            // 0x21..0x5F map linearly onto U+FF61..U+FF9F.
            *dest++ = (0xFF61-0x0021) + *src;
          } else {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          }
          ++mRunLength;
        }
        break;

      // ---- first byte of a two-byte character --------------------------
      // NOTE(review): when the lead-byte lookup yields 0xFFFD the code
      // enters mState_ERROR without updating mLastLegalState (unlike the
      // high-bit path just above it), so recovery resumes in whatever
      // state preceded the most recent ESC.  Left as-is; confirm whether
      // that is intended.

      case mState_JISX0208_1978:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          mLastLegalState = mState;
          mState = mState_ERROR;
        } else {
          mData = JIS0208_INDEX[*src & 0x7F];
          if (0xFFFD == mData) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            mState = mState_ERROR;
          } else {
            mState = mState_JISX0208_1978_2ndbyte;
          }
        }
        break;

      case mState_GB2312_1980:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          mLastLegalState = mState;
          mState = mState_ERROR;
        } else {
          mData = fbIdx[*src & 0x7F];
          if (0xFFFD == mData) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            mState = mState_ERROR;
          } else {
            mState = mState_GB2312_1980_2ndbyte;
          }
        }
        break;

      case mState_JISX0208_1983:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          mLastLegalState = mState;
          mState = mState_ERROR;
        } else {
          mData = JIS0208_INDEX[*src & 0x7F];
          if (0xFFFD == mData) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            mState = mState_ERROR;
          } else {
            mState = mState_JISX0208_1983_2ndbyte;
          }
        }
        break;

      case mState_KSC5601_1987:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          mLastLegalState = mState;
          mState = mState_ERROR;
        } else {
          mData = fbIdx[*src & 0x7F];
          if (0xFFFD == mData) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            mState = mState_ERROR;
          } else {
            mState = mState_KSC5601_1987_2ndbyte;
          }
        }
        break;

      case mState_JISX0212_1990:
        if(0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        } else if(*src & 0x80) {
          mLastLegalState = mState;
          mState = mState_ERROR;
        } else {
          mData = JIS0212_INDEX[*src & 0x7F];
          if (0xFFFD == mData) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            mState = mState_ERROR;
          } else {
            mState = mState_JISX0212_1990_2ndbyte;
          }
        }
        break;

      // ---- second byte of a two-byte character -------------------------

      case mState_JISX0208_1978_2ndbyte:
      {
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        uint8_t off = sbIdx[*src];
        if(0xFF == off) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        } else {
          // XXX We need to map from JIS X 0208 1983 to 1987
          // in the next line before pass to *dest++
          *dest++ = gJapaneseMap[mData+off];
        }
        ++mRunLength;
        mState = mState_JISX0208_1978;
      }
      break;

      case mState_GB2312_1980_2ndbyte:
      {
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        uint8_t off = sbIdx[*src];
        if(0xFF == off) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        } else {
          if (!mGB2312Decoder) {
            // creating a delegate converter (GB2312)
            nsresult rv;
            nsCOMPtr<nsICharsetConverterManager> ccm =
                  do_GetService(kCharsetConverterManagerCID, &rv);
            if (NS_SUCCEEDED(rv)) {
              rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
            }
          }
          if (!mGB2312Decoder) {// failed creating a delegate converter
            goto error2;
          } else {
            unsigned char gb[2];
            // Pre-set so that a delegate which fails without producing
            // output yields U+FFFD instead of an indeterminate value.
            PRUnichar uni = UNICODE_REPLACEMENT_CHARACTER;
            int32_t gbLen = 2, uniLen = 1;
            // ((mData/94)+0x21) is the original 1st byte.
            // *src is the present 2nd byte.
            // Put 2 bytes (one character) to gb[] with GB2312 encoding.
            gb[0] = ((mData / 94) + 0x21) | 0x80;
            gb[1] = *src | 0x80;
            // Convert GB2312 to unicode.
            mGB2312Decoder->Convert((const char *)gb, &gbLen,
                                    &uni, &uniLen);
            *dest++ = uni;
          }
        }
        ++mRunLength;
        mState = mState_GB2312_1980;
      }
      break;

      case mState_JISX0208_1983_2ndbyte:
      {
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        uint8_t off = sbIdx[*src];
        if(0xFF == off) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        } else {
          *dest++ = gJapaneseMap[mData+off];
        }
        ++mRunLength;
        mState = mState_JISX0208_1983;
      }
      break;

      case mState_KSC5601_1987_2ndbyte:
      {
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        uint8_t off = sbIdx[*src];
        if(0xFF == off) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        } else {
          if (!mEUCKRDecoder) {
            // creating a delegate converter (EUC-KR)
            nsresult rv;
            nsCOMPtr<nsICharsetConverterManager> ccm =
                  do_GetService(kCharsetConverterManagerCID, &rv);
            if (NS_SUCCEEDED(rv)) {
              rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
            }
          }
          if (!mEUCKRDecoder) {// failed creating a delegate converter
            goto error2;
          } else {
            unsigned char ksc[2];
            // Pre-set: see the GB2312 case.
            PRUnichar uni = UNICODE_REPLACEMENT_CHARACTER;
            int32_t kscLen = 2, uniLen = 1;
            // ((mData/94)+0x21) is the original 1st byte.
            // *src is the present 2nd byte.
            // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
            ksc[0] = ((mData / 94) + 0x21) | 0x80;
            ksc[1] = *src | 0x80;
            // Convert EUC-KR to unicode.
            mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
                                   &uni, &uniLen);
            *dest++ = uni;
          }
        }
        ++mRunLength;
        mState = mState_KSC5601_1987;
      }
      break;

      case mState_JISX0212_1990_2ndbyte:
      {
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        uint8_t off = sbIdx[*src];
        if(0xFF == off) {
          if (mErrBehavior == kOnError_Signal)
            goto error3;
          *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        } else {
          *dest++ = gJapaneseMap[mData+off];
        }
        ++mRunLength;
        mState = mState_JISX0212_1990;
      }
      break;

      case mState_ESC_2e: // ESC .
        // "ESC ." will designate 96 character set to G2.
        // Either way decoding resumes in the last legal state, but only
        // once we know the byte can actually be consumed.
        if( 'A' == *src) {
          mState = mLastLegalState;
          G2charset = G2_ISO88591;
        } else if ('F' == *src) {
          mState = mLastLegalState;
          G2charset = G2_ISO88597;
        } else  {
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          mState = mLastLegalState;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '.';
          if (0x80 & *src) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          } else {
            *dest++ = (PRUnichar) *src;
          }
        }
        break;

      case mState_ESC_4e: // ESC N
        // "ESC N" is the SS2 sequence, that invoke a G2 designated
        // character set.  Since SS2 is effective only for next one
        // character, mState should be returned to the last status.
        // The switch back is made only after the output-space check, so
        // that a retry still decodes this byte as the shifted character.
        if((0x20 <= *src) && (*src <= 0x7F)) {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          mState = mLastLegalState;
          if (G2_ISO88591 == G2charset) {
            *dest++ = *src | 0x80;
          } else if (G2_ISO88597 == G2charset) {
            if (!mISO88597Decoder) {
              // creating a delegate converter (ISO-8859-7)
              nsresult rv;
              nsCOMPtr<nsICharsetConverterManager> ccm =
                    do_GetService(kCharsetConverterManagerCID, &rv);
              if (NS_SUCCEEDED(rv)) {
                rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
              }
            }
            if (!mISO88597Decoder) {// failed creating a delegate converter
              goto error2;
            } else {
              // Put one character with ISO-8859-7 encoding.
              unsigned char gr = *src | 0x80;
              // Pre-set: see the GB2312 case.
              PRUnichar uni = UNICODE_REPLACEMENT_CHARACTER;
              int32_t grLen = 1, uniLen = 1;
              // Convert ISO-8859-7 to unicode.
              mISO88597Decoder->Convert((const char *)&gr, &grLen,
                                        &uni, &uniLen);
              *dest++ = uni;
            }
          } else {// G2charset is G2_unknown (not designated yet)
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          }
          ++mRunLength;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          mState = mLastLegalState;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) 'N';
          if (0x80 & *src) {
            if (mErrBehavior == kOnError_Signal)
              goto error3;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
          } else {
            *dest++ = (PRUnichar) *src;
          }
        }
        break;

      case mState_ERROR:
        // The byte following an invalid lead byte is swallowed and the
        // pair is reported as one error.
        if (mErrBehavior == kOnError_Signal) {
          mState = mLastLegalState;
          mRunLength = 0;
          goto error3;
        }
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        mState = mLastLegalState;
        *dest++ = UNICODE_REPLACEMENT_CHARACTER;
        ++mRunLength;
        break;

    } // switch
    src++;
  }
  *aDestLen = dest - aDest;
  return NS_OK;

error1: // output buffer exhausted
  *aDestLen = dest - aDest;
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;

error2: // delegate decoder unavailable
  *aDestLen = dest - aDest;
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_ERROR_UNEXPECTED;

error3: // illegal input while mErrBehavior == kOnError_Signal
  *aDestLen = dest - aDest;
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_ERROR_ILLEGAL_INPUT;
}
/**
 * Decode a chunk of ISO-2022-KR bytes into UTF-16 code units.
 *
 * Byte-at-a-time state machine; mState, mLastLegalState, mData, mRunLength
 * and the lazily created EUC-KR delegate decoder are members, so a sequence
 * may be split across calls.  LF and CR reset the decoder to mState_Init.
 *
 * @param aSrc      input bytes.
 * @param aSrcLen   in: bytes available.  Overwritten with the number of
 *                  bytes consumed only when NS_OK_UDEC_MOREOUTPUT is
 *                  returned; left untouched on NS_OK.
 * @param aDest     output buffer.
 * @param aDestLen  in: capacity of aDest in code units; out: code units
 *                  written.
 *
 * @return NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when
 *         aDest filled up first.
 *
 * Every output-space check is made before mState is changed and before
 * anything is written, so that after NS_OK_UDEC_MOREOUTPUT the unconsumed
 * byte is re-decoded in the same state on the next call.
 */
NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, PRUnichar * aDest, int32_t * aDestLen)
{
  const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
  const unsigned char* src =(unsigned char*) aSrc;
  PRUnichar* destEnd = aDest + *aDestLen;
  PRUnichar* dest = aDest;

  while((src < srcEnd))
  {
    // if LF/CR, return to US-ASCII unconditionally.
    if ( *src == 0x0a || *src == 0x0d )
      mState = mState_Init;

    switch(mState)
    {
      case mState_Init:
        if(0x1b == *src) {
          mLastLegalState = mState_ASCII;
          mState = mState_ESC;
          break;
        }
        mState = mState_ASCII;
        // fall through

      case mState_ASCII:
        if(0x0e == *src) { // Shift-Out
          mState = mState_KSX1001_1992;
          mRunLength = 0;
        }
        else if(*src & 0x80) {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = 0xFFFD;
        }
        else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (PRUnichar) *src;
        }
        break;

      case mState_ESC:
        if('$' == *src) {
          mState = mState_ESC_24;
        }
        else  {
          // Unrecognised sequence: pass ESC and this byte through.
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24: // ESC $
        if(')' == *src) {
          mState = mState_ESC_24_29;
        }
        else  {
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24_29: // ESC $ )
        // Both branches set the next state themselves; nothing is changed
        // until the output-space check has passed.
        if('C' == *src) {
          mState = mState_ASCII;
          mRunLength = 0;
        }
        else  {
          if (CHECK_OVERRUN(dest, destEnd, 4))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (PRUnichar) ')';
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_KSX1001_1992:
        if (0x20 < (uint8_t) *src  && (uint8_t) *src < 0x7f) {
          // Lead byte of a two-byte character.
          mData = (uint8_t) *src;
          mState = mState_KSX1001_1992_2ndbyte;
        }
        else if (0x0f == *src) { // Shift-In (SI)
          // A Shift-Out immediately followed by Shift-In is flagged with
          // U+FFFD.  Make sure there is room before leaving this state.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mState = mState_ASCII;
          mRunLength = 0;
        }
        else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) {
          // Allow space and tab between SO and SI (i.e. in Hangul segment)
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          mState = mState_KSX1001_1992;
          *dest++ = (PRUnichar) *src;
          ++mRunLength;
        }
        else {         // Everything else is invalid.
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = 0xFFFD;
        }
        break;

      case mState_KSX1001_1992_2ndbyte:
        // Every path below writes exactly one code unit, so a single
        // up-front check covers them all (including the path taken when
        // the delegate decoder cannot be created).
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;

        if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f  ) {
          if (!mEUCKRDecoder) {
            // creating a delegate converter (EUC-KR)
            nsresult rv;
            nsCOMPtr<nsICharsetConverterManager> ccm =
                  do_GetService(kCharsetConverterManagerCID, &rv);
            if (NS_SUCCEEDED(rv)) {
              rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
            }
          }

          if (!mEUCKRDecoder) {// failed creating a delegate converter
            *dest++ = 0xFFFD;
          }
          else {
            unsigned char ksx[2];
            // Pre-set so that a delegate which fails without producing
            // output yields U+FFFD instead of an indeterminate value.
            PRUnichar uni = 0xFFFD;
            int32_t ksxLen = 2, uniLen = 1;
            // mData is the original 1st byte.
            // *src is the present 2nd byte.
            // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
            ksx[0] = mData | 0x80;
            ksx[1] = *src | 0x80;
            // Convert EUC-KR to unicode.
            mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
            *dest++ = uni;
            ++mRunLength;
          }
          mState = mState_KSX1001_1992;
        }
        else {        // Invalid
          if ( 0x0f == *src ) {   // Shift-In (SI)
            mState = mState_ASCII;
          }
          else {
            mState = mState_KSX1001_1992;
          }
          *dest++ = 0xFFFD;
        }
        break;

      case mState_ERROR:
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        mState = mLastLegalState;
        *dest++ = 0xFFFD;
        break;

    } // switch
    src++;
  }
  *aDestLen = dest - aDest;
  return NS_OK;

error1: // output buffer exhausted
  *aDestLen = dest-aDest;
  *aSrcLen = src-(unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;
}
/**
 * Decode a chunk of ISO-2022-CN bytes into UTF-16 code units.
 *
 * Byte-at-a-time state machine.  Each state name spells out the prefix of
 * the escape / shift sequence recognised so far (e.g. eState_ESC_24_29_A is
 * "ESC $ ) A").  mState, mData, mRunLength and mPlaneID are members, so a
 * sequence may be split across calls.  Two-byte characters are handed to
 * GB2312_To_Unicode() or EUCTW_To_Unicode() after setting the high bit on
 * both bytes (and, for planes other than 1, prefixing MBYTE and
 * PMASK + plane).
 *
 * When a sequence turns out to be unrecognised, the bytes swallowed so far
 * are written to the output followed by the offending byte (U+FFFD if its
 * high bit is set) and the decoder returns to eState_ASCII.
 *
 * @param aSrc      input bytes.
 * @param aSrcLen   in: bytes available.  Overwritten with the number of
 *                  bytes consumed on every non-NS_OK return; left untouched
 *                  on NS_OK.
 * @param aDest     output buffer.
 * @param aDestLen  in: capacity of aDest in code units; out: code units
 *                  written.
 *
 * @return NS_OK                  all input consumed.
 *         NS_OK_UDEC_MOREOUTPUT  aDest is full; call again with the
 *                                remaining input.
 *         NS_ERROR_UNEXPECTED    a helper conversion failed or the decoder
 *                                was in eState_ERROR; mState is reset to
 *                                eState_ASCII.
 *
 * Every output-space check is made before mState is changed, so that after
 * NS_OK_UDEC_MOREOUTPUT the unconsumed byte is re-decoded in the same state.
 */
NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
{
  const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
  const unsigned char * src = (unsigned char *) aSrc;
  char16_t* destEnd = aDest + *aDestLen;
  char16_t* dest = aDest;
  nsresult rv;
  int32_t aLen;

  while ((src < srcEnd))
  {
    switch (mState)
    {
      case eState_ASCII:
        if(ESC == *src) {
          mState = eState_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC:    // ESC
        if('$' == *src) {
          mState = eState_ESC_24;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24: // ESC $
        if(')' == *src) {
          mState = eState_ESC_24_29;
        } else if('*' == *src) {
          mState = eState_ESC_24_2A;
        } else if('+' == *src) {
          mState = eState_ESC_24_2B;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_29: // ESC $ )
        if('A' == *src) {
          mState = eState_ESC_24_29_A;
        } else if('G' == *src) {
          mState = eState_ESC_24_29_G;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 4))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) ')';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_29_A:  // ESC $ ) A
        if(SO == *src) {
          mState = eState_GB2312_1980;
          mRunLength = 0;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 5))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) ')';
          *dest++ = (char16_t) 'A';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_GB2312_1980:   // ESC $ ) A SO
        if(SI == *src) { // Shift-In (SI)
          // A shifted run that produced no characters is flagged with
          // U+FFFD.  Make sure there is room before leaving this state so
          // that a retry sees the SI here again.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mState = eState_ESC_24_29_A_SO_SI;
          mRunLength = 0;
        } else if(ESC == *src) {
          mState = eState_ESC;
        } else {
          if(0x20 < *src && *src < 0x7f) {
            mData = *src;
            mState = eState_GB2312_1980_2ndbyte;
          } else {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
          }
        }
        break;

      case eState_GB2312_1980_2ndbyte:   // ESC $ ) A SO
        if(0x20 < *src && *src < 0x7f) {
          unsigned char gb[2];
          int32_t gbLen = 2;

          gb[0] = mData | 0x80;
          gb[1] = *src | 0x80;

          aLen = destEnd - dest;
          rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
          if(rv == NS_OK_UDEC_MOREOUTPUT) {
            goto error1;
          } else if(NS_FAILED(rv)) {
            goto error2;
          }
          // Count the character only once it has really been produced.
          ++mRunLength;

          dest += aLen;
        } else {
          // Invalid trail byte: pass both bytes through.
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (char16_t) mData;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_GB2312_1980;
        break;

      case eState_ESC_24_29_A_SO_SI:  // ESC $ ) A SO SI
        if(SO == *src) {
          mState = eState_GB2312_1980;
          mRunLength = 0;
        } else if(ESC == *src) {
          mState = eState_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ESC_24_29_A_SO_SI;
        }
        break;

      case eState_ESC_24_29_G:   // ESC $ ) G
        if(SO == *src) {
          mState = eState_CNS11643_1;
          mRunLength = 0;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 5))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) ')';
          *dest++ = (char16_t) 'G';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_1:   // ESC $ ) G SO
        if(SI == *src) { // Shift-In (SI)
          // Room check first; see eState_GB2312_1980.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mState = eState_ESC_24_29_G_SO_SI;
          mRunLength = 0;
        } else if(ESC == *src) {
          mState = eState_ESC;
        } else {
          if(0x20 < *src && *src < 0x7f) {
            mData = *src;
            mState = eState_CNS11643_1_2ndbyte;
          } else {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
          }
        }
        break;

      case eState_CNS11643_1_2ndbyte:  // ESC $ ) G SO
        if(0x20 < *src && *src < 0x7f) {
          unsigned char cns[4];
          int32_t cnsLen = 2;

          // Plane 1 is passed as a plain two-byte sequence.
          cns[0] = mData | 0x80;
          cns[1] = *src | 0x80;

          aLen = destEnd - dest;
          rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
          if(rv == NS_OK_UDEC_MOREOUTPUT) {
            goto error1;
          } else if(NS_FAILED(rv)) {
            goto error2;
          }
          ++mRunLength;

          dest += aLen;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (char16_t) mData;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_CNS11643_1;
        break;

      case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
        if(SO == *src) {
          mState = eState_CNS11643_1;
          mRunLength = 0;
        } else if(ESC == *src) {
          mState = eState_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ESC_24_29_G_SO_SI;
        }
        break;

      case eState_ESC_24_2A: // ESC $ *
        if('H' == *src) {
          mState = eState_ESC_24_2A_H;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 4))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) '*';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2A_H:  // ESC $ * H
        if(ESC == *src) {
          mState = eState_ESC_24_2A_H_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 5))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) '*';
          *dest++ = (char16_t) 'H';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2A_H_ESC:  // ESC $ * H ESC
        if(SS2 == *src) {
          mState = eState_CNS11643_2;
          mRunLength = 0;
        } else if('$' == *src) {
          mState = eState_ESC_24;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 6))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) '*';
          *dest++ = (char16_t) 'H';
          *dest++ = (char16_t) ESC;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_2:  // ESC $ * H ESC SS2
        if(SI == *src) { // Shift-In (SI)
          // Room check first; see eState_GB2312_1980.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mState = eState_ESC_24_2A_H_ESC_SS2_SI;
          mRunLength = 0;
        } else if(ESC == *src) {
          mState = eState_ESC_24_2A_H_ESC;
        } else {
          if(0x20 < *src && *src < 0x7f) {
            mData = *src;
            mState = eState_CNS11643_2_2ndbyte;
          } else {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
          }
        }
        break;

      case eState_CNS11643_2_2ndbyte:   // ESC $ * H ESC SS2
        if(0x20 < *src && *src < 0x7f) {
          unsigned char cns[4];
          int32_t cnsLen = 4;

          // Four-byte form: MBYTE, plane selector, then the two bytes.
          cns[0] = (unsigned char) MBYTE;
          cns[1] = (unsigned char) (PMASK + 2);
          cns[2] = mData | 0x80;
          cns[3] = *src | 0x80;

          aLen = destEnd - dest;
          rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
          if(rv == NS_OK_UDEC_MOREOUTPUT) {
            goto error1;
          } else if(NS_FAILED(rv)) {
            goto error2;
          }
          ++mRunLength;

          dest += aLen;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (char16_t) mData;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_CNS11643_2;
        break;

      case eState_ESC_24_2A_H_ESC_SS2_SI:  // ESC $ * H ESC SS2 SI
        if(ESC == *src) {
          mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ESC_24_2A_H_ESC_SS2_SI;
        }
        break;

      case eState_ESC_24_2A_H_ESC_SS2_SI_ESC:  // ESC $ * H ESC SS2 SI ESC
        if(SS2 == *src) {
          mState = eState_CNS11643_2;
          mRunLength = 0;
        } else if('$' == *src) {
          mState = eState_ESC_24;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ESC_24_2A_H_ESC_SS2_SI;
        }
        break;

      case eState_ESC_24_2B: // ESC $ +
        if('I' <= *src && *src <= 'M') {
          mState = eState_ESC_24_2B_I;
          // Final bytes 'I'..'M' select planes 3..7.
          mPlaneID = *src - 'I' + 3;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 4))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) '+';
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2B_I:  // ESC $ + I
        if(ESC == *src) {
          mState = eState_ESC_24_2B_I_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 5))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) '+';
          // Reconstruct the final byte from the remembered plane.
          *dest++ = (char16_t) 'I' + mPlaneID - 3;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2B_I_ESC:  // ESC $ + I ESC
        if(SS3 == *src) {
          mState = eState_CNS11643_3;
          mRunLength = 0;
        } else if('$' == *src) {
          mState = eState_ESC_24;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 6))
            goto error1;
          *dest++ = (char16_t) ESC;
          *dest++ = (char16_t) '$';
          *dest++ = (char16_t) '+';
          *dest++ = (char16_t) 'I' + mPlaneID - 3;
          *dest++ = (char16_t) ESC;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_3:   // ESC $ + I ESC SS3
        if(SI == *src) { // Shift-In (SI)
          // Room check first; see eState_GB2312_1980.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mState = eState_ESC_24_2B_I_ESC_SS3_SI;
          mRunLength = 0;
        } else if(ESC == *src) {
          mState = eState_ESC_24_2B_I_ESC;
        } else {
          if(0x20 < *src && *src < 0x7f) {
            mData = *src;
            mState = eState_CNS11643_3_2ndbyte;
          } else {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
          }
        }
        break;

      case eState_CNS11643_3_2ndbyte:  // ESC $ + I ESC SS3
        if(0x20 < *src && *src < 0x7f) {
          unsigned char cns[4];
          int32_t cnsLen = 4;

          cns[0] = (unsigned char) MBYTE;
          cns[1] = (unsigned char) (PMASK + mPlaneID);
          cns[2] = mData | 0x80;
          cns[3] = *src | 0x80;

          aLen = destEnd - dest;
          rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
          if(rv == NS_OK_UDEC_MOREOUTPUT) {
            goto error1;
          } else if(NS_FAILED(rv)) {
            goto error2;
          }
          ++mRunLength;

          dest += aLen;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (char16_t) mData;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_CNS11643_3;
        break;

      case eState_ESC_24_2B_I_ESC_SS3_SI:  // ESC $ + I ESC SS3 SI
        if(ESC == *src) {
          mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ESC_24_2B_I_ESC_SS3_SI;
        }
        break;

      case eState_ESC_24_2B_I_ESC_SS3_SI_ESC:  // ESC $ + I ESC SS3 SI ESC
        if(SS3 == *src) {
          mState = eState_CNS11643_3;
          mRunLength = 0;
        } else if('$' == *src) {
          mState = eState_ESC_24;
        } else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

          mState = eState_ESC_24_2B_I_ESC_SS3_SI;
        }
        break;

      case eState_ERROR:
        NS_NOTREACHED("unhandled case");
        goto error2;

    } // switch
    src++;
  }

  *aDestLen = dest- aDest;
  return NS_OK;

error1: // output buffer exhausted
  *aDestLen = dest-aDest;
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;

error2: // conversion helper failed, or decoder was in eState_ERROR
  *aSrcLen = src - (const unsigned char*)aSrc;
  *aDestLen = dest-aDest;
  mState = eState_ASCII;
  return NS_ERROR_UNEXPECTED;
}