コード例 #1
0
/**
 * Decode a chunk of ISO-2022-JP input (including the ISO-2022-JP-2 escape
 * sequences handled below) into UTF-16 code units.
 *
 * The decoder is a byte-at-a-time state machine.  Its state is kept in
 * members (mState, mLastLegalState, mData, mRunLength, G2charset), so a
 * multi-byte character or an escape sequence may be split across calls.
 *
 * @param aSrc     input bytes.
 * @param aSrcLen  in: number of bytes available at aSrc.
 *                 out: on the three error exits, the number of bytes consumed.
 *                 It is left untouched when NS_OK is returned.
 * @param aDest    output buffer.
 * @param aDestLen in: capacity of aDest in code units.
 *                 out: number of code units written.
 * @return NS_OK                  all input was consumed;
 *         NS_OK_UDEC_MOREOUTPUT  CHECK_OVERRUN reported a full output buffer;
 *         NS_ERROR_UNEXPECTED    a delegate decoder could not be obtained;
 *         NS_ERROR_ILLEGAL_INPUT illegal input while
 *                                mErrBehavior == kOnError_Signal.
 */
NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
   const char * aSrc, int32_t * aSrcLen,
     PRUnichar * aDest, int32_t * aDestLen)
{
   // First-byte table for the 94x94 sets handled through delegate decoders
   // (GB2312 and KS C 5601).  It is indexed with (*src & 0x7F); the row
   // labels show the EUC form of the index (index | 0x80).  Bytes 0x21..0x7E
   // yield the row offset (byte - 0x21) * 94, everything else yields 0xFFFD.
   static const uint16_t fbIdx[128] =
   {
/* 0x8X */
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
/* 0x9X */
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
     0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
/* 0xAX */
     0xFFFD, 0,      94,     94* 2,  94* 3,  94* 4,  94* 5,  94* 6,
     94* 7,  94* 8 , 94* 9,  94*10,  94*11,  94*12,  94*13,  94*14,
/* 0xBX */
     94*15,  94*16,  94*17,  94*18,  94*19,  94*20,  94*21,  94*22,
     94*23,  94*24,  94*25,  94*26,  94*27,  94*28,  94*29,  94*30,
/* 0xCX */
     94*31,  94*32,  94*33,  94*34,  94*35,  94*36,  94*37,  94*38,
     94*39,  94*40,  94*41,  94*42,  94*43,  94*44,  94*45,  94*46,
/* 0xDX */
     94*47,  94*48,  94*49,  94*50,  94*51,  94*52,  94*53,  94*54,
     94*55,  94*56,  94*57,  94*58,  94*59,  94*60,  94*61,  94*62,
/* 0xEX */
     94*63,  94*64,  94*65,  94*66,  94*67,  94*68,  94*69,  94*70,
     94*71,  94*72,  94*73,  94*74,  94*75,  94*76,  94*77,  94*78,
/* 0xFX */
     94*79,  94*80,  94*81,  94*82,  94*83,  94*84,  94*85,  94*86,
     94*87,  94*88,  94*89,  94*90,  94*91,  94*92,  94*93,  0xFFFD,
   };
   // Second-byte table, indexed with the raw byte.  Bytes 0x21..0x7E yield
   // the column 0..93; every other byte yields the invalid marker 0xFF.
   static const uint8_t sbIdx[256] =
   {
/* 0x0X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0x1X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0x2X */
     0xFF, 0,    1,    2,    3,    4,    5,    6,
     7,    8 ,   9,    10,   11,   12,   13,   14,
/* 0x3X */
     15,   16,   17,   18,   19,   20,   21,   22,
     23,   24,   25,   26,   27,   28,   29,   30,
/* 0x4X */
     31,   32,   33,   34,   35,   36,   37,   38,
     39,   40,   41,   42,   43,   44,   45,   46,
/* 0x5X */
     47,   48,   49,   50,   51,   52,   53,   54,
     55,   56,   57,   58,   59,   60,   61,   62,
/* 0x6X */
     63,   64,   65,   66,   67,   68,   69,   70,
     71,   72,   73,   74,   75,   76,   77,   78,
/* 0x7X */
     79,   80,   81,   82,   83,   84,   85,   86,
     87,   88,   89,   90,   91,   92,   93,   0xFF,
/* 0x8X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0x9X */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xAX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xBX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xCX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xDX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xEX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/* 0xFX */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   };

   const unsigned char* srcEnd = (const unsigned char*)aSrc + *aSrcLen;
   const unsigned char* src = (const unsigned char*)aSrc;
   PRUnichar* destEnd = aDest + *aDestLen;
   PRUnichar* dest = aDest;
   while((src < srcEnd))
   {
       // Every case consumes exactly one input byte (the src++ after the
       // switch).  The error exits jump out before that increment, so the
       // offending byte is not counted in *aSrcLen.
       switch(mState)
       {
          case mState_ASCII:
            if(0x1b == *src)
            {
              // Remember where to return to if the escape sequence turns
              // out to be unrecognised.
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              // Bytes with the high bit set are illegal in this state.
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              if (CHECK_OVERRUN(dest, destEnd, 1))
                goto error1;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            } else {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                goto error1;
              *dest++ = (PRUnichar) *src;
            }
          break;

          case mState_ESC:
            if( '(' == *src) {
              mState = mState_ESC_28;
            } else if ('$' == *src)  {
              mState = mState_ESC_24;
            } else if ('.' == *src)  { // for ISO-2022-JP-2
              mState = mState_ESC_2e;
            } else if ('N' == *src)  { // for ISO-2022-JP-2
              mState = mState_ESC_4e;
            } else  {
              // Unknown sequence: pass ESC and the current byte through and
              // resume in the state that was active before the ESC.
              if (CHECK_OVERRUN(dest, destEnd, 2))
                goto error1;
              *dest++ = (PRUnichar) 0x1b;
              if (0x80 & *src) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              } else {
                *dest++ = (PRUnichar) *src;
              }
              mState = mLastLegalState;
            }
          break;

          case mState_ESC_28: // ESC (
            if( 'B' == *src) {
              mState = mState_ASCII;
              // mRunLength counts the characters decoded since the last
              // designation; a switch after an empty run emits U+FFFD.
              if (mRunLength == 0) {
                if (CHECK_OVERRUN(dest, destEnd, 1))
                  goto error1;
                *dest++ = 0xFFFD;
              }
              mRunLength = 0;
            } else if ('J' == *src)  {
              mState = mState_JISX0201_1976Roman;
              if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
                if (CHECK_OVERRUN(dest, destEnd, 1))
                  goto error1;
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = 0xFFFD;
              }
              mRunLength = 0;
            } else if ('I' == *src)  {
              mState = mState_JISX0201_1976Kana;
              mRunLength = 0;
            } else  {
              // Unknown sequence: replay "ESC (" plus the current byte.
              if (CHECK_OVERRUN(dest, destEnd, 3))
                goto error1;
              *dest++ = (PRUnichar) 0x1b;
              *dest++ = (PRUnichar) '(';
              if (0x80 & *src) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              } else {
                *dest++ = (PRUnichar) *src;
              }
              mState = mLastLegalState;
            }
          break;

          case mState_ESC_24: // ESC $
            if( '@' == *src) {
              mState = mState_JISX0208_1978;
              mRunLength = 0;
            } else if ('A' == *src)  {
              mState = mState_GB2312_1980;
              mRunLength = 0;
            } else if ('B' == *src)  {
              mState = mState_JISX0208_1983;
              mRunLength = 0;
            } else if ('(' == *src)  {
              mState = mState_ESC_24_28;
            } else  {
              // Unknown sequence: replay "ESC $" plus the current byte.
              if (CHECK_OVERRUN(dest, destEnd, 3))
                goto error1;
              *dest++ = (PRUnichar) 0x1b;
              *dest++ = (PRUnichar) '$';
              if (0x80 & *src) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              } else {
                *dest++ = (PRUnichar) *src;
              }
              mState = mLastLegalState;
            }
          break;

          case mState_ESC_24_28: // ESC $ (
            if( 'C' == *src) {
              mState = mState_KSC5601_1987;
              mRunLength = 0;
            } else if ('D' == *src) {
              mState = mState_JISX0212_1990;
              mRunLength = 0;
            } else  {
              // Unknown sequence: replay "ESC $ (" plus the current byte.
              if (CHECK_OVERRUN(dest, destEnd, 4))
                goto error1;
              *dest++ = (PRUnichar) 0x1b;
              *dest++ = (PRUnichar) '$';
              *dest++ = (PRUnichar) '(';
              if (0x80 & *src) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              } else {
                *dest++ = (PRUnichar) *src;
              }
              mState = mLastLegalState;
            }
          break;

          case mState_JISX0201_1976Roman:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              if (CHECK_OVERRUN(dest, destEnd, 1))
                goto error1;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              ++mRunLength;
            } else {
              // XXX We need to  decide how to handle \ and ~ here
              // we may need a if statement here for '\' and '~'
              // to map them to Yen and Overbar
              if (CHECK_OVERRUN(dest, destEnd, 1))
                goto error1;
              *dest++ = (PRUnichar) *src;
              ++mRunLength;
            }
          break;

          case mState_JISX0201_1976Kana:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                goto error1;
              if((0x21 <= *src) && (*src <= 0x5F)) {
                // Bytes 0x21..0x5F map linearly onto U+FF61..U+FF9F.
                *dest++ = (0xFF61-0x0021) + *src;
              } else {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              }
              ++mRunLength;
            }
          break;

          // First byte of a two-byte character.  The five double-byte
          // states below share one shape: ESC starts an escape sequence, a
          // high-bit byte routes the following byte to mState_ERROR, and
          // otherwise the row offset for the byte is stored in mData.
          case mState_JISX0208_1978:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              mLastLegalState = mState;
              mState = mState_ERROR;
            } else {
              mData = JIS0208_INDEX[*src & 0x7F];
              if (0xFFFD == mData) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                mState = mState_ERROR;
              } else {
                mState = mState_JISX0208_1978_2ndbyte;
              }
            }
          break;

          case mState_GB2312_1980:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              mLastLegalState = mState;
              mState = mState_ERROR;
            } else {
              mData = fbIdx[*src & 0x7F];
              if (0xFFFD == mData) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                mState = mState_ERROR;
              } else {
                mState = mState_GB2312_1980_2ndbyte;
              }
            }
          break;

          case mState_JISX0208_1983:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              mLastLegalState = mState;
              mState = mState_ERROR;
            } else {
              mData = JIS0208_INDEX[*src & 0x7F];
              if (0xFFFD == mData) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                mState = mState_ERROR;
              } else {
                mState = mState_JISX0208_1983_2ndbyte;
              }
            }
          break;

          case mState_KSC5601_1987:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              mLastLegalState = mState;
              mState = mState_ERROR;
            } else {
              mData = fbIdx[*src & 0x7F];
              if (0xFFFD == mData) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                mState = mState_ERROR;
              } else {
                mState = mState_KSC5601_1987_2ndbyte;
              }
            }
          break;

          case mState_JISX0212_1990:
            if(0x1b == *src) {
              mLastLegalState = mState;
              mState = mState_ESC;
            } else if(*src & 0x80) {
              mLastLegalState = mState;
              mState = mState_ERROR;
            } else {
              mData = JIS0212_INDEX[*src & 0x7F];
              if (0xFFFD == mData) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                mState = mState_ERROR;
              } else {
                mState = mState_JISX0212_1990_2ndbyte;
              }
            }
          break;

          case mState_JISX0208_1978_2ndbyte:
          {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            uint8_t off = sbIdx[*src];
            if(0xFF == off) {
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            } else {
               // XXX We need to map from JIS X 0208 1983 to 1987
               // in the next line before pass to *dest++
              *dest++ = gJapaneseMap[mData+off];
            }
            ++mRunLength;
            mState = mState_JISX0208_1978;
          }
          break;

          case mState_GB2312_1980_2ndbyte:
          {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            uint8_t off = sbIdx[*src];
            if(0xFF == off) {
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            } else {
              if (!mGB2312Decoder) {
                // creating a delegate converter (GB2312)
                nsresult rv;
                nsCOMPtr<nsICharsetConverterManager> ccm =
                         do_GetService(kCharsetConverterManagerCID, &rv);
                if (NS_SUCCEEDED(rv)) {
                  rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
                }
              }
              if (!mGB2312Decoder) {// failed creating a delegate converter
                goto error2;
              } else {
                unsigned char gb[2];
                // Pre-set to U+FFFD: the delegate's result is not checked,
                // so an uninitialised value must never reach the output.
                PRUnichar uni = UNICODE_REPLACEMENT_CHARACTER;
                int32_t gbLen = 2, uniLen = 1;
                // ((mData/94)+0x21) is the original 1st byte.
                // *src is the present 2nd byte.
                // Put 2 bytes (one character) to gb[] with GB2312 encoding.
                gb[0] = ((mData / 94) + 0x21) | 0x80;
                gb[1] = *src | 0x80;
                // Convert GB2312 to unicode.
                mGB2312Decoder->Convert((const char *)gb, &gbLen,
                                        &uni, &uniLen);
                *dest++ = uni;
              }
            }
            ++mRunLength;
            mState = mState_GB2312_1980;
          }
          break;

          case mState_JISX0208_1983_2ndbyte:
          {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            uint8_t off = sbIdx[*src];
            if(0xFF == off) {
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            } else {
              *dest++ = gJapaneseMap[mData+off];
            }
            ++mRunLength;
            mState = mState_JISX0208_1983;
          }
          break;

          case mState_KSC5601_1987_2ndbyte:
          {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            uint8_t off = sbIdx[*src];
            if(0xFF == off) {
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            } else {
              if (!mEUCKRDecoder) {
                // creating a delegate converter (EUC-KR)
                nsresult rv;
                nsCOMPtr<nsICharsetConverterManager> ccm =
                         do_GetService(kCharsetConverterManagerCID, &rv);
                if (NS_SUCCEEDED(rv)) {
                  rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
                }
              }
              if (!mEUCKRDecoder) {// failed creating a delegate converter
                goto error2;
              } else {
                unsigned char ksc[2];
                // Pre-set to U+FFFD: the delegate's result is not checked,
                // so an uninitialised value must never reach the output.
                PRUnichar uni = UNICODE_REPLACEMENT_CHARACTER;
                int32_t kscLen = 2, uniLen = 1;
                // ((mData/94)+0x21) is the original 1st byte.
                // *src is the present 2nd byte.
                // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
                ksc[0] = ((mData / 94) + 0x21) | 0x80;
                ksc[1] = *src | 0x80;
                // Convert EUC-KR to unicode.
                mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
                                       &uni, &uniLen);
                *dest++ = uni;
              }
            }
            ++mRunLength;
            mState = mState_KSC5601_1987;
          }
          break;

          case mState_JISX0212_1990_2ndbyte:
          {
            uint8_t off = sbIdx[*src];
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            if(0xFF == off) {
              if (mErrBehavior == kOnError_Signal)
                goto error3;
              *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            } else {
              *dest++ = gJapaneseMap[mData+off];
            }
            ++mRunLength;
            mState = mState_JISX0212_1990;
          }
          break;

          case mState_ESC_2e: // ESC .
            // "ESC ." will designate 96 character set to G2.
            mState = mLastLegalState;
            if( 'A' == *src) {
              G2charset = G2_ISO88591;
            } else if ('F' == *src) {
              G2charset = G2_ISO88597;
            } else  {
              // Unknown sequence: replay "ESC ." plus the current byte.
              if (CHECK_OVERRUN(dest, destEnd, 3))
                goto error1;
              *dest++ = (PRUnichar) 0x1b;
              *dest++ = (PRUnichar) '.';
              if (0x80 & *src) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              } else {
                *dest++ = (PRUnichar) *src;
              }
            }
          break;

          case mState_ESC_4e: // ESC N
            // "ESC N" is the SS2 sequence, that invoke a G2 designated
            // character set.  Since SS2 is effective only for next one
            // character, mState should be returned to the last status.
            mState = mLastLegalState;
            if((0x20 <= *src) && (*src <= 0x7F)) {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                goto error1;
              if (G2_ISO88591 == G2charset) {
                // Setting the high bit of the byte gives the output value
                // directly for ISO-8859-1.
                *dest++ = *src | 0x80;
              } else if (G2_ISO88597 == G2charset) {
                if (!mISO88597Decoder) {
                  // creating a delegate converter (ISO-8859-7)
                  nsresult rv;
                  nsCOMPtr<nsICharsetConverterManager> ccm =
                           do_GetService(kCharsetConverterManagerCID, &rv);
                  if (NS_SUCCEEDED(rv)) {
                    rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
                  }
                }
                if (!mISO88597Decoder) {// failed creating a delegate converter
                  goto error2;
                } else {
                  // Put one character with ISO-8859-7 encoding.
                  unsigned char gr = *src | 0x80;
                  // Pre-set to U+FFFD: the delegate's result is not checked,
                  // so an uninitialised value must never reach the output.
                  PRUnichar uni = UNICODE_REPLACEMENT_CHARACTER;
                  int32_t grLen = 1, uniLen = 1;
                  // Convert ISO-8859-7 to unicode.
                  mISO88597Decoder->Convert((const char *)&gr, &grLen,
                                            &uni, &uniLen);
                  *dest++ = uni;
                }
              } else {// G2charset is G2_unknown (not designated yet)
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              }
              ++mRunLength;
            } else {
              // Not a valid SS2 operand: replay "ESC N" plus the current byte.
              if (CHECK_OVERRUN(dest, destEnd, 3))
                goto error1;
              *dest++ = (PRUnichar) 0x1b;
              *dest++ = (PRUnichar) 'N';
              if (0x80 & *src) {
                if (mErrBehavior == kOnError_Signal)
                  goto error3;
                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
              } else {
                *dest++ = (PRUnichar) *src;
              }
            }
          break;

          case mState_ERROR:
            // Entered after an invalid first byte; the current byte is
            // swallowed and a single U+FFFD stands in for the broken pair.
            mState = mLastLegalState;
            if (mErrBehavior == kOnError_Signal) {
              mRunLength = 0;
              goto error3;
            }
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = UNICODE_REPLACEMENT_CHARACTER;
            ++mRunLength;
          break;

       } // switch
       src++;
   }
   *aDestLen = dest - aDest;
   return NS_OK;
error1: // output buffer full
   *aDestLen = dest - aDest;
   *aSrcLen = src - (const unsigned char*)aSrc;
   return NS_OK_UDEC_MOREOUTPUT;
error2: // delegate decoder unavailable
   *aDestLen = dest - aDest;
   *aSrcLen = src - (const unsigned char*)aSrc;
   return NS_ERROR_UNEXPECTED;
error3: // illegal input while mErrBehavior == kOnError_Signal
   *aDestLen = dest - aDest;
   *aSrcLen = src - (const unsigned char*)aSrc;
   return NS_ERROR_ILLEGAL_INPUT;
}
コード例 #2
0
/**
 * Decode a chunk of ISO-2022-KR input into UTF-16 code units.
 *
 * The decoder is a byte-at-a-time state machine whose state (mState,
 * mLastLegalState, mData, mRunLength) is kept in members, so an escape
 * sequence or a two-byte character may be split across calls.
 *
 * @param aSrc     input bytes.
 * @param aSrcLen  in: number of bytes available at aSrc.
 *                 out: when NS_OK_UDEC_MOREOUTPUT is returned, the number of
 *                 bytes consumed.  It is left untouched on NS_OK.
 * @param aDest    output buffer.
 * @param aDestLen in: capacity of aDest in code units.
 *                 out: number of code units written.
 * @return NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when
 *         CHECK_OVERRUN reported that the output buffer is full.
 */
NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, PRUnichar * aDest, int32_t * aDestLen)
{
  const unsigned char* srcEnd = (const unsigned char*)aSrc + *aSrcLen;
  const unsigned char* src = (const unsigned char*)aSrc;
  PRUnichar* destEnd = aDest + *aDestLen;
  PRUnichar* dest = aDest;
  while((src < srcEnd))
  {
    // if LF/CR, return to US-ASCII unconditionally.
    if ( *src == 0x0a || *src == 0x0d )
      mState = mState_Init;

    // Every case consumes exactly one input byte (the src++ after the
    // switch); the error exit jumps out before that increment.
    switch(mState)
    {
      case mState_Init:
        if(0x1b == *src) {
          mLastLegalState = mState_ASCII;
          mState = mState_ESC;
          break;
        }
        mState = mState_ASCII;
        // fall through

      case mState_ASCII:
        if(0x0e == *src) { // Shift-Out
          mState = mState_KSX1001_1992;
          mRunLength = 0;
        }
        else if(*src & 0x80) {
          // Bytes with the high bit set are illegal in this state.
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = 0xFFFD;
        }
        else {
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = (PRUnichar) *src;
        }
        break;

      case mState_ESC:
        if('$' == *src) {
          mState = mState_ESC_24;
        }
        else  {
          // Unknown sequence: replay ESC plus the current byte.
          if (CHECK_OVERRUN(dest, destEnd, 2))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState =  mLastLegalState;
        }
        break;

      case mState_ESC_24: // ESC $
        if(')' == *src) {
          mState = mState_ESC_24_29;
        }
        else  {
          // Unknown sequence: replay "ESC $" plus the current byte.
          if (CHECK_OVERRUN(dest, destEnd, 3))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24_29: // ESC $ )
        mState = mLastLegalState;
        if('C' == *src) {
          // "ESC $ ) C" is the only recognised sequence; it produces no
          // output and decoding continues in the ASCII state.
          mState = mState_ASCII;
          mRunLength = 0;
        }
        else  {
          // Unknown sequence: replay "ESC $ )" plus the current byte.
          if (CHECK_OVERRUN(dest, destEnd, 4))
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (PRUnichar) ')';
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_KSX1001_1992:
        if (0x20 < (uint8_t) *src  && (uint8_t) *src < 0x7f) {
          // First byte of a two-byte character; keep it for the next round.
          mData = (uint8_t) *src;
          mState = mState_KSX1001_1992_2ndbyte;
        }
        else if (0x0f == *src) { // Shift-In (SI)
          mState = mState_ASCII;
          // mRunLength counts characters decoded since Shift-Out; an empty
          // SO..SI segment emits U+FFFD.
          if (mRunLength == 0) {
            if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
            *dest++ = 0xFFFD;
          }
          mRunLength = 0;
        }
        else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) {
          // Allow space and tab between SO and SI (i.e. in Hangul segment)
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          mState = mState_KSX1001_1992;
          *dest++ = (PRUnichar) *src;
          ++mRunLength;
        }
        else {         // Everything else is invalid.
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = 0xFFFD;
        }
        break;

      case mState_KSX1001_1992_2ndbyte:
        if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f  ) {
          // Both branches below write exactly one code unit, so reserve the
          // slot before either of them runs.  (The fallback branch used to
          // write without any bounds check.)
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;

          if (!mEUCKRDecoder) {
            // creating a delegate converter (EUC-KR)
            nsresult rv;
            nsCOMPtr<nsICharsetConverterManager> ccm =
                  do_GetService(kCharsetConverterManagerCID, &rv);
            if (NS_SUCCEEDED(rv)) {
              rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
            }
          }

          if (!mEUCKRDecoder) {// failed creating a delegate converter
           *dest++ = 0xFFFD;
          }
          else {
            unsigned char ksx[2];
            // Pre-set to U+FFFD: the delegate's result is not checked, so
            // an uninitialised value must never reach the output.
            PRUnichar uni = 0xFFFD;
            int32_t ksxLen = 2, uniLen = 1;
            // mData is the original 1st byte.
            // *src is the present 2nd byte.
            // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
            ksx[0] = mData | 0x80;
            ksx[1] = *src | 0x80;
            // Convert EUC-KR to unicode.
            mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
            *dest++ = uni;
            ++mRunLength;
          }
          mState = mState_KSX1001_1992;
        }
        else {        // Invalid
          // The next state is chosen before the overrun check, so it is
          // already in effect if error1 is taken.
          if ( 0x0f == *src ) {   // Shift-In (SI)
            mState = mState_ASCII;
          }
          else {
            mState = mState_KSX1001_1992;
          }
          if (CHECK_OVERRUN(dest, destEnd, 1))
            goto error1;
          *dest++ = 0xFFFD;
        }
        break;

      case mState_ERROR:
        mState = mLastLegalState;
        if (CHECK_OVERRUN(dest, destEnd, 1))
          goto error1;
        *dest++ = 0xFFFD;
        break;

    } // switch
    src++;
  }
  *aDestLen = dest - aDest;
  return NS_OK;

error1: // output buffer full
  *aDestLen = dest-aDest;
  *aSrcLen = src-(const unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;
}
コード例 #3
0
NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
{
  const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
  const unsigned char * src = (unsigned char *) aSrc;
  char16_t* destEnd = aDest + *aDestLen;
  char16_t* dest = aDest;
  nsresult rv;
  int32_t aLen; 

  while ((src < srcEnd))
  {
    switch (mState)
    {
      case eState_ASCII:
        if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC:    // ESC
        if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 2))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24: // ESC $
        if(')' == *src) {
           mState = eState_ESC_24_29;
        } else if('*' == *src) {
           mState = eState_ESC_24_2A;
        } else if('+' == *src) {
           mState = eState_ESC_24_2B;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 3))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_29: // ESC $ )
        if('A' == *src) {
           mState = eState_ESC_24_29_A;
        } else if('G' == *src) {
           mState = eState_ESC_24_29_G;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 4))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) ')';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_29_A:  // ESC $ ) A
        if(SO == *src) {
           mState = eState_GB2312_1980;
           mRunLength = 0;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 5))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) ')';
           *dest++ = (char16_t) 'A';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_GB2312_1980:   // ESC $ ) A SO
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_29_A_SO_SI;
           if (mRunLength == 0) {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_GB2312_1980_2ndbyte;
           } else {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
           }
        }
        break; 

      case eState_GB2312_1980_2ndbyte:  // ESC $ ) A SO
        if(0x20 < *src && *src < 0x7f) {
           unsigned char gb[2];
           int32_t gbLen = 2;

           gb[0] = mData | 0x80;
           gb[1] = *src | 0x80;

           aLen = destEnd - dest;
           rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 2))
              goto error1;
           *dest++ = (char16_t) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_GB2312_1980;
        break;

      case eState_ESC_24_29_A_SO_SI:  // ESC $ ) A SO SI
        if(SO == *src) {
           mState = eState_GB2312_1980;
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ESC_24_29_A_SO_SI;
        }
        break;

      case eState_ESC_24_29_G:   // ESC $ ) G
        if(SO == *src) {
           mState = eState_CNS11643_1;
           mRunLength = 0;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 5))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) ')';
           *dest++ = (char16_t) 'G';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_1:   // ESC $ ) G SO
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_29_G_SO_SI;
           if (mRunLength == 0) {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_CNS11643_1_2ndbyte;
           } else {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
           }
        }
        break;

      case eState_CNS11643_1_2ndbyte:  // ESC $ ) G SO
        if(0x20 < *src && *src < 0x7f) {
           unsigned char cns[4];
           int32_t cnsLen = 2;

           cns[0] = mData | 0x80;
           cns[1] = *src | 0x80;

           aLen = destEnd - dest;
           rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 2))
              goto error1;
           *dest++ = (char16_t) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_CNS11643_1;
        break;

      case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
        if(SO == *src) {
           mState = eState_CNS11643_1;
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ESC_24_29_G_SO_SI;
        }
        break;

      case eState_ESC_24_2A: // ESC $ *
        if('H' == *src) {
           mState = eState_ESC_24_2A_H;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 4))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) '*';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2A_H:  // ESC $ * H
        if(ESC == *src) {
           mState = eState_ESC_24_2A_H_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 5))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) '*';
           *dest++ = (char16_t) 'H';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        } 
        break;

      case eState_ESC_24_2A_H_ESC:  // ESC $ * H ESC
        if(SS2 == *src) {
           mState = eState_CNS11643_2;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 6))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) '*';
           *dest++ = (char16_t) 'H';
           *dest++ = (char16_t) ESC;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_2:  // ESC $ * H ESC SS2
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_2A_H_ESC_SS2_SI;
           if (mRunLength == 0) {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC_24_2A_H_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_CNS11643_2_2ndbyte;
           } else {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
           }
        }
        break;

      case eState_CNS11643_2_2ndbyte:   // ESC $ * H ESC SS2
        if(0x20 < *src && *src < 0x7f) {
           unsigned char cns[4];
           int32_t cnsLen = 4;
 
           cns[0] = (unsigned char) MBYTE;
           cns[1] = (unsigned char) (PMASK + 2);
           cns[2] = mData | 0x80;
           cns[3] = *src | 0x80;
 
           aLen = destEnd - dest;
           rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 2))
              goto error1;
           *dest++ = (char16_t) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_CNS11643_2;
        break;

      case eState_ESC_24_2A_H_ESC_SS2_SI:  // ESC $ * H ESC SS2 SI
        if(ESC == *src) {
           mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ESC_24_2A_H_ESC_SS2_SI;
        }
        break;

      case eState_ESC_24_2A_H_ESC_SS2_SI_ESC:  // ESC $ * H ESC SS2 SI ESC
        if(SS2 == *src) {
           mState = eState_CNS11643_2;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ESC_24_2A_H_ESC_SS2_SI;
        }
        break;

      case eState_ESC_24_2B: // ESC $ +
        if('I' <= *src && *src <= 'M') {
            mState = eState_ESC_24_2B_I;
            mPlaneID = *src - 'I' + 3;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 4))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) '+';
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2B_I:  // ESC $ + I
        if(ESC == *src) {
           mState = eState_ESC_24_2B_I_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 5))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) '+';
           *dest++ = (char16_t) 'I' + mPlaneID - 3;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2B_I_ESC:  // ESC $ + I ESC
        if(SS3 == *src) {
           mState = eState_CNS11643_3;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 6))
              goto error1;
           *dest++ = (char16_t) ESC;
           *dest++ = (char16_t) '$';
           *dest++ = (char16_t) '+';
           *dest++ = (char16_t) 'I' + mPlaneID - 3;
           *dest++ = (char16_t) ESC;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_3:   // ESC $ + I ESC SS3
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_2B_I_ESC_SS3_SI;
           if (mRunLength == 0) {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC_24_2B_I_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_CNS11643_3_2ndbyte;
           } else {
              if (CHECK_OVERRUN(dest, destEnd, 1))
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
           }
        }

        break;

      case eState_CNS11643_3_2ndbyte:  // ESC $ + I ESC SS3
        if(0x20 < *src && *src < 0x7f) {
           unsigned char cns[4];
           int32_t cnsLen = 4;

           cns[0] = (unsigned char) MBYTE;
           cns[1] = (unsigned char) (PMASK + mPlaneID);
           cns[2] = mData | 0x80;
           cns[3] = *src | 0x80;

           aLen = destEnd - dest;
           rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 2))
              goto error1;
           *dest++ = (char16_t) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
        }
        mState = eState_CNS11643_3;
        break;

      case eState_ESC_24_2B_I_ESC_SS3_SI:  // ESC $ + I ESC SS3 SI
        if(ESC == *src) {
           mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ESC_24_2B_I_ESC_SS3_SI;
        }
        break;

      case eState_ESC_24_2B_I_ESC_SS3_SI_ESC:  // ESC $ + I ESC SS3 SI ESC
        if(SS3 == *src) {
           mState = eState_CNS11643_3;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if (CHECK_OVERRUN(dest, destEnd, 1))
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;

           mState = eState_ESC_24_2B_I_ESC_SS3_SI;
        }
        break;

      case eState_ERROR:
        NS_NOTREACHED("unhandled case");
        goto error2;

    } // switch
    src++;
  }

  *aDestLen = dest- aDest;
  return NS_OK;

error1:
  *aDestLen = dest-aDest;
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;

error2:
  *aSrcLen = src - (const unsigned char*)aSrc;
  *aDestLen = dest-aDest;
  mState = eState_ASCII;
  return NS_ERROR_UNEXPECTED;
}