Esempio n. 1
0
bool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, char16_t* aOut)
{
  // Turns the four bytes at aSrc into a UTF-16 surrogate pair written to
  // aOut[0] (high) and aOut[1] (low).  Returns false, leaving aOut untouched,
  // when any byte fails its range macro or the result is out of range.

  // Callers are expected to have validated the bytes already; complain in
  // debug builds if they did not.
  NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]),       "illegal first byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]),   "illegal second byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]),    "illegal third byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]),    "illegal forth byte");

  // Release builds still need a hard check; bytes are tested in order and
  // evaluation stops at the first one that fails.
  if (!(FIRST_BYTE_IS_SURROGATE(aSrc[0]) &&
        LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]) &&
        LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) &&
        LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))) {
    return false;
  }

  // Rebase every byte onto zero (8-bit arithmetic).
  const uint8_t d1 = static_cast<uint8_t>(static_cast<uint8_t>(aSrc[0]) - 0x90);
  const uint8_t d2 = static_cast<uint8_t>(static_cast<uint8_t>(aSrc[1]) - 0x30);
  const uint8_t d3 = static_cast<uint8_t>(static_cast<uint8_t>(aSrc[2]) - 0x81);
  const uint8_t d4 = static_cast<uint8_t>(static_cast<uint8_t>(aSrc[3]) - 0x30);

  // Mixed-radix accumulation with weights 10, 126 and 10 for the trailing
  // three digits.  The result is the code point minus 0x10000.
  uint32_t offset = d1;
  offset = offset * 10 + d2;
  offset = offset * 126 + d3;
  offset = offset * 10 + d4;

  // Only 20 bits fit in a surrogate pair; anything larger is rejected.
  if (offset > 0x000FFFFF) {
    return false;
  }

  aOut[0] = 0xD800 | (offset >> 10);          // high surrogate: upper 10 bits
  aOut[1] = 0xDC00 | (offset & 0x000003FF);   // low surrogate: lower 10 bits

  return true;
}
/**
 * Decode the four bytes at aSrc into a UTF-16 surrogate pair.
 *
 * @param aSrc  four input bytes; each one is checked against its range macro.
 * @param aOut  receives two code units on success: aOut[0] is the high
 *              surrogate, aOut[1] the low surrogate.  Not written on failure.
 * @return PR_TRUE when a pair was written; PR_FALSE when a byte is out of
 *         range or the sequence encodes a value above U+10FFFF.
 */
PRBool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
{
  // Debug-build diagnostics; the explicit checks below guard release builds.
  NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]),       "illegal first byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]),   "illegal second byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]),    "illegal third byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]),    "illegal forth byte");
  if(! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
    return PR_FALSE;
  if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
    return PR_FALSE;
  if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]))
    return PR_FALSE;
  if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
    return PR_FALSE;

  // Rebase each byte onto zero before combining them.
  PRUint8 a1 = (PRUint8) aSrc[0];
  PRUint8 a2 = (PRUint8) aSrc[1];
  PRUint8 a3 = (PRUint8) aSrc[2];
  PRUint8 a4 = (PRUint8) aSrc[3];
  a1 -= (PRUint8)0x90;
  a2 -= (PRUint8)0x30;
  a3 -= (PRUint8)0x81;
  a4 -= (PRUint8)0x30;
  PRUint32 idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4;

  // idx is the code point minus 0x10000, so it must fit in the 20 bits a
  // surrogate pair can carry.  A larger value must be rejected: masking it
  // down to 10+10 bits would silently yield an unrelated, valid-looking pair.
  // NOTE(review): this is reachable only if FIRST_BYTE_IS_SURROGATE accepts
  // lead bytes high enough to push idx past 0xFFFFF -- confirm against the
  // macro definition.
  if (idx > 0x000FFFFF)
    return PR_FALSE;

  *aOut++ = 0xD800 | (idx >> 10);           // high surrogate: upper 10 bits
  *aOut = 0xDC00 | (0x000003FF & idx);      // low surrogate: lower 10 bits

  return PR_TRUE;
}
Esempio n. 3
0
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
                                            int32_t * aSrcLength,
                                            char16_t *aDest,
                                            int32_t * aDestLength)
{
  // Converts as much of the input as fits.
  //   aSrcLength  in: bytes available at aSrc;  out: bytes consumed.
  //   aDestLength in: code units available at aDest;  out: code units written.
  // Returns NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when the
  // output filled up first, and NS_OK_UDEC_MOREINPUT when the input ends in
  // the middle of a multi-byte sequence.
  const int32_t srcTotal = *aSrcLength;
  int32_t srcPos = 0;        // bytes of input fully processed so far
  int32_t unitsOut = 0;      // code units stored in the output so far
  nsresult status = NS_OK;

  // The consumed count only advances once a character is completely handled.
  *aSrcLength = 0;

  while (srcPos < srcTotal)
  {
    if (unitsOut >= (*aDestLength))
    {
      status = NS_OK_UDEC_MOREOUTPUT;
      break;
    }

    // Number of input bytes this character uses; raised to 2 or 4 below
    // when a complete multi-byte sequence is recognised.
    int32_t step = 1;

    if (!(LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc)))
    {
      // Single-byte input: ASCII, the GBK euro byte, or something unmappable.
      if (IS_ASCII(*aSrc))
      {
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
      }
      else if (IS_GBK_EURO(*aSrc))
      {
        *aDest = UCS2_EURO;
      }
      else
      {
        *aDest = UCS2_NO_MAPPING;
      }
    }
    else
    {
      // Lead byte of a multi-byte sequence (valid range [0x81,0xFE]);
      // a second byte is required before anything can be decided.
      if (srcPos + 1 >= srcTotal)
      {
        status = NS_OK_UDEC_MOREINPUT;
        break;
      }

      if (LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1]))
      {
        // Two-byte GBK: second byte in [0x40,0x7E] or [0x80,0xFE].
        *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
        // Not in the common table: give the delegate two-byte decoder
        // (gbk / gb18030 specific mappings) a chance.
        if (UCS2_NO_MAPPING == *aDest && !TryExtensionDecoder(aSrc, aDest))
        {
          *aDest = UCS2_NO_MAPPING;
        }
        step = 2;
      }
      else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
      {
        // Looks like a four-byte GB18030 sequence:
        // [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39]
        if (srcPos + 3 >= srcTotal)
        {
          status = NS_OK_UDEC_MOREINPUT;
          break;
        }

        if (!(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) &&
              LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3])))
        {
          // Bad third/fourth byte: emit a replacement for the lead byte only
          // and resynchronise on the second byte (known to be 0x30-0x39).
          *aDest = UCS2_NO_MAPPING;
        }
        else
        {
          if (!FIRST_BYTE_IS_SURROGATE(aSrc[0]))
          {
            // Delegate to the four-byte gb18030 converter.
            if (!Try4BytesDecoder(aSrc, aDest))
            {
              *aDest = UCS2_NO_MAPPING;
            }
          }
          else if ((unitsOut + 1) < (*aDestLength))
          {
            // Supplementary mapping; room for two code units is available.
            if (DecodeToSurrogate(aSrc, aDest))
            {
              // Account for the first half of the pair here; the second
              // half is counted by the common bookkeeping below.
              ++unitsOut;
              ++aDest;
            }
            else
            {
              *aDest = UCS2_NO_MAPPING;
            }
          }
          else if (*aDestLength < 2)
          {
            // The buffer can never hold a pair; give up on this character.
            NS_ERROR("insufficient space in output buffer");
            *aDest = UCS2_NO_MAPPING;
          }
          else
          {
            // Leave the sequence unconsumed and ask for more output space.
            status = NS_OK_UDEC_MOREOUTPUT;
            break;
          }
          step = 4;
        }
      }
      else if ((uint8_t) aSrc[0] == (uint8_t)0xA0)
      {
        // Stand-alone 0xA0 (no valid second byte): passed through as valid,
        // a la Netscape 4.x.
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
      }
      else
      {
        // Invalid GBK code point (second byte should be 0x40 or higher).
        *aDest = UCS2_NO_MAPPING;
      }
    }

    // Common bookkeeping: one code unit stored, `step` bytes consumed.
    aSrc += step;
    srcPos += step;
    ++unitsOut;
    ++aDest;
    *aSrcLength = srcPos;
  }

  *aDestLength = unitsOut;
  return status;
}