C++ (Cpp) LocaleCharToUCS4 Examples

Example #1

0

Show file

File: exp_attrs.cpp Project: AlexPeng19/incubator-trafodion

// Compute the byte offset in buf at which the numOfChar-th character starts
// (characters are numbered from 1, offsets from 0).
//
//   buf        - start of the character data
//   numOfChar  - 1-based position of the character whose offset is wanted
//   maxBufLen  - number of bytes available in buf
//   cs         - character set the data is encoded in
//
// For ISO88591 and UCS2 no scan is done: the result is numOfChar - 1 when
// numOfChar <= maxBufLen, otherwise maxBufLen.
// For every other character set the buffer is walked one character at a
// time with LocaleCharToUCS4. A negative value returned by that routine is
// passed straight back to the caller as the error code.
Int32 Attributes::convertCharToOffset (const char        *buf,
                                       Int32             numOfChar,
                                       Int32             maxBufLen,
                                       CharInfo::CharSet cs)
{
  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
  {
    if (numOfChar > maxBufLen)
      return maxBufLen;
    return numOfChar - 1;
  }

  const cnv_charset cnvCs = convertCharsetEnum(cs);
  UInt32 decodedValue;   // required by LocaleCharToUCS4; not used here
  Int32 byteOffset = 0;

  // charPos is the 1-based number of the character that byteOffset
  // currently points at; stop once it reaches the requested character or
  // the buffer is exhausted.
  for (Int32 charPos = 1;
       charPos < numOfChar && byteOffset < maxBufLen;
       ++charPos)
  {
    const Int32 charBytes = LocaleCharToUCS4(buf + byteOffset,
                                             maxBufLen - byteOffset,
                                             &decodedValue,
                                             cnvCs);
    if (charBytes < 0)
      return charBytes;

    byteOffset += charBytes;
  }

  return byteOffset;
}

Example #2

0

Show file

File: exp_attrs.cpp Project: AlexPeng19/incubator-trafodion

// Count how many characters occupy the first `offset` bytes of buf.
//
//   buf     - start of the character data
//   offset  - number of bytes to examine
//   cs      - character set the data is encoded in
//
// For ISO88591 and UCS2 the offset itself is returned unchanged.
// For other character sets the bytes are decoded one character at a time
// with LocaleCharToUCS4; a negative result from that routine is returned
// to the caller as the error code.
Int32 Attributes::convertOffsetToChar(const char        *buf,
                                      Int32             offset, 
                                      CharInfo::CharSet cs)
{
  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
    return offset;

  const cnv_charset cnvCs = convertCharsetEnum(cs);
  UInt32 decodedValue;   // required by LocaleCharToUCS4; not used here
  Int32 charCount = 0;

  for (Int32 pos = 0; pos < offset; ++charCount)
  {
    const Int32 charBytes = LocaleCharToUCS4(buf + pos,
                                             offset - pos,
                                             &decodedValue,
                                             cnvCs);
    if (charBytes < 0)
      return charBytes;

    pos += charBytes;
  }

  return charCount;
}

Example #3

0

Show file

File: conversionLocale.cpp Project: AlexPeng19/incubator-trafodion

// -----------------------------------------------------------------------
// ComputeStrLenInUCS4chars:
//
// Counts the characters in pStr (strLenInBytes bytes, encoded in cs) by
// decoding them one at a time with LocaleCharToUCS4.
//
// - For ISO88591, or when strLenInBytes is 0, strLenInBytes is returned
//   as is.
// - Characters whose first byte is '\0' at the very end of the string are
//   not included in the count.
// - If LocaleCharToUCS4 reports a non-positive length for any character,
//   CNV_ERR_INVALID_CHAR is returned.
//
// No workspace heap is used.
// -----------------------------------------------------------------------
NA_EIDPROC
Int32 ComputeStrLenInUCS4chars (const char * pStr,
                                const Int32 strLenInBytes,
                                const CharInfo::CharSet cs)
{
  if (strLenInBytes == 0 || cs == CharInfo::ISO88591)
    return strLenInBytes;

  const cnv_charset cnvCharSet = convertCharsetEnum(cs);
  UInt32 /*ucs4_t*/ decodedValue;
  Int32 charCount = 0;
  Int32 trailingNuls = 0;   // length of the current run of NUL characters

  const char *cursor = pStr;
  for (Int32 remaining = strLenInBytes; remaining > 0; )
  {
    const Int32 charBytes =
      LocaleCharToUCS4 (cursor, remaining, &decodedValue, cnvCharSet);

    if (charBytes <= 0)
      return CNV_ERR_INVALID_CHAR;

    ++charCount;

    // A non-NUL character ends any run of NULs seen so far, so only the
    // run that reaches the end of the string survives.
    trailingNuls = (*cursor == '\0') ? trailingNuls + 1 : 0;

    cursor += charBytes;
    remaining -= charBytes;
  }

  // Trailing NUL characters are excluded from the result.
  return charCount - trailingNuls;

} // ComputeStrLenInUCS4chars ()

Example #4

0

Show file

File: exp_attrs.cpp Project: AlexPeng19/incubator-trafodion

// Count the characters stored in [buf, endOfBuf) and, optionally, record
// the byte length of each one.
//
//   buf              - first byte of the data
//   endOfBuf         - one past the last byte of the data
//   charLengthInBuf  - if not NULL, entry i receives the byte length of
//                      the i-th character
//   cs               - character set the data is encoded in
//
// Returns the number of characters, or CNV_ERR_INVALID_CHAR if
// LocaleCharToUCS4 reports a non-positive length for any character.
// For ISO88591 and UCS2 the count is simply endOfBuf - buf and every
// recorded length is 1.
Int32 Attributes::getCharLengthInBuf
    (const char        *buf,
     const char        *endOfBuf,
     char              *charLengthInBuf,
     CharInfo::CharSet cs)
{
  const bool recordLengths = (charLengthInBuf != NULL);

  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
  {
    const Int32 unitCount = endOfBuf - buf;
    if (recordLengths)
    {
      Int32 idx = 0;
      while (idx < unitCount)
        charLengthInBuf[idx++] = 1;
    }
    return unitCount;
  }

  // With SJIS the length of the last character cannot be determined by
  // looking backwards from the end, so the string is scanned from the
  // front and each length is stored as it is found.
  // Example: in SJIS x'5182828251' the last character is the two-byte
  // double-byte "2", whereas in x'51828251' the last character is the
  // one-byte "Q".
  const cnv_charset charset = convertCharsetEnum(cs);
  UInt32 decodedValue;   // required by LocaleCharToUCS4; not used here
  const char *cursor = buf;
  size_t bytesLeft = endOfBuf - buf;
  Int32 charCount = 0;

  while (bytesLeft > 0)
  {
    const Int32 charBytes =
      LocaleCharToUCS4 (cursor, bytesLeft, &decodedValue, charset);

    if (charBytes <= 0)
      return CNV_ERR_INVALID_CHAR;

    if (recordLengths)
      charLengthInBuf[charCount] = (char)charBytes;

    ++charCount;
    cursor += charBytes;
    bytesLeft -= charBytes;
  }

  return charCount;
}

Example #5

0

Show file

File: exp_attrs.cpp Project: AlexPeng19/incubator-trafodion

// Return the length of the first character in buf.
//
//   buf     - start of the character data; buf[0] is read unless cs is
//             ISO88591 or UCS2
//   buflen  - number of bytes available in buf (passed to LocaleCharToUCS4)
//   cs      - character set the data is encoded in
//
// Returns 1 for ISO88591 and UCS2 (UCS2 data is handled as wide
// characters, so its unit length is 1, not 2), and also when the first
// byte is 0: buffers handed to the string functions may contain the
// character 0, which is treated as a single-byte character.
// Otherwise the value produced by LocaleCharToUCS4 is returned unchanged.
// According to the original comment this should be 1 or 2 for SJIS and
// 1 to 4 for UTF8. Other callers of LocaleCharToUCS4 test its result for
// negative values, so a negative return here should be treated as an
// error code.
Int32 Attributes::getFirstCharLength(const char              *buf,
                                           Int32             buflen,
                                           CharInfo::CharSet cs)
{
  if( cs == CharInfo::ISO88591 ||
      cs == CharInfo::UCS2 ||
      buf[0] == 0)
  {
    return 1;
  }

  // The result is returned as a signed value directly: it may be a
  // negative error code, so it must not pass through an unsigned
  // temporary on its way to the Int32 return type.
  UInt32 UCS4value;   // required by LocaleCharToUCS4; not used here
  const Int32 firstCharLenInBuf =
    LocaleCharToUCS4(buf, buflen, &UCS4value, convertCharsetEnum(cs));

  return firstCharLenInBuf;
}

Example #6

0

Show file

File: conversionHex.cpp Project: RuoYuHP/incubator-trafodion

// Validate a hexadecimal string literal and convert it to a string in
// character set cs.
//
//   str, len  - the hex literal text and its length
//   quote     - quote character, forwarded to removeWSpaces
//   cs        - target character set
//   heap      - heap used for the intermediate and result strings; must
//               not be null
//   result    - receives the converted string object
//
// Returns:
//   NOT_SUPPORTED       - CharInfo::isHexFormatSupported(cs) is FALSE
//   INVALID             - isValidHexFormat rejects the input, or cs is not
//                         one of the handled character sets
//   CONV_FAILED         - heap is null
//   SINGLE_BYTE         - converted via convHexToChar (KANJI_MP,
//                         KSC5601_MP, ISO88591, UTF8)
//   DOUBLE_BYTE         - converted via convHexToWChar (UNICODE)
//   INVALID_CODEPOINTS  - the conversion returned no result, or (UTF8
//                         only) the converted bytes do not decode cleanly
//
// NOTE(review): result may be non-null when INVALID_CODEPOINTS is returned
// from the UTF8 check, and tmpStr is not released in this function --
// confirm ownership expectations with the callers / heap policy.
hex_conversion_code verifyAndConvertHex(const NAWchar *str, Int32 len, NAWchar quote,
                   CharInfo::CharSet cs, CollHeap* heap, void*& result)
{
  if ( CharInfo::isHexFormatSupported(cs) == FALSE )
    return NOT_SUPPORTED;

  if ( isValidHexFormat(str, len, cs) == FALSE )
    return INVALID;

  if ( heap == 0 )
    return CONV_FAILED;

  NAWString *tmpStr = removeWSpaces(str, len, quote, heap);

  // convert to actual string literal
  hex_conversion_code ok = INVALID_CODEPOINTS;
  switch ( cs ) {
    case CharInfo::KANJI_MP:
    case CharInfo::KSC5601_MP:
    case CharInfo::ISO88591:
    case CharInfo::UTF8:
      {
        Int32   StrLength = (Int32)(tmpStr->length());
        result = convHexToChar(tmpStr->data(), StrLength, cs, heap);
        if (result ) {
           ok = SINGLE_BYTE; // Assume good data for now
           if (cs == CharInfo::UTF8) {
              // Verify UTF8 code point values are valid
              Int32   iii = 0;
              Int32   rtnv = 0;
              NAString* reslt = (NAString*)result;
              UInt32  UCS4 = 0;
              StrLength = StrLength/2;  // Orig StrLength was for hex-ASCII string
              while ( iii < StrLength )
              {
                 rtnv = LocaleCharToUCS4( &(reslt->data()[iii]), StrLength - iii,
                                          &UCS4, cnv_UTF8 );
                 // Any non-positive value means the bytes at iii could not
                 // be decoded. Checking only for CNV_ERR_INVALID_CHAR would
                 // let a different negative code be added to iii (moving
                 // the index backwards) and a zero would never advance.
                 if (rtnv <= 0)
                 {
                    ok = INVALID_CODEPOINTS; // Return error
                    break;
                 }
                 iii += rtnv;
              }
           }
        }
      }
      break;

    case CharInfo::UNICODE:
      {
        result = convHexToWChar(tmpStr->data(), (Int32)(tmpStr->length()), cs, heap);
        if (result) ok = DOUBLE_BYTE;
      }
      break;

    default:
      ok = INVALID;
      break;
  }
  return ok;
}