// Return number of bytes used by the characters in buf preceding the Nth char. // Return an error if we encounter a character that is not valid in the cs // character set. Int32 Attributes::convertCharToOffset (const char *buf, Int32 numOfChar, Int32 maxBufLen, CharInfo::CharSet cs) { if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2) return((numOfChar <= maxBufLen) ? numOfChar - 1 : maxBufLen); Int32 firstCharLenInBuf; UInt32 UCS4value; cnv_charset charset = convertCharsetEnum(cs); // Number of character in string functions start from 1, not 0. 1 means // the first character in the string. Offset start from 0. The offset of // the first character in a string is 0. Int32 count = 1; Int32 offset = 0; while(count < numOfChar && offset < maxBufLen) { firstCharLenInBuf = LocaleCharToUCS4(&buf[offset], maxBufLen - offset, &UCS4value, charset); if(firstCharLenInBuf < 0) return firstCharLenInBuf; offset += firstCharLenInBuf; ++count; } return offset; }
// Find the number of character at the offset in buf. Int32 Attributes::convertOffsetToChar(const char *buf, Int32 offset, CharInfo::CharSet cs) { if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2) return(offset); Int32 firstCharLenInBuf; UInt32 UCS4value; cnv_charset charset = convertCharsetEnum(cs); Int32 numberOfChar = 0; Int32 i = 0; while(i < offset) { firstCharLenInBuf = LocaleCharToUCS4(&buf[i], offset - i, &UCS4value, charset); if(firstCharLenInBuf < 0) return firstCharLenInBuf; i += firstCharLenInBuf; ++numberOfChar; } return numberOfChar; }
// ----------------------------------------------------------------------- // ComputeStrLenInUCS4chars: // // Returns the actual (i.e., UCS4) character count of the input string // (in the specified character set) in the actual (i.e., UCS4) characters. // Return an error code (a negative number) if encounters an error. The // error code values are defined in w:/common/csconvert.h. Note that // this function does not need to use a workspace heap. // ----------------------------------------------------------------------- NA_EIDPROC Int32 ComputeStrLenInUCS4chars (const char * pStr, const Int32 strLenInBytes, const CharInfo::CharSet cs) { if (cs == CharInfo::ISO88591 || strLenInBytes == 0) return strLenInBytes; Int32 numberOfUCS4chars = 0; Int32 firstCharLenInBuf = 0; UInt32 /*ucs4_t*/ UCS4value; cnv_charset cnvCharSet = convertCharsetEnum(cs); const char *s = pStr; Int32 num_trailing_zeros = 0; Int32 len = (Int32)strLenInBytes; while (len > 0) { firstCharLenInBuf = LocaleCharToUCS4 (s, len, &UCS4value, cnvCharSet); if (firstCharLenInBuf <= 0) return CNV_ERR_INVALID_CHAR; numberOfUCS4chars++; if ( *s == '\0' ) num_trailing_zeros += 1; else num_trailing_zeros = 0; s += firstCharLenInBuf; len -= firstCharLenInBuf; } return numberOfUCS4chars - num_trailing_zeros ; //NOTE: Don't count trailing zeros ! } // ComputeStrLenInUCS4chars ()
// Count the characters stored in [buf, endOfBuf) and, optionally, record the
// length of each one.
//
//   buf              - start of the character data
//   endOfBuf         - one past the last byte of the data
//   charLengthInBuf  - if not NULL, entry k receives the length of the k-th
//                      character; the caller must supply enough entries
//   cs               - character set of the data
//
// Returns the number of characters, or CNV_ERR_INVALID_CHAR if a character
// cannot be decoded.
Int32 Attributes::getCharLengthInBuf(const char *buf,
                                     const char *endOfBuf,
                                     char *charLengthInBuf,
                                     CharInfo::CharSet cs)
{
  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
  {
    // Every element counts as one character of length 1.
    Int32 charCount = endOfBuf - buf;
    if (charLengthInBuf != NULL)
    {
      Int32 idx = 0;
      while (idx < charCount)
      {
        charLengthInBuf[idx] = 1;
        idx++;
      }
    }
    return charCount;
  }

  // The length of the last character of an SJIS string cannot be determined
  // by looking at it from the right, so the string is scanned from the start
  // and each length is saved as it is found. Example: in x'5182828251' the
  // last character is the 2-byte, double-byte "2", whereas in x'51828251'
  // it is the 1-byte, single-byte "Q".
  const cnv_charset cnvCs = convertCharsetEnum(cs);
  UInt32 ucs4;  // decoded code point; required by the call, otherwise unused
  Int32 charCount = 0;

  for (size_t remaining = endOfBuf - buf; remaining > 0; )
  {
    const Int32 charBytes = LocaleCharToUCS4(buf, remaining, &ucs4, cnvCs);
    if (charBytes <= 0)
      return CNV_ERR_INVALID_CHAR;

    if (charLengthInBuf != NULL)
      charLengthInBuf[charCount] = (char)charBytes;

    charCount++;
    buf += charBytes;
    remaining -= charBytes;
  }

  return charCount;
}
// Return number of bytes of the first character in buf. SJIS should be 1 or // 2. UTF8 should be 1 to 4 (byte). UCS2 is 1 (we use wchar for UCS2 data. So // it is 1, not 2). Int32 Attributes::getFirstCharLength(const char *buf, Int32 buflen, CharInfo::CharSet cs) { UInt32 UCS4value; UInt32 firstCharLenInBuf; // The buffer explain send to string function includes character 0, // treat it as single byte character. if( cs == CharInfo::ISO88591 || cs == CharInfo::UCS2 || buf[0] == 0) { firstCharLenInBuf = 1; } else { firstCharLenInBuf = LocaleCharToUCS4(buf, buflen, &UCS4value, convertCharsetEnum(cs)); } return firstCharLenInBuf; }
// Validate a hexadecimal string literal and convert it to an actual string
// in character set cs.
//
//   str, len  - the hexadecimal literal text and its length
//   quote     - quote character, passed through to removeWSpaces
//   cs        - target character set
//   heap      - heap used for the intermediate and result strings
//   result    - receives the converted string (set only when a conversion
//               routine is reached)
//
// Returns:
//   NOT_SUPPORTED      - hex format is not supported for cs
//   INVALID            - str is not a valid hex format, or cs is not one of
//                        the character sets handled below
//   CONV_FAILED        - heap is null
//   SINGLE_BYTE        - converted by convHexToChar
//   DOUBLE_BYTE        - converted by convHexToWChar
//   INVALID_CODEPOINTS - the conversion routine returned no result, or the
//                        converted UTF8 data failed validation
//
// NOTE(review): when UTF8 validation fails, result still holds the converted
// string; confirm that callers discard it.
// NOTE(review): tmpStr is not released in this function; confirm whether the
// heap owner reclaims it.
hex_conversion_code verifyAndConvertHex(const NAWchar *str,
                                        Int32 len,
                                        NAWchar quote,
                                        CharInfo::CharSet cs,
                                        CollHeap* heap,
                                        void*& result)
{
  if ( CharInfo::isHexFormatSupported(cs) == FALSE )
    return NOT_SUPPORTED;

  if ( isValidHexFormat(str, len, cs) == FALSE )
    return INVALID;

  if ( heap == 0 )
    return CONV_FAILED;

  NAWString *tmpStr = removeWSpaces(str, len, quote, heap);

  // convert to actual string literal
  hex_conversion_code ok = INVALID_CODEPOINTS;

  switch ( cs )
  {
    case CharInfo::KANJI_MP:
    case CharInfo::KSC5601_MP:
    case CharInfo::ISO88591:
    case CharInfo::UTF8:
      {
        Int32 StrLength = (Int32)(tmpStr->length());
        result = convHexToChar(tmpStr->data(), StrLength, cs, heap);
        if ( result )
        {
          ok = SINGLE_BYTE; // Assume good data for now
          if ( cs == CharInfo::UTF8 )
          {
            // Verify UTF8 code point values are valid
            Int32 iii = 0;
            Int32 rtnv = 0;
            NAString* reslt = (NAString*)result;
            UInt32 UCS4 = 0;
            StrLength = StrLength/2; // Orig StrLength was for hex-ASCII string
            while ( iii < StrLength )
            {
              rtnv = LocaleCharToUCS4( &(reslt->data()[iii]),
                                       StrLength - iii,
                                       &UCS4,
                                       cnv_UTF8 );
              // Any non-positive return means the data could not be decoded.
              // Adding it to iii would either stall the loop (0) or move the
              // index backwards (negative error codes other than
              // CNV_ERR_INVALID_CHAR), so reject all of them here.
              if ( rtnv <= 0 )
              {
                ok = INVALID_CODEPOINTS; // Return error
                break;
              }
              iii += rtnv;
            }
          }
        }
      }
      break;

    case CharInfo::UNICODE:
      {
        result = convHexToWChar(tmpStr->data(), (Int32)(tmpStr->length()), cs, heap);
        if ( result )
          ok = DOUBLE_BYTE;
      }
      break;

    default:
      ok = INVALID;
      break;
  }

  return ok;
}