Ejemplo n.º 1
0
// Map a 1-based character position to a 0-based byte offset in buf: add up
// the byte lengths of the characters that precede character number
// numOfChar, looking at no more than maxBufLen bytes.
//
// ISO88591 and UCS2 strings are not scanned. For them the result is
// numOfChar - 1 when numOfChar <= maxBufLen, and maxBufLen otherwise.
//
// For any other character set each character is measured with
// LocaleCharToUCS4(); a negative value from that call is returned to the
// caller unchanged as the error code.
Int32 Attributes::convertCharToOffset (const char        *buf,
                                       Int32             numOfChar,
                                       Int32             maxBufLen,
                                       CharInfo::CharSet cs)
{
  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
  {
    if (numOfChar > maxBufLen)
      return maxBufLen;
    return numOfChar - 1;
  }

  cnv_charset cnvCS = convertCharsetEnum(cs);
  UInt32 ucs4Ignored;   // decoded code point; only the byte length is used
  Int32 bytePos = 0;

  // Character numbers used by the string functions start at 1 while byte
  // offsets start at 0, so character 1 lives at offset 0 and the walk only
  // has to step over characters 1 .. numOfChar - 1.
  for (Int32 charNo = 1; charNo < numOfChar && bytePos < maxBufLen; ++charNo)
  {
    Int32 charBytes = LocaleCharToUCS4(&buf[bytePos],
                                       maxBufLen - bytePos,
                                       &ucs4Ignored,
                                       cnvCS);
    if (charBytes < 0)
      return charBytes;

    bytePos += charBytes;
  }
  return bytePos;
}
// -----------------------------------------------------------------------
// ComputeStrLenInUCS4chars:
//
// Counts the characters (UCS4 code points) held in the first
// strLenInBytes bytes of pStr, which is encoded in character set cs.
//
// For ISO88591, or when strLenInBytes is 0, strLenInBytes is returned
// as is.  For every other character set the string is walked one
// character at a time, and the characters of the trailing run whose
// first byte is zero are left out of the count.
//
// Returns CNV_ERR_INVALID_CHAR (a negative number) when a character
// cannot be decoded.  The error code values are defined in
// w:/common/csconvert.h.  This function does not need a workspace heap.
// -----------------------------------------------------------------------
NA_EIDPROC
Int32 ComputeStrLenInUCS4chars (const char * pStr,
                                const Int32 strLenInBytes,
                                const CharInfo::CharSet cs)
{
  if (strLenInBytes == 0 || cs == CharInfo::ISO88591)
    return strLenInBytes;

  cnv_charset cnvCS = convertCharsetEnum(cs);
  UInt32 /*ucs4_t*/ ucs4Ignored;   // only the byte length of each char matters
  Int32 charsSeen = 0;
  Int32 trailingNuls = 0;          // length of the current run of zero chars
  const char *cursor = pStr;
  Int32 bytesLeft = strLenInBytes;

  while (bytesLeft > 0)
  {
    const Int32 charBytes = LocaleCharToUCS4 (cursor, bytesLeft, &ucs4Ignored, cnvCS);
    if (charBytes <= 0)
      return CNV_ERR_INVALID_CHAR;

    ++charsSeen;
    // A non-zero character ends any run of zeros seen so far.
    trailingNuls = (*cursor == '\0') ? trailingNuls + 1 : 0;

    cursor    += charBytes;
    bytesLeft -= charBytes;
  }

  // NOTE: the trailing zeros are not counted.
  return charsSeen - trailingNuls;

} // ComputeStrLenInUCS4chars ()
Ejemplo n.º 3
0
// Convert a byte offset in buf into a character count: the number of
// characters encoded in the first `offset` bytes of buf.
//
// ISO88591 and UCS2 are not scanned; `offset` itself is returned.
//
// For any other character set the bytes are decoded one character at a
// time with LocaleCharToUCS4(). A negative value from that call is returned
// unchanged as the error code. A zero-length result is reported as
// CNV_ERR_INVALID_CHAR, because the scan could not advance past it.
Int32 Attributes::convertOffsetToChar(const char        *buf,
                                      Int32             offset,
                                      CharInfo::CharSet cs)
{
  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
     return(offset);

  Int32 firstCharLenInBuf;
  UInt32 UCS4value;     // decoded code point; not used, only the length is

  cnv_charset charset = convertCharsetEnum(cs);

  Int32 numberOfChar = 0;
  Int32 i = 0;

  while(i < offset)
  {
    firstCharLenInBuf = LocaleCharToUCS4(&buf[i],
                                         offset - i,
                                         &UCS4value,
                                         charset);
    if(firstCharLenInBuf < 0) return firstCharLenInBuf;

    // A length of 0 would leave i unchanged and make this loop spin
    // forever, so treat it as an undecodable character.
    if(firstCharLenInBuf == 0) return CNV_ERR_INVALID_CHAR;

    i += firstCharLenInBuf;
    ++numberOfChar;
  }
  return numberOfChar;
}
Ejemplo n.º 4
0
// Count the characters stored in [buf, endOfBuf) and, when charLengthInBuf
// is not NULL, store the byte length of the i-th character in
// charLengthInBuf[i]. No bounds check is made on charLengthInBuf here.
//
// For ISO88591 and UCS2 the count is endOfBuf - buf and every recorded
// length is 1. For other character sets the buffer is decoded from the
// front with LocaleCharToUCS4(); CNV_ERR_INVALID_CHAR is returned as soon
// as a character cannot be decoded.
Int32 Attributes::getCharLengthInBuf
    (const char        *buf,
     const char        *endOfBuf,
     char              *charLengthInBuf,
     CharInfo::CharSet cs)
{
  if (cs == CharInfo::ISO88591 || cs == CharInfo::UCS2)
  {
    Int32 unitCount = endOfBuf - buf;
    if (charLengthInBuf != NULL)
    {
      Int32 idx = 0;
      while (idx < unitCount)
      {
        charLengthInBuf[idx] = 1;
        idx++;
      }
    }
    return unitCount;
  }

  // For SJIS the length of the last character cannot be determined by
  // looking at the string from the right, so the string is scanned from the
  // beginning and the lengths are saved in an array.
  // For example, take the SJIS strings (x'5182828251') and (x'51828251'):
  // the last character of the first string is the 2-byte, double-byte "2",
  // while the last character of the second string is the 1-byte,
  // single-byte "Q".

  cnv_charset cnvCS = convertCharsetEnum(cs);
  UInt32 ucs4Ignored;     // decoded code point; only the length is used
  const char *cursor = buf;
  size_t bytesLeft = endOfBuf - buf;
  Int32 charsFound = 0;

  while (bytesLeft > 0)
  {
    Int32 charBytes = LocaleCharToUCS4 (cursor, bytesLeft, &ucs4Ignored, cnvCS);
    if (charBytes <= 0)
      return CNV_ERR_INVALID_CHAR;

    if (charLengthInBuf != NULL)
      charLengthInBuf[charsFound] = (char)charBytes;

    charsFound++;
    cursor    += charBytes;
    bytesLeft -= charBytes;
  }
  return charsFound;
}
// Convert a NULL-terminated name, encoded in the character set returned by
// ComGetNameInterfaceCharSet(), into a NULL-terminated UCS2 string.
//
// One NAWchar of the output buffer is always held back for the NULL
// terminator. When padWithSpaces is set and the conversion succeeded, the
// rest of the buffer up to that last NAWchar is filled with spaces.
//
// Returns 0 for an empty input name, -2 when there is no usable output
// buffer, -3 when the input pointer is NULL, and otherwise the status of
// LocaleToUTF16(). The error codes are described in w:/common/csconvert.h.
Int32 ComAnsiNameToUCS2 ( const char * inAnsiNameInMBCS      // in  - valid name in default ANSI name char set
                        , NAWchar *    outBuf4AnsiNameInUCS2 // out - out buffer
                        , const Int32  outBufSizeInNAWchars  // in  - out buffer max len in NAWchars
                        , const NABoolean padWithSpaces      // in  - default is FALSE
                        )
{
  if (outBuf4AnsiNameInUCS2 == NULL || outBufSizeInNAWchars <= 0)
    return -2; // CNV_ERR_BUFFER_OVERRUN - No output buffer or not big enough
  if (inAnsiNameInMBCS == NULL)
    return -3; // CNV_ERR_NOINPUT - No input buffer or input cnt <= 0

  if (inAnsiNameInMBCS[0] == '\0')
  {
    // Empty name: the result is just the terminator.
    outBuf4AnsiNameInUCS2[0] = 0;
    return 0; // success
  }

  Int32 srcLenInBytes = (Int32)strlen(inAnsiNameInMBCS);
  Int32 outCapacityInBytes = outBufSizeInNAWchars * BYTES_PER_NAWCHAR;
  Int32 nameCS    = (Int32)ComGetNameInterfaceCharSet();
  Int32 cnvNameCS = (Int32)/*cnv_charset*/convertCharsetEnum (nameCS);
  char * firstUntranslated = NULL;
  UInt32 bytesProduced = 0;
  UInt32 charsTranslated = 0;

  Int32 status = LocaleToUTF16
    ( cnv_version1                          // in  - const enum cnv_version version
    , inAnsiNameInMBCS                      // in  - const char *  in_bufr
    , (Int32) srcLenInBytes                 // in  - const Int32   in_len_in_bytes
    , (const char *) outBuf4AnsiNameInUCS2  // out - const char *  out_bufr
    , (Int32)(outCapacityInBytes -  BYTES_PER_NAWCHAR) // in - max out len; last NAWchar is kept for the terminator
    , (cnv_charset) cnvNameCS               // in  - enum cnv_charset conv_charset
    , firstUntranslated                     // out - char * &      first_untranslated_char
    , &bytesProduced                        // out - UInt32 *      output_data_len_p
    , (Int32) 0                             // in  - const Int32   conv_flags
    , (Int32) FALSE                         // in  - const Int32   addNullAtEnd_flag
    , &charsTranslated                      // out - UInt32 *      translated_char_cnt_p
    );

  // Terminate right after whatever was converted, even on failure.
  Int32 nawcharsProduced = bytesProduced / BYTES_PER_NAWCHAR;
  outBuf4AnsiNameInUCS2[nawcharsProduced] = 0;

  if (status != 0 || !padWithSpaces)
    return status;

  // Blank-fill the tail of the buffer and re-terminate at its last slot.
  wc_str_pad ( (NAWchar *) &outBuf4AnsiNameInUCS2[nawcharsProduced] // out - NAWchar *str
             , outBufSizeInNAWchars - nawcharsProduced - 1          // in  - Int32 length
             , unicode_char_set::SPACE                              // in  - NAWchar padchar
             );
  outBuf4AnsiNameInUCS2[outBufSizeInNAWchars-1] = 0;
  return status;
}
// -----------------------------------------------------------------------
// ComputeStrLenInNAWchars:
//
// Returns the length of the input string (in the specified character set)
// in number of NAWchar(acters) - Note that a UTF16 character (i.e., a
// surrogate pair) will have a count of 2 NAWchar(acters).
//
// A NULL pStr or a strLenInBytes of 0 yields 0.  UCS2 input is not
// converted; its length is strLenInBytes / BYTES_PER_NAWCHAR.  Any other
// input is converted with LocaleToUTF16() into a scratch buffer taken from
// workspaceHeap, and the length of the converted data is returned.
//
// Return an error code (a negative number) if encounters an error.  The
// error code values are defined in w:/common/csconvert.h.
// -----------------------------------------------------------------------
Int32 ComputeStrLenInNAWchars (const char * pStr,
                               const Int32 strLenInBytes,
                               const CharInfo::CharSet strCS,
                               NAMemory *workspaceHeap) // in - default is NULL (the C++ runtime heap)
{
  if (pStr == NULL || strLenInBytes == 0)
    return 0;

  if (strCS == CharInfo::UCS2)
    return strLenInBytes / BYTES_PER_NAWCHAR;

  char *       pFirstByteOfUntranslatedChar = NULL;
  UInt32       outputDataLen = 0;

  const cnv_charset  cnvCharSet = convertCharsetEnum(strCS);

  // Compute the size of the to-be-allocated output buffer, include a UCS-2 NULL terminator, for the worst case.
  const Int32  bufSizeInBytes = (BYTES_PER_NAWCHAR+1) * strLenInBytes + BYTES_PER_NAWCHAR;
  char *       utf16Scratch = new (workspaceHeap) char [bufSizeInBytes];

  if (utf16Scratch EQU NULL)
    return CNV_ERR_INVALID_HEAP;

  const Int32 rtnCode =
    LocaleToUTF16 ( cnv_version1                    // in  - const enum cnv_version
                  , pStr                            // in  - const char *   in_buf
                  , strLenInBytes                   // in  - const int      in_len
                  , utf16Scratch                    // out - const char *   out_buf - plenty of room
                  , bufSizeInBytes                  // in  - const int      out_len - buffer size in bytes
                  , cnvCharSet                      // in  - const int      cnv_charset
                  , pFirstByteOfUntranslatedChar    // out - char *       & ptr_to_first_untranslated_char
                  , & outputDataLen                 // out - unsigned int * output_data_len_p     = NULL
               // , 0                               // in  - const int      cnv_flags             = 0
               // , (Int32)FALSE                    // in  - const int      addNullAtEnd_flag     = FALSE
               // , & translatedCharCount           // out - unsigned int * translated_char_cnt_p = NULL
               // ,                                 // in  - unsigned int   max_chars_to_convert  = 0xffffffff
                  );

  // Only the length is wanted; the converted text itself is discarded.
  // The scratch buffer came from an array new, so it is released with the
  // array form of the delete macro.
  NADELETEBASICARRAY(utf16Scratch, workspaceHeap);

  if (rtnCode != 0)
    return rtnCode;                            // a negative integer value

  return outputDataLen / BYTES_PER_NAWCHAR;    // a positive integer value
} // ComputeStrLenInNAWchars()
Ejemplo n.º 7
0
// Return the length of the first character in buf. SJIS should be 1 or 2
// (bytes). UTF8 should be 1 to 4 (bytes). UCS2 is 1 (wchar is used for UCS2
// data, so the length is 1, not 2).
//
// The buffers handed to the string functions may contain the character 0;
// it is treated as a single-byte character without calling the converter.
//
// For character sets other than ISO88591 and UCS2 the value comes straight
// from LocaleCharToUCS4(), which reports failures as negative values, so
// callers should be prepared for a negative result.
Int32 Attributes::getFirstCharLength(const char              *buf,
                                           Int32             buflen,
                                           CharInfo::CharSet cs)
{
  if( cs == CharInfo::ISO88591 ||
      cs == CharInfo::UCS2 ||
      buf[0] == 0)
  {
    return 1;
  }

  UInt32 UCS4value;   // decoded code point; not used, only the length is

  // The result stays in a signed Int32 all the way out: it may be a
  // negative error code and must not pass through an unsigned variable.
  Int32 firstCharLenInBuf =
    LocaleCharToUCS4(buf, buflen, &UCS4value, convertCharsetEnum(cs));

  return firstCharLenInBuf;
}
// Convert a NULL-terminated UCS2 name into the character set returned by
// ComGetNameInterfaceCharSet(), writing a NULL-terminated result into
// outBuf4AnsiNameInMBCS. Invalid input is not tolerated and no substitution
// character is supplied to the converter.
//
// Returns 0 for an empty input name, -2 when there is no usable output
// buffer, -3 when the input pointer is NULL, and otherwise the status
// returned by UTF16ToLocale().
Int32 ComAnsiNameToUTF8 ( const NAWchar * inAnsiNameInUCS2  // in  - valid ANSI SQL name in UCS2
                        , char *      outBuf4AnsiNameInMBCS // out - out buffer
                        , const Int32 outBufSizeInBytes     // in  - out buffer max len in bytes
                        )
{
  if (outBuf4AnsiNameInMBCS == NULL || outBufSizeInBytes <= 0)
    return -2; // CNV_ERR_BUFFER_OVERRUN - No output buffer or not big enough
  if (inAnsiNameInUCS2 == NULL)
    return -3; // CNV_ERR_NOINPUT - No input buffer or input cnt <= 0

  if (NAWstrlen(inAnsiNameInUCS2) == 0)
  {
    // Empty name: the result is just the terminator.
    outBuf4AnsiNameInMBCS[0] = 0;
    return 0; // success
  }

  const Int32 srcLenInBytes = NAWstrlen(inAnsiNameInUCS2) * BYTES_PER_NAWCHAR;
  Int32 nameCS    = (Int32)ComGetNameInterfaceCharSet();
  Int32 cnvNameCS = (Int32)/*cnv_charset*/convertCharsetEnum (nameCS);

  // Out parameters required by the converter; their values are not used here.
  char * firstUntranslated = NULL;
  UInt32 outLenInBytesIncludingNull = 0;
  UInt32 charsTranslated = 0;

  return UTF16ToLocale
    ( cnv_version1                          // in     - const enum cnv_version
    , (const char *) inAnsiNameInUCS2       // in     - const char *     in_bufr
    , (Int32) srcLenInBytes                 // in     - const Int32      in_len_in_bytes
    , (const char *) outBuf4AnsiNameInMBCS  // in/out - const char *     out_bufr
    , (Int32) outBufSizeInBytes             // in     - const Int32      out_bufr_max_len_in bytes
    , (cnv_charset) cnvNameCS               // in     - enum cnv_charset conv_charset
    , firstUntranslated                     // out    - char * &         first_untranslated_char
    , &outLenInBytesIncludingNull           // out    - UInt32 *         output_data_len_p
    , 0                                     // in     - const Int32      conv_flags
    , (Int32) TRUE                          // in     - const Int32      add_null_at_end_Flag
    , (Int32) FALSE                         // in     - const int32      allow_invalids
    , &charsTranslated                      // out    - UInt32 *         translated_char_cnt_p
    , (const char *) NULL /* i.e. "?" */    // in     - const char *     substitution_char = NULL
    );
}
// Convert strLen bytes of str, encoded in charset, to UTF-16 in the
// caller-supplied buffer wstrBuf, which has room for wstrBufLen NAWchars.
//
// Returns the converted length in NAWchars, or 0 when the character set is
// unknown or the conversion fails. A return of 0 tells the caller not to
// use the data in wstrBuf.
Lng32
LocaleStringToUnicode(Lng32 charset, const char* str, Lng32 strLen,
                      NAWchar* wstrBuf, Lng32 wstrBufLen, NABoolean addNullAtEnd)
{
   // ISO88591 still goes through the old ISO88591ToUnicode() routine while
   // everything else uses the newer LocaleToUTF16(). Keeping the old "pass
   // through" behavior means ISO 8859-15 (a.k.a. Latin-9) characters in a
   // CHARACTER SET ISO88591 target column continue to work.
   if (charset == (Lng32) CharInfo::ISO88591)
   {
     NAWcharBuf callerBuf(wstrBuf, wstrBufLen);
     NAWcharBuf* outBufPtr = &callerBuf;
     NAWcharBuf* converted = ISO88591ToUnicode(
                                charBuf((unsigned char*)str, strLen), 0,
                                outBufPtr, addNullAtEnd
                                              );
     if (!converted)
       return 0;
     return converted->getStrLen();
   }

   //
   // charset != (Lng32) CharInfo::ISO88591 from here on
   //

   enum cnv_charset cnvCS = convertCharsetEnum(charset);
   if (cnvCS == cnv_UnknownCharSet)
     return 0; // nothing we can do; exit the routine

   UInt32 outCapacityInBytes = wstrBufLen*sizeof(NAWchar);
   char * firstUntranslated = NULL;
   UInt32 bytesProduced = 0;
   UInt32 charsTranslated = 0;
   Int32 status =
     LocaleToUTF16(cnv_version1,                   // const enum cnv_version version
                   str,                            // const char *in_bufr
                   strLen,                         // const int in_len in # of bytes
                   (const char *)wstrBuf,          // const char *out_bufr
                   (const Int32)outCapacityInBytes,
                   cnvCS,                          // enum cnv_charset charset
                   firstUntranslated,              // char * & first_untranslated_char
                   &bytesProduced,                 // unsigned int *output_data_len_p
                   0,                              // const int cnv_flags (default is 0)
                   (const Int32)addNullAtEnd,
                   &charsTranslated);              // unsigned int *translated_char_cnt_p

   UInt32 nawcharsProduced = bytesProduced/sizeof(NAWchar);

   if (status != 0)
   {
     // On failure LocaleToUTF16 will not add the NULL terminator, so add it
     // here when one was requested: right after the converted data if that
     // fits, otherwise in the last slot (wstrBufLen is assumed to include
     // room for the terminator when addNullAtEnd is TRUE).
     if (addNullAtEnd && wstrBuf && wstrBufLen > 0)
     {
       if (nawcharsProduced >= (UInt32)wstrBufLen)
         wstrBuf[wstrBufLen-1] = WIDE_('\0');
       else
         wstrBuf[nawcharsProduced] = WIDE_('\0');
     }
     return 0; // tell the caller not to use data in wstrBuf
   }

   // Success. The count includes the NULL terminator if (addNullAtEnd == TRUE).
   return nawcharsProduced;
}
Ejemplo n.º 10
0
// Convert the UTF-16 string in input to character set cset.
//
// The result is written into the buffer that checkSpace() returns for
// csetString. Its string length is set to the number of bytes produced,
// excluding the NULL terminator when one was requested and the conversion
// succeeded.
//
//   errorcode    - out: status of the final UTF16ToLocale() call, with
//                  CNV_ERR_NOINPUT (empty input) mapped to 0; or
//                  CNV_ERR_BUFFER_OVERRUN when no output buffer is available.
//   charCount    - out, optional: character count reported by the converter.
//   errorByteOff - out, optional: byte offset into the input of the position
//                  reported by the converter; 0 when none was reported.
//
// Returns the output buffer (also when errorcode is non-zero), or 0 when
// checkSpace() could not supply one.
charBuf* unicodeTocset(const NAWcharBuf& input, 
	CollHeap *heap, charBuf*& csetString, Int32 cset, Int32 &errorcode,
        NABoolean addNullAtEnd, NABoolean allowInvalidCodePoint,
        Int32 *charCount, Int32 *errorByteOff)
{
   // These are filled in by UTF16ToLocale() and read unconditionally below,
   // so give them defined values in case the converter returns without
   // writing to them.
   char * err_ptr = NULL;
   UInt32 byteCount = 0, lvCharCount = 0;
   enum cnv_charset cnvSet = convertCharsetEnum (cset);
   Int32 cwidth = CharInfo::maxBytesPerChar((CharInfo::CharSet)cset);
   charBuf* output = NULL;
   if ( input.data() != NULL && input.getStrLen() > 0)
   {
     // Trial conversion into a scratch buffer, done only to learn how many
     // bytes the converted string needs; the scratch data is thrown away.
     Int32 cSetTargetBufferSizeInBytes = input.getStrLen/*in_NAWchars*/()*cwidth+16; // memory is cheap
     UInt32 cSetTargetStrLenInBytes = 0;
     char *pTempTargetBuf = new(heap) char[cSetTargetBufferSizeInBytes];
     errorcode = UTF16ToLocale ( cnv_version1
                               , (const char *)input.data()           // source string
                               , input.getStrLen()*BYTES_PER_NAWCHAR  // source string length in bytes
                               , (const char *)pTempTargetBuf         // buffer for the converted string
                               , cSetTargetBufferSizeInBytes          // target buffer size in bytes
                               , cnvSet                               // convert from UTF16 to cnvSet
                               , err_ptr
                               , &cSetTargetStrLenInBytes // out - length in bytes of the converted string
                               , 0
                               , addNullAtEnd
                               , allowInvalidCodePoint
                               );
     NADELETEBASICARRAY(pTempTargetBuf, heap); pTempTargetBuf = NULL;
     if (errorcode == 0 && cSetTargetStrLenInBytes > 0)
       output = checkSpace(heap, cSetTargetStrLenInBytes, csetString, addNullAtEnd);
     else // invoke the old code (i.e., keep the old behavior)
       output = checkSpace(heap, input.getStrLen()*cwidth, csetString, addNullAtEnd);
   }
   else // invoke the old code (i.e., keep the old behavior)
     output = checkSpace(heap, input.getStrLen()*cwidth, csetString, addNullAtEnd);

   if ( output == 0 ) {errorcode = CNV_ERR_BUFFER_OVERRUN; return 0;}

   unsigned char* target = output->data();

   // The real conversion, straight into the output buffer.
   errorcode =   UTF16ToLocale( cnv_version1,
                    (const char *)input.data(), input.getStrLen()*BYTES_PER_NAWCHAR,
                    (const char *)target, output->getBufSize(),
                    cnvSet ,
                    err_ptr,
                    &byteCount ,
		    0,
		    addNullAtEnd,
                    allowInvalidCodePoint,
                    &lvCharCount);
   if (errorcode == CNV_ERR_NOINPUT)
     errorcode=0;  // empty string is OK
   if (errorByteOff)
     {
       // Only form the difference when the converter reported a position;
       // subtracting from a NULL pointer would give a meaningless offset.
       *errorByteOff = (err_ptr != NULL)
                         ? (Int32)(err_ptr - (char *)input.data())
                         : 0;
     }
   if (charCount)
     *charCount    = (Int32)lvCharCount;

   // If errorcode != 0, UTF16ToLocale will not add the NULL terminator
   if (errorcode == 0 && addNullAtEnd && byteCount > 0)
     {
       // Exclude the size (in bytes) of the NULL terminator from the byte count.
       UInt32 nullLen = CharInfo::minBytesPerChar((CharInfo::CharSet) cset);

       if (byteCount > nullLen)
         byteCount -= nullLen;
       else
         byteCount = 0;
     }

   output -> setStrLen(byteCount/*in_bytes*/); // excluding the null terminator from the count
   return output;
}
Ejemplo n.º 11
0
// Convert the string in input, encoded in character set cset, to UTF-16.
//
// The result is written into the buffer that checkSpace() returns for
// unicodeString. Its string length is set to the number of NAWchars
// produced, excluding the NULL terminator when one was requested and the
// conversion succeeded.
//
//   errorcode    - out: status of LocaleToUTF16(), with CNV_ERR_NOINPUT
//                  (empty input) mapped to 0; or CNV_ERR_BUFFER_OVERRUN when
//                  no output buffer is available.
//   charCount    - out, optional: character count reported by the converter.
//   errorByteOff - out, optional: byte offset into the input of the position
//                  reported by the converter; 0 when none was reported.
//
// When unicodeString was NULL on entry and the buffer ends up with more than
// 500 NAWchars beyond the string length, the data is moved to a smaller
// buffer, which is then what unicodeString and the return value refer to.
//
// Returns the output buffer (also when errorcode is non-zero), or NULL when
// checkSpace() could not supply one.
NAWcharBuf* csetToUnicode(const charBuf& input, 
	CollHeap *heap, NAWcharBuf*& unicodeString, Int32 cset, Int32 &errorcode,
        NABoolean addNullAtEnd, Int32 *charCount, Int32 *errorByteOff)
{
    char * err_ptr = NULL;
    UInt32 byteCount = 0, lv_charCount = 0, computedMaxBufSizeInNAWchars = 0;
    // Remember whether the caller passed in a buffer; only a buffer obtained
    // on the caller's behalf may be swapped for a smaller one further down.
    NABoolean outputBufferAllocatedByThisRoutine = (unicodeString == NULL) ? TRUE : FALSE;
    enum cnv_charset cnvSet = convertCharsetEnum (cset);

    computedMaxBufSizeInNAWchars = (input.getStrLen()+1)*2; // in NAWchar elements for the worst case
      
    NAWcharBuf* output = checkSpace(heap, computedMaxBufSizeInNAWchars, unicodeString, addNullAtEnd);

    if ( output == NULL ) {errorcode = CNV_ERR_BUFFER_OVERRUN; return NULL;}

    NAWchar* target = output->data();

    errorcode = LocaleToUTF16(
         cnv_version1,
         (const char *)input.data(), input.getStrLen(),
         (const char *)target, output->getBufSize()*BYTES_PER_NAWCHAR /* in bytes */,
         cnvSet,
         err_ptr,
         &byteCount,
         0,
         addNullAtEnd,
         &lv_charCount);
    if (errorcode == CNV_ERR_NOINPUT) errorcode=0;  // empty string is OK
    if (errorByteOff)
      {
        // Only form the difference when the converter reported a position;
        // subtracting from a NULL pointer would give a meaningless offset.
        *errorByteOff = (err_ptr != NULL)
                          ? (Int32)(err_ptr - (char *)input.data())
                          : 0;
      }
    if (charCount)    *charCount    = (Int32)lv_charCount;
    // If errorcode != 0, LocaleToUTF16 will not add the NULL terminator
    if (errorcode == 0 && addNullAtEnd && byteCount > 0)
       {
         // Exclude the size (in bytes) of the NULL terminator from the byte count.
         if (byteCount > BYTES_PER_NAWCHAR)
           byteCount -= BYTES_PER_NAWCHAR;
         else
           byteCount = 0;
       }
        
    output->setStrLen/*in_NAWchar_s*/(byteCount/BYTES_PER_NAWCHAR); // excluding the NULL terminator

    if (outputBufferAllocatedByThisRoutine &&
        output->getBufSize() > output->getStrLen() + 500) // allocated too much space
    {
      // Try to save space in the heap but still allocate 50 extra NAWchars so we do not need
      // to resize the buffer if there is a need to append a few more characters later on.
      // The additional 1 NAWchar is for the NULL terminator.

      NAWcharBuf * outNAWcharBuf2 = new (heap) NAWcharBuf ( output->getStrLen() + 51 // in NAWchars
                                                          , heap );
      if (outNAWcharBuf2 != NULL) // successful allocation
      {
        // Copy data to the newly allocated, smaller buffer.
        NAWstrncpy(outNAWcharBuf2->data(), output->data(), output->getStrLen());
        outNAWcharBuf2->setStrLen/*in_NAWchar_s*/(output->getStrLen());
        // Always append a UCS-2 NULL terminator but exclude it from the string length count.
        outNAWcharBuf2->data()[outNAWcharBuf2->getStrLen()] = 0;

        // Remove the old buffer and set up for the returned value and out parameter.
        unicodeString = outNAWcharBuf2; // return via the out parameter NAWcharBuf*& unicodeString
        NADELETE(output, NAWcharBuf, heap);
        output = outNAWcharBuf2;
      }
    }
    return output;

}
Ejemplo n.º 12
0
void
ElemDDLColDef::setDefaultAttribute(ElemDDLNode * pColDefaultNode)
{
  ElemDDLColDefault * pColDefault = NULL;
  ComBoolean isIdentityColumn = FALSE;

  NAType * pColumnDataType = columnDataType_;

  if (pColDefaultNode NEQ NULL)
    {
      ComASSERT(pColDefaultNode->castToElemDDLColDefault() NEQ NULL);
      pColDefault = pColDefaultNode->castToElemDDLColDefault();
    }

  if (pColDefault NEQ NULL)
    {
      switch (pColDefault->getColumnDefaultType())
        {
        case ElemDDLColDefault::COL_NO_DEFAULT:
          defaultClauseStatus_ = NO_DEFAULT_CLAUSE_SPEC;
          break;
        case ElemDDLColDefault::COL_DEFAULT:
          {
            defaultClauseStatus_ = DEFAULT_CLAUSE_SPEC;
            
            if (pColDefault->getSGOptions())
              {
                isIdentityColumn = TRUE;
                pSGOptions_ = pColDefault->getSGOptions();
                pSGLocation_ = pColDefault->getSGLocation();
              }
            else
              {
                ComASSERT(pColDefault->getDefaultValueExpr() NEQ NULL);
                pDefault_ = pColDefault->getDefaultValueExpr();
              }
            
            // The cast ItemExpr to ConstValue for (ConstValue *)pDefault_; 
            // statement below sets arbitary value for the isNULL_. 
            // Bypass these checks for ID column (basically ITM_IDENTITY).
            ConstValue *cvDef = (ConstValue *)pDefault_;
            if ((cvDef && !cvDef->isNull()) && (!isIdentityColumn))
              {
                const NAType *cvTyp = cvDef->getType();
                NABoolean isAnErrorAlreadyIssued = FALSE;
                
                if ( cvTyp->getTypeQualifier() == NA_CHARACTER_TYPE )
                  {
                    CharInfo::CharSet defaultValueCS = ((const CharType *)cvTyp)->getCharSet();
                    // Always check for INFER_CHARSET setting before the ICAT setting.
                    NAString inferCharSetFlag;
                    if (getCharSetInferenceSetting(inferCharSetFlag) == TRUE &&
                        NOT cvDef->isStrLitWithCharSetPrefixSpecified())
                      {
                        if (pColumnDataType->getTypeQualifier() == NA_CHARACTER_TYPE
                            && ((const CharType *)pColumnDataType)->getCharSet() == CharInfo::UCS2
                            && SqlParser_DEFAULT_CHARSET == CharInfo::UCS2
                            && defaultValueCS == CharInfo::ISO88591
                            )
                          {
                            *SqlParser_Diags << DgSqlCode(-1186)
                                             << DgColumnName(ToAnsiIdentifier(getColumnName()))
                                             << DgString0(pColumnDataType->getTypeSQLname(TRUE/*terse*/))
                                             << DgString1(cvTyp->getTypeSQLname(TRUE/*terse*/));
                            isAnErrorAlreadyIssued = TRUE;
                          }
                        else
                          {
                            cvTyp = cvDef -> pushDownType(*columnDataType_, NA_CHARACTER_TYPE);
                          }
                      }
                    else if (CmpCommon::getDefault(ALLOW_IMPLICIT_CHAR_CASTING) == DF_ON &&
                             NOT cvDef->isStrLitWithCharSetPrefixSpecified() &&
                             cvTyp->getTypeQualifier() == NA_CHARACTER_TYPE &&
                             SqlParser_DEFAULT_CHARSET == CharInfo::ISO88591 &&
                             defaultValueCS == CharInfo::UnknownCharSet)
                      {
                        cvTyp = cvDef -> pushDownType(*columnDataType_, NA_CHARACTER_TYPE);
                      }
                    
                  } // column default value has character data type
                
                if (NOT isAnErrorAlreadyIssued &&
                    pColumnDataType->getTypeQualifier() == NA_CHARACTER_TYPE &&
                    cvTyp->getTypeQualifier() == NA_CHARACTER_TYPE &&
                    (
                         CmpCommon::getDefault(ALLOW_IMPLICIT_CHAR_CASTING) == DF_ON ||
                         NOT cvDef->isStrLitWithCharSetPrefixSpecified()))
                  {
                    const CharType *cdCharType = (const CharType *)pColumnDataType;
                    const CharType *cvCharType = (const CharType *)cvTyp;
                    CharInfo::CharSet cdCharSet = cdCharType->getCharSet(); // cd = column definition
                    CharInfo::CharSet cvCharSet = cvCharType->getCharSet(); // cv = constant value
                    if (cvCharSet == CharInfo::ISO88591)  // default value is a _ISO88591 str lit
                      {
                        
                      }
                    else if ( (cvCharSet == CharInfo::UNICODE ||  // default value is a _UCS2 string literal
                               cvCharSet == CharInfo::UTF8)   &&  // or a _UTF8 string literal
                              cdCharSet != cvCharSet )
                      {
                        //
                        // Check to see if all characters in the specified column default
                        // string literal value can be successfully converted/translated
                        // to the actual character set of the column.
                        //
                        char buf[2032];  // the output buffer - should be big enough
                        buf[0] = '\0';
                        enum cnv_charset eCnvCS = convertCharsetEnum( cdCharSet );
                        const char * pInStr = cvDef->getRawText()->data();
                        Int32 inStrLen = cvDef->getRawText()->length();
                        char * p1stUnstranslatedChar = NULL;
                        UInt32 outStrLenInBytes = 0;
                        unsigned charCount = 0;  // number of characters translated/converted
                        Int32 cnvErrStatus = 0;
                        char *pSubstitutionChar = NULL;
                        Int32 convFlags = 0;
                        
                        if ( cvCharSet == CharInfo::UNICODE )
                          {
                            cnvErrStatus =
                              UTF16ToLocale
                              ( cnv_version1            // in  - const enum cnv_version version
                                , pInStr                  // in  - const char *in_bufr
                                , inStrLen                // in  - const int in_len
                                , buf                     // out - const char *out_bufr
                                , 2016                    // in  - const int out_len
                                , eCnvCS                  // in  - enum cnv_charset charset
                                , p1stUnstranslatedChar   // out - char * & first_untranslated_char
                                , &outStrLenInBytes       // out - unsigned int *output_data_len_p
                                , convFlags               // in  - const int cnv_flags
                                , (Int32)TRUE               // in  - const int addNullAtEnd_flag
                                , (Int32)FALSE              // in  - const int allow_invalids
                                , &charCount              // out - unsigned int * translated_char_cnt_p
                                , pSubstitutionChar       // in  - const char *substitution_char
                                );
                          }
                        else // cvCharSet must be CharInfo::UTF8
                          {
                            cnvErrStatus =
                              UTF8ToLocale
                              ( cnv_version1            // in  - const enum cnv_version version
                                , pInStr                  // in  - const char *in_bufr
                                , inStrLen                // in  - const int in_len
                                , buf                     // out - const char *out_bufr
                                , 2016                    // in  - const int out_len
                                , eCnvCS                  // in  - enum cnv_charset charset
                                , p1stUnstranslatedChar   // out - char * & first_untranslated_char
                                , &outStrLenInBytes       // out - unsigned int *output_data_len_p
                                , (Int32)TRUE               // in  - const int addNullAtEnd_flag
                                , (Int32)FALSE              // in  - const int allow_invalids
                                , &charCount              // out - unsigned int * translated_char_cnt_p
                                , pSubstitutionChar       // in  - const char *substitution_char
                                );
                          }
                        switch (cnvErrStatus)
                          {
                          case 0: // success
                          case CNV_ERR_NOINPUT: // an empty input string will get this error code
                            {
                              ConstValue *pMBStrLitConstValue ;
                              // convert the string literal saved in cvDef (column default value)
                              // from UNICODE (e.g. UTF16) to the column character data type
                              if ( cdCharSet != CharInfo::UNICODE)
                                {
                                  NAString mbs2(buf, PARSERHEAP());  // note that buf is NULL terminated
                                  pMBStrLitConstValue =
                                    new(PARSERHEAP()) ConstValue ( mbs2
                                                                   , cdCharSet // use this for str lit prefix
                                                                   , CharInfo::DefaultCollation
                                                                   , CharInfo::COERCIBLE
                                                                   , PARSERHEAP()
                                                                   );
                                }
                              else
                                {
                                  NAWString mbs2((NAWchar*)buf, PARSERHEAP());  // note that buf is NULL terminated
                                  pMBStrLitConstValue = 
                                    new(PARSERHEAP()) ConstValue ( mbs2
                                                                   , cdCharSet // use this for str lit prefix
                                                                   , CharInfo::DefaultCollation
                                                                   , CharInfo::COERCIBLE
                                                                   , PARSERHEAP()
                                                                   );
                                }
                              delete pDefault_; // deallocate the old ConstValue object
                              cvDef = NULL;     // do not use cvDef anymore
                              pDefault_ = pMBStrLitConstValue;
                              pColDefault->setDefaultValueExpr(pDefault_);
                            }
                            break;
                          case CNV_ERR_INVALID_CHAR:
                            {
                              // 1401 ==  CAT_UNABLE_TO_CONVERT_COLUMN_DEFAULT_VALUE_TO_CHARSET
                              *SqlParser_Diags << DgSqlCode(-1401)
                                               << DgColumnName(ToAnsiIdentifier(getColumnName()))
                                               << DgString0(CharInfo::getCharSetName(cdCharSet));
                            }
                            break;
                          case CNV_ERR_BUFFER_OVERRUN: // output buffer not big enough
                          case CNV_ERR_INVALID_CS:
                          default:
                            CMPABORT_MSG("Parser internal logic error");
                            break;
                          } // switch
                      }
                    else if(!pColumnDataType->isCompatible(*cvTyp))
                      {
                        if (NOT isAnErrorAlreadyIssued)
                          {
                            *SqlParser_Diags << DgSqlCode(-1186)
                                             << DgColumnName(ToAnsiIdentifier(getColumnName()))
                                             << DgString0(pColumnDataType->getTypeSQLname(TRUE/*terse*/))
                                             << DgString1(cvTyp->getTypeSQLname(TRUE/*terse*/));
                            isAnErrorAlreadyIssued = TRUE;
                          }
                      }
                  } // column has character data type
                else
                  // if interval data type, the default value must have the same
                  // interval qualifier as the column.
                  if (NOT isAnErrorAlreadyIssued &&
                      (!pColumnDataType->isCompatible(*cvTyp) ||
                       (pColumnDataType->getTypeQualifier() == NA_INTERVAL_TYPE &&
                        pColumnDataType->getFSDatatype() != cvTyp->getFSDatatype())))
                    {
                      *SqlParser_Diags << DgSqlCode(-1186)
                                       << DgColumnName(ToAnsiIdentifier(getColumnName()))
                                       << DgString0(pColumnDataType->getTypeSQLname(TRUE/*terse*/))
                                       << DgString1(cvTyp->getTypeSQLname(TRUE/*terse*/));
                      isAnErrorAlreadyIssued = TRUE;
                    }
              }
          }
          break;
        case ElemDDLColDefault::COL_COMPUTED_DEFAULT:
          {
            defaultClauseStatus_ = DEFAULT_CLAUSE_SPEC;
            computedDefaultExpr_ = pColDefault->getComputedDefaultExpr();
          }
          break;
        default:
          CMPABORT_MSG("Parser internal logic error");
          break;
        }
    }

}
// -----------------------------------------------------------------------
// CmpSeabaseDDL::buildViewText
//
// Appends to viewText the text of the CREATE VIEW statement described by
// createViewParseNode.  The original input string is copied from the
// statement start position to its end position; for every entry in the
// parse node's name location list that has a non-empty expanded name, the
// original name is replaced with the expanded one.  A blank is inserted
// around a substituted name where dropping the original '*' or '"' would
// otherwise glue the expanded name to a neighbouring identifier character.
// The resulting text is finally passed through PrettifySqlText().
//
// Name positions are used as byte offsets into the input string, while
// getNameLength() is handed to the converter as a character count.  The
// length of each name in bytes is therefore obtained from LocaleToUTF16()
// and all byte indexing below is based on that value.
//
// Parameters:
//   createViewParseNode - in  - parse node of the CREATE VIEW statement
//   viewText            - out - receives (is appended with) the view text
//
// Returns 0 in all cases; internal inconsistencies are reported through
// CMPASSERT.
// -----------------------------------------------------------------------
short CmpSeabaseDDL::buildViewText(StmtDDLCreateView * createViewParseNode,
                                   NAString &viewText)
{
  const ParNameLocList &nameLocList = createViewParseNode->getNameLocList();
  const char *pInputStr = nameLocList.getInputStringPtr();

  // Position in the input string of the first byte not yet copied.
  StringPos inputStrPos = createViewParseNode->getStartPosition();

  for (CollIndex i = 0; i < nameLocList.entries(); i++)
    {
      const ParNameLoc &nameLoc = nameLocList[i];
      const NAString &nameExpanded = nameLoc.getExpandedName(FALSE/*no assert*/);
      size_t nameLenInBytes = 0;

      //
      // When the character set of the input string is a variable-length/width
      // multi-byte characters set, the value returned by getNameLength()
      // may not be numerically equal to the number of bytes in the original
      // input string that we need to skip.  So, we get the character
      // conversion routines to tell us how many bytes we need to skip.
      //
      CMPASSERT(nameLocList.getInputStringCharSet() EQU CharInfo::UTF8);
      enum cnv_charset eCnvCS = convertCharsetEnum(nameLocList.getInputStringCharSet());

      const char *str_to_test = (const char *) &pInputStr[nameLoc.getNamePosition()];
      const Int32 max_bytes2cnv = createViewParseNode->getEndPosition()
        - nameLoc.getNamePosition() + 1;
      // Scratch output buffer; only the "first untranslated char" pointer
      // reported by the conversion is of interest here.
      const char *tmp_out_bufr = new (STMTHEAP) char[max_bytes2cnv * 4 + 10 /* Ensure big enough! */ ];
      char * p1stUnstranslatedChar = NULL;
      UInt32 iTransCharCountInChars = 0;
      Int32 cnvErrStatus = LocaleToUTF16(
                                         cnv_version1          // in  - const enum cnv_version version
                                         , str_to_test           // in  - const char *in_bufr
                                         , max_bytes2cnv         // in  - const int in_len
                                         , tmp_out_bufr          // out - const char *out_bufr
                                         , max_bytes2cnv * 4 + 1 // in  - const int out_len
                                         , eCnvCS                // in  - enum cnv_charset charset
                                         , p1stUnstranslatedChar // out - char * & first_untranslated_char
                                         , NULL                  // out - unsigned int *output_data_len_p
                                         , 0                     // in  - const int cnv_flags
                                         , (Int32)TRUE           // in  - const int addNullAtEnd_flag
                                         , &iTransCharCountInChars  // out - unsigned int * translated_char_cnt_p
                                         , nameLoc.getNameLength()     // in  - unsigned int max_NAWchars_to_convert
                                         );
      // NOTE: No errors should be possible -- string has been converted before.
      // cnvErrStatus is therefore not inspected; the returned pointer is
      // validated instead, because it is what the length below depends on.

      NADELETEBASIC (tmp_out_bufr, STMTHEAP);

      // Guard the pointer arithmetic: an unset or out-of-range pointer would
      // yield a bogus length that is later used to size string copies.
      CMPASSERT(p1stUnstranslatedChar NEQ NULL AND
                p1stUnstranslatedChar >= str_to_test);
      nameLenInBytes = p1stUnstranslatedChar - str_to_test;

      // If name not expanded, then use the original name as is
      const bool keepOriginalName = nameExpanded.isNull();
      const size_t originalNameBytesToCopy = keepOriginalName ? nameLenInBytes : 0;

      // Copy from (last position in) input string up to current name
      // (including the name itself when it is kept as is)
      viewText += NAString(&pInputStr[inputStrPos],
                           nameLoc.getNamePosition() - inputStrPos +
                           originalNameBytesToCopy);

      if (NOT keepOriginalName) // original name to be replaced with expanded
        {
          const size_t namePos = nameLoc.getNamePosition();
          const size_t nameLen = nameLoc.getNameLength();      // in characters
          const size_t nameEndPos = namePos + nameLenInBytes;  // first byte past the name

          if ( ( /* case #1 */ pInputStr[namePos] EQU '*' OR
                 /* case #2 */ pInputStr[namePos] EQU '"' )
               AND nameExpanded.data()[0] NEQ '"'
               AND namePos > 1
               AND ( pInputStr[namePos - 1] EQU '_' OR
                     isAlNumIsoMapCS((unsigned char)pInputStr[namePos - 1]) )
               )
            {
              // insert a blank separator to avoid syntax error
              // WITHOUT FIX
              // ex: CREATE VIEW C.S.V AS SELECTC.S.T.COL FROM C.S.T
              viewText += " "; // the FIX
              // WITH FIX
              // ex: CREATE VIEW C.S.V AS SELECT C.S.T.COL FROM C.S.T
            }

          // Add the expanded (fully qualified) name (if exists)
          viewText += nameExpanded;

          // The last byte of the original name and the byte following it are
          // located with the byte length, not the character count, so that
          // names containing multi-byte characters are handled correctly.
          if ( ( /* case #3 */ ( pInputStr[namePos] EQU '*' AND nameLen EQU 1 ) OR
                 /* case #4 */ pInputStr[nameEndPos - 1] EQU '"' )
               AND nameExpanded.data()[nameExpanded.length() - 1] NEQ '"'
               AND pInputStr[nameEndPos] NEQ '\0'
               AND ( pInputStr[nameEndPos] EQU '_' OR
                     isAlNumIsoMapCS((unsigned char)pInputStr[nameEndPos]) )
               )
            {
              // insert a blank separator to avoid syntax error
              // WITHOUT FIX
              // ex: CREATE VIEW C.S.V AS SELECT C.S.T.COLFROM C.S.T
              viewText += " "; // the FIX
              // WITH FIX
              // ex: CREATE VIEW C.S.V AS SELECT C.S.T.COL FROM C.S.T
            }
        } // if (NOT keepOriginalName)

      // Advance input pointer beyond original name in input string
      inputStrPos = nameLoc.getNamePosition() + nameLenInBytes;

    } // for

  // Copy whatever remains of the statement after the last name.
  if (createViewParseNode->getEndPosition() >= inputStrPos)
    {
      viewText += NAString(&pInputStr[inputStrPos],
                           createViewParseNode->getEndPosition()
                           + 1 - inputStrPos);
    }
  else
    CMPASSERT(createViewParseNode->getEndPosition() == inputStrPos-1);

  PrettifySqlText(viewText,
                  CharType::getCharSetAsPrefix(SqlParser_NATIONAL_CHARSET));

  return 0;
} // CmpSeabaseDDL::buildViewText()