// Helper: converts a run of hexadecimal digits into a double-byte (NAWchar)
// string. Each group of four hex digits produces one 16-bit character,
// most significant digit first.
//
//   s        - the hex digit characters; only the first inputLen are examined
//   inputLen - number of characters in s; must be a multiple of 4
//   cs       - target character set; only CharInfo::UNICODE is handled
//   heap     - heap used to allocate (and, on failure, release) the result
//
// Returns
//   - NULL if cs is not CharInfo::UNICODE
//   - a new, empty NAWString if s is NULL or inputLen <= 0
//   - NULL if inputLen is not a multiple of 4, if any character is not a
//     hex digit, or if CharInfo::checkCodePoint() rejects the result
//   - otherwise a new NAWString allocated from heap holding the converted
//     characters
static NAWString * 
convHexToWChar(const NAWchar *s, Int32 inputLen, CharInfo::CharSet cs, CollHeap* heap)
{
  if ( cs != CharInfo::UNICODE )
    return NULL;

  NAWString *r = new (heap) NAWString(heap);
  if (!s || inputLen <= 0) return r;

  assert((inputLen % 4) == 0);
  // The assert above disappears when NDEBUG is defined; without this check
  // the loop below would read s[i+1..i+3] beyond inputLen for a ragged tail.
  if ((inputLen % 4) != 0) {
    NADELETE(r, NAWString, heap);
    return NULL;
  }

  for (Int32 i = 0; i < inputLen; i=i+4) {
    if ( isHexDigit8859_1(s[i])   AND isHexDigit8859_1(s[i+1]) AND
         isHexDigit8859_1(s[i+2]) AND isHexDigit8859_1(s[i+3]) )
    {
      unsigned short first4Bits  = getHexDigitValue(s[i]);
      unsigned short second4Bits = getHexDigitValue(s[i+1]);
      unsigned short third4Bits  = getHexDigitValue(s[i+2]);
      unsigned short fourth4Bits = getHexDigitValue(s[i+3]);

      // Assemble the four nibbles into one 16-bit code unit.
#pragma nowarn(1506)   // warning elimination 
      NAWchar wc = (first4Bits << 12) | (second4Bits << 8) | (third4Bits << 4) | fourth4Bits;
#pragma warn(1506)  // warning elimination 
      r->append(wc);
    }
    else {
      // A non-hex character invalidates the whole literal.
      NADELETE(r, NAWString, heap);
      return NULL;
    }
  }

  // Let CharInfo vet the assembled code points for this character set.
#pragma nowarn(1506)   // warning elimination 
  if (! CharInfo::checkCodePoint(r->data(), r->length(), cs) ) {
#pragma warn(1506)  // warning elimination 
    NADELETE(r, NAWString, heap);
    return NULL;
  }

  return r;
}
// -----------------------------------------------------------------------
// Translate ANSI SQL names from UCS-2/UTF-16 encoding values to
// the Default ANSI SQL Name character set.
// -----------------------------------------------------------------------
void CmAnsiNameToUTF8(const NAWString &inWcs, NAString &outMbs)
{
  outMbs.remove(0); // set to an empty string
  if (inWcs.length() <= 0)
  {
    return;
  }

  NAString *pConvStr =
    unicodeToChar ( inWcs.data()                        // in - const char * str
                  , (Int32)inWcs.length()               // in - Int32        len
                  , (Lng32)ComGetNameInterfaceCharSet() // in - Lng32        strCharSet
                  , (NAMemory *)STMTHEAP                // in - NAMemory *   h
                  , FALSE                               // in - NABoolean allowInvalidChar
                  );
  if (pConvStr != NULL AND pConvStr->length() > 0)
  {
    outMbs = *pConvStr;
  }
  delete pConvStr;
}
// JQ
// Hexadecimal string literals may contain blanks; this helper returns a
// copy of s with every L' ' dropped.  Scanning stops at the first
// occurrence of quote, or after len characters, whichever comes first.
//
// The result is always a new NAWString allocated from heap.  When s is
// NULL or len <= 0 the result is empty and len is left untouched;
// otherwise len is overwritten with the number of characters kept.
//
static NAWString *removeWSpaces(const NAWchar *s, Int32& len, NAWchar quote, CollHeap *heap)
{
  NAWString *stripped = new (heap) NAWString(heap);
  if (s == NULL || len <= 0)
    return stripped;

  Int32 kept = 0;
  const NAWchar *const stop = s + len;
  for (const NAWchar *cur = s; cur != stop && *cur != quote; ++cur)
    {
      if (*cur == L' ')
        continue;           // skip the blank

      stripped->append(*cur);
      ++kept;
    }

  len = kept;
  return stripped;
}
// -----------------------------------------------------------------------
// Translate an ANSI SQL name from the character set reported by
// ComGetNameInterfaceCharSet() to wide (NAWchar) characters.  The contents
// of the outWcs parameter are cleared and then set to the newly computed
// wide string.
//
// An empty inMbs leaves outWcs empty.  A successful conversion is expected
// to be non-empty and no longer than ComMAX_ANSI_IDENTIFIER_INTERNAL_LEN
// NAWchars (checked with ComASSERT).  If the conversion returns NULL and
// the assertion does not stop execution, outWcs is left empty.
// -----------------------------------------------------------------------
void CmAnsiNameToUCS2(const NAString &inMbs, NAWString &outWcs)
{
  outWcs.remove(0); // set to an empty string
  if (inMbs.length() <= 0)
  {
    return;
  }
  NAWString * pTargetNAWString =
    charToUnicode ( (Lng32)ComGetNameInterfaceCharSet() // in - Lng32        strCharSet
                  , inMbs.data()                        // in - const char * str
                  , (Int32)inMbs.length()               // in - Int32        len
                  , (NAMemory *)STMTHEAP                // in - NAMemory *   h
                  );
  ComASSERT(pTargetNAWString != NULL AND pTargetNAWString->length() > 0 AND
             pTargetNAWString->length() <= ComMAX_ANSI_IDENTIFIER_INTERNAL_LEN/*in NAWchars*/);
  // NOTE(review): whether ComASSERT halts in every build is not visible
  // here, so never dereference a failed conversion.
  if (pTargetNAWString == NULL)
  {
    return;
  }
  outWcs.append(pTargetNAWString->data(), pTargetNAWString->length());
  delete pTargetNAWString;
}
// Converts sLenInBytes bytes at s from character set sourceCS to character
// set targetCS and returns the result as a newly allocated NAString.
//
//   targetCS / sourceCS - CharInfo::CharSet values passed as Lng32
//   s, sLenInBytes      - source bytes and their count
//   h                   - heap for the result; when NULL the global
//                         operator new is used
//   allowInvalidChar    - forwarded to unicodeToChar()
//
// Returns NULL when s is NULL, when either character set is
// CharInfo::UnknownCharSet, or when a conversion step fails.  A zero
// length input yields an empty string; identical character sets yield a
// deep copy.  When one side is UCS2 the string's bytes are NAWchars and
// only a single conversion is done; otherwise the text goes
// source -> wide -> target.
NAString *charToChar(Lng32 targetCS, const char *s, Int32 sLenInBytes, Lng32 sourceCS, 
                     NAMemory *h /* = NULL */, NABoolean allowInvalidChar /* = FALSE */)
{
  const Lng32 unknownCS = (Lng32)CharInfo::UnknownCharSet;
  if (s == NULL || sourceCS == unknownCS || targetCS == unknownCS)
    return NULL; // error

  // Nothing to convert: hand back an empty string.
  if (sLenInBytes == 0)
    return h ? new (h) NAString(h) : new NAString;

  // Same character set on both sides: a deep copy is all that is needed.
  if (targetCS == sourceCS)
    return h ? new (h) NAString(s, sLenInBytes, h)
             : new NAString(s, sLenInBytes);

  // Source is already wide; reinterpret the bytes as NAWchars and convert
  // straight to the target.
  if ((CharInfo::CharSet)sourceCS == CharInfo::UCS2)
    return unicodeToChar ( (const NAWchar *)s
                         , sLenInBytes / BYTES_PER_NAWCHAR // length in NAWchars
                         , targetCS
                         , h
                         , allowInvalidChar
                         );

  // Otherwise go through an intermediate wide string.
  NAWString *wide = charToUnicode(sourceCS, s, sLenInBytes, h);
  if (wide == NULL)
    return NULL; // conversion failed

  NAString *converted;
  if ((CharInfo::CharSet)targetCS == CharInfo::UCS2)
  {
    // Target is wide: copy the raw NAWchar bytes into the result.
    converted = h ? new (h) NAString ( (const char *)wide->data()
                                     , wide->length() * BYTES_PER_NAWCHAR
                                     , h
                                     )
                  : new NAString ( (const char *)wide->data()
                                 , wide->length() * BYTES_PER_NAWCHAR
                                 );
  }
  else
  {
    converted = unicodeToChar ( wide->data()
                              , wide->length() // in NAWchars
                              , targetCS
                              , h
                              , allowInvalidChar
                              );
  }

  delete wide;
  return converted;
}
// Example #6
// 0
// Renders one column value as literal text and assigns it to target.
//
//   srcDesc - column descriptor; datatype, precision and scale are read
//   src     - input buffer laid out as
//                 |-------|--------------|
//                 |  LEN  |  DATA        |
//                 |-------|--------------|
//             where LEN is a 'short' byte count and DATA holds NAWchars
//   target  - receives the formatted text
//
// Output by type:
//   character types - DATA wrapped in single quotes, embedded quotes
//                     doubled, x00 characters replaced by x01
//   REC_DATETIME    - DATE / TIME / TIMESTAMP / DATETIME literal selected
//                     by srcDesc->precision
//   interval types  - INTERVAL literal followed by a qualifier selected by
//                     srcDesc->datatype
//   anything else   - DATA copied unchanged
//
// Always returns 0.
Lng32 FormatRow(const HSColumnStruct *srcDesc,
               const char *src,
               HSDataBuffer &target)
{
    const Lng32 kAnyInterval = REC_MIN_INTERVAL;  // common tag for all interval types
    const Lng32 retcode = 0;
    const Lng32 workBufLen = 4096;
    NAWchar workBuf[workBufLen];
    NAWString wStr;

    // Split the buffer into its length prefix and its NAWchar payload.
    short inDataLen;
    memcpy((char*)&inDataLen, src, sizeof(short));
    const NAWchar *inData = (NAWchar*)(src + sizeof(short));
    const size_t numChars = inDataLen/sizeof(NAWchar);

    // Fold every interval subtype onto one switch label.
    Lng32 type = srcDesc->datatype;
    if (DFS2REC::isInterval(type))
      type = kAnyInterval;

    if (DFS2REC::isAnyCharacter(type))
      {
        wStr = WIDE_("'");
        for (short pos = 0; pos < numChars; pos++)
          {
            const NAWchar ch = inData[pos];
            if (ch == NAWchar('\0'))
              {
                wStr += NAWchar('\1');                /* convert x00 to x01      */
                continue;
              }
            wStr += ch;
            if (ch == NAWchar('\''))
              wStr.append(WIDE_("'"));                /* double embedded quotes  */
          }
        wStr.append(WIDE_("'"));

        target = wStr.data();
        return retcode;
      }

    switch (type)
      {
        case REC_DATETIME:
          {
            // head : text placed before the value
            // tail : text placed after the value (NULL => unknown code)
            // frac : for codes ending in SECOND, text placed after the
            //        value when scale != 0; appendFraction() follows it
            const NAWchar *head = WIDE_("DATETIME '");
            const NAWchar *tail = NULL;
            const NAWchar *frac = NULL;

            switch (srcDesc->precision)
              {
                case REC_DTCODE_DATE:
                  head = WIDE_("DATE '");       tail = WIDE_("'");  break;
                case REC_DTCODE_TIME:
                  head = WIDE_("TIME '");       tail = WIDE_("'");  break;
                case REC_DTCODE_TIMESTAMP:
                  head = WIDE_("TIMESTAMP '");  tail = WIDE_("'");  break;

// Here begin a number of cases that are only possible with MP datetime types.
// LCOV_EXCL_START :mp
                case REC_DTCODE_YEAR:
                  tail = WIDE_("' YEAR");             break;
                case REC_DTCODE_YEAR_MONTH:
                  tail = WIDE_("' YEAR TO MONTH");    break;
                case REC_DTCODE_YEAR_HOUR:
                  tail = WIDE_("' YEAR TO HOUR");     break;
                case REC_DTCODE_YEAR_MINUTE:
                  tail = WIDE_("' YEAR TO MINUTE");   break;
                case REC_DTCODE_MONTH:
                  tail = WIDE_("' MONTH");            break;
                case REC_DTCODE_MONTH_DAY:
                  tail = WIDE_("' MONTH TO DAY");     break;
                case REC_DTCODE_MONTH_HOUR:
                  tail = WIDE_("' MONTH TO HOUR");    break;
                case REC_DTCODE_MONTH_MINUTE:
                  tail = WIDE_("' MONTH TO MINUTE");  break;
                case REC_DTCODE_MONTH_SECOND:
                  tail = WIDE_("' MONTH TO SECOND");
                  frac = WIDE_("' MONTH TO ");
                  break;
                case REC_DTCODE_DAY:
                  tail = WIDE_("' DAY");              break;
                case REC_DTCODE_DAY_HOUR:
                  tail = WIDE_("' DAY TO HOUR");      break;
                case REC_DTCODE_DAY_MINUTE:
                  tail = WIDE_("' DAY TO MINUTE");    break;
                case REC_DTCODE_DAY_SECOND:
                  tail = WIDE_("' DAY TO SECOND");
                  frac = WIDE_("' DAY TO ");
                  break;
                case REC_DTCODE_HOUR:
                  tail = WIDE_("' HOUR");             break;
                case REC_DTCODE_HOUR_MINUTE:
                  tail = WIDE_("' HOUR TO MINUTE");   break;
                case REC_DTCODE_MINUTE:
                  tail = WIDE_("' MINUTE");           break;
                case REC_DTCODE_MINUTE_SECOND:
                  tail = WIDE_("' MINUTE TO SECOND");
                  frac = WIDE_("' MINUTE TO ");
                  break;
                case REC_DTCODE_SECOND:
                  tail = WIDE_("' SECOND");
                  frac = WIDE_("' SECOND TO ");
                  break;
// LCOV_EXCL_STOP

                // LCOV_EXCL_START :rfi
                default:
                  HS_ASSERT(FALSE);
                  break;
                // LCOV_EXCL_STOP
              }

            // An unrecognized code leaves wStr empty, so target gets an
            // empty string if HS_ASSERT returns.
            if (tail != NULL)
              {
                wStr = head;
                wStr.append(inData, numChars);
                if (frac != NULL && srcDesc->scale != 0)
                  {
                    wStr.append(frac);
                    wStr.append(appendFraction(srcDesc->scale));
                  }
                else
                  wStr.append(tail);
              }

            target = wStr.data();
            break;
          }

        case kAnyInterval:
          {
            //The INTERVAL may contain spaces and the negative sign
            //in front of the number.
            //We must capture the sign, but do not copy the extra character.
            // NOTE(review): wcsspn scans until a non-blank character and
            // does not know about inDataLen - confirm DATA can never be
            // all blanks up to the end of the buffer.
            const Int32 blanks = wcsspn(inData, L" ");
            Int32 signChars = 0;
            if (inData[blanks] == L'-')
              {
                signChars = 1;
                wStr = WIDE_("INTERVAL -'");
              }
            else
              wStr = WIDE_("INTERVAL '");

            // Leading blanks are re-emitted inside the quotes, then the
            // remainder of the value with the sign character skipped.
            for (short b = 0; b < blanks; b++)
              wStr.append(L" ");
            wStr.append(inData + signChars + blanks,
                        numChars - signChars - blanks);
            wStr.append(WIDE_("'"));

            // Pick the qualifier format; usesScale marks formats that take
            // srcDesc->scale as a second numeric argument.
            const NAWchar *fmt = NULL;
            NABoolean usesScale = FALSE;
            switch (srcDesc->datatype)
              {
                case REC_INT_YEAR:
                  fmt = WIDE_("%s YEAR(%d)");                    break;
                case REC_INT_YEAR_MONTH:
                  fmt = WIDE_("%s YEAR(%d) TO MONTH");           break;
                case REC_INT_MONTH:
                  fmt = WIDE_("%s MONTH(%d)");                   break;
                case REC_INT_DAY:
                  fmt = WIDE_("%s DAY(%d)");                     break;
                case REC_INT_DAY_HOUR:
                  fmt = WIDE_("%s DAY(%d) TO HOUR");             break;
                case REC_INT_DAY_MINUTE:
                  fmt = WIDE_("%s DAY(%d) TO MINUTE");           break;
                case REC_INT_DAY_SECOND:
                  fmt = WIDE_("%s DAY(%d) TO SECOND(%d)");
                  usesScale = TRUE;
                  break;
                case REC_INT_HOUR:
                  fmt = WIDE_("%s HOUR(%d)");                    break;
                case REC_INT_HOUR_MINUTE:
                  fmt = WIDE_("%s HOUR(%d) TO MINUTE");          break;
                case REC_INT_HOUR_SECOND:
                  fmt = WIDE_("%s HOUR(%d) TO SECOND(%d)");
                  usesScale = TRUE;
                  break;
                case REC_INT_MINUTE:
                  fmt = WIDE_("%s MINUTE(%d)");                  break;
                case REC_INT_MINUTE_SECOND:
                  fmt = WIDE_("%s MINUTE(%d) TO SECOND(%d)");
                  usesScale = TRUE;
                  break;
                case REC_INT_SECOND:
                  fmt = WIDE_("%s SECOND(%d, %d)");
                  usesScale = TRUE;
                  break;
                // LCOV_EXCL_START :rfi
                default:
                  HS_ASSERT(FALSE);
                  break;
                // LCOV_EXCL_STOP
              }

            // NOTE(review): na_wsprintf is given no size limit; presumably
            // interval text always fits in workBufLen NAWchars - confirm.
            if (fmt != NULL)
              {
                if (usesScale)
                  na_wsprintf(workBuf, fmt, wStr.data(), srcDesc->precision, srcDesc->scale);
                else
                  na_wsprintf(workBuf, fmt, wStr.data(), srcDesc->precision);
              }

            target = workBuf;
            break;
          }

        default:
          {
            // Every other type: pass the data through untouched.
            wStr.replace(0, wStr.length(), inData, numChars);
            target = wStr.data();
            break;
          }
      }

    return retcode;
}
// Validates a hexadecimal-format string literal and converts it into the
// actual string it denotes.
//
//   str, len - the literal text and its length
//   quote    - quote character; blank removal stops when it is encountered
//   cs       - character set of the literal
//   heap     - heap used for the scratch copy and for the converted string
//   result   - out: set to the converted string for the character sets
//              handled below (an NAString* for the single-byte group, an
//              NAWString* for UNICODE); not assigned on the early-return
//              paths or for an unhandled character set
//
// Returns
//   NOT_SUPPORTED      - CharInfo::isHexFormatSupported(cs) is FALSE
//   INVALID            - isValidHexFormat() rejects the text, or cs is not
//                        one of the character sets handled below
//   CONV_FAILED        - heap is null
//   INVALID_CODEPOINTS - conversion produced no result, or the UTF8 result
//                        holds an invalid or undecodable sequence
//   SINGLE_BYTE / DOUBLE_BYTE - success
hex_conversion_code verifyAndConvertHex(const NAWchar *str, Int32 len, NAWchar quote,
                   CharInfo::CharSet cs, CollHeap* heap, void*& result)
{
  if ( CharInfo::isHexFormatSupported(cs) == FALSE )
    return NOT_SUPPORTED;

  if ( isValidHexFormat(str, len, cs) == FALSE )
    return INVALID;

  if ( heap == 0 )
    return CONV_FAILED;

  // Scratch copy of the literal with blanks removed; released before return.
  NAWString *tmpStr = removeWSpaces(str, len, quote, heap);

  // convert to actual string literal
  hex_conversion_code ok = INVALID_CODEPOINTS;
  switch ( cs ) {
    case CharInfo::KANJI_MP:
    case CharInfo::KSC5601_MP:
    case CharInfo::ISO88591:
    case CharInfo::UTF8:
      {
        Int32   StrLength = (Int32)(tmpStr->length());
        result = convHexToChar(tmpStr->data(), StrLength, cs, heap);
        if (result ) {
           ok = SINGLE_BYTE; // Assume good data for now
           if (cs == CharInfo::UTF8) {
              // Verify UTF8 code point values are valid
              Int32   iii = 0;
              Int32   rtnv = 0;
              NAString* reslt = (NAString*)result;
              UInt32  UCS4 = 0;
              StrLength = StrLength/2;  // Orig StrLength was for hex-ASCII string
              while ( iii < StrLength )
              {
                 rtnv = LocaleCharToUCS4( &(reslt->data()[iii]), StrLength - iii,
                                          &UCS4, cnv_UTF8 );
                 // The loop advances by rtnv, so any non-positive value
                 // (not just CNV_ERR_INVALID_CHAR) must end it; otherwise
                 // it would spin or step backwards.
                 if (rtnv == CNV_ERR_INVALID_CHAR || rtnv <= 0)
                 {
                    ok = INVALID_CODEPOINTS; // Return error
                    break;
                 }
                 iii += rtnv;
              }
           }
        }
      }
      break;

    case CharInfo::UNICODE:
      {
        result = convHexToWChar(tmpStr->data(), (Int32)(tmpStr->length()), cs, heap);
        if (result) ok = DOUBLE_BYTE;
      }
      break;

    default:
      ok = INVALID;
      break;
  }

  // The converters build their own strings, so the scratch copy is no
  // longer needed on any path.
  NADELETE(tmpStr, NAWString, heap);
  return ok;
}