/**
 * Convert a string from one character set to another.
 *
 * The conversion runs in up to four stages:
 *   0. optional unpacking of GSM 7-bit packed input into octets,
 *   1. conversion with the "from" converter (iconv or ICU, chosen at build time),
 *   2. conversion with the "to" converter, unless both converters are the same handle,
 *   3. optional in-place packing of the result into GSM 7-bit septets.
 *
 * @param from_charset   Character set of the input string.
 * @param from_length    Length of the input string.
 * @param from_string_p  Input string.
 * @param spare_bits     Passed through to str_convert_septet_to_octet() for 7-bit input.
 * @param to_charset     Requested character set of the output string.
 * @param to_length_p    In: size available in to_string_p. Out: may be reduced to the
 *                       amount actually written. The copy paths also store a '\0' at
 *                       to_string_p[*to_length_p], so the buffer presumably needs one
 *                       byte more than *to_length_p -- NOTE(review): confirm with callers.
 * @param to_string_p    Output buffer.
 *
 * @return from_length when both character sets are equal; the negative result of
 *         str_convert_get_converter() when no converter is available; -1 when a
 *         conversion step fails; otherwise the length reported by the last
 *         conversion step that ran.
 */
int str_convert_string(string_encoding_t from_charset, size_t from_length, uint8_t *from_string_p, size_t spare_bits,
                       string_encoding_t to_charset, size_t *to_length_p, uint8_t *to_string_p)
{
    CONVERTER_HANDLE from_conv;
    CONVERTER_HANDLE to_conv;
    bool from_7_bit = false;
    bool to_7_bit = false;
    size_t length;
    uint8_t *string_p;
    int converted = 0;
    int result;


    /* size_t must not be passed for a %d conversion; cast explicitly so the
     * variadic arguments match the (unchanged) format string. */
    CN_LOG_D("from_charset=%d, from_length=%d, to_charset=%d, *to_length_p=%d",
             from_charset, (int)from_length, to_charset, (int)*to_length_p);

    if (from_charset == to_charset) {
        /* No conversion, adjust/copy length and strings, and return */
        if (*to_length_p > from_length) {
            *to_length_p = from_length;
        }

        memcpy(to_string_p, from_string_p, *to_length_p);
        to_string_p[*to_length_p] = '\0';
        return from_length;
    }

    /* GSM Default alphabet (7-bit packed) is not supported by the converters.
     * These strings must first be massaged from an array of septets to octets... */
    if (CHARSET_GSM_DEFAULT_7_BIT == from_charset) {
        from_charset = CHARSET_GSM_DEFAULT_8_BIT;
        from_7_bit = true;
    }

    /* or from an array of octets to septets. */
    if (CHARSET_GSM_DEFAULT_7_BIT == to_charset) {
        to_charset = CHARSET_GSM_DEFAULT_8_BIT;
        to_7_bit = true;
    }

    /* (Al)Locate converter(s) */
    result = str_convert_get_converter(from_charset, &from_conv, to_charset, &to_conv);

    if (result < 0) {
        CN_LOG_E("Unable to convert from character set %d to %d!", from_charset, to_charset);
        return result;
    }

    /* Run zeroth conversion */
    if (from_7_bit) {
        /* Every 7 packed octets expand to 8 septets */
        length = from_length + from_length / 7;
        string_p = (uint8_t *)alloca(length + 1);
        length = str_convert_septet_to_octet(from_length, from_string_p, spare_bits, length, string_p);
    } else {
        length = from_length;
        string_p = from_string_p;
    }

    /* Run first conversion */
#ifdef USE_ICONV
    int errnum;
    char *inbuf_p = (char *)string_p;
    size_t inbytesleft = (size_t)length;
    char *outbuf_p;
    char *outstr_p;
    size_t outbytesleft, outstr_length;
    size_t res;

    /* Allocate memory to hold result string and call ICONV to perform the conversion.
     * Assume worst case, 1 input character becomes 4 output characters. */
    outstr_length = outbytesleft = (length + 1) * sizeof(uint32_t);
    outstr_p = outbuf_p = (char *)alloca(outbytesleft);

    res = iconv(from_conv,
                &inbuf_p, &inbytesleft,
                &outbuf_p, &outbytesleft);

    /* Check for errors */
    if (res == (size_t) - 1) {
        errnum = errno;
        CN_LOG_E("iconv error %d (%s) !", errnum, strerror(errnum));
        return -1;
    }

    /* Find out how many bytes iconv actually used for the converted string */
    outstr_length -= outbytesleft;

#elif USE_UCNV
    UChar *ustr_p;
    int32_t ustr_length;
    UErrorCode errcode = U_ZERO_ERROR;
    int32_t res;

    if (str_convert_is_utf_16(from_charset)) {
        /* From string is already UTF-16 encoded. Skip this conversion step */
        ustr_length = length;
        ustr_p = (UChar *)string_p;
    } else {
        /* Allocate memory to hold result string and call ICU to perform the conversion */
        ustr_length = length + 1;
        ustr_p = (UChar *)alloca((size_t)(ustr_length * sizeof(UChar)));

        res = ucnv_toUChars(from_conv,
                            ustr_p, ustr_length,
                            (const char *)string_p, length,
                            &errcode);

        if (res < 0) {
            CN_LOG_E("ucnv_toUChars res %d!", res);
            return -1;
        }

        /* Check for errors, ignore warnings */
        if (U_ZERO_ERROR != errcode && errcode > U_ERROR_WARNING_LIMIT) {
            CN_LOG_E("ucnv_toUChars error code %d!", errcode);
            return -1;
        }

#ifdef CN_DEBUG_ENABLED
        {
            /* A 16-bit UChar prints as up to 4 hex digits plus a space, so
             * 30 units need up to 150 characters plus the terminator. */
            char datastr[30 * 5 + 1] = { '\0' };
            char str[10];
            int i;

            for (i = 0; i < 30 && i < res; i++) {
                sprintf(str, "%02X ", *(ustr_p + i));
                strcat(datastr, str);
            }

            CN_LOG_D("ucnv_toUChars(conv=%p, charset=%d, ustr=\"%s\") = %d",
                     (void *)from_conv,
                     from_charset,
                     datastr,
                     res);
        }
#endif /* CN_DEBUG_ENABLED */

        ustr_length = res;
    }

#endif /* USE_UCNV */

    /* Run second conversion, unless it is the same converter as we ran before */
    if (to_conv == from_conv) {
        /* One step conversion, copy result to output parameters and return */
#ifdef USE_ICONV
        /* ICONV uses a one step conversion */

        if (*to_length_p > outstr_length) {
            *to_length_p = outstr_length;
        }

        memcpy(to_string_p, outstr_p, *to_length_p);
        to_string_p[*to_length_p] = '\0';
        converted = outstr_length;

#elif USE_UCNV

        if (*to_length_p > (size_t)ustr_length) {
            *to_length_p = (size_t)ustr_length;
        }

        memcpy(to_string_p, ustr_p, *to_length_p);
        to_string_p[*to_length_p] = '\0';
        converted = ustr_length;
#endif
    } else {
#ifdef USE_UCNV
        res = ucnv_fromUChars(to_conv,
                              (char *)to_string_p, (int32_t) * to_length_p,
                              ustr_p, ustr_length,
                              &errcode);

        /* Check for errors, ignore warnings */
        if (U_ZERO_ERROR != errcode && errcode > U_ERROR_WARNING_LIMIT) {
            CN_LOG_E("ucnv_fromUChars error code %d!", errcode);
            return -1;
        }

        if (*to_length_p > (size_t)res) {
            *to_length_p = (size_t)res;
        }

#ifdef CN_DEBUG_ENABLED
        {
            /* Output is dumped byte-wise: 30 * 3 characters fit in 100 */
            char datastr[100] = { '\0' };
            char str[10];
            int i;

            for (i = 0; i < 30 && i < res; i++) {
                sprintf(str, "%02X ", *(to_string_p + i));
                strcat(datastr, str);
            }

            CN_LOG_D("ucnv_fromUChars(conv=%p, charset=%d, str=\"%s\") = %d",
                     (void *)to_conv,
                     to_charset,
                     datastr,
                     res);
        }
#endif /* CN_DEBUG_ENABLED */

        converted = res;
#endif
    }

    /* Run final conversion, convert in-place */
    if (to_7_bit) {
        converted = str_convert_octet_to_septet(converted, to_string_p, converted, to_string_p);
    }

    return converted;
}
Example #2
0
/*
 * Convert a NUL-terminated UChar string to UTF-8 using an ICU converter.
 *
 * buffer            - NUL-terminated input string (its length is not passed to ICU).
 * buffer_len        - not used by this function.
 * converted_buf     - *converted_buf is the caller-provided output buffer.
 * converted_buf_len - in: capacity of *converted_buf; out (on success only): the
 *                     length reported by ucnv_fromUChars().
 * force             - when true, unconvertible input is skipped instead of failing,
 *                     and a flagging callback records whether anything was dropped.
 * dropped_bytes     - out: the flag recorded by the flagging callback; false when
 *                     force is false or when the conversion fails.
 *
 * Returns the ICU status of the last ICU call that ran. Failures are also
 * reported through ereport(WARNING, ...).
 */
UErrorCode
convert_to_utf8(const UChar* buffer, int32_t buffer_len, char** converted_buf, int32_t *converted_buf_len, bool force, bool* dropped_bytes)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv;
    int32_t utfConvertedLen = 0;

    // used to set dropped_bytes flag if force is true
    FromUFLAGContext * context = NULL;

    // give the out-parameter a defined value on every return path,
    // including the early error returns below
    *dropped_bytes = false;

    // open UTF8 converter
    conv = ucnv_open("utf-8", &status);

    if (U_FAILURE(status))
    {
        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("Cannot open utf-8 converter - error: %s.\n", u_errorName(status))));

        ucnv_close(conv);
        return status;
    }

    if (force)
    {
        // set callback to skip illegal, irregular or unassigned bytes

        // set converter to use SKIP callback
        // context will save it and call it after calling the custom callback
        ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);

        //TODO: refactor warning and error message reporting
        if (U_FAILURE(status))
        {
            ereport(WARNING,
                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
                 errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status))));

            ucnv_close(conv);
            return status;
        }

        // initialize flagging callback
        context = flagCB_fromU_openContext();

        /* Set our special callback; the previously installed SKIP callback
         * is handed back into the context so it can be chained */
        ucnv_setFromUCallBack(conv,
                              flagCB_fromU,
                              context,
                              &(context->subCallback),
                              &(context->subContext),
                              &status
                             );

        if (U_FAILURE(status))
        {
            ereport(WARNING,
                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
                 errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status))));

            // NOTE(review): the context was never attached to the converter
            // here, so it is presumably leaked on this path -- confirm how
            // FromUFLAGContext is released.
            ucnv_close(conv);
            return status;
        }
    }

    // convert to UTF8
    // input buffer from ucnv_toUChars, which always returns a
    // NUL-terminated buffer
    utfConvertedLen = ucnv_fromUChars(conv,
                                      *converted_buf,
                                      *converted_buf_len,
                                      buffer,
                                      STRING_IS_NULL_TERMINATED,
                                      &status
                                     );

    if (U_SUCCESS(status))
    {
        *converted_buf_len = utfConvertedLen;

        // U_SUCCESS also covers U_STRING_NOT_TERMINATED_WARNING, in which case
        // the output has no trailing NUL; bound the print by the length.
        ereport(DEBUG1,
            (errcode(ERRCODE_SUCCESSFUL_COMPLETION),
                errmsg("Converted string: %.*s\n", (int) utfConvertedLen, (const char*) *converted_buf)));

        // see if any bytes were dropped
        // context struct will go away when converter is closed
        if (NULL != context)
            *dropped_bytes = context->flag;
        else
            *dropped_bytes = false;
    }

    if (U_FAILURE(status))
    {
        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("ICU conversion from Unicode to UTF8 failed - error: %s.\n", u_errorName(status))));
    }

    // close the converter
    ucnv_close(conv);
    return status;
}
Example #3
0
/*
 * Write an escaped char representation of a UChar string into target.
 *
 * Newlines, carriage returns, double quotes and backslashes get escape
 * sequences, printable ASCII is copied as-is, and everything else is written
 * either as a \uXXXX escape or converted through the file-level converter
 * `conv`, depending on the file-level `enc` setting.
 *
 * The function mixes uprv_strcat() appends with direct indexing, so target is
 * presumably expected to arrive zero-filled -- NOTE(review): confirm callers.
 *
 * Returns the number of chars the output needs. The counter keeps advancing
 * even when a piece did not fit in targetLen and was therefore not written.
 * Returns 0 if the converter reports a failure through *status.
 */
static int32_t 
uCharsToChars( char* target,int32_t targetLen, UChar* source, int32_t sourceLen,UErrorCode* status){
    int i=0, j=0;
    char str[30]={'\0'};
    while(i<sourceLen){
        if (source[i] == '\n') {
            if (j + 2 < targetLen) {
                uprv_strcat(target, "\\n");
            }
            j += 2;
        }else if(source[i]==0x0D){
            if(j+2<targetLen){
                uprv_strcat(target,"\\f");
            }
            j+=2;
        }else if(source[i] == '"'){
            /* Look at the preceding code unit, but never read source[-1]:
             * a quote at the very start is treated as unescaped. */
            UChar prev = (i>0) ? source[i-1] : 0;
            if(prev=='\\'){
                /* already escaped in the input: copy the quote only */
                if(j<targetLen){
                    target[j]= (char)source[i];
                }
                j++;
            }else{
                if(j+2<targetLen){
                    uprv_strcat(target,"\\");
                    target[j+1]= (char)source[i];
                }
                j+=2;
            }
        }else if(source[i]=='\\'){
            if(i+1<sourceLen){
                switch(source[i+1]){
                case ',':
                case '!':
                case '?':
                case '#':
                case '.':
                case '%':
                case '&':
                case ':':
                case ';':
                    if(j+2<targetLen){
                       uprv_strcat(target,"\\\\");
                    }
                    j+=2;
                    break;
                case '"':
                case '\'':
                    if(j+3<targetLen){
                       uprv_strcat(target,"\\\\\\");
                    }
                    j+=3;
                    break;
                default :
                    if(j<targetLen){
                        target[j]=(char)source[i];
                    }
                    j++;
                    break;
                }
            }else{
                /* two characters are appended, so require room for both */
                if(j+2<targetLen){
                    uprv_strcat(target,"\\\\");
                }
                j+=2;
            }
        }else if(source[i]>=0x20 && source[i]<0x7F/*ASCII*/){
            if(j<targetLen){
                target[j] = (char) source[i];
            }
            j++;            
        }else{
            if(*enc =='\0' || source[i]==0x0000){
                /* no target encoding (or a NUL): emit a \uXXXX escape */
                uprv_strcpy(str,"\\u");
                itostr(str+2,source[i],16,4);
                if(j+6<targetLen){
                    uprv_strcat(target,str);
                }
                j+=6;
            }else{
                char dest[30] = {0};
                int retVal=ucnv_fromUChars(conv,dest,30,source+i,1,status);
                if(U_FAILURE(*status)){
                    return 0;
                }
                if(j+retVal<targetLen){
                    uprv_strcat(target,dest);
                }
                j+=retVal;
            }
        }
        i++;
    }
    return j;
}
Example #4
0
/*
 * Test invariant-character handling: the u_charsToUChars()/u_UCharsToChars()
 * helpers, a roundtrip through the default converter, and the
 * uprv_isInvariantString()/uprv_isInvariantUString() predicates.
 */
static void
TestInvariant() {
    /* every invariant graphic character plus a few control codes (but not \n) */
    const char invariantChars[]=
        "\t\r \"%&'()*+,-./"
        "0123456789:;<=>?"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
        "abcdefghijklmnopqrstuvwxyz";

    /* the same characters as Unicode code units, NUL-terminated */
    const UChar invariantUChars[]={
        9, 0xd, 0x20, 0x22, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
        0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
        0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5f,
        0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
        0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0
    };

    /* ASCII characters that are not invariant */
    const char variantChars[]="\n!#$@[\\]^`{|}~";

    const UChar variantUChars[]={
        0x0a, 0x21, 0x23, 0x24, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, 0x60, 0x7b, 0x7c, 0x7d, 0x7e, 0
    };

    const UChar nonASCIIUChars[]={ 0x80, 0xa0, 0x900, 0xff51 };

    /* scratch buffers for the conversions below */
    UChar ucharBuf[120];
    char charBuf[120];

    int32_t idx, len;

    /* invariant characters have to convert in both directions */
    len=sizeof(invariantChars);

    u_charsToUChars(invariantChars, ucharBuf, len);
    if(0!=u_strcmp(ucharBuf, invariantUChars)) {
        log_err("u_charsToUChars(invariantChars) failed\n");
    }

    u_UCharsToChars(invariantUChars, charBuf, len);
    if(0!=strcmp(charBuf, invariantChars)) {
        log_err("u_UCharsToChars(invariantUChars) failed\n");
    }

    /*
     * variant characters have to convert from source code literals to
     * Unicode, but must not convert back to char *
     */
    len=sizeof(variantChars);

    u_charsToUChars(variantChars, ucharBuf, len);
    if(0!=u_strcmp(ucharBuf, variantUChars)) {
        log_err("u_charsToUChars(variantChars) failed\n");
    }

#ifdef NDEBUG
    /*
     * u_UCharsToChars(variantUChars) is exercised in release mode only,
     * because debug builds would hit an assertion failure.
     */
    u_UCharsToChars(variantUChars, charBuf, len);
    idx=0;
    while(idx<len) {
        if(0!=charBuf[idx]) {
            log_err("u_UCharsToChars(variantUChars) converted the %d-th character to %02x instead of 00\n", idx, charBuf[idx]);
        }
        ++idx;
    }
#endif

    /*
     * Invariant characters must roundtrip from Unicode through the
     * default converter and back.
     */
    {
        UErrorCode errorCode=U_ZERO_ERROR;
        UConverter *cnv=ucnv_open(NULL, &errorCode);

        if(U_SUCCESS(errorCode)) {
            /* Unicode -> default charset */
            len=ucnv_fromUChars(cnv, charBuf, sizeof(charBuf), invariantUChars, -1, &errorCode);
            if(U_FAILURE(errorCode)) {
                log_err("ucnv_fromUChars(invariantUChars) failed - %s\n", u_errorName(errorCode));
            } else if(!(len==sizeof(invariantChars)-1 && strcmp(charBuf, invariantChars)==0)) {
                log_err("ucnv_fromUChars(invariantUChars) failed\n");
            }

            /* default charset -> Unicode */
            errorCode=U_ZERO_ERROR;
            len=ucnv_toUChars(cnv, ucharBuf, LENGTHOF(ucharBuf), invariantChars, -1, &errorCode);
            if(U_FAILURE(errorCode)) {
                log_err("ucnv_toUChars(invariantChars) failed - %s\n", u_errorName(errorCode));
            } else if(!(len==LENGTHOF(invariantUChars)-1 && u_strcmp(ucharBuf, invariantUChars)==0)) {
                log_err("ucnv_toUChars(invariantChars) failed\n");
            }

            ucnv_close(cnv);
        } else {
            log_err("unable to open the default converter\n");
        }
    }

    /* predicate API: whole invariant strings and the lone terminator pass */
    if(!uprv_isInvariantString(invariantChars, -1)) {
        log_err("uprv_isInvariantString(invariantChars) failed\n");
    }
    if(!uprv_isInvariantUString(invariantUChars, -1)) {
        log_err("uprv_isInvariantUString(invariantUChars) failed\n");
    }
    if(!uprv_isInvariantString(invariantChars+strlen(invariantChars), 1)) {
        log_err("uprv_isInvariantString(\"\\0\") failed\n");
    }

    /* each single variant character must be rejected by both predicates */
    for(idx=0; idx<(sizeof(variantChars)-1); ++idx) {
        const char *variantChar=variantChars+idx;
        const UChar *variantUChar=variantUChars+idx;

        if(uprv_isInvariantString(variantChar, 1)) {
            log_err("uprv_isInvariantString(variantChars[%d]) failed\n", idx);
        }
        if(uprv_isInvariantUString(variantUChar, 1)) {
            log_err("uprv_isInvariantUString(variantUChars[%d]) failed\n", idx);
        }
    }

    /* so must every non-ASCII code unit */
    for(idx=0; idx<LENGTHOF(nonASCIIUChars); ++idx) {
        if(uprv_isInvariantUString(nonASCIIUChars+idx, 1)) {
            log_err("uprv_isInvariantUString(nonASCIIUChars[%d]) failed\n", idx);
        }
    }
}
// Encodes the string into a newly allocated byte array using the given encoding.
//
// eEncoding     - target encoding; selects the converter and the output size estimate.
// uOutputLength - receives the number of bytes produced, including the null
//                 terminator when one is appended here; zero for an empty string.
//
// Returns an array allocated with new[], or null_q when the string has no
// characters.
i8_q* QStringUnicode::ToBytes(const EQTextEncoding &eEncoding, unsigned int &uOutputLength) const
{
    i8_q* pResult = null_q;
    uOutputLength = 0;

    // Number of code points, not counting any final null character
    const unsigned int CHARACTERS_COUNT = m_strString.countChar32();

    if(CHARACTERS_COUNT == 0)
        return pResult;

    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter* pConverter = QStringUnicode::GetConverter(eEncoding);

    // Number of 16-bit code units, not counting any final null character
    const unsigned int CODE_UNITS_COUNT = m_strString.length();

    // 1 when the string does not already end with a null character, so that
    // one has to be appended to the resultant bytes; 0 otherwise
    const unsigned int ADD_NULL_TERMINATION = m_strString.char32At(CHARACTERS_COUNT - 1) == 0 ? 0 : 1;

    // Size estimate. The default matches 8-bit encodings (ASCII, ISO 8859-1)
    int32_t nBufferSizeBytes = CHARACTERS_COUNT + ADD_NULL_TERMINATION;

    switch(eEncoding)
    {
    case EQTextEncoding::E_UTF8:
        // A UTF-8 character takes 1 to 4 bytes and the exact amount is not
        // known beforehand, so the maximum is reserved
        nBufferSizeBytes = sizeof(i32_q) * (CHARACTERS_COUNT + ADD_NULL_TERMINATION);
        break;
    case EQTextEncoding::E_UTF16:
        // Known amount of 16-bit code units, plus one for the BOM
        nBufferSizeBytes = sizeof(i16_q) * (CODE_UNITS_COUNT + 1 + ADD_NULL_TERMINATION);
        break;
    case EQTextEncoding::E_UTF16BE:
    case EQTextEncoding::E_UTF16LE:
        // Known amount of 16-bit code units, no BOM
        nBufferSizeBytes = sizeof(i16_q) * (CODE_UNITS_COUNT + ADD_NULL_TERMINATION);
        break;
    case EQTextEncoding::E_UTF32:
        // Fixed 32 bits per character, plus one for the BOM
        nBufferSizeBytes = sizeof(i32_q) * (CHARACTERS_COUNT + 1 + ADD_NULL_TERMINATION);
        break;
    case EQTextEncoding::E_UTF32BE:
    case EQTextEncoding::E_UTF32LE:
        // Fixed 32 bits per character, no BOM
        nBufferSizeBytes = sizeof(i32_q) * (CHARACTERS_COUNT + ADD_NULL_TERMINATION);
        break;
    }

    // Convert from the native encoding (UTF-16) to the requested one
    const UChar* pSource = m_strString.getBuffer();
    pResult = new char[nBufferSizeBytes];
    ucnv_reset(pConverter);
    uOutputLength = ucnv_fromUChars(pConverter, pResult, nBufferSizeBytes, pSource, CODE_UNITS_COUNT, &errorCode);

    if(ADD_NULL_TERMINATION == 1)
    {
        // ICU only appends a single byte as terminator, so a full-width null
        // character is written after the converted data and counted in the
        // output length. Encodings not listed get no terminator.
        size_t uTerminatorSize = 0;

        switch(eEncoding)
        {
        case EQTextEncoding::E_ASCII:
        case EQTextEncoding::E_ISO88591:
        case EQTextEncoding::E_UTF8:
            uTerminatorSize = sizeof(i8_q);
            break;
        case EQTextEncoding::E_UTF16:
        case EQTextEncoding::E_UTF16BE:
        case EQTextEncoding::E_UTF16LE:
            uTerminatorSize = sizeof(i16_q);
            break;
        case EQTextEncoding::E_UTF32:
        case EQTextEncoding::E_UTF32BE:
        case EQTextEncoding::E_UTF32LE:
            uTerminatorSize = sizeof(i32_q);
            break;
        }

        if(uTerminatorSize > 0)
        {
            uOutputLength += uTerminatorSize;
            memset(&pResult[uOutputLength - uTerminatorSize], 0, uTerminatorSize);
        }
    }

    return pResult;
}
/**
 * Convert character vector between marked encodings and the encoding provided
 *
 * Each string is fetched as UTF-16 and converted with an ICU converter for
 * the target encoding. NA inputs give NA_STRING (or R_NilValue in a raw list).
 *
 * @param str     input character vector or list of raw vectors
 * @param to    target encoding, \code{NULL} or \code{""} for default enc
 * @param to_raw single logical, should list of raw vectors be returned?
 * @return a converted character vector or list of raw vectors
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-11-12)
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-03-28)
 *          use StriUcnv
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-01)
 *          calc required buf size a priori
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_encode_from_marked(SEXP str, SEXP to, SEXP to_raw)
{
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   const char* selected_to   = stri__prepare_arg_enc(to, "to", true); /* this is R_alloc'ed */
   bool to_raw_logical = stri__prepare_arg_logical_1_notNA(to_raw, "to_raw");

   STRI__ERROR_HANDLER_BEGIN(1)
   R_len_t str_n = LENGTH(str);
   StriContainerUTF16 str_cont(str, str_n);

   // get the number of strings to convert; if == 0, then you know what's the result
   if (str_n <= 0) {
      // balance the PROTECT of `str` above before leaving early,
      // the same way the regular exit path does
      STRI__UNPROTECT_ALL
      return Rf_allocVector(to_raw_logical?VECSXP:STRSXP, 0);
   }

   // Open converters
   StriUcnv ucnv(selected_to);
   UConverter* uconv_to = ucnv.getConverter(true /*register_callbacks*/);

   // Get target encoding mark
   cetype_t encmark_to = to_raw_logical?CE_BYTES:ucnv.getCE();

   // Prepare out val
   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(to_raw_logical?VECSXP:STRSXP, str_n));

   // calculate required buf size: longest non-NA input, in UTF-16 code units
   R_len_t bufsize = 0;
   for (R_len_t i=0; i<str_n; ++i) {
      if (!str_cont.isNA(i) && str_cont.get(i).length() > bufsize)
         bufsize = str_cont.get(i).length();
   }
   bufsize = UCNV_GET_MAX_BYTES_FOR_STRING(bufsize, ucnv_getMaxCharSize(uconv_to));
   // "The calculated size is guaranteed to be sufficient for this conversion."
   String8buf buf(bufsize);

   for (R_len_t i=0; i<str_n; ++i) {
      if (str_cont.isNA(i)) {
         if (to_raw_logical) SET_VECTOR_ELT(ret, i, R_NilValue);
         else                SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      R_len_t curn_tmp = str_cont.get(i).length();
      const UChar* curs_tmp = str_cont.get(i).getBuffer(); // The buffer content is (probably) not NUL-terminated.
      if (!curs_tmp)
         throw StriException(MSG__INTERNAL_ERROR);

      UErrorCode status = U_ZERO_ERROR;
      ucnv_resetFromUnicode(uconv_to);
      R_len_t bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(),
            curs_tmp, curn_tmp, &status);
      if (bufneed <= buf.size()) {
         STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
      }
      else {// larger buffer needed [this shouldn't happen?]
         // the status of the first attempt is discarded on purpose:
         // it only signalled that the buffer was too small
         buf.resize(bufneed, false/*destroy contents*/);
         status = U_ZERO_ERROR;
         bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(),
               curs_tmp, curn_tmp, &status);
         STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
      }

      if (to_raw_logical) {
         SEXP outobj;
         STRI__PROTECT(outobj = Rf_allocVector(RAWSXP, bufneed));
         memcpy(RAW(outobj), buf.data(), (size_t)bufneed);
         SET_VECTOR_ELT(ret, i, outobj);
         STRI__UNPROTECT(1);
      }
      else {
         SET_STRING_ELT(ret, i,
            Rf_mkCharLenCE(buf.data(), bufneed, encmark_to));
      }
   }

   STRI__UNPROTECT_ALL
   return ret;

   STRI__ERROR_HANDLER_END({/* nothing special on error */})
}
Example #7
0
// Converts a NUL-terminated UTF-16 string to a byte string in the given encoding.
//
// src      - NUL-terminated UTF-16 input.
// Encoding - encoding name, mapped through getPlatformEncoding(); when no
//            converter can be opened for it, "ISO-8859-1" is tried instead.
//
// Returns the converted string. An empty string is returned when no converter
// could be opened, when converting the result back with convCharStrToU16Str()
// does not compare equal to src, or when ICU reports any non-zero status.
const std::string convU16StrToCharStr(const U16Char_t* src, const char* Encoding)
{
	//static char const* const tocode = CHARCONV_ICONV_UTF16;
	char const* const tocode = getPlatformEncoding(Encoding);

	UErrorCode status = U_ZERO_ERROR;

#ifdef ENCCONV_DEBUG
	std::cout << "\t" "convString" << std::endl;
	std::cout << "\t\t" "tocode   = " << tocode   << std::endl;
	//std::cout << "\t\t" "fromcode = " << fromcode << std::endl;
#endif

	//iconv_t cd = iconv_open(tocode, fromcode);
	// Initializing ICU converter
	UConverter *conv = ucnv_open(tocode, &status);

#ifdef CHARCONV_DEBUG
	std::cout << "\t\t" "aft ucnv_open: status = " << status << std::endl;
#endif
	if (conv == NULL)
	{ // try default encoding "ISO-8859-1"
		//throw std::runtime_error("Unable to create Unicode converter object");
		status = U_ZERO_ERROR;
		conv = ucnv_open("ISO-8859-1", &status);
	}

	//still if conv is null simply return blank string

	if (conv == NULL)
	{
		return std::string("");
	}

	U16Char_t const* srcWrk = src;
	const size_t srcSizeInUnits = GetNumOfUnits(src);
	const size_t dstSizeInBytes = MAX(256, (srcSizeInUnits + 1)) * 4;	// How much byte buffer is needed? (UTF16 --> MBCS)
	char* dst = new char [dstSizeInBytes];
	if(dst==NULL)
	{
		// do not leak the converter on this path
		ucnv_close(conv);
		return std::string("");
	}

	char* dstWrk =(char*)(dst);
	size_t dstLeftInBytes = dstSizeInBytes - sizeof(char);

	// The converter is only offered dstLeftInBytes bytes, so the last byte is
	// never written by ICU. Zero it so that dst is NUL-terminated even when the
	// output fills the offered capacity and ICU adds no terminator itself.
	dst[dstSizeInBytes - 1] = 0;
	status = U_ZERO_ERROR;

	ucnv_fromUChars(conv, dstWrk, dstLeftInBytes, (UChar*)srcWrk, -1, &status);

	// Round-trip check: convert back and compare with the input.
	// NOTE(review): strcmp() on UTF-16 data stops at the first zero byte, so
	// this presumably compares only a prefix of the two strings -- confirm
	// whether a UTF-16 aware comparison was intended.
	U16Char_t* reverseConvertedVal = convCharStrToU16Str(dstWrk,Encoding);
	if(strcmp((char*)reverseConvertedVal,(char*)src)!=0)
	{
		EncConv::releaseU16Str(reverseConvertedVal);
		delete[] dst;
		// the converter was previously leaked on this early return
		ucnv_close(conv);
		return std::string("");
	}
	EncConv::releaseU16Str(reverseConvertedVal);

#ifdef CHARCONV_DEBUG
	std::cout << "\t\t" "aft iconv: status = " << status << std::endl;
#endif
	if (status != U_ZERO_ERROR )
	{
		//	throw std::runtime_error("Unable to convert to string");
		*dstWrk = 0;
	}

	std::string dst2(dst);

	delete[] dst;
	//const int err = iconv_close(cd);

	ucnv_close(conv);

	//if (err == -1)
	//	throw std::runtime_error("Unable to deallocate iconv_t object");

	return dst2;
}
Example #8
0
/*
 * SQL function: transform a TEXT argument with the converter stored in the
 * function's user data, run do_rc4() over the result and return it as a BLOB.
 *
 * - Exactly one argument is accepted; anything else yields SQLITE_MISUSE.
 * - BLOB arguments are rejected with SQLITE_MISUSE.
 * - Arguments that are neither TEXT nor BLOB are returned unchanged.
 * - A conversion failure yields SQLITE_ERROR; an allocation failure yields
 *   an out-of-memory error.
 */
static void mmenc_func(sqlite3_context *db, int argc, sqlite3_value **argv)
{
    mm_cipher_context_t *ctx;
    const UChar *src;
    int32_t src_len;
    char buf[1024];
    char *dst = buf;
    int32_t dst_len;
    UErrorCode status = U_ZERO_ERROR;
    int arg_type;

    // only accept 1 argument.
    if (argc != 1)
        goto error_misuse;

    // encoding BLOB data type is not supported.
    arg_type = sqlite3_value_type(argv[0]);
    if (arg_type == SQLITE_BLOB)
        goto error_misuse;

    // for data types other than TEXT, just return them.
    if (arg_type != SQLITE_TEXT) {
        sqlite3_result_value(db, argv[0]);
        return;
    }

    ctx = (mm_cipher_context_t *) sqlite3_user_data(db);

    // fetch the text first and its size afterwards, the order the SQLite
    // documentation recommends for the text16/bytes16 pair.
    src = (const UChar *) sqlite3_value_text16(argv[0]);
    src_len = sqlite3_value_bytes16(argv[0]) / 2;

    // transform input string to BOCU-1 encoding.
    // try stack buffer first, if it doesn't fit, malloc a new buffer.
    dst_len =
        ucnv_fromUChars(ctx->cnv, dst, sizeof(buf), src, src_len, &status);
    if (status == U_BUFFER_OVERFLOW_ERROR) {
        status = U_ZERO_ERROR;
        dst = (char *) sqlite3_malloc(dst_len);
        if (dst == NULL) {
            // nothing was allocated, so there is nothing to free here.
            sqlite3_result_error_nomem(db);
            return;
        }
        dst_len =
            ucnv_fromUChars(ctx->cnv, dst, dst_len, src, src_len, &status);
    }

    // warnings such as U_STRING_NOT_TERMINATED_WARNING are not failures and
    // are accepted: the result is handled by length, not as a C string.
    if (U_FAILURE(status)) {
        sqlite3_mm_set_last_error(
            "Failed transforming text to internal encoding.");
        goto error_error;
    }

    // encrypt transformed BOCU-1 string.
    do_rc4(ctx, dst, dst_len);

    // return; SQLITE_TRANSIENT makes SQLite take its own copy, so the
    // temporary buffer can be released right away.
    sqlite3_result_blob(db, dst, dst_len, SQLITE_TRANSIENT);
    if (dst != buf)
        sqlite3_free(dst);
    return;

error_error:
    if (dst != buf)
        sqlite3_free(dst);
    sqlite3_result_error_code(db, SQLITE_ERROR);
    return;

error_misuse:
    if (dst != buf)
        sqlite3_free(dst);
    sqlite3_result_error_code(db, SQLITE_MISUSE);
    return;
}
Example #9
0
/*
 * Plain function wrapper around ucnv_fromUChars(): every argument is passed
 * through unchanged and the callee's return value is returned as-is.
 */
int32_t __hs_ucnv_fromUChars(UConverter *cnv, char *dest, int32_t destCapacity,
			     const UChar *src, int32_t srcLength,
			     UErrorCode *pErrorCode)
{
    const int32_t written = ucnv_fromUChars(cnv, dest, destCapacity,
                                            src, srcLength, pErrorCode);

    return written;
}