コード例 #1
0
CF_INLINE UConverter *__CFStringEncodingConverterCreateICUConverter(const char *icuName, uint32_t flags, bool toUnicode) {
    UConverter *converter;
    UErrorCode errorCode = U_ZERO_ERROR;
    uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);

    if (0 != streamID) { // this is a part of streaming previously created
        __CFICUThreadData *data = __CFStringEncodingICUGetThreadData();

        --streamID; // map to array index

        if ((streamID < data->_numSlots) && (NULL != data->_converters[streamID])) return data->_converters[streamID];
    }

    converter = ucnv_open(icuName, &errorCode);

    if (NULL != converter) {
        char lossyByte = CFStringEncodingMaskToLossyByte(flags);

        if ((0 == lossyByte) && (0 != (flags & kCFStringEncodingAllowLossyConversion))) lossyByte = '?';

        if (0 ==lossyByte) {
            if (toUnicode) {
                ucnv_setToUCallBack(converter, &UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
            } else {
                ucnv_setFromUCallBack(converter, &UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
            }
        } else {
            ucnv_setSubstChars(converter, &lossyByte, 1, &errorCode);
        }
    }

    return converter;
}
コード例 #2
0
static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
        jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
    UConverter* cnv = toUConverter(address);
    if (cnv == NULL) {
        maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR);
        return;
    }

    UConverterToUCallback oldCallback;
    const void* oldCallbackContext;
    ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);

    DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
            reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
    if (callbackContext == NULL) {
        callbackContext = new DecoderCallbackContext;
    }

    callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
    callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);

    ScopedStringChars replacement(env, javaReplacement);
    if (replacement.get() == NULL) {
        maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR);
        return;
    }
    u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size());
    callbackContext->replacementCharCount = replacement.size();

    UErrorCode errorCode = U_ZERO_ERROR;
    ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
    maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode);
}
コード例 #3
0
static UConverter *
GSStringOpenConverter (CFStringEncoding encoding, char lossByte)
{
  const char *converterName;
  UConverter *cnv;
  UErrorCode err = U_ZERO_ERROR;

  converterName = CFStringICUConverterName (encoding);

  cnv = ucnv_open (converterName, &err);
  if (U_FAILURE (err))
    cnv = NULL;

  if (lossByte)
    {
      /* FIXME: for some reason this is returning U_ILLEGAL_ARGUMENTS_ERROR */
      ucnv_setSubstChars (cnv, &lossByte, 1, &err);
    }
  else
    {
      ucnv_setToUCallBack (cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
                           &err);
      ucnv_setFromUCallBack (cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL,
                             &err);
    }

  return cnv;
}
コード例 #4
0
ファイル: TextCodecICU.cpp プロジェクト: ollie314/chromium
 ErrorCallbackSetter(UConverter* converter, bool stopOnError)
     : m_converter(converter), m_shouldStopOnEncodingErrors(stopOnError) {
   if (m_shouldStopOnEncodingErrors) {
     UErrorCode err = U_ZERO_ERROR;
     ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_STOP, 0,
                         &m_savedAction, &m_savedContext, &err);
     DCHECK_EQ(err, U_ZERO_ERROR);
   }
 }
コード例 #5
0
U_STABLE void U_EXPORT2
ucnv_setToUCallBack_53(UConverter * converter,
                     UConverterToUCallback newAction,
                     const void* newContext,
                     UConverterToUCallback *oldAction,
                     const void** oldContext,
                     UErrorCode * err)
{
    ucnv_setToUCallBack(converter, newAction, newContext, oldAction, oldContext, err);
}
コード例 #6
0
 ~ErrorCallbackSetter()
 {
     if (m_shouldStopOnEncodingErrors) {
         UErrorCode err = U_ZERO_ERROR;
         const void* oldContext;
         UConverterToUCallback oldAction;
         ucnv_setToUCallBack(m_converter, m_savedAction, m_savedContext, &oldAction, &oldContext, &err);
         ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
         ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL));
         ASSERT(err == U_ZERO_ERROR);
     }
 }
コード例 #7
0
ファイル: TextCodecICU.cpp プロジェクト: ollie314/chromium
 ~ErrorCallbackSetter() {
   if (m_shouldStopOnEncodingErrors) {
     UErrorCode err = U_ZERO_ERROR;
     const void* oldContext;
     UConverterToUCallback oldAction;
     ucnv_setToUCallBack(m_converter, m_savedAction, m_savedContext,
                         &oldAction, &oldContext, &err);
     DCHECK_EQ(oldAction, UCNV_TO_U_CALLBACK_STOP);
     DCHECK(!oldContext);
     DCHECK_EQ(err, U_ZERO_ERROR);
   }
 }
コード例 #8
0
 ErrorCallbackSetter(UConverter* converter, bool stopOnError)
     : m_converter(converter)
     , m_shouldStopOnEncodingErrors(stopOnError)
 {
     if (m_shouldStopOnEncodingErrors) {
         UErrorCode err = U_ZERO_ERROR;
         ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
             UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
             &m_savedContext, &err);
         ASSERT(err == U_ZERO_ERROR);
     }
 }
コード例 #9
0
int initTxt(struct doc_descriptor *desc) {
  UErrorCode err;
  char *encoding = NULL;
  int len, BOMlength = 0;
  char buf[BUFSIZE];
  UChar outbuf[4*BUFSIZE];


  lseek(desc->fd, 0, SEEK_SET);
  len = read(desc->fd, buf, BUFSIZE);

  /* detect BOM */
  err = U_ZERO_ERROR;
  encoding = ucnv_detectUnicodeSignature(buf, BUFSIZE, &BOMlength, &err);
  if(encoding != NULL) {
    lseek(desc->fd, BOMlength, SEEK_SET);

    /* initialize converter to encoding */
    err = U_ZERO_ERROR;
    desc->conv = ucnv_open(encoding, &err);
    if (U_FAILURE(err)) {
      fprintf(stderr, "unable to open ICU converter\n");
      return ERR_ICU;
    }
    
  } else {
    /* initialize converter to UTF-8 */
    err = U_ZERO_ERROR;
    desc->conv = ucnv_open("utf8", &err);
    if (U_FAILURE(err)) {
      fprintf(stderr, "unable to open ICU converter\n");
      return ERR_ICU;
    }

    /* check the first 2048 bytes */
    err = U_ZERO_ERROR;
    ucnv_setToUCallBack(desc->conv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err);
    if (U_FAILURE(err)) {
      fprintf(stderr, "error setToUCallback\n");
      return ERR_ICU;
    }
    err = U_ZERO_ERROR;
    ucnv_toUChars(desc->conv, outbuf, 4 * BUFSIZE, buf, len, &err);
    if (U_FAILURE(err)) {
      fprintf(stderr, "Unknown encoding\n");
      return ERR_ICU;
    }
    lseek(desc->fd, 0, SEEK_SET);
  }

  return OK;
}
コード例 #10
0
status_t
ICUCategoryData::_GetConverter(UConverter*& converterOut)
{
	// we use different converters per thread to avoid concurrent accesses
	ICUThreadLocalStorageValue* tlsValue = NULL;
	status_t result = ICUThreadLocalStorageValue::GetInstanceForKey(
		fThreadLocalStorageKey, tlsValue);
	if (result != B_OK)
		return result;

	if (tlsValue->converter != NULL) {
		if (strcmp(tlsValue->charset, fGivenCharset) == 0) {
			converterOut = tlsValue->converter;
			return B_OK;
		}

		// charset no longer matches the converter, we need to dump it and
		// create a new one
		ucnv_close(tlsValue->converter);
		tlsValue->converter = NULL;
	}

	// create a new converter for the current charset
	UErrorCode icuStatus = U_ZERO_ERROR;
	UConverter* icuConverter = ucnv_open(fGivenCharset, &icuStatus);
	if (icuConverter == NULL)
		return B_NAME_NOT_FOUND;

	// setup the new converter to stop upon any errors
	icuStatus = U_ZERO_ERROR;
	ucnv_setToUCallBack(icuConverter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
		&icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(icuConverter);
		return B_ERROR;
	}
	icuStatus = U_ZERO_ERROR;
	ucnv_setFromUCallBack(icuConverter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
		NULL, &icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(icuConverter);
		return B_ERROR;
	}

	tlsValue->converter = icuConverter;
	strlcpy(tlsValue->charset, fGivenCharset, sizeof(tlsValue->charset));

	converterOut = icuConverter;

	return B_OK;
}
コード例 #11
0
charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu
	(const charset& source, const charset& dest, outputStream* os,
	 const charsetConverterOptions& opts)
	: m_from(NULL), m_to(NULL), m_sourceCharset(source),
	  m_destCharset(dest), m_stream(*os), m_options(opts)
{
	UErrorCode err = U_ZERO_ERROR;
	m_from = ucnv_open(source.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for source charset '" + source.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	m_to = ucnv_open(dest.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for destination charset '" + dest.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	// Tell ICU what to do when encountering an illegal byte sequence
	if (m_options.silentlyReplaceInvalidSequences)
	{
		// Set replacement chars for when converting from Unicode to codepage
		icu::UnicodeString substString(m_options.invalidSequence.c_str());
		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
	}
	else
	{
		// Tell ICU top stop (and return an error) on illegal byte sequences
		ucnv_setToUCallBack
			(m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

		ucnv_setFromUCallBack
			(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
	}
}
コード例 #12
0
ファイル: codeset.c プロジェクト: nolange/pkg-open-vm-tools
Bool
CodeSet_Validate(const char *buf,   // IN: the string
                 size_t size,	    // IN: length of string
                 const char *code)  // IN: encoding
{
#if defined(NO_ICU)
   return CodeSetOld_Validate(buf, size, code);
#else
   UConverter *cv;
   UErrorCode uerr;

   // ucnv_toUChars takes 32-bit int size
   ASSERT_NOT_IMPLEMENTED(size <= (size_t) MAX_INT32);

   if (size == 0) {
      return TRUE;
   }

   /*
    * Fallback if necessary.
    */

   if (dontUseIcu) {
      return CodeSetOld_Validate(buf, size, code);
   }

   /*
    * Calling ucnv_toUChars() this way is the idiom to precompute
    * the length of the output.  (See preflighting in the ICU User Guide.)
    * So if the error is not U_BUFFER_OVERFLOW_ERROR, then the input
    * is bad.
    */

   uerr = U_ZERO_ERROR;
   cv = ucnv_open(code, &uerr);
   ASSERT_NOT_IMPLEMENTED(uerr == U_ZERO_ERROR);
   ucnv_setToUCallBack(cv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &uerr);
   ASSERT_NOT_IMPLEMENTED(uerr == U_ZERO_ERROR);
   ucnv_toUChars(cv, NULL, 0, buf, size, &uerr);
   ucnv_close(cv);

   return uerr == U_BUFFER_OVERFLOW_ERROR;
#endif
}
コード例 #13
0
// Requires free() of returned UTF16Chars.
void convertUTF8ToUTF16(const NPUTF8 *UTF8Chars, int UTF8Length, NPUTF16 **UTF16Chars, unsigned int *UTF16Length)
{
#if USE(ICU_UNICODE)
    assert(UTF8Chars || UTF8Length == 0);
    assert(UTF16Chars);
    
    if (UTF8Length == -1)
        UTF8Length = static_cast<int>(strlen(UTF8Chars));
        
    // UTF16Length maximum length is the length of the UTF8 string, plus one to include terminator
    // Without the plus one, it will convert ok, but a warning is generated from the converter as
    // there is not enough room for a terminating character.
    *UTF16Length = UTF8Length + 1; 
        
    *UTF16Chars = 0;
    UErrorCode status = U_ZERO_ERROR;
    UConverter* conv = ucnv_open("utf8", &status);
    if (U_SUCCESS(status)) { 
        *UTF16Chars = (NPUTF16 *)malloc(sizeof(NPUTF16) * (*UTF16Length));
        ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, 0, 0, 0, &status);
        *UTF16Length = ucnv_toUChars(conv, (::UChar*)*UTF16Chars, *UTF16Length, UTF8Chars, UTF8Length, &status); 
        ucnv_close(conv);
    } 
    
    // Check to see if the conversion was successful
    // Some plugins return invalid UTF-8 in NPVariantType_String, see <http://bugs.webkit.org/show_bug.cgi?id=5163>
    // There is no "bad data" for latin1. It is unlikely that the plugin was really sending text in this encoding,
    // but it should have used UTF-8, and now we are simply avoiding a crash.
    if (!U_SUCCESS(status)) {
        *UTF16Length = UTF8Length;
        
        if (!*UTF16Chars)   // If the memory wasn't allocated, allocate it.
            *UTF16Chars = (NPUTF16 *)malloc(sizeof(NPUTF16) * (*UTF16Length));
 
        for (unsigned i = 0; i < *UTF16Length; i++)
            (*UTF16Chars)[i] = UTF8Chars[i] & 0xFF;
    }
#else
    assert(!"Implement me!");    
#endif
}
コード例 #14
0
ファイル: xh_encoder.c プロジェクト: yoreek/XML-Hash-XS
static UConverter *
xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
{
    UConverter *uconv;
    UErrorCode  status = U_ZERO_ERROR;

    uconv = ucnv_open((char *) encoding, &status);
    if ( U_FAILURE(status) ) {
        return NULL;
    }

    if (toUnicode) {
        ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
                            NULL, NULL, NULL, &status);
    }
    else {
        ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
                              NULL, NULL, NULL, &status);
    }

    return uconv;
}
コード例 #15
0
ファイル: gtm_conv.c プロジェクト: 5HT/mumps
UConverter* get_chset_desc(const mstr* chset)
{
	int 			chset_indx;
	UErrorCode		status;

	if (0 >= (chset_indx = verify_chset(chset)))
		return NULL;
	if (NULL == chset_desc[chset_indx])
	{
		status = U_ZERO_ERROR;
		chset_desc[chset_indx] = ucnv_open(chset_names[chset_indx].addr, &status);
		if (U_FAILURE(status))
			GTMASSERT;
		/* Initialize the callback for illegal/invalid characters, so that conversion
		 * stops at the first illegal character rather than continuing with replacement */
		status = U_ZERO_ERROR;
		ucnv_setToUCallBack(chset_desc[chset_indx], &callback_stop, NULL, NULL, NULL, &status);
		if (U_FAILURE(status))
			GTMASSERT;
	}
	return chset_desc[chset_indx];
}
コード例 #16
0
ファイル: common.cpp プロジェクト: kluge-iitk/pyicu
EXPORT UnicodeString &PyBytes_AsUnicodeString(PyObject *object,
                                              const char *encoding,
                                              const char *mode,
                                              UnicodeString &string)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = ucnv_open(encoding, &status);

    if (U_FAILURE(status))
        throw ICUException(status);

    _STOPReason stop;
    char *src;
    Py_ssize_t len;
    UChar *buffer, *target;

    memset(&stop, 0, sizeof(stop));

    if (!strcmp(mode, "strict"))
    {
        ucnv_setToUCallBack(conv, _stopDecode, &stop, NULL, NULL, &status);
        if (U_FAILURE(status))
        {
            ucnv_close(conv);
            throw ICUException(status);
        }
    }

    PyBytes_AsStringAndSize(object, &src, &len);
    stop.src = src;
    stop.src_length = len;

    buffer = target = new UChar[len];
    if (buffer == NULL)
    {
        ucnv_close(conv);

        PyErr_NoMemory();
        throw ICUException();
    }

    ucnv_toUnicode(conv, &target, target + len,
                   (const char **) &src, src + len, NULL, true, &status);

    if (U_FAILURE(status))
    {
        const char *reasonName;

        switch (stop.reason) {
          case UCNV_UNASSIGNED:
            reasonName = "the code point is unassigned";
            break;
          case UCNV_ILLEGAL:
            reasonName = "the code point is illegal";
            break;
          case UCNV_IRREGULAR:
            reasonName = "the code point is not a regular sequence in the encoding";
            break;
          default:
            reasonName = "unexpected reason code";
            break;
        }
        status = U_ZERO_ERROR;

        PyErr_Format(PyExc_ValueError, "'%s' codec can't decode byte 0x%x in position %d: reason code %d (%s)", ucnv_getName(conv, &status), (int) (unsigned char) stop.chars[0], stop.error_position, stop.reason, reasonName);

        delete[] buffer;
        ucnv_close(conv);

        throw ICUException();
    }

    string.setTo(buffer, target - buffer);

    delete[] buffer;
    ucnv_close(conv);

    return string;
}
コード例 #17
0
ファイル: uconv.cpp プロジェクト: gitpan/ponie
// Convert a file from one encoding to another
static UBool convertFile(const char *pname,
                         const char *fromcpage,
                         UConverterToUCallback toucallback,
                         const void *touctxt,
                         const char *tocpage,
                         UConverterFromUCallback fromucallback,
                         const void *fromuctxt,
                         int fallback,
                         size_t bufsz,
                         const char *translit,
                         const char *infilestr,
                         FILE * outfile, int verbose)
{
    FILE *infile;
    UBool ret = TRUE;
    UConverter *convfrom = 0;
    UConverter *convto = 0;
    UErrorCode err = U_ZERO_ERROR;
    UBool flush;
    const char *cbufp;
    char *bufp;
    char *buf = 0;

    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */

    const UChar *unibufbp;
    UChar *unibufp;
    UChar *unibuf = 0;
    int32_t *fromoffsets = 0, *tooffsets = 0;

    size_t rd, wr, tobufsz;

#if !UCONFIG_NO_TRANSLITERATION
    Transliterator *t = 0;      // Transliterator acting on Unicode data.
#endif
    UnicodeString u;            // String to do the transliteration.

    // Open the correct input file or connect to stdin for reading input

    if (infilestr != 0 && strcmp(infilestr, "-")) {
        infile = fopen(infilestr, "rb");
        if (infile == 0) {
            UnicodeString str1(infilestr, "");
            str1.append((UChar32) 0);
            UnicodeString str2(strerror(errno), "");
            str2.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
            return FALSE;
        }
    } else {
        infilestr = "-";
        infile = stdin;
#ifdef WIN32
        if (setmode(fileno(stdin), O_BINARY) == -1) {
            initMsg(pname);
            u_wmsg(stderr, "cantSetInBinMode");
            return FALSE;
        }
#endif
    }

    if (verbose) {
        fprintf(stderr, "%s:\n", infilestr);
    }

#if !UCONFIG_NO_TRANSLITERATION
    // Create transliterator as needed.

    if (translit != NULL && *translit) {
        UParseError parse;
        UnicodeString str(translit), pestr;

        /* Create from rules or by ID as needed. */

        parse.line = -1;

        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
        } else {
            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
        }

        if (U_FAILURE(err)) {
            str.append((UChar32) 0);
            initMsg(pname);

            if (parse.line >= 0) {
                UChar linebuf[20], offsetbuf[20];
                uprv_itou(linebuf, 20, parse.line, 10, 0);
                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getBuffer(),
                    u_wmsg_errorName(err), linebuf, offsetbuf);
            } else {
                u_wmsg(stderr, "cantCreateTranslit", str.getBuffer(),
                    u_wmsg_errorName(err));
            }

            if (t) {
                delete t;
                t = 0;
            }
            goto error_exit;
        }
    }
#endif

    // Create codepage converter. If the codepage or its aliases weren't
    // available, it returns NULL and a failure code. We also set the
    // callbacks, and return errors in the same way.

    convfrom = ucnv_open(fromcpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(fromcpage, (int32_t)(uprv_strlen(fromcpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenFromCodeset", str.getBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }

    convto = ucnv_open(tocpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(tocpage, (int32_t)(uprv_strlen(tocpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenToCodeset", str.getBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFallback(convto, fallback);

    // To ensure that the buffer always is of enough size, we
    // must take the worst case scenario, that is the character in
    // the codepage that uses the most bytes and multiply it against
    // the buffer size.

    // use bufsz+1 to allow for additional BOM/signature character (U+FEFF)
    tobufsz = (bufsz+1) * ucnv_getMaxCharSize(convto);

    buf = new char[tobufsz];
    unibuf = new UChar[bufsz];

    fromoffsets = new int32_t[bufsz];
    tooffsets = new int32_t[tobufsz];

    // OK, we can convert now.

    do {
        char willexit = 0;

        rd = fread(buf, 1, bufsz, infile);
        if (ferror(infile) != 0) {
            UnicodeString str(strerror(errno));
            str.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantRead", str.getBuffer());
            goto error_exit;
        }

        // Convert the read buffer into the new coding
        // After the call 'unibufp' will be placed on the last
        // character that was converted in the 'unibuf'.
        // Also the 'cbufp' is positioned on the last converted
        // character.
        // At the last conversion in the file, flush should be set to
        // true so that we get all characters converted
        //
        // The converter must be flushed at the end of conversion so
        // that characters on hold also will be written.

        unibufp = unibuf;
        cbufp = buf;
        flush = rd != bufsz;
        ucnv_toUnicode(convfrom, &unibufp, unibufp + bufsz, &cbufp,
            cbufp + rd, fromoffsets, flush, &err);

        infoffset += (uint32_t)(cbufp - buf);

        if (U_FAILURE(err)) {
            char pos[32];
            sprintf(pos, "%u", infoffset - 1);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
            willexit = 1;
            err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
        }

        // At the last conversion, the converted characters should be
        // equal to number of chars read.

        if (flush && !willexit && cbufp != (buf + rd)) {
            char pos[32];
            sprintf(pos, "%u", infoffset);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "premEndInput", str.getBuffer());
            willexit = 1;
        }

        // Prepare to transliterate and convert. Transliterate if needed.

#if !UCONFIG_NO_TRANSLITERATION
        if (t) {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf)); // Copy into string.
            t->transliterate(u);
        } else
#endif
        {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf), (int32_t)(bufsz)); // Share the buffer.
        }

        int32_t ulen = u.length();

        // Convert the Unicode buffer into the destination codepage
        // Again 'bufp' will be placed on the last converted character
        // And 'unibufbp' will be placed on the last converted unicode character
        // At the last conversion flush should be set to true to ensure that
        // all characters left get converted

        const UChar *unibufu = unibufbp = u.getBuffer();

        do {
            int32_t len = ulen > (int32_t)bufsz ? (int32_t)bufsz : ulen;

            bufp = buf;
            unibufp = (UChar *) (unibufbp + len);

            ucnv_fromUnicode(convto, &bufp, bufp + tobufsz,
                             &unibufbp,
                             unibufp,
                             tooffsets, flush, &err);

            if (U_FAILURE(err)) {
                const char *errtag;
                char pos[32];

                uint32_t erroffset =
                    dataOffset((int32_t)(bufp - buf - 1), fromoffsets, (int32_t)(bufsz), tooffsets, (int32_t)(tobufsz));
                int32_t ferroffset = (int32_t)(infoffset - (unibufp - unibufu) + erroffset);

                if ((int32_t) ferroffset < 0) {
                    ferroffset = (int32_t)(outfoffset + (bufp - buf));
                    errtag = "problemCvtFromUOut";
                } else {
                    errtag = "problemCvtFromU";
                }
                sprintf(pos, "%u", ferroffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, errtag, str.getBuffer(),
                       u_wmsg_errorName(err));
                willexit = 1;
            }

            // At the last conversion, the converted characters should be equal to number
            // of consumed characters.
            if (flush && !willexit && unibufbp != (unibufu + (size_t) (unibufp - unibufu))) {
                char pos[32];
                sprintf(pos, "%u", infoffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, "premEnd", str.getBuffer());
                willexit = 1;
            }

            // Finally, write the converted buffer to the output file


            rd = (size_t) (bufp - buf);
            outfoffset += (int32_t)(wr = fwrite(buf, 1, rd, outfile));
            if (wr != rd) {
                UnicodeString str(strerror(errno), "");
                initMsg(pname);
                u_wmsg(stderr, "cantWrite", str.getBuffer());
                willexit = 1;
            }

            if (willexit) {
                goto error_exit;
            }
        } while ((ulen -= (int32_t)(bufsz)) > 0);
    } while (!flush);           // Stop when we have flushed the
                                // converters (this means that it's
                                // the end of output)

    goto normal_exit;

error_exit:
    ret = FALSE;

normal_exit:
    // Cleanup.

    if (convfrom) ucnv_close(convfrom);
    if (convto) ucnv_close(convto);

#if !UCONFIG_NO_TRANSLITERATION
    if (t) delete t;
#endif

    if (buf) delete[] buf;
    if (unibuf) delete[] unibuf;

    if (fromoffsets) delete[] fromoffsets;
    if (tooffsets) delete[] tooffsets;

    if (infile != stdin) {
        fclose(infile);
    }

    return ret;
}
コード例 #18
0
void charsetConverter_icu::convert
	(utility::inputStream& in, utility::outputStream& out, status* st)
{
	UErrorCode err = U_ZERO_ERROR;

	ucnv_reset(m_from);
	ucnv_reset(m_to);

	if (st)
		new (st) status();

	// From buffers
	byte_t cpInBuffer[16]; // stream data put here
	const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
	std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here

	// To buffers
	// converted (char) data end up here
	const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
	std::vector <char> cpOutBuffer(cpOutBufferSz);

	// Tell ICU what to do when encountering an illegal byte sequence
	if (m_options.silentlyReplaceInvalidSequences)
	{
		// Set replacement chars for when converting from Unicode to codepage
		icu::UnicodeString substString(m_options.invalidSequence.c_str());
		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
	}
	else
	{
		// Tell ICU top stop (and return an error) on illegal byte sequences
		ucnv_setToUCallBack
			(m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

		ucnv_setFromUCallBack
			(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
	}

	// Input data available
	while (!in.eof())
	{
		// Read input data into buffer
		size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));

		// Beginning of read data
		const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
		const char* sourceLimit = source + inLength; // end + 1

		UBool flush = in.eof();  // is this last run?

		UErrorCode toErr;

		// Loop until all source has been processed
		do
		{
			// Set up target pointers
			UChar* target = &uOutBuffer[0];
			UChar* targetLimit = &target[0] + outSize;

			toErr = U_ZERO_ERROR;
			ucnv_toUnicode(m_from, &target, targetLimit,
			               &source, sourceLimit, NULL, flush, &toErr);

			if (st)
				st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));

			if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr))
			{
				if (toErr == U_INVALID_CHAR_FOUND ||
				    toErr == U_TRUNCATED_CHAR_FOUND ||
				    toErr == U_ILLEGAL_CHAR_FOUND)
				{
					// Error will be thrown later (*)
				}
				else
				{
					throw exceptions::charset_conv_error("[ICU] Error converting to Unicode from " + m_source.getName());
				}
			}

			// The Unicode source is the buffer just written and the limit
			// is where the previous conversion stopped (target is moved in the conversion)
			const UChar* uSource = &uOutBuffer[0];
			UChar* uSourceLimit = &target[0];
			UErrorCode fromErr;

			// Loop until converted chars are fully written
			do
			{
				char* cpTarget = &cpOutBuffer[0];
				const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;

				fromErr = U_ZERO_ERROR;

				// Write converted bytes (Unicode) to destination codepage
				ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit,
				                 &uSource, uSourceLimit, NULL, flush, &fromErr);

				if (st)
				{
					// Decrement input bytes count by the number of input bytes in error
					char errBytes[16];
					int8_t errBytesLen = sizeof(errBytes);
					UErrorCode errBytesErr = U_ZERO_ERROR;

	 				ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);

					st->inputBytesRead -= errBytesLen;
					st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
				}

				// (*) If an error occurred while converting from input charset, throw it now
				if (toErr == U_INVALID_CHAR_FOUND ||
				    toErr == U_TRUNCATED_CHAR_FOUND ||
				    toErr == U_ILLEGAL_CHAR_FOUND)
				{
					throw exceptions::illegal_byte_sequence_for_charset();
				}

				if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr))
				{
					if (fromErr == U_INVALID_CHAR_FOUND ||
					    fromErr == U_TRUNCATED_CHAR_FOUND ||
					    fromErr == U_ILLEGAL_CHAR_FOUND)
					{
						throw exceptions::illegal_byte_sequence_for_charset();
					}
					else
					{
						throw exceptions::charset_conv_error("[ICU] Error converting from Unicode to " + m_dest.getName());
					}
				}

				// Write to destination stream
				out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));

			} while (fromErr == U_BUFFER_OVERFLOW_ERROR);

		} while (toErr == U_BUFFER_OVERFLOW_ERROR);
	}
}
コード例 #19
0
ファイル: ucbuf.c プロジェクト: flwh/Alcatel_OT_985_kernel
/* fill the uchar buffer */
static UCHARBUF*
ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
    UChar* pTarget=NULL;
    UChar* target=NULL;
    const char* source=NULL;
    char  carr[MAX_IN_BUF] = {'\0'};
    char* cbuf =  carr;
    int32_t inputRead=0;
    int32_t outputWritten=0;
    int32_t offset=0;
    const char* sourceLimit =NULL;
    int32_t cbufSize=0;
    pTarget = buf->buffer;
    /* check if we arrived here without exhausting the buffer*/
    if(buf->currentPos<buf->bufLimit){
        offset = (int32_t)(buf->bufLimit-buf->currentPos);
        memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
    }

#if DEBUG
    memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
#endif
    if(buf->isBuffered){
        cbufSize = MAX_IN_BUF;
        /* read the file */
        inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
        buf->remaining-=inputRead;
        
    }else{
        cbufSize = T_FileStream_size(buf->in);
        cbuf = (char*)uprv_malloc(cbufSize);
        if (cbuf == NULL) {
        	*error = U_MEMORY_ALLOCATION_ERROR;
        	return NULL;
        }
        inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
        buf->remaining-=inputRead;
    }

    /* just to be sure...*/
    if ( 0 == inputRead )
       buf->remaining = 0;

    target=pTarget;
    /* convert the bytes */
    if(buf->conv){
        /* set the callback to stop */
        UConverterToUCallback toUOldAction ;
        void* toUOldContext;
        void* toUNewContext=NULL;
        ucnv_setToUCallBack(buf->conv,
           UCNV_TO_U_CALLBACK_STOP,
           toUNewContext,
           &toUOldAction,
           (const void**)&toUOldContext,
           error);
        /* since state is saved in the converter we add offset to source*/
        target = pTarget+offset;
        source = cbuf;
        sourceLimit = source + inputRead;
        ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
                        &source,sourceLimit,NULL,
                        (UBool)(buf->remaining==0),error);

        if(U_FAILURE(*error)){
            char context[CONTEXT_LEN+1];
            char preContext[CONTEXT_LEN+1];
            char postContext[CONTEXT_LEN+1];
            int8_t len = CONTEXT_LEN;
            int32_t start=0;
            int32_t stop =0;
            int32_t pos =0;
            /* use erro1 to preserve the error code */
            UErrorCode error1 =U_ZERO_ERROR;
            
            if( buf->showWarning==TRUE){
                fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
                               " converting input stream to target encoding: %s\n",
                               u_errorName(*error));
            }


            /* now get the context chars */
            ucnv_getInvalidChars(buf->conv,context,&len,&error1);
            context[len]= 0 ; /* null terminate the buffer */

            pos = (int32_t)(source - cbuf - len);

            /* for pre-context */
            start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
            stop  = pos-len;

            memcpy(preContext,cbuf+start,stop-start);
            /* null terminate the buffer */
            preContext[stop-start] = 0;

            /* for post-context */
            start = pos+len;
            stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));

            memcpy(postContext,source,stop-start);
            /* null terminate the buffer */
            postContext[stop-start] = 0;

            if(buf->showWarning ==TRUE){
                /* print out the context */
                fprintf(stderr,"\tPre-context: %s\n",preContext);
                fprintf(stderr,"\tContext: %s\n",context);
                fprintf(stderr,"\tPost-context: %s\n", postContext);
            }

            /* reset the converter */
            ucnv_reset(buf->conv);

            /* set the call back to substitute
             * and restart conversion
             */
            ucnv_setToUCallBack(buf->conv,
               UCNV_TO_U_CALLBACK_SUBSTITUTE,
               toUNewContext,
               &toUOldAction,
               (const void**)&toUOldContext,
               &error1);

            /* reset source and target start positions */
            target = pTarget+offset;
            source = cbuf;

            /* re convert */
            ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
                            &source,sourceLimit,NULL,
                            (UBool)(buf->remaining==0),&error1);

        }
        outputWritten = (int32_t)(target - pTarget);


#if DEBUG
        {
            int i;
            target = pTarget;
            for(i=0;i<numRead;i++){
              /*  printf("%c", (char)(*target++));*/
            }
        }
#endif

    }else{
        u_charsToUChars(cbuf,target+offset,inputRead);
        outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
    }
    buf->currentPos = pTarget;
    buf->bufLimit=pTarget+outputWritten;
    *buf->bufLimit=0; /*NUL terminate*/
    if(cbuf!=carr){
        uprv_free(cbuf);
    }
    return buf;
}
コード例 #20
0
Bool
CodeSet_GenericToGenericDb(const char *codeIn,  // IN
                           const char *bufIn,   // IN
                           size_t sizeIn,       // IN
                           const char *codeOut, // IN
                           unsigned int flags,  // IN
                           DynBuf *db)          // IN/OUT
{
   Bool result = FALSE;
   UErrorCode uerr;
   const char *bufInCur;
   const char *bufInEnd;
   UChar bufPiv[1024];
   UChar *bufPivSource;
   UChar *bufPivTarget;
   UChar *bufPivEnd;
   char *bufOut;
   char *bufOutCur;
   char *bufOutEnd;
   size_t bufOutSize;
   size_t bufOutOffset;
   UConverter *cvin = NULL;
   UConverter *cvout = NULL;
   UConverterToUCallback toUCb;
   UConverterFromUCallback fromUCb;

   ASSERT(codeIn);
   ASSERT(sizeIn == 0 || bufIn);
   ASSERT(codeOut);
   ASSERT(db);
   ASSERT((CSGTG_NORMAL == flags) || (CSGTG_TRANSLIT == flags) ||
          (CSGTG_IGNORE == flags));

   if (dontUseIcu) {
      // fall back
      return CodeSetOld_GenericToGenericDb(codeIn, bufIn, sizeIn, codeOut,
                                           flags, db);
   }

   /*
    * Trivial case.
    */

   if ((0 == sizeIn) || (NULL == bufIn)) {
      result = TRUE;
      goto exit;
   }

   /*
    * Open converters.
    */

   uerr = U_ZERO_ERROR;
   cvin = ucnv_open(codeIn, &uerr);
   if (!cvin) {
      goto exit;
   }

   uerr = U_ZERO_ERROR;
   cvout = ucnv_open(codeOut, &uerr);
   if (!cvout) {
      goto exit;
   }

   /*
    * Set callbacks according to flags.
    */

   switch (flags) {
   case CSGTG_NORMAL:
      toUCb = UCNV_TO_U_CALLBACK_STOP;
      fromUCb = UCNV_FROM_U_CALLBACK_STOP;
      break;

   case CSGTG_TRANSLIT:
      toUCb = UCNV_TO_U_CALLBACK_SUBSTITUTE;
      fromUCb = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
      break;

   case CSGTG_IGNORE:
      toUCb = UCNV_TO_U_CALLBACK_SKIP;
      fromUCb = UCNV_FROM_U_CALLBACK_SKIP;
      break;

   default:
      NOT_IMPLEMENTED();
      break;
   }

   uerr = U_ZERO_ERROR;
   ucnv_setToUCallBack(cvin, toUCb, NULL, NULL, NULL, &uerr);
   if (U_ZERO_ERROR != uerr) {
      goto exit;
   }

   uerr = U_ZERO_ERROR;
   ucnv_setFromUCallBack(cvout, fromUCb, NULL, NULL, NULL, &uerr);
   if (U_ZERO_ERROR != uerr) {
      goto exit;
   }

   /*
    * Convert using ucnv_convertEx().
    * As a starting guess, make the output buffer the same size as
    * the input string (with a fudge constant added in to avoid degen
    * cases).
    */

   bufInCur = bufIn;
   bufInEnd = bufIn + sizeIn;
   bufOutSize = sizeIn + 4;
   bufOutOffset = 0;
   bufPivSource = bufPiv;
   bufPivTarget = bufPiv;
   bufPivEnd = bufPiv + ARRAYSIZE(bufPiv);

   for (;;) {
      if (!DynBuf_Enlarge(db, bufOutSize)) {
         goto exit;
      }
      bufOut = DynBuf_Get(db);
      bufOutCur = bufOut + bufOutOffset;
      bufOutSize = DynBuf_GetAllocatedSize(db);
      bufOutEnd = bufOut + bufOutSize;

      uerr = U_ZERO_ERROR;
      ucnv_convertEx(cvout, cvin, &bufOutCur, bufOutEnd,
		     &bufInCur, bufInEnd,
		     bufPiv, &bufPivSource, &bufPivTarget, bufPivEnd,
		     FALSE, TRUE, &uerr);

      if (!U_FAILURE(uerr)) {
         /*
          * "This was a triumph.
          *  I'm making a note here:
          *  HUGE SUCCESS.
          *  It's hard to overstate
          *  my satisfaction."
          */

         break;
      }

      if (U_BUFFER_OVERFLOW_ERROR != uerr) {
	 // failure
         goto exit;
      }

      /*
       * Our guess at 'bufOutSize' was obviously wrong, just double it.
       * We'll be reallocating bufOut, so will need to recompute bufOutCur
       * based on bufOutOffset.
       */

      bufOutSize *= 2;
      bufOutOffset = bufOutCur - bufOut;
   }

   /*
    * Set final size and return.
    */

   DynBuf_SetSize(db, bufOutCur - bufOut);

   result = TRUE;

  exit:
   if (cvin) {
      ucnv_close(cvin);
   }

   if (cvout) {
      ucnv_close(cvout);
   }

   return result;
}
コード例 #21
0
ファイル: pg_chardetect.c プロジェクト: aweber/pg_chardetect
UErrorCode
convert_to_unicode(const text* buffer, const text* encoding, UChar** uBuf, int32_t *uBuf_len, bool force, bool* dropped_bytes)
{
    UErrorCode status = U_ZERO_ERROR;

    UConverter *conv;
    int32_t uConvertedLen = 0;

    // used to set dropped_bytes flag if force is true
    ToUFLAGContext * context = NULL;

    size_t uBufSize = 0;

    const char* encoding_cstr = text_to_cstring(encoding);

    // open converter for detected encoding
    conv = ucnv_open(encoding_cstr, &status);

    if (U_FAILURE(status))
    {
        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("Cannot open %s converter - error: %s.\n", (const char *) encoding_cstr, u_errorName(status))));

        if (NULL != encoding_cstr)
            pfree((void *) encoding_cstr);

        ucnv_close(conv);
        return status;
    }

    if (force)
    {
        // set callback to skip illegal, irregular or unassigned bytes

        // set converter to use SKIP callback
        // contecxt will save and call it after calling custom callback
        ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);

        //TODO: refactor warning and error message reporting
        if (U_FAILURE(status))
        {
            ereport(WARNING,
                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
                 errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status))));

            if (NULL != encoding_cstr)
                pfree((void *) encoding_cstr);

            ucnv_close(conv);
            return status;
        }

        // initialize flagging callback
        context = flagCB_toU_openContext();

        /* Set our special callback */
        ucnv_setToUCallBack(conv,
                            flagCB_toU,
                            context,
                            &(context->subCallback),
                            &(context->subContext),
                            &status
                           );

        if (U_FAILURE(status))
        {
            ereport(WARNING,
                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
                 errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status))));

            if (NULL != encoding_cstr)
                pfree((void *) encoding_cstr);

            ucnv_close(conv);
            return status;
        }
    }

    // allocate unicode buffer
    // must pfree before exiting calling function
    uBufSize = (VARSIZE_ANY_EXHDR(buffer)/ucnv_getMinCharSize(conv) + 1);
    *uBuf = (UChar*) palloc0(uBufSize * sizeof(UChar));

    if (*uBuf == NULL)
    {
        status = U_MEMORY_ALLOCATION_ERROR;

        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("Cannot allocate %d bytes for Unicode pivot buffer - error: %s.\n", (int) uBufSize, u_errorName(status))));

        if (NULL != encoding_cstr)
            pfree((void *) encoding_cstr);

        ucnv_close(conv);
        return status;
    }

    ereport(DEBUG1,
        (errcode(ERRCODE_SUCCESSFUL_COMPLETION),
            errmsg("Original string: %s\n", (const char*) text_to_cstring(buffer))));

    // convert to Unicode
    // returns length of converted string, not counting NUL-terminator
    uConvertedLen = ucnv_toUChars(conv,
                                  *uBuf,
                                  uBufSize,
                                  (const char*) text_to_cstring(buffer),
                                  STRING_IS_NULL_TERMINATED,
                                  &status
                                 );

    if (U_SUCCESS(status))
    {
        // add 1 for NUL terminator
        *uBuf_len = uConvertedLen + 1;

        ereport(DEBUG1,
            (errcode(ERRCODE_SUCCESSFUL_COMPLETION),
                errmsg("Converted string: %s\n", (const char*) *uBuf)));

        // see if any bytes where dropped
        // context struct will go away with converter is closed
        if (NULL != context)
            *dropped_bytes = context->flag;
        else
            *dropped_bytes = false;
    }

    if (U_FAILURE(status))
    {
        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("ICU conversion from %s to Unicode failed - error: %s.\n", encoding_cstr, u_errorName(status))));
    }

    if (NULL != encoding_cstr)
        pfree((void *) encoding_cstr);

    ucnv_close(conv);
    return status;
}
コード例 #22
0
uint16_t *
ppb_char_set_char_set_to_utf16(PP_Instance instance, const char *input, uint32_t input_len,
                               const char *input_char_set, enum PP_CharSet_ConversionError on_error,
                               uint32_t *output_length)
{
    // each character could be converted into a surrogate pair
    const uint32_t output_buffer_length = (input_len + 2) * 2 * sizeof(uint16_t);
    uint16_t *output = ppb_memory_mem_alloc(output_buffer_length);

    if (!output) {
        trace_error("%s, can't allocate memory, %u bytes\n", __func__, output_buffer_length);
        goto err;
    }

    const char *charset = encoding_alias_get_canonical_name(input_char_set);

    const UChar subst = '?';
    UErrorCode st = U_ZERO_ERROR;
    UConverter *u = ucnv_open(charset, &st);
    if (!U_SUCCESS(st)) {
        trace_error("%s, wrong charset %s\n", __func__, input_char_set);
        goto err;
    }

    switch (on_error) {
    default:
    case PP_CHARSET_CONVERSIONERROR_FAIL:
        st = U_ZERO_ERROR;
        ucnv_setToUCallBack(u, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &st);
        break;

    case PP_CHARSET_CONVERSIONERROR_SKIP:
        st = U_ZERO_ERROR;
        ucnv_setToUCallBack(u, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &st);
        break;

    case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE:
        st = U_ZERO_ERROR;
        ucnv_setToUCallBack(u, UCNV_TO_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &st);

        st = U_ZERO_ERROR;
        ucnv_setSubstString(u, &subst, 1, &st);
        break;
    }

    st = U_ZERO_ERROR;
    *output_length = ucnv_toUChars(u, output, output_buffer_length / sizeof(uint16_t),
                                   input, input_len, &st);

    if (st != U_BUFFER_OVERFLOW_ERROR && !U_SUCCESS(st))
        goto err;

    ucnv_close(u);
    return output;

err:
    *output_length = 0;
    ppb_memory_mem_free(output);
    if (u)
        ucnv_close(u);
    return NULL;
}