char *
ppb_char_set_utf16_to_char_set(PP_Instance instance, const uint16_t *utf16, uint32_t utf16_len,
                               const char *output_char_set,
                               enum PP_CharSet_ConversionError on_error, uint32_t *output_length)
{
    // each character could take up to 4 bytes in UTF-8; with additional zero-terminator byte
    const uint32_t output_buffer_length = (utf16_len + 1) * 4 + 1;
    char *output = ppb_memory_mem_alloc(output_buffer_length);

    if (!output) {
        trace_error("%s, can't allocate memory, %u bytes\n", __func__, output_buffer_length);
        goto err;
    }

    const char *charset = encoding_alias_get_canonical_name(output_char_set);

    const UChar subst = '?';
    UErrorCode st = U_ZERO_ERROR;
    UConverter *u = ucnv_open(charset, &st);
    if (!U_SUCCESS(st)) {
        trace_error("%s, wrong charset %s\n", __func__, output_char_set);
        goto err;
    }

    switch (on_error) {
    default:
    case PP_CHARSET_CONVERSIONERROR_FAIL:
        st = U_ZERO_ERROR;
        ucnv_setFromUCallBack(u, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &st);
        break;

    case PP_CHARSET_CONVERSIONERROR_SKIP:
        st = U_ZERO_ERROR;
        ucnv_setFromUCallBack(u, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &st);
        break;

    case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE:
        st = U_ZERO_ERROR;
        ucnv_setFromUCallBack(u, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &st);

        st = U_ZERO_ERROR;
        ucnv_setSubstString(u, &subst, 1, &st);
        break;
    }

    *output_length = ucnv_fromUChars(u, output, output_buffer_length, utf16, utf16_len, &st);

    if (st != U_BUFFER_OVERFLOW_ERROR && !U_SUCCESS(st))
        goto err;

    ucnv_close(u);
    return output;

err:
    *output_length = 0;
    ppb_memory_mem_free(output);
    if (u)
        ucnv_close(u);
    return NULL;
}
Example #2
0
CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return CString();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    String copy(characters, length);
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    const UChar* source = copy.characters();
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;

    switch (handling) {
        case QuestionMarksForUnencodables:
            ucnv_setSubstChars(m_converterICU, "?", 1, &err);
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
            break;
        case EntitiesForUnencodables:
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
            break;
        case URLEncodedEntitiesForUnencodables:
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
            break;
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return CString();

    Vector<char> result;
    size_t size = 0;
    do {
        char buffer[ConversionBufferSize];
        char* target = buffer;
        char* targetLimit = target + ConversionBufferSize;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
        size_t count = target - buffer;
        result.grow(size + count);
        memcpy(result.data() + size, buffer, count);
        size += count;
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return CString(result.data(), size);
}
static UConverter *
GSStringOpenConverter (CFStringEncoding encoding, char lossByte)
{
  const char *converterName;
  UConverter *cnv;
  UErrorCode err = U_ZERO_ERROR;

  converterName = CFStringICUConverterName (encoding);

  cnv = ucnv_open (converterName, &err);
  if (U_FAILURE (err))
    cnv = NULL;

  if (lossByte)
    {
      /* FIXME: for some reason this is returning U_ILLEGAL_ARGUMENTS_ERROR */
      ucnv_setSubstChars (cnv, &lossByte, 1, &err);
    }
  else
    {
      ucnv_setToUCallBack (cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
                           &err);
      ucnv_setFromUCallBack (cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL,
                             &err);
    }

  return cnv;
}
Example #4
0
extern int
main(int argc, const char *argv[]) {
    UErrorCode errorCode=U_ZERO_ERROR;

    // Note: Using a global variable for any object is not exactly thread-safe...

    // You can change this call to e.g. ucnv_open("UTF-8", &errorCode) if you pipe
    // the output to a file and look at it with a Unicode-capable editor.
    // This will currently affect only the printUString() function, see the code above.
    // printUnicodeString() could use this, too, by changing to an extract() overload
    // that takes a UConverter argument.
    cnv=ucnv_open(NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error %s opening the default converter\n", u_errorName(errorCode));
        return errorCode;
    }

    ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error %s setting the escape callback in the default converter\n", u_errorName(errorCode));
        ucnv_close(cnv);
        return errorCode;
    }

    demo_utf_h_macros();
    demo_C_Unicode_strings();
    demoCaseMapInC();
    demoCaseMapInCPlusPlus();
    demoUnicodeStringStorage();
    demoUnicodeStringInit();

    ucnv_close(cnv);
    return 0;
}
static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
        jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
    UConverter* cnv = toUConverter(address);
    if (cnv == NULL) {
        maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
        return;
    }

    UConverterFromUCallback oldCallback = NULL;
    const void* oldCallbackContext = NULL;
    ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));

    EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
            reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
    if (callbackContext == NULL) {
        callbackContext = new EncoderCallbackContext;
    }

    callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
    callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);

    ScopedByteArrayRO replacementBytes(env, javaReplacement);
    if (replacementBytes.get() == NULL) {
        maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR);
        return;
    }
    memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
    callbackContext->replacementByteCount = replacementBytes.size();

    UErrorCode errorCode = U_ZERO_ERROR;
    ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
    maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode);
}
Example #6
0
char *aescstrdup(const UChar* unichars,int32_t length){
    char *newString,*targetLimit,*target;
    UConverterFromUCallback cb;
    const void *p;
    UErrorCode errorCode = U_ZERO_ERROR;
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   if U_PLATFORM == U_PF_OS390
        static const char convName[] = "ibm-1047";
#   else
        static const char convName[] = "ibm-37";
#   endif
#else
    static const char convName[] = "US-ASCII";
#endif
    UConverter* conv = ucnv_open(convName, &errorCode);
    if(length==-1){
        length = u_strlen( unichars);
    }
    newString = (char*)ctst_malloc ( sizeof(char) * 8 * (length +1));
    target = newString;
    targetLimit = newString+sizeof(char) * 8 * (length +1);
    ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, &cb, &p, &errorCode);
    ucnv_fromUnicode(conv,&target,targetLimit, &unichars, (UChar*)(unichars+length),NULL,TRUE,&errorCode);
    ucnv_close(conv);
    *target = '\0';
    return newString;
}
Example #7
0
CF_INLINE UConverter *__CFStringEncodingConverterCreateICUConverter(const char *icuName, uint32_t flags, bool toUnicode) {
    UConverter *converter;
    UErrorCode errorCode = U_ZERO_ERROR;
    uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);

    if (0 != streamID) { // this is a part of streaming previously created
        __CFICUThreadData *data = __CFStringEncodingICUGetThreadData();

        --streamID; // map to array index

        if ((streamID < data->_numSlots) && (NULL != data->_converters[streamID])) return data->_converters[streamID];
    }

    converter = ucnv_open(icuName, &errorCode);

    if (NULL != converter) {
        char lossyByte = CFStringEncodingMaskToLossyByte(flags);

        if ((0 == lossyByte) && (0 != (flags & kCFStringEncodingAllowLossyConversion))) lossyByte = '?';

        if (0 ==lossyByte) {
            if (toUnicode) {
                ucnv_setToUCallBack(converter, &UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
            } else {
                ucnv_setFromUCallBack(converter, &UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
            }
        } else {
            ucnv_setSubstChars(converter, &lossyByte, 1, &errorCode);
        }
    }

    return converter;
}
U_STABLE void U_EXPORT2
ucnv_setFromUCallBack_53(UConverter * converter,
                       UConverterFromUCallback newAction,
                       const void *newContext,
                       UConverterFromUCallback *oldAction,
                       const void **oldContext,
                       UErrorCode * err)
{
    ucnv_setFromUCallBack(converter, newAction, newContext, oldAction, oldContext, err);
}
Example #9
0
CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHandling handling)
{
    const UChar* source = input.begin();
    const UChar* end = input.end();

    UErrorCode err = U_ZERO_ERROR;

    switch (handling) {
        case QuestionMarksForUnencodables:
            ucnv_setSubstChars(m_converterICU, "?", 1, &err);
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
            break;
        case EntitiesForUnencodables:
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
            break;
        case URLEncodedEntitiesForUnencodables:
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
            break;
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return CString();

    Vector<char> result;
    size_t size = 0;
    do {
        char buffer[ConversionBufferSize];
        char* target = buffer;
        char* targetLimit = target + ConversionBufferSize;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0, true, &err);
        size_t count = target - buffer;
        result.grow(size + count);
        memcpy(result.data() + size, buffer, count);
        size += count;
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return CString(result.data(), size);
}
Example #10
0
status_t
ICUCategoryData::_GetConverter(UConverter*& converterOut)
{
	// we use different converters per thread to avoid concurrent accesses
	ICUThreadLocalStorageValue* tlsValue = NULL;
	status_t result = ICUThreadLocalStorageValue::GetInstanceForKey(
		fThreadLocalStorageKey, tlsValue);
	if (result != B_OK)
		return result;

	if (tlsValue->converter != NULL) {
		if (strcmp(tlsValue->charset, fGivenCharset) == 0) {
			converterOut = tlsValue->converter;
			return B_OK;
		}

		// charset no longer matches the converter, we need to dump it and
		// create a new one
		ucnv_close(tlsValue->converter);
		tlsValue->converter = NULL;
	}

	// create a new converter for the current charset
	UErrorCode icuStatus = U_ZERO_ERROR;
	UConverter* icuConverter = ucnv_open(fGivenCharset, &icuStatus);
	if (icuConverter == NULL)
		return B_NAME_NOT_FOUND;

	// setup the new converter to stop upon any errors
	icuStatus = U_ZERO_ERROR;
	ucnv_setToUCallBack(icuConverter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
		&icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(icuConverter);
		return B_ERROR;
	}
	icuStatus = U_ZERO_ERROR;
	ucnv_setFromUCallBack(icuConverter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
		NULL, &icuStatus);
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(icuConverter);
		return B_ERROR;
	}

	tlsValue->converter = icuConverter;
	strlcpy(tlsValue->charset, fGivenCharset, sizeof(tlsValue->charset));

	converterOut = icuConverter;

	return B_OK;
}
Example #11
0
charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu
	(const charset& source, const charset& dest, outputStream* os,
	 const charsetConverterOptions& opts)
	: m_from(NULL), m_to(NULL), m_sourceCharset(source),
	  m_destCharset(dest), m_stream(*os), m_options(opts)
{
	UErrorCode err = U_ZERO_ERROR;
	m_from = ucnv_open(source.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for source charset '" + source.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	m_to = ucnv_open(dest.getName().c_str(), &err);

	if (!U_SUCCESS(err))
	{
		throw exceptions::charset_conv_error
			("Cannot initialize ICU converter for destination charset '" + dest.getName() + "' (error code: " + u_errorName(err) + ".");
	}

	// Tell ICU what to do when encountering an illegal byte sequence
	if (m_options.silentlyReplaceInvalidSequences)
	{
		// Set replacement chars for when converting from Unicode to codepage
		icu::UnicodeString substString(m_options.invalidSequence.c_str());
		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
	}
	else
	{
		// Tell ICU top stop (and return an error) on illegal byte sequences
		ucnv_setToUCallBack
			(m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

		ucnv_setFromUCallBack
			(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
	}
}
Example #12
0
static UConverter *
xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
{
    UConverter *uconv;
    UErrorCode  status = U_ZERO_ERROR;

    uconv = ucnv_open((char *) encoding, &status);
    if ( U_FAILURE(status) ) {
        return NULL;
    }

    if (toUnicode) {
        ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
                            NULL, NULL, NULL, &status);
    }
    else {
        ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
                              NULL, NULL, NULL, &status);
    }

    return uconv;
}
Example #13
0
// Convert a file from one encoding to another
static UBool convertFile(const char *pname,
                         const char *fromcpage,
                         UConverterToUCallback toucallback,
                         const void *touctxt,
                         const char *tocpage,
                         UConverterFromUCallback fromucallback,
                         const void *fromuctxt,
                         int fallback,
                         size_t bufsz,
                         const char *translit,
                         const char *infilestr,
                         FILE * outfile, int verbose)
{
    FILE *infile;
    UBool ret = TRUE;
    UConverter *convfrom = 0;
    UConverter *convto = 0;
    UErrorCode err = U_ZERO_ERROR;
    UBool flush;
    const char *cbufp;
    char *bufp;
    char *buf = 0;

    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */

    const UChar *unibufbp;
    UChar *unibufp;
    UChar *unibuf = 0;
    int32_t *fromoffsets = 0, *tooffsets = 0;

    size_t rd, wr, tobufsz;

#if !UCONFIG_NO_TRANSLITERATION
    Transliterator *t = 0;      // Transliterator acting on Unicode data.
#endif
    UnicodeString u;            // String to do the transliteration.

    // Open the correct input file or connect to stdin for reading input

    if (infilestr != 0 && strcmp(infilestr, "-")) {
        infile = fopen(infilestr, "rb");
        if (infile == 0) {
            UnicodeString str1(infilestr, "");
            str1.append((UChar32) 0);
            UnicodeString str2(strerror(errno), "");
            str2.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
            return FALSE;
        }
    } else {
        infilestr = "-";
        infile = stdin;
#ifdef WIN32
        if (setmode(fileno(stdin), O_BINARY) == -1) {
            initMsg(pname);
            u_wmsg(stderr, "cantSetInBinMode");
            return FALSE;
        }
#endif
    }

    if (verbose) {
        fprintf(stderr, "%s:\n", infilestr);
    }

#if !UCONFIG_NO_TRANSLITERATION
    // Create transliterator as needed.

    if (translit != NULL && *translit) {
        UParseError parse;
        UnicodeString str(translit), pestr;

        /* Create from rules or by ID as needed. */

        parse.line = -1;

        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
        } else {
            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
        }

        if (U_FAILURE(err)) {
            str.append((UChar32) 0);
            initMsg(pname);

            if (parse.line >= 0) {
                UChar linebuf[20], offsetbuf[20];
                uprv_itou(linebuf, 20, parse.line, 10, 0);
                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getBuffer(),
                    u_wmsg_errorName(err), linebuf, offsetbuf);
            } else {
                u_wmsg(stderr, "cantCreateTranslit", str.getBuffer(),
                    u_wmsg_errorName(err));
            }

            if (t) {
                delete t;
                t = 0;
            }
            goto error_exit;
        }
    }
#endif

    // Create codepage converter. If the codepage or its aliases weren't
    // available, it returns NULL and a failure code. We also set the
    // callbacks, and return errors in the same way.

    convfrom = ucnv_open(fromcpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(fromcpage, (int32_t)(uprv_strlen(fromcpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenFromCodeset", str.getBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }

    convto = ucnv_open(tocpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(tocpage, (int32_t)(uprv_strlen(tocpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenToCodeset", str.getBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFallback(convto, fallback);

    // To ensure that the buffer always is of enough size, we
    // must take the worst case scenario, that is the character in
    // the codepage that uses the most bytes and multiply it against
    // the buffer size.

    // use bufsz+1 to allow for additional BOM/signature character (U+FEFF)
    tobufsz = (bufsz+1) * ucnv_getMaxCharSize(convto);

    buf = new char[tobufsz];
    unibuf = new UChar[bufsz];

    fromoffsets = new int32_t[bufsz];
    tooffsets = new int32_t[tobufsz];

    // OK, we can convert now.

    do {
        char willexit = 0;

        rd = fread(buf, 1, bufsz, infile);
        if (ferror(infile) != 0) {
            UnicodeString str(strerror(errno));
            str.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantRead", str.getBuffer());
            goto error_exit;
        }

        // Convert the read buffer into the new coding
        // After the call 'unibufp' will be placed on the last
        // character that was converted in the 'unibuf'.
        // Also the 'cbufp' is positioned on the last converted
        // character.
        // At the last conversion in the file, flush should be set to
        // true so that we get all characters converted
        //
        // The converter must be flushed at the end of conversion so
        // that characters on hold also will be written.

        unibufp = unibuf;
        cbufp = buf;
        flush = rd != bufsz;
        ucnv_toUnicode(convfrom, &unibufp, unibufp + bufsz, &cbufp,
            cbufp + rd, fromoffsets, flush, &err);

        infoffset += (uint32_t)(cbufp - buf);

        if (U_FAILURE(err)) {
            char pos[32];
            sprintf(pos, "%u", infoffset - 1);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
            willexit = 1;
            err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
        }

        // At the last conversion, the converted characters should be
        // equal to number of chars read.

        if (flush && !willexit && cbufp != (buf + rd)) {
            char pos[32];
            sprintf(pos, "%u", infoffset);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "premEndInput", str.getBuffer());
            willexit = 1;
        }

        // Prepare to transliterate and convert. Transliterate if needed.

#if !UCONFIG_NO_TRANSLITERATION
        if (t) {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf)); // Copy into string.
            t->transliterate(u);
        } else
#endif
        {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf), (int32_t)(bufsz)); // Share the buffer.
        }

        int32_t ulen = u.length();

        // Convert the Unicode buffer into the destination codepage
        // Again 'bufp' will be placed on the last converted character
        // And 'unibufbp' will be placed on the last converted unicode character
        // At the last conversion flush should be set to true to ensure that
        // all characters left get converted

        const UChar *unibufu = unibufbp = u.getBuffer();

        do {
            int32_t len = ulen > (int32_t)bufsz ? (int32_t)bufsz : ulen;

            bufp = buf;
            unibufp = (UChar *) (unibufbp + len);

            ucnv_fromUnicode(convto, &bufp, bufp + tobufsz,
                             &unibufbp,
                             unibufp,
                             tooffsets, flush, &err);

            if (U_FAILURE(err)) {
                const char *errtag;
                char pos[32];

                uint32_t erroffset =
                    dataOffset((int32_t)(bufp - buf - 1), fromoffsets, (int32_t)(bufsz), tooffsets, (int32_t)(tobufsz));
                int32_t ferroffset = (int32_t)(infoffset - (unibufp - unibufu) + erroffset);

                if ((int32_t) ferroffset < 0) {
                    ferroffset = (int32_t)(outfoffset + (bufp - buf));
                    errtag = "problemCvtFromUOut";
                } else {
                    errtag = "problemCvtFromU";
                }
                sprintf(pos, "%u", ferroffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, errtag, str.getBuffer(),
                       u_wmsg_errorName(err));
                willexit = 1;
            }

            // At the last conversion, the converted characters should be equal to number
            // of consumed characters.
            if (flush && !willexit && unibufbp != (unibufu + (size_t) (unibufp - unibufu))) {
                char pos[32];
                sprintf(pos, "%u", infoffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, "premEnd", str.getBuffer());
                willexit = 1;
            }

            // Finally, write the converted buffer to the output file


            rd = (size_t) (bufp - buf);
            outfoffset += (int32_t)(wr = fwrite(buf, 1, rd, outfile));
            if (wr != rd) {
                UnicodeString str(strerror(errno), "");
                initMsg(pname);
                u_wmsg(stderr, "cantWrite", str.getBuffer());
                willexit = 1;
            }

            if (willexit) {
                goto error_exit;
            }
        } while ((ulen -= (int32_t)(bufsz)) > 0);
    } while (!flush);           // Stop when we have flushed the
                                // converters (this means that it's
                                // the end of output)

    goto normal_exit;

error_exit:
    ret = FALSE;

normal_exit:
    // Cleanup.

    if (convfrom) ucnv_close(convfrom);
    if (convto) ucnv_close(convto);

#if !UCONFIG_NO_TRANSLITERATION
    if (t) delete t;
#endif

    if (buf) delete[] buf;
    if (unibuf) delete[] unibuf;

    if (fromoffsets) delete[] fromoffsets;
    if (tooffsets) delete[] tooffsets;

    if (infile != stdin) {
        fclose(infile);
    }

    return ret;
}
Example #14
0
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    int32_t i = 0;

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* error handling, printing usage message */
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    if (options[3].doesOccur) {
      if(options[2].doesOccur) {
        fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
        return 3;
      }
      tostdout = 1;
    }

    if(options[4].doesOccur) {
        opt_truncate = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        opt_truncate = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
      suppressAliases = TRUE;
    }

    fflush(stderr); // use ustderr now.
    ustderr = u_finit(stderr, NULL, NULL);

    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */

        arg = getLongPathname(argv[i]);

        if (verbose) {
          u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
        }

        icu::CharString locale;
        UErrorCode status = U_ZERO_ERROR;
        {
            const char *p = findBasename(arg);
            const char *q = uprv_strrchr(p, '.');
            if (q == NULL) {
                locale.append(p, status);
            } else {
                locale.append(p, (int32_t)(q - p), status);
            }
        }
        if (U_FAILURE(status)) {
            return status;
        }

        icu::CharString infile;
        const char *thename = NULL;
        UBool fromICUData = !uprv_strcmp(inputDir, "-");
        if (!fromICUData) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#if U_PLATFORM_HAS_WIN32_API
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                infile.append(inputDir, status);
                if(q != NULL) {
                    infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
                }
                if (U_FAILURE(status)) {
                    return status;
                }
                thename = infile.data();
            }
        }
        if (thename) {
            bundle = ures_openDirect(thename, locale.data(), &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
        }
        if (U_SUCCESS(status)) {
            UFILE *out = NULL;

            const char *filename = 0;
            const char *ext = 0;

            if (locale.isEmpty() || !tostdout) {
                filename = findBasename(arg);
                ext = uprv_strrchr(filename, '.');
                if (!ext) {
                    ext = uprv_strchr(filename, 0);
                }
            }

            if (tostdout) {
                out = u_get_stdout();
            } else {
                icu::CharString thefile;
                if (outputDir) {
                    thefile.append(outputDir, status);
                }
                thefile.appendPathPart(filename, status);
                if (*ext) {
                    thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
                }
                thefile.append(".txt", status);
                if (U_FAILURE(status)) {
                    return status;
                }

                out = u_fopen(thefile.data(), "w", NULL, encoding);
                if (!out) {
                  u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
                  u_fclose(ustderr);
                  return 4;
                }
            }

            // now, set the callback.
            ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
            if (U_FAILURE(status)) {
              u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
              u_fclose(ustderr);
              if(!tostdout) {
                u_fclose(out);
              }
              return 3;
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
              u_fputc(0xFEFF, out);
            }
            u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
            u_fprintf(out, "// This file was dumped by derb(8) from ");
            if (thename) {
              u_fprintf(out, "%s", thename);
            } else if (fromICUData) {
              u_fprintf(out, "the ICU internal %s locale", locale.data());
            }

            u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");

            if (!locale.isEmpty()) {
              u_fprintf(out, "%s", locale.data());
            } else {
              u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename),  filename, UPRV_LENGTHOF(sp), sp);
            }
            printOutBundle(out, bundle, 0, pname, &status);

            if (!tostdout) {
                u_fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    return 0;
}
Example #15
0
DeprecatedCString StreamingTextDecoderICU::fromUnicode(const DeprecatedString &qcs, bool allowEntities)
{
    TextEncodingID encoding = m_encoding.effectiveEncoding().encodingID();

    if (encoding == WinLatin1Encoding && qcs.isAllLatin1())
        return qcs.latin1();

    if ((encoding == WinLatin1Encoding || encoding == UTF8Encoding || encoding == ASCIIEncoding) 
        && qcs.isAllASCII())
        return qcs.ascii();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    DeprecatedString copy = qcs;
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return DeprecatedCString();

    // FIXME: when DeprecatedString buffer is latin1, it would be nice to
    // convert from that w/o having to allocate a unicode buffer

    char buffer[ConversionBufferSize];
    const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;
    DeprecatedString normalizedString;
    if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
        normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed
        
        int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), copy.length(), &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            err = U_ZERO_ERROR;
            normalizedString.truncate(normalizedLength);
            normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), normalizedLength, &err);
        }
        
        source = reinterpret_cast<const UChar*>(normalizedString.unicode());
        sourceLimit = source + normalizedLength;
    }

    DeprecatedCString result(1); // for trailing zero

    if (allowEntities)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
    else {
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return DeprecatedCString();

    do {
        char* target = buffer;
        char* targetLimit = target + ConversionBufferSize;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true,  &err);
        int count = target - buffer;
        buffer[count] = 0;
        result.append(buffer);
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return result;
}
Example #16
0
U_CAPI void U_EXPORT2 flagCB_fromU(
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
				  UErrorCode * err)
{
  /* First step - based on the reason code, take action */

  if(reason == UCNV_UNASSIGNED) { /* whatever set should be trapped here */
    ((FromUFLAGContext*)context)->flag = TRUE;
  }

  if(reason == UCNV_CLONE) {
      /* The following is the recommended way to implement UCNV_CLONE
         in a callback. */
      UConverterFromUCallback   saveCallback;
      const void *saveContext;
      FromUFLAGContext *old, *cloned;
      UErrorCode subErr = U_ZERO_ERROR;

#if DEBUG_TMI
      printf("*** FLAGCB: cloning %p ***\n", context);
#endif
      old = (FromUFLAGContext*)context;
      cloned = flagCB_fromU_openContext();
      
      memcpy(cloned, old, sizeof(FromUFLAGContext));

#if DEBUG_TMI
      printf("%p: my subcb=%p:%p\n", old, old->subCallback, 
             old->subContext);
      printf("%p: cloned subcb=%p:%p\n", cloned, cloned->subCallback, 
             cloned->subContext);
#endif

      /* We need to get the sub CB to handle cloning, 
       * so we have to set up the following, temporarily:
       *
       *   - Set the callback+context to the sub of this (flag) cb
       *   - preserve the current cb+context, it could be anything
       *
       *   Before:
       *      CNV  ->   FLAG ->  subcb -> ...
       *
       *   After:
       *      CNV  ->   subcb -> ...
       *
       *    The chain from 'something' on is saved, and will be restored
       *   at the end of this block.
       *
       */
      
      ucnv_setFromUCallBack(fromUArgs->converter,
                            cloned->subCallback,
                            cloned->subContext,
                            &saveCallback,
                            &saveContext,
                            &subErr);
      
      if( cloned->subCallback != NULL ) {
          /* Now, call the sub callback if present */
          cloned->subCallback(cloned->subContext, fromUArgs, codeUnits,
                              length, codePoint, reason, err);
      }
      
      ucnv_setFromUCallBack(fromUArgs->converter,
                            saveCallback,  /* Us */
                            cloned,        /* new context */
                            &cloned->subCallback,  /* IMPORTANT! Accept any change in CB or context */
                            &cloned->subContext,
                            &subErr);

      if(U_FAILURE(subErr)) {
          *err = subErr;
      }
  }

  /* process other reasons here if need be */

  /* Always call the subCallback if present */
  if(((FromUFLAGContext*)context)->subCallback != NULL &&
      reason != UCNV_CLONE) {
      ((FromUFLAGContext*)context)->subCallback(  ((FromUFLAGContext*)context)->subContext,
                                                  fromUArgs,
                                                  codeUnits,
                                                  length,
                                                  codePoint,
                                                  reason,
                                                  err);
  }

  /* cleanup - free the memory AFTER calling the sub CB */
  if(reason == UCNV_CLOSE) {
      free((void*)context);
  }
}
Example #17
0
UErrorCode
convert_to_utf8(const UChar* buffer, int32_t buffer_len, char** converted_buf, int32_t *converted_buf_len, bool force, bool* dropped_bytes)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv;
    int32_t utfConvertedLen = 0;

    // used to set dropped_bytes flag if force is true
    FromUFLAGContext * context = NULL;

    // open UTF8 converter
    conv = ucnv_open("utf-8", &status);

    if (U_FAILURE(status))
    {
        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("Cannot open utf-8 converter - error: %s.\n", u_errorName(status))));

        ucnv_close(conv);
        return status;
    }

    if (force)
    {
        // set callback to skip illegal, irregular or unassigned bytes

        // set converter to use SKIP callback
        // contecxt will save and call it after calling custom callback
        ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);

        //TODO: refactor warning and error message reporting
        if (U_FAILURE(status))
        {
            ereport(WARNING,
                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
                 errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status))));

            ucnv_close(conv);
            return status;
        }

        // initialize flagging callback
        context = flagCB_fromU_openContext();

        /* Set our special callback */
        ucnv_setFromUCallBack(conv,
                              flagCB_fromU,
                              context,
                              &(context->subCallback),
                              &(context->subContext),
                              &status
                             );

        if (U_FAILURE(status))
        {
            ereport(WARNING,
                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
                 errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status))));

           ucnv_close(conv);
            return status;
        }
    }

    // convert to UTF8
    // input buffer from ucnv_toUChars, which always returns a
    // NUL-terminated buffer
    utfConvertedLen = ucnv_fromUChars(conv,
                                      *converted_buf,
                                      *converted_buf_len,
                                      buffer,
                                      STRING_IS_NULL_TERMINATED,
                                      &status
                                     );

    if (U_SUCCESS(status))
    {
        *converted_buf_len = utfConvertedLen;

        ereport(DEBUG1,
            (errcode(ERRCODE_SUCCESSFUL_COMPLETION),
                errmsg("Converted string: %s\n", (const char*) *converted_buf)));

        // see if any bytes where dropped
        // context struct will go away when converter is closed
        if (NULL != context)
            *dropped_bytes = context->flag;
        else
            *dropped_bytes = false;
    }

    if (U_FAILURE(status))
    {
        ereport(WARNING,
            (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
             errmsg("ICU conversion from Unicode to UTF8 failed - error: %s.\n", u_errorName(status))));
    }

    // close the converter
    ucnv_close(conv);
    return status;
}
Example #18
0
int main( void )
{
    UFILE *out;
    UErrorCode status  = U_ZERO_ERROR;
    out = u_finit(stdout, NULL, NULL);
    if(!out) {
        fprintf(stderr, "Could not initialize (finit()) over stdout! \n");
        return 1;
    }
    ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE,
                          NULL, NULL, NULL, &status);
    if(U_FAILURE(status)) {
        u_fprintf(out, "Warning- couldn't set the substitute callback - err %s\n", u_errorName(status));
    }

    /* End Demo boilerplate */

    u_fprintf(out,"ICU Case Mapping Sample Program\n\n");
    u_fprintf(out, "C++ Case Mapping\n\n");

    UnicodeString string("This is a test");
    /* lowercase = "istanbul" */
    UChar lowercase[] = {0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0};
    /* uppercase = "LATIN CAPITAL I WITH DOT ABOVE STANBUL" */
    UChar uppercase[] = {0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4C, 0};

    UnicodeString upper(uppercase);
    UnicodeString lower(lowercase);

    u_fprintf(out, "\nstring: ");
    printUnicodeString(out, string);
    string.toUpper(); /* string = "THIS IS A TEST" */
    u_fprintf(out, "\ntoUpper(): ");
    printUnicodeString(out, string);
    string.toLower(); /* string = "this is a test" */
    u_fprintf(out, "\ntoLower(): ");
    printUnicodeString(out, string);

    u_fprintf(out, "\n\nlowercase=%S, uppercase=%S\n", lowercase, uppercase);


    string = upper;
    string.toLower(Locale("tr", "TR")); /* Turkish lower case map string =
                                        lowercase */
    u_fprintf(out, "\nupper.toLower: ");
    printUnicodeString(out, string);

    string = lower;
    string.toUpper(Locale("tr", "TR")); /* Turkish upper case map string =
                                        uppercase */
    u_fprintf(out, "\nlower.toUpper: ");
    printUnicodeString(out, string);


    u_fprintf(out, "\nEnd C++ sample\n\n");

    // Call the C version
    int rc = c_main(out);
    u_fclose(out);
    return rc;
}
Example #19
0
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
 *uses a clean copy (resetted) of the converter, to convert that unicode
 *escape sequence to the target codepage (if conversion failure happens then
 *we revert to substituting with subchar)
 */
U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_ESCAPE (
                         const void *context,
                         UConverterFromUnicodeArgs *fromArgs,
                         const UChar *codeUnits,
                         int32_t length,
                         UChar32 codePoint,
                         UConverterCallbackReason reason,
                         UErrorCode * err)
{

  UChar valueString[VALUE_STRING_LENGTH];
  int32_t valueStringLength = 0;
  int32_t i = 0;

  const UChar *myValueSource = NULL;
  UErrorCode err2 = U_ZERO_ERROR;
  UConverterFromUCallback original = NULL;
  const void *originalContext;

  UConverterFromUCallback ignoredCallback = NULL;
  const void *ignoredContext;
  
  if (reason > UCNV_IRREGULAR)
  {
      return;
  }

  ucnv_setFromUCallBack (fromArgs->converter,
                     (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
                     NULL,
                     &original,
                     &originalContext,
                     &err2);
  
  if (U_FAILURE (err2))
  {
    *err = err2;
    return;
  } 
  if(context==NULL)
  { 
      while (i < length)
      {
        valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
        valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
        valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
      }
  }
  else
  {
      switch(*((char*)context))
      {
      case UCNV_PRV_ESCAPE_JAVA:
          while (i < length)
          {
              valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
          }
          break;

      case UCNV_PRV_ESCAPE_C:
          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */

          if(length==2){
              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);

          }
          else{
              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
          }
          break;

      case UCNV_PRV_ESCAPE_XML_DEC:

          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
          if(length==2){
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
          }
          else{
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
          }
          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
          break;

      case UCNV_PRV_ESCAPE_XML_HEX:

          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
          valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
          if(length==2){
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
          }
          else{
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
          }
          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
          break;

      case UCNV_PRV_ESCAPE_UNICODE:
          valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
          valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
          valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
          if (length == 2) {
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
          } else {
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
          }
          valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
          break;

      case UCNV_PRV_ESCAPE_CSS2:
          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
          valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
          /* Always add space character, becase the next character might be whitespace,
             which would erroneously be considered the termination of the escape sequence. */
          valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
          break;

      default:
          while (i < length)
          {
              valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
          }
      }
  }  
  myValueSource = valueString;

  /* reset the error */
  *err = U_ZERO_ERROR;

  ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);

  ucnv_setFromUCallBack (fromArgs->converter,
                         original,
                         originalContext,
                         &ignoredCallback,
                         &ignoredContext,
                         &err2);
  if (U_FAILURE (err2))
  {
      *err = err2;
      return;
  }

  return;
}
Bool
CodeSet_GenericToGenericDb(const char *codeIn,  // IN
                           const char *bufIn,   // IN
                           size_t sizeIn,       // IN
                           const char *codeOut, // IN
                           unsigned int flags,  // IN
                           DynBuf *db)          // IN/OUT
{
   Bool result = FALSE;
   UErrorCode uerr;
   const char *bufInCur;
   const char *bufInEnd;
   UChar bufPiv[1024];
   UChar *bufPivSource;
   UChar *bufPivTarget;
   UChar *bufPivEnd;
   char *bufOut;
   char *bufOutCur;
   char *bufOutEnd;
   size_t bufOutSize;
   size_t bufOutOffset;
   UConverter *cvin = NULL;
   UConverter *cvout = NULL;
   UConverterToUCallback toUCb;
   UConverterFromUCallback fromUCb;

   ASSERT(codeIn);
   ASSERT(sizeIn == 0 || bufIn);
   ASSERT(codeOut);
   ASSERT(db);
   ASSERT((CSGTG_NORMAL == flags) || (CSGTG_TRANSLIT == flags) ||
          (CSGTG_IGNORE == flags));

   if (dontUseIcu) {
      // fall back
      return CodeSetOld_GenericToGenericDb(codeIn, bufIn, sizeIn, codeOut,
                                           flags, db);
   }

   /*
    * Trivial case.
    */

   if ((0 == sizeIn) || (NULL == bufIn)) {
      result = TRUE;
      goto exit;
   }

   /*
    * Open converters.
    */

   uerr = U_ZERO_ERROR;
   cvin = ucnv_open(codeIn, &uerr);
   if (!cvin) {
      goto exit;
   }

   uerr = U_ZERO_ERROR;
   cvout = ucnv_open(codeOut, &uerr);
   if (!cvout) {
      goto exit;
   }

   /*
    * Set callbacks according to flags.
    */

   switch (flags) {
   case CSGTG_NORMAL:
      toUCb = UCNV_TO_U_CALLBACK_STOP;
      fromUCb = UCNV_FROM_U_CALLBACK_STOP;
      break;

   case CSGTG_TRANSLIT:
      toUCb = UCNV_TO_U_CALLBACK_SUBSTITUTE;
      fromUCb = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
      break;

   case CSGTG_IGNORE:
      toUCb = UCNV_TO_U_CALLBACK_SKIP;
      fromUCb = UCNV_FROM_U_CALLBACK_SKIP;
      break;

   default:
      NOT_IMPLEMENTED();
      break;
   }

   uerr = U_ZERO_ERROR;
   ucnv_setToUCallBack(cvin, toUCb, NULL, NULL, NULL, &uerr);
   if (U_ZERO_ERROR != uerr) {
      goto exit;
   }

   uerr = U_ZERO_ERROR;
   ucnv_setFromUCallBack(cvout, fromUCb, NULL, NULL, NULL, &uerr);
   if (U_ZERO_ERROR != uerr) {
      goto exit;
   }

   /*
    * Convert using ucnv_convertEx().
    * As a starting guess, make the output buffer the same size as
    * the input string (with a fudge constant added in to avoid degen
    * cases).
    */

   bufInCur = bufIn;
   bufInEnd = bufIn + sizeIn;
   bufOutSize = sizeIn + 4;
   bufOutOffset = 0;
   bufPivSource = bufPiv;
   bufPivTarget = bufPiv;
   bufPivEnd = bufPiv + ARRAYSIZE(bufPiv);

   for (;;) {
      if (!DynBuf_Enlarge(db, bufOutSize)) {
         goto exit;
      }
      bufOut = DynBuf_Get(db);
      bufOutCur = bufOut + bufOutOffset;
      bufOutSize = DynBuf_GetAllocatedSize(db);
      bufOutEnd = bufOut + bufOutSize;

      uerr = U_ZERO_ERROR;
      ucnv_convertEx(cvout, cvin, &bufOutCur, bufOutEnd,
		     &bufInCur, bufInEnd,
		     bufPiv, &bufPivSource, &bufPivTarget, bufPivEnd,
		     FALSE, TRUE, &uerr);

      if (!U_FAILURE(uerr)) {
         /*
          * "This was a triumph.
          *  I'm making a note here:
          *  HUGE SUCCESS.
          *  It's hard to overstate
          *  my satisfaction."
          */

         break;
      }

      if (U_BUFFER_OVERFLOW_ERROR != uerr) {
	 // failure
         goto exit;
      }

      /*
       * Our guess at 'bufOutSize' was obviously wrong, just double it.
       * We'll be reallocating bufOut, so will need to recompute bufOutCur
       * based on bufOutOffset.
       */

      bufOutSize *= 2;
      bufOutOffset = bufOutCur - bufOut;
   }

   /*
    * Set final size and return.
    */

   DynBuf_SetSize(db, bufOutCur - bufOut);

   result = TRUE;

  exit:
   if (cvin) {
      ucnv_close(cvin);
   }

   if (cvout) {
      ucnv_close(cvout);
   }

   return result;
}
Example #21
0
UBool convsample_20_didSubstitute(const char *source)
{
  UChar uchars[100];
  char bytes[100];
  UConverter *conv = NULL;
  UErrorCode status = U_ZERO_ERROR;
  uint32_t len, len2;
  UBool  flagVal;
  
  FromUFLAGContext * context = NULL;

  printf("\n\n==============================================\n"
         "Sample 20: C: Test for substitution using callbacks\n");

  /* print out the original source */
  printBytes("src", source);
  printf("\n");

  /* First, convert from UTF8 to unicode */
  conv = ucnv_open("utf-8", &status);
  U_ASSERT(status);

  len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
  U_ASSERT(status);
 
  printUChars("uch", uchars, len);
  printf("\n");

  /* Now, close the converter */
  ucnv_close(conv);

  /* Now, convert to windows-1252 */
  conv = ucnv_open("windows-1252", &status);
  U_ASSERT(status);

  /* Converter starts out with the SUBSTITUTE callback set. */

  /* initialize our callback */
  context = flagCB_fromU_openContext();

  /* Set our special callback */
  ucnv_setFromUCallBack(conv,
                        flagCB_fromU,
                        context,
                        &(context->subCallback),
                        &(context->subContext),
                        &status);

  U_ASSERT(status);

  len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
  U_ASSERT(status);

  flagVal = context->flag;  /* it's about to go away when we close the cnv */

  ucnv_close(conv);

  /* print out the original source */
  printBytes("bytes", bytes, len2);

  return flagVal; /* true if callback was called */
}
Example #22
0
void charsetConverter_icu::convert
	(utility::inputStream& in, utility::outputStream& out, status* st)
{
	UErrorCode err = U_ZERO_ERROR;

	ucnv_reset(m_from);
	ucnv_reset(m_to);

	if (st)
		new (st) status();

	// From buffers
	byte_t cpInBuffer[16]; // stream data put here
	const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
	std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here

	// To buffers
	// converted (char) data end up here
	const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
	std::vector <char> cpOutBuffer(cpOutBufferSz);

	// Tell ICU what to do when encountering an illegal byte sequence
	if (m_options.silentlyReplaceInvalidSequences)
	{
		// Set replacement chars for when converting from Unicode to codepage
		icu::UnicodeString substString(m_options.invalidSequence.c_str());
		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
	}
	else
	{
		// Tell ICU top stop (and return an error) on illegal byte sequences
		ucnv_setToUCallBack
			(m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

		ucnv_setFromUCallBack
			(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
	}

	// Input data available
	while (!in.eof())
	{
		// Read input data into buffer
		size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));

		// Beginning of read data
		const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
		const char* sourceLimit = source + inLength; // end + 1

		UBool flush = in.eof();  // is this last run?

		UErrorCode toErr;

		// Loop until all source has been processed
		do
		{
			// Set up target pointers
			UChar* target = &uOutBuffer[0];
			UChar* targetLimit = &target[0] + outSize;

			toErr = U_ZERO_ERROR;
			ucnv_toUnicode(m_from, &target, targetLimit,
			               &source, sourceLimit, NULL, flush, &toErr);

			if (st)
				st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));

			if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr))
			{
				if (toErr == U_INVALID_CHAR_FOUND ||
				    toErr == U_TRUNCATED_CHAR_FOUND ||
				    toErr == U_ILLEGAL_CHAR_FOUND)
				{
					// Error will be thrown later (*)
				}
				else
				{
					throw exceptions::charset_conv_error("[ICU] Error converting to Unicode from " + m_source.getName());
				}
			}

			// The Unicode source is the buffer just written and the limit
			// is where the previous conversion stopped (target is moved in the conversion)
			const UChar* uSource = &uOutBuffer[0];
			UChar* uSourceLimit = &target[0];
			UErrorCode fromErr;

			// Loop until converted chars are fully written
			do
			{
				char* cpTarget = &cpOutBuffer[0];
				const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;

				fromErr = U_ZERO_ERROR;

				// Write converted bytes (Unicode) to destination codepage
				ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit,
				                 &uSource, uSourceLimit, NULL, flush, &fromErr);

				if (st)
				{
					// Decrement input bytes count by the number of input bytes in error
					char errBytes[16];
					int8_t errBytesLen = sizeof(errBytes);
					UErrorCode errBytesErr = U_ZERO_ERROR;

	 				ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);

					st->inputBytesRead -= errBytesLen;
					st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
				}

				// (*) If an error occurred while converting from input charset, throw it now
				if (toErr == U_INVALID_CHAR_FOUND ||
				    toErr == U_TRUNCATED_CHAR_FOUND ||
				    toErr == U_ILLEGAL_CHAR_FOUND)
				{
					throw exceptions::illegal_byte_sequence_for_charset();
				}

				if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr))
				{
					if (fromErr == U_INVALID_CHAR_FOUND ||
					    fromErr == U_TRUNCATED_CHAR_FOUND ||
					    fromErr == U_ILLEGAL_CHAR_FOUND)
					{
						throw exceptions::illegal_byte_sequence_for_charset();
					}
					else
					{
						throw exceptions::charset_conv_error("[ICU] Error converting from Unicode to " + m_dest.getName());
					}
				}

				// Write to destination stream
				out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));

			} while (fromErr == U_BUFFER_OVERFLOW_ERROR);

		} while (toErr == U_BUFFER_OVERFLOW_ERROR);
	}
}
Example #23
0
unsigned int
ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
                            , const unsigned int    srcCount
                            ,       XMLByte* const  toFill
                            , const unsigned int    maxBytes
                            ,       unsigned int&   charsEaten
                            , const UnRepOpts       options)
{
    //
    //  Get a pointer to the buffer to transcode. If UChar and XMLCh are
    //  the same size here, then use the original. Else, create a temp
    //  one and put a janitor on it.
    //
    const UChar* srcPtr;
    UChar* tmpBufPtr = 0;
    if (sizeof(XMLCh) == sizeof(UChar))
    {
        srcPtr = (const UChar*)srcData;
    }
    else
    {
        tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());
        srcPtr = tmpBufPtr;
    }
    ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());

    //
    //  Set the appropriate callback so that it will either fail or use
    //  the rep char. Remember the old one so we can put it back.
    //
    UErrorCode  err = U_ZERO_ERROR;
    UConverterFromUCallback oldCB = NULL;
    #if (U_ICU_VERSION_MAJOR_NUM < 2)
    void* orgContent;
    #else
    const void* orgContent;
    #endif
    ucnv_setFromUCallBack
    (
        fConverter
        , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
                                   : UCNV_FROM_U_CALLBACK_SUBSTITUTE
        , NULL
        , &oldCB
        , &orgContent
        , &err
    );

    //
    //  Ok, lets transcode as many chars as we we can in one shot. The
    //  ICU API gives enough info not to have to do this one char by char.
    //
    XMLByte*        startTarget = toFill;
    const UChar*    startSrc = srcPtr;
    err = U_ZERO_ERROR;
    ucnv_fromUnicode
    (
        fConverter
        , (char**)&startTarget
        , (char*)(startTarget + maxBytes)
        , &startSrc
        , srcPtr + srcCount
        , 0
        , false
        , &err
    );

    // Rememember the status before we possibly overite the error code
    const bool res = (err == U_ZERO_ERROR);

    // Put the old handler back
    err = U_ZERO_ERROR;
    UConverterFromUCallback orgAction = NULL;

    ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);

    if (!res)
    {
        XMLCh tmpBuf[17];
        XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager());
        ThrowXMLwithMemMgr2
        (
            TranscodingException
            , XMLExcepts::Trans_Unrepresentable
            , tmpBuf
            , getEncodingName()
            , getMemoryManager()
        );
    }

    // Fill in the chars we ate from the input
    charsEaten = startSrc - srcPtr;

    // Return the chars we stored
    return startTarget - toFill;
}
Example #24
0
UBool convsample_21_didSubstitute(const char *source)
{
  UChar uchars[100];
  char bytes[100];
  UConverter *conv = NULL, *cloneCnv = NULL;
  UErrorCode status = U_ZERO_ERROR;
  uint32_t len, len2;
  int32_t  cloneLen;
  UBool  flagVal = FALSE;
  UConverterFromUCallback junkCB;
  
  FromUFLAGContext *flagCtx = NULL, 
                   *cloneFlagCtx = NULL;

  debugCBContext   *debugCtx1 = NULL,
                   *debugCtx2 = NULL,
                   *cloneDebugCtx = NULL;

  printf("\n\n==============================================\n"
         "Sample 21: C: Test for substitution w/ callbacks & clones \n");

  /* print out the original source */
  printBytes("src", source);
  printf("\n");

  /* First, convert from UTF8 to unicode */
  conv = ucnv_open("utf-8", &status);
  U_ASSERT(status);

  len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
  U_ASSERT(status);
 
  printUChars("uch", uchars, len);
  printf("\n");

  /* Now, close the converter */
  ucnv_close(conv);

  /* Now, convert to windows-1252 */
  conv = ucnv_open("windows-1252", &status);
  U_ASSERT(status);

  /* Converter starts out with the SUBSTITUTE callback set. */

  /* initialize our callback */
  /* from the 'bottom' innermost, out
   *   CNV ->  debugCtx1[debug]  ->  flagCtx[flag] -> debugCtx2[debug]  */

#if DEBUG_TMI
  printf("flagCB_fromU = %p\n", &flagCB_fromU);
  printf("debugCB_fromU = %p\n", &debugCB_fromU);
#endif

  debugCtx1 = debugCB_openContext();
   flagCtx  = flagCB_fromU_openContext();
  debugCtx2 = debugCB_openContext();

  debugCtx1->subCallback =  flagCB_fromU;  /* debug1 -> flag */
  debugCtx1->subContext  =  flagCtx;

  flagCtx->subCallback   =  debugCB_fromU; /*  flag -> debug2 */
  flagCtx->subContext    =  debugCtx2;

  debugCtx2->subCallback =  UCNV_FROM_U_CALLBACK_SUBSTITUTE;
  debugCtx2->subContext  = NULL;

  /* Set our special callback */

  ucnv_setFromUCallBack(conv,
                        debugCB_fromU,
                        debugCtx1,
                        &(debugCtx2->subCallback),
                        &(debugCtx2->subContext),
                        &status);

  U_ASSERT(status);

#if DEBUG_TMI
  printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
         conv, debugCtx1, debugCtx1->subCallback,
         debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
#endif

  cloneLen = 1; /* but passing in null so it will clone */
  cloneCnv = ucnv_safeClone(conv,  NULL,  &cloneLen, &status);

  U_ASSERT(status);

#if DEBUG_TMI
  printf("Cloned converter from %p -> %p.  Closing %p.\n", conv, cloneCnv, conv);
#endif
  
  ucnv_close(conv);

#if DEBUG_TMI
  printf("%p closed.\n", conv);
#endif 

  U_ASSERT(status);
  /* Now, we have to extract the context */
  cloneDebugCtx = NULL;
  cloneFlagCtx  = NULL;

  ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
  if(cloneDebugCtx != NULL) {
      cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
  }

  printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
         cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );

  len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
  U_ASSERT(status);

  if(cloneFlagCtx != NULL) {
      flagVal = cloneFlagCtx->flag;  /* it's about to go away when we close the cnv */
  } else {
      printf("** Warning, couldn't get the subcallback \n");
  }

  ucnv_close(cloneCnv);

  /* print out the original source */
  printBytes("bytes", bytes, len2);

  return flagVal; /* true if callback was called */
}
Example #25
0
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    UErrorCode status = U_ZERO_ERROR;
    int32_t i = 0;

    UConverter *converter;

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* error handling, printing usage message */
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    if (options[3].doesOccur) {
        tostdout = 1;
    }

    if(options[4].doesOccur) {
        trunc = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        trunc = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
      suppressAliases = TRUE;
    }

    converter = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: couldn't create %s converter for encoding\n", pname, encoding ? encoding : ucnv_getDefaultName());
        return 2;
    }
    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: couldn't configure converter for encoding\n", pname);
        return 3;
    }

    defaultConverter = ucnv_open(0, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: couldn't create %s converter for encoding\n", ucnv_getDefaultName(), pname);
        return 2;
    }

    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */
        char infile[4096]; /* XXX Sloppy. */
        char locale[64];
        const char *thename = 0, *p, *q;
        UBool fromICUData = FALSE;

        arg = getLongPathname(argv[i]);

        if (verbose) {
            printf("processing bundle \"%s\"\n", argv[i]);
        }

        p = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
        if (p == NULL) {
            p = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
        }
#endif
        if (!p) {
            p = arg;
        } else {
            p++;
        }
        q = uprv_strrchr(p, '.');
        if (!q) {
            for (q = p; *q; ++q)
                ;
        }
        uprv_strncpy(locale, p, q - p);
        locale[q - p] = 0;

        if (!(fromICUData = !uprv_strcmp(inputDir, "-"))) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#ifdef U_WINDOWS
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                uprv_strcpy(infile, inputDir);
                if(q != NULL) {
                    uprv_strcat(infile, U_FILE_SEP_STRING);
                    strncat(infile, arg, q-arg);
                }
                thename = infile;
            }
        }
        status = U_ZERO_ERROR;
        if (thename) {
            bundle = ures_openDirect(thename, locale, &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale, &status);
        }
        if (status == U_ZERO_ERROR) {
            FILE *out;

            const char *filename = 0;
            const char *ext = 0;

            if (!locale || !tostdout) {
                filename = uprv_strrchr(arg, U_FILE_SEP_CHAR);

#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (!filename) {
                    filename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                if (!filename) {
                    filename = arg;
                } else {
                    ++filename;
                }
                ext = uprv_strrchr(arg, '.');
                if (!ext) {
                    ext = filename + uprv_strlen(filename);
                }
            }

            if (tostdout) {
                out = stdout;
#if defined(U_WINDOWS) || defined(U_CYGWIN)
                if (setmode(fileno(out), O_BINARY) == -1) {
                    fprintf(stderr, "%s: couldn't set standard output to binary mode\n", pname);
                    return 4;
                }
#endif
            } else {
                char thefile[4096], *tp;
                int32_t len;

                if (outputDir) {
                    uprv_strcpy(thefile, outputDir);
                    uprv_strcat(thefile, U_FILE_SEP_STRING);
                } else {
                    *thefile = 0;
                }
                uprv_strcat(thefile, filename);
                tp = thefile + uprv_strlen(thefile);
                len = (int32_t)uprv_strlen(ext);
                if (len) {
                    tp -= len - 1;
                } else {
                    *tp++ = '.';
                }
                uprv_strcpy(tp, "txt");

                out = fopen(thefile, "w");
                if (!out) {
                    fprintf(stderr, "%s: couldn't create %s\n", pname, thefile);
                    return 4;
                }
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
                static const UChar bom[] = { 0xFEFF };
                printString(out, converter, bom, (int32_t)(sizeof(bom)/sizeof(*bom)));
            }

            printCString(out, converter, "// -*- Coding: ", -1);
            printCString(out, converter, encoding ? encoding : getEncodingName(ucnv_getDefaultName()), -1);
            printCString(out, converter, "; -*-\n//\n", -1);
            printCString(out, converter, "// This file was dumped by derb(8) from ", -1);
            if (thename) {
                printCString(out, converter, thename, -1);
            } else if (fromICUData) {
                printCString(out, converter, "the ICU internal ", -1);
                printCString(out, converter, locale, -1);
                printCString(out, converter, " locale", -1);
            }

            printCString(out, converter, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n", -1);

            if (locale) {
                printCString(out, converter, locale, -1);
            } else {
                printCString(out, converter, filename, (int32_t)(ext - filename));
                printString(out, converter, sp, (int32_t)(sizeof(sp)/sizeof(*sp)));
            }
            printOutBundle(out, converter, bundle, 0, pname, &status);

            if (out != stdout) {
                fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    ucnv_close(defaultConverter);
    ucnv_close(converter);

    return 0;
}
Example #26
0
void debugCB_fromU(const void *context,
                   UConverterFromUnicodeArgs *fromUArgs,
                   const UChar* codeUnits,
                   int32_t length,
                   UChar32 codePoint,
                   UConverterCallbackReason reason,
                   UErrorCode * err)
{
    debugCBContext *ctx = (debugCBContext*)context;
    /*UConverterFromUCallback junkFrom;*/
    
#if DEBUG_TMI
    printf("debugCB_fromU: Context %p:%d called, reason %d on cnv %p [err=%s]\n", ctx, ctx->serial, reason, fromUArgs->converter, u_errorName(*err));
#endif

    if(ctx->magic != 0xC0FFEE) {
        fprintf(stderr, "debugCB_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        /* see comments in above flagCB clone code */

        UConverterFromUCallback   saveCallback;
        const void *saveContext;
        debugCBContext *cloned;
        UErrorCode subErr = U_ZERO_ERROR;

        /* "recreate" it */
#if DEBUG_TMI
        printf("debugCB_fromU: cloning..\n");
#endif
        cloned = debugCB_clone(ctx);

        if(cloned == NULL) {
            fprintf(stderr, "debugCB_fromU: internal clone failed on %p\n", ctx);
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        ucnv_setFromUCallBack(fromUArgs->converter,
                              cloned->subCallback,
                              cloned->subContext,
                              &saveCallback,
                              &saveContext,
                              &subErr);
        
        if( cloned->subCallback != NULL) {
#if DEBUG_TMI
            printf("debugCB_fromU:%p calling subCB %p\n", ctx, cloned->subCallback);
#endif
            /* call subCB if present */
            cloned->subCallback(cloned->subContext, fromUArgs, codeUnits,
                                length, codePoint, reason, err);
        } else {
            printf("debugCB_fromU:%p, NOT calling subCB, it's NULL\n", ctx);
        }

        /* set back callback */
        ucnv_setFromUCallBack(fromUArgs->converter,
                              saveCallback,  /* Us */
                              cloned,        /* new context */
                              &cloned->subCallback,  /* IMPORTANT! Accept any change in CB or context */
                              &cloned->subContext,
                              &subErr);
        
        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }
    
    /* process other reasons here */

    /* always call subcb if present */
    if(ctx->subCallback != NULL && reason != UCNV_CLONE) {
        ctx->subCallback(ctx->subContext,
                         fromUArgs,
                         codeUnits,
                         length,
                         codePoint,
                         reason,
                         err);
    }

    if(reason == UCNV_CLOSE) {
#if DEBUG_TMI
        printf("debugCB_fromU: Context %p:%d closing\n", ctx, ctx->serial);
#endif
        free(ctx);
    }

#if DEBUG_TMI
    printf("debugCB_fromU: leaving cnv %p, ctx %p: err %s\n", fromUArgs->converter, ctx, u_errorName(*err));
#endif
}