/**
 * Convert a UTF-16 string into the character set named by output_char_set.
 *
 * @param instance         not used by this function
 * @param utf16            UTF-16 code units to convert
 * @param utf16_len        number of code units in utf16
 * @param output_char_set  target charset name; resolved through
 *                         encoding_alias_get_canonical_name() before opening
 *                         the ICU converter
 * @param on_error         what to do with unconvertible characters: fail
 *                         (also the default for unknown values), skip them,
 *                         or substitute '?'
 * @param output_length    receives the length reported by ucnv_fromUChars();
 *                         set to 0 on failure
 * @return buffer obtained from ppb_memory_mem_alloc() holding the converted
 *         text, or NULL on failure
 */
char *
ppb_char_set_utf16_to_char_set(PP_Instance instance, const uint16_t *utf16, uint32_t utf16_len,
                               const char *output_char_set,
                               enum PP_CharSet_ConversionError on_error, uint32_t *output_length)
{
    // Everything the `err` path touches is initialised before the first
    // `goto err`; previously `u` was still indeterminate when the allocation
    // failed and the cleanup code tested it.
    UConverter *u = NULL;
    UErrorCode st = U_ZERO_ERROR;
    const UChar subst = '?';
    const char *charset;

    // each character could take up to 4 bytes in UTF-8; with additional zero-terminator byte
    const uint32_t output_buffer_length = (utf16_len + 1) * 4 + 1;
    char *output = ppb_memory_mem_alloc(output_buffer_length);

    if (!output) {
        trace_error("%s, can't allocate memory, %u bytes\n", __func__, output_buffer_length);
        goto err;
    }

    charset = encoding_alias_get_canonical_name(output_char_set);
    u = ucnv_open(charset, &st);
    if (!U_SUCCESS(st)) {
        trace_error("%s, wrong charset %s\n", __func__, output_char_set);
        goto err;
    }

    switch (on_error) {
    default:
    case PP_CHARSET_CONVERSIONERROR_FAIL:
        st = U_ZERO_ERROR;
        ucnv_setFromUCallBack(u, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &st);
        break;

    case PP_CHARSET_CONVERSIONERROR_SKIP:
        st = U_ZERO_ERROR;
        ucnv_setFromUCallBack(u, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &st);
        break;

    case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE:
        st = U_ZERO_ERROR;
        ucnv_setFromUCallBack(u, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &st);
        st = U_ZERO_ERROR;
        ucnv_setSubstString(u, &subst, 1, &st);
        break;
    }

    // `st` is deliberately not reset here: a failure left over from the
    // callback setup makes ucnv_fromUChars() fail as well.
    *output_length = ucnv_fromUChars(u, output, output_buffer_length, utf16, utf16_len, &st);

    // NOTE(review): a buffer overflow is tolerated, in which case ICU reports
    // the length it would have needed; that may exceed output_buffer_length --
    // confirm callers cope with it.
    if (st != U_BUFFER_OVERFLOW_ERROR && !U_SUCCESS(st))
        goto err;

    ucnv_close(u);
    return output;

err:
    *output_length = 0;
    ppb_memory_mem_free(output);
    if (u)
        ucnv_close(u);
    return NULL;
}
// Encodes |length| UTF-16 code units starting at |characters| with the ICU
// converter of this codec. |handling| selects what is emitted for characters
// the target encoding cannot represent. Returns "" for empty input and a
// default-constructed CString when the converter cannot be created or
// configured.
CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (!m_converterICU) {
        createICUConverter();
        if (!m_converterICU)
            return CString();
    }

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    String backslashSwapped(characters, length);
    backslashSwapped.replace('\\', m_encoding.backslashAsCurrencySymbol());

    const UChar* cursor = backslashSwapped.characters();
    const UChar* cursorEnd = cursor + backslashSwapped.length();

    UErrorCode status = U_ZERO_ERROR;
    switch (handling) {
    case QuestionMarksForUnencodables:
        ucnv_setSubstChars(m_converterICU, "?", 1, &status);
        ucnv_setFromUCallBack(m_converterICU,
            m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE,
            0, 0, 0, &status);
        break;
    case EntitiesForUnencodables:
        ucnv_setFromUCallBack(m_converterICU,
            m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE,
            UCNV_ESCAPE_XML_DEC, 0, 0, &status);
        break;
    case URLEncodedEntitiesForUnencodables:
        ucnv_setFromUCallBack(m_converterICU,
            m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback,
            0, 0, 0, &status);
        break;
    }

    ASSERT(U_SUCCESS(status));
    if (U_FAILURE(status))
        return CString();

    // Convert through a fixed-size stack chunk, appending each chunk to the
    // growing result until ICU stops reporting that the chunk overflowed.
    Vector<char> encoded;
    size_t encodedSize = 0;
    for (;;) {
        char chunk[ConversionBufferSize];
        char* out = chunk;
        char* outLimit = out + ConversionBufferSize;

        status = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &out, outLimit, &cursor, cursorEnd, 0, true, &status);

        size_t produced = out - chunk;
        encoded.grow(encodedSize + produced);
        memcpy(encoded.data() + encodedSize, chunk, produced);
        encodedSize += produced;

        if (status != U_BUFFER_OVERFLOW_ERROR)
            break;
    }

    return CString(encoded.data(), encodedSize);
}
/*
 * Opens an ICU converter for ENCODING and configures its error behaviour.
 *
 * When LOSSBYTE is non-zero it is installed as the converter's substitution
 * byte; otherwise both conversion directions are set to stop on the first
 * unconvertible sequence.
 *
 * Returns the converter, or NULL if it could not be opened.  Errors from the
 * configuration calls are not reported to the caller.
 */
static UConverter *
GSStringOpenConverter (CFStringEncoding encoding, char lossByte)
{
  const char *converterName;
  UConverter *cnv;
  UErrorCode err = U_ZERO_ERROR;

  converterName = CFStringICUConverterName (encoding);

  cnv = ucnv_open (converterName, &err);
  /* Bail out right away instead of handing a NULL converter to the setters
     below and relying on them to ignore it. */
  if (U_FAILURE (err))
    return NULL;

  if (lossByte)
    {
      /* FIXME: for some reason this is returning U_ILLEGAL_ARGUMENTS_ERROR */
      /* NOTE(review): possibly because a 1-byte substitution is outside the
         byte-length range the converter accepts -- confirm against the ICU
         documentation for ucnv_setSubstChars. */
      ucnv_setSubstChars (cnv, &lossByte, 1, &err);
    }
  else
    {
      ucnv_setToUCallBack (cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
                           &err);
      ucnv_setFromUCallBack (cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL,
                             &err);
    }

  return cnv;
}
extern int main(int argc, const char *argv[]) { UErrorCode errorCode=U_ZERO_ERROR; // Note: Using a global variable for any object is not exactly thread-safe... // You can change this call to e.g. ucnv_open("UTF-8", &errorCode) if you pipe // the output to a file and look at it with a Unicode-capable editor. // This will currently affect only the printUString() function, see the code above. // printUnicodeString() could use this, too, by changing to an extract() overload // that takes a UConverter argument. cnv=ucnv_open(NULL, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "error %s opening the default converter\n", u_errorName(errorCode)); return errorCode; } ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "error %s setting the escape callback in the default converter\n", u_errorName(errorCode)); ucnv_close(cnv); return errorCode; } demo_utf_h_macros(); demo_C_Unicode_strings(); demoCaseMapInC(); demoCaseMapInCPlusPlus(); demoUnicodeStringStorage(); demoUnicodeStringInit(); ucnv_close(cnv); return 0; }
static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address, jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) { UConverter* cnv = toUConverter(address); if (cnv == NULL) { maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); return; } UConverterFromUCallback oldCallback = NULL; const void* oldCallbackContext = NULL; ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext)); EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>( reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext)); if (callbackContext == NULL) { callbackContext = new EncoderCallbackContext; } callbackContext->onMalformedInput = getFromUCallback(onMalformedInput); callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput); ScopedByteArrayRO replacementBytes(env, javaReplacement); if (replacementBytes.get() == NULL) { maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR); return; } memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size()); callbackContext->replacementByteCount = replacementBytes.size(); UErrorCode errorCode = U_ZERO_ERROR; ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode); }
char *aescstrdup(const UChar* unichars,int32_t length){ char *newString,*targetLimit,*target; UConverterFromUCallback cb; const void *p; UErrorCode errorCode = U_ZERO_ERROR; #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY # if U_PLATFORM == U_PF_OS390 static const char convName[] = "ibm-1047"; # else static const char convName[] = "ibm-37"; # endif #else static const char convName[] = "US-ASCII"; #endif UConverter* conv = ucnv_open(convName, &errorCode); if(length==-1){ length = u_strlen( unichars); } newString = (char*)ctst_malloc ( sizeof(char) * 8 * (length +1)); target = newString; targetLimit = newString+sizeof(char) * 8 * (length +1); ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, &cb, &p, &errorCode); ucnv_fromUnicode(conv,&target,targetLimit, &unichars, (UChar*)(unichars+length),NULL,TRUE,&errorCode); ucnv_close(conv); *target = '\0'; return newString; }
/* Returns an ICU converter for icuName.
   If flags carry a stream ID whose slot in the per-thread data already holds a
   converter, that converter is returned. Otherwise a new converter is opened
   and configured: with a lossy byte (explicit in flags, or '?' when lossy
   conversion is allowed) it becomes the substitution character; without one,
   the callback for the requested direction is set to stop on error.
   Returns NULL when ucnv_open() yields no converter. */
CF_INLINE UConverter *__CFStringEncodingConverterCreateICUConverter(const char *icuName, uint32_t flags, bool toUnicode) {
    UErrorCode status = U_ZERO_ERROR;
    uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);

    if (0 != streamID) { // this is a part of streaming previously created
        __CFICUThreadData *threadData = __CFStringEncodingICUGetThreadData();

        --streamID; // map to array index

        if ((streamID < threadData->_numSlots) && (NULL != threadData->_converters[streamID])) {
            return threadData->_converters[streamID];
        }
    }

    UConverter *converter = ucnv_open(icuName, &status);
    if (NULL == converter) return converter;

    char lossyByte = CFStringEncodingMaskToLossyByte(flags);
    if ((0 == lossyByte) && (0 != (flags & kCFStringEncodingAllowLossyConversion))) lossyByte = '?';

    if (0 != lossyByte) {
        ucnv_setSubstChars(converter, &lossyByte, 1, &status);
    } else if (toUnicode) {
        ucnv_setToUCallBack(converter, &UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    } else {
        ucnv_setFromUCallBack(converter, &UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    }

    return converter;
}
/*
 * Version-suffixed entry point: forwards every argument unchanged to
 * ucnv_setFromUCallBack().
 *
 * NOTE(review): this assumes ucnv_setFromUCallBack is not itself renamed to
 * ucnv_setFromUCallBack_53 in this translation unit (ICU symbol renaming);
 * otherwise the call below would recurse -- confirm.
 */
U_STABLE void U_EXPORT2
ucnv_setFromUCallBack_53(UConverter * converter,
                         UConverterFromUCallback newAction,
                         const void *newContext,
                         UConverterFromUCallback *oldAction,
                         const void **oldContext,
                         UErrorCode * err)
{
    ucnv_setFromUCallBack(converter,
                          newAction, newContext,
                          oldAction, oldContext,
                          err);
}
// Encodes the UTF-16 range described by |input| with m_converterICU.
// |handling| chooses the from-Unicode callback used for characters the target
// encoding cannot represent. Returns a default-constructed CString when the
// callback cannot be configured.
CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHandling handling)
{
    const UChar* cursor = input.begin();
    const UChar* cursorEnd = input.end();

    UErrorCode status = U_ZERO_ERROR;
    switch (handling) {
    case QuestionMarksForUnencodables:
        ucnv_setSubstChars(m_converterICU, "?", 1, &status);
        ucnv_setFromUCallBack(m_converterICU,
            m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE,
            0, 0, 0, &status);
        break;
    case EntitiesForUnencodables:
        ucnv_setFromUCallBack(m_converterICU,
            m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE,
            UCNV_ESCAPE_XML_DEC, 0, 0, &status);
        break;
    case URLEncodedEntitiesForUnencodables:
        ucnv_setFromUCallBack(m_converterICU,
            m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback,
            0, 0, 0, &status);
        break;
    }

    ASSERT(U_SUCCESS(status));
    if (U_FAILURE(status))
        return CString();

    // Drain the converter one stack chunk at a time; ICU signals "more output
    // pending" through U_BUFFER_OVERFLOW_ERROR.
    Vector<char> encoded;
    size_t encodedSize = 0;
    bool morePending = true;
    while (morePending) {
        char chunk[ConversionBufferSize];
        char* out = chunk;
        char* outLimit = out + ConversionBufferSize;

        status = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &out, outLimit, &cursor, cursorEnd, 0, true, &status);

        size_t produced = out - chunk;
        encoded.grow(encodedSize + produced);
        memcpy(encoded.data() + encodedSize, chunk, produced);
        encodedSize += produced;

        morePending = (status == U_BUFFER_OVERFLOW_ERROR);
    }

    return CString(encoded.data(), encodedSize);
}
// Hands out the converter cached in this thread's storage slot, replacing it
// with a freshly opened one when none is cached or when the cached one was
// opened for a different charset than fGivenCharset.
// New converters are configured to stop on errors in both directions.
// Returns B_OK on success, the error from the thread-local storage lookup,
// B_NAME_NOT_FOUND if the converter cannot be opened, or B_ERROR if it cannot
// be configured.
status_t
ICUCategoryData::_GetConverter(UConverter*& converterOut)
{
	// we use different converters per thread to avoid concurrent accesses
	ICUThreadLocalStorageValue* slot = NULL;
	status_t lookupStatus = ICUThreadLocalStorageValue::GetInstanceForKey(
		fThreadLocalStorageKey, slot);
	if (lookupStatus != B_OK)
		return lookupStatus;

	if (slot->converter != NULL) {
		if (strcmp(slot->charset, fGivenCharset) == 0) {
			converterOut = slot->converter;
			return B_OK;
		}

		// charset no longer matches the converter, we need to dump it and
		// create a new one
		ucnv_close(slot->converter);
		slot->converter = NULL;
	}

	// create a new converter for the current charset
	UErrorCode icuStatus = U_ZERO_ERROR;
	UConverter* fresh = ucnv_open(fGivenCharset, &icuStatus);
	if (fresh == NULL)
		return B_NAME_NOT_FOUND;

	// setup the new converter to stop upon any errors; the second callback is
	// only attempted when the first one was accepted
	icuStatus = U_ZERO_ERROR;
	ucnv_setToUCallBack(fresh, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
		&icuStatus);
	if (U_SUCCESS(icuStatus)) {
		icuStatus = U_ZERO_ERROR;
		ucnv_setFromUCallBack(fresh, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
			NULL, &icuStatus);
	}
	if (!U_SUCCESS(icuStatus)) {
		ucnv_close(fresh);
		return B_ERROR;
	}

	slot->converter = fresh;
	strlcpy(slot->charset, fGivenCharset, sizeof(slot->charset));

	converterOut = fresh;

	return B_OK;
}
// Builds an output-stream filter that converts from charset `source` to
// charset `dest` and writes to `os`.
//
// Two ICU converters are opened: m_from (source charset) and m_to (destination
// charset). Depending on `opts`, invalid sequences are either replaced by
// opts.invalidSequence or make the conversion stop with an error.
//
// Throws exceptions::charset_conv_error if a converter cannot be opened or
// configured. Converters opened so far are closed before the exception leaves
// the constructor, since the destructor will not run for a partially
// constructed object.
charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu
	(const charset& source, const charset& dest, outputStream* os,
	 const charsetConverterOptions& opts)
	: m_from(NULL), m_to(NULL), m_sourceCharset(source),
	  m_destCharset(dest), m_stream(*os), m_options(opts)
{
	try
	{
		UErrorCode err = U_ZERO_ERROR;
		m_from = ucnv_open(source.getName().c_str(), &err);

		if (!U_SUCCESS(err))
		{
			throw exceptions::charset_conv_error
				("Cannot initialize ICU converter for source charset '" + source.getName() + "' (error code: " + u_errorName(err) + ".");
		}

		m_to = ucnv_open(dest.getName().c_str(), &err);

		if (!U_SUCCESS(err))
		{
			throw exceptions::charset_conv_error
				("Cannot initialize ICU converter for destination charset '" + dest.getName() + "' (error code: " + u_errorName(err) + ".");
		}

		// Tell ICU what to do when encountering an illegal byte sequence
		if (m_options.silentlyReplaceInvalidSequences)
		{
			// Set replacement chars for when converting from Unicode to codepage
			icu::UnicodeString substString(m_options.invalidSequence.c_str());
			ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

			if (U_FAILURE(err))
				throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
		}
		else
		{
			// Tell ICU to stop (and return an error) on illegal byte sequences.
			// The to-Unicode callback belongs on the source converter: it is
			// only consulted while decoding bytes into Unicode, so installing
			// it on m_to (as before) had no effect on the input side.
			ucnv_setToUCallBack
				(m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

			if (U_FAILURE(err))
				throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

			ucnv_setFromUCallBack
				(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

			if (U_FAILURE(err))
				throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
		}
	}
	catch (...)
	{
		// Release whatever was opened, then let the original exception continue.
		if (m_from != NULL)
			ucnv_close(m_from);

		if (m_to != NULL)
			ucnv_close(m_to);

		m_from = NULL;
		m_to = NULL;

		throw;
	}
}
/*
 * Opens an ICU converter for `encoding` and makes it stop on conversion
 * errors in the direction selected by `toUnicode`.
 * Returns NULL when the converter cannot be opened; a failure to install the
 * callback is not reported.
 */
static UConverter *
xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
{
    UErrorCode  status = U_ZERO_ERROR;
    UConverter *uconv  = ucnv_open((char *) encoding, &status);

    if ( U_SUCCESS(status) ) {
        if (toUnicode) {
            ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
                                NULL, NULL, NULL, &status);
        }
        else {
            ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
                                  NULL, NULL, NULL, &status);
        }

        return uconv;
    }

    return NULL;
}
// Convert a file from one encoding to another static UBool convertFile(const char *pname, const char *fromcpage, UConverterToUCallback toucallback, const void *touctxt, const char *tocpage, UConverterFromUCallback fromucallback, const void *fromuctxt, int fallback, size_t bufsz, const char *translit, const char *infilestr, FILE * outfile, int verbose) { FILE *infile; UBool ret = TRUE; UConverter *convfrom = 0; UConverter *convto = 0; UErrorCode err = U_ZERO_ERROR; UBool flush; const char *cbufp; char *bufp; char *buf = 0; uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */ const UChar *unibufbp; UChar *unibufp; UChar *unibuf = 0; int32_t *fromoffsets = 0, *tooffsets = 0; size_t rd, wr, tobufsz; #if !UCONFIG_NO_TRANSLITERATION Transliterator *t = 0; // Transliterator acting on Unicode data. #endif UnicodeString u; // String to do the transliteration. // Open the correct input file or connect to stdin for reading input if (infilestr != 0 && strcmp(infilestr, "-")) { infile = fopen(infilestr, "rb"); if (infile == 0) { UnicodeString str1(infilestr, ""); str1.append((UChar32) 0); UnicodeString str2(strerror(errno), ""); str2.append((UChar32) 0); initMsg(pname); u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); return FALSE; } } else { infilestr = "-"; infile = stdin; #ifdef WIN32 if (setmode(fileno(stdin), O_BINARY) == -1) { initMsg(pname); u_wmsg(stderr, "cantSetInBinMode"); return FALSE; } #endif } if (verbose) { fprintf(stderr, "%s:\n", infilestr); } #if !UCONFIG_NO_TRANSLITERATION // Create transliterator as needed. if (translit != NULL && *translit) { UParseError parse; UnicodeString str(translit), pestr; /* Create from rules or by ID as needed. 
*/ parse.line = -1; if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) { t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err); } else { t = Transliterator::createInstance(translit, UTRANS_FORWARD, err); } if (U_FAILURE(err)) { str.append((UChar32) 0); initMsg(pname); if (parse.line >= 0) { UChar linebuf[20], offsetbuf[20]; uprv_itou(linebuf, 20, parse.line, 10, 0); uprv_itou(offsetbuf, 20, parse.offset, 10, 0); u_wmsg(stderr, "cantCreateTranslitParseErr", str.getBuffer(), u_wmsg_errorName(err), linebuf, offsetbuf); } else { u_wmsg(stderr, "cantCreateTranslit", str.getBuffer(), u_wmsg_errorName(err)); } if (t) { delete t; t = 0; } goto error_exit; } } #endif // Create codepage converter. If the codepage or its aliases weren't // available, it returns NULL and a failure code. We also set the // callbacks, and return errors in the same way. convfrom = ucnv_open(fromcpage, &err); if (U_FAILURE(err)) { UnicodeString str(fromcpage, (int32_t)(uprv_strlen(fromcpage) + 1)); initMsg(pname); u_wmsg(stderr, "cantOpenFromCodeset", str.getBuffer(), u_wmsg_errorName(err)); goto error_exit; } ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err); if (U_FAILURE(err)) { initMsg(pname); u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); goto error_exit; } convto = ucnv_open(tocpage, &err); if (U_FAILURE(err)) { UnicodeString str(tocpage, (int32_t)(uprv_strlen(tocpage) + 1)); initMsg(pname); u_wmsg(stderr, "cantOpenToCodeset", str.getBuffer(), u_wmsg_errorName(err)); goto error_exit; } ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err); if (U_FAILURE(err)) { initMsg(pname); u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); goto error_exit; } ucnv_setFallback(convto, fallback); // To ensure that the buffer always is of enough size, we // must take the worst case scenario, that is the character in // the codepage that uses the most bytes and multiply 
it against // the buffer size. // use bufsz+1 to allow for additional BOM/signature character (U+FEFF) tobufsz = (bufsz+1) * ucnv_getMaxCharSize(convto); buf = new char[tobufsz]; unibuf = new UChar[bufsz]; fromoffsets = new int32_t[bufsz]; tooffsets = new int32_t[tobufsz]; // OK, we can convert now. do { char willexit = 0; rd = fread(buf, 1, bufsz, infile); if (ferror(infile) != 0) { UnicodeString str(strerror(errno)); str.append((UChar32) 0); initMsg(pname); u_wmsg(stderr, "cantRead", str.getBuffer()); goto error_exit; } // Convert the read buffer into the new coding // After the call 'unibufp' will be placed on the last // character that was converted in the 'unibuf'. // Also the 'cbufp' is positioned on the last converted // character. // At the last conversion in the file, flush should be set to // true so that we get all characters converted // // The converter must be flushed at the end of conversion so // that characters on hold also will be written. unibufp = unibuf; cbufp = buf; flush = rd != bufsz; ucnv_toUnicode(convfrom, &unibufp, unibufp + bufsz, &cbufp, cbufp + rd, fromoffsets, flush, &err); infoffset += (uint32_t)(cbufp - buf); if (U_FAILURE(err)) { char pos[32]; sprintf(pos, "%u", infoffset - 1); UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1)); initMsg(pname); u_wmsg(stderr, "problemCvtToU", str.getBuffer(), u_wmsg_errorName(err)); willexit = 1; err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ } // At the last conversion, the converted characters should be // equal to number of chars read. if (flush && !willexit && cbufp != (buf + rd)) { char pos[32]; sprintf(pos, "%u", infoffset); UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1)); initMsg(pname); u_wmsg(stderr, "premEndInput", str.getBuffer()); willexit = 1; } // Prepare to transliterate and convert. Transliterate if needed. #if !UCONFIG_NO_TRANSLITERATION if (t) { u.setTo(unibuf, (int32_t)(unibufp - unibuf)); // Copy into string. 
t->transliterate(u); } else #endif { u.setTo(unibuf, (int32_t)(unibufp - unibuf), (int32_t)(bufsz)); // Share the buffer. } int32_t ulen = u.length(); // Convert the Unicode buffer into the destination codepage // Again 'bufp' will be placed on the last converted character // And 'unibufbp' will be placed on the last converted unicode character // At the last conversion flush should be set to true to ensure that // all characters left get converted const UChar *unibufu = unibufbp = u.getBuffer(); do { int32_t len = ulen > (int32_t)bufsz ? (int32_t)bufsz : ulen; bufp = buf; unibufp = (UChar *) (unibufbp + len); ucnv_fromUnicode(convto, &bufp, bufp + tobufsz, &unibufbp, unibufp, tooffsets, flush, &err); if (U_FAILURE(err)) { const char *errtag; char pos[32]; uint32_t erroffset = dataOffset((int32_t)(bufp - buf - 1), fromoffsets, (int32_t)(bufsz), tooffsets, (int32_t)(tobufsz)); int32_t ferroffset = (int32_t)(infoffset - (unibufp - unibufu) + erroffset); if ((int32_t) ferroffset < 0) { ferroffset = (int32_t)(outfoffset + (bufp - buf)); errtag = "problemCvtFromUOut"; } else { errtag = "problemCvtFromU"; } sprintf(pos, "%u", ferroffset); UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1)); initMsg(pname); u_wmsg(stderr, errtag, str.getBuffer(), u_wmsg_errorName(err)); willexit = 1; } // At the last conversion, the converted characters should be equal to number // of consumed characters. 
if (flush && !willexit && unibufbp != (unibufu + (size_t) (unibufp - unibufu))) { char pos[32]; sprintf(pos, "%u", infoffset); UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1)); initMsg(pname); u_wmsg(stderr, "premEnd", str.getBuffer()); willexit = 1; } // Finally, write the converted buffer to the output file rd = (size_t) (bufp - buf); outfoffset += (int32_t)(wr = fwrite(buf, 1, rd, outfile)); if (wr != rd) { UnicodeString str(strerror(errno), ""); initMsg(pname); u_wmsg(stderr, "cantWrite", str.getBuffer()); willexit = 1; } if (willexit) { goto error_exit; } } while ((ulen -= (int32_t)(bufsz)) > 0); } while (!flush); // Stop when we have flushed the // converters (this means that it's // the end of output) goto normal_exit; error_exit: ret = FALSE; normal_exit: // Cleanup. if (convfrom) ucnv_close(convfrom); if (convto) ucnv_close(convto); #if !UCONFIG_NO_TRANSLITERATION if (t) delete t; #endif if (buf) delete[] buf; if (unibuf) delete[] unibuf; if (fromoffsets) delete[] fromoffsets; if (tooffsets) delete[] tooffsets; if (infile != stdin) { fclose(infile); } return ret; }
/*
 * Entry point of the tool: parses the command line, then for every remaining
 * argument opens the named resource bundle and writes it out as text (one
 * ".txt" file per bundle, or stdout when requested) through printOutBundle().
 *
 * Returns 0 after processing all bundles. Early exits return a UErrorCode
 * (bad arguments, string-building failures), 3 (encoding given together with
 * stdout output, or the output converter could not be configured) or 4 (the
 * output file could not be created). A bundle that fails to open is reported
 * through reportError() and processing continues with the next argument.
 */
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    int32_t i = 0;

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* error handling, printing usage message */
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    /* a negative argc is used below as the (negated) index of the offending argument */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    /* usage goes to stderr after an argument error, to stdout when it was asked for */
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* print version information and stop */
    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    /* writing to stdout and choosing an output encoding are mutually exclusive */
    if (options[3].doesOccur) {
      if(options[2].doesOccur) {
        fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
        return 3;
      }
      tostdout = 1;
    }

    if(options[4].doesOccur) {
        opt_truncate = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        opt_truncate = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
      suppressAliases = TRUE;
    }

    fflush(stderr); // use ustderr now.
    ustderr = u_finit(stderr, NULL, NULL);

    /* one iteration per bundle named on the command line */
    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */

        arg = getLongPathname(argv[i]);

        if (verbose) {
          u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
        }

        /* the locale name is the argument's base name without its last extension */
        icu::CharString locale;
        UErrorCode status = U_ZERO_ERROR;
        {
            const char *p = findBasename(arg);
            const char *q = uprv_strrchr(p, '.');
            if (q == NULL) {
                locale.append(p, status);
            } else {
                locale.append(p, (int32_t)(q - p), status);
            }
        }
        if (U_FAILURE(status)) {
            return status;
        }

        /* work out where to load from: an input directory of "-" selects the
           ICU data (thename stays NULL); otherwise thename becomes either the
           absolute argument itself or inputDir joined with the argument's
           directory part */
        icu::CharString infile;
        const char *thename = NULL;
        UBool fromICUData = !uprv_strcmp(inputDir, "-");
        if (!fromICUData) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#if U_PLATFORM_HAS_WIN32_API
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                infile.append(inputDir, status);
                if(q != NULL) {
                    infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
                }
                if (U_FAILURE(status)) {
                    return status;
                }
                thename = infile.data();
            }
        }

        if (thename) {
            bundle = ures_openDirect(thename, locale.data(), &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
        }
        if (U_SUCCESS(status)) {
            UFILE *out = NULL;

            const char *filename = 0;
            const char *ext = 0;

            /* filename/ext are needed to name the output file, and to print a
               name when the locale is empty */
            if (locale.isEmpty() || !tostdout) {
                filename = findBasename(arg);
                ext = uprv_strrchr(filename, '.');
                if (!ext) {
                    ext = uprv_strchr(filename, 0);
                }
            }

            if (tostdout) {
                out = u_get_stdout();
            } else {
                /* output file: [outputDir/]<basename without extension>.txt */
                icu::CharString thefile;
                if (outputDir) {
                    thefile.append(outputDir, status);
                }
                thefile.appendPathPart(filename, status);
                if (*ext) {
                    thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
                }
                thefile.append(".txt", status);
                if (U_FAILURE(status)) {
                    return status;
                }

                out = u_fopen(thefile.data(), "w", NULL, encoding);
                if (!out) {
                  u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
                  u_fclose(ustderr);
                  return 4;
                }
            }

            // now, set the callback.
            ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
            if (U_FAILURE(status)) {
              u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
              u_fclose(ustderr);
              if(!tostdout) {
                u_fclose(out);
              }
              return 3;
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
              u_fputc(0xFEFF, out);
            }
            /* header comment of the dump: encoding and origin of the data */
            u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
            u_fprintf(out, "// This file was dumped by derb(8) from ");
            if (thename) {
              u_fprintf(out, "%s", thename);
            } else if (fromICUData) {
              u_fprintf(out, "the ICU internal %s locale", locale.data());
            }

            u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");

            /* the bundle is introduced by its locale name, or by the file's
               base name followed by a space when the locale is empty */
            if (!locale.isEmpty()) {
              u_fprintf(out, "%s", locale.data());
            } else {
              u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp);
            }
            printOutBundle(out, bundle, 0, pname, &status);

            if (!tostdout) {
                u_fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    return 0;
}
// Encodes |qcs| into the effective encoding of this decoder.
//
// Pure Latin-1 / ASCII input is returned directly for the encodings where that
// is possible. Otherwise backslashes are swapped for the encoding's currency
// symbol, the text is normalised to NFC when the quick check does not confirm
// it already is, and the result is run through the ICU converter.
// Characters the target encoding cannot represent become XML decimal entities
// when |allowEntities| is true and '?' otherwise.
// Returns an empty DeprecatedCString when the converter cannot be created or
// configured.
DeprecatedCString StreamingTextDecoderICU::fromUnicode(const DeprecatedString &qcs, bool allowEntities)
{
    TextEncodingID encoding = m_encoding.effectiveEncoding().encodingID();

    if (encoding == WinLatin1Encoding && qcs.isAllLatin1())
        return qcs.latin1();

    if ((encoding == WinLatin1Encoding || encoding == UTF8Encoding || encoding == ASCIIEncoding)
        && qcs.isAllASCII())
        return qcs.ascii();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    DeprecatedString copy = qcs;
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return DeprecatedCString();

    // FIXME: when DeprecatedString buffer is latin1, it would be nice to
    // convert from that w/o having to allocate a unicode buffer

    char buffer[ConversionBufferSize];
    const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;
    DeprecatedString normalizedString;
    if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
        normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed

        int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), copy.length(), &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            // Retry with the length ICU asked for.
            err = U_ZERO_ERROR;
            normalizedString.truncate(normalizedLength);
            normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), normalizedLength, &err);
        }

        source = reinterpret_cast<const UChar*>(normalizedString.unicode());
        sourceLimit = source + normalizedLength;
    }

    DeprecatedCString result(1); // for trailing zero

    if (allowEntities)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
    else {
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return DeprecatedCString();

    do {
        char* target = buffer;
        // Keep the last byte of |buffer| free for the terminating zero written
        // below. With the full buffer handed to ICU, a completely filled chunk
        // made buffer[count] land one past the end of the array.
        char* targetLimit = target + ConversionBufferSize - 1;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
        int count = target - buffer;
        buffer[count] = 0;
        result.append(buffer);
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return result;
}
/*
 * From-Unicode converter callback that records, in its FromUFLAGContext,
 * whether an unassigned character was encountered, and then chains to the
 * sub-callback stored in that context.
 *
 * Besides the flagging it handles two housekeeping reasons:
 *   UCNV_CLONE - duplicates the context for the cloned converter and lets the
 *                sub-callback process the clone request as well;
 *   UCNV_CLOSE - frees the context after the sub-callback has been called.
 */
U_CAPI void U_EXPORT2 flagCB_fromU(
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err)
{
  FromUFLAGContext *self = (FromUFLAGContext*)context;

  /* First step - based on the reason code, take action */
  switch(reason) {
  case UCNV_UNASSIGNED:
    /* whatever set should be trapped here */
    self->flag = TRUE;
    break;

  case UCNV_CLONE:
    {
      /* The following is the recommended way to implement UCNV_CLONE
         in a callback. */
      UConverterFromUCallback   saveCallback;
      const void *saveContext;
      FromUFLAGContext *cloned;
      UErrorCode subErr = U_ZERO_ERROR;

#if DEBUG_TMI
      printf("*** FLAGCB: cloning %p ***\n", context);
#endif
      cloned = flagCB_fromU_openContext();

      memcpy(cloned, self, sizeof(FromUFLAGContext));

#if DEBUG_TMI
      printf("%p: my subcb=%p:%p\n", self, self->subCallback,
             self->subContext);
      printf("%p: cloned subcb=%p:%p\n", cloned, cloned->subCallback,
             cloned->subContext);
#endif

      /* We need to get the sub CB to handle cloning,
       * so we have to set up the following, temporarily:
       *
       *   - Set the callback+context to the sub of this (flag) cb
       *   - preserve the current cb+context, it could be anything
       *
       *   Before:
       *      CNV  ->   FLAG ->  subcb -> ...
       *
       *   After:
       *      CNV  ->   subcb -> ...
       *
       *    The chain from 'something' on is saved, and will be restored
       *   at the end of this block.
       *
       */

      ucnv_setFromUCallBack(fromUArgs->converter,
                            cloned->subCallback,
                            cloned->subContext,
                            &saveCallback,
                            &saveContext,
                            &subErr);

      if( cloned->subCallback != NULL ) {
        /* Now, call the sub callback if present */
        cloned->subCallback(cloned->subContext, fromUArgs, codeUnits,
                            length, codePoint, reason, err);
      }

      ucnv_setFromUCallBack(fromUArgs->converter,
                            saveCallback,  /* Us */
                            cloned,        /* new context */
                            &cloned->subCallback,  /* IMPORTANT! Accept any change in CB or context */
                            &cloned->subContext,
                            &subErr);

      if(U_FAILURE(subErr)) {
        *err = subErr;
      }
    }
    break;

  default:
    /* process other reasons here if need be */
    break;
  }

  /* Always call the subCallback if present (cloning already did so above) */
  if(self->subCallback != NULL && reason != UCNV_CLONE) {
    self->subCallback(self->subContext,
                      fromUArgs,
                      codeUnits,
                      length,
                      codePoint,
                      reason,
                      err);
  }

  /* cleanup - free the memory AFTER calling the sub CB */
  if(reason == UCNV_CLOSE) {
    free((void*)context);
  }
}
UErrorCode convert_to_utf8(const UChar* buffer, int32_t buffer_len, char** converted_buf, int32_t *converted_buf_len, bool force, bool* dropped_bytes) { UErrorCode status = U_ZERO_ERROR; UConverter *conv; int32_t utfConvertedLen = 0; // used to set dropped_bytes flag if force is true FromUFLAGContext * context = NULL; // open UTF8 converter conv = ucnv_open("utf-8", &status); if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("Cannot open utf-8 converter - error: %s.\n", u_errorName(status)))); ucnv_close(conv); return status; } if (force) { // set callback to skip illegal, irregular or unassigned bytes // set converter to use SKIP callback // contecxt will save and call it after calling custom callback ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status); //TODO: refactor warning and error message reporting if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status)))); ucnv_close(conv); return status; } // initialize flagging callback context = flagCB_fromU_openContext(); /* Set our special callback */ ucnv_setFromUCallBack(conv, flagCB_fromU, context, &(context->subCallback), &(context->subContext), &status ); if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("Cannot set callback on converter - error: %s.\n", u_errorName(status)))); ucnv_close(conv); return status; } } // convert to UTF8 // input buffer from ucnv_toUChars, which always returns a // NUL-terminated buffer utfConvertedLen = ucnv_fromUChars(conv, *converted_buf, *converted_buf_len, buffer, STRING_IS_NULL_TERMINATED, &status ); if (U_SUCCESS(status)) { *converted_buf_len = utfConvertedLen; ereport(DEBUG1, (errcode(ERRCODE_SUCCESSFUL_COMPLETION), errmsg("Converted string: %s\n", (const char*) *converted_buf))); // see if any bytes where dropped // context struct will go away when 
converter is closed if (NULL != context) *dropped_bytes = context->flag; else *dropped_bytes = false; } if (U_FAILURE(status)) { ereport(WARNING, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("ICU conversion from Unicode to UTF8 failed - error: %s.\n", u_errorName(status)))); } // close the converter ucnv_close(conv); return status; }
int main( void ) { UFILE *out; UErrorCode status = U_ZERO_ERROR; out = u_finit(stdout, NULL, NULL); if(!out) { fprintf(stderr, "Could not initialize (finit()) over stdout! \n"); return 1; } ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, NULL, NULL, NULL, &status); if(U_FAILURE(status)) { u_fprintf(out, "Warning- couldn't set the substitute callback - err %s\n", u_errorName(status)); } /* End Demo boilerplate */ u_fprintf(out,"ICU Case Mapping Sample Program\n\n"); u_fprintf(out, "C++ Case Mapping\n\n"); UnicodeString string("This is a test"); /* lowercase = "istanbul" */ UChar lowercase[] = {0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0}; /* uppercase = "LATIN CAPITAL I WITH DOT ABOVE STANBUL" */ UChar uppercase[] = {0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4C, 0}; UnicodeString upper(uppercase); UnicodeString lower(lowercase); u_fprintf(out, "\nstring: "); printUnicodeString(out, string); string.toUpper(); /* string = "THIS IS A TEST" */ u_fprintf(out, "\ntoUpper(): "); printUnicodeString(out, string); string.toLower(); /* string = "this is a test" */ u_fprintf(out, "\ntoLower(): "); printUnicodeString(out, string); u_fprintf(out, "\n\nlowercase=%S, uppercase=%S\n", lowercase, uppercase); string = upper; string.toLower(Locale("tr", "TR")); /* Turkish lower case map string = lowercase */ u_fprintf(out, "\nupper.toLower: "); printUnicodeString(out, string); string = lower; string.toUpper(Locale("tr", "TR")); /* Turkish upper case map string = uppercase */ u_fprintf(out, "\nlower.toUpper: "); printUnicodeString(out, string); u_fprintf(out, "\nEnd C++ sample\n\n"); // Call the C version int rc = c_main(out); u_fclose(out); return rc; }
/*
 * From-Unicode error callback that writes a textual escape for the offending
 * input instead of failing.
 *
 * The escape is assembled as UChars with uprv_itou and then written through
 * ucnv_cbFromUWriteUChars.  While it is being written, the converter's
 * callback is temporarily switched to UCNV_FROM_U_CALLBACK_SUBSTITUTE; the
 * previous callback and context are reinstalled afterwards.
 *
 * The first char of `context` selects the escape flavour; a NULL context or
 * an unrecognised selector produces "%U" followed by the number for each
 * code unit.  Reasons above UCNV_IRREGULAR are ignored.
 */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_ESCAPE (
    const void *context,
    UConverterFromUnicodeArgs *fromArgs,
    const UChar *codeUnits,
    int32_t length,
    UChar32 codePoint,
    UConverterCallbackReason reason,
    UErrorCode * err)
{
    UChar escapeText[VALUE_STRING_LENGTH];
    int32_t escapeLen = 0;
    int32_t unit;
    const UChar *escapeCursor = NULL;
    UErrorCode swapStatus = U_ZERO_ERROR;
    UConverterFromUCallback savedCallback = NULL;
    const void *savedContext;
    UConverterFromUCallback unusedCallback = NULL;
    const void *unusedContext;

    if (reason > UCNV_IRREGULAR) {
        return;
    }

    /* Swap in SUBSTITUTE for the duration of this callback. */
    ucnv_setFromUCallBack(fromArgs->converter,
                          (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
                          NULL,
                          &savedCallback,
                          &savedContext,
                          &swapStatus);
    if (U_FAILURE(swapStatus)) {
        *err = swapStatus;
        return;
    }

    if (context == NULL) {
        /* "%U" + number, once per code unit */
        for (unit = 0; unit < length; ++unit) {
            escapeText[escapeLen++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;
            escapeText[escapeLen++] = (UChar) UNICODE_U_CODEPOINT;
            escapeLen += uprv_itou(escapeText + escapeLen,
                                   VALUE_STRING_LENGTH - escapeLen,
                                   (uint16_t)codeUnits[unit], 16, 4);
        }
    } else {
        switch (*((char*)context)) {
        case UCNV_PRV_ESCAPE_JAVA:
            /* "\u" + number, once per code unit */
            for (unit = 0; unit < length; ++unit) {
                escapeText[escapeLen++] = (UChar) UNICODE_RS_CODEPOINT;
                escapeText[escapeLen++] = (UChar) UNICODE_U_LOW_CODEPOINT;
                escapeLen += uprv_itou(escapeText + escapeLen,
                                       VALUE_STRING_LENGTH - escapeLen,
                                       (uint16_t)codeUnits[unit], 16, 4);
            }
            break;

        case UCNV_PRV_ESCAPE_C:
            /* "\U" + code point for a pair, "\u" + code unit otherwise */
            escapeText[escapeLen++] = (UChar) UNICODE_RS_CODEPOINT;
            if (length != 2) {
                escapeText[escapeLen++] = (UChar) UNICODE_U_LOW_CODEPOINT;
                escapeLen += uprv_itou(escapeText + escapeLen,
                                       VALUE_STRING_LENGTH - escapeLen,
                                       (uint16_t)codeUnits[0], 16, 4);
            } else {
                escapeText[escapeLen++] = (UChar) UNICODE_U_CODEPOINT;
                escapeLen += uprv_itou(escapeText + escapeLen,
                                       VALUE_STRING_LENGTH - escapeLen,
                                       codePoint, 16, 8);
            }
            break;

        case UCNV_PRV_ESCAPE_XML_DEC:
            /* "&#" + number in radix 10 + ";" */
            escapeText[escapeLen++] = (UChar) UNICODE_AMP_CODEPOINT;
            escapeText[escapeLen++] = (UChar) UNICODE_HASH_CODEPOINT;
            escapeLen += uprv_itou(escapeText + escapeLen,
                                   VALUE_STRING_LENGTH - escapeLen,
                                   length == 2 ? codePoint : (uint16_t)codeUnits[0],
                                   10, 0);
            escapeText[escapeLen++] = (UChar) UNICODE_SEMICOLON_CODEPOINT;
            break;

        case UCNV_PRV_ESCAPE_XML_HEX:
            /* "&#x" + number in radix 16 + ";" */
            escapeText[escapeLen++] = (UChar) UNICODE_AMP_CODEPOINT;
            escapeText[escapeLen++] = (UChar) UNICODE_HASH_CODEPOINT;
            escapeText[escapeLen++] = (UChar) UNICODE_X_LOW_CODEPOINT;
            escapeLen += uprv_itou(escapeText + escapeLen,
                                   VALUE_STRING_LENGTH - escapeLen,
                                   length == 2 ? codePoint : (uint16_t)codeUnits[0],
                                   16, 0);
            escapeText[escapeLen++] = (UChar) UNICODE_SEMICOLON_CODEPOINT;
            break;

        case UCNV_PRV_ESCAPE_UNICODE:
            /* "{U+" + number in radix 16 + "}" */
            escapeText[escapeLen++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;
            escapeText[escapeLen++] = (UChar) UNICODE_U_CODEPOINT;
            escapeText[escapeLen++] = (UChar) UNICODE_PLUS_CODEPOINT;
            escapeLen += uprv_itou(escapeText + escapeLen,
                                   VALUE_STRING_LENGTH - escapeLen,
                                   length == 2 ? codePoint : (uint16_t)codeUnits[0],
                                   16, 4);
            escapeText[escapeLen++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;
            break;

        case UCNV_PRV_ESCAPE_CSS2:
            /* "\" + code point in radix 16 + " " */
            escapeText[escapeLen++] = (UChar) UNICODE_RS_CODEPOINT;
            escapeLen += uprv_itou(escapeText + escapeLen,
                                   VALUE_STRING_LENGTH - escapeLen,
                                   codePoint, 16, 0);
            /*
             * The trailing space is always emitted: the character that
             * follows might be whitespace, which would otherwise be taken
             * as the terminator of the escape sequence.
             */
            escapeText[escapeLen++] = (UChar) UNICODE_SPACE_CODEPOINT;
            break;

        default:
            /* unknown selector: same output as the NULL-context case */
            for (unit = 0; unit < length; ++unit) {
                escapeText[escapeLen++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;
                escapeText[escapeLen++] = (UChar) UNICODE_U_CODEPOINT;
                escapeLen += uprv_itou(escapeText + escapeLen,
                                       VALUE_STRING_LENGTH - escapeLen,
                                       (uint16_t)codeUnits[unit], 16, 4);
            }
        }
    }

    escapeCursor = escapeText;

    /* clear the incoming error before writing the replacement */
    *err = U_ZERO_ERROR;

    ucnv_cbFromUWriteUChars(fromArgs, &escapeCursor, escapeCursor + escapeLen,
                            0, err);

    /* put the caller's callback and context back */
    ucnv_setFromUCallBack(fromArgs->converter,
                          savedCallback,
                          savedContext,
                          &unusedCallback,
                          &unusedContext,
                          &swapStatus);
    if (U_FAILURE(swapStatus)) {
        *err = swapStatus;
    }
}
Bool CodeSet_GenericToGenericDb(const char *codeIn, // IN const char *bufIn, // IN size_t sizeIn, // IN const char *codeOut, // IN unsigned int flags, // IN DynBuf *db) // IN/OUT { Bool result = FALSE; UErrorCode uerr; const char *bufInCur; const char *bufInEnd; UChar bufPiv[1024]; UChar *bufPivSource; UChar *bufPivTarget; UChar *bufPivEnd; char *bufOut; char *bufOutCur; char *bufOutEnd; size_t bufOutSize; size_t bufOutOffset; UConverter *cvin = NULL; UConverter *cvout = NULL; UConverterToUCallback toUCb; UConverterFromUCallback fromUCb; ASSERT(codeIn); ASSERT(sizeIn == 0 || bufIn); ASSERT(codeOut); ASSERT(db); ASSERT((CSGTG_NORMAL == flags) || (CSGTG_TRANSLIT == flags) || (CSGTG_IGNORE == flags)); if (dontUseIcu) { // fall back return CodeSetOld_GenericToGenericDb(codeIn, bufIn, sizeIn, codeOut, flags, db); } /* * Trivial case. */ if ((0 == sizeIn) || (NULL == bufIn)) { result = TRUE; goto exit; } /* * Open converters. */ uerr = U_ZERO_ERROR; cvin = ucnv_open(codeIn, &uerr); if (!cvin) { goto exit; } uerr = U_ZERO_ERROR; cvout = ucnv_open(codeOut, &uerr); if (!cvout) { goto exit; } /* * Set callbacks according to flags. */ switch (flags) { case CSGTG_NORMAL: toUCb = UCNV_TO_U_CALLBACK_STOP; fromUCb = UCNV_FROM_U_CALLBACK_STOP; break; case CSGTG_TRANSLIT: toUCb = UCNV_TO_U_CALLBACK_SUBSTITUTE; fromUCb = UCNV_FROM_U_CALLBACK_SUBSTITUTE; break; case CSGTG_IGNORE: toUCb = UCNV_TO_U_CALLBACK_SKIP; fromUCb = UCNV_FROM_U_CALLBACK_SKIP; break; default: NOT_IMPLEMENTED(); break; } uerr = U_ZERO_ERROR; ucnv_setToUCallBack(cvin, toUCb, NULL, NULL, NULL, &uerr); if (U_ZERO_ERROR != uerr) { goto exit; } uerr = U_ZERO_ERROR; ucnv_setFromUCallBack(cvout, fromUCb, NULL, NULL, NULL, &uerr); if (U_ZERO_ERROR != uerr) { goto exit; } /* * Convert using ucnv_convertEx(). * As a starting guess, make the output buffer the same size as * the input string (with a fudge constant added in to avoid degen * cases). 
*/ bufInCur = bufIn; bufInEnd = bufIn + sizeIn; bufOutSize = sizeIn + 4; bufOutOffset = 0; bufPivSource = bufPiv; bufPivTarget = bufPiv; bufPivEnd = bufPiv + ARRAYSIZE(bufPiv); for (;;) { if (!DynBuf_Enlarge(db, bufOutSize)) { goto exit; } bufOut = DynBuf_Get(db); bufOutCur = bufOut + bufOutOffset; bufOutSize = DynBuf_GetAllocatedSize(db); bufOutEnd = bufOut + bufOutSize; uerr = U_ZERO_ERROR; ucnv_convertEx(cvout, cvin, &bufOutCur, bufOutEnd, &bufInCur, bufInEnd, bufPiv, &bufPivSource, &bufPivTarget, bufPivEnd, FALSE, TRUE, &uerr); if (!U_FAILURE(uerr)) { /* * "This was a triumph. * I'm making a note here: * HUGE SUCCESS. * It's hard to overstate * my satisfaction." */ break; } if (U_BUFFER_OVERFLOW_ERROR != uerr) { // failure goto exit; } /* * Our guess at 'bufOutSize' was obviously wrong, just double it. * We'll be reallocating bufOut, so will need to recompute bufOutCur * based on bufOutOffset. */ bufOutSize *= 2; bufOutOffset = bufOutCur - bufOut; } /* * Set final size and return. */ DynBuf_SetSize(db, bufOutCur - bufOut); result = TRUE; exit: if (cvin) { ucnv_close(cvin); } if (cvout) { ucnv_close(cvout); } return result; }
/*
 * Sample 20: decode `source` from UTF-8, re-encode it as windows-1252 with a
 * flagging callback installed in front of the converter's existing callback,
 * and report whether the callback was invoked.
 *
 * Returns the flag recorded by the callback context (true if the callback
 * was called).
 */
UBool convsample_20_didSubstitute(const char *source)
{
  UChar uchars[100];
  char bytes[100];
  UErrorCode status = U_ZERO_ERROR;

  printf("\n\n==============================================\n"
         "Sample 20: C: Test for substitution using callbacks\n");

  /* show the input bytes */
  printBytes("src", source);
  printf("\n");

  /* Step 1: UTF-8 -> Unicode */
  UConverter *conv = ucnv_open("utf-8", &status);
  U_ASSERT(status);

  uint32_t len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
  U_ASSERT(status);

  printUChars("uch", uchars, len);
  printf("\n");

  /* done with the UTF-8 converter */
  ucnv_close(conv);

  /* Step 2: Unicode -> windows-1252 */
  conv = ucnv_open("windows-1252", &status);
  U_ASSERT(status);

  /*
   * A freshly opened converter has the SUBSTITUTE callback installed.  Put
   * the flagging callback in front of it; the previous callback and context
   * are stored in the flag context as its sub-callback.
   */
  FromUFLAGContext *context = flagCB_fromU_openContext();

  ucnv_setFromUCallBack(conv,
                        flagCB_fromU,
                        context,
                        &(context->subCallback),
                        &(context->subContext),
                        &status);
  U_ASSERT(status);

  uint32_t len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
  U_ASSERT(status);

  /* read the flag before closing: the context goes away with the converter */
  UBool flagVal = context->flag;

  ucnv_close(conv);

  /* show the converted bytes */
  printBytes("bytes", bytes, len2);

  return flagVal; /* true if callback was called */
}
void charsetConverter_icu::convert (utility::inputStream& in, utility::outputStream& out, status* st) { UErrorCode err = U_ZERO_ERROR; ucnv_reset(m_from); ucnv_reset(m_to); if (st) new (st) status(); // From buffers byte_t cpInBuffer[16]; // stream data put here const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar); std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here // To buffers // converted (char) data end up here const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize; std::vector <char> cpOutBuffer(cpOutBufferSz); // Tell ICU what to do when encountering an illegal byte sequence if (m_options.silentlyReplaceInvalidSequences) { // Set replacement chars for when converting from Unicode to codepage icu::UnicodeString substString(m_options.invalidSequence.c_str()); ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err); if (U_FAILURE(err)) throw exceptions::charset_conv_error("[ICU] Error when setting substitution string."); } else { // Tell ICU top stop (and return an error) on illegal byte sequences ucnv_setToUCallBack (m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err); if (U_FAILURE(err)) throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback."); ucnv_setFromUCallBack (m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err); if (U_FAILURE(err)) throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback."); } // Input data available while (!in.eof()) { // Read input data into buffer size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer)); // Beginning of read data const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]); const char* sourceLimit = source + inLength; // end + 1 UBool flush = in.eof(); // is this last run? 
UErrorCode toErr; // Loop until all source has been processed do { // Set up target pointers UChar* target = &uOutBuffer[0]; UChar* targetLimit = &target[0] + outSize; toErr = U_ZERO_ERROR; ucnv_toUnicode(m_from, &target, targetLimit, &source, sourceLimit, NULL, flush, &toErr); if (st) st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0])); if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) { if (toErr == U_INVALID_CHAR_FOUND || toErr == U_TRUNCATED_CHAR_FOUND || toErr == U_ILLEGAL_CHAR_FOUND) { // Error will be thrown later (*) } else { throw exceptions::charset_conv_error("[ICU] Error converting to Unicode from " + m_source.getName()); } } // The Unicode source is the buffer just written and the limit // is where the previous conversion stopped (target is moved in the conversion) const UChar* uSource = &uOutBuffer[0]; UChar* uSourceLimit = &target[0]; UErrorCode fromErr; // Loop until converted chars are fully written do { char* cpTarget = &cpOutBuffer[0]; const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz; fromErr = U_ZERO_ERROR; // Write converted bytes (Unicode) to destination codepage ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit, &uSource, uSourceLimit, NULL, flush, &fromErr); if (st) { // Decrement input bytes count by the number of input bytes in error char errBytes[16]; int8_t errBytesLen = sizeof(errBytes); UErrorCode errBytesErr = U_ZERO_ERROR; ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr); st->inputBytesRead -= errBytesLen; st->outputBytesWritten += cpTarget - &cpOutBuffer[0]; } // (*) If an error occurred while converting from input charset, throw it now if (toErr == U_INVALID_CHAR_FOUND || toErr == U_TRUNCATED_CHAR_FOUND || toErr == U_ILLEGAL_CHAR_FOUND) { throw exceptions::illegal_byte_sequence_for_charset(); } if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) { if (fromErr == U_INVALID_CHAR_FOUND || fromErr == U_TRUNCATED_CHAR_FOUND || fromErr == U_ILLEGAL_CHAR_FOUND) { 
throw exceptions::illegal_byte_sequence_for_charset(); } else { throw exceptions::charset_conv_error("[ICU] Error converting from Unicode to " + m_dest.getName()); } } // Write to destination stream out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0])); } while (fromErr == U_BUFFER_OVERFLOW_ERROR); } while (toErr == U_BUFFER_OVERFLOW_ERROR); } }
unsigned int ICUTranscoder::transcodeTo( const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options) { // // Get a pointer to the buffer to transcode. If UChar and XMLCh are // the same size here, then use the original. Else, create a temp // one and put a janitor on it. // const UChar* srcPtr; UChar* tmpBufPtr = 0; if (sizeof(XMLCh) == sizeof(UChar)) { srcPtr = (const UChar*)srcData; } else { tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager()); srcPtr = tmpBufPtr; } ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager()); // // Set the appropriate callback so that it will either fail or use // the rep char. Remember the old one so we can put it back. // UErrorCode err = U_ZERO_ERROR; UConverterFromUCallback oldCB = NULL; #if (U_ICU_VERSION_MAJOR_NUM < 2) void* orgContent; #else const void* orgContent; #endif ucnv_setFromUCallBack ( fConverter , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP : UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL , &oldCB , &orgContent , &err ); // // Ok, lets transcode as many chars as we we can in one shot. The // ICU API gives enough info not to have to do this one char by char. 
// XMLByte* startTarget = toFill; const UChar* startSrc = srcPtr; err = U_ZERO_ERROR; ucnv_fromUnicode ( fConverter , (char**)&startTarget , (char*)(startTarget + maxBytes) , &startSrc , srcPtr + srcCount , 0 , false , &err ); // Rememember the status before we possibly overite the error code const bool res = (err == U_ZERO_ERROR); // Put the old handler back err = U_ZERO_ERROR; UConverterFromUCallback orgAction = NULL; ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err); if (!res) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Fill in the chars we ate from the input charsEaten = startSrc - srcPtr; // Return the chars we stored return startTarget - toFill; }
UBool convsample_21_didSubstitute(const char *source) { UChar uchars[100]; char bytes[100]; UConverter *conv = NULL, *cloneCnv = NULL; UErrorCode status = U_ZERO_ERROR; uint32_t len, len2; int32_t cloneLen; UBool flagVal = FALSE; UConverterFromUCallback junkCB; FromUFLAGContext *flagCtx = NULL, *cloneFlagCtx = NULL; debugCBContext *debugCtx1 = NULL, *debugCtx2 = NULL, *cloneDebugCtx = NULL; printf("\n\n==============================================\n" "Sample 21: C: Test for substitution w/ callbacks & clones \n"); /* print out the original source */ printBytes("src", source); printf("\n"); /* First, convert from UTF8 to unicode */ conv = ucnv_open("utf-8", &status); U_ASSERT(status); len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status); U_ASSERT(status); printUChars("uch", uchars, len); printf("\n"); /* Now, close the converter */ ucnv_close(conv); /* Now, convert to windows-1252 */ conv = ucnv_open("windows-1252", &status); U_ASSERT(status); /* Converter starts out with the SUBSTITUTE callback set. 
*/ /* initialize our callback */ /* from the 'bottom' innermost, out * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */ #if DEBUG_TMI printf("flagCB_fromU = %p\n", &flagCB_fromU); printf("debugCB_fromU = %p\n", &debugCB_fromU); #endif debugCtx1 = debugCB_openContext(); flagCtx = flagCB_fromU_openContext(); debugCtx2 = debugCB_openContext(); debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */ debugCtx1->subContext = flagCtx; flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */ flagCtx->subContext = debugCtx2; debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE; debugCtx2->subContext = NULL; /* Set our special callback */ ucnv_setFromUCallBack(conv, debugCB_fromU, debugCtx1, &(debugCtx2->subCallback), &(debugCtx2->subContext), &status); U_ASSERT(status); #if DEBUG_TMI printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n", conv, debugCtx1, debugCtx1->subCallback, debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback); #endif cloneLen = 1; /* but passing in null so it will clone */ cloneCnv = ucnv_safeClone(conv, NULL, &cloneLen, &status); U_ASSERT(status); #if DEBUG_TMI printf("Cloned converter from %p -> %p. 
Closing %p.\n", conv, cloneCnv, conv); #endif ucnv_close(conv); #if DEBUG_TMI printf("%p closed.\n", conv); #endif U_ASSERT(status); /* Now, we have to extract the context */ cloneDebugCtx = NULL; cloneFlagCtx = NULL; ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx); if(cloneDebugCtx != NULL) { cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext; } printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n", cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL ); len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status); U_ASSERT(status); if(cloneFlagCtx != NULL) { flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */ } else { printf("** Warning, couldn't get the subcallback \n"); } ucnv_close(cloneCnv); /* print out the original source */ printBytes("bytes", bytes, len2); return flagVal; /* true if callback was called */ }
extern int main(int argc, char* argv[]) { const char *encoding = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = "."; int tostdout = 0; int prbom = 0; const char *pname; UResourceBundle *bundle = NULL; UErrorCode status = U_ZERO_ERROR; int32_t i = 0; UConverter *converter; const char* arg; /* Get the name of tool. */ pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (!pname) { pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR); } #endif if (!pname) { pname = *argv; } else { ++pname; } /* error handling, printing usage message */ argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "%s: error in command line argument \"%s\"\n", pname, argv[-argc]); } if(argc<0 || options[0].doesOccur || options[1].doesOccur) { fprintf(argc < 0 ? stderr : stdout, "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n" " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n" " [ -t, --truncate [ size ] ]\n" " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n" " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n" " [ -A, --suppressAliases]\n" " bundle ...\n", argc < 0 ? 'u' : 'U', pname); return argc<0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[10].doesOccur) { fprintf(stderr, "%s version %s (ICU version %s).\n" "%s\n", pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); return U_ZERO_ERROR; } if(options[2].doesOccur) { encoding = options[2].value; } if (options[3].doesOccur) { tostdout = 1; } if(options[4].doesOccur) { trunc = TRUE; if(options[4].value != NULL) { truncsize = atoi(options[4].value); /* user defined printable size */ } else { truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */ } } else { trunc = FALSE; } if(options[5].doesOccur) { verbose = TRUE; } if (options[6].doesOccur) { outputDir = options[6].value; } if(options[7].doesOccur) { inputDir = options[7].value; /* we'll use users resources */ } if (options[8].doesOccur) { prbom = 1; } if (options[9].doesOccur) { u_setDataDirectory(options[9].value); } if (options[11].doesOccur) { suppressAliases = TRUE; } converter = ucnv_open(encoding, &status); if (U_FAILURE(status)) { fprintf(stderr, "%s: couldn't create %s converter for encoding\n", pname, encoding ? encoding : ucnv_getDefaultName()); return 2; } ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status); if (U_FAILURE(status)) { fprintf(stderr, "%s: couldn't configure converter for encoding\n", pname); return 3; } defaultConverter = ucnv_open(0, &status); if (U_FAILURE(status)) { fprintf(stderr, "%s: couldn't create %s converter for encoding\n", ucnv_getDefaultName(), pname); return 2; } for (i = 1; i < argc; ++i) { static const UChar sp[] = { 0x0020 }; /* " " */ char infile[4096]; /* XXX Sloppy. 
*/ char locale[64]; const char *thename = 0, *p, *q; UBool fromICUData = FALSE; arg = getLongPathname(argv[i]); if (verbose) { printf("processing bundle \"%s\"\n", argv[i]); } p = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (p == NULL) { p = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif if (!p) { p = arg; } else { p++; } q = uprv_strrchr(p, '.'); if (!q) { for (q = p; *q; ++q) ; } uprv_strncpy(locale, p, q - p); locale[q - p] = 0; if (!(fromICUData = !uprv_strcmp(inputDir, "-"))) { UBool absfilename = *arg == U_FILE_SEP_CHAR; #ifdef U_WINDOWS if (!absfilename) { absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0]) && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR); } #endif if (absfilename) { thename = arg; } else { q = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (q == NULL) { q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif uprv_strcpy(infile, inputDir); if(q != NULL) { uprv_strcat(infile, U_FILE_SEP_STRING); strncat(infile, arg, q-arg); } thename = infile; } } status = U_ZERO_ERROR; if (thename) { bundle = ures_openDirect(thename, locale, &status); } else { bundle = ures_open(fromICUData ? 
0 : inputDir, locale, &status); } if (status == U_ZERO_ERROR) { FILE *out; const char *filename = 0; const char *ext = 0; if (!locale || !tostdout) { filename = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (!filename) { filename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif if (!filename) { filename = arg; } else { ++filename; } ext = uprv_strrchr(arg, '.'); if (!ext) { ext = filename + uprv_strlen(filename); } } if (tostdout) { out = stdout; #if defined(U_WINDOWS) || defined(U_CYGWIN) if (setmode(fileno(out), O_BINARY) == -1) { fprintf(stderr, "%s: couldn't set standard output to binary mode\n", pname); return 4; } #endif } else { char thefile[4096], *tp; int32_t len; if (outputDir) { uprv_strcpy(thefile, outputDir); uprv_strcat(thefile, U_FILE_SEP_STRING); } else { *thefile = 0; } uprv_strcat(thefile, filename); tp = thefile + uprv_strlen(thefile); len = (int32_t)uprv_strlen(ext); if (len) { tp -= len - 1; } else { *tp++ = '.'; } uprv_strcpy(tp, "txt"); out = fopen(thefile, "w"); if (!out) { fprintf(stderr, "%s: couldn't create %s\n", pname, thefile); return 4; } } if (prbom) { /* XXX: Should be done only for UTFs */ static const UChar bom[] = { 0xFEFF }; printString(out, converter, bom, (int32_t)(sizeof(bom)/sizeof(*bom))); } printCString(out, converter, "// -*- Coding: ", -1); printCString(out, converter, encoding ? 
encoding : getEncodingName(ucnv_getDefaultName()), -1); printCString(out, converter, "; -*-\n//\n", -1); printCString(out, converter, "// This file was dumped by derb(8) from ", -1); if (thename) { printCString(out, converter, thename, -1); } else if (fromICUData) { printCString(out, converter, "the ICU internal ", -1); printCString(out, converter, locale, -1); printCString(out, converter, " locale", -1); } printCString(out, converter, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n", -1); if (locale) { printCString(out, converter, locale, -1); } else { printCString(out, converter, filename, (int32_t)(ext - filename)); printString(out, converter, sp, (int32_t)(sizeof(sp)/sizeof(*sp))); } printOutBundle(out, converter, bundle, 0, pname, &status); if (out != stdout) { fclose(out); } } else { reportError(pname, &status, "opening resource file"); } ures_close(bundle); } ucnv_close(defaultConverter); ucnv_close(converter); return 0; }
void debugCB_fromU(const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode * err) { debugCBContext *ctx = (debugCBContext*)context; /*UConverterFromUCallback junkFrom;*/ #if DEBUG_TMI printf("debugCB_fromU: Context %p:%d called, reason %d on cnv %p [err=%s]\n", ctx, ctx->serial, reason, fromUArgs->converter, u_errorName(*err)); #endif if(ctx->magic != 0xC0FFEE) { fprintf(stderr, "debugCB_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); return; } if(reason == UCNV_CLONE) { /* see comments in above flagCB clone code */ UConverterFromUCallback saveCallback; const void *saveContext; debugCBContext *cloned; UErrorCode subErr = U_ZERO_ERROR; /* "recreate" it */ #if DEBUG_TMI printf("debugCB_fromU: cloning..\n"); #endif cloned = debugCB_clone(ctx); if(cloned == NULL) { fprintf(stderr, "debugCB_fromU: internal clone failed on %p\n", ctx); *err = U_MEMORY_ALLOCATION_ERROR; return; } ucnv_setFromUCallBack(fromUArgs->converter, cloned->subCallback, cloned->subContext, &saveCallback, &saveContext, &subErr); if( cloned->subCallback != NULL) { #if DEBUG_TMI printf("debugCB_fromU:%p calling subCB %p\n", ctx, cloned->subCallback); #endif /* call subCB if present */ cloned->subCallback(cloned->subContext, fromUArgs, codeUnits, length, codePoint, reason, err); } else { printf("debugCB_fromU:%p, NOT calling subCB, it's NULL\n", ctx); } /* set back callback */ ucnv_setFromUCallBack(fromUArgs->converter, saveCallback, /* Us */ cloned, /* new context */ &cloned->subCallback, /* IMPORTANT! 
Accept any change in CB or context */ &cloned->subContext, &subErr); if(U_FAILURE(subErr)) { *err = subErr; } } /* process other reasons here */ /* always call subcb if present */ if(ctx->subCallback != NULL && reason != UCNV_CLONE) { ctx->subCallback(ctx->subContext, fromUArgs, codeUnits, length, codePoint, reason, err); } if(reason == UCNV_CLOSE) { #if DEBUG_TMI printf("debugCB_fromU: Context %p:%d closing\n", ctx, ctx->serial); #endif free(ctx); } #if DEBUG_TMI printf("debugCB_fromU: leaving cnv %p, ctx %p: err %s\n", fromUArgs->converter, ctx, u_errorName(*err)); #endif }