コード例 #1
0
U_CAPI char*  U_EXPORT2
u_austrncpy(char *s1,
        const UChar *ucs2,
        int32_t n)
{
  char *target = s1;
  UErrorCode err = U_ZERO_ERROR;
  UConverter *cnv = u_getDefaultConverter(&err);
  if(U_SUCCESS(err) && cnv != NULL) {
    ucnv_reset(cnv);
    ucnv_fromUnicode(cnv,
                  &target,
                  s1+n,
                  &ucs2,
                  ucs2+u_ustrnlen(ucs2, n),
                  NULL,
                  TRUE,
                  &err);
    ucnv_reset(cnv); /* be good citizens */
    u_releaseDefaultConverter(cnv);
    if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
      *s1 = 0; /* failure */
    }
    if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
      *target = 0;  /* terminate */
    }
  } else {
    *s1 = 0;
  }
  return s1;
}
コード例 #2
0
ファイル: cintltst.c プロジェクト: icu-project/icu4c
char *aescstrdup(const UChar* unichars,int32_t length){
    char *newString,*targetLimit,*target;
    UConverterFromUCallback cb;
    const void *p;
    UErrorCode errorCode = U_ZERO_ERROR;
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   if U_PLATFORM == U_PF_OS390
        static const char convName[] = "ibm-1047";
#   else
        static const char convName[] = "ibm-37";
#   endif
#else
    static const char convName[] = "US-ASCII";
#endif
    UConverter* conv = ucnv_open(convName, &errorCode);
    if(length==-1){
        length = u_strlen( unichars);
    }
    newString = (char*)ctst_malloc ( sizeof(char) * 8 * (length +1));
    target = newString;
    targetLimit = newString+sizeof(char) * 8 * (length +1);
    ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, &cb, &p, &errorCode);
    ucnv_fromUnicode(conv,&target,targetLimit, &unichars, (UChar*)(unichars+length),NULL,TRUE,&errorCode);
    ucnv_close(conv);
    *target = '\0';
    return newString;
}
コード例 #3
0
ファイル: qicucodec.cpp プロジェクト: 3163504123/phantomjs
QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const
{
    UConverter *conv = getConverter(state);

    int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
    QByteArray string(requiredLength, Qt::Uninitialized);

    const UChar *uc = (const UChar *)unicode;
    const UChar *end = uc + length;
    int convertedChars = 0;
    while (1) {
        char *ch = (char *)string.data();
        char *chEnd = ch + string.length();
        ch += convertedChars;
        UErrorCode error = U_ZERO_ERROR;
        ucnv_fromUnicode(conv,
                         &ch, chEnd,
                         &uc, end,
                         0, false, &error);
        if (!U_SUCCESS(error))
            qDebug() << "convertFromUnicode failed:" << u_errorName(error);
        convertedChars = ch - string.data();
        if (uc >= end)
            break;
        string.resize(string.length()*2);
    }
    string.resize(convertedChars);

    if (!state)
        ucnv_close(conv);

    return string;
}
コード例 #4
0
ファイル: ustream.cpp プロジェクト: servo/mozjs
U_IO_API STD_OSTREAM & U_EXPORT2
operator<<(STD_OSTREAM& stream, const UnicodeString& str)
{
    if(str.length() > 0) {
        char buffer[200];
        UConverter *converter;
        UErrorCode errorCode = U_ZERO_ERROR;

        // use the default converter to convert chunks of text
        converter = u_getDefaultConverter(&errorCode);
        if(U_SUCCESS(errorCode)) {
            const UChar *us = str.getBuffer();
            const UChar *uLimit = us + str.length();
            char *s, *sLimit = buffer + (sizeof(buffer) - 1);
            do {
                errorCode = U_ZERO_ERROR;
                s = buffer;
                ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, FALSE, &errorCode);
                *s = 0;

                // write this chunk
                if(s > buffer) {
                    stream << buffer;
                }
            } while(errorCode == U_BUFFER_OVERFLOW_ERROR);
            u_releaseDefaultConverter(converter);
        }
    }

/*    stream.flush();*/
    return stream;
}
コード例 #5
0
inline int
mod_websocket_conv(UConverter *to, UConverter *from,
                   char **dst, size_t *dstsiz,
                   const char *src, size_t srcsiz) {
    UErrorCode err = U_ZERO_ERROR;
    size_t unisiz;
    UChar *unibuf, *punibuf, *ppunibuf;
    char *pdst;

    if (srcsiz == 0) {
        return -1;
    }
    if (!to) {
        *dst = (char *)malloc(srcsiz + 1);
        if (*dst == NULL) {
            return -1;
        }
        memcpy(*dst, src, srcsiz);
        (*dst)[srcsiz] = '\0';
        *dstsiz = srcsiz;
        return 0;
    }
    if (!from || !dst || !src || !dstsiz) {
        return -1;
    }
    unisiz = srcsiz / ucnv_getMinCharSize(from);
    unibuf = (UChar *)malloc(sizeof(UChar) * unisiz + 1);
    if (!unibuf) {
        return -1;
    }
    punibuf = unibuf;
    ucnv_toUnicode(from, &punibuf, punibuf + unisiz,
                   &src, src + srcsiz, 0, 0, &err);
    if (U_FAILURE(err)) {
        free(unibuf);
        return -1;
    }
    *punibuf = '\0';
    *dstsiz = (punibuf - unibuf) * ucnv_getMaxCharSize(to);
    *dst = (char *)malloc(*dstsiz + 1);
    if (!*dst) {
        free(unibuf);
        return -1;
    }
    pdst = *dst;
    ppunibuf = unibuf;
    ucnv_fromUnicode(to, &pdst, pdst + *dstsiz,
                     (const UChar **)&ppunibuf, punibuf, 0, 0, &err);
    free(unibuf);
    if (U_FAILURE(err)) {
        free(*dst);
        return -1;
    }
    *pdst = '\0';
    *dstsiz = pdst - *dst;
    return 0;
}
コード例 #6
0
static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
        jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
        jintArray data, jboolean flush) {

    UConverter* cnv = toUConverter(address);
    if (cnv == NULL) {
        maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }
    ScopedCharArrayRO uSource(env, source);
    if (uSource.get() == NULL) {
        maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }
    ScopedByteArrayRW uTarget(env, target);
    if (uTarget.get() == NULL) {
        maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }
    ScopedIntArrayRW myData(env, data);
    if (myData.get() == NULL) {
        maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }

    // Do the conversion.
    jint* sourceOffset = &myData[0];
    jint* targetOffset = &myData[1];
    const jchar* mySource = uSource.get() + *sourceOffset;
    const UChar* mySourceLimit= uSource.get() + sourceEnd;
    char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
    const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
    UErrorCode errorCode = U_ZERO_ERROR;
    ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode);
    *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
    *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get());

    // If there was an error, count the problematic characters.
    if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
        errorCode == U_TRUNCATED_CHAR_FOUND) {
        int8_t invalidUCharCount = 32;
        UChar invalidUChars[32];
        UErrorCode minorErrorCode = U_ZERO_ERROR;
        ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
        if (U_SUCCESS(minorErrorCode)) {
            myData[2] = invalidUCharCount;
        }
    }

    // Managed code handles some cases; throw all other errors.
    if (shouldCodecThrow(flush, errorCode)) {
        maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode);
    }
    return errorCode;
}
コード例 #7
0
CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return CString();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    String copy(characters, length);
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    const UChar* source = copy.characters();
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;

    switch (handling) {
        case QuestionMarksForUnencodables:
            ucnv_setSubstChars(m_converterICU, "?", 1, &err);
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
            break;
        case EntitiesForUnencodables:
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
            break;
        case URLEncodedEntitiesForUnencodables:
            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
            break;
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return CString();

    Vector<char> result;
    size_t size = 0;
    do {
        char buffer[ConversionBufferSize];
        char* target = buffer;
        char* targetLimit = target + ConversionBufferSize;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
        size_t count = target - buffer;
        result.grow(size + count);
        memcpy(result.data() + size, buffer, count);
        size += count;
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return CString(result.data(), size);
}
コード例 #8
0
ファイル: uwmsg.c プロジェクト: 00zhengfu00/third_party
/* Print a ustring to the specified FILE* in the default codepage */
static void
uprint(const UChar *s,
       int32_t sourceLen,
       FILE *f,
       UErrorCode *status)
{
    /* converter */
    UConverter *converter;
    char buf [BUF_SIZE];
    const UChar *mySource;
    const UChar *mySourceEnd;
    char *myTarget;
    int32_t arraySize;

    if(s == 0) return;

    /* set up the conversion parameters */
    mySource     = s;
    mySourceEnd  = mySource + sourceLen;
    myTarget     = buf;
    arraySize    = BUF_SIZE;

    /* open a default converter */
    converter = ucnv_open(0, status);

    /* if we failed, clean up and exit */
    if(U_FAILURE(*status)) goto finish;

    /* perform the conversion */
    do {
        /* reset the error code */
        *status = U_ZERO_ERROR;

        /* perform the conversion */
        ucnv_fromUnicode(converter, &myTarget,  myTarget + arraySize,
            &mySource, mySourceEnd, NULL,
            TRUE, status);

        /* Write the converted data to the FILE* */
        fwrite(buf, sizeof(char), myTarget - buf, f);

        /* update the conversion parameters*/
        myTarget     = buf;
        arraySize    = BUF_SIZE;
    }
    while(*status == U_BUFFER_OVERFLOW_ERROR); 

finish:

    /* close the converter */
    ucnv_close(converter);
}
コード例 #9
0
CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHandling handling)
{
    const UChar* source = input.begin();
    const UChar* end = input.end();

    UErrorCode err = U_ZERO_ERROR;

    switch (handling) {
    case QuestionMarksForUnencodables:
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
#if !defined(USING_SYSTEM_ICU)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
#else
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
#endif
        break;
    case EntitiesForUnencodables:
#if !defined(USING_SYSTEM_ICU)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
#else
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
#endif
        break;
    case URLEncodedEntitiesForUnencodables:
#if !defined(USING_SYSTEM_ICU)
        ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0, &err);
#else
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
#endif
        break;
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return CString();

    Vector<char> result;
    size_t size = 0;
    do {
        char buffer[ConversionBufferSize];
        char* target = buffer;
        char* targetLimit = target + ConversionBufferSize;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0, true, &err);
        size_t count = target - buffer;
        result.grow(size + count);
        memcpy(result.data() + size, buffer, count);
        size += count;
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return CString(result.data(), size);
}
コード例 #10
0
ファイル: derb.c プロジェクト: flwh/Alcatel_OT_985_kernel
static void printString(FILE *out, UConverter *converter, const UChar *str, int32_t len) {
    char buf[256];
    const UChar *strEnd;

    if (len < 0) {
        len = u_strlen(str);
    }
    strEnd = str + len;

    do {
        UErrorCode err = U_ZERO_ERROR;
        char *bufp = buf, *bufend = buf + sizeof(buf) - 1 ;

        ucnv_fromUnicode(converter, &bufp, bufend, &str, strEnd, 0, 0, &err);
        *bufp = 0;

        fprintf(out, "%s", buf);
    } while (str < strEnd);
}
コード例 #11
0
ファイル: icu_filter.hpp プロジェクト: hitochan777/cicada
    bool filter(const char_type*& src_begin, const char_type* src_end,
		char_type*& dest_begin, char_type* dest_end,
		bool flush)
    {
      if (! ucnv_from && ! ucnv_to) {
	// no converter... simply copy!
	
	const size_t copy_size = std::min(src_end - src_begin, dest_end - dest_begin);
	
	std::copy(src_begin, src_begin + copy_size, dest_begin);
	
	src_begin += copy_size;
	dest_begin += copy_size;
	
	return false;
      }

      UChar* pivot_end = pivot_start + boost::iostreams::default_device_buffer_size;
      
      UErrorCode status = U_ZERO_ERROR;
      
      if (pivot_target != pivot_end) {
	const char_type* src_begin_prev = src_begin;
	UChar* pivot_target_prev = pivot_target;
	
	status = U_ZERO_ERROR;
	ucnv_toUnicode(ucnv_from, &pivot_target, pivot_end, &src_begin, src_end, 0, flush, &status);
	
	offset_from += src_begin - src_begin_prev;
	offset_pivot_target += pivot_target - pivot_target_prev;
	
	if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status)) {
	  UErrorCode status_getname = U_ZERO_ERROR;
	  const char* encoding = ucnv_getName(ucnv_from, &status_getname);
	  
	  std::ostringstream offset_stream;
	  offset_stream << offset_from;

	  std::ostringstream offset_stream_unicode;
	  offset_stream_unicode << offset_pivot_target;
	  
	  message_from = (std::string("ucnv_toUnicode(): ") + u_errorName(status)
			  + " from " + encoding
			  + " offset: " + offset_stream.str()
			  + " unicode offset: " + offset_stream_unicode.str());
	  throw BOOST_IOSTREAMS_FAILURE(message_from);
	}
      }
      
      char_type*   dest_begin_prev = dest_begin;
      const UChar* pivot_source_prev = pivot_source;
      
      status = U_ZERO_ERROR;
      ucnv_fromUnicode(ucnv_to, &dest_begin, dest_end, &pivot_source, pivot_target, 0, flush, &status);
      
      offset_to += dest_begin - dest_begin_prev;
      offset_pivot_source += pivot_source - pivot_source_prev;
      
      if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status)) {
	UErrorCode status_getname = U_ZERO_ERROR;
	const char* encoding = ucnv_getName(ucnv_to, &status_getname);
	
	std::ostringstream offset_stream;
	offset_stream << offset_to;
	
	std::ostringstream offset_stream_unicode;
	offset_stream_unicode << offset_pivot_source;
	
	message_to = (std::string("ucnv_fromUnicode(): ") + u_errorName(status) 
		      + " to " + encoding
		      + " offset: " + offset_stream.str()
		      + " unicode offset: " + offset_stream_unicode.str());
	throw BOOST_IOSTREAMS_FAILURE(message_to);
      }
      
      if (pivot_source == pivot_target) {
	pivot_source = pivot_start;
	pivot_target = pivot_start;
      }
      
      return status == U_BUFFER_OVERFLOW_ERROR;
    }
コード例 #12
0
ファイル: ICUTransService.cpp プロジェクト: brock7/TianLong
unsigned int
ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
                            , const unsigned int    srcCount
                            ,       XMLByte* const  toFill
                            , const unsigned int    maxBytes
                            ,       unsigned int&   charsEaten
                            , const UnRepOpts       options)
{
    //
    //  Get a pointer to the buffer to transcode. If UChar and XMLCh are
    //  the same size here, then use the original. Else, create a temp
    //  one and put a janitor on it.
    //
    const UChar* srcPtr;
    UChar* tmpBufPtr = 0;
    if (sizeof(XMLCh) == sizeof(UChar))
    {
        srcPtr = (const UChar*)srcData;
    }
    else
    {
        tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());
        srcPtr = tmpBufPtr;
    }
    ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());

    //
    //  Set the appropriate callback so that it will either fail or use
    //  the rep char. Remember the old one so we can put it back.
    //
    UErrorCode  err = U_ZERO_ERROR;
    UConverterFromUCallback oldCB = NULL;
    #if (U_ICU_VERSION_MAJOR_NUM < 2)
    void* orgContent;
    #else
    const void* orgContent;
    #endif
    ucnv_setFromUCallBack
    (
        fConverter
        , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
                                   : UCNV_FROM_U_CALLBACK_SUBSTITUTE
        , NULL
        , &oldCB
        , &orgContent
        , &err
    );

    //
    //  Ok, lets transcode as many chars as we we can in one shot. The
    //  ICU API gives enough info not to have to do this one char by char.
    //
    XMLByte*        startTarget = toFill;
    const UChar*    startSrc = srcPtr;
    err = U_ZERO_ERROR;
    ucnv_fromUnicode
    (
        fConverter
        , (char**)&startTarget
        , (char*)(startTarget + maxBytes)
        , &startSrc
        , srcPtr + srcCount
        , 0
        , false
        , &err
    );

    // Rememember the status before we possibly overite the error code
    const bool res = (err == U_ZERO_ERROR);

    // Put the old handler back
    err = U_ZERO_ERROR;
    UConverterFromUCallback orgAction = NULL;

    ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);

    if (!res)
    {
        XMLCh tmpBuf[17];
        XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager());
        ThrowXMLwithMemMgr2
        (
            TranscodingException
            , XMLExcepts::Trans_Unrepresentable
            , tmpBuf
            , getEncodingName()
            , getMemoryManager()
        );
    }

    // Fill in the chars we ate from the input
    charsEaten = startSrc - srcPtr;

    // Return the chars we stored
    return startTarget - toFill;
}
コード例 #13
0
ファイル: uconv.cpp プロジェクト: gitpan/ponie
// Convert a file from one encoding to another
static UBool convertFile(const char *pname,
                         const char *fromcpage,
                         UConverterToUCallback toucallback,
                         const void *touctxt,
                         const char *tocpage,
                         UConverterFromUCallback fromucallback,
                         const void *fromuctxt,
                         int fallback,
                         size_t bufsz,
                         const char *translit,
                         const char *infilestr,
                         FILE * outfile, int verbose)
{
    FILE *infile;
    UBool ret = TRUE;
    UConverter *convfrom = 0;
    UConverter *convto = 0;
    UErrorCode err = U_ZERO_ERROR;
    UBool flush;
    const char *cbufp;
    char *bufp;
    char *buf = 0;

    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */

    const UChar *unibufbp;
    UChar *unibufp;
    UChar *unibuf = 0;
    int32_t *fromoffsets = 0, *tooffsets = 0;

    size_t rd, wr, tobufsz;

#if !UCONFIG_NO_TRANSLITERATION
    Transliterator *t = 0;      // Transliterator acting on Unicode data.
#endif
    UnicodeString u;            // String to do the transliteration.

    // Open the correct input file or connect to stdin for reading input

    if (infilestr != 0 && strcmp(infilestr, "-")) {
        infile = fopen(infilestr, "rb");
        if (infile == 0) {
            UnicodeString str1(infilestr, "");
            str1.append((UChar32) 0);
            UnicodeString str2(strerror(errno), "");
            str2.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
            return FALSE;
        }
    } else {
        infilestr = "-";
        infile = stdin;
#ifdef WIN32
        if (setmode(fileno(stdin), O_BINARY) == -1) {
            initMsg(pname);
            u_wmsg(stderr, "cantSetInBinMode");
            return FALSE;
        }
#endif
    }

    if (verbose) {
        fprintf(stderr, "%s:\n", infilestr);
    }

#if !UCONFIG_NO_TRANSLITERATION
    // Create transliterator as needed.

    if (translit != NULL && *translit) {
        UParseError parse;
        UnicodeString str(translit), pestr;

        /* Create from rules or by ID as needed. */

        parse.line = -1;

        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
        } else {
            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
        }

        if (U_FAILURE(err)) {
            str.append((UChar32) 0);
            initMsg(pname);

            if (parse.line >= 0) {
                UChar linebuf[20], offsetbuf[20];
                uprv_itou(linebuf, 20, parse.line, 10, 0);
                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getBuffer(),
                    u_wmsg_errorName(err), linebuf, offsetbuf);
            } else {
                u_wmsg(stderr, "cantCreateTranslit", str.getBuffer(),
                    u_wmsg_errorName(err));
            }

            if (t) {
                delete t;
                t = 0;
            }
            goto error_exit;
        }
    }
#endif

    // Create codepage converter. If the codepage or its aliases weren't
    // available, it returns NULL and a failure code. We also set the
    // callbacks, and return errors in the same way.

    convfrom = ucnv_open(fromcpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(fromcpage, (int32_t)(uprv_strlen(fromcpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenFromCodeset", str.getBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }

    convto = ucnv_open(tocpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(tocpage, (int32_t)(uprv_strlen(tocpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenToCodeset", str.getBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFallback(convto, fallback);

    // To ensure that the buffer always is of enough size, we
    // must take the worst case scenario, that is the character in
    // the codepage that uses the most bytes and multiply it against
    // the buffer size.

    // use bufsz+1 to allow for additional BOM/signature character (U+FEFF)
    tobufsz = (bufsz+1) * ucnv_getMaxCharSize(convto);

    buf = new char[tobufsz];
    unibuf = new UChar[bufsz];

    fromoffsets = new int32_t[bufsz];
    tooffsets = new int32_t[tobufsz];

    // OK, we can convert now.

    do {
        char willexit = 0;

        rd = fread(buf, 1, bufsz, infile);
        if (ferror(infile) != 0) {
            UnicodeString str(strerror(errno));
            str.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantRead", str.getBuffer());
            goto error_exit;
        }

        // Convert the read buffer into the new coding
        // After the call 'unibufp' will be placed on the last
        // character that was converted in the 'unibuf'.
        // Also the 'cbufp' is positioned on the last converted
        // character.
        // At the last conversion in the file, flush should be set to
        // true so that we get all characters converted
        //
        // The converter must be flushed at the end of conversion so
        // that characters on hold also will be written.

        unibufp = unibuf;
        cbufp = buf;
        flush = rd != bufsz;
        ucnv_toUnicode(convfrom, &unibufp, unibufp + bufsz, &cbufp,
            cbufp + rd, fromoffsets, flush, &err);

        infoffset += (uint32_t)(cbufp - buf);

        if (U_FAILURE(err)) {
            char pos[32];
            sprintf(pos, "%u", infoffset - 1);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
            willexit = 1;
            err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
        }

        // At the last conversion, the converted characters should be
        // equal to number of chars read.

        if (flush && !willexit && cbufp != (buf + rd)) {
            char pos[32];
            sprintf(pos, "%u", infoffset);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "premEndInput", str.getBuffer());
            willexit = 1;
        }

        // Prepare to transliterate and convert. Transliterate if needed.

#if !UCONFIG_NO_TRANSLITERATION
        if (t) {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf)); // Copy into string.
            t->transliterate(u);
        } else
#endif
        {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf), (int32_t)(bufsz)); // Share the buffer.
        }

        int32_t ulen = u.length();

        // Convert the Unicode buffer into the destination codepage
        // Again 'bufp' will be placed on the last converted character
        // And 'unibufbp' will be placed on the last converted unicode character
        // At the last conversion flush should be set to true to ensure that
        // all characters left get converted

        const UChar *unibufu = unibufbp = u.getBuffer();

        do {
            int32_t len = ulen > (int32_t)bufsz ? (int32_t)bufsz : ulen;

            bufp = buf;
            unibufp = (UChar *) (unibufbp + len);

            ucnv_fromUnicode(convto, &bufp, bufp + tobufsz,
                             &unibufbp,
                             unibufp,
                             tooffsets, flush, &err);

            if (U_FAILURE(err)) {
                const char *errtag;
                char pos[32];

                uint32_t erroffset =
                    dataOffset((int32_t)(bufp - buf - 1), fromoffsets, (int32_t)(bufsz), tooffsets, (int32_t)(tobufsz));
                int32_t ferroffset = (int32_t)(infoffset - (unibufp - unibufu) + erroffset);

                if ((int32_t) ferroffset < 0) {
                    ferroffset = (int32_t)(outfoffset + (bufp - buf));
                    errtag = "problemCvtFromUOut";
                } else {
                    errtag = "problemCvtFromU";
                }
                sprintf(pos, "%u", ferroffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, errtag, str.getBuffer(),
                       u_wmsg_errorName(err));
                willexit = 1;
            }

            // At the last conversion, the converted characters should be equal to number
            // of consumed characters.
            if (flush && !willexit && unibufbp != (unibufu + (size_t) (unibufp - unibufu))) {
                char pos[32];
                sprintf(pos, "%u", infoffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, "premEnd", str.getBuffer());
                willexit = 1;
            }

            // Finally, write the converted buffer to the output file


            rd = (size_t) (bufp - buf);
            outfoffset += (int32_t)(wr = fwrite(buf, 1, rd, outfile));
            if (wr != rd) {
                UnicodeString str(strerror(errno), "");
                initMsg(pname);
                u_wmsg(stderr, "cantWrite", str.getBuffer());
                willexit = 1;
            }

            if (willexit) {
                goto error_exit;
            }
        } while ((ulen -= (int32_t)(bufsz)) > 0);
    } while (!flush);           // Stop when we have flushed the
                                // converters (this means that it's
                                // the end of output)

    goto normal_exit;

error_exit:
    ret = FALSE;

normal_exit:
    // Cleanup.

    if (convfrom) ucnv_close(convfrom);
    if (convto) ucnv_close(convto);

#if !UCONFIG_NO_TRANSLITERATION
    if (t) delete t;
#endif

    if (buf) delete[] buf;
    if (unibuf) delete[] unibuf;

    if (fromoffsets) delete[] fromoffsets;
    if (tooffsets) delete[] tooffsets;

    if (infile != stdin) {
        fclose(infile);
    }

    return ret;
}
コード例 #14
0
ファイル: iconv_cnv.cpp プロジェクト: brock7/TianLong
/**************************
* Will convert a sequence of bytes from one codepage to another.
* @param toConverterName: The name of the converter that will be used to encode the output buffer
* @param fromConverterName: The name of the converter that will be used to decode the input buffer
* @param target: Pointer to the output buffer* written
* @param targetLength: on input contains the capacity of target, on output the number of bytes copied to target
* @param source: Pointer to the input buffer
* @param sourceLength: on input contains the capacity of source, on output the number of bytes processed in "source"
* @param internal: used internally to store store state data across calls
* @param err: fills in an error status
*/
void
T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
				     UConverter * inConverter,
				     char **target,
				     const char *targetLimit,
				     const char **source,
				     const char *sourceLimit,
				     int32_t* offsets,
				     int flush,
				     UErrorCode * err)
{

  UChar out_chunk[CHUNK_SIZE];
  const UChar *out_chunk_limit = out_chunk + CHUNK_SIZE;
  UChar *out_chunk_alias;
  UChar const *out_chunk_alias2;


  if (U_FAILURE (*err))    return;


  /*loops until the input buffer is completely consumed
   *or if an error has be encountered
   *first we convert from inConverter codepage to Unicode
   *then from Unicode to outConverter codepage
   */
  while ((*source != sourceLimit) && U_SUCCESS (*err))
    {
      out_chunk_alias = out_chunk;
      ucnv_toUnicode (inConverter,
		      &out_chunk_alias,
		      out_chunk_limit,
		      source,
		      sourceLimit,
		      NULL,
		      flush,
		      err);

      /*BUFFER_OVERFLOW_ERROR means that the output "CHUNK" is full
       *we will require at least another loop (it's a recoverable error)
       */

      if (U_SUCCESS (*err) || (*err == U_BUFFER_OVERFLOW_ERROR))
	{
	  *err = U_ZERO_ERROR;
	  out_chunk_alias2 = out_chunk;

	  while ((out_chunk_alias2 != out_chunk_alias) && U_SUCCESS (*err))
	    {
	      ucnv_fromUnicode (outConverter,
				target,
				targetLimit,
				&out_chunk_alias2,
				out_chunk_alias,
				NULL,
				TRUE,
				err);

	    }
	}
      else
	break;
    }

  return;
}
コード例 #15
0
ファイル: iconv_cnv.cpp プロジェクト: brock7/TianLong
int32_t   ucnv_fromUChars (const UConverter * converter,
			   char *target,
			   int32_t targetSize,
			   const UChar * source,
			   UErrorCode * err)
{
  const UChar *mySource = source;
  const UChar *mySource_limit;
  int32_t mySourceLength = 0;
  UConverter myConverter;
  char *myTarget = target;
  int32_t targetCapacity = 0;

  if (U_FAILURE (*err))
    return 0;

  if ((converter == NULL) || (targetSize < 0))
    {
      *err = U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
    }

  /*makes a local copy of the UConverter */
  myConverter = *converter;


  /*if the source is empty we return immediately */
  mySourceLength = u_strlen (source);
  if (mySourceLength == 0)
    {
      /*for consistency we still need to
       *store 0 in the targetCapacity
       *if the user requires it
       */
      return 0;
    }

  mySource_limit = mySource + mySourceLength;

  if (targetSize > 0)
    {
      ucnv_fromUnicode (&myConverter,
			&myTarget,
			target + targetSize,
			&mySource,
			mySource_limit,
			NULL,
			TRUE,
			err);
      targetCapacity = myTarget - target;
    }

  /*Updates targetCapacity to contain the number of bytes written to target */

  if (targetSize == 0)
    {
      *err = U_BUFFER_OVERFLOW_ERROR;
    }

  /* If the output buffer is exhausted, we need to stop writing
   * to it but continue the conversion in order to store in targetSize
   * the number of bytes that was required*/
  if (*err == U_BUFFER_OVERFLOW_ERROR)
    {
      char target2[CHUNK_SIZE];
      char *target2_alias = target2;
      const char *target2_limit = target2 + CHUNK_SIZE;

      /*We use a stack allocated buffer around which we loop
       *(in case the output is greater than CHUNK_SIZE)
       */

      while (*err == U_BUFFER_OVERFLOW_ERROR)
	{
	  *err = U_ZERO_ERROR;
	  target2_alias = target2;
	  ucnv_fromUnicode (&myConverter,
			    &target2_alias,
			    target2_limit,
			    &mySource,
			    mySource_limit,
			    NULL,
			    TRUE,
			    err);

	  /*updates the output parameter to contain the number of char required */
	  targetCapacity += (target2_alias - target2) + 1;
	}
      /*We will set the erro code to BUFFER_OVERFLOW_ERROR only if
       *nothing graver happened in the previous loop*/
      (targetCapacity)--;
      if (U_SUCCESS (*err))
	*err = U_BUFFER_OVERFLOW_ERROR;
    }

  return targetCapacity;
}
コード例 #16
0
ファイル: ucnv_cb.cpp プロジェクト: winlibs/icu4c
U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
                             const UChar** source,
                             const UChar*  sourceLimit,
                             int32_t offsetIndex,
                             UErrorCode * err)
{
    /*
    This is a fun one.  Recursion can occur - we're basically going to
    just retry shoving data through the same converter. Note, if you got
    here through some kind of invalid sequence, you maybe should emit a
    reset sequence of some kind and/or call ucnv_reset().  Since this
    IS an actual conversion, take care that you've changed the callback
    or the data, or you'll get an infinite loop.

    Please set the err value to something reasonable before calling
    into this.
    */

    char *oldTarget;

    if(U_FAILURE(*err))
    {
        return;
    }

    oldTarget = args->target;

    ucnv_fromUnicode(args->converter,
        &args->target,
        args->targetLimit,
        source,
        sourceLimit,
        NULL, /* no offsets */
        FALSE, /* no flush */
        err);

    if(args->offsets)
    {
        while (args->target != oldTarget)  /* if it moved at all.. */
        {
            *(args->offsets)++ = offsetIndex;
            oldTarget++;
        }
    }

    /*
    Note, if you did something like used a Stop subcallback, things would get interesting.
    In fact, here's where we want to return the partially consumed in-source!
    */
    if(*err == U_BUFFER_OVERFLOW_ERROR)
    /* && (*source < sourceLimit && args->target >= args->targetLimit)
    -- S. Hrcek */
    {
        /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
        It's a fixed size. If we overflow it... Hmm */
        char *newTarget;
        const char *newTargetLimit;
        UErrorCode err2 = U_ZERO_ERROR;

        int8_t errBuffLen;

        errBuffLen  = args->converter->charErrorBufferLength;

        /* start the new target at the first free slot in the errbuff.. */
        newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);

        newTargetLimit = (char *)(args->converter->charErrorBuffer +
            sizeof(args->converter->charErrorBuffer));

        if(newTarget >= newTargetLimit)
        {
            *err = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        /* We're going to tell the converter that the errbuff len is empty.
        This prevents the existing errbuff from being 'flushed' out onto
        itself.  If the errbuff is needed by the converter this time,
        we're hosed - we're out of space! */

        args->converter->charErrorBufferLength = 0;

        ucnv_fromUnicode(args->converter,
                         &newTarget,
                         newTargetLimit,
                         source,
                         sourceLimit,
                         NULL,
                         FALSE,
                         &err2);

        /* We can go ahead and overwrite the  length here. We know just how
        to recalculate it. */

        args->converter->charErrorBufferLength = (int8_t)(
            newTarget - (char*)args->converter->charErrorBuffer);

        if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
        {
            /* now we're REALLY in trouble.
            Internal program error - callback shouldn't have written this much
            data!
            */
            *err = U_INTERNAL_PROGRAM_ERROR;
            return;
        }
        /*else {*/
            /* sub errs could be invalid/truncated/illegal chars or w/e.
            These might want to be passed on up.. But the problem is, we already
            need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
            other errs.. */

            /*
            if(U_FAILURE(err2))
            ??
            */
        /*}*/
    }
}
コード例 #17
0
ファイル: uscanf_p.c プロジェクト: 119120119/node
static int32_t
u_scanf_string_handler(UFILE        *input,
                       u_scanf_spec_info *info,
                       ufmt_args    *args,
                       const UChar  *fmt,
                       int32_t      *fmtConsumed,
                       int32_t      *argConverted)
{
    const UChar *source;
    UConverter  *conv;
    char        *arg    = (char*)(args[0].ptrValue);
    char        *alias  = arg;
    char        *limit;
    UErrorCode  status  = U_ZERO_ERROR;
    int32_t     count;
    int32_t     skipped = 0;
    UChar       c;
    UBool       isNotEOF = FALSE;

    /* skip all ws in the input */
    if (info->fIsString) {
        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    }

    /* get the string one character at a time, truncating to the width */
    count = 0;

    /* open the default converter */
    conv = u_getDefaultConverter(&status);

    if(U_FAILURE(status))
        return -1;

    while( (info->fWidth == -1 || count < info->fWidth)
        && (isNotEOF = ufile_getch(input, &c))
        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
    {

        if (!info->fSkipArg) {
            /* put the character from the input onto the target */
            source = &c;
            /* Since we do this one character at a time, do it this way. */
            if (info->fWidth > 0) {
                limit = alias + info->fWidth - count;
            }
            else {
                limit = alias + ucnv_getMaxCharSize(conv);
            }

            /* convert the character to the default codepage */
            ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
                NULL, TRUE, &status);

            if(U_FAILURE(status)) {
                /* clean up */
                u_releaseDefaultConverter(conv);
                return -1;
            }
        }

        /* increment the count */
        ++count;
    }

    /* put the final character we read back on the input */
    if (!info->fSkipArg) {
        if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
            u_fungetc(c, input);

        /* add the terminator */
        if (info->fIsString) {
            *alias = 0x00;
        }
    }

    /* clean up */
    u_releaseDefaultConverter(conv);

    /* we converted 1 arg */
    *argConverted = !info->fSkipArg;
    return count + skipped;
}
コード例 #18
0
ファイル: ustdio.c プロジェクト: hihihippp/build-couchdb-1
U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const UChar *chars,
                   int32_t     count,
                   UFILE       *f,
                   UBool       flushIO,
                   UBool       flushTranslit)
{
    /* Set up conversion parameters */
    UErrorCode  status       = U_ZERO_ERROR;
    const UChar *mySource    = chars;
    const UChar *mySourceBegin;
    const UChar *mySourceEnd;
    char        charBuffer[UFILE_CHARBUFFER_SIZE];
    char        *myTarget   = charBuffer;
    int32_t     written      = 0;
    int32_t     numConverted = 0;

    if (count < 0) {
        count = u_strlen(chars);
    }

#if !UCONFIG_NO_TRANSLITERATION
    if((f->fTranslit) && (f->fTranslit->translit))
    {
        /* Do the transliteration */
        mySource = u_file_translit(f, chars, &count, flushTranslit);
    }
#endif

    /* Write to a string. */
    if (!f->fFile) {
        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
        if (flushIO && charsLeft > count) {
            count++;
        }
        written = ufmt_min(count, charsLeft);
        u_strncpy(f->str.fPos, mySource, written);
        f->str.fPos += written;
        return written;
    }

    mySourceEnd = mySource + count;

    /* Perform the conversion in a loop */
    do {
        mySourceBegin = mySource; /* beginning location for this loop */
        status     = U_ZERO_ERROR;
        if(f->fConverter != NULL) { /* We have a valid converter */
            ucnv_fromUnicode(f->fConverter,
                             &myTarget,
                             charBuffer + UFILE_CHARBUFFER_SIZE,
                             &mySource,
                             mySourceEnd,
                             NULL,
                             flushIO,
                             &status);
        } else { /*weiv: do the invariant conversion */
            int32_t convertChars = (int32_t) (mySourceEnd - mySource);
            if (convertChars > UFILE_CHARBUFFER_SIZE) {
                convertChars = UFILE_CHARBUFFER_SIZE;
                status = U_BUFFER_OVERFLOW_ERROR;
            }
            u_UCharsToChars(mySource, myTarget, convertChars);
            mySource += convertChars;
            myTarget += convertChars;
        }
        numConverted = (int32_t)(myTarget - charBuffer);

        if (numConverted > 0) {
            /* write the converted bytes */
            fwrite(charBuffer,
                   sizeof(char),
                   numConverted,
                   f->fFile);

            written     += (int32_t) (mySource - mySourceBegin);
        }
        myTarget     = charBuffer;
    }
    while(status == U_BUFFER_OVERFLOW_ERROR);

    /* return # of chars written */
    return written;
}
コード例 #19
0
/* helper function */
static wchar_t* 
_strToWCS(wchar_t *dest, 
           int32_t destCapacity,
           int32_t *pDestLength,
           const UChar *src, 
           int32_t srcLength,
           UErrorCode *pErrorCode){

    char stackBuffer [_STACK_BUFFER_CAPACITY];
    char* tempBuf = stackBuffer;
    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
    char* tempBufLimit = stackBuffer + tempBufCapacity;
    UConverter* conv = NULL;
    char* saveBuf = tempBuf;
    wchar_t* intTarget=NULL;
    int32_t intTargetCapacity=0;
    int count=0,retVal=0;
    
    const UChar *pSrcLimit =NULL;
    const UChar *pSrc = src;

    conv = u_getDefaultConverter(pErrorCode);
    
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
    
    if(srcLength == -1){
        srcLength = u_strlen(pSrc);
    }
    
    pSrcLimit = pSrc + srcLength;

    for(;;) {
        /* reset the error state */
        *pErrorCode = U_ZERO_ERROR;

        /* convert to chars using default converter */
        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
        count =(tempBuf - saveBuf);
        
        /* This should rarely occur */
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
            tempBuf = saveBuf;
            
            /* we dont have enough room on the stack grow the buffer */
            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 
                (_BUFFER_CAPACITY_MULTIPLIER * (srcLength)), count,sizeof(char))){
                goto cleanup;
            }
          
           saveBuf = tempBuf;
           tempBufLimit = tempBuf + tempBufCapacity;
           tempBuf = tempBuf + count;

        } else {
            break;
        }
    }

    if(U_FAILURE(*pErrorCode)){
        goto cleanup;
    }

    /* done with conversion null terminate the char buffer */
    if(count>=tempBufCapacity){
        tempBuf = saveBuf;
        /* we dont have enough room on the stack grow the buffer */
        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 
            tempBufCapacity-count+1, count,sizeof(char))){
            goto cleanup;
        }              
       saveBuf = tempBuf;
    }
    
    saveBuf[count]=0;
      

    /* allocate more space than required 
     * here we assume that every char requires 
     * no more than 2 wchar_ts
     */
    intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );

    if(intTarget){

        int32_t nulLen = 0;
        int32_t remaining = intTargetCapacity;
        wchar_t* pIntTarget=intTarget;
        tempBuf = saveBuf;
        
        /* now convert the mbs to wcs */
        for(;;){
            
            /* we can call the system API since we are sure that
             * there is atleast 1 null in the input
             */
            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
            
            if(retVal==-1){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                break;
            }else if(retVal== remaining){/* should never occur */
                int numWritten = (pIntTarget-intTarget);
                u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
                                          &intTargetCapacity,
                                          intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
                                          numWritten,
                                          sizeof(wchar_t));
                pIntTarget = intTarget;
                remaining=intTargetCapacity;

                if(nulLen!=count){ /*there are embedded nulls*/
                    pIntTarget+=numWritten;
                    remaining-=numWritten;
                }

            }else{
                int32_t nulVal;
                /*scan for nulls */
                /* we donot check for limit since tempBuf is null terminated */
                while(tempBuf[nulLen++] != 0){
                }
                nulVal = (nulLen < srcLength) ? 1 : 0; 
                pIntTarget = pIntTarget + retVal+nulVal;
                remaining -=(retVal+nulVal);
            
                /* check if we have reached the source limit*/
                if(nulLen>=(count)){
                    break;
                }
            }
        }
        count = (int32_t)(pIntTarget-intTarget);
       
        if(0 < count && count <= destCapacity){
            uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
        }  

        if(pDestLength){
            *pDestLength = count;
        }

        /* free the allocated memory */
        uprv_free(intTarget);

    }else{
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    }
cleanup:
    /* are we still using stack buffer */
    if(stackBuffer != saveBuf){
        uprv_free(saveBuf);
    }
    u_terminateWChars(dest,destCapacity,count,pErrorCode);

    u_releaseDefaultConverter(conv);

    return dest;
}
コード例 #20
0
void charsetFilteredOutputStream_icu::writeImpl
	(const byte_t* const data, const size_t count)
{
	if (m_from == NULL || m_to == NULL)
		throw exceptions::charset_conv_error("Cannot initialize converters.");

	// Allocate buffer for Unicode chars
	const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar);
	std::vector <UChar> uniBuffer(uniSize);

	// Conversion loop
	UErrorCode toErr = U_ZERO_ERROR;

	const char* uniSource = reinterpret_cast <const char*>(data);
	const char* uniSourceLimit = uniSource + count;

	do
	{
		// Convert from source charset to Unicode
		UChar* uniTarget = &uniBuffer[0];
		UChar* uniTargetLimit = &uniBuffer[0] + uniSize;

		toErr = U_ZERO_ERROR;

		ucnv_toUnicode(m_from, &uniTarget, uniTargetLimit,
		               &uniSource, uniSourceLimit, NULL, /* flush */ FALSE, &toErr);

		if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR)
		{
			if (toErr == U_INVALID_CHAR_FOUND ||
			    toErr == U_TRUNCATED_CHAR_FOUND ||
			    toErr == U_ILLEGAL_CHAR_FOUND)
			{
				throw exceptions::illegal_byte_sequence_for_charset();
			}
			else
			{
				throw exceptions::charset_conv_error
					("[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'.");
			}
		}

		const size_t uniLength = uniTarget - &uniBuffer[0];

		// Allocate buffer for destination charset
		const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
		std::vector <char> cpBuffer(cpSize);

		// Convert from Unicode to destination charset
		UErrorCode fromErr = U_ZERO_ERROR;

		const UChar* cpSource = &uniBuffer[0];
		const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;

		do
		{
			char* cpTarget = &cpBuffer[0];
			char* cpTargetLimit = &cpBuffer[0] + cpSize;

			fromErr = U_ZERO_ERROR;

			ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit,
							 &cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr);

			if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr))
			{
				if (fromErr == U_INVALID_CHAR_FOUND ||
				    fromErr == U_TRUNCATED_CHAR_FOUND ||
				    fromErr == U_ILLEGAL_CHAR_FOUND)
				{
					throw exceptions::illegal_byte_sequence_for_charset();
				}
				else
				{
					throw exceptions::charset_conv_error
						("[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'.");
				}
			}

			const size_t cpLength = cpTarget - &cpBuffer[0];

			// Write successfully converted bytes
			m_stream.write(&cpBuffer[0], cpLength);

		} while (fromErr == U_BUFFER_OVERFLOW_ERROR);

	} while (toErr == U_BUFFER_OVERFLOW_ERROR);
}
コード例 #21
0
void charsetFilteredOutputStream_icu::flush()
{
	if (m_from == NULL || m_to == NULL)
		throw exceptions::charset_conv_error("Cannot initialize converters.");

	// Allocate buffer for Unicode chars
	const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar);
	std::vector <UChar> uniBuffer(uniSize);

	// Conversion loop (with flushing)
	UErrorCode toErr = U_ZERO_ERROR;

	const char* uniSource = 0;
	const char* uniSourceLimit = 0;

	do
	{
		// Convert from source charset to Unicode
		UChar* uniTarget = &uniBuffer[0];
		UChar* uniTargetLimit = &uniBuffer[0] + uniSize;

		toErr = U_ZERO_ERROR;

		ucnv_toUnicode(m_from, &uniTarget, uniTargetLimit,
		               &uniSource, uniSourceLimit, NULL, /* flush */ TRUE, &toErr);

		if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR)
		{
			throw exceptions::charset_conv_error
				("[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'.");
		}

		const size_t uniLength = uniTarget - &uniBuffer[0];

		// Allocate buffer for destination charset
		const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
		std::vector <char> cpBuffer(cpSize);

		// Convert from Unicode to destination charset
		UErrorCode fromErr = U_ZERO_ERROR;

		const UChar* cpSource = &uniBuffer[0];
		const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;

		do
		{
			char* cpTarget = &cpBuffer[0];
			char* cpTargetLimit = &cpBuffer[0] + cpSize;

			fromErr = U_ZERO_ERROR;

			ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit,
							 &cpSource, cpSourceLimit, NULL, /* flush */ TRUE, &fromErr);

			if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr))
			{
				throw exceptions::charset_conv_error
					("[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'.");
			}

			const size_t cpLength = cpTarget - &cpBuffer[0];

			// Write successfully converted bytes
			m_stream.write(&cpBuffer[0], cpLength);

		} while (fromErr == U_BUFFER_OVERFLOW_ERROR);

	} while (toErr == U_BUFFER_OVERFLOW_ERROR);

	m_stream.flush();
}
コード例 #22
0
ファイル: ncnvfbts.c プロジェクト: flwh/Alcatel_OT_985_kernel
static UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen, 
                const char *codepage, UBool fallback, const int32_t *expectOffsets)
{


    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = 0;
    char junkout[NEW_MAX_BUFFER]; /* FIX */
    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
    const UChar *src;
    char *end;
    char *targ;
    int32_t *offs;
    int i;
    int32_t   realBufferSize;
    char *realBufferEnd;
    const UChar *realSourceEnd;
    const UChar *sourceLimit;
    UBool checkOffsets = TRUE;
    UBool doFlush;
    UBool action=FALSE;
    char *p;


    for(i=0;i<NEW_MAX_BUFFER;i++)
        junkout[i] = (char)0xF0;
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junokout[i] = 0xFF;
    setNuConvTestName(codepage, "FROM");

    log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize, 
            gOutBufferSize);

    conv = my_ucnv_open(codepage, &status);
    if(U_FAILURE(status))
    {
        log_data_err("Couldn't open converter %s\n",codepage);
        return TRUE;
    }

    log_verbose("Converter opened..\n");
    /*----setting the callback routine----*/
    ucnv_setFallback (conv, fallback);
    action = ucnv_usesFallback(conv);
    if(action != fallback){
        log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
    }
    /*------------------------*/
    src = source;
    targ = junkout;
    offs = junokout;

    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
    realBufferEnd = junkout + realBufferSize;
    realSourceEnd = source + sourceLen;

    if ( gOutBufferSize != realBufferSize )
        checkOffsets = FALSE;

    if( gInBufferSize != NEW_MAX_BUFFER )
        checkOffsets = FALSE;

    do
    {
        end = nct_min(targ + gOutBufferSize, realBufferEnd);
        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);

        doFlush = (UBool)(sourceLimit == realSourceEnd);

        if(targ == realBufferEnd)
        {
            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
            return FALSE;
        }
        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");


        status = U_ZERO_ERROR;

        ucnv_fromUnicode (conv,
                  (char **)&targ,
                  (const char *)end,
                  &src,
                  sourceLimit,
                  checkOffsets ? offs : NULL,
                  doFlush, /* flush if we're at the end of the input data */
                  &status);

    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (sourceLimit < realSourceEnd) );

    if(U_FAILURE(status))
    {
        log_err("Problem doing toUnicode, errcode %d %s\n", myErrorName(status), gNuConvTestName);
        return FALSE;
    }

    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
        sourceLen, targ-junkout);
    if(VERBOSITY)
    {
        char junk[9999];
        char offset_str[9999];

        junk[0] = 0;
        offset_str[0] = 0;
        for(p = junkout;p<targ;p++)
        {
            sprintf(junk + uprv_strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
        }

        log_verbose(junk);
        printSeq((const unsigned char*)expect, expectLen);
        if ( checkOffsets )
        {
            log_verbose("\nOffsets:");
            log_verbose(offset_str);
        }
        log_verbose("\n");
    }
    ucnv_close(conv);


    if(expectLen != targ-junkout)
    {
        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
        printSeqErr((const unsigned char*)expect, expectLen);
        return FALSE;
    }

    if (checkOffsets && (expectOffsets != 0) )
    {
        log_verbose("\ncomparing %d offsets..\n", targ-junkout);
        if(uprv_memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
            log_err("\ndid not get the expected offsets while %s \n", gNuConvTestName);
            log_err("Got  : ");
            printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
            for(p=junkout;p<targ;p++)
                log_err("%d, ", junokout[p-junkout]); 
            log_err("\nExpected: ");
            for(i=0; i<(targ-junkout); i++)
                log_err("%d,", expectOffsets[i]);
        }
    }

    log_verbose("\n\ncomparing..\n");
    if(!memcmp(junkout, expect, expectLen))
    {
        log_verbose("Matches!\n");
        return TRUE;
    }
    else
    {
        log_err("String does not match. %s\n", gNuConvTestName);
        log_verbose("String does not match. %s\n", gNuConvTestName);
        printSeqErr((const unsigned char*)junkout, expectLen);
        printSeqErr((const unsigned char*)expect, expectLen);
        return FALSE;
    }
}
コード例 #23
0
void charsetConverter_icu::convert
	(utility::inputStream& in, utility::outputStream& out, status* st)
{
	UErrorCode err = U_ZERO_ERROR;

	ucnv_reset(m_from);
	ucnv_reset(m_to);

	if (st)
		new (st) status();

	// From buffers
	byte_t cpInBuffer[16]; // stream data put here
	const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
	std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here

	// To buffers
	// converted (char) data end up here
	const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
	std::vector <char> cpOutBuffer(cpOutBufferSz);

	// Tell ICU what to do when encountering an illegal byte sequence
	if (m_options.silentlyReplaceInvalidSequences)
	{
		// Set replacement chars for when converting from Unicode to codepage
		icu::UnicodeString substString(m_options.invalidSequence.c_str());
		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
	}
	else
	{
		// Tell ICU top stop (and return an error) on illegal byte sequences
		ucnv_setToUCallBack
			(m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");

		ucnv_setFromUCallBack
			(m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err);

		if (U_FAILURE(err))
			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
	}

	// Input data available
	while (!in.eof())
	{
		// Read input data into buffer
		size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));

		// Beginning of read data
		const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
		const char* sourceLimit = source + inLength; // end + 1

		UBool flush = in.eof();  // is this last run?

		UErrorCode toErr;

		// Loop until all source has been processed
		do
		{
			// Set up target pointers
			UChar* target = &uOutBuffer[0];
			UChar* targetLimit = &target[0] + outSize;

			toErr = U_ZERO_ERROR;
			ucnv_toUnicode(m_from, &target, targetLimit,
			               &source, sourceLimit, NULL, flush, &toErr);

			if (st)
				st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));

			if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr))
			{
				if (toErr == U_INVALID_CHAR_FOUND ||
				    toErr == U_TRUNCATED_CHAR_FOUND ||
				    toErr == U_ILLEGAL_CHAR_FOUND)
				{
					// Error will be thrown later (*)
				}
				else
				{
					throw exceptions::charset_conv_error("[ICU] Error converting to Unicode from " + m_source.getName());
				}
			}

			// The Unicode source is the buffer just written and the limit
			// is where the previous conversion stopped (target is moved in the conversion)
			const UChar* uSource = &uOutBuffer[0];
			UChar* uSourceLimit = &target[0];
			UErrorCode fromErr;

			// Loop until converted chars are fully written
			do
			{
				char* cpTarget = &cpOutBuffer[0];
				const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;

				fromErr = U_ZERO_ERROR;

				// Write converted bytes (Unicode) to destination codepage
				ucnv_fromUnicode(m_to, &cpTarget, cpTargetLimit,
				                 &uSource, uSourceLimit, NULL, flush, &fromErr);

				if (st)
				{
					// Decrement input bytes count by the number of input bytes in error
					char errBytes[16];
					int8_t errBytesLen = sizeof(errBytes);
					UErrorCode errBytesErr = U_ZERO_ERROR;

	 				ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);

					st->inputBytesRead -= errBytesLen;
					st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
				}

				// (*) If an error occurred while converting from input charset, throw it now
				if (toErr == U_INVALID_CHAR_FOUND ||
				    toErr == U_TRUNCATED_CHAR_FOUND ||
				    toErr == U_ILLEGAL_CHAR_FOUND)
				{
					throw exceptions::illegal_byte_sequence_for_charset();
				}

				if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr))
				{
					if (fromErr == U_INVALID_CHAR_FOUND ||
					    fromErr == U_TRUNCATED_CHAR_FOUND ||
					    fromErr == U_ILLEGAL_CHAR_FOUND)
					{
						throw exceptions::illegal_byte_sequence_for_charset();
					}
					else
					{
						throw exceptions::charset_conv_error("[ICU] Error converting from Unicode to " + m_dest.getName());
					}
				}

				// Write to destination stream
				out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));

			} while (fromErr == U_BUFFER_OVERFLOW_ERROR);

		} while (toErr == U_BUFFER_OVERFLOW_ERROR);
	}
}
コード例 #24
0
CF_PRIVATE CFIndex __CFStringEncodingICUToBytes(const char *icuName, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
    UConverter *converter;
    UErrorCode errorCode = U_ZERO_ERROR;
    const UTF16Char *source = characters;
    const UTF16Char *sourceLimit = source + numChars;
    char *destination = (char *)bytes;
    const char *destinationLimit = destination + maxByteLen;
    bool flush = ((0 == (flags & kCFStringEncodingPartialInput)) ? true : false);
    CFIndex status;

    if (NULL == (converter = __CFStringEncodingConverterCreateICUConverter(icuName, flags, false))) return kCFStringEncodingConverterUnavailable;

    if (0 == maxByteLen) {
        char buffer[MAX_BUFFER_SIZE];
        CFIndex totalLength = 0;

        while ((source < sourceLimit) && (U_ZERO_ERROR == errorCode)) {
            destination = buffer;
            destinationLimit = destination + MAX_BUFFER_SIZE;

            ucnv_fromUnicode(converter, &destination, destinationLimit, (const UChar **)&source, (const UChar *)sourceLimit, NULL, flush, &errorCode);

            totalLength += (destination - buffer);

            if (U_BUFFER_OVERFLOW_ERROR == errorCode) errorCode = U_ZERO_ERROR;
        }

        if (NULL != usedByteLen) *usedByteLen = totalLength;
    } else {
        ucnv_fromUnicode(converter, &destination, destinationLimit, (const UChar **)&source, (const UChar *)sourceLimit, NULL, flush, &errorCode);

#if HAS_ICU_BUG_6024743
/* Another critical ICU design issue. Similar to conversion error, source pointer returned from U_BUFFER_OVERFLOW_ERROR is already beyond the last valid character position. It renders the returned value from source entirely unusable. We have to manually back up until succeeding <rdar://problem/7183045> Intrestingly, this issue doesn't apply to ucnv_toUnicode. The asynmmetric nature makes this more dangerous */
        if (U_BUFFER_OVERFLOW_ERROR == errorCode) {
            const uint8_t *bitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
            const uint8_t *nonBase;
            UTF32Char character;

            do {
                // Since the output buffer is filled, we can assume no invalid chars (including stray surrogates)
                do {
                    sourceLimit = (source - 1);
                    character = *sourceLimit;
                    nonBase = bitmap;

                    if (CFUniCharIsSurrogateLowCharacter(character)) {
                        --sourceLimit;
                        character = CFUniCharGetLongCharacterForSurrogatePair(*sourceLimit, character);
                        nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16) & 0x000F);
                        character &= 0xFFFF;
                    }
                } while ((sourceLimit > characters) && CFUniCharIsMemberOfBitmap(character, nonBase));

                if (sourceLimit > characters) {
                    source = characters;
                    destination = (char *)bytes;
                    errorCode = U_ZERO_ERROR;

                    ucnv_resetFromUnicode(converter);

                    ucnv_fromUnicode(converter, &destination, destinationLimit, (const UChar **)&source, (const UChar *)sourceLimit, NULL, flush, &errorCode);
                }
            } while (U_BUFFER_OVERFLOW_ERROR == errorCode);

            errorCode = U_BUFFER_OVERFLOW_ERROR;
        }
#endif
        if (NULL != usedByteLen) *usedByteLen = destination - (const char *)bytes;
    }

    status = ((U_ZERO_ERROR == errorCode) ? kCFStringEncodingConversionSuccess : ((U_BUFFER_OVERFLOW_ERROR == errorCode) ? kCFStringEncodingInsufficientOutputBufferLength : kCFStringEncodingInvalidInputStream));

    if (NULL != usedCharLen) {
#if HAS_ICU_BUG_6024743
/* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_fromUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatiibility issues */
	if (kCFStringEncodingInvalidInputStream == status) {
#define MAX_ERROR_BUFFER_LEN (32)
	    UTF16Char errorBuffer[MAX_ERROR_BUFFER_LEN];
	    int8_t errorLength = MAX_ERROR_BUFFER_LEN;
#undef MAX_ERROR_BUFFER_LEN

	    errorCode = U_ZERO_ERROR;

	    ucnv_getInvalidUChars(converter, (UChar *)errorBuffer, &errorLength, &errorCode);

	    if (U_ZERO_ERROR == errorCode) {
		source -= errorLength;
	    } else {
		// Gah, something is terribly wrong. Reset everything
		source = characters; // 0 length
		if (NULL != usedByteLen) *usedByteLen = 0;
	    }
	}
#endif
	*usedCharLen = source - characters;
    }

    status |= __CFStringEncodingConverterReleaseICUConverter(converter, flags, status);

    return status;
}
コード例 #25
0
DeprecatedCString StreamingTextDecoderICU::fromUnicode(const DeprecatedString &qcs, bool allowEntities)
{
    TextEncodingID encoding = m_encoding.effectiveEncoding().encodingID();

    if (encoding == WinLatin1Encoding && qcs.isAllLatin1())
        return qcs.latin1();

    if ((encoding == WinLatin1Encoding || encoding == UTF8Encoding || encoding == ASCIIEncoding) 
        && qcs.isAllASCII())
        return qcs.ascii();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    DeprecatedString copy = qcs;
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return DeprecatedCString();

    // FIXME: when DeprecatedString buffer is latin1, it would be nice to
    // convert from that w/o having to allocate a unicode buffer

    char buffer[ConversionBufferSize];
    const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;
    DeprecatedString normalizedString;
    if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
        normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed
        
        int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), copy.length(), &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            err = U_ZERO_ERROR;
            normalizedString.truncate(normalizedLength);
            normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), normalizedLength, &err);
        }
        
        source = reinterpret_cast<const UChar*>(normalizedString.unicode());
        sourceLimit = source + normalizedLength;
    }

    DeprecatedCString result(1); // for trailing zero

    if (allowEntities)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
    else {
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return DeprecatedCString();

    do {
        char* target = buffer;
        char* targetLimit = target + ConversionBufferSize;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true,  &err);
        int count = target - buffer;
        buffer[count] = 0;
        result.append(buffer);
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return result;
}
コード例 #26
0
ファイル: ustdio.c プロジェクト: gitpan/ponie
U_CAPI int32_t U_EXPORT2
u_file_write_flush(    const UChar     *chars,
                   int32_t        count,
                   UFILE         *f,
                   UBool         flush)
{
    /* Set up conversion parameters */
    UErrorCode         status        = U_ZERO_ERROR;
    const UChar        *mySource       = chars;
    const UChar        *sourceAlias       = chars;
    const UChar        *mySourceEnd     = chars + count;
    char            *myTarget     = f->fCharBuffer;
    int32_t        bufferSize    = UFILE_CHARBUFFER_SIZE;
    int32_t        written        = 0;
    int32_t        numConverted   = 0;

#if !UCONFIG_NO_TRANSLITERATION
    if((f->fTranslit) && (f->fTranslit->translit))
    {
        /* Do the transliteration */
        mySource = u_file_translit(f, chars, &count, flush);
        sourceAlias = mySource;
        mySourceEnd = mySource + count;
    }
#endif

    /* Perform the conversion in a loop */
    do {
        status     = U_ZERO_ERROR;
        sourceAlias = mySource;
        if(f->fConverter != NULL) { /* We have a valid converter */
            ucnv_fromUnicode(f->fConverter,
                &myTarget,
                f->fCharBuffer + bufferSize,
                &mySource,
                mySourceEnd,
                NULL,
                flush,
                &status);
        } else { /*weiv: do the invariant conversion */
            u_UCharsToChars(mySource, myTarget, count);
            myTarget += count;
        }
        numConverted = (int32_t)(myTarget - f->fCharBuffer);

        if (numConverted > 0) {
            /* write the converted bytes */
            fwrite(f->fCharBuffer,
                sizeof(char),
                numConverted,
                f->fFile);

            written     += numConverted;
        }
        myTarget     = f->fCharBuffer;
    }
    while(status == U_BUFFER_OVERFLOW_ERROR);

    /* return # of chars written */
    return written;
}
コード例 #27
0
UErrorCode convsample_46()
{
  printf("\n\n==============================================\n"
    "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");

  FILE *f;
  FILE *out;
  int32_t count;
  UChar inBuf[BUFFERSIZE];
  const UChar *source;
  const UChar *sourceLimit;
  char *buf;
  char *target;
  char *targetLimit;

  int32_t bufSize = 0;
  UConverter *conv = NULL;
  UErrorCode status = U_ZERO_ERROR;
  uint32_t inchars=0, total=0;

  f = fopen("data40.utf16", "rb");
  if(!f)
  {
    fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
    return U_FILE_ACCESS_ERROR;
  }

  out = fopen("data46.out", "wb");
  if(!out)
  {
    fprintf(stderr, "Couldn't create file 'data46.out'.\n");
    fclose(f);
    return U_FILE_ACCESS_ERROR;
  }

  // **************************** START SAMPLE *******************
  conv = ucnv_open( "iso-8859-2", &status);
  assert(U_SUCCESS(status));

  bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
  printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
         BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
  buf = (char*)malloc(bufSize * sizeof(char));
  assert(buf!=NULL);

  // grab another buffer's worth
  while((!feof(f)) && 
        ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
  {
    inchars += count;

    // Convert bytes to unicode
    source = inBuf;
    sourceLimit = inBuf + count;
    
    do
    {
        target = buf;
        targetLimit = buf + bufSize;
        
        ucnv_fromUnicode( conv, &target, targetLimit, 
                       &source, sourceLimit, NULL,
                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
                                   /* is true (when no more data will come) */
                         &status);
      
        if(status == U_BUFFER_OVERFLOW_ERROR)
        {
          // simply ran out of space - we'll reset the target ptr the next
          // time through the loop.
          status = U_ZERO_ERROR;
        }
        else
        {
          //  Check other errors here.
          assert(U_SUCCESS(status));
          // Break out of the loop (by force)
        }

        // Process the Unicode
        assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
               (size_t)(target-buf));
        total += (target-buf);
    } while (source < sourceLimit); // while simply out of space
  }

  printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
  
  // ***************************** END SAMPLE ********************
  ucnv_close(conv);

  fclose(f);
  fclose(out);
  printf("\n");

  return U_ZERO_ERROR;
}