예제 #1
0
파일: gtm_conv.c 프로젝트: mihawk/fis-gtm
int gtm_conv(UConverter* from, UConverter* to, mstr *src, char* dstbuff, int* bufflen)
{
	char		*dstptr, *dstbase, *srcptr;
	const char	*ichset;
	int		dstlen, src_charlen, srclen;
	UErrorCode	status, status1;

	if (0 == src->len)
		return 0;
	if (NULL == dstbuff)
	{
		/* Compute the stringpool buffer space needed for conversion given that source
		 * is encoded in the ichset representation.  The ICU functions ucnv_getMinCharSize()
		 * and ucnv_getMaxCharSize() are used to compute the minimum and maximum number of
		 * bytes required per UChar if converted from/to ichset/ochset respectively
		 */
		src_charlen = (src->len / ucnv_getMinCharSize(from)) + 1; /* number of UChar's from ichset */
		dstlen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
		dstlen = (dstlen > MAX_STRLEN) ? MAX_STRLEN : dstlen;
		ENSURE_STP_FREE_SPACE(dstlen);
		dstbase = (char *)stringpool.free;
	} else
	{
		dstbase = dstbuff;
		dstlen = *bufflen;
	}
	srcptr = src->addr;
	srclen = (int)src->len;
	dstptr = dstbase;
	status = U_ZERO_ERROR; /* initialization to "success" is required by ICU */
	ucnv_convertEx(to, from, &dstptr, dstptr + dstlen, (const char**)&srcptr, srcptr + srclen,
		NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
	if (U_FAILURE(status))
	{
		if (U_BUFFER_OVERFLOW_ERROR == status)
		{	/* translation requires more space than the maximum allowed GT.M string size */
			if (NULL == dstbuff)
				rts_error_csa(NULL, VARLSTCNT(1) ERR_MAXSTRLEN);
			else
			{
				/* Insufficient buffer passed. Return the required buffer length */
				src_charlen = (srclen / ucnv_getMinCharSize(from)) + 1;
				*bufflen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
				return -1;
			}
		}
		status1 = U_ZERO_ERROR;
		ichset = ucnv_getName(from, &status1);
		assert(U_SUCCESS(status1));
		UTF8_BADCHAR(1,(unsigned char *) (srcptr - 1), NULL,STRLEN(ichset), ichset);
	}
	return (int) (dstptr - dstbase);
}
예제 #2
0
QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const
{
    UConverter *conv = getConverter(state);

    int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
    QByteArray string(requiredLength, Qt::Uninitialized);

    const UChar *uc = (const UChar *)unicode;
    const UChar *end = uc + length;
    int convertedChars = 0;
    while (1) {
        char *ch = (char *)string.data();
        char *chEnd = ch + string.length();
        ch += convertedChars;
        UErrorCode error = U_ZERO_ERROR;
        ucnv_fromUnicode(conv,
                         &ch, chEnd,
                         &uc, end,
                         0, false, &error);
        if (!U_SUCCESS(error))
            qDebug() << "convertFromUnicode failed:" << u_errorName(error);
        convertedChars = ch - string.data();
        if (uc >= end)
            break;
        string.resize(string.length()*2);
    }
    string.resize(convertedChars);

    if (!state)
        ucnv_close(conv);

    return string;
}
예제 #3
0
	void FStringConverter::ConvertString(const icu::UnicodeString& Source, const int32 SourceStartIndex, const int32 SourceLen, FString& Destination)
	{
		if (Source.length() > 0)
		{
			UErrorCode ICUStatus = U_ZERO_ERROR;

			ucnv_reset(ICUConverter);
			
			// Get the internal buffer of the string, we're going to use it as scratch space
			TArray<TCHAR>& InternalStringBuffer = Destination.GetCharArray();
				
			// Work out the maximum size required and resize the buffer so it can hold enough data
			const int32_t DestinationCapacityBytes = UCNV_GET_MAX_BYTES_FOR_STRING(SourceLen, ucnv_getMaxCharSize(ICUConverter));
			const int32 DestinationCapacityTCHARs = DestinationCapacityBytes / sizeof(TCHAR);
			InternalStringBuffer.SetNumUninitialized(DestinationCapacityTCHARs);

			// Perform the conversion into the string buffer, and then null terminate the FString and size it back down to the correct size
			const int32_t DestinationSizeBytes = ucnv_fromUChars(ICUConverter, reinterpret_cast<char*>(InternalStringBuffer.GetData()), DestinationCapacityBytes, Source.getBuffer() + SourceStartIndex, SourceLen, &ICUStatus);
			const int32 DestinationSizeTCHARs = DestinationSizeBytes / sizeof(TCHAR);
			InternalStringBuffer[DestinationSizeTCHARs] = 0;
			InternalStringBuffer.SetNum(DestinationSizeTCHARs + 1, /*bAllowShrinking*/false); // the array size includes null

			check(U_SUCCESS(ICUStatus));
		}
		else
		{
			Destination.Empty();
		}
	}
/**
 * Convert character vector between marked encodings and the encoding provided
 *
 * @param str     input character vector or list of raw vectors
 * @param to    target encoding, \code{NULL} or \code{""} for default enc
 * @param to_raw single logical, should list of raw vectors be returned?
 * @return a converted character vector or list of raw vectors
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-11-12)
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-03-28)
 *          use StriUcnv
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-01)
 *          calc required buf size a priori
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_encode_from_marked(SEXP str, SEXP to, SEXP to_raw)
{
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   const char* selected_to   = stri__prepare_arg_enc(to, "to", true); /* this is R_alloc'ed */
   bool to_raw_logical = stri__prepare_arg_logical_1_notNA(to_raw, "to_raw");

   STRI__ERROR_HANDLER_BEGIN(1)
   R_len_t str_n = LENGTH(str);
   StriContainerUTF16 str_cont(str, str_n);

   // get the number of strings to convert; if == 0, then you know what's the result
   if (str_n <= 0) return Rf_allocVector(to_raw_logical?VECSXP:STRSXP, 0);

   // Open converters
   StriUcnv ucnv(selected_to);
   UConverter* uconv_to = ucnv.getConverter(true /*register_callbacks*/);

   // Get target encoding mark
   cetype_t encmark_to = to_raw_logical?CE_BYTES:ucnv.getCE();

   // Prepare out val
   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(to_raw_logical?VECSXP:STRSXP, str_n));

   // calculate required buf size
   R_len_t bufsize = 0;
   for (R_len_t i=0; i<str_n; ++i) {
      if (!str_cont.isNA(i) && str_cont.get(i).length() > bufsize)
         bufsize = str_cont.get(i).length();
   }
   bufsize = UCNV_GET_MAX_BYTES_FOR_STRING(bufsize, ucnv_getMaxCharSize(uconv_to));
   // "The calculated size is guaranteed to be sufficient for this conversion."
   String8buf buf(bufsize);

   for (R_len_t i=0; i<str_n; ++i) {
      if (str_cont.isNA(i)) {
         if (to_raw_logical) SET_VECTOR_ELT(ret, i, R_NilValue);
         else                SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      R_len_t curn_tmp = str_cont.get(i).length();
      const UChar* curs_tmp = str_cont.get(i).getBuffer(); // The buffer content is (probably) not NUL-terminated.
      if (!curs_tmp)
         throw StriException(MSG__INTERNAL_ERROR);

      UErrorCode status = U_ZERO_ERROR;
      ucnv_resetFromUnicode(uconv_to);
      R_len_t bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(),
            curs_tmp, curn_tmp, &status);
      if (bufneed <= buf.size()) {
         STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
      }
      else {// larger buffer needed [this shouldn't happen?]
         buf.resize(bufneed, false/*destroy contents*/);
         status = U_ZERO_ERROR;
         bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(),
               curs_tmp, curn_tmp, &status);
         STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
      }

      if (to_raw_logical) {
         SEXP outobj;
         STRI__PROTECT(outobj = Rf_allocVector(RAWSXP, bufneed));
         memcpy(RAW(outobj), buf.data(), (size_t)bufneed);
         SET_VECTOR_ELT(ret, i, outobj);
         STRI__UNPROTECT(1);
      }
      else {
         SET_STRING_ELT(ret, i,
            Rf_mkCharLenCE(buf.data(), bufneed, encmark_to));
      }
   }

   STRI__UNPROTECT_ALL
   return ret;

   STRI__ERROR_HANDLER_END({/* nothing special on error */})
}
예제 #5
0
/**
 * Convert character vector between given encodings
 *
 * @param str     input character vector or list of raw vectors
 * @param from  source encoding, \code{NULL} or \code{""} for default enc
 * @param to    target encoding, \code{NULL} or \code{""} for default enc
 * @param to_raw single logical, should list of raw vectors be returned?
 * @return a converted character vector or list of raw vectors
 *
 * @version 0.1 (Marek Gagolewski)
 * @version 0.2 (Marek Gagolewski) arg to_raw_added, encoding marking
 * @version 0.3 (Marek Gagolewski, 2013-06-16) make StriException-friendly
 * @version 0.4 (Marek Gagolewski, 2013-08-08) use StriContainerListRaw
 * @version 0.5 (Marek Gagolewski, 2013-11-20) BUGFIX call stri_encode_from_marked if necessary
 */
SEXP stri_encode(SEXP str, SEXP from, SEXP to, SEXP to_raw)
{
   const char* selected_from = stri__prepare_arg_enc(from, "from", true);
   if (!selected_from && Rf_isVectorAtomic(str))
      return stri_encode_from_marked(str, to, to_raw);

   str = stri_prepare_arg_list_raw(str, "str");
   const char* selected_to   = stri__prepare_arg_enc(to, "to", true);
   bool to_raw_logical = stri__prepare_arg_logical_1_notNA(to_raw, "to_raw");

   UConverter* uconv_from = NULL;
   UConverter* uconv_to = NULL;

   STRI__ERROR_HANDLER_BEGIN
   StriContainerListRaw str_cont(str);
   R_len_t str_n = str_cont.get_n();

   // get the number of strings to convert; if == 0, then you know what's the result
   if (str_n <= 0) return Rf_allocVector(to_raw_logical?VECSXP:STRSXP, 0);

   // Open converters
   uconv_from = stri__ucnv_open(selected_from);
   uconv_to = stri__ucnv_open(selected_to);


   // Get target encoding mark
   UErrorCode err = U_ZERO_ERROR;
   const char* uconv_to_name = ucnv_getName(uconv_to, &err);
   if (U_FAILURE(err))
      throw StriException(err);
   cetype_t encmark_to = CE_BYTES; // all other cases than the below ones
      // - bytes enc (this is reasonable, isn't it?)
   if (!to_raw_logical) { // otherwise not needed
      if (!strcmp(uconv_to_name, "US-ASCII") || !strcmp(uconv_to_name, "UTF-8"))
         encmark_to = CE_UTF8; // no CE for ASCII, will be auto-detected by mkCharLenCE
      else if (!strcmp(uconv_to_name, "ISO-8859-1"))
         encmark_to = CE_LATIN1;
      else if (!strcmp(uconv_to_name, ucnv_getDefaultName()))
         encmark_to = CE_NATIVE;
   }

   // Prepare out val
   SEXP ret;
   PROTECT(ret = Rf_allocVector(to_raw_logical?VECSXP:STRSXP, str_n));

   String8 buf(0); // will be extended in a moment

   for (R_len_t i=0; i<str_n; ++i) {
      if (str_cont.isNA(i)) {
         if (to_raw_logical) SET_VECTOR_ELT(ret, i, R_NilValue);
         else                SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      const char* curd = str_cont.get(i).c_str();
      R_len_t curn     = str_cont.get(i).length();

      err = U_ZERO_ERROR;
      UnicodeString encs(curd, curn, uconv_from, err); // FROM -> UTF-16 [this is the slow part]
      if (U_FAILURE(err))
         throw StriException(err);

      R_len_t curn_tmp = encs.length();
      const UChar* curs_tmp = encs.getBuffer(); // The buffer contents is (probably) not NUL-terminated.
      if (!curs_tmp)
         throw StriException(MSG__INTERNAL_ERROR);

      R_len_t bufneed = UCNV_GET_MAX_BYTES_FOR_STRING(curn_tmp, ucnv_getMaxCharSize(uconv_to));
      // "The calculated size is guaranteed to be sufficient for this conversion."
      buf.resize(bufneed);

      err = U_ZERO_ERROR;
//      bufneed = encs.extract(buf.data(), buf.size(), uconv_to, err); // UTF-16 -> TO
      ucnv_resetFromUnicode(uconv_to);
      bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(), curs_tmp, curn_tmp, &err);
      if (bufneed <= buf.size()) {
         if (U_FAILURE(err))
            throw StriException(err);
      }
      else {// larger buffer needed [this shouldn't happen?]
//         warning("buf extending");
         buf.resize(bufneed);
         err = U_ZERO_ERROR;
         bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(), curs_tmp, curn_tmp, &err);
         if (U_FAILURE(err))
            throw StriException(err);
         if (bufneed > buf.size())
            throw StriException(MSG__INTERNAL_ERROR);
      }

      if (to_raw_logical) {
         SEXP outobj = Rf_allocVector(RAWSXP, bufneed);
         memcpy(RAW(outobj), buf.data(), (size_t)bufneed);
         SET_VECTOR_ELT(ret, i, outobj);
      }
      else {
         SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), bufneed, encmark_to));
      }
   }

   if (uconv_from) {
      ucnv_close(uconv_from);
      uconv_from = NULL;
   }
   if (uconv_to) {
      ucnv_close(uconv_to);
      uconv_to = NULL;
   }

   UNPROTECT(1);
   return ret;

   STRI__ERROR_HANDLER_END({
      if (uconv_from)
         ucnv_close(uconv_from);
      if (uconv_to)
         ucnv_close(uconv_to);
   })
}
예제 #6
0
파일: text_icu.c 프로젝트: bos/text-icu
int __get_max_bytes_for_string(UConverter *cnv, int src_length)
{
    return UCNV_GET_MAX_BYTES_FOR_STRING(src_length, ucnv_getMaxCharSize(cnv));
}