/* Return allocated string containing UTF8 string converted from encoding fromcode */ static int utf8(const char *fromcode, char *inputbuf, char **outputbuf) { iconv_t cd; char *outputptr; size_t outbytesleft; size_t inbytesleft; inbytesleft = strlen(inputbuf); cd = iconv_open("UTF-8", fromcode); if ( cd == ((iconv_t)(-1)) ) return UTF8_NO_RESULT; outbytesleft = inbytesleft * 3 + 1; /* UTF8 string can be 3 times larger */ /* then local string */ *outputbuf = (char *)malloc(outbytesleft); if (!*outputbuf) return UTF8_NO_RESULT; memset(*outputbuf, 0, outbytesleft); outputptr = *outputbuf; /* Does this string convert cleanly? */ if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 ) { #ifdef HAVE_ICONVCTL int on = 1; /* No. Try to convert it while transliterating. */ iconvctl(cd, ICONV_SET_TRANSLITERATE, &on); if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 ) { /* No. Try to convert it while discarding errors. */ iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &on); if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 ) { /* Still no. Throw away the buffer and return. */ free(*outputbuf); iconv_close(cd); return UTF8_NO_RESULT; } } iconv_close(cd); return UTF8_BAD_RESULT; #else free(*outputbuf); iconv_close(cd); return UTF8_NO_RESULT; #endif } /* Return a good result, converted string is in buffer. */ iconv_close(cd); return UTF8_GOOD_RESULT; }
/*
 * Regression check for ICONV_SET_HOOKS: installs wchar_hook as the
 * wide-character hook on an ASCII -> SHIFT_JIS descriptor, converts
 * "Hello World!" and then inspects the wc_hook flag.
 *
 * Returns 0 if wc_hook is non-zero after the conversion, 1 if it is zero,
 * and -1 if the buffer allocation or any iconv call fails.
 */
static int
ctl_wc_hook(void)
{
	/* An array, so that sizeof yields the text length, not a pointer size. */
	static const char msg[] = "Hello World!";
	struct iconv_hooks hooks;
	iconv_t cd;
	size_t inbytesleft = sizeof(msg) - 1;	/* without the terminator */
	size_t outbytesleft = 40;
	const char *inptr = msg;
	char *outbuf;
	char *outptr;
	int ret = -1;

	hooks.wc_hook = wchar_hook;
	hooks.uc_hook = NULL;
	hooks.data = NULL;

	outbuf = malloc(outbytesleft);
	if (outbuf == NULL)
		return (-1);
	/* iconv advances outptr; outbuf keeps the address to free. */
	outptr = outbuf;

	cd = iconv_open("SHIFT_JIS", "ASCII");
	if (cd == (iconv_t)-1)
		goto out_free;
	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
		goto out_close;
	if (iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft) ==
	    (size_t)-1)
		goto out_close;
	ret = 0;

out_close:
	if (iconv_close(cd) == -1)
		ret = -1;
out_free:
	free(outbuf);
	if (ret != 0)
		return (-1);
	return (wc_hook ? 0 : 1);
}
/*
 * Convert a UTF-16LE string to a NUL-terminated BIG5 string.
 *
 *   codepage     ignored; the target encoding is always BIG5
 *   src, srclen  source buffer and its length in 16-bit units
 *   dst, dstlen  destination buffer and its capacity in bytes, including
 *                room for the terminating NUL
 *   usedDefChar  ignored; never read or written
 *
 * Transliteration and discarding of illegal sequences are both requested
 * on the descriptor. The result of the conversion call itself is not
 * checked: whatever was produced before it stopped is returned.
 *
 * Returns the number of bytes stored in dst, not counting the terminating
 * NUL. Returns 0 (with dst set to the empty string when dst is usable) if
 * the arguments are unusable or the converter cannot be opened.
 */
int wcs2mbs( unsigned int codepage, const unsigned short* src, int srclen, char* dst, int dstlen, bool* usedDefChar )
{
    (void)codepage;
    (void)usedDefChar;

    if (!dst || dstlen <= 0)
        return 0;
    dst[0] = 0;
    if (!src || srclen <= 0)
        return 0;

    iconv_t cd = iconv_open("BIG5", "UTF-16LE");
    if (cd == (iconv_t)-1)
        return 0;

    /* One byte of dst is held back for the terminator. */
    const size_t outcap = (size_t)dstlen - 1;
    size_t inbytesleft = (size_t)srclen * sizeof(unsigned short);
    size_t outbytesleft = outcap;
    char *in = (char*)src;
    char *out = dst;
    int value = 1;

    iconvctl( cd, ICONV_SET_TRANSLITERATE, &value);
    iconvctl( cd, ICONV_SET_DISCARD_ILSEQ, &value);
    iconv( cd, &in, &inbytesleft, &out, &outbytesleft );
    iconv_close( cd );

    /* Bytes written = capacity handed to iconv minus what it left unused. */
    int len = (int)(outcap - outbytesleft);
    dst[len] = 0;
    return len;
}
/*
 * Convert a BIG5 string to a zero-terminated UTF-16LE string.
 *
 *   codepage     ignored; the source encoding is always BIG5
 *   src, srclen  source buffer and its length in bytes
 *   dst, dstlen  destination buffer and its capacity in 16-bit units,
 *                including room for the terminating zero unit
 *
 * Transliteration and discarding of illegal sequences are both requested
 * on the descriptor. The result of the conversion call itself is not
 * checked: whatever was produced before it stopped is returned.
 *
 * Returns the number of 16-bit units stored in dst, not counting the
 * terminator. Returns 0 (with dst set to the empty string when dst is
 * usable) if the arguments are unusable or the converter cannot be opened.
 */
int mbs2wcs( unsigned int codepage, const char* src, int srclen, unsigned short* dst, int dstlen )
{
    (void)codepage;

    if (!dst || dstlen <= 0)
        return 0;
    dst[0] = 0;
    if (!src || srclen <= 0)
        return 0;

    iconv_t cd = iconv_open("UTF-16LE", "BIG5");
    if (cd == (iconv_t)-1)
        return 0;

    /* One unit of dst is held back for the terminator. */
    const size_t outcap = ((size_t)dstlen - 1) * sizeof(unsigned short);
    size_t inbytesleft = (size_t)srclen;
    size_t outbytesleft = outcap;
    char *in = (char*)src;
    char *out = (char*)dst;
    int value = 1;

    iconvctl( cd, ICONV_SET_TRANSLITERATE, &value);
    iconvctl( cd, ICONV_SET_DISCARD_ILSEQ, &value);
    iconv( cd, &in, &inbytesleft, &out, &outbytesleft );
    iconv_close( cd );

    /* Units written = bytes consumed from the output budget / unit size. */
    int len = (int)((outcap - outbytesleft) / sizeof(unsigned short));
    dst[len] = 0;
    return len;
}
/**
 * Convert a string from one character encoding to another.
 *
 * param from    source encoding, spelled the way iconv expects, e.g. "GB2312"
 * param to      destination encoding
 * param save    caller-allocated buffer that receives the converted data;
 *               it is always NUL-terminated once the converter was opened
 * param savelen size of save in bytes, including room for the terminator
 * param src     string to convert
 * param srclen  length of src in bytes
 *
 * When an illegal input sequence is met, discarding of illegal sequences is
 * switched on and the conversion continues. An incomplete sequence at the
 * end of the input stops the conversion and what was converted is returned.
 *
 * return strlen() of the converted string, or a negative status:
 *        -1  srclen is 0
 *        -2  unusable arguments, or the converter could not be opened
 *        -3  an illegal sequence could not be skipped
 *        -5  save is too small
 *        -6  any other iconv error
 */
int convert(const char *from, const char *to, char* save, int savelen, const char *src, int srclen)
{
    iconv_t cd;
    const char *inptr = src;
    char *outptr = save;
    size_t insize;
    size_t outsize;
    int discarding = 0;
    int one = 1;
    int status = 0;

    if (srclen == 0)
        return -1;
    if (from == NULL || to == NULL || save == NULL || src == NULL ||
        savelen <= 0 || srclen < 0)
        return -2;

    cd = iconv_open(to, from);
    if (cd == (iconv_t)(-1))
        return -2;
    iconv(cd, NULL, NULL, NULL, NULL);

    insize = (size_t)srclen;
    /* Hold one byte back so the result can always be terminated. */
    outsize = (size_t)savelen - 1;

    while (insize > 0) {
        const char *before = inptr;
        size_t res = iconv(cd, (const char**)&inptr, &insize, &outptr, &outsize);

        if (res != (size_t)(-1))
            continue;

        if (errno == EILSEQ) {
            /* Already discarding and still stuck: give up, do not spin. */
            if (discarding && inptr == before) {
                status = -3;
                break;
            }
            iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &one);
            discarding = 1;
        } else if (errno == EINVAL) {
            /* Incomplete sequence at the end of the input. */
            break;
        } else if (errno == E2BIG) {
            status = -5;
            break;
        } else {
            status = -6;
            break;
        }
    }

    /* iconv does not terminate its output; outptr never passes savelen-1. */
    *outptr = '\0';
    if (status == 0)
        status = (int)strlen(save);

    iconv_close(cd);
    return status;
}
/*
 * Sets ICONV_SET_DISCARD_ILSEQ to 0 on a UTF-8 -> "ASCII//IGNORE"
 * descriptor.
 *
 * Returns 0 when iconvctl() returns 0; -1 when it does not, or when the
 * descriptor cannot be opened or closed.
 */
static int
ctl_set_discard_ilseq2(void)
{
	int disable = 0;
	int status = 0;
	iconv_t conv = iconv_open("ASCII//IGNORE", "UTF-8");

	if (conv == (iconv_t)-1)
		return (-1);

	if (iconvctl(conv, ICONV_SET_DISCARD_ILSEQ, &disable) != 0)
		status = -1;

	/* A failing close overrides whatever iconvctl reported. */
	if (iconv_close(conv) == -1)
		status = -1;

	return (status);
}
/*
 * Sets ICONV_SET_TRANSLITERATE to 1 on a UTF-8 -> ASCII descriptor.
 *
 * Returns 0 when iconvctl() returns 0; -1 when it does not, or when the
 * descriptor cannot be opened or closed.
 */
static int
ctl_set_translit1(void)
{
	int enable = 1;
	int status = 0;
	iconv_t conv = iconv_open("ASCII", "UTF-8");

	if (conv == (iconv_t)-1)
		return (-1);

	if (iconvctl(conv, ICONV_SET_TRANSLITERATE, &enable) != 0)
		status = -1;

	/* A failing close overrides whatever iconvctl reported. */
	if (iconv_close(conv) == -1)
		status = -1;

	return (status);
}
/*
 * Queries ICONV_TRIVIALP on a KOI8-R -> ASCII descriptor and expects the
 * reported value to be 0.
 *
 * Returns 0 when iconvctl() succeeds and reports 0; -1 when it fails,
 * reports anything else, or the descriptor cannot be opened or closed.
 */
static int
ctl_trivialp2(void)
{
	int trivial;
	int status = -1;
	iconv_t conv = iconv_open("ASCII", "KOI8-R");

	if (conv == (iconv_t)-1)
		return (-1);

	/* trivial is only read once iconvctl has filled it in. */
	if (iconvctl(conv, ICONV_TRIVIALP, &trivial) == 0 && trivial == 0)
		status = 0;

	if (iconv_close(conv) == -1)
		status = -1;

	return (status);
}
/*
 * Queries ICONV_GET_TRANSLITERATE on a freshly opened UTF-8 -> ASCII
 * descriptor and expects the reported value to be 0.
 *
 * Returns 0 when iconvctl() succeeds and reports 0; -1 when it fails,
 * reports anything else, or the descriptor cannot be opened or closed.
 */
static int
ctl_get_translit2(void)
{
	int translit;
	int status = -1;
	iconv_t conv = iconv_open("ASCII", "UTF-8");

	if (conv == (iconv_t)-1)
		return (-1);

	/* translit is only read once iconvctl has filled it in. */
	if (iconvctl(conv, ICONV_GET_TRANSLITERATE, &translit) == 0 &&
	    translit == 0)
		status = 0;

	if (iconv_close(conv) == -1)
		status = -1;

	return (status);
}
/*
 * Queries ICONV_GET_DISCARD_ILSEQ on a freshly opened UTF-8 -> ASCII
 * descriptor and expects the reported value to be 0.
 *
 * Returns 0 when iconvctl() succeeds and reports 0; -1 when it fails,
 * reports anything else, or the descriptor cannot be opened or closed.
 */
static int
ctl_get_discard_ilseq1(void)
{
	int discard;
	int status = -1;
	iconv_t conv = iconv_open("ASCII", "UTF-8");

	if (conv == (iconv_t)-1)
		return (-1);

	/* discard is only read once iconvctl has filled it in. */
	if (iconvctl(conv, ICONV_GET_DISCARD_ILSEQ, &discard) == 0 &&
	    discard == 0)
		status = 0;

	if (iconv_close(conv) == -1)
		status = -1;

	return (status);
}
/*
 * Regression check for the mb_to_uc fallback set through
 * ICONV_SET_FALLBACKS on a UTF-8 -> UTF-32 descriptor.
 *
 * Feeds the two bytes of the 16-bit value 0xF187 to iconv and passes when
 * the mb_uc_fb flag is set and the first output byte is 0x3F ('?').
 * NOTE(review): the byte order of the input depends on host endianness.
 *
 * Returns 0 on pass, 1 on failure (including a failure to open or
 * configure the descriptor). The result of iconv() itself is not examined.
 */
static int __unused
ctl_mb_to_uc_fb(void)
{
	struct iconv_fallbacks fb;
	iconv_t cd;
	size_t inbytesleft, outbytesleft;
	uint16_t inbuf[1] = { 0xF187 };
	uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
	const char *inptr;
	char *outptr;

	if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
		return (1);

	fb.uc_to_mb_fallback = NULL;
	fb.mb_to_wc_fallback = NULL;
	fb.wc_to_mb_fallback = NULL;
	fb.mb_to_uc_fallback = mb_to_uc_fb;
	fb.data = NULL;

	if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0) {
		(void)iconv_close(cd);	/* do not leak the descriptor */
		return (1);
	}

	inptr = (const char *)inbuf;
	outptr = (char *)outbuf;
	inbytesleft = 2;
	outbytesleft = 4;

	errno = 0;

	/* Only the side effects (fallback flag, outbuf) are checked below. */
	(void)iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
	(void)iconv_close(cd);

#ifdef VERBOSE
	printf("mb_uc fallback: %c\n", outbuf[0]);
#endif

	if (mb_uc_fb && (outbuf[0] == 0x3F))
		return (0);
	else
		return (1);
}
int covert(const char *dstCode, const char *srcCode, char *input, size_t ilen, char *output, size_t olen) { char **pin = &input; char **pout = &output; //打开编码流 iconv_t cd = iconv_open(dstCode, srcCode); if ((iconv_t)-1 == cd) { printf("===> iconv_open is Failed!\n"); return -1; } const int argument = 1; //设置iconv行为 int iRet = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void*)&argument); //忽略无效序列并继续转换 if (0 != iRet) { printf("===> iconvctl is Failed!\n"); return -1; } //printf("===> argument = %d\n", argument); //编码格式转换 if (iconv(cd, (char**)pin, &ilen, pout, &olen)) { printf("===> iconv is Failed!\n"); return -1; } //关闭编码流 iconv_close(cd); return 0; }
static void do_conv(FILE *fp, const char *from, const char *to, bool silent, bool hide_invalid) { iconv_t cd; char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out; char *in; size_t inbytes, outbytes, ret; if ((cd = iconv_open(to, from)) == (iconv_t)-1) err(EXIT_FAILURE, "iconv_open(%s, %s)", to, from); if (hide_invalid) { int arg = 1; if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&arg) == -1) err(1, NULL); } while ((inbytes = fread(inbuf, 1, INBUFSIZE, fp)) > 0) { in = inbuf; while (inbytes > 0) { size_t inval; out = outbuf; outbytes = OUTBUFSIZE; ret = __iconv(cd, &in, &inbytes, &out, &outbytes, 0, &inval); invalids += inval; if (outbytes < OUTBUFSIZE) (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, stdout); if (ret == (size_t)-1 && errno != E2BIG) { if (errno != EINVAL || in == inbuf) err(EXIT_FAILURE, "iconv()"); /* incomplete input character */ (void)memmove(inbuf, in, inbytes); ret = fread(inbuf + inbytes, 1, INBUFSIZE - inbytes, fp); if (ret == 0) { fflush(stdout); if (feof(fp)) errx(EXIT_FAILURE, "unexpected end of file; " "the last character is " "incomplete."); else err(EXIT_FAILURE, "fread()"); } in = inbuf; inbytes += ret; } } } /* reset the shift state of the output buffer */ outbytes = OUTBUFSIZE; out = outbuf; ret = iconv(cd, NULL, NULL, &out, &outbytes); if (ret == (size_t)-1) err(EXIT_FAILURE, "iconv()"); if (outbytes < OUTBUFSIZE) (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, stdout); if (invalids > 0 && !silent) warnx("warning: invalid characters: %llu", invalids); iconv_close(cd); }
/**
 * Transcode every name and value in a parameter table from the configured
 * request encoding to the configured internal encoding.
 *
 * A new table is built and, on success, stored in *params. If either
 * encoding is not configured, nothing is done and HTP_OK is returned.
 * On failure the partially built table and its values are released and
 * *params is left untouched.
 *
 * @param[in] connp        connection parser whose cfg supplies the encodings
 * @param[in,out] params   table to transcode; replaced on success
 * @param[in] destroy_old  when non-zero, the original table and its values
 *                         are freed after a successful transcode
 * @return HTP_OK on success (or when no transcoding is needed), HTP_ERROR
 *         on any failure.
 */
int htp_transcode_params(htp_connp_t *connp, htp_table_t **params, int destroy_old) {
    htp_table_t *old_table = *params;

    // Nothing to do unless both encodings are configured
    if ((connp->cfg->internal_encoding == NULL) || (connp->cfg->request_encoding == NULL)) {
        return HTP_OK;
    }

    // The table that will receive the transcoded parameters
    htp_table_t *new_table = htp_table_create(htp_table_size(old_table));
    if (new_table == NULL) {
        return HTP_ERROR;
    }

    // Set up the converter
    iconv_t cd = iconv_open(connp->cfg->internal_encoding, connp->cfg->request_encoding);
    if (cd == (iconv_t) -1) {
        htp_table_destroy(new_table);
        return HTP_ERROR;
    }

    #if (_LIBICONV_VERSION >= 0x0108)
    int iconv_param = 0;
    iconvctl(cd, ICONV_SET_TRANSLITERATE, &iconv_param);
    iconv_param = 1;
    iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &iconv_param);
    #endif

    // Transcode each name/value pair in turn
    bstr *key = NULL;
    bstr *val = NULL;
    for (int i = 0, n = htp_table_size(old_table); i < n; i++) {
        bstr *key_out = NULL;
        bstr *val_out = NULL;

        val = htp_table_get_index(old_table, i, &key);

        htp_transcode_bstr(cd, key, &key_out);
        if (key_out == NULL) {
            goto fail;
        }

        htp_transcode_bstr(cd, val, &val_out);
        if (val_out == NULL) {
            // The name is not in the table yet, so release it here
            bstr_free(key_out);
            goto fail;
        }

        htp_table_addn(new_table, key_out, val_out);
    }

    // Hand the new table to the caller
    *params = new_table;

    // Release the old table and its values if asked to
    if (destroy_old) {
        for (int i = 0, n = htp_table_size(old_table); i < n; i++) {
            bstr *old_val = htp_table_get_index(old_table, i, NULL);
            bstr_free(old_val);
        }

        htp_table_destroy(old_table);
    }

    iconv_close(cd);

    return HTP_OK;

fail:
    // Shared failure path: close the converter, then drop everything built so far
    iconv_close(cd);

    for (int j = 0, k = htp_table_size(new_table); j < k; j++) {
        bstr *stale = htp_table_get_index(new_table, j, NULL);
        bstr_free(stale);
    }

    htp_table_destroy(new_table);

    return HTP_ERROR;
}
/*===================================================
 * iconv_trans -- Translate string via iconv
 *  src:     [IN]  source codeset
 *  dest:    [IN]  destination codeset
 *  sin:     [IN]  source string to be converted
 *  zout:    [I/O] converted result
 *  illegal: [IN]  character to use as placeholder for unconvertible input
 * Returns FALSE if the converter cannot be opened (or iconv support is
 * not compiled in); otherwise TRUE, with unconvertible input replaced
 * by the placeholder rather than reported.
 *=================================================*/
BOOLEAN
iconv_trans (CNSTRING src, CNSTRING dest, CNSTRING sin, ZSTR zout, char illegal)
{
#ifdef HAVE_ICONV
	iconv_t ict;
	const char * inptr;
	char * outptr;
	size_t inleft;
	size_t outleft;
	size_t cvted;
#ifdef ICONV_SET_TRANSLITERATE
	int transliterate=2;
#endif
	double expand=1.3;
	int chwidth=1;
	int badchars=0; /* count # illegal placeholders inserted */
	int inlen = sin ? strlen(sin) : 0;

	ASSERT(src);
	ASSERT(dest);

	ict = iconv_open(dest, src);

	if (ict == (iconv_t)-1) {
		return FALSE;
	}
	if (!strncmp(src, "UCS-2", strlen("UCS-2"))) {
		/* assume MS-Windows makenarrow call */
		inlen = 2 * wcslen((const wchar_t *)sin);
	}
	if (!strncmp(src, "UCS-4", strlen("UCS-4"))) {
		/* assume UNIX makenarrow call */
		inlen = 4 * wcslen((const wchar_t *)sin);
	}
	if (!strncmp(dest, "UCS-2", strlen("UCS-2"))) {
		chwidth = expand = 2;
	}
	if (!strncmp(dest, "UCS-4", strlen("UCS-4"))) {
		chwidth = expand = 4;
	}
	if (eqstr(dest, "wchar_t")) {
		chwidth = expand = sizeof(wchar_t);
	}
	/* TODO: What about UTF-16 or UTF-32 ? */

	zs_reserve(zout, (unsigned int)(inlen*expand+6));

	if (!inlen) {
		outptr = zs_str(zout);
		goto icvt_terminate_and_exit;
	}

	/* testing recursive transliteration in my private iconv, Perry, 2002.07.11 */
#ifdef ICONV_SET_TRANSLITERATE
	iconvctl(ict, ICONV_SET_TRANSLITERATE, &transliterate);
#endif

	inptr = sin;
	outptr = zs_str(zout);
	inleft = inlen;
	/* we are terminating with 4 zero bytes just in case dest is UCS-4 */
	outleft = zs_allocsize(zout)-zs_len(zout)-4;
	cvted = 0;

cvting:
	/* main convert */
	cvted = iconv (ict, &inptr, &inleft, &outptr, &outleft);

	/* zero terminate & fix output zstring */
	/* there may be embedded nulls, if UCS-2/4 is target! */
	*outptr=0;
	zs_set_len(zout, outptr-zs_str(zout));

	/* handle error cases */
	if (cvted == (size_t)-1) {
		/* errno is not reliable, because on MS-Windows we called
		iconv in a dll & didn't get errno */
		/*
		 * Below 4 free bytes neither a 4-byte placeholder nor a
		 * 4-byte output character fits without eating into the
		 * space reserved for the terminator, so treat it as
		 * "out of space".
		 */
		if (outleft<4) {
			/* may be out of space, so grow & retry */
			zs_reserve(zout, (unsigned int)(inleft * expand + 6 + zs_allocsize(zout)));
		} else {
			/* unconvertible input character */
			/* append placeholder & skip over */
			size_t wid = 1;
			if (eqstr(src, "UTF-8")) {
				wid = utf8len(*inptr);
			}
			/* always make progress, never run past the input */
			if (wid == 0) wid = 1;
			if (wid > inleft) wid = inleft;
			inptr += wid;
			inleft -= wid;
			/* Following code is only correct for UCS-2LE, UCS-4LE */
			if (chwidth == 2) {
				unsigned short * u = (unsigned short *)outptr;
				/* unsigned char: do not sign-extend the placeholder */
				*u = (unsigned char)illegal;
				/* advance by the unit written, not by the pointer size */
				outptr += sizeof(*u);
			} else if (chwidth == 4) {
				unsigned int * u = (unsigned int *)outptr;
				*u = (unsigned char)illegal;
				outptr += sizeof(*u);
			} else {
				*outptr++ = illegal;
			}
			++badchars;
			zs_set_len(zout, outptr-zs_str(zout));
		}
		/* update output variables */
		/* (may have reallocated, plus need to point to end */
		outptr = zs_str(zout)+zs_len(zout);
		outleft = zs_allocsize(zout)-zs_len(zout)-4;
		if (inleft)
			goto cvting;
	}

icvt_terminate_and_exit:
	/* zero-terminate with appropriately wide zero */
	if (chwidth > 1) {
		*outptr++=0;
		if (chwidth > 2) {
			*outptr++=0;
			*outptr++=0;
		}
	}
	*outptr=0;
	zs_set_len(zout, outptr-zs_str(zout));

	iconv_close(ict);
	return TRUE;
#else
	src=src; /* unused */
	dest=dest; /* unused */
	sin=sin; /* unused */
	zout=zout; /* unused */
	illegal=illegal; /* unused */
	return FALSE;
#endif /* HAVE_ICONV */
}
// // Handler // INT_32 FnIconv::Handler(CDT * aArguments, const UINT_32 iArgNum, CDT & oCDTRetVal, Logger & oLogger) { UINT_32 iMyArgNum = iArgNum; // 3 or 4 arguments need if (iMyArgNum != 3 && iMyArgNum != 4) { oLogger.Emerg("Usage: ICONV(x, src, dst[, flags])"); return -1; } // Arg 3: flags // Arg 2: destination charset // Arg 1: source charset // Arg 0: string to convert UINT_32 iFlags = 0; #ifdef ICONV_DISCARD_ILSEQ iFlags |= C_ICONV_DISCARD_ILSEQ; #endif #ifdef ICONV_TRANSLITERATE iFlags |= C_ICONV_TRANSLITERATE; #endif if (iMyArgNum == 4) { const STLW::string & sFlags = aArguments[0].GetString(); for (UINT_32 iPos = 0; iPos < sFlags.size(); ++iPos) { switch (sFlags[iPos]) { // Discard illegal sequence and continue case 'i': case 'I': iFlags |= C_ICONV_DISCARD_ILSEQ; break; // Enable transliteration case 't': case 'T': iFlags |= C_ICONV_TRANSLITERATE; break; default: oLogger.Error("Last argument should be 'i', 'I', 't' or 'T', but is `%s`", sFlags.c_str()); return -1; } } } const STLW::string & sTo = aArguments[--iMyArgNum].GetString(); const STLW::string & sFrom = aArguments[--iMyArgNum].GetString(); const STLW::string & sWhat = aArguments[--iMyArgNum].GetString(); STLW::string sFromTo(sFrom); sFromTo.append(sTo); iconv_t oIconvConverter = (iconv_t)(-1); STLW::map<STLW::string, iconv_t>::iterator itmIconvMap = mIconvMap.find(sFromTo); if (itmIconvMap != mIconvMap.end()) { oIconvConverter = itmIconvMap -> second; } // Try to open iconv converter else { oIconvConverter = iconv_open(sFrom.c_str(), sTo.c_str()); if (oIconvConverter != (iconv_t)(-1)) { mIconvMap[sFromTo] = oIconvConverter; } else { if (errno == EINVAL) { oLogger.Error("The conversion from `%s` to `%s` is not supported by the implementation", sFrom.c_str(), sTo.c_str()); } else { oLogger.Error("Error(%d) in iconv_open('%s', '%s'): %s", sFrom.c_str(), sTo.c_str(), strerror(errno)); } return -1; } } #if (_LIBICONV_VERSION >= 0x0108) int iFlag = 1; // Discard illegal characters if (iFlags & 
C_ICONV_DISCARD_ILSEQ) { if (iconvctl(oIconvConverter, ICONV_SET_DISCARD_ILSEQ, &iFlag) == -1) { oLogger.Error("ICONV_SET_DISCARD_ILSEQ is is not supported by the implementation"); return -1; } } // Ånable transliteration in the conver-sion if (iFlags & C_ICONV_TRANSLITERATE) { if (iconvctl(oIconvConverter, ICONV_SET_TRANSLITERATE, &iFlag) == -1) { oLogger.Error("ICONV_SET_TRANSLITERATE is is not supported by the implementation"); return -1; } } #endif // Allocate memory size_t iSrcLength = sWhat.size(); size_t iDstLength = CTPP_ESCAPE_BUFFER_LEN; char aDstData[CTPP_ESCAPE_BUFFER_LEN]; #if defined(linux) || defined(__APPLE__) char * aSrcData = (char *)sWhat.data(); #else const char * aSrcData = (const char *)sWhat.data(); #endif STLW::string sResult; for (;;) { char * aDstTMP = aDstData; size_t iDstLengthTMP = iDstLength; size_t iResult = iconv(oIconvConverter, &aSrcData, &iSrcLength, &aDstTMP, &iDstLengthTMP); if (aDstTMP - aDstData > 0) { sResult.append(aDstData, aDstTMP - aDstData); } // All data converted? if (iResult != (size_t)-1) { break; } else { if (errno != E2BIG) { ++aSrcData; --iSrcLength; } } } oCDTRetVal = sResult; return 0; }
/**
 * Transcode every name and value in a parameter table from the configured
 * request encoding to the configured internal encoding.
 *
 * A new table is built and, on success, stored in *params. If either
 * encoding is not configured, nothing is done and HTP_OK is returned.
 * On failure the partially built table and its values are released and
 * *params is left untouched.
 *
 * @param connp        connection parser whose cfg supplies the encodings
 *                     and the table factory
 * @param params       table to transcode; replaced on success
 * @param destroy_old  when non-zero, the original table and its values are
 *                     freed after a successful transcode
 * @return HTP_OK on success (or when no transcoding is needed), HTP_ERROR
 *         on any failure.
 */
int htp_transcode_params(htp_connp_t *connp, table_t **params, int destroy_old) {
    table_t *input_params = *params;
    bstr *name;
    void *tvalue;

    // No transcoding unless both encodings are known; a NULL encoding name
    // must never reach iconv_open()
    if ((connp->cfg->internal_encoding == NULL) || (connp->cfg->request_encoding == NULL)) {
        return HTP_OK;
    }

    // Create a new table that will hold transcoded parameters
    table_t *output_params = connp->cfg->create_table(table_size(input_params));
    if (output_params == NULL) {
        return HTP_ERROR;
    }

    // Initialize iconv
    iconv_t cd = iconv_open(connp->cfg->internal_encoding, connp->cfg->request_encoding);
    if (cd == (iconv_t) -1) {
        // TODO Report iconv initialization error
        table_destroy(&output_params);
        return HTP_ERROR;
    }

    #if (_LIBICONV_VERSION >= 0x0108)
    int iconv_param = 0;
    iconvctl(cd, ICONV_SET_TRANSLITERATE, &iconv_param);
    iconv_param = 1;
    iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &iconv_param);
    #endif

    // Convert the parameters, one by one
    table_iterator_reset(input_params);
    while ((name = table_iterator_next(input_params, &tvalue)) != NULL) {
        bstr *new_name = NULL, *new_value = NULL;
        bstr *value = (bstr *)tvalue;

        // Convert name
        htp_transcode_bstr(cd, name, &new_name);
        if (new_name == NULL) {
            goto fail;
        }

        // Convert value
        htp_transcode_bstr(cd, value, &new_value);
        if (new_value == NULL) {
            // The name is not in the table yet, so release it here
            bstr_free(&new_name);
            goto fail;
        }

        // Add to new table
        table_addn(output_params, new_name, new_value);
    }

    // Replace the old parameter table
    *params = output_params;

    // Destroy the old parameter table if necessary
    if (destroy_old) {
        table_iterator_reset(input_params);
        while (table_iterator_next(input_params, &tvalue) != NULL) {
            bstr *b = (bstr *)tvalue;
            bstr_free(&b);
        }

        table_destroy(&input_params);
    }

    iconv_close(cd);

    return HTP_OK;

fail:
    // Shared failure path: close the converter, then drop everything built so far
    iconv_close(cd);

    table_iterator_reset(output_params);
    while (table_iterator_next(output_params, &tvalue) != NULL) {
        bstr *b = (bstr *)tvalue;
        bstr_free(&b);
    }

    table_destroy(&output_params);

    return HTP_ERROR;
}