/**
 * Convert one block of input with an open conversion descriptor and write
 * the converted bytes to a stdio stream.
 *
 * The input is converted in pieces of at most OUTBUF_SIZE output bytes;
 * each piece is written to @p output before the next one is converted.
 *
 * @param cd      conversion descriptor handed to smb_iconv()
 * @param addr    start of the input block
 * @param len     number of input bytes in the block
 * @param output  stream receiving the converted bytes
 * @return 0 when the whole block was converted and written, -1 when writing
 *         failed or smb_iconv() reported an error other than E2BIG
 */
static int process_block(smb_iconv_t cd, const char *addr, size_t len, FILE *output)
{
#define OUTBUF_SIZE	32768
	const char *start = addr;
	char outbuf[OUTBUF_SIZE];
	char *outptr;
	size_t outlen;
	size_t n;

	while (len > 0) {
		outptr = outbuf;
		outlen = OUTBUF_SIZE;
		n = smb_iconv(cd, &addr, &len, &outptr, &outlen);

		if (outptr != outbuf) {
			/* We have something to write out. fwrite() may clobber
			 * errno, so keep the value set by smb_iconv(). */
			int errno_save = errno;
			size_t produced = (size_t)(outptr - outbuf);

			if (fwrite(outbuf, 1, produced, output) < produced
			    || ferror(output)) {
				/* Error occurred while printing the result. */
				DEBUG(0, ("conversion stopped due to problem in writing the output"));
				return -1;
			}

			errno = errno_save;
		}

		if (n != (size_t)-1) {
			/* The call succeeded, so errno is meaningless here.
			 * Looking at it anyway would turn a stale value into
			 * a bogus "unknown iconv() error". */
			break;
		}

		if (errno != E2BIG) {
			/* iconv() ran into a problem. */
			switch (errno) {
			case EILSEQ:
				DEBUG(0, ("illegal input sequence at position %ld",
					  (long)(addr - start)));
				break;
			case EINVAL:
				DEBUG(0, ("incomplete character or shift sequence at end of buffer"));
				break;
			case EBADF:
				DEBUG(0, ("internal error (illegal descriptor)"));
				break;
			default:
				DEBUG(0, ("unknown iconv() error %d", errno));
				break;
			}

			return -1;
		}

		/* E2BIG: the output buffer filled up and has been flushed;
		 * go round again for the remaining input. */
	}

	return 0;
}
/**
 * Convert a buffer between two character sets into newly allocated memory.
 *
 * The output buffer starts at three times @p srclen and is doubled each time
 * smb_iconv() reports E2BIG. Two extra bytes are always allocated and zeroed
 * after the converted data so the result is terminated even as UCS-2.
 *
 * @param ctx             talloc context to allocate from; when NULL the
 *                        buffer comes from SMB_REALLOC instead
 * @param from            source character set
 * @param to              destination character set
 * @param src             input bytes (must not be NULL)
 * @param srclen          input length in bytes; (size_t)-1 is rejected
 * @param dst             really a `void **`: receives the new buffer, and is
 *                        set to NULL on every failure and when srclen is 0
 * @param allow_bad_conv  when true, bytes that cannot be converted are
 *                        replaced with lp_failed_convert_char() rather than
 *                        failing the whole conversion
 * @return number of converted bytes (terminator not counted), 0 for empty
 *         input, or (size_t)-1 on failure
 *
 * On failure a buffer obtained through SMB_REALLOC is released here. A
 * talloc buffer is left attached to @p ctx, as the wrap path already did.
 */
size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
			       void const *src, size_t srclen, void *dst,
			       BOOL allow_bad_conv)
{
	size_t i_len, o_len, destlen = (srclen * 3) / 2;
	size_t retval;
	const char *inbuf = (const char *)src;
	char *outbuf = NULL, *ob = NULL;
	smb_iconv_t descriptor;
	void **dest = (void **)dst;

	*dest = NULL;

	if (src == NULL || srclen == (size_t)-1)
		return (size_t)-1;
	if (srclen == 0)
		return 0;

	lazy_initialize_conv();

	descriptor = conv_handles[from][to];

	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
		if (!conv_silent)
			DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
		return (size_t)-1;
	}

  convert:

	/* +2 is for ucs2 null termination. */
	if ((destlen*2)+2 < destlen) {
		/* wrapped ! abort. */
		if (!conv_silent)
			DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
		/* Free the base pointer: outbuf may point into the middle of
		 * the buffer after a partial conversion. */
		if (!ctx)
			SAFE_FREE(ob);
		return (size_t)-1;
	} else {
		destlen = destlen * 2;
	}

	/* +2 is for ucs2 null termination. */
	/* NOTE(review): on failure the old pointer is overwritten; whether
	 * these wrappers release it themselves is not visible here. */
	if (ctx) {
		ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
	} else {
		ob = (char *)SMB_REALLOC(ob, destlen + 2);
	}

	if (!ob) {
		DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
		return (size_t)-1;
	}

	/* Every attempt restarts from scratch. smb_iconv() advances inbuf,
	 * so it has to be rewound together with i_len; otherwise a retry
	 * after E2BIG would read past the end of src. */
	inbuf = (const char *)src;
	outbuf = ob;
	i_len = srclen;
	o_len = destlen;

 again:

	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
	if (retval == (size_t)-1) {
		const char *reason = "unknown error";
		switch (errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			if (!conv_silent)
				DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
			if (allow_bad_conv)
				goto use_as_is;
			break;
		case E2BIG:
			/* Output buffer too small: grow it and start over. */
			goto convert;
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			if (!conv_silent)
				DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
			if (allow_bad_conv)
				goto use_as_is;
			break;
		}
		if (!conv_silent)
			DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
		/* smb_panic(reason); */
		/* *dest is NULL, so the caller cannot release a malloc'ed
		 * buffer; do it here. */
		if (!ctx)
			SAFE_FREE(ob);
		return (size_t)-1;
	}

  out:

	destlen = destlen - o_len;
	if (ctx) {
		/* We're shrinking here so we know the +2 is safe from wrap. */
		ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
	} else {
		ob = (char *)SMB_REALLOC(ob, destlen + 2);
	}

	/* At least two bytes are always requested, so NULL is a failure even
	 * when destlen is 0; the terminator stores below need a buffer. */
	if (!ob) {
		DEBUG(0, ("convert_string_allocate: out of memory!\n"));
		return (size_t)-1;
	}

	*dest = ob;

	/* Must ucs2 null terminate in the extra space we allocated. */
	ob[destlen] = '\0';
	ob[destlen+1] = '\0';

	return destlen;

 use_as_is:

	/*
	 * Conversion not supported. This is actually an error, but there are so
	 * many misconfigured iconv systems and smb.conf's out there we can't just
	 * fail. Do a very bad conversion instead.... JRA.
	 */

	{
		if (o_len == 0 || i_len == 0)
			goto out;

		/* The target test must be "neither UTF-16 variant"; written
		 * with || it is always true and would also catch UTF-16 to
		 * UTF-16 conversions. */
		if (((from == CH_UTF16LE) || (from == CH_UTF16BE)) &&
		    ((to != CH_UTF16LE) && (to != CH_UTF16BE))) {
			/* Can't convert from utf16 any endian to multibyte.
			   Replace with the default fail char. */
			if (i_len < 2)
				goto out;

			*outbuf = lp_failed_convert_char();

			outbuf++;
			o_len--;

			inbuf += 2;
			i_len -= 2;

			if (o_len == 0 || i_len == 0)
				goto out;

			/* Keep trying with the next char... */
			goto again;

		} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
			/* Can't convert to UTF16LE - just widen by adding the
			   default fail char then zero. */
			if (o_len < 2)
				goto out;

			outbuf[0] = lp_failed_convert_char();
			outbuf[1] = '\0';

			inbuf++;
			i_len--;

			outbuf += 2;
			o_len -= 2;

			if (o_len == 0 || i_len == 0)
				goto out;

			/* Keep trying with the next char... */
			goto again;

		} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
			   to != CH_UTF16LE && to != CH_UTF16BE) {
			/* Failed multibyte to multibyte. Just copy the default
			   fail char and try again. */
			outbuf[0] = lp_failed_convert_char();

			inbuf++;
			i_len--;

			outbuf++;
			o_len--;

			if (o_len == 0 || i_len == 0)
				goto out;

			/* Keep trying with the next char... */
			goto again;

		} else {
			/* No substitution rule for this pair: keep what has
			   been converted so far. */
			goto out;
		}
	}
}
/**
 * Convert a buffer between two character sets into a caller-supplied buffer.
 *
 * @param from            source character set
 * @param to              destination character set
 * @param src             input bytes
 * @param srclen          input length in bytes; (size_t)-1 means "measure a
 *                        terminated string", and the terminator is then
 *                        included in the converted range
 * @param dest            output buffer
 * @param destlen         size of the output buffer in bytes
 * @param allow_bad_conv  when true, input that cannot be converted is
 *                        replaced with lp_failed_convert_char() and the
 *                        conversion carries on
 * @return number of bytes written to @p dest, or (size_t)-1 when no
 *         converter exists for the pair
 *
 * A conversion error (including running out of output space) is logged but
 * not reported to the caller: the count of bytes written so far is returned.
 */
static size_t convert_string_internal(charset_t from, charset_t to,
				      void const *src, size_t srclen,
				      void *dest, size_t destlen,
				      BOOL allow_bad_conv)
{
	size_t i_len, o_len;
	size_t retval;
	const char *inbuf = (const char *)src;
	char *outbuf = (char *)dest;
	smb_iconv_t descriptor;

	lazy_initialize_conv();

	descriptor = conv_handles[from][to];

	if (srclen == (size_t)-1) {
		if (from == CH_UTF16LE || from == CH_UTF16BE) {
			srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
		} else {
			srclen = strlen((const char *)src)+1;
		}
	}

	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
		if (!conv_silent)
			DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
		return (size_t)-1;
	}

	i_len = srclen;
	o_len = destlen;

 again:

	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
	if (retval == (size_t)-1) {
		const char *reason = "unknown error";
		switch (errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			if (!conv_silent)
				DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
			if (allow_bad_conv)
				goto use_as_is;
			break;
		case E2BIG:
			reason = "No more room";
			if (!conv_silent) {
				if (from == CH_UNIX) {
					DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
						 charset_name(from), charset_name(to),
						 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
				} else {
					DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
						 charset_name(from), charset_name(to),
						 (unsigned int)srclen, (unsigned int)destlen));
				}
			}
			break;
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			if (!conv_silent)
				DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
			if (allow_bad_conv)
				goto use_as_is;
			break;
		default:
			if (!conv_silent)
				DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
			break;
		}
		/* smb_panic(reason); */
	}
	return destlen-o_len;

 use_as_is:

	/*
	 * Conversion not supported. This is actually an error, but there are so
	 * many misconfigured iconv systems and smb.conf's out there we can't just
	 * fail. Do a very bad conversion instead.... JRA.
	 */

	{
		if (o_len == 0 || i_len == 0)
			return destlen - o_len;

		/* The target test must be "neither UTF-16 variant"; written
		 * with || it is always true and would also catch UTF-16 to
		 * UTF-16 conversions. */
		if (((from == CH_UTF16LE) || (from == CH_UTF16BE)) &&
		    ((to != CH_UTF16LE) && (to != CH_UTF16BE))) {
			/* Can't convert from utf16 any endian to multibyte.
			   Replace with the default fail char. */
			if (i_len < 2)
				return destlen - o_len;

			*outbuf = lp_failed_convert_char();

			outbuf++;
			o_len--;

			inbuf += 2;
			i_len -= 2;

			if (o_len == 0 || i_len == 0)
				return destlen - o_len;

			/* Keep trying with the next char... */
			goto again;

		} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
			/* Can't convert to UTF16LE - just widen by adding the
			   default fail char then zero. */
			if (o_len < 2)
				return destlen - o_len;

			outbuf[0] = lp_failed_convert_char();
			outbuf[1] = '\0';

			inbuf++;
			i_len--;

			outbuf += 2;
			o_len -= 2;

			if (o_len == 0 || i_len == 0)
				return destlen - o_len;

			/* Keep trying with the next char... */
			goto again;

		} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
			   to != CH_UTF16LE && to != CH_UTF16BE) {
			/* Failed multibyte to multibyte. Just copy the default
			   fail char and try again. */
			outbuf[0] = lp_failed_convert_char();

			inbuf++;
			i_len--;

			outbuf++;
			o_len--;

			if (o_len == 0 || i_len == 0)
				return destlen - o_len;

			/* Keep trying with the next char... */
			goto again;

		} else {
			/* No substitution rule for this pair: report what has
			   been converted so far. */
			return destlen - o_len;
		}
	}
}
codepoint_t next_codepoint(const char *str, size_t *size) { /* It cannot occupy more than 4 bytes in UTF16 format */ uint8_t buf[4]; smb_iconv_t descriptor; size_t ilen_orig; size_t ilen; size_t olen; char *outbuf; if ((str[0] & 0x80) == 0) { *size = 1; return (codepoint_t)str[0]; } /* We assume that no multi-byte character can take more than 5 bytes. This is OK as we only support codepoints up to 1M */ ilen_orig = strnlen(str, 5); ilen = ilen_orig; lazy_initialize_conv(); descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { *size = 1; return INVALID_CODEPOINT; } /* This looks a little strange, but it is needed to cope with codepoints above 64k which are encoded as per RFC2781. */ olen = 2; outbuf = (char *)buf; smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); if (olen == 2) { /* We failed to convert to a 2 byte character. See if we can convert to a 4 UTF16-LE byte char encoding. */ olen = 4; outbuf = (char *)buf; smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); if (olen == 4) { /* We didn't convert any bytes */ *size = 1; return INVALID_CODEPOINT; } olen = 4 - olen; } else { olen = 2 - olen; } *size = ilen_orig - ilen; if (olen == 2) { /* 2 byte, UTF16-LE encoded value. */ return (codepoint_t)SVAL(buf, 0); } if (olen == 4) { /* Decode a 4 byte UTF16-LE character manually. See RFC2871 for the encoding machanism. */ codepoint_t w1 = SVAL(buf,0) & ~0xD800; codepoint_t w2 = SVAL(buf,2) & ~0xDC00; return (codepoint_t)0x10000 + (w1 << 10) + w2; } /* no other length is valid */ return INVALID_CODEPOINT; }
/**
 * Convert a buffer with an open conversion descriptor into a talloc buffer,
 * growing the buffer until the converted data fits.
 *
 * @param ctx     talloc context used for new allocations and reallocations
 * @param cd      conversion descriptor handed to smb_iconv()
 * @param src     input bytes
 * @param srclen  input length in bytes
 * @param pdst    in/out. If *pdst is NULL a fresh buffer is allocated;
 *                otherwise *pdst must be a talloc buffer, which is reused
 *                and may be reallocated. On success *pdst is the buffer
 *                holding the result.
 * @return number of converted bytes (the two zero bytes written after the
 *         data are not counted), or (size_t)-1 on failure
 *
 * Once the buffer has been taken over, any failure frees it and sets *pdst
 * to NULL, so the caller is never left holding a freed or moved pointer.
 * An invalid descriptor or an unusable @p srclen is rejected before that,
 * leaving *pdst untouched.
 */
size_t iconvert_talloc(const void *ctx, smb_iconv_t cd, const char *src,
		       size_t srclen, char **pdst)
{
	size_t dstlen, ret;
	size_t obytes, ibytes;
	char *optr, *dst, *tmp;
	const char *iptr;

	if (cd == NULL || cd == ((smb_iconv_t)-1)) {
		return -1;
	}

	/* srclen + 2 must not wrap: a wrapped size would make the
	 * "dstlen - 2" below underflow to a huge output length. */
	if (srclen > (size_t)-1 - 2) {
		return -1;
	}

	dst = *pdst;
	if (dst == NULL) {
		/*
		 * Allocate an extra two bytes for the
		 * terminating zero.
		 */
		dstlen = srclen + 2;
		dst = (char *)talloc_size(ctx, dstlen);
		if (dst == NULL) {
			DEBUG(0,("iconver_talloc no mem\n"));
			return -1;
		}
	} else {
		dstlen = talloc_get_size(dst);
		if (dstlen < 2) {
			/* Too small even for the terminator, and dstlen - 2
			 * would wrap. Grow it to the default size first. */
			dstlen = srclen + 2;
			tmp = talloc_realloc(ctx, dst, char, dstlen);
			if (tmp == NULL) {
				DEBUG(0,("iconver_talloc no mem\n"));
				talloc_free(dst);
				*pdst = NULL;
				return -1;
			}
			dst = tmp;
		}
	}

convert:
	/* Each attempt converts the whole input again from the start. */
	iptr = src;
	ibytes = srclen;
	optr = dst;
	obytes = dstlen-2;

	ret = smb_iconv(cd, &iptr, &ibytes, &optr, &obytes);

	if (ret == (size_t)-1) {
		const char *reason = "unknown error";
		switch (errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			break;
		case E2BIG:
			/* Output did not fit: double the buffer and retry. */
			dstlen = 2*dstlen + 2;
			tmp = talloc_realloc(ctx, dst, char, dstlen);
			if (tmp == NULL) {
				reason = "talloc_realloc failed";
				break;
			}
			dst = tmp;
			goto convert;
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			break;
		}
		DEBUG(0,("Conversion error: %s(%.80s) %li\n", reason, iptr, (long int)(iptr-src)));
		talloc_free(dst);
		/* The buffer is gone, and the caller's pointer may already be
		 * stale after a realloc; do not leave it dangling. */
		*pdst = NULL;
		return -1;
	}

	dstlen = (dstlen-2) - obytes;

	/* Two zero bytes after the data, in the space reserved above. */
	SSVAL(dst, dstlen, 0);

	*pdst = dst;
	return dstlen;
}