/*
 * Run iconvlist() with iconvlist_cb and return the integer the callback
 * leaves behind in its user-data slot.
 *
 * The slot is zero-initialised before the call: iconvlist_cb is defined
 * elsewhere, and whether it assigns or accumulates into the slot -- or is
 * never invoked at all -- the returned value is now always well defined.
 */
static int gnu_iconvlist(void)
{
    int i = 0;

    iconvlist(iconvlist_cb, &i);
    return i;
}
// Empty *lst, push four fixed encoding names onto its head, then call
// iconvlist() with print_one as the callback and lst as its data pointer.
// NOTE(review): print_one is defined elsewhere; presumably it adds the
// names reported by iconv to the same list -- confirm.
void CIconv::List(CStringList *lst)
{
    CStringList &names = *lst;

    names.RemoveAll();

    // MARC8 is from YAZ
    names.AddHead(_T("MARC8"));

    // Internal Encoding
    names.AddHead(_T("HTML"));
    names.AddHead(_T("TeX"));
    names.AddHead(_T("RTF"));

    iconvlist(print_one, lst);
}
/* iconv(x, from, to, sub, mark, toRaw)
 *
 * args (in order): x, from, to, sub, mark, toRaw.
 *
 *  - x NULL: return the encoding names collected through iconvlist()
 *    (R_NilValue when HAVE_ICONVLIST is not defined).
 *  - x a character vector, or a list whose elements are NULL or raw
 *    vectors: convert every element from encoding 'from' to encoding 'to'.
 *
 * sub:   NA means "no substitution"; otherwise unconvertible input bytes are
 *        replaced either by "<xx>" (when sub is "byte") or by the sub string.
 * mark:  when TRUE, results are marked as latin1 / UTF-8 if 'to' names such
 *        an encoding (or is "" and the locale is known to be one of them).
 * toRaw: when TRUE the result is a list of raw vectors, otherwise a
 *        character vector.
 *
 * Elements that cannot be converted become NA_STRING (or stay NULL in the
 * raw-list result).
 *
 * error() does not return to this function (the code following the
 * Riconv_open() check relies on that), so every error raised once the
 * converter is open first releases the converter -- and, inside the loop,
 * the scratch buffer -- to avoid leaking them.
 */
SEXP attribute_hidden do_iconv(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP ans, x = CAR(args), si;
    void * obj;
    const char *inbuf;
    char *outbuf;
    const char *sub;
    size_t inb, outb, res;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};
    Rboolean isRawlist = FALSE;

    checkArity(op, args);
    if(isNull(x)) {  /* list locales */
#ifdef HAVE_ICONVLIST
        /* two passes over the encoding list: count, allocate, then fill */
        cnt = 0;
        iconvlist(count_one, NULL);
        PROTECT(ans = allocVector(STRSXP, cnt));
        cnt = 0;
        iconvlist(write_one, (void *)ans);
#else
        PROTECT(ans = R_NilValue);
#endif
    } else {
        int mark, toRaw;
        const char *from, *to;
        Rboolean isLatin1 = FALSE, isUTF8 = FALSE;

        args = CDR(args);
        if(!isString(CAR(args)) || length(CAR(args)) != 1)
            error(_("invalid '%s' argument"), "from");
        from = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */
        args = CDR(args);
        if(!isString(CAR(args)) || length(CAR(args)) != 1)
            error(_("invalid '%s' argument"), "to");
        to = CHAR(STRING_ELT(CAR(args), 0));
        args = CDR(args);
        if(!isString(CAR(args)) || length(CAR(args)) != 1)
            error(_("invalid '%s' argument"), "sub");
        if(STRING_ELT(CAR(args), 0) == NA_STRING) sub = NULL;
        else sub = translateChar(STRING_ELT(CAR(args), 0));
        args = CDR(args);
        mark = asLogical(CAR(args));
        if(mark == NA_LOGICAL)
            error(_("invalid '%s' argument"), "mark");
        args = CDR(args);
        toRaw = asLogical(CAR(args));
        if(toRaw == NA_LOGICAL)
            error(_("invalid '%s' argument"), "toRaw");
        /* some iconv's allow "UTF8", but libiconv does not */
        if(streql(from, "UTF8") || streql(from, "utf8") ) from = "UTF-8";
        if(streql(to, "UTF8") || streql(to, "utf8") ) to = "UTF-8";
        /* Should we do something about marked CHARSXPs in 'from = ""'?
         */
        if(streql(to, "UTF-8")) isUTF8 = TRUE;
        if(streql(to, "latin1") || streql(to, "ISO_8859-1")
           || streql(to, "CP1252")) isLatin1 = TRUE;
        if(streql(to, "") && known_to_be_latin1) isLatin1 = TRUE;
        if(streql(to, "") && known_to_be_utf8) isUTF8 = TRUE;
        obj = Riconv_open(to, from);
        if(obj == (iconv_t)(-1))
#ifdef Win32
            error(_("unsupported conversion from '%s' to '%s' in codepage %d"),
                  from, to, localeCP);
#else
            error(_("unsupported conversion from '%s' to '%s'"), from, to);
#endif
        isRawlist = (TYPEOF(x) == VECSXP);
        if(isRawlist) {
            if(toRaw)
                PROTECT(ans = duplicate(x));
            else {
                PROTECT(ans = allocVector(STRSXP, LENGTH(x)));
                DUPLICATE_ATTRIB(ans, x);
            }
        } else {
            if(TYPEOF(x) != STRSXP) {
                /* the converter is already open: close it before bailing out */
                Riconv_close(obj);
                error(_("'x' must be a character vector"));
            }
            if(toRaw) {
                PROTECT(ans = allocVector(VECSXP, LENGTH(x)));
                DUPLICATE_ATTRIB(ans, x);
            } else
                PROTECT(ans = duplicate(x));
        }
        R_AllocStringBuffer(0, &cbuff);  /* 0 -> default */
        for(R_xlen_t i = 0; i < XLENGTH(x); i++) {
            if (isRawlist) {
                si = VECTOR_ELT(x, i);
                if (TYPEOF(si) == NILSXP) {
                    if (!toRaw) SET_STRING_ELT(ans, i, NA_STRING);
                    continue;
                } else if (TYPEOF(si) != RAWSXP) {
                    /* release the converter and the scratch buffer first */
                    Riconv_close(obj);
                    R_FreeStringBuffer(&cbuff);
                    error(_("'x' must be a list of NULL or raw vectors"));
                }
            } else {
                si = STRING_ELT(x, i);
                if (si == NA_STRING) {
                    if(!toRaw) SET_STRING_ELT(ans, i, NA_STRING);
                    continue;
                }
            }
        top_of_loop:
            /* (re)start the conversion of element i from its first byte;
               also reached after the output buffer has been enlarged */
            inbuf = isRawlist ? (const char *) RAW(si) : CHAR(si);
            inb = LENGTH(si);
            outbuf = cbuff.data;
            outb = cbuff.bufsize - 1; /* keep one byte for the terminator */
            /* First initialize output */
            Riconv (obj, NULL, NULL, &outbuf, &outb);
        next_char:
            /* Then convert input */
            res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
            *outbuf = '\0';
            /* other possible error conditions are incomplete
               and invalid multibyte chars */
            if(res == -1 && errno == E2BIG) {
                /* output buffer too small: double it and redo this element */
                R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                goto top_of_loop;
            } else if(res == -1 && sub &&
                      (errno == EILSEQ || errno == EINVAL)) {
                /* it seems this gets thrown for non-convertible input too */
                if(strcmp(sub, "byte") == 0) {
                    if(outb < 5) {
                        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                        goto top_of_loop;
                    }
                    /* writes "<xx>" plus a NUL; only the 4 visible bytes
                       are counted as output */
                    snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
                    outbuf += 4; outb -= 4;
                } else {
                    size_t j;
                    if(outb < strlen(sub)) {
                        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                        goto top_of_loop;
                    }
                    memcpy(outbuf, sub, j = strlen(sub));
                    outbuf += j;
                    outb -= j;
                }
                /* skip the offending input byte and carry on */
                inbuf++; inb--;
                goto next_char;
            }

            if(toRaw) {
                if(res != -1 && inb == 0) {
                    size_t nout = cbuff.bufsize - 1 - outb;
                    SEXP el = allocVector(RAWSXP, nout);
                    memcpy(RAW(el), cbuff.data, nout);
                    SET_VECTOR_ELT(ans, i, el);
                } /* otherwise is already NULL */
            } else {
                if(res != -1 && inb == 0) {
                    cetype_t ienc = CE_NATIVE;
                    size_t nout = cbuff.bufsize - 1 - outb;
                    if(mark) {
                        if(isLatin1) ienc = CE_LATIN1;
                        else if(isUTF8) ienc = CE_UTF8;
                    }
                    SET_STRING_ELT(ans, i,
                                   mkCharLenCE(cbuff.data, (int) nout, ienc));
                } else SET_STRING_ELT(ans, i, NA_STRING);
            }
        }
        Riconv_close(obj);
        R_FreeStringBuffer(&cbuff);
    }
    UNPROTECT(1);
    return ans;
}
/*
 * Call iconvlist() with the print_one callback (defined elsewhere) and no
 * user data, then terminate the process with EXIT_SUCCESS.
 * This function does not return.
 *
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype rather than an old-style unspecified-arguments declaration.
 */
static void show_iconvlist(void)
{
    iconvlist(print_one, NULL);
    exit(EXIT_SUCCESS);
}