Esempio n. 1
0
/* used in gram.c and devX11.c */
/*
 * Convert the single code point 'wc' to a multibyte string stored in 's'.
 *
 * The converter (from UNICODE to the "" encoding) is opened on first use and
 * cached in the file-level 'ucsmb_obj'.  On non-Windows builds a second
 * attempt is made with the name returned by locale2charset(NULL).
 *
 * Returns:
 *   1              when wc == 0 (an empty string is stored),
 *   strlen(result) on success (and also when the converter reports E2BIG),
 *   (size_t) -2    when the converter fails with EINVAL,
 *   (size_t) -1    when no converter could be opened, or on any other
 *                  conversion failure (errno is then EILSEQ).
 *
 * 's' must have room for MB_CUR_MAX bytes plus the terminator.
 */
size_t ucstomb(char *s, const unsigned int wc)
{
    char          mb[MB_CUR_MAX+1];
    unsigned int  ucs[2];
    const char   *src = (const char *) ucs;
    char         *dst = mb;
    size_t        srcLeft = sizeof(unsigned int); /* better be 4 */
    size_t        dstLeft = sizeof(mb);

    if (wc == 0) {
        s[0] = '\0';
        return 1;
    }

    memset(mb, 0, sizeof(mb));
    ucs[0] = wc;
    ucs[1] = 0;

    if (ucsmb_obj == NULL) {
        void *cd = Riconv_open("", UNICODE);
#ifndef  Win32
        if (cd == (void *)(-1)) {
            /* locale set fuzzy case: retry with an explicit charset name */
            char tocode[128];
            strncpy(tocode, locale2charset(NULL), sizeof(tocode));
            tocode[sizeof(tocode) - 1] = '\0';
            cd = Riconv_open(tocode, UNICODE);
        }
#endif
        if (cd == (void *)(-1))
            return (size_t)(-1);
        ucsmb_obj = cd;
    }

    if (Riconv(ucsmb_obj, &src, &srcLeft, &dst, &dstLeft) == (size_t) -1) {
        if (errno == EINVAL)
            return (size_t) -2;
        if (errno != E2BIG) {
            /* EILSEQ and every unexpected error are reported as EILSEQ */
            errno = EILSEQ;
            return (size_t) -1;
        }
        /* E2BIG: fall through and use whatever was produced */
    }

    mb[MB_CUR_MAX] = '\0'; /* safety measure */
    strcpy(s, mb);
    return strlen(mb);
}
Esempio n. 2
0
/*
 * Convert the file name held in the CHARSXP 'fn' to a wide string by running
 * it through a converter to "UCS-2LE".
 *
 * fn      CHARSXP with the file name; its latin1/UTF-8 mark selects the
 *         source encoding, otherwise "" is used.
 * expand  if true the name is first passed through R_ExpandFileName().
 *
 * Returns a pointer to a static buffer (overwritten by the next call).  An
 * empty string is returned for an empty name or when no converter can be
 * opened.  Problems are reported with REprintf(); they do not raise an error.
 *
 * NOTE(review): the output size is given as 2*BSIZE bytes, i.e. this assumes
 * a 2-byte wchar_t -- confirm for the target platform.
 */
wchar_t *filenameToWchar_wcc(const SEXP fn, const Rboolean expand){
    static wchar_t filename[BSIZE + 1];
    void *obj;
    const char *from = "", *inbuf;
    char *outbuf;
    size_t inb, outb, res;

    /* Start from an all-zero buffer: the buffer is static, so without this a
       conversion that stops early would leave the tail of a previous result
       (and possibly no terminator) behind. */
    memset(filename, 0, sizeof(filename));

    if(!strlen(CHAR(fn)))
        return filename;

    if(IS_LATIN1(fn)) from = "latin1";
    if(IS_UTF8(fn)) from = "UTF-8";
    if(IS_BYTES(fn)) REprintf("encoding of a filename cannot be 'bytes'");

    obj = Riconv_open("UCS-2LE", from);
    if(obj == (void *)(-1)) {
        REprintf("unsupported conversion from '%s' in shellexec_wcc.c",
                 from);
        /* The descriptor is invalid: it must not be handed to Riconv() or
           Riconv_close().  Return the (empty) buffer instead. */
        return filename;
    }

    if(expand) inbuf = R_ExpandFileName(CHAR(fn)); else inbuf = CHAR(fn);

    /* +1 so that the terminating NUL is converted as well */
    inb = strlen(inbuf)+1; outb = 2*BSIZE;
    outbuf = (char *) filename;
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    Riconv_close(obj);
    if(inb > 0) REprintf("file name conversion problem -- name too long?");
    if(res == (size_t)(-1)) REprintf("file name conversion problem");

    return filename;
} /* End of filenameToWchar_wcc(). */
Esempio n. 3
0
/* used in plot.c for non-UTF-8 MBCS */
/*
 * Convert the first character of the multibyte string 's' (in the ""
 * encoding) to a UNICODE code point stored in '*wc'.
 *
 * wc  receives the first converted code unit (0 for an empty string).
 * s   NUL-terminated input string.
 * n   not used by this implementation.
 *
 * Returns 1 on success (including the empty string and the E2BIG case, where
 * the output buffer simply filled up after the first characters),
 * (size_t) -2 if the converter reports EINVAL, and (size_t) -1 if the
 * converter cannot be opened or reports any other error (errno is EILSEQ
 * after a conversion failure).
 */
size_t attribute_hidden
mbtoucs(unsigned int *wc, const char *s, size_t n)
{
    unsigned int  wcs[2] = {0, 0};
    void    *cd;
    const char *inbuf = s;
    size_t   inbytesleft = strlen(s);
    char    *outbuf = (char *) wcs;
    /* The output really goes into 'wcs', so that is the size to advertise;
       claiming more lets the converter write past the end of the array. */
    size_t   outbytesleft = sizeof(wcs);
    size_t   status;
    int      saved_errno = 0;

    (void) n;

    if(s[0] == 0) {*wc = 0; return 1;}

    if((void *)(-1) == (cd = Riconv_open(UNICODE, ""))) return (size_t)(-1);
    status = Riconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);

    /* Remember errno before closing, then close on every path so that the
       descriptor is not leaked when the conversion fails. */
    if (status == (size_t) -1) saved_errno = errno;
    Riconv_close(cd);

    if (status == (size_t) -1) {
        switch(saved_errno){
        case EINVAL:
            errno = saved_errno;
            return (size_t) -2;
        case EILSEQ:
            errno = saved_errno;
            return (size_t) -1;
        case E2BIG:
            /* only the first character is wanted, so a full buffer is fine */
            break;
        default:
            errno = EILSEQ;
            return (size_t) -1;
        }
    }
    *wc = wcs[0];
    return (size_t) 1;
}
Esempio n. 4
0
/* This may return a R_alloc-ed result, so the caller has to manage the
   R_alloc stack */
/*
 * Return the contents of the CHARSXP 'x' as UTF-8.
 *
 * NA_STRING, UTF-8-marked and ASCII strings are returned as-is (the pointer
 * from CHAR(x)).  Strings marked as "bytes" and non-CHARSXP arguments raise
 * an R error.  Everything else is converted from "latin1" (if so marked) or
 * from the "" encoding; input bytes the converter rejects (EILSEQ/EINVAL)
 * are replaced by the four characters "<xx>" with xx the byte in hex.
 */
const char *translateCharUTF8(SEXP x)
{
    void *obj;
    const char *inbuf, *ans, *from;
    char *outbuf, *p;
    size_t inb, outb, res;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    /* Check the type before touching the string data. */
    if(TYPEOF(x) != CHARSXP)
        error(_("'%s' must be called on a CHARSXP"), "translateCharUTF8");
    ans = CHAR(x);
    if(x == NA_STRING) return ans;
    if(IS_UTF8(x)) return ans;
    if(IS_ASCII(x)) return ans;
    if(IS_BYTES(x))
        error(_("translating strings with \"bytes\" encoding is not allowed"));

    from = IS_LATIN1(x) ? "latin1" : "";
    obj = Riconv_open("UTF-8", from);
    if(obj == (void *)(-1))
        /* report the encoding that was actually requested */
#ifdef Win32
        error(_("unsupported conversion from '%s' in codepage %d"),
              from, localeCP);
#else
        error(_("unsupported conversion from '%s' to '%s'"), from, "UTF-8");
#endif
    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    /* (re)start the whole conversion; reached again after the buffer grew */
    inbuf = ans; inb = strlen(inbuf);
    outbuf = cbuff.data; outb = cbuff.bufsize - 1; /* keep 1 byte for NUL */
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input  */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == (size_t)(-1) && errno == E2BIG) {
        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
        goto top_of_loop;
    } else if(res == (size_t)(-1) && (errno == EILSEQ || errno == EINVAL)) {
        if(outb < 5) {
            R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
            goto top_of_loop;
        }
        /* substitute the offending byte and carry on after it */
        snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
        outbuf += 4; outb -= 4;
        inbuf++; inb--;
        goto next_char;
    }
    *outbuf = '\0';
    Riconv_close(obj);
    /* copy into R_alloc-ed memory so the string buffer can be released */
    res = strlen(cbuff.data) + 1;
    p = R_alloc(res, 1);
    memcpy(p, cbuff.data, res);
    R_FreeStringBuffer(&cbuff);
    return p;
}
Esempio n. 5
0
static int
FileReadConsole(const char *prompt, char *buf, int len, int addhistory)
{
    int ll, err = 0;

    if (!R_Slave) {
	fputs(prompt, stdout);
	fflush(stdout);
    }
    if (fgets(buf, len, ifp ? ifp : stdin) == NULL) return 0;
    /* translate if necessary */
    if(strlen(R_StdinEnc) && strcmp(R_StdinEnc, "native.enc")) {
	size_t res, inb = strlen(buf), onb = len;
	const char *ib = buf; 
	char obuf[len+1], *ob = obuf;
	if(!cd) {
	    cd = Riconv_open("", R_StdinEnc);
	    if(cd == (void *)-1) error(_("encoding '%s' is not recognised"), R_StdinEnc);
	}
	res = Riconv(cd, &ib, &inb, &ob, &onb);
	*ob = '\0';
	err = (res == (size_t)(-1));
	/* errors lead to part of the input line being ignored */
	if(err) printf(_("<ERROR: re-encoding failure from encoding '%s'>\n"),
		       R_StdinEnc);
	strncpy(buf, obuf, len);
    }

/* according to system.txt, should be terminated in \n, so check this
   at eof or error */
    ll = strlen(buf);
    if ((err || feof(ifp ? ifp: stdin))
	&& buf[ll - 1] != '\n' && ll < len) {
	buf[ll++] = '\n'; buf[ll] = '\0';
    }

    if (!R_Interactive && !R_Slave) {
	fputs(buf, stdout);
	fflush(stdout);
    }
    return 1;
}
Esempio n. 6
0
/* made available for use in graphics devices */
/*
 * Store the UTF-8 encoding of the code point 'wc' in 's' (NUL-terminated).
 *
 * The UNICODE -> "UTF-8" converter is opened on first use and kept in the
 * file-level 'ucsutf8_obj'.
 *
 * Returns 1 for wc == 0 (an empty string is stored), otherwise the number
 * of bytes written, not counting the terminator.  An R error is raised when
 * the converter cannot be opened or when the conversion fails for any
 * reason other than E2BIG.
 */
size_t ucstoutf8(char *s, const unsigned int wc)
{
    char          utf8[16];
    unsigned int  ucs[2];
    const char   *src = (const char *) ucs;
    char         *dst = utf8;
    size_t        srcLeft = sizeof(unsigned int); /* better be 4 */
    size_t        dstLeft = sizeof(utf8);

    if (wc == 0) {
        s[0] = '\0';
        return 1;
    }

    memset(utf8, 0, sizeof(utf8));
    ucs[0] = wc;
    ucs[1] = 0;

    if (ucsutf8_obj == NULL) {
        void *cd = Riconv_open("UTF-8", UNICODE);
        if (cd == (void *)(-1)) {
            error(_("unsupported conversion from '%s' to '%s'"),
                  UNICODE, "UTF-8");
            return (size_t)(-1);
        }
        ucsutf8_obj = cd;
    }

    if (Riconv(ucsutf8_obj, &src, &srcLeft, &dst, &dstLeft) == (size_t) -1
        && errno != E2BIG) {
        error(_("invalid Unicode point %u"), wc);
        return (size_t) -1; // Not reached
    }

    /* terminate at the converter's write position, then hand the result over */
    *dst = '\0';
    strcpy(s, utf8);
    return strlen(utf8);
}
Esempio n. 7
0
static int
FileReadConsole(char *prompt, char *buf, int len, int addhistory)
{
    int ll, err = 0;
    char inbuf[1001];
    if (!R_Slave) {
	fputs(prompt, stdout);
	fflush(stdout);
    }
    if (fgets(inbuf, len, stdin) == NULL)
	return 0;
    /* translate if necessary */
    if(strlen(R_StdinEnc) && strcmp(R_StdinEnc, "native.enc")) {
	size_t res, inb = strlen(inbuf), onb = len;
	char *ib = inbuf, *ob = buf;
	if(!cd) {
	    cd = Riconv_open("", R_StdinEnc);
	    if(!cd) error(_("encoding '%s' is not recognised"), R_StdinEnc);
	}
	res = Riconv(cd, &ib, &inb, &ob, &onb);
	*ob = '\0';
	err = res == (size_t)(-1);
	/* errors lead to part of the input line being ignored */
	if(err) fputs(_("<ERROR: invalid input in encoding> "), stdout);
    } else strncpy(buf, inbuf, strlen(inbuf)+1);
    
/* according to system.txt, should be terminated in \n, so check this
   at eof or error */
    ll = strlen((char *)buf);
    if ((err || feof(stdin)) && buf[ll - 1] != '\n' && ll < len) {
	buf[ll++] = '\n'; buf[ll] = '\0';
    }
    if (!R_Interactive && !R_Slave)
	fputs(buf, stdout);
    return 1;
}
Esempio n. 8
0
/*
 * Read one line of console input into 'buf'.
 *
 * Non-interactive use (R_Interactive false): the line is read with fgets()
 * from 'ifp' (or stdin when 'ifp' is NULL), a CRLF ending is reduced to LF,
 * the line is re-encoded from R_StdinEnc when that is set to something other
 * than "" or "native.enc", a missing final '\n' is appended at EOF or after
 * a re-encoding error, and the line is echoed unless R_Slave is set.
 *
 * Interactive use: the prompt is shown (through readline when available and
 * in use), then the function loops over R_checkActivityEx() /
 * R_runHandlers() until stdin has delivered a line or EOF.
 *
 * prompt        prompt string.
 * buf, len      destination and the size passed to fgets().
 * addtohistory  passed on to the readline data; unused otherwise.
 *
 * Returns 0 at end of file, 1 when a line was read.
 *
 * NOTE(review): appending the newline may store the terminator at buf[len],
 * so this relies on the caller's buffer holding len+1 bytes -- confirm.
 */
int attribute_hidden
Rstd_ReadConsole(const char *prompt, unsigned char *buf, int len,
                 int addtohistory)
{
    if(!R_Interactive) {
        size_t ll;
        int err = 0;
        if (!R_Slave) {
            fputs(prompt, stdout);
            fflush(stdout); /* make sure prompt is output */
        }
        if (fgets((char *)buf, len, ifp ? ifp: stdin) == NULL)
            return 0;
        ll = strlen((char *)buf);
        /* remove CR in CRLF ending */
        if (ll >= 2 && buf[ll - 1] == '\n' && buf[ll - 2] == '\r') {
            buf[ll - 2] = '\n';
            buf[--ll] = '\0';
        }
        /* translate if necessary */
        if(strlen(R_StdinEnc) && strcmp(R_StdinEnc, "native.enc")) {
            /* Allow at most len-1 output bytes (the limit fgets() applies)
               so the strncpy() below copies the terminator as well ... */
            size_t res, inb = strlen((char *)buf), onb = (size_t) len - 1;
            /* NB: R's main loop and scan will not call this with a len
               larger than CONSOLE_BUFFER_SIZE, but contributed code might,
               so ... and never more than obuf can hold. */
            char obuf[CONSOLE_BUFFER_SIZE+1];
            const char *ib = (const char *)buf;
            char *ob = obuf;
            if(onb > CONSOLE_BUFFER_SIZE) onb = CONSOLE_BUFFER_SIZE;
            if(!cd) {
                /* Cache the descriptor only once it is known to be valid:
                   error() does not return, and a cached (void *)-1 would
                   be used by every later call. */
                void *newcd = Riconv_open("", R_StdinEnc);
                if(newcd == (void *)-1)
                    error(_("encoding '%s' is not recognised"), R_StdinEnc);
                cd = newcd;
            }
            res = Riconv(cd, &ib, &inb, &ob, &onb);
            *ob = '\0';
            err = res == (size_t)(-1);
            /* errors lead to part of the input line being ignored */
            if(err) printf(_("<ERROR: re-encoding failure from encoding '%s'>\n"),
                           R_StdinEnc);
            strncpy((char *)buf, obuf, len);
            /* the re-encoded line generally has a different length */
            ll = strlen((char *)buf);
        }
/* according to system.txt, should be terminated in \n, so check this
   at eof and error */
        if ((err || feof(ifp ? ifp : stdin))
            && (ll == 0 || buf[ll - 1] != '\n') && ll < (size_t) len) {
            buf[ll++] = '\n'; buf[ll] = '\0';
        }
        if (!R_Slave) {
            fputs((char *)buf, stdout);
            fflush(stdout);
        }
        return 1;
    }
    else {
#ifdef HAVE_LIBREADLINE
        R_ReadlineData rl_data;
        if (UsingReadline) {
            /* push a fresh readline context on top of rl_top */
            rl_data.readline_gotaline = 0;
            rl_data.readline_buf = buf;
            rl_data.readline_addtohistory = addtohistory;
            rl_data.readline_len = len;
            rl_data.readline_eof = 0;
            rl_data.prev = rl_top;
            rl_top = &rl_data;
            /* Allow conditional parsing of the ~/.inputrc file. */
            rl_readline_name = "R";
            pushReadline(prompt, readline_handler);
#ifdef HAVE_RL_COMPLETION_MATCHES
            initialize_rlcompletion();
#endif
        }
        else
#endif /* HAVE_LIBREADLINE */
        {
            fputs(prompt, stdout);
            fflush(stdout);
        }

        if(R_InputHandlers == NULL)
            initStdinHandler();

        for (;;) {
            fd_set *what;

            /* use the smaller of the two positive wait times, or -1 */
            int wt = -1;
            if (R_wait_usec > 0) wt = R_wait_usec;
            if (Rg_wait_usec > 0 && (wt < 0 || wt > Rg_wait_usec))
                wt = Rg_wait_usec;
            what = R_checkActivityEx(wt, 0, handleInterrupt);
            /* This is slightly clumsy. We have advertised the
             * convention that R_wait_usec == 0 means "wait forever",
             * but we also need to enable R_checkActivity to return
             * immediately. */

            R_runHandlers(R_InputHandlers, what);
            if (what == NULL)
                continue;
            if (FD_ISSET(fileno(stdin), what)) {
                /* We could make this a regular handler, but we need
                 * to pass additional arguments. */
#ifdef HAVE_LIBREADLINE
                if (UsingReadline) {
                    rl_callback_read_char();
                    if(rl_data.readline_eof || rl_data.readline_gotaline) {
                        /* pop our readline context again */
                        rl_top = rl_data.prev;
                        return(rl_data.readline_eof ? 0 : 1);
                    }
                }
                else
#endif /* HAVE_LIBREADLINE */
                {
                    if(fgets((char *)buf, len, stdin) == NULL)
                        return 0;
                    else
                        return 1;
                }
            }
        }
    }
}
Esempio n. 9
0
/*
 * Return the symbol (via install()) whose name is the contents of the
 * CHARSXP 'x' translated to the "" encoding.
 *
 * NA_STRING, strings without a known encoding, ASCII strings, and strings
 * whose marked encoding matches the current locale (utf8locale /
 * latin1locale) are installed unchanged.  "bytes" strings and non-CHARSXP
 * arguments raise an R error.
 *
 * Other strings are converted with a converter that is opened on first use
 * and cached in 'latin1_obj' or 'utf8_obj'.  Input the converter rejects
 * (EILSEQ/EINVAL) is substituted: for UTF-8 input a decodable character
 * becomes "<U+XXXX>" (or "<U+XXXXXXXX>" above 0xFFFF on non-Windows
 * builds), anything else becomes "<xx>" for the single offending byte.
 */
SEXP installTrChar(SEXP x)
{
    void * obj;
    const char *inbuf, *ans;
    char *outbuf;
    size_t inb, outb, res;
    cetype_t ienc;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    /* Check the type before looking at the string data or its encoding. */
    if(TYPEOF(x) != CHARSXP)
        error(_("'%s' must be called on a CHARSXP"), "installTrChar");
    ans = CHAR(x);
    ienc = getCharCE(x);
    if(x == NA_STRING || !(ENC_KNOWN(x))) return install(ans);
    if(IS_BYTES(x))
        error(_("translating strings with \"bytes\" encoding is not allowed"));
    if(utf8locale && IS_UTF8(x)) return install(ans);
    if(latin1locale && IS_LATIN1(x)) return install(ans);
    if(IS_ASCII(x)) return install(ans);

    if(IS_LATIN1(x)) {
        if(!latin1_obj) {
            obj = Riconv_open("", "latin1");
            /* should never happen */
            if(obj == (void *)(-1))
#ifdef Win32
                error(_("unsupported conversion from '%s' in codepage %d"),
                      "latin1", localeCP);
#else
                error(_("unsupported conversion from '%s' to '%s'"),
                      "latin1", "");
#endif
            latin1_obj = obj;
        }
        obj = latin1_obj;
    } else {
        if(!utf8_obj) {
            obj = Riconv_open("", "UTF-8");
            /* should never happen */
            if(obj == (void *)(-1))
                /* this converter reads UTF-8, so say so in the message */
#ifdef Win32
                error(_("unsupported conversion from '%s' in codepage %d"),
                      "UTF-8", localeCP);
#else
                error(_("unsupported conversion from '%s' to '%s'"),
                      "UTF-8", "");
#endif
            utf8_obj = obj;
        }
        obj = utf8_obj;
    }

    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    /* (re)start the whole conversion; reached again after the buffer grew */
    inbuf = ans; inb = strlen(inbuf);
    outbuf = cbuff.data; outb = cbuff.bufsize - 1; /* keep 1 byte for NUL */
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input  */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == (size_t)(-1) && errno == E2BIG) {
        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
        goto top_of_loop;
    } else if(res == (size_t)(-1) && (errno == EILSEQ || errno == EINVAL)) {
        /* 13 bytes is the longest substitution ("<U+XXXXXXXX>" plus NUL) */
        if(outb < 13) {
            R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
            goto top_of_loop;
        }
        if (ienc == CE_UTF8) {
            /* if starting in UTF-8, use \uxxxx */
            /* This must be the first byte */
            size_t clen;
            wchar_t wc;
            clen = utf8toucs(&wc, inbuf);
            if(clen > 0 && inb >= clen) {
                inbuf += clen; inb -= clen;
# ifndef Win32
                if((unsigned int) wc < 65536) {
# endif
                    snprintf(outbuf, 9, "<U+%04X>", (unsigned int) wc);
                    outbuf += 8; outb -= 8;
# ifndef Win32
                } else {
                    snprintf(outbuf, 13, "<U+%08X>", (unsigned int) wc);
                    outbuf += 12; outb -= 12;
                }
# endif
            } else {
                /* not decodable as UTF-8: escape just this byte */
                snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
                outbuf += 4; outb -= 4;
                inbuf++; inb--;
            }
        } else {
            snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
            outbuf += 4; outb -= 4;
            inbuf++; inb--;
        }
        goto next_char;
    }
    *outbuf = '\0';
    SEXP Sans = install(cbuff.data);
    R_FreeStringBuffer(&cbuff);
    return Sans;
}
Esempio n. 10
0
/* iconv(x, from, to, sub, mark, toRaw) */
/*
 * Re-encode the elements of 'x' from encoding 'from' to encoding 'to'.
 *
 * Arguments are taken from 'args' in this order:
 *   x      character vector, a list of NULL/raw vectors, or NULL;
 *   from   length-one string, source encoding;
 *   to     length-one string, target encoding;
 *   sub    length-one string or NA: replacement for input that cannot be
 *          converted ("byte" means "<xx>" with the byte in hex);
 *   mark   logical: mark results as latin1/UTF-8 when 'to' implies it;
 *   toRaw  logical: return a list of raw vectors instead of strings.
 *
 * When x is NULL the result is the list of encodings reported by
 * iconvlist() (R_NilValue if HAVE_ICONVLIST is not defined).  Otherwise the
 * result has one element per element of x; elements that are NA/NULL on
 * input, or that could not be converted completely, are NA_STRING
 * (character result) or NULL (raw-list result).
 */
SEXP attribute_hidden do_iconv(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP ans, x = CAR(args), si;
    void * obj;
    const char *inbuf;
    char *outbuf;
    const char *sub;
    size_t inb, outb, res;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};
    Rboolean isRawlist = FALSE;

    checkArity(op, args);
    if(isNull(x)) {  /* list locales */
#ifdef HAVE_ICONVLIST
        /* first pass counts the entries, second pass stores them */
        cnt = 0;
        iconvlist(count_one, NULL);
        PROTECT(ans = allocVector(STRSXP, cnt));
        cnt = 0;
        iconvlist(write_one, (void *)ans);
#else
        PROTECT(ans = R_NilValue);
#endif
    } else {
        int mark, toRaw;
        const char *from, *to;
        Rboolean isLatin1 = FALSE, isUTF8 = FALSE;

        args = CDR(args);
        if(!isString(CAR(args)) || length(CAR(args)) != 1)
            error(_("invalid '%s' argument"), "from");
        from = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */
        args = CDR(args);
        if(!isString(CAR(args)) || length(CAR(args)) != 1)
            error(_("invalid '%s' argument"), "to");
        to = CHAR(STRING_ELT(CAR(args), 0));
        args = CDR(args);
        if(!isString(CAR(args)) || length(CAR(args)) != 1)
            error(_("invalid '%s' argument"), "sub");
        if(STRING_ELT(CAR(args), 0) == NA_STRING) sub = NULL;
        else sub = translateChar(STRING_ELT(CAR(args), 0));
        args = CDR(args);
        mark = asLogical(CAR(args));
        if(mark == NA_LOGICAL)
            error(_("invalid '%s' argument"), "mark");
        args = CDR(args);
        toRaw = asLogical(CAR(args));
        if(toRaw == NA_LOGICAL)
            error(_("invalid '%s' argument"), "toRaw");
        /* some iconv's allow "UTF8", but libiconv does not */
        if(streql(from, "UTF8") || streql(from, "utf8") ) from = "UTF-8";
        if(streql(to, "UTF8") || streql(to, "utf8") ) to = "UTF-8";
        /* Should we do something about marked CHARSXPs in 'from = ""'? */
        if(streql(to, "UTF-8")) isUTF8 = TRUE;
        if(streql(to, "latin1") || streql(to, "ISO_8859-1")
            || streql(to, "CP1252")) isLatin1 = TRUE;
        if(streql(to, "") && known_to_be_latin1) isLatin1 = TRUE;
        if(streql(to, "") && known_to_be_utf8) isUTF8 = TRUE;
        obj = Riconv_open(to, from);
        if(obj == (iconv_t)(-1))
#ifdef Win32
            error(_("unsupported conversion from '%s' to '%s' in codepage %d"),
                  from, to, localeCP);
#else
            error(_("unsupported conversion from '%s' to '%s'"), from, to);
#endif
        isRawlist = (TYPEOF(x) == VECSXP);
        if(isRawlist) {
            if(toRaw)
                PROTECT(ans = duplicate(x));
            else {
                PROTECT(ans = allocVector(STRSXP, LENGTH(x)));
                DUPLICATE_ATTRIB(ans, x);
            }
        } else {
            if(TYPEOF(x) != STRSXP)
                error(_("'x' must be a character vector"));
            if(toRaw) {
                PROTECT(ans = allocVector(VECSXP, LENGTH(x)));
                DUPLICATE_ATTRIB(ans, x);
            } else
                PROTECT(ans = duplicate(x));
        }
        R_AllocStringBuffer(0, &cbuff);  /* 0 -> default */
        for(R_xlen_t i = 0; i < XLENGTH(x); i++) {
            if (isRawlist) {
                si = VECTOR_ELT(x, i);
                if (TYPEOF(si) == NILSXP) {
                    if (!toRaw) SET_STRING_ELT(ans, i, NA_STRING);
                    continue;
                } else if (TYPEOF(si) != RAWSXP)
                    error(_("'x' must be a list of NULL or raw vectors"));
            } else {
                si = STRING_ELT(x, i);
                if (si == NA_STRING) {
                    if(!toRaw) SET_STRING_ELT(ans, i, NA_STRING);
                    continue;
                }
            }
        top_of_loop:
            /* (re)start this element; reached again after the buffer grew */
            inbuf = isRawlist ? (const char *) RAW(si) : CHAR(si);
            inb = LENGTH(si);
            outbuf = cbuff.data; outb = cbuff.bufsize - 1;
            /* First initialize output */
            Riconv (obj, NULL, NULL, &outbuf, &outb);
        next_char:
            /* Then convert input  */
            res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
            *outbuf = '\0';
            /* other possible error conditions are incomplete
               and invalid multibyte chars */
            if(res == (size_t)(-1) && errno == E2BIG) {
                R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                goto top_of_loop;
            } else if(res == (size_t)(-1) && sub &&
                      (errno == EILSEQ || errno == EINVAL)) {
                /* it seems this gets thrown for non-convertible input too */
                if(strcmp(sub, "byte") == 0) {
                    if(outb < 5) {
                        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                        goto top_of_loop;
                    }
                    snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
                    outbuf += 4; outb -= 4;
                } else {
                    size_t j;
                    if(outb < strlen(sub)) {
                        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                        goto top_of_loop;
                    }
                    memcpy(outbuf, sub, j = strlen(sub));
                    outbuf += j;
                    outb -= j;
                }
                /* skip the offending input byte and continue */
                inbuf++; inb--;
                goto next_char;
            }

            if(toRaw) {
                if(res != (size_t)(-1) && inb == 0) {
                    size_t nout = cbuff.bufsize - 1 - outb;
                    SEXP el = allocVector(RAWSXP, nout);
                    memcpy(RAW(el), cbuff.data, nout);
                    SET_VECTOR_ELT(ans, i, el);
                } else {
                    /* For a raw-list input 'ans' started as a copy of x, so
                       the slot still holds the unconverted bytes: clear it
                       explicitly so a failed conversion yields NULL. */
                    SET_VECTOR_ELT(ans, i, R_NilValue);
                }
            } else {
                if(res != (size_t)(-1) && inb == 0) {
                    cetype_t ienc = CE_NATIVE;

                    size_t nout = cbuff.bufsize - 1 - outb;
                    if(mark) {
                        if(isLatin1) ienc = CE_LATIN1;
                        else if(isUTF8) ienc = CE_UTF8;
                    }
                    SET_STRING_ELT(ans, i,
                                   mkCharLenCE(cbuff.data, (int) nout, ienc));
                } else SET_STRING_ELT(ans, i, NA_STRING);
            }
        }
        Riconv_close(obj);
        R_FreeStringBuffer(&cbuff);
    }
    UNPROTECT(1);
    return ans;
}
Esempio n. 11
0
/* A version avoiding R_alloc for use in the Rgui editor */
/*
 * Re-encode the string 'x' from encoding 'ce_in' to 'ce_out' and store the
 * result in the caller's buffer 'y' of 'ny' bytes.
 *
 * 'y' first receives a (truncated, NUL-terminated) copy of 'x'; that copy is
 * the result whenever no conversion is needed or possible: equal or CE_ANY
 * encodings, native encoding already matching in a UTF-8/latin1 locale,
 * ASCII input, an encoding other than native/latin1/UTF-8, or a converter
 * that cannot be opened.
 *
 * subst selects what happens to input the converter rejects:
 *   1: replace the byte by "<xx>" (hex), 2: by '.', 3: by '?',
 *   anything else: drop the byte.
 *
 * An R error is raised if the converted string (with terminator) does not
 * fit into 'ny' bytes.  Nothing is done when ny <= 0.
 */
void reEnc2(const char *x, char *y, int ny,
            cetype_t ce_in, cetype_t ce_out, int subst)
{
    void * obj;
    const char *inbuf;
    char *outbuf;
    size_t inb, outb, res, top;
    char *tocode = NULL, *fromcode = NULL;
    char buf[20];
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    /* With no room at all there is nothing that can be stored; a
       non-positive ny would also turn into a huge strncpy() count and an
       out-of-bounds write of the terminator. */
    if(ny <= 0) return;

    strncpy(y, x, ny);
    y[ny - 1] = '\0';

    if(ce_in == ce_out || ce_in == CE_ANY || ce_out == CE_ANY) return;
    if(utf8locale && ce_in == CE_NATIVE && ce_out == CE_UTF8) return;
    if(utf8locale && ce_out == CE_NATIVE && ce_in == CE_UTF8) return;
    if(latin1locale && ce_in == CE_NATIVE && ce_out == CE_LATIN1) return;
    if(latin1locale && ce_out == CE_NATIVE && ce_in == CE_LATIN1) return;

    if(strIsASCII(x)) return;

    /* 'buf' serves both switches; ce_in == ce_out has already returned,
       so at most one of them is CE_NATIVE. */
    switch(ce_in) {
    case CE_NATIVE:
        {
            /* Looks like CP1252 is treated as Latin-1 by iconv */
            snprintf(buf, 20, "CP%d", localeCP);
            fromcode = buf;
            break;
        }
    case CE_LATIN1: fromcode = "CP1252"; break;
    case CE_UTF8:   fromcode = "UTF-8"; break;
    default: return;
    }

    switch(ce_out) {
    case CE_NATIVE:
        {
            /* avoid possible misidentification of CP1250 as LATIN-2 */
            snprintf(buf, 20, "CP%d", localeCP);
            tocode = buf;
            break;
        }
    case CE_LATIN1: tocode = "latin1"; break;
    case CE_UTF8:   tocode = "UTF-8"; break;
    default: return;
    }

    obj = Riconv_open(tocode, fromcode);
    if(obj == (void *)(-1)) return;
    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    /* (re)start the whole conversion; reached again after the buffer grew */
    inbuf = x; inb = strlen(inbuf);
    outbuf = cbuff.data; top = outb = cbuff.bufsize - 1;
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input  */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == (size_t)(-1) && errno == E2BIG) {
        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
        goto top_of_loop;
    } else if(res == (size_t)(-1) && (errno == EILSEQ || errno == EINVAL)) {
        switch(subst) {
        case 1: /* substitute hex */
            if(outb < 5) {
                R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                goto top_of_loop;
            }
            snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
            outbuf += 4; outb -= 4;
            inbuf++; inb--;
            goto next_char;
        case 2: /* substitute . */
            if(outb < 1) {
                R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                goto top_of_loop;
            }
            *outbuf++ = '.'; inbuf++; outb--; inb--;
            goto next_char;
        case 3: /* substitute ? */
            if(outb < 1) {
                R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
                goto top_of_loop;
            }
            *outbuf++ = '?'; inbuf++; outb--; inb--;
            goto next_char;
        default: /* skip byte */
            inbuf++; inb--;
            goto next_char;
        }
    }
    Riconv_close(obj);
    *outbuf = '\0';
    res = (top-outb)+1; /* strlen(cbuff.data) + 1; */
    if (res > (size_t) ny) {
        /* error() does not return: release the work buffer first */
        R_FreeStringBuffer(&cbuff);
        error("converted string too long for buffer");
    }
    memcpy(y, cbuff.data, res);
    R_FreeStringBuffer(&cbuff);
}
Esempio n. 12
0
/* This may return a R_alloc-ed result, so the caller has to manage the
   R_alloc stack */
/*
 * Re-encode the string 'x' from encoding 'ce_in' to 'ce_out'.
 *
 * 'x' itself is returned whenever no conversion is needed or possible:
 * equal encodings, CE_ANY on either side, CE_SYMBOL as target, CE_SYMBOL as
 * source with a target other than UTF-8, native encoding already matching
 * in a UTF-8/latin1 locale, ASCII input, an unsupported encoding, or a
 * converter that cannot be opened.  Otherwise the result is R_alloc-ed.
 *
 * subst selects what happens to input the converter rejects:
 *   1: replace the byte by "<xx>" (hex), 2: by '.', 3: by '?',
 *   anything else: drop the byte.
 */
const char *reEnc(const char *x, cetype_t ce_in, cetype_t ce_out, int subst)
{
    void *cd;
    const char *src;
    char *dst, *result;
    size_t srcLeft, dstLeft, rc, capacity, nbytes;
    char *toName = NULL, *fromName = NULL;
    int needGrow;
#ifdef Win32
    char cpname[20];
#endif
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    /* We can only encode from Symbol to UTF-8 */
    if (ce_in == ce_out || ce_out == CE_SYMBOL ||
        ce_in == CE_ANY || ce_out == CE_ANY)
        return x;
    if (ce_in == CE_SYMBOL) {
        if (ce_out != CE_UTF8)
            return x;
        size_t nc = 3*strlen(x)+1; /* all in BMP */
        result = R_alloc(nc, 1);
        Rf_AdobeSymbol2utf8(result, x, nc);
        return result;
    }

    /* the native encoding already is the requested one */
    if (utf8locale &&
        ((ce_in == CE_NATIVE && ce_out == CE_UTF8) ||
         (ce_out == CE_NATIVE && ce_in == CE_UTF8)))
        return x;
    if (latin1locale &&
        ((ce_in == CE_NATIVE && ce_out == CE_LATIN1) ||
         (ce_out == CE_NATIVE && ce_in == CE_LATIN1)))
        return x;

    if (strIsASCII(x))
        return x;

    /* name of the source encoding */
    switch (ce_in) {
#ifdef Win32
    case CE_NATIVE:
        /* Looks like CP1252 is treated as Latin-1 by iconv */
        snprintf(cpname, 20, "CP%d", localeCP);
        fromName = cpname;
        break;
    case CE_LATIN1:
        fromName = "CP1252";
        break;
#else
    case CE_NATIVE:
        fromName = "";
        break;
    case CE_LATIN1:
        fromName = "latin1";
        break;
#endif
    case CE_UTF8:
        fromName = "UTF-8";
        break;
    default:
        return x;
    }

    /* name of the target encoding */
    switch (ce_out) {
#ifdef Win32
    case CE_NATIVE:
        /* avoid possible misidentification of CP1250 as LATIN-2 */
        snprintf(cpname, 20, "CP%d", localeCP);
        toName = cpname;
        break;
#else
    case CE_NATIVE:
        toName = "";
        break;
#endif
    case CE_LATIN1:
        toName = "latin1";
        break;
    case CE_UTF8:
        toName = "UTF-8";
        break;
    default:
        return x;
    }

    cd = Riconv_open(toName, fromName);
    if (cd == (void *)(-1))
        return x;
    R_AllocStringBuffer(0, &cbuff);

    /* Outer loop: one full conversion attempt per buffer size. */
    for (;;) {
        src = x;
        srcLeft = strlen(src);
        dst = cbuff.data;
        capacity = dstLeft = cbuff.bufsize - 1;
        needGrow = 0;

        /* First initialize output */
        Riconv(cd, NULL, NULL, &dst, &dstLeft);

        /* Inner loop: convert, substituting rejected bytes as we go. */
        for (;;) {
            rc = Riconv(cd, &src, &srcLeft, &dst, &dstLeft);
            if (rc != (size_t)(-1))
                break;                          /* finished */
            if (errno == E2BIG) {
                needGrow = 1;
                break;
            }
            if (errno != EILSEQ && errno != EINVAL)
                break;                          /* give up, keep output */

            if (subst == 1) {                   /* substitute hex */
                if (dstLeft < 5) {
                    needGrow = 1;
                    break;
                }
                snprintf(dst, 5, "<%02x>", (unsigned char)*src);
                dst += 4;
                dstLeft -= 4;
            } else if (subst == 2 || subst == 3) { /* substitute . or ? */
                if (dstLeft < 1) {
                    needGrow = 1;
                    break;
                }
                *dst++ = (subst == 2) ? '.' : '?';
                dstLeft--;
            }
            /* in every case the offending byte is skipped */
            src++;
            srcLeft--;
        }

        if (!needGrow)
            break;
        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
    }

    Riconv_close(cd);
    *dst = '\0';
    nbytes = (capacity - dstLeft) + 1; /* strlen(cbuff.data) + 1; */
    result = R_alloc(nbytes, 1);
    memcpy(result, cbuff.data, nbytes);
    R_FreeStringBuffer(&cbuff);
    return result;
}
Esempio n. 13
0
/* This may return a R_alloc-ed result, so the caller has to manage the
   R_alloc stack */
/*
 * Return the contents of the CHARSXP 'x' as an R_alloc-ed wide string,
 * obtained by converting to the TO_WCHAR encoding.
 *
 * The source encoding is "latin1" or "UTF-8" if 'x' is so marked (these
 * converters are opened once and cached in 'latin1_wobj' / 'utf8_wobj'),
 * otherwise "" (converter opened and closed on each call).  "bytes" strings
 * and non-CHARSXP arguments raise an R error.
 *
 * Input bytes the converter rejects (EILSEQ/EINVAL) are replaced by the four
 * wide characters "<xx>", xx being the byte in hex.
 */
attribute_hidden /* but not hidden on Windows, where it was used in tcltk.c */
const wchar_t *wtransChar(SEXP x)
{
    void * obj;
    const char *inbuf, *ans;
    char *outbuf;
    wchar_t *p;
    size_t inb, outb, res, top;
    Rboolean knownEnc = FALSE;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    /* Check the type before touching the string data. */
    if(TYPEOF(x) != CHARSXP)
        error(_("'%s' must be called on a CHARSXP"), "wtransChar");
    ans = CHAR(x);

    if(IS_BYTES(x))
        error(_("translating strings with \"bytes\" encoding is not allowed"));

    if(IS_LATIN1(x)) {
        if(!latin1_wobj) {
            obj = Riconv_open(TO_WCHAR, "latin1");
            if(obj == (void *)(-1))
                error(_("unsupported conversion from '%s' to '%s'"),
                      "latin1", TO_WCHAR);
            latin1_wobj = obj;
        } else
            obj = latin1_wobj;
        knownEnc = TRUE;
    } else if(IS_UTF8(x)) {
        if(!utf8_wobj) {
            obj = Riconv_open(TO_WCHAR, "UTF-8");
            if(obj == (void *)(-1))
                /* this converter reads UTF-8, so say so in the message */
                error(_("unsupported conversion from '%s' to '%s'"),
                      "UTF-8", TO_WCHAR);
            utf8_wobj = obj;
        } else
            obj = utf8_wobj;
        knownEnc = TRUE;
    } else {
        obj = Riconv_open(TO_WCHAR, "");
        if(obj == (void *)(-1))
#ifdef Win32
            error(_("unsupported conversion to '%s' from codepage %d"),
                  TO_WCHAR, localeCP);
#else
            error(_("unsupported conversion from '%s' to '%s'"), "", TO_WCHAR);
#endif
    }

    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    /* (re)start the whole conversion; reached again after the buffer grew */
    inbuf = ans; inb = strlen(inbuf);
    outbuf = cbuff.data; top = outb = cbuff.bufsize - 1;
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input  */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == (size_t)(-1) && errno == E2BIG) {
        R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
        goto top_of_loop;
    } else if(res == (size_t)(-1) && (errno == EILSEQ || errno == EINVAL)) {
        /* The output is returned as wchar_t[], so the substitution has to be
           written as wide characters; four narrow bytes would be read back
           as one (or two) bogus wide characters.
           NOTE(review): this stores native wchar_t values, i.e. it assumes
           TO_WCHAR matches the platform's wchar_t layout, as the cast of
           the result to wchar_t * below already does -- confirm. */
        char hex[5];
        const size_t need = 4 * sizeof(wchar_t);
        if(outb < need) {
            R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
            goto top_of_loop;
        }
        snprintf(hex, sizeof(hex), "<%02x>", (unsigned char)*inbuf);
        for(int k = 0; k < 4; k++) {
            wchar_t wch = (wchar_t) hex[k];
            memcpy(outbuf, &wch, sizeof(wch));
            outbuf += sizeof(wch); outb -= sizeof(wch);
        }
        inbuf++; inb--;
        goto next_char;
    }
    /* cached converters stay open for later calls */
    if(!knownEnc) Riconv_close(obj);
    res = (top - outb);
    /* terminator is 2 or 4 null bytes */
    p = (wchar_t *) R_alloc(res+4, 1);
    memset(p, 0, res+4);
    memcpy(p, cbuff.data, res);
    R_FreeStringBuffer(&cbuff);
    return p;
}