/* collapse a json object with n spaces */ SEXP C_collapse_object_pretty(SEXP x, SEXP y, SEXP indent) { if (!isString(x) || !isString(y)) error("x and y must character vectors."); int ni = asInteger(indent); if(ni == NA_INTEGER) error("indent must not be NA"); int len = length(x); if (len != length(y)) error("x and y must have same length."); //calculate required space size_t nchar_total = 0; for (int i=0; i<len; i++) { if(STRING_ELT(y, i) == NA_STRING) continue; nchar_total += strlen(translateCharUTF8(STRING_ELT(x, i))); nchar_total += strlen(translateCharUTF8(STRING_ELT(y, i))); nchar_total += ni + 6; //indent plus two extra spaces plus ": " and ",\n" } //final indent plus curly braces and linebreak and terminator nchar_total += (ni + 2 + 2); //allocate memory and create a cursor char *str = malloc(nchar_total); char *cursor = str; char **cur = &cursor; //init object append_text(cur, "{", 1); const char *start = *cur; //copy everything for (int i=0; i<len; i++) { if(STRING_ELT(y, i) == NA_STRING) continue; append_text(cur, "\n", 1); append_whitespace(cur, ni + 2); append_text(cur, translateCharUTF8(STRING_ELT(x, i)), -1); append_text(cur, ": ", 2); append_text(cur, translateCharUTF8(STRING_ELT(y, i)), -1); append_text(cur, ",", 1); } //finalize object if(cursor != start){ cursor[-1] = '\n'; append_whitespace(cur, ni); } append_text(cur, "}\0", 2); //encode as UTF8 string SEXP out = PROTECT(allocVector(STRSXP, 1)); SET_STRING_ELT(out, 0, mkCharCE(str, CE_UTF8)); UNPROTECT(1); free(str); return out; }
SEXP C_collapse_array_pretty_outer(SEXP x, SEXP indent) { if (!isString(x)) error("x must character vector."); int len = length(x); int ni = asInteger(indent); if(ni == NA_INTEGER) error("indent must not be NA"); //calculate required space size_t nchar_total = 0; for (int i=0; i<len; i++) { nchar_total += strlen(translateCharUTF8(STRING_ELT(x, i))); } //for indent plus two extra spaces plus ",\n" nchar_total += len * (ni + 4); //outer parentheses plus final indent and linebreak and terminator nchar_total += ni + 4; //allocate memory and create a cursor char *str = malloc(nchar_total); char *cursor = str; char **cur = &cursor; //init object append_text(cur, "[", 1); const char *start = *cur; //copy everything for (int i=0; i<len; i++) { append_text(cur, "\n", 1); append_whitespace(cur, ni + 2); append_text(cur, translateCharUTF8(STRING_ELT(x, i)), -1); append_text(cur, ",", 1); } //remove trailing ", " if(cursor != start){ cursor[-1] = '\n'; append_whitespace(cur, ni); } //finish up append_text(cur, "]\0", 2); //encode as UTF8 string SEXP out = PROTECT(allocVector(STRSXP, 1)); SET_STRING_ELT(out, 0, mkCharCE(str, CE_UTF8)); UNPROTECT(1); free(str); return out; }
SEXP R_mongo_collection_rename(SEXP ptr_col, SEXP db, SEXP name) { mongoc_collection_t *col = r2col(ptr_col); bson_error_t err; const char *new_db = NULL; if(db != R_NilValue) new_db = translateCharUTF8(asChar(db)); if(!mongoc_collection_rename(col, new_db, translateCharUTF8(asChar(name)), false, &err)) stop(err.message); return ScalarLogical(1); }
/* Return NULL on failure */ char* SexpStrVector_getitem(const SEXP sexp, int i) { if (TYPEOF(sexp) != STRSXP) { printf("Not an R vector of type STRSXP.\n"); return NULL; } if ((i < 0) || (i >= LENGTH(sexp))) { printf("Out-of-bound.\n"); /*FIXME: return int or NULL ?*/ return NULL; } char *res; SEXP sexp_item; PROTECT(sexp_item = STRING_ELT(sexp, (R_len_t)i)); cetype_t encoding = Rf_getCharCE(sexp_item); switch (encoding) { case CE_UTF8: res = (char *)translateCharUTF8(sexp_item); break; default: res = (char *)CHAR(sexp_item); break; } UNPROTECT(1); return res; }
/* Return NA if not found*/ R_len_t nameIndex(const SEXP sexp, const char *name) { SEXP sexp_item, sexp_names; char *name_item; PROTECT(sexp_names = getAttrib(sexp, R_NamesSymbol)); R_len_t n = LENGTH(sexp); R_len_t i; cetype_t encoding; int found = 0; for (i = 0; i < n; i++) { sexp_item = STRING_ELT(sexp_names, i); encoding = Rf_getCharCE(sexp_item); switch (encoding) { case CE_UTF8: name_item = (char *)translateCharUTF8(sexp_item); break; default: name_item = (char *)CHAR(sexp_item); break; } if (strcmp(name, name_item)) { found = 1; break; } } if (found) { return i; } else { return R_NaInt; } }
SEXP do_mchoice_equals(SEXP x, SEXP y) { int x_len = LENGTH(x); /* length of x vector */ int y_len = LENGTH(y); /* length of y vector */ SEXP ans; /* Logical return vector */ int nfound = 0; /* number of matches found */ int i,j, comp; /* iterators */ size_t slen; char *str_ptr; /* copy of the x string element */ const char *str; S_EVALUATOR if(!IS_INTEGER(y) || y_len == 0) PROBLEM "y must be an integer vector of at least length one." ERROR; PROTECT(ans = NEW_LOGICAL(x_len)); for(i=0; i < x_len; ++i) { nfound = 0; str = translateCharUTF8(STRING_ELT(x, i)); slen = strlen(str) + 1; /* if length of x element is zero or NA no posible match */ if(STRING_ELT(x, i) == NA_STRING) { SET_NA_LGL(LOGICAL_POINTER(ans)[i]); continue; } if(slen == 0) { LOGICAL_POINTER(ans)[i] = 0; continue; } str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff); strncpy(str_ptr, str, slen); str_ptr[slen] = '\0'; while(str_ptr != NULL && nfound < y_len) { comp = get_next_mchoice(&str_ptr); for(j=0; j < y_len; j++) { if(comp == INTEGER_POINTER(y)[j]) { nfound++; break; } } } if(nfound < y_len) LOGICAL_POINTER(ans)[i] = 0; else LOGICAL_POINTER(ans)[i] = 1; } Hmisc_FreeStringBuffer(&cbuff); UNPROTECT(1); return(ans); }
SEXP C_collapse_array_pretty_inner(SEXP x) { if (!isString(x)) error("x must character vector."); //calculate required space int len = length(x); size_t nchar_total = 0; for (int i=0; i<len; i++) { nchar_total += strlen(translateCharUTF8(STRING_ELT(x, i))); } // n-1 ", " separators nchar_total += (len-1)*2; //outer parentheses plus terminator nchar_total += 3; //allocate memory and create a cursor char *str = malloc(nchar_total); char *cursor = str; char **cur = &cursor; //init object append_text(cur, "[", 1); //copy everything for (int i=0; i<len; i++) { append_text(cur, translateCharUTF8(STRING_ELT(x, i)), -1); append_text(cur, ", ", 2); } //remove trailing ", " if(len) { cursor -= 2; } //finish up append_text(cur, "]\0", 2); //encode as UTF8 string SEXP out = PROTECT(allocVector(STRSXP, 1)); SET_STRING_ELT(out, 0, mkCharCE(str, CE_UTF8)); UNPROTECT(1); free(str); return out; }
SEXP R_mongo_collection_new(SEXP ptr_client, SEXP collection, SEXP db) { mongoc_client_t *client = r2client(ptr_client); mongoc_collection_t *col = mongoc_client_get_collection (client, translateCharUTF8(asChar(db)), translateCharUTF8(asChar(collection))); SEXP out = PROTECT(col2r(col)); setAttrib(out, install("client"), ptr_client); UNPROTECT(1); return out; }
SEXP R_mongo_collection_drop_index(SEXP ptr_col, SEXP name) { mongoc_collection_t *col = r2col(ptr_col); const char *str = translateCharUTF8(asChar(name)); bson_error_t err; if(!mongoc_collection_drop_index(col, str, &err)) stop(err.message); return ScalarLogical(1); }
SEXP R_json_to_bson(SEXP json){ bson_t *b; bson_error_t err; b = bson_new_from_json ((uint8_t*)translateCharUTF8(asChar(json)), -1, &err); if(!b) stop(err.message); return bson2r(b); }
SEXP RTcl_ObjFromCharVector(SEXP args) { char *s; Tcl_DString s_ds; int count; Tcl_Obj *tclobj, *elem; int i; SEXP val, drop; Tcl_Encoding encoding; const void *vmax = vmaxget(); val = CADR(args); drop = CADDR(args); tclobj = Tcl_NewObj(); count = length(val); encoding = Tcl_GetEncoding(RTcl_interp, "utf-8"); if (count == 1 && LOGICAL(drop)[0]) { Tcl_DStringInit(&s_ds); s = Tcl_ExternalToUtfDString(encoding, translateCharUTF8(STRING_ELT(val, 0)), -1, &s_ds); Tcl_SetStringObj(tclobj, s, -1); Tcl_DStringFree(&s_ds); } else for ( i = 0 ; i < count ; i++) { elem = Tcl_NewObj(); Tcl_DStringInit(&s_ds); s = Tcl_ExternalToUtfDString(encoding, translateCharUTF8(STRING_ELT(val, i)), -1, &s_ds); Tcl_SetStringObj(elem, s, -1); Tcl_DStringFree(&s_ds); Tcl_ListObjAppendElement(RTcl_interp, tclobj, elem); } Tcl_FreeEncoding(encoding); SEXP res = makeRTclObject(tclobj); vmaxset(vmax); return res; }
static constxt char *convertToUTF8(constxt char *str, R_GE_gcontext *gc) { if (gc->fontface == 5) /* symbol font needs re-coding to UTF-8 */ str = symbol2utf8(str); #ifdef translateCharUTF8 else { /* first check whether we are dealing with non-ASCII at all */ int ascii = 1; constxt unsigned char *c = (constxt unsigned char*) str; while (*c) { if (*c > 127) { ascii = 0; break; } c++; } if (!ascii) /* non-ASCII, we need to convert it to UTF8 */ str = translateCharUTF8(mkCharCE(str, CE_NATIVE)); } #endif return str; }
const gchar * asCString(USER_OBJECT_ s_str) { if (s_str == NULL_USER_OBJECT) return(NULL); if (IS_VECTOR(s_str)) { if (GET_LENGTH(s_str) == 0) return(NULL); s_str = STRING_ELT(s_str, 0); } #if defined(R_VERSION) && R_VERSION >= R_Version(2, 7, 0) return(translateCharUTF8(s_str)); #else return(CHAR_DEREF(s_str)); #endif /*return(CHAR_DEREF(STRING_ELT(s_str, 0)));*/ }
SEXP R_download_curl(SEXP url, SEXP destfile, SEXP quiet, SEXP mode, SEXP ptr) { if(!isString(url)) error("Argument 'url' must be string."); if(!isString(destfile)) error("Argument 'destfile' must be string."); if(!isLogical(quiet)) error("Argument 'quiet' must be TRUE/FALSE."); if(!isString(mode)) error("Argument 'mode' must be string."); /* get the handle */ CURL *handle = get_handle(ptr); /* open file */ FILE *dest = fopen(CHAR(asChar(destfile)), CHAR(asChar(mode))); if(!dest) error("Failed to open file %s.", CHAR(asChar(destfile))); /* set options */ curl_easy_setopt(handle, CURLOPT_URL, translateCharUTF8(asChar(url))); curl_easy_setopt(handle, CURLOPT_NOPROGRESS, asLogical(quiet)); curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, push_disk); curl_easy_setopt(handle, CURLOPT_WRITEDATA, dest); /* perform blocking request */ CURLcode status = curl_easy_perform(handle); /* cleanup */ curl_easy_setopt(handle, CURLOPT_URL, NULL); curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 1); curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, NULL); curl_easy_setopt(handle, CURLOPT_WRITEDATA, NULL); fclose(dest); if (status != CURLE_OK) error(curl_easy_strerror(status)); /* check for success */ stop_for_status(handle); return ScalarInteger(0); }
SEXP R_validate(SEXP x) { /* get data from R */ const char* json = translateCharUTF8(asChar(x)); /* test for BOM */ if(json[0] == '\xEF' && json[1] == '\xBB' && json[2] == '\xBF'){ SEXP output = PROTECT(duplicate(ScalarLogical(0))); SEXP msg = PROTECT(Rf_mkString("JSON string contains UTF8 byte-order-mark.")); setAttrib(output, install("err"), msg); UNPROTECT(2); return(output); } /* allocate a parser */ yajl_handle hand = yajl_alloc(NULL, NULL, NULL); /* parser options */ //yajl_config(hand, yajl_dont_validate_strings, 1); /* go parse */ const size_t rd = strlen(json); yajl_status stat = yajl_parse(hand, (const unsigned char*) json, rd); if(stat == yajl_status_ok) { stat = yajl_complete_parse(hand); } SEXP output = PROTECT(duplicate(ScalarLogical(!stat))); //error message if (stat != yajl_status_ok) { unsigned char* str = yajl_get_error(hand, 1, (const unsigned char*) json, rd); SEXP errstr = PROTECT(mkString((const char *) str)); SEXP offset = PROTECT(ScalarInteger(yajl_get_bytes_consumed(hand))); yajl_free_error(hand, str); setAttrib(output, install("offset"), offset); setAttrib(output, install("err"), errstr); UNPROTECT(2); } /* return boolean vec (0 means no errors, means is valid) */ yajl_free(hand); UNPROTECT(1); return output; }
SEXP R_mongo_collection_insert_page(SEXP ptr_col, SEXP json_vec, SEXP stop_on_error){ if(!isString(json_vec) || !length(json_vec)) stop("json_vec must be character string of at least length 1"); //ordered means serial execution bool ordered = asLogical(stop_on_error); //create bulk operation bson_error_t err; bson_t *b; bson_t reply; mongoc_bulk_operation_t *bulk = mongoc_collection_create_bulk_operation (r2col(ptr_col), ordered, NULL); for(int i = 0; i < length(json_vec); i++){ b = bson_new_from_json ((uint8_t*)translateCharUTF8(asChar(STRING_ELT(json_vec, i))), -1, &err); if(!b){ mongoc_bulk_operation_destroy (bulk); stop(err.message); } mongoc_bulk_operation_insert(bulk, b); bson_destroy (b); b = NULL; } //execute bulk operation bool success = mongoc_bulk_operation_execute (bulk, &reply, &err); mongoc_bulk_operation_destroy (bulk); //check for errors if(!success){ if(ordered){ Rf_errorcall(R_NilValue, err.message); } else { Rf_warningcall(R_NilValue, "Not all inserts were successful: %s\n", err.message); } } //get output SEXP out = PROTECT(bson2list(&reply)); bson_destroy (&reply); UNPROTECT(1); return out; }
SEXP R_mongo_client_new(SEXP uri_string) { mongoc_client_t *client = mongoc_client_new (translateCharUTF8(asChar(uri_string))); if(!client) stop("Invalid uri_string. Try mongodb://localhost"); //set ssl certificates here if (mongoc_uri_get_ssl (mongoc_client_get_uri(client))) { mongoc_client_set_ssl_opts(client, mongoc_ssl_opt_get_default()); } //verify that server is online //can fail if user has limited priv /* bson_error_t err; if(!mongoc_client_get_server_status(client, NULL, NULL, &err)){ mongoc_client_destroy(client); stop(err.message); } */ return client2r(client); }
SEXP R_parse(SEXP x, SEXP bigint_as_char) { /* get data from R */ const char* json = translateCharUTF8(asChar(x)); const int bigint = asLogical(bigint_as_char); /* ignore BOM as suggested by RFC */ if(json[0] == '\xEF' && json[1] == '\xBB' && json[2] == '\xBF'){ warning("JSON string contains (illegal) UTF8 byte-order-mark!"); json = json + 3; } /* parse json */ char errbuf[1024]; yajl_val node = yajl_tree_parse(json, errbuf, sizeof(errbuf)); /* parser error */ if (!node) { error(errbuf); } SEXP out = ParseValue(node, bigint); yajl_tree_free(node); return(out); }
SEXP R_handle_setopt(SEXP ptr, SEXP keys, SEXP values){ CURL *handle = get_handle(ptr); SEXP prot = R_ExternalPtrProtected(ptr); SEXP optnames = PROTECT(getAttrib(values, R_NamesSymbol)); if(!isInteger(keys)) error("keys` must be an integer"); if(!isVector(values)) error("`values` must be a list"); for(int i = 0; i < length(keys); i++){ int key = INTEGER(keys)[i]; const char* optname = CHAR(STRING_ELT(optnames, i)); SEXP val = VECTOR_ELT(values, i); if(val == R_NilValue){ assert(curl_easy_setopt(handle, key, NULL)); #ifdef HAS_XFERINFOFUNCTION } else if (key == CURLOPT_XFERINFOFUNCTION) { if (TYPEOF(val) != CLOSXP) error("Value for option %s (%d) must be a function.", optname, key); assert(curl_easy_setopt(handle, CURLOPT_XFERINFOFUNCTION, (curl_progress_callback) R_curl_callback_xferinfo)); assert(curl_easy_setopt(handle, CURLOPT_XFERINFODATA, val)); assert(curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0)); SET_VECTOR_ELT(prot, 1, val); //protect gc #endif } else if (key == CURLOPT_PROGRESSFUNCTION) { if (TYPEOF(val) != CLOSXP) error("Value for option %s (%d) must be a function.", optname, key); assert(curl_easy_setopt(handle, CURLOPT_PROGRESSFUNCTION, (curl_progress_callback) R_curl_callback_progress)); assert(curl_easy_setopt(handle, CURLOPT_PROGRESSDATA, val)); assert(curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0)); SET_VECTOR_ELT(prot, 2, val); //protect gc } else if (key == CURLOPT_READFUNCTION) { if (TYPEOF(val) != CLOSXP) error("Value for option %s (%d) must be a function.", optname, key); assert(curl_easy_setopt(handle, CURLOPT_READFUNCTION, (curl_read_callback) R_curl_callback_read)); assert(curl_easy_setopt(handle, CURLOPT_READDATA, val)); SET_VECTOR_ELT(prot, 3, val); //protect gc } else if (key == CURLOPT_DEBUGFUNCTION) { if (TYPEOF(val) != CLOSXP) error("Value for option %s (%d) must be a function.", optname, key); assert(curl_easy_setopt(handle, CURLOPT_DEBUGFUNCTION, (curl_debug_callback) R_curl_callback_debug)); assert(curl_easy_setopt(handle, CURLOPT_DEBUGDATA, val)); SET_VECTOR_ELT(prot, 4, val); //protect gc } else if (key == CURLOPT_URL) { /* always use utf-8 for urls */ const char * url_utf8 = translateCharUTF8(STRING_ELT(val, 0)); assert(curl_easy_setopt(handle, CURLOPT_URL, url_utf8)); } else if (opt_is_linked_list(key)) { error("Option %s (%d) not supported.", optname, key); } else if(key < 10000){ if(!isNumeric(val) || length(val) != 1) { error("Value for option %s (%d) must be a number.", optname, key); } assert(curl_easy_setopt(handle, key, (long) asInteger(val))); } else if(key < 20000){ switch (TYPEOF(val)) { case RAWSXP: if(key == CURLOPT_POSTFIELDS || key == CURLOPT_COPYPOSTFIELDS) assert(curl_easy_setopt(handle, CURLOPT_POSTFIELDSIZE_LARGE, (curl_off_t) Rf_length(val))); assert(curl_easy_setopt(handle, key, RAW(val))); break; case STRSXP: if (length(val) != 1) error("Value for option %s (%d) must be length-1 string", optname, key); assert(curl_easy_setopt(handle, key, CHAR(STRING_ELT(val, 0)))); break; default: error("Value for option %s (%d) must be a string or raw vector.", optname, key); } } else if(key >= 30000 && key < 40000){ if(!isNumeric(val) || length(val) != 1) { error("Value for option %s (%d) must be a number.", optname, key); } assert(curl_easy_setopt(handle, key, (curl_off_t) asReal(val))); } else { error("Option %s (%d) not supported.", optname, key); } } UNPROTECT(1); return ScalarLogical(1); }
/* Note that NA_STRING is not handled separately here. This is deliberate -- see ?paste -- and implicitly coerces it to "NA" */ SEXP attribute_hidden do_paste(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP ans, collapse, sep, x; int sepw, u_sepw, ienc; R_xlen_t i, j, k, maxlen, nx, pwidth; const char *s, *cbuf, *csep=NULL, *u_csep=NULL; char *buf; Rboolean allKnown, anyKnown, use_UTF8, use_Bytes, sepASCII = TRUE, sepUTF8 = FALSE, sepBytes = FALSE, sepKnown = FALSE, use_sep = (PRIMVAL(op) == 0); const void *vmax; checkArity(op, args); /* We use formatting and so we must initialize printing. */ PrintDefaults(); /* Check the arguments */ x = CAR(args); if (!isVectorList(x)) error(_("invalid first argument")); nx = xlength(x); if(use_sep) { /* paste(..., sep, .) */ sep = CADR(args); if (!isString(sep) || LENGTH(sep) <= 0 || STRING_ELT(sep, 0) == NA_STRING) error(_("invalid separator")); sep = STRING_ELT(sep, 0); csep = translateChar(sep); u_sepw = sepw = (int) strlen(csep); // will be short sepASCII = strIsASCII(csep); sepKnown = ENC_KNOWN(sep) > 0; sepUTF8 = IS_UTF8(sep); sepBytes = IS_BYTES(sep); collapse = CADDR(args); } else { /* paste0(..., .) */ u_sepw = sepw = 0; sep = R_NilValue;/* -Wall */ collapse = CADR(args); } if (!isNull(collapse)) if(!isString(collapse) || LENGTH(collapse) <= 0 || STRING_ELT(collapse, 0) == NA_STRING) error(_("invalid '%s' argument"), "collapse"); if(nx == 0) return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0); /* Maximum argument length, coerce if needed */ maxlen = 0; for (j = 0; j < nx; j++) { if (!isString(VECTOR_ELT(x, j))) { /* formerly in R code: moved to C for speed */ SEXP call, xj = VECTOR_ELT(x, j); if(OBJECT(xj)) { /* method dispatch */ PROTECT(call = lang2(install("as.character"), xj)); SET_VECTOR_ELT(x, j, eval(call, env)); UNPROTECT(1); } else if (isSymbol(xj)) SET_VECTOR_ELT(x, j, ScalarString(PRINTNAME(xj))); else SET_VECTOR_ELT(x, j, coerceVector(xj, STRSXP)); if (!isString(VECTOR_ELT(x, j))) error(_("non-string argument to internal 'paste'")); } if(xlength(VECTOR_ELT(x, j)) > maxlen) maxlen = xlength(VECTOR_ELT(x, j)); } if(maxlen == 0) return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0); PROTECT(ans = allocVector(STRSXP, maxlen)); for (i = 0; i < maxlen; i++) { /* Strategy for marking the encoding: if all inputs (including * the separator) are ASCII, so is the output and we don't * need to mark. Otherwise if all non-ASCII inputs are of * declared encoding, we should mark. * Need to be careful only to include separator if it is used. */ anyKnown = FALSE; allKnown = TRUE; use_UTF8 = FALSE; use_Bytes = FALSE; if(nx > 1) { allKnown = sepKnown || sepASCII; anyKnown = sepKnown; use_UTF8 = sepUTF8; use_Bytes = sepBytes; } pwidth = 0; for (j = 0; j < nx; j++) { k = xlength(VECTOR_ELT(x, j)); if (k > 0) { SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k); if(IS_UTF8(cs)) use_UTF8 = TRUE; if(IS_BYTES(cs)) use_Bytes = TRUE; } } if (use_Bytes) use_UTF8 = FALSE; vmax = vmaxget(); for (j = 0; j < nx; j++) { k = xlength(VECTOR_ELT(x, j)); if (k > 0) { if(use_Bytes) pwidth += strlen(CHAR(STRING_ELT(VECTOR_ELT(x, j), i % k))); else if(use_UTF8) pwidth += strlen(translateCharUTF8(STRING_ELT(VECTOR_ELT(x, j), i % k))); else pwidth += strlen(translateChar(STRING_ELT(VECTOR_ELT(x, j), i % k))); vmaxset(vmax); } } if(use_sep) { if (use_UTF8 && !u_csep) { u_csep = translateCharUTF8(sep); u_sepw = (int) strlen(u_csep); // will be short } pwidth += (nx - 1) * (use_UTF8 ? u_sepw : sepw); } if (pwidth > INT_MAX) error(_("result would exceed 2^31-1 bytes")); cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); vmax = vmaxget(); for (j = 0; j < nx; j++) { k = xlength(VECTOR_ELT(x, j)); if (k > 0) { SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k); if (use_UTF8) { s = translateCharUTF8(cs); strcpy(buf, s); buf += strlen(s); } else { s = use_Bytes ? CHAR(cs) : translateChar(cs); strcpy(buf, s); buf += strlen(s); allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(cs)> 0)); anyKnown = anyKnown || (ENC_KNOWN(cs)> 0); } } if (sepw != 0 && j != nx - 1) { if (use_UTF8) { strcpy(buf, u_csep); buf += u_sepw; } else { strcpy(buf, csep); buf += sepw; } } vmax = vmaxget(); } ienc = 0; if(use_UTF8) ienc = CE_UTF8; else if(use_Bytes) ienc = CE_BYTES; else if(anyKnown && allKnown) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } SET_STRING_ELT(ans, i, mkCharCE(cbuf, ienc)); } /* Now collapse, if required. */ if(collapse != R_NilValue && (nx = XLENGTH(ans)) > 0) { sep = STRING_ELT(collapse, 0); use_UTF8 = IS_UTF8(sep); use_Bytes = IS_BYTES(sep); for (i = 0; i < nx; i++) { if(IS_UTF8(STRING_ELT(ans, i))) use_UTF8 = TRUE; if(IS_BYTES(STRING_ELT(ans, i))) use_Bytes = TRUE; } if(use_Bytes) { csep = CHAR(sep); use_UTF8 = FALSE; } else if(use_UTF8) csep = translateCharUTF8(sep); else csep = translateChar(sep); sepw = (int) strlen(csep); anyKnown = ENC_KNOWN(sep) > 0; allKnown = anyKnown || strIsASCII(csep); pwidth = 0; vmax = vmaxget(); for (i = 0; i < nx; i++) if(use_UTF8) { pwidth += strlen(translateCharUTF8(STRING_ELT(ans, i))); vmaxset(vmax); } else /* already translated */ pwidth += strlen(CHAR(STRING_ELT(ans, i))); pwidth += (nx - 1) * sepw; if (pwidth > INT_MAX) error(_("result would exceed 2^31-1 bytes")); cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff); vmax = vmaxget(); for (i = 0; i < nx; i++) { if(i > 0) { strcpy(buf, csep); buf += sepw; } if(use_UTF8) s = translateCharUTF8(STRING_ELT(ans, i)); else /* already translated */ s = CHAR(STRING_ELT(ans, i)); strcpy(buf, s); while (*buf) buf++; allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(STRING_ELT(ans, i)) > 0)); anyKnown = anyKnown || (ENC_KNOWN(STRING_ELT(ans, i)) > 0); if(use_UTF8) vmaxset(vmax); } UNPROTECT(1); ienc = CE_NATIVE; if(use_UTF8) ienc = CE_UTF8; else if(use_Bytes) ienc = CE_BYTES; else if(anyKnown && allKnown) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } PROTECT(ans = allocVector(STRSXP, 1)); SET_STRING_ELT(ans, 0, mkCharCE(cbuf, ienc)); } R_FreeStringBufferL(&cbuff); UNPROTECT(1); return ans; }
SEXP R_reformat(SEXP x, SEXP pretty, SEXP indent_string) { yajl_status stat; yajl_handle hand; yajl_gen g; SEXP output; /* init generator */ g = yajl_gen_alloc(NULL); yajl_gen_config(g, yajl_gen_beautify, asInteger(pretty)); yajl_gen_config(g, yajl_gen_indent_string, translateCharUTF8(asChar(indent_string))); yajl_gen_config(g, yajl_gen_validate_utf8, 0); yajl_gen_config(g, yajl_gen_escape_solidus, 1); //modified to only escape for "</" /* init parser */ hand = yajl_alloc(&callbacks, NULL, (void *) g); /* get data from R */ const char* json = translateCharUTF8(asChar(x)); /* ignore BOM */ if(json[0] == '\xEF' && json[1] == '\xBB' && json[2] == '\xBF'){ json = json + 3; } /* Get length (after removing bom) */ const size_t rd = strlen(json); /* parse */ stat = yajl_parse(hand, (const unsigned char*) json, rd); if(stat == yajl_status_ok) { stat = yajl_complete_parse(hand); } //error message if (stat != yajl_status_ok) { unsigned char* str = yajl_get_error(hand, 1, (const unsigned char*) json, rd); output = PROTECT(mkString((const char*) str)); yajl_free_error(hand, str); } else { //create R object const unsigned char* buf; size_t len; yajl_gen_get_buf(g, &buf, &len); //force as UTF8 string output = PROTECT(allocVector(STRSXP, 1)); SET_STRING_ELT(output, 0, mkCharCE((const char*) buf, CE_UTF8)); setAttrib(output, R_ClassSymbol, mkString("json")); } /* clean up */ yajl_gen_clear(g); yajl_gen_free(g); yajl_free(hand); /* return boolean vec (0 means no errors, means is valid) */ SEXP vec = PROTECT(allocVector(VECSXP, 2)); SET_VECTOR_ELT(vec, 0, ScalarInteger(stat)); SET_VECTOR_ELT(vec, 1, output); UNPROTECT(2); return vec; }
SEXP do_mchoice_match(SEXP x, SEXP table, SEXP nomatch) { SEXP elm_index; /* Storage for value of first row of first match of each element in x *\/ */ R_len_t len; /* Number of elements in x */ R_len_t t_len; /* Number of elements in table REMOVE*/ R_len_t nfound = 0; /* count of number of elements of x matched in table */ char *str_ptr; /* current location pointer */ const char *str; int i, j, k, comp, slen; /* REMOVE k */ S_EVALUATOR /* get number of elements in x */ len = LENGTH(x); /* allocate an index vector of the same length as x */ PROTECT(elm_index = NEW_INTEGER(len)); /* set all values in elm_index to 0 */ memset((int *)INTEGER_POINTER(elm_index), 0, len * sizeof(int)); /* count number of x values that are zero and set nfound to that */ for(i=0; i < len; i++) { if(INTEGER_POINTER(x)[i] == 0) { INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0]; nfound++; } } /* iterate through each element of table looking for matches to values in x. it is done this way because parsing the mChoice string is expensive and looping is not. */ for(i=0; i < LENGTH(table) && nfound < len; i++) { if(STRING_ELT(table, i) == NA_STRING) continue; str = translateCharUTF8(STRING_ELT(table, i)); slen = strlen(str) + 1; str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff); strncpy(str_ptr, str, slen); str_ptr[slen] = '\0'; while(str_ptr != NULL && nfound < len) { /* get the next component of the mChoice string */ comp = get_next_mchoice(&str_ptr); /* if comp is zero the next component was blank continue */ if(comp == 0) continue; /* Compare the component to all elements of x */ for(j = 0; j < len && nfound < len; j++) { /* If the element index is not zero that value has been prevously matched continue to next value */ if(INTEGER_POINTER(elm_index)[j] || INTEGER_POINTER(x)[j] == 0) continue; if(INTEGER_POINTER(x)[j] == comp) { nfound++; INTEGER_POINTER(elm_index)[j] = i+1; } } } } Hmisc_FreeStringBuffer(&cbuff); if(nfound < len) { /* if not all elements of x are matched to those in table set the elements of elmt_index that are zero to the value of nomatch */ for(i=0; i < len; i++) { if(INTEGER_POINTER(elm_index)[i] == 0) { INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0]; } } } UNPROTECT(1); return(elm_index); }
/* a version of EncodeElement with different escaping of char strings */ static const char *EncodeElementS(SEXP x, int indx, R_StringBuffer *buff, char cdec) { switch(TYPEOF(x)) { case STRSXP: { const char *s = translateCharUTF8(STRING_ELT(x, indx)); char *u, *cbuf; int j, len, blen, offset; len = strlen(s); blen = len * 2 + 1; R_AllocStringBuffer(blen, buff); u = cbuf = buff->data; offset = 0; for (j = 0; j < len; j++){ switch(s[offset+j]){ /* http://www.postgresql.org/docs/8.1/static/sql-copy.html */ case '\b': *u++ = '\\'; *u++ = 'b'; break; case '\f': *u++ = '\\'; *u++ = 'f'; break; case '\n': *u++ = '\\'; *u++ = 'n'; break; case '\r': *u++ = '\\'; *u++ = 'r'; break; case '\t': *u++ = '\\'; *u++ = 't'; break; case '\v': *u++ = '\\'; *u++ = 'v'; break; case '\\': *u++ = '\\'; *u++ = '\\'; break; default: *u++ = s[offset+j]; } } *u = '\0'; return buff->data; } case LGLSXP:{ int value; value = LOGICAL(x)[indx]; if(value == TRUE) return "true"; if(value == FALSE) return "false"; return "\\N"; } case INTSXP:{ int value; value = INTEGER(x)[indx]; if(ISNA(value)) return "\\N"; snprintf(buff->data, buff->bufsize, "%d", value); return buff->data; } case REALSXP:{ double value = REAL(x)[indx]; if (!R_FINITE(value)) { if(ISNA(value)) return "\\N"; else if(ISNAN(value)) return "NaN"; else if(value > 0) return "Inf"; else return "-Inf"; } snprintf(buff->data, buff->bufsize, "%.15g", value); return buff->data; } default: return NULL; } return NULL; }
/* GetNativeSystemInfo is XP or later */ pGNSI = (PGNSI) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); if(NULL != pGNSI) pGNSI(&si); else GetSystemInfo(&si); if(si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) strcat(ver, " x64"); } SET_STRING_ELT(ans, 1, mkChar(ver)); if((int)osvi.dwMajorVersion >= 5) { if(osvi.wServicePackMajor > 0) snprintf(ver, 256, "build %d, Service Pack %d", LOWORD(osvi.dwBuildNumber), (int) osvi.wServicePackMajor); else snprintf(ver, 256, "build %d", LOWORD(osvi.dwBuildNumber)); } else snprintf(ver, 256, "build %d, %s", LOWORD(osvi.dwBuildNumber), osvi.szCSDVersion); SET_STRING_ELT(ans, 2, mkChar(ver)); GetComputerNameW(name, &namelen); wcstoutf8(buf, name, 1000); SET_STRING_ELT(ans, 3, mkCharCE(buf, CE_UTF8)); #ifdef WIN64 SET_STRING_ELT(ans, 4, mkChar("x86-64")); #else SET_STRING_ELT(ans, 4, mkChar("x86")); #endif GetUserNameW(user, &userlen); wcstoutf8(buf, user, 1000); SET_STRING_ELT(ans, 5, mkCharCE(buf, CE_UTF8)); SET_STRING_ELT(ans, 6, STRING_ELT(ans, 5)); SET_STRING_ELT(ans, 7, STRING_ELT(ans, 5)); PROTECT(ansnames = allocVector(STRSXP, 8)); SET_STRING_ELT(ansnames, 0, mkChar("sysname")); SET_STRING_ELT(ansnames, 1, mkChar("release")); SET_STRING_ELT(ansnames, 2, mkChar("version")); SET_STRING_ELT(ansnames, 3, mkChar("nodename")); SET_STRING_ELT(ansnames, 4, mkChar("machine")); SET_STRING_ELT(ansnames, 5, mkChar("login")); SET_STRING_ELT(ansnames, 6, mkChar("user")); SET_STRING_ELT(ansnames, 7, mkChar("effective_user")); setAttrib(ans, R_NamesSymbol, ansnames); UNPROTECT(2); return ans; } SEXP do_syssleep(SEXP call, SEXP op, SEXP args, SEXP rho) { DWORD mtime; int ntime; double time; checkArity(op, args); time = asReal(CAR(args)); if (ISNAN(time) || time < 0) errorcall(call, _("invalid '%s' value"), "time"); ntime = 1000*(time) + 0.5; while (ntime > 0) { mtime = min(500, ntime); ntime -= mtime; Sleep(mtime); R_ProcessEvents(); } return R_NilValue; } #ifdef LEA_MALLOC #define MALLINFO_FIELD_TYPE size_t struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ MALLINFO_FIELD_TYPE smblks; /* number of fastbin blocks */ MALLINFO_FIELD_TYPE hblks; /* number of mmapped regions */ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ MALLINFO_FIELD_TYPE fsmblks; /* space available in freed fastbin blocks */ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* top-most, releasable (via malloc_trim) space */ }; extern R_size_t R_max_memory; struct mallinfo mallinfo(void); #endif SEXP in_memsize(SEXP ssize) { SEXP ans; int maxmem = NA_LOGICAL; if(isLogical(ssize)) maxmem = asLogical(ssize); else if(isReal(ssize)) { R_size_t newmax; double mem = asReal(ssize); if (!R_FINITE(mem)) error(_("incorrect argument")); #ifdef LEA_MALLOC #ifndef WIN64 if(mem >= 4096) error(_("don't be silly!: your machine has a 4Gb address limit")); #endif newmax = mem * 1048576.0; if (newmax < R_max_memory) warning(_("cannot decrease memory limit: ignored")); else R_max_memory = newmax; #endif } else error(_("incorrect argument")); PROTECT(ans = allocVector(REALSXP, 1)); #ifdef LEA_MALLOC if(maxmem == NA_LOGICAL) REAL(ans)[0] = R_max_memory; else if(maxmem) REAL(ans)[0] = mallinfo().usmblks; else REAL(ans)[0] = mallinfo().uordblks; REAL(ans)[0] /= 1048576.0; #else REAL(ans)[0] = NA_REAL; #endif UNPROTECT(1); return ans; } SEXP do_dllversion(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP path = R_NilValue, ans; const wchar_t *dll; DWORD dwVerInfoSize; DWORD dwVerHnd; checkArity(op, args); path = CAR(args); if(!isString(path) || LENGTH(path) != 1) errorcall(call, _("invalid '%s' argument"), "path"); dll = filenameToWchar(STRING_ELT(path, 0), FALSE); dwVerInfoSize = GetFileVersionInfoSizeW(dll, &dwVerHnd); PROTECT(ans = allocVector(STRSXP, 2)); SET_STRING_ELT(ans, 0, mkChar("")); SET_STRING_ELT(ans, 1, mkChar("")); if (dwVerInfoSize) { BOOL fRet; LPSTR lpstrVffInfo; LPSTR lszVer = NULL; UINT cchVer = 0; lpstrVffInfo = (LPSTR) malloc(dwVerInfoSize); if (GetFileVersionInfoW(dll, 0L, dwVerInfoSize, lpstrVffInfo)) { fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\FileVersion"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 0, mkChar(lszVer)); fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\R Version"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer)); else { fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\Compiled under R Version"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer)); } } else ans = R_NilValue; free(lpstrVffInfo); } else ans = R_NilValue; UNPROTECT(1); return ans; } int Rwin_rename(const char *from, const char *to) { return (MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0); } int Rwin_wrename(const wchar_t *from, const wchar_t *to) { return (MoveFileExW(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0); } const char *formatError(DWORD res) { static char buf[1000], *p; FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, res, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, 1000, NULL); p = buf+strlen(buf) -1; if(*p == '\n') *p = '\0'; p = buf+strlen(buf) -1; if(*p == '\r') *p = '\0'; p = buf+strlen(buf) -1; if(*p == '.') *p = '\0'; return buf; } void R_UTF8fixslash(char *s); /* from main/util.c */ SEXP do_normalizepath(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, paths = CAR(args), el, slash; int i, n = LENGTH(paths), res; char tmp[MAX_PATH], longpath[MAX_PATH], *tmp2; wchar_t wtmp[32768], wlongpath[32768], *wtmp2; int mustWork, fslash = 0; checkArity(op, args); if(!isString(paths)) errorcall(call, _("'path' must be a character vector")); slash = CADR(args); if(!isString(slash) || LENGTH(slash) != 1) errorcall(call, "'winslash' must be a character string"); const char *sl = CHAR(STRING_ELT(slash, 0)); if (strcmp(sl, "/") && strcmp(sl, "\\")) errorcall(call, "'winslash' must be '/' or '\\\\'"); if (strcmp(sl, "/") == 0) fslash = 1; mustWork = asLogical(CADDR(args)); PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { int warn = 0; SEXP result; el = STRING_ELT(paths, i); result = el; if(getCharCE(el) == CE_UTF8) { if ((res = GetFullPathNameW(filenameToWchar(el, FALSE), 32768, wtmp, &wtmp2)) && res <= 32768) { if ((res = GetLongPathNameW(wtmp, wlongpath, 32768)) && res <= 32768) { wcstoutf8(longpath, wlongpath, wcslen(wlongpath)+1); if(fslash) R_UTF8fixslash(longpath); result = mkCharCE(longpath, CE_UTF8); } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { wcstoutf8(tmp, wtmp, wcslen(wtmp)+1); if(fslash) R_UTF8fixslash(tmp); result = mkCharCE(tmp, CE_UTF8); warn = 1; } } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if (fslash) { strcpy(tmp, translateCharUTF8(el)); R_UTF8fixslash(tmp); result = mkCharCE(tmp, CE_UTF8); } warn = 1; } if (warn && (mustWork == NA_LOGICAL)) warningcall(call, "path[%d]=\"%ls\": %s", i+1, filenameToWchar(el,FALSE), formatError(GetLastError())); } else { if ((res = GetFullPathName(translateChar(el), MAX_PATH, tmp, &tmp2)) && res <= MAX_PATH) { if ((res = GetLongPathName(tmp, longpath, MAX_PATH)) && res <= MAX_PATH) { if(fslash) R_fixslash(longpath); result = mkChar(longpath); } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if(fslash) R_fixslash(tmp); result = mkChar(tmp); warn = 1; } } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if (fslash) { strcpy(tmp, translateChar(el)); R_fixslash(tmp); result = mkChar(tmp); } warn = 1; } if (warn && (mustWork == NA_LOGICAL)) warningcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } SET_STRING_ELT(ans, i, result); } UNPROTECT(1); return ans; }
// If we find a non-ASCII, non-NA, non-UTF8 encoding, we try to convert it to UTF8. That is, marked non-ascii/non-UTF8 encodings will always be checked in UTF8 locale. This seems to be the best fix I could think of to put the encoding issues to rest.. // Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues for default setup. // Fix for #66, #69, #469 and #1293 // TODO: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings. SEXP ENC2UTF8(SEXP s) { if (!IS_ASCII(s) && s != NA_STRING && !IS_UTF8(s)) s = mkCharCE(translateCharUTF8(s), CE_UTF8); return (s); }