Exemplo n.º 1
0
/* collapse a json object with n spaces */
SEXP C_collapse_object_pretty(SEXP x, SEXP y, SEXP indent) {
  if (!isString(x) || !isString(y))
    error("x and y must character vectors.");

  int ni = asInteger(indent);
  if(ni == NA_INTEGER)
    error("indent must not be NA");

  int len = length(x);
  if (len != length(y))
    error("x and y must have same length.");

  //calculate required space
  size_t nchar_total = 0;
  for (int i=0; i<len; i++) {
    if(STRING_ELT(y, i) == NA_STRING) continue;
    nchar_total += strlen(translateCharUTF8(STRING_ELT(x, i)));
    nchar_total += strlen(translateCharUTF8(STRING_ELT(y, i)));
    nchar_total += ni + 6; //indent plus two extra spaces plus ": " and ",\n"
  }

  //final indent plus curly braces and linebreak and terminator
  nchar_total += (ni + 2 + 2);

  //allocate memory and create a cursor
  char *str = malloc(nchar_total);
  char *cursor = str;
  char **cur = &cursor;

  //init object
  append_text(cur, "{", 1);
  const char *start = *cur;

  //copy everything
  for (int i=0; i<len; i++) {
    if(STRING_ELT(y, i) == NA_STRING) continue;
    append_text(cur, "\n", 1);
    append_whitespace(cur, ni + 2);
    append_text(cur, translateCharUTF8(STRING_ELT(x, i)), -1);
    append_text(cur, ": ", 2);
    append_text(cur, translateCharUTF8(STRING_ELT(y, i)), -1);
    append_text(cur, ",", 1);
  }

  //finalize object
  if(cursor != start){
    cursor[-1] = '\n';
    append_whitespace(cur, ni);
  }
  append_text(cur, "}\0", 2);

  //encode as UTF8 string
  SEXP out = PROTECT(allocVector(STRSXP, 1));
  SET_STRING_ELT(out, 0, mkCharCE(str,  CE_UTF8));
  UNPROTECT(1);
  free(str);
  return out;
}
Exemplo n.º 2
0
SEXP C_collapse_array_pretty_outer(SEXP x, SEXP indent) {
  if (!isString(x))
    error("x must character vector.");

  int len = length(x);
  int ni = asInteger(indent);
  if(ni == NA_INTEGER)
    error("indent must not be NA");

  //calculate required space
  size_t nchar_total = 0;
  for (int i=0; i<len; i++) {
    nchar_total += strlen(translateCharUTF8(STRING_ELT(x, i)));
  }

  //for indent plus two extra spaces plus ",\n"
  nchar_total += len * (ni + 4);

  //outer parentheses plus final indent and linebreak and terminator
  nchar_total += ni + 4;

  //allocate memory and create a cursor
  char *str = malloc(nchar_total);
  char *cursor = str;
  char **cur = &cursor;

  //init object
  append_text(cur, "[", 1);
  const char *start = *cur;

  //copy everything
  for (int i=0; i<len; i++) {
    append_text(cur, "\n", 1);
    append_whitespace(cur, ni + 2);
    append_text(cur, translateCharUTF8(STRING_ELT(x, i)), -1);
    append_text(cur, ",", 1);
  }

  //remove trailing ", "
  if(cursor != start){
    cursor[-1] = '\n';
    append_whitespace(cur, ni);
  }

  //finish up
  append_text(cur, "]\0", 2);

  //encode as UTF8 string
  SEXP out = PROTECT(allocVector(STRSXP, 1));
  SET_STRING_ELT(out, 0, mkCharCE(str,  CE_UTF8));
  UNPROTECT(1);
  free(str);
  return out;
}
Exemplo n.º 3
0
SEXP R_mongo_collection_rename(SEXP ptr_col, SEXP db, SEXP name) {
  mongoc_collection_t *col = r2col(ptr_col);
  bson_error_t err;
  const char *new_db = NULL;
  if(db != R_NilValue)
    new_db = translateCharUTF8(asChar(db));

  if(!mongoc_collection_rename(col, new_db, translateCharUTF8(asChar(name)), false, &err))
    stop(err.message);
  return ScalarLogical(1);
}
Exemplo n.º 4
0
/* Return NULL on failure */
char* SexpStrVector_getitem(const SEXP sexp, int i) {
  if (TYPEOF(sexp) != STRSXP) {
    printf("Not an R vector of type STRSXP.\n");
    return NULL;
  }
  if ((i < 0) || (i >= LENGTH(sexp))) {
    printf("Out-of-bound.\n");
    /*FIXME: return int or NULL ?*/
    return NULL;
  }
  char *res;
  SEXP sexp_item;
  PROTECT(sexp_item = STRING_ELT(sexp, (R_len_t)i));
  cetype_t encoding = Rf_getCharCE(sexp_item);
  switch (encoding) {
  case CE_UTF8:
    res = (char *)translateCharUTF8(sexp_item);
    break;
  default:
    res = (char *)CHAR(sexp_item);
    break;
  }
  UNPROTECT(1);
  return res;
}
Exemplo n.º 5
0
/* Return NA if not found*/
R_len_t
nameIndex(const SEXP sexp, const char *name) {
  SEXP sexp_item, sexp_names;
  char *name_item;
  PROTECT(sexp_names = getAttrib(sexp, R_NamesSymbol));
  R_len_t n = LENGTH(sexp);
  R_len_t i;
  cetype_t encoding;
  int found = 0;
  for (i = 0; i < n; i++) {
    sexp_item = STRING_ELT(sexp_names, i);
    encoding = Rf_getCharCE(sexp_item);
    switch (encoding) {
    case CE_UTF8:
      name_item = (char *)translateCharUTF8(sexp_item);
      break;
    default:
      name_item = (char *)CHAR(sexp_item);
      break;
    }
    if (strcmp(name, name_item)) {
      found = 1;
      break;
    }
  }
  if (found) {
    return i;
  } else {
    return R_NaInt;
  }
}
Exemplo n.º 6
0
SEXP do_mchoice_equals(SEXP x, SEXP y) 
{
     int x_len = LENGTH(x);     /* length of x vector */
     int y_len = LENGTH(y);     /* length of y vector */
     SEXP ans;                  /* Logical return vector */
     int nfound = 0;                /* number of matches found */
     int i,j, comp;             /* iterators */
     size_t slen;
     char *str_ptr;             /* copy of the x string element */
     const char *str;

     S_EVALUATOR

     if(!IS_INTEGER(y) || y_len == 0)
          PROBLEM "y must be an integer vector of at least length one." ERROR;
   
     PROTECT(ans = NEW_LOGICAL(x_len));
     
     for(i=0; i < x_len; ++i) {
        nfound = 0;
        str = translateCharUTF8(STRING_ELT(x, i));

        slen = strlen(str) + 1;
        
        /* if length of x element is zero or NA no posible match */
        if(STRING_ELT(x, i) == NA_STRING) {
             SET_NA_LGL(LOGICAL_POINTER(ans)[i]);
             continue;
        }
        if(slen == 0) {
             LOGICAL_POINTER(ans)[i] = 0;
             continue;
        }
        
        str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff);
        strncpy(str_ptr, str, slen);
        str_ptr[slen] = '\0';

        while(str_ptr != NULL && nfound < y_len) {
             comp = get_next_mchoice(&str_ptr);

             for(j=0; j < y_len; j++) {
                  if(comp == INTEGER_POINTER(y)[j]) {
                       nfound++;
                       break;
                  }
             }
        }
        
        if(nfound < y_len)
             LOGICAL_POINTER(ans)[i] = 0;
        else
             LOGICAL_POINTER(ans)[i] = 1;
     }
     
     Hmisc_FreeStringBuffer(&cbuff);
     UNPROTECT(1);
     return(ans);
}
Exemplo n.º 7
0
SEXP C_collapse_array_pretty_inner(SEXP x) {
  if (!isString(x))
    error("x must character vector.");

  //calculate required space
  int len = length(x);
  size_t nchar_total = 0;
  for (int i=0; i<len; i++) {
    nchar_total += strlen(translateCharUTF8(STRING_ELT(x, i)));
  }

  // n-1 ", " separators
  nchar_total += (len-1)*2;

  //outer parentheses plus terminator
  nchar_total += 3;

  //allocate memory and create a cursor
  char *str = malloc(nchar_total);
  char *cursor = str;
  char **cur = &cursor;

  //init object
  append_text(cur, "[", 1);

  //copy everything
  for (int i=0; i<len; i++) {
    append_text(cur, translateCharUTF8(STRING_ELT(x, i)), -1);
    append_text(cur, ", ", 2);
  }

  //remove trailing ", "
  if(len) {
    cursor -= 2;
  }

  //finish up
  append_text(cur, "]\0", 2);

  //encode as UTF8 string
  SEXP out = PROTECT(allocVector(STRSXP, 1));
  SET_STRING_ELT(out, 0, mkCharCE(str,  CE_UTF8));
  UNPROTECT(1);
  free(str);
  return out;
}
Exemplo n.º 8
0
SEXP R_mongo_collection_new(SEXP ptr_client, SEXP collection, SEXP db) {
  mongoc_client_t *client = r2client(ptr_client);
  mongoc_collection_t *col = mongoc_client_get_collection (client, translateCharUTF8(asChar(db)), translateCharUTF8(asChar(collection)));
  SEXP out = PROTECT(col2r(col));
  setAttrib(out, install("client"), ptr_client);
  UNPROTECT(1);
  return out;
}
Exemplo n.º 9
0
SEXP R_mongo_collection_drop_index(SEXP ptr_col, SEXP name) {
  mongoc_collection_t *col = r2col(ptr_col);
  const char *str = translateCharUTF8(asChar(name));
  bson_error_t err;

  if(!mongoc_collection_drop_index(col, str, &err))
    stop(err.message);

  return ScalarLogical(1);
}
Exemplo n.º 10
0
Arquivo: bson.c Projeto: CDC/mongolite
SEXP R_json_to_bson(SEXP json){
  bson_t *b;
  bson_error_t err;

  b = bson_new_from_json ((uint8_t*)translateCharUTF8(asChar(json)), -1, &err);
  if(!b)
    stop(err.message);

  return bson2r(b);
}
Exemplo n.º 11
0
Arquivo: tcltk.c Projeto: kmillar/rho
SEXP RTcl_ObjFromCharVector(SEXP args)
{
    char *s;
    Tcl_DString s_ds;
    int count;
    Tcl_Obj *tclobj, *elem;
    int i;
    SEXP val, drop;
    Tcl_Encoding encoding;
    const void *vmax = vmaxget();

    val = CADR(args);
    drop = CADDR(args);

    tclobj = Tcl_NewObj();

    count = length(val);
    encoding = Tcl_GetEncoding(RTcl_interp, "utf-8");
    if (count == 1 && LOGICAL(drop)[0]) {
	Tcl_DStringInit(&s_ds);
	s = Tcl_ExternalToUtfDString(encoding,
				     translateCharUTF8(STRING_ELT(val, 0)), 
				     -1, &s_ds);
	Tcl_SetStringObj(tclobj, s, -1);
	Tcl_DStringFree(&s_ds);
    } else
	for ( i = 0 ; i < count ; i++) {
	    elem = Tcl_NewObj();
	    Tcl_DStringInit(&s_ds);
	    s = Tcl_ExternalToUtfDString(encoding, 
					 translateCharUTF8(STRING_ELT(val, i)),
					 -1, &s_ds);
	    Tcl_SetStringObj(elem, s, -1);
	    Tcl_DStringFree(&s_ds);
	    Tcl_ListObjAppendElement(RTcl_interp, tclobj, elem);
	}

    Tcl_FreeEncoding(encoding);
    SEXP res = makeRTclObject(tclobj);
    vmaxset(vmax);
    return res;
}
Exemplo n.º 12
0
static constxt char *convertToUTF8(constxt char *str, R_GE_gcontext *gc)
{
    if (gc->fontface == 5) /* symbol font needs re-coding to UTF-8 */
	str = symbol2utf8(str);
#ifdef translateCharUTF8
    else { /* first check whether we are dealing with non-ASCII at all */
	int ascii = 1;
	constxt unsigned char *c = (constxt unsigned char*) str;
	while (*c) { if (*c > 127) { ascii = 0; break; } c++; }
	if (!ascii) /* non-ASCII, we need to convert it to UTF8 */
	    str = translateCharUTF8(mkCharCE(str, CE_NATIVE));
    }
#endif
    return str;
}
Exemplo n.º 13
0
const gchar *
asCString(USER_OBJECT_ s_str)
{
  if (s_str == NULL_USER_OBJECT)
    return(NULL);
  if (IS_VECTOR(s_str)) {
    if (GET_LENGTH(s_str) == 0)
      return(NULL);
    s_str = STRING_ELT(s_str, 0);
  }
#if defined(R_VERSION) && R_VERSION >= R_Version(2, 7, 0)
  return(translateCharUTF8(s_str));
#else
  return(CHAR_DEREF(s_str));
#endif
  /*return(CHAR_DEREF(STRING_ELT(s_str, 0)));*/
}
Exemplo n.º 14
0
SEXP R_download_curl(SEXP url, SEXP destfile, SEXP quiet, SEXP mode, SEXP ptr) {
  if(!isString(url))
    error("Argument 'url' must be string.");

  if(!isString(destfile))
    error("Argument 'destfile' must be string.");

  if(!isLogical(quiet))
    error("Argument 'quiet' must be TRUE/FALSE.");

  if(!isString(mode))
    error("Argument 'mode' must be string.");

  /* get the handle */
  CURL *handle = get_handle(ptr);

  /* open file */
  FILE *dest = fopen(CHAR(asChar(destfile)), CHAR(asChar(mode)));
  if(!dest)
    error("Failed to open file %s.", CHAR(asChar(destfile)));

  /* set options */
  curl_easy_setopt(handle, CURLOPT_URL, translateCharUTF8(asChar(url)));
  curl_easy_setopt(handle, CURLOPT_NOPROGRESS, asLogical(quiet));
  curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, push_disk);
  curl_easy_setopt(handle, CURLOPT_WRITEDATA, dest);

  /* perform blocking request */
  CURLcode status = curl_easy_perform(handle);

  /* cleanup */
  curl_easy_setopt(handle, CURLOPT_URL, NULL);
  curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 1);
  curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, NULL);
  curl_easy_setopt(handle, CURLOPT_WRITEDATA, NULL);
  fclose(dest);

  if (status != CURLE_OK)
    error(curl_easy_strerror(status));

  /* check for success */
  stop_for_status(handle);
  return ScalarInteger(0);
}
Exemplo n.º 15
0
SEXP R_validate(SEXP x) {
    /* get data from R */
    const char* json = translateCharUTF8(asChar(x));

    /* test for BOM */
    if(json[0] == '\xEF' && json[1] == '\xBB' && json[2] == '\xBF'){
      SEXP output = PROTECT(duplicate(ScalarLogical(0)));
      SEXP msg = PROTECT(Rf_mkString("JSON string contains UTF8 byte-order-mark."));
      setAttrib(output, install("err"), msg);
      UNPROTECT(2);
      return(output);
    }

    /* allocate a parser */
    yajl_handle hand = yajl_alloc(NULL, NULL, NULL);

    /* parser options */
    //yajl_config(hand, yajl_dont_validate_strings, 1);

    /* go parse */
    const size_t rd = strlen(json);
    yajl_status stat = yajl_parse(hand, (const unsigned char*) json, rd);
    if(stat == yajl_status_ok) {
      stat = yajl_complete_parse(hand);
    }

    SEXP output = PROTECT(duplicate(ScalarLogical(!stat)));

    //error message
    if (stat != yajl_status_ok) {
        unsigned char* str = yajl_get_error(hand, 1, (const unsigned char*) json, rd);
        SEXP errstr = PROTECT(mkString((const char *) str));
        SEXP offset = PROTECT(ScalarInteger(yajl_get_bytes_consumed(hand)));
        yajl_free_error(hand, str);
        setAttrib(output, install("offset"), offset);
        setAttrib(output, install("err"), errstr);
        UNPROTECT(2);
    }

    /* return boolean vec (0 means no errors, means is valid) */
    yajl_free(hand);
    UNPROTECT(1);
    return output;
}
Exemplo n.º 16
0
SEXP R_mongo_collection_insert_page(SEXP ptr_col, SEXP json_vec, SEXP stop_on_error){
  if(!isString(json_vec) || !length(json_vec))
    stop("json_vec must be character string of at least length 1");

  //ordered means serial execution
  bool ordered = asLogical(stop_on_error);

  //create bulk operation
  bson_error_t err;
  bson_t *b;
  bson_t reply;
  mongoc_bulk_operation_t *bulk = mongoc_collection_create_bulk_operation (r2col(ptr_col), ordered, NULL);
  for(int i = 0; i < length(json_vec); i++){
    b = bson_new_from_json ((uint8_t*)translateCharUTF8(asChar(STRING_ELT(json_vec, i))), -1, &err);
    if(!b){
      mongoc_bulk_operation_destroy (bulk);
      stop(err.message);
    }
    mongoc_bulk_operation_insert(bulk, b);
    bson_destroy (b);
    b = NULL;
  }

  //execute bulk operation
  bool success = mongoc_bulk_operation_execute (bulk, &reply, &err);
  mongoc_bulk_operation_destroy (bulk);

  //check for errors
  if(!success){
    if(ordered){
      Rf_errorcall(R_NilValue, err.message);
    } else {
      Rf_warningcall(R_NilValue, "Not all inserts were successful: %s\n", err.message);
    }
  }

  //get output
  SEXP out = PROTECT(bson2list(&reply));
  bson_destroy (&reply);
  UNPROTECT(1);
  return out;
}
Exemplo n.º 17
0
SEXP R_mongo_client_new(SEXP uri_string) {
  mongoc_client_t *client = mongoc_client_new (translateCharUTF8(asChar(uri_string)));
  if(!client)
    stop("Invalid uri_string. Try mongodb://localhost");

  //set ssl certificates here
  if (mongoc_uri_get_ssl (mongoc_client_get_uri(client))) {
    mongoc_client_set_ssl_opts(client, mongoc_ssl_opt_get_default());
  }

  //verify that server is online
  //can fail if user has limited priv
  /*
  bson_error_t err;
  if(!mongoc_client_get_server_status(client, NULL, NULL, &err)){
    mongoc_client_destroy(client);
    stop(err.message);
  }
  */

  return client2r(client);
}
Exemplo n.º 18
0
SEXP R_parse(SEXP x, SEXP bigint_as_char) {
    /* get data from R */
    const char* json = translateCharUTF8(asChar(x));
    const int bigint = asLogical(bigint_as_char);

    /* ignore BOM as suggested by RFC */
    if(json[0] == '\xEF' && json[1] == '\xBB' && json[2] == '\xBF'){
      warning("JSON string contains (illegal) UTF8 byte-order-mark!");
      json = json + 3;
    }

    /* parse json */
    char errbuf[1024];
    yajl_val node = yajl_tree_parse(json, errbuf, sizeof(errbuf));

    /* parser error */
    if (!node) {
      error(errbuf);
    }
    SEXP out = ParseValue(node, bigint);
    yajl_tree_free(node);
    return(out);
}
Exemplo n.º 19
0
Arquivo: handle.c Projeto: cran/curl
SEXP R_handle_setopt(SEXP ptr, SEXP keys, SEXP values){
  CURL *handle = get_handle(ptr);
  SEXP prot = R_ExternalPtrProtected(ptr);
  SEXP optnames = PROTECT(getAttrib(values, R_NamesSymbol));

  if(!isInteger(keys))
    error("keys` must be an integer");

  if(!isVector(values))
    error("`values` must be a list");

  for(int i = 0; i < length(keys); i++){
    int key = INTEGER(keys)[i];
    const char* optname = CHAR(STRING_ELT(optnames, i));
    SEXP val = VECTOR_ELT(values, i);
    if(val == R_NilValue){
      assert(curl_easy_setopt(handle, key, NULL));
#ifdef HAS_XFERINFOFUNCTION
    } else if (key == CURLOPT_XFERINFOFUNCTION) {
      if (TYPEOF(val) != CLOSXP)
        error("Value for option %s (%d) must be a function.", optname, key);

      assert(curl_easy_setopt(handle, CURLOPT_XFERINFOFUNCTION,
                              (curl_progress_callback) R_curl_callback_xferinfo));
      assert(curl_easy_setopt(handle, CURLOPT_XFERINFODATA, val));
      assert(curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0));
      SET_VECTOR_ELT(prot, 1, val); //protect gc
#endif
    } else if (key == CURLOPT_PROGRESSFUNCTION) {
      if (TYPEOF(val) != CLOSXP)
        error("Value for option %s (%d) must be a function.", optname, key);

      assert(curl_easy_setopt(handle, CURLOPT_PROGRESSFUNCTION,
        (curl_progress_callback) R_curl_callback_progress));
      assert(curl_easy_setopt(handle, CURLOPT_PROGRESSDATA, val));
      assert(curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0));
      SET_VECTOR_ELT(prot, 2, val); //protect gc
    } else if (key == CURLOPT_READFUNCTION) {
      if (TYPEOF(val) != CLOSXP)
        error("Value for option %s (%d) must be a function.", optname, key);

      assert(curl_easy_setopt(handle, CURLOPT_READFUNCTION,
        (curl_read_callback) R_curl_callback_read));
      assert(curl_easy_setopt(handle, CURLOPT_READDATA, val));
      SET_VECTOR_ELT(prot, 3, val); //protect gc
    } else if (key == CURLOPT_DEBUGFUNCTION) {
      if (TYPEOF(val) != CLOSXP)
        error("Value for option %s (%d) must be a function.", optname, key);

      assert(curl_easy_setopt(handle, CURLOPT_DEBUGFUNCTION,
        (curl_debug_callback) R_curl_callback_debug));
      assert(curl_easy_setopt(handle, CURLOPT_DEBUGDATA, val));
      SET_VECTOR_ELT(prot, 4, val); //protect gc
    } else if (key == CURLOPT_URL) {
      /* always use utf-8 for urls */
      const char * url_utf8 = translateCharUTF8(STRING_ELT(val, 0));
      assert(curl_easy_setopt(handle, CURLOPT_URL, url_utf8));
    } else if (opt_is_linked_list(key)) {
      error("Option %s (%d) not supported.", optname, key);
    } else if(key < 10000){
      if(!isNumeric(val) || length(val) != 1) {
        error("Value for option %s (%d) must be a number.", optname, key);
      }
      assert(curl_easy_setopt(handle, key, (long) asInteger(val)));
    } else if(key < 20000){
      switch (TYPEOF(val)) {
      case RAWSXP:
        if(key == CURLOPT_POSTFIELDS || key == CURLOPT_COPYPOSTFIELDS)
          assert(curl_easy_setopt(handle, CURLOPT_POSTFIELDSIZE_LARGE, (curl_off_t) Rf_length(val)));
        assert(curl_easy_setopt(handle, key, RAW(val)));
        break;
      case STRSXP:
        if (length(val) != 1)
          error("Value for option %s (%d) must be length-1 string", optname, key);
        assert(curl_easy_setopt(handle, key, CHAR(STRING_ELT(val, 0))));
        break;
      default:
        error("Value for option %s (%d) must be a string or raw vector.", optname, key);
      }
    } else if(key >= 30000 && key < 40000){
      if(!isNumeric(val) || length(val) != 1) {
        error("Value for option %s (%d) must be a number.", optname, key);
      }
      assert(curl_easy_setopt(handle, key, (curl_off_t) asReal(val)));
    } else {
      error("Option %s (%d) not supported.", optname, key);
    }
  }
  UNPROTECT(1);
  return ScalarLogical(1);
}
Exemplo n.º 20
0
/* Note that NA_STRING is not handled separately here.  This is
   deliberate -- see ?paste -- and implicitly coerces it to "NA"
*/
SEXP attribute_hidden do_paste(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP ans, collapse, sep, x;
    int sepw, u_sepw, ienc;
    R_xlen_t i, j, k, maxlen, nx, pwidth;
    const char *s, *cbuf, *csep=NULL, *u_csep=NULL;
    char *buf;
    Rboolean allKnown, anyKnown, use_UTF8, use_Bytes,
	sepASCII = TRUE, sepUTF8 = FALSE, sepBytes = FALSE, sepKnown = FALSE,
	use_sep = (PRIMVAL(op) == 0);
    const void *vmax;

    checkArity(op, args);

    /* We use formatting and so we must initialize printing. */

    PrintDefaults();

    /* Check the arguments */

    x = CAR(args);
    if (!isVectorList(x))
	error(_("invalid first argument"));
    nx = xlength(x);

    if(use_sep) { /* paste(..., sep, .) */
	sep = CADR(args);
	if (!isString(sep) || LENGTH(sep) <= 0 || STRING_ELT(sep, 0) == NA_STRING)
	    error(_("invalid separator"));
	sep = STRING_ELT(sep, 0);
	csep = translateChar(sep);
	u_sepw = sepw = (int) strlen(csep); // will be short
	sepASCII = strIsASCII(csep);
	sepKnown = ENC_KNOWN(sep) > 0;
	sepUTF8 = IS_UTF8(sep);
	sepBytes = IS_BYTES(sep);
	collapse = CADDR(args);
    } else { /* paste0(..., .) */
	u_sepw = sepw = 0; sep = R_NilValue;/* -Wall */
	collapse = CADR(args);
    }
    if (!isNull(collapse))
	if(!isString(collapse) || LENGTH(collapse) <= 0 ||
	   STRING_ELT(collapse, 0) == NA_STRING)
	    error(_("invalid '%s' argument"), "collapse");
    if(nx == 0)
	return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0);


    /* Maximum argument length, coerce if needed */

    maxlen = 0;
    for (j = 0; j < nx; j++) {
	if (!isString(VECTOR_ELT(x, j))) {
	    /* formerly in R code: moved to C for speed */
	    SEXP call, xj = VECTOR_ELT(x, j);
	    if(OBJECT(xj)) { /* method dispatch */
		PROTECT(call = lang2(install("as.character"), xj));
		SET_VECTOR_ELT(x, j, eval(call, env));
		UNPROTECT(1);
	    } else if (isSymbol(xj))
		SET_VECTOR_ELT(x, j, ScalarString(PRINTNAME(xj)));
	    else
		SET_VECTOR_ELT(x, j, coerceVector(xj, STRSXP));

	    if (!isString(VECTOR_ELT(x, j)))
		error(_("non-string argument to internal 'paste'"));
	}
	if(xlength(VECTOR_ELT(x, j)) > maxlen)
	    maxlen = xlength(VECTOR_ELT(x, j));
    }
    if(maxlen == 0)
	return (!isNull(collapse)) ? mkString("") : allocVector(STRSXP, 0);

    PROTECT(ans = allocVector(STRSXP, maxlen));

    for (i = 0; i < maxlen; i++) {
	/* Strategy for marking the encoding: if all inputs (including
	 * the separator) are ASCII, so is the output and we don't
	 * need to mark.  Otherwise if all non-ASCII inputs are of
	 * declared encoding, we should mark.
	 * Need to be careful only to include separator if it is used.
	 */
	anyKnown = FALSE; allKnown = TRUE; use_UTF8 = FALSE; use_Bytes = FALSE;
	if(nx > 1) {
	    allKnown = sepKnown || sepASCII;
	    anyKnown = sepKnown;
	    use_UTF8 = sepUTF8;
	    use_Bytes = sepBytes;
	}

	pwidth = 0;
	for (j = 0; j < nx; j++) {
	    k = xlength(VECTOR_ELT(x, j));
	    if (k > 0) {
		SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k);
		if(IS_UTF8(cs)) use_UTF8 = TRUE;
		if(IS_BYTES(cs)) use_Bytes = TRUE;
	    }
	}
	if (use_Bytes) use_UTF8 = FALSE;
	vmax = vmaxget();
	for (j = 0; j < nx; j++) {
	    k = xlength(VECTOR_ELT(x, j));
	    if (k > 0) {
		if(use_Bytes)
		    pwidth += strlen(CHAR(STRING_ELT(VECTOR_ELT(x, j), i % k)));
		else if(use_UTF8)
		    pwidth += strlen(translateCharUTF8(STRING_ELT(VECTOR_ELT(x, j), i % k)));
		else
		    pwidth += strlen(translateChar(STRING_ELT(VECTOR_ELT(x, j), i % k)));
		vmaxset(vmax);
	    }
	}
	if(use_sep) {
	    if (use_UTF8 && !u_csep) {
		u_csep = translateCharUTF8(sep);
		u_sepw = (int) strlen(u_csep); // will be short
	    }
	    pwidth += (nx - 1) * (use_UTF8 ? u_sepw : sepw);
	}
	if (pwidth > INT_MAX)
	    error(_("result would exceed 2^31-1 bytes"));
	cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff);
	vmax = vmaxget();
	for (j = 0; j < nx; j++) {
	    k = xlength(VECTOR_ELT(x, j));
	    if (k > 0) {
		SEXP cs = STRING_ELT(VECTOR_ELT(x, j), i % k);
		if (use_UTF8) {
		    s = translateCharUTF8(cs);
		    strcpy(buf, s);
		    buf += strlen(s);
		} else {
		    s = use_Bytes ? CHAR(cs) : translateChar(cs);
		    strcpy(buf, s);
		    buf += strlen(s);
		    allKnown = allKnown && (strIsASCII(s) || (ENC_KNOWN(cs)> 0));
		    anyKnown = anyKnown || (ENC_KNOWN(cs)> 0);
		}
	    }
	    if (sepw != 0 && j != nx - 1) {
		if (use_UTF8) {
		    strcpy(buf, u_csep);
		    buf += u_sepw;
		} else {
		    strcpy(buf, csep);
		    buf += sepw;
		}
	    }
	    vmax = vmaxget();
	}
	ienc = 0;
	if(use_UTF8) ienc = CE_UTF8;
	else if(use_Bytes) ienc = CE_BYTES;
	else if(anyKnown && allKnown) {
	    if(known_to_be_latin1) ienc = CE_LATIN1;
	    if(known_to_be_utf8) ienc = CE_UTF8;
	}
	SET_STRING_ELT(ans, i, mkCharCE(cbuf, ienc));
    }

    /* Now collapse, if required. */

    if(collapse != R_NilValue && (nx = XLENGTH(ans)) > 0) {
	sep = STRING_ELT(collapse, 0);
	use_UTF8 = IS_UTF8(sep);
	use_Bytes = IS_BYTES(sep);
	for (i = 0; i < nx; i++) {
	    if(IS_UTF8(STRING_ELT(ans, i))) use_UTF8 = TRUE;
	    if(IS_BYTES(STRING_ELT(ans, i))) use_Bytes = TRUE;
	}
	if(use_Bytes) {
	    csep = CHAR(sep);
	    use_UTF8 = FALSE;
	} else if(use_UTF8)
	    csep = translateCharUTF8(sep);
	else
	    csep = translateChar(sep);
	sepw = (int) strlen(csep);
	anyKnown = ENC_KNOWN(sep) > 0;
	allKnown = anyKnown || strIsASCII(csep);
	pwidth = 0;
	vmax = vmaxget();
	for (i = 0; i < nx; i++)
	    if(use_UTF8) {
		pwidth += strlen(translateCharUTF8(STRING_ELT(ans, i)));
		vmaxset(vmax);
	    } else /* already translated */
		pwidth += strlen(CHAR(STRING_ELT(ans, i)));
	pwidth += (nx - 1) * sepw;
	if (pwidth > INT_MAX)
	    error(_("result would exceed 2^31-1 bytes"));
	cbuf = buf = R_AllocStringBuffer(pwidth, &cbuff);
	vmax = vmaxget();
	for (i = 0; i < nx; i++) {
	    if(i > 0) {
		strcpy(buf, csep);
		buf += sepw;
	    }
	    if(use_UTF8)
		s = translateCharUTF8(STRING_ELT(ans, i));
	    else /* already translated */
		s = CHAR(STRING_ELT(ans, i));
	    strcpy(buf, s);
	    while (*buf)
		buf++;
	    allKnown = allKnown &&
		(strIsASCII(s) || (ENC_KNOWN(STRING_ELT(ans, i)) > 0));
	    anyKnown = anyKnown || (ENC_KNOWN(STRING_ELT(ans, i)) > 0);
	    if(use_UTF8) vmaxset(vmax);
	}
	UNPROTECT(1);
	ienc = CE_NATIVE;
	if(use_UTF8) ienc = CE_UTF8;
	else if(use_Bytes) ienc = CE_BYTES;
	else if(anyKnown && allKnown) {
	    if(known_to_be_latin1) ienc = CE_LATIN1;
	    if(known_to_be_utf8) ienc = CE_UTF8;
	}
	PROTECT(ans = allocVector(STRSXP, 1));
	SET_STRING_ELT(ans, 0, mkCharCE(cbuf, ienc));
    }
    R_FreeStringBufferL(&cbuff);
    UNPROTECT(1);
    return ans;
}
Exemplo n.º 21
0
SEXP R_reformat(SEXP x, SEXP pretty, SEXP indent_string) {
    yajl_status stat;
    yajl_handle hand;
    yajl_gen g;
    SEXP output;

    /* init generator */
    g = yajl_gen_alloc(NULL);
    yajl_gen_config(g, yajl_gen_beautify, asInteger(pretty));
    yajl_gen_config(g, yajl_gen_indent_string, translateCharUTF8(asChar(indent_string)));
    yajl_gen_config(g, yajl_gen_validate_utf8, 0);
    yajl_gen_config(g, yajl_gen_escape_solidus, 1); //modified to only escape for "</"

    /* init parser */
    hand = yajl_alloc(&callbacks, NULL, (void *) g);

    /* get data from R */
    const char* json = translateCharUTF8(asChar(x));

    /* ignore BOM */
    if(json[0] == '\xEF' && json[1] == '\xBB' && json[2] == '\xBF'){
      json = json + 3;
    }

    /* Get length (after removing bom) */
    const size_t rd = strlen(json);

    /* parse */
    stat = yajl_parse(hand, (const unsigned char*) json, rd);
    if(stat == yajl_status_ok) {
      stat = yajl_complete_parse(hand);
    }

    //error message
    if (stat != yajl_status_ok) {
      unsigned char* str = yajl_get_error(hand, 1, (const unsigned char*) json, rd);
      output = PROTECT(mkString((const char*) str));
      yajl_free_error(hand, str);
    } else {
      //create R object
      const unsigned char* buf;
      size_t len;
      yajl_gen_get_buf(g, &buf, &len);

      //force as UTF8 string
      output = PROTECT(allocVector(STRSXP, 1));
      SET_STRING_ELT(output, 0, mkCharCE((const char*) buf, CE_UTF8));
      setAttrib(output, R_ClassSymbol, mkString("json"));
    }

    /* clean up */
    yajl_gen_clear(g);
    yajl_gen_free(g);
    yajl_free(hand);

    /* return boolean vec (0 means no errors, means is valid) */
    SEXP vec = PROTECT(allocVector(VECSXP, 2));
    SET_VECTOR_ELT(vec, 0, ScalarInteger(stat));
    SET_VECTOR_ELT(vec, 1, output);
    UNPROTECT(2);
    return vec;
}
Exemplo n.º 22
0
SEXP do_mchoice_match(SEXP x, SEXP table, SEXP nomatch) 
{
     SEXP elm_index;            /* Storage for value of first row of 
                                   first match of each element in x *\/ */
     R_len_t len;               /* Number of elements in x */
     R_len_t t_len;             /* Number of elements in table REMOVE*/
     R_len_t nfound = 0;        /* count of number of elements of
                                   x matched in table */
     char *str_ptr;             /* current location pointer */
     const char *str;
		 int i, j, k, comp, slen;   /* REMOVE k */

     S_EVALUATOR
     /* get number of elements in x */
     len = LENGTH(x);
     
     /* allocate an index vector of the same length as x */
     PROTECT(elm_index = NEW_INTEGER(len));
     
     /* set all values in elm_index to 0 */
     memset((int *)INTEGER_POINTER(elm_index), 0, len * sizeof(int));

     /* count number of x values that are zero and set nfound to that */
     for(i=0; i < len; i++) {
          if(INTEGER_POINTER(x)[i] == 0) {
               INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0];
               nfound++;
          }
     }
     

     /* iterate through each element of table looking for matches to values in x.
        it is done this way because parsing the mChoice string is expensive and looping is not. */
     for(i=0; i < LENGTH(table) && nfound < len; i++) {
          if(STRING_ELT(table, i) == NA_STRING)
               continue;
          
          str = translateCharUTF8(STRING_ELT(table, i));
          slen = strlen(str) + 1;
          
          str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff);
          strncpy(str_ptr, str, slen);
          str_ptr[slen] = '\0';
          
          while(str_ptr != NULL && nfound < len) {
               /* get the next component of the mChoice string */
               comp = get_next_mchoice(&str_ptr);
               
               /* if comp is zero the next component was blank continue */
               if(comp == 0)
                    continue;
                    
               /* Compare the component to all elements of x */
               for(j = 0; j < len && nfound < len; j++) {
                    /* If the element index is not zero that value has been prevously
                       matched continue to next value */
                    if(INTEGER_POINTER(elm_index)[j] || INTEGER_POINTER(x)[j] == 0)
                         continue;
                    
                    if(INTEGER_POINTER(x)[j] == comp) {
                         nfound++;
                         INTEGER_POINTER(elm_index)[j] = i+1;
                    }
               }
          }
     }
     
     Hmisc_FreeStringBuffer(&cbuff);
     
     if(nfound < len) {
          /* if not all elements of x are matched to those in table
             set the elements of elmt_index that are zero to the value 
             of nomatch */
          for(i=0; i < len; i++) {
               if(INTEGER_POINTER(elm_index)[i] == 0) {
                    INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0];
               }
          }
     }

     UNPROTECT(1);
     return(elm_index);
}
Exemplo n.º 23
0
/* a version of EncodeElement with different escaping of char strings */
static const char
*EncodeElementS(SEXP x, int indx,
                R_StringBuffer *buff, char cdec)
{
    switch(TYPEOF(x)) {
       case STRSXP:
       {
	    const char *s = translateCharUTF8(STRING_ELT(x, indx));
	    char *u, *cbuf;
            int j, len, blen, offset;
	    len = strlen(s);
	    blen = len * 2 + 1;
            R_AllocStringBuffer(blen, buff);
	    u = cbuf = buff->data;
	    offset = 0;
	    for (j = 0; j < len; j++){
                switch(s[offset+j]){
/* http://www.postgresql.org/docs/8.1/static/sql-copy.html */
                    case '\b':
                        *u++ = '\\';
                        *u++ = 'b';
                        break;
                    case '\f':
                        *u++ = '\\';
                        *u++ = 'f';
                        break;
                    case '\n':
                        *u++ = '\\';
                        *u++ = 'n';
                        break;
                    case '\r':
                        *u++ = '\\';
                        *u++ = 'r';
                        break;
                    case '\t':
                        *u++ = '\\';
                        *u++ = 't';
                        break;
                    case '\v':
                        *u++ = '\\';
                        *u++ = 'v';
                        break;
                    case '\\':
                        *u++ = '\\';
                        *u++ = '\\';
                        break;
                    default:
		        *u++ = s[offset+j];
                }
            }
            *u = '\0';
            return buff->data;
        }
        case LGLSXP:{
            int value;
            value = LOGICAL(x)[indx];
            if(value == TRUE) return "true";
            if(value == FALSE) return "false";
            return "\\N";
        }
        case INTSXP:{
            int value;
            value = INTEGER(x)[indx];
            if(ISNA(value)) return "\\N";
            snprintf(buff->data, buff->bufsize, "%d", value);
            return buff->data;
        }
        case REALSXP:{
            double value = REAL(x)[indx];
            if (!R_FINITE(value)) {
                if(ISNA(value)) return "\\N";
                else if(ISNAN(value)) return "NaN";
                else if(value > 0) return "Inf";
                else return "-Inf";
            }
            snprintf(buff->data, buff->bufsize, "%.15g", value);
            return buff->data;
        }
        default:
            return NULL; 
    }
    return NULL; 
}
Exemplo n.º 24
0
Arquivo: extra.c Projeto: csilles/cxxr
	/* GetNativeSystemInfo is XP or later */
	pGNSI = (PGNSI)
	    GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
			   "GetNativeSystemInfo");
	if(NULL != pGNSI) pGNSI(&si); else GetSystemInfo(&si);
	if(si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64)
	    strcat(ver, " x64");
    }
    SET_STRING_ELT(ans, 1, mkChar(ver));

    if((int)osvi.dwMajorVersion >= 5) {
	if(osvi.wServicePackMajor > 0)
	    snprintf(ver, 256, "build %d, Service Pack %d",
		     LOWORD(osvi.dwBuildNumber),
		     (int) osvi.wServicePackMajor);
	else snprintf(ver, 256, "build %d", LOWORD(osvi.dwBuildNumber));
    } else
	snprintf(ver, 256, "build %d, %s",
		 LOWORD(osvi.dwBuildNumber), osvi.szCSDVersion);
    SET_STRING_ELT(ans, 2, mkChar(ver));
    GetComputerNameW(name, &namelen);
    wcstoutf8(buf, name, 1000);
    SET_STRING_ELT(ans, 3, mkCharCE(buf, CE_UTF8));
#ifdef WIN64
    SET_STRING_ELT(ans, 4, mkChar("x86-64"));
#else
    SET_STRING_ELT(ans, 4, mkChar("x86"));
#endif
    GetUserNameW(user, &userlen);
    wcstoutf8(buf, user, 1000);
    SET_STRING_ELT(ans, 5, mkCharCE(buf, CE_UTF8));
    SET_STRING_ELT(ans, 6, STRING_ELT(ans, 5));
    SET_STRING_ELT(ans, 7, STRING_ELT(ans, 5));
    PROTECT(ansnames = allocVector(STRSXP, 8));
    SET_STRING_ELT(ansnames, 0, mkChar("sysname"));
    SET_STRING_ELT(ansnames, 1, mkChar("release"));
    SET_STRING_ELT(ansnames, 2, mkChar("version"));
    SET_STRING_ELT(ansnames, 3, mkChar("nodename"));
    SET_STRING_ELT(ansnames, 4, mkChar("machine"));
    SET_STRING_ELT(ansnames, 5, mkChar("login"));
    SET_STRING_ELT(ansnames, 6, mkChar("user"));
    SET_STRING_ELT(ansnames, 7, mkChar("effective_user"));
    setAttrib(ans, R_NamesSymbol, ansnames);
    UNPROTECT(2);
    return ans;
}

SEXP do_syssleep(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    DWORD mtime;
    int ntime;
    double time;

    checkArity(op, args);
    time = asReal(CAR(args));
    if (ISNAN(time) || time < 0)
	errorcall(call, _("invalid '%s' value"), "time");
    ntime = 1000*(time) + 0.5;
    while (ntime > 0) {
	mtime = min(500, ntime);
	ntime -= mtime;
	Sleep(mtime);
	R_ProcessEvents();
    }
    return R_NilValue;
}

#ifdef LEA_MALLOC
#define MALLINFO_FIELD_TYPE size_t
struct mallinfo {
    MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
    MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
    MALLINFO_FIELD_TYPE smblks;   /* number of fastbin blocks */
    MALLINFO_FIELD_TYPE hblks;    /* number of mmapped regions */
    MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
    MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
    MALLINFO_FIELD_TYPE fsmblks;  /* space available in freed fastbin blocks */
    MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
    MALLINFO_FIELD_TYPE fordblks; /* total free space */
    MALLINFO_FIELD_TYPE keepcost; /* top-most, releasable (via malloc_trim) space */
};
extern R_size_t R_max_memory;

struct mallinfo mallinfo(void);
#endif 

SEXP in_memsize(SEXP ssize)
{
    SEXP ans;
    int maxmem = NA_LOGICAL;

    if(isLogical(ssize)) 
	maxmem = asLogical(ssize);
    else if(isReal(ssize)) {
	R_size_t newmax;
	double mem = asReal(ssize);
	if (!R_FINITE(mem))
	    error(_("incorrect argument"));
#ifdef LEA_MALLOC
#ifndef WIN64
	if(mem >= 4096)
	    error(_("don't be silly!: your machine has a 4Gb address limit"));
#endif
	newmax = mem * 1048576.0;
	if (newmax < R_max_memory)
	    warning(_("cannot decrease memory limit: ignored"));
	else
	    R_max_memory = newmax;
#endif
    } else
	error(_("incorrect argument"));
	
    PROTECT(ans = allocVector(REALSXP, 1));
#ifdef LEA_MALLOC
    if(maxmem == NA_LOGICAL)
	REAL(ans)[0] = R_max_memory;
    else if(maxmem)
	REAL(ans)[0] = mallinfo().usmblks;
    else
	REAL(ans)[0] = mallinfo().uordblks;
    REAL(ans)[0] /= 1048576.0;
#else
    REAL(ans)[0] = NA_REAL;
#endif
    UNPROTECT(1);
    return ans;
}

SEXP do_dllversion(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP path = R_NilValue, ans;
    const wchar_t *dll;
    DWORD dwVerInfoSize;
    DWORD dwVerHnd;

    checkArity(op, args);
    path = CAR(args);
    if(!isString(path) || LENGTH(path) != 1)
	errorcall(call, _("invalid '%s' argument"), "path");
    dll = filenameToWchar(STRING_ELT(path, 0), FALSE);
    dwVerInfoSize = GetFileVersionInfoSizeW(dll, &dwVerHnd);
    PROTECT(ans = allocVector(STRSXP, 2));
    SET_STRING_ELT(ans, 0, mkChar(""));
    SET_STRING_ELT(ans, 1, mkChar(""));
    if (dwVerInfoSize) {
	BOOL  fRet;
	LPSTR lpstrVffInfo;
	LPSTR lszVer = NULL;
	UINT  cchVer = 0;

	lpstrVffInfo = (LPSTR) malloc(dwVerInfoSize);
	if (GetFileVersionInfoW(dll, 0L, dwVerInfoSize, lpstrVffInfo)) {

	    fRet = VerQueryValue(lpstrVffInfo,
				 TEXT("\\StringFileInfo\\040904E4\\FileVersion"),
				 (LPVOID)&lszVer, &cchVer);
	    if(fRet) SET_STRING_ELT(ans, 0, mkChar(lszVer));

	    fRet = VerQueryValue(lpstrVffInfo,
				 TEXT("\\StringFileInfo\\040904E4\\R Version"),
				 (LPVOID)&lszVer, &cchVer);
	    if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer));
	    else {
		fRet = VerQueryValue(lpstrVffInfo,
				     TEXT("\\StringFileInfo\\040904E4\\Compiled under R Version"),
				     (LPVOID)&lszVer, &cchVer);
		if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer));
	    }

	} else ans = R_NilValue;
	free(lpstrVffInfo);
    } else ans = R_NilValue;
    UNPROTECT(1);
    return ans;
}



int Rwin_rename(const char *from, const char *to)
{
    return (MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0);
}

int Rwin_wrename(const wchar_t *from, const wchar_t *to)
{
    return (MoveFileExW(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0);
}


const char *formatError(DWORD res)
{
    static char buf[1000], *p;
    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
		  NULL, res,
		  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
		  buf, 1000, NULL);
    p = buf+strlen(buf) -1;
    if(*p == '\n') *p = '\0';
    p = buf+strlen(buf) -1;
    if(*p == '\r') *p = '\0';
    p = buf+strlen(buf) -1;
    if(*p == '.') *p = '\0';
    return buf;
}


void R_UTF8fixslash(char *s); /* from main/util.c */
SEXP do_normalizepath(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP ans, paths = CAR(args), el, slash;
    int i, n = LENGTH(paths), res;
    char tmp[MAX_PATH], longpath[MAX_PATH], *tmp2;
    wchar_t wtmp[32768], wlongpath[32768], *wtmp2;
    int mustWork, fslash = 0;

    checkArity(op, args);
    if(!isString(paths))
	errorcall(call, _("'path' must be a character vector"));

    slash = CADR(args);
    if(!isString(slash) || LENGTH(slash) != 1)
	errorcall(call, "'winslash' must be a character string");
    const char *sl = CHAR(STRING_ELT(slash, 0));
    if (strcmp(sl, "/") && strcmp(sl, "\\"))
	errorcall(call, "'winslash' must be '/' or '\\\\'");
    if (strcmp(sl, "/") == 0) fslash = 1;
    
    mustWork = asLogical(CADDR(args));

    PROTECT(ans = allocVector(STRSXP, n));
    for (i = 0; i < n; i++) {
    	int warn = 0;
    	SEXP result;
	el = STRING_ELT(paths, i);
	result = el;
	if(getCharCE(el) == CE_UTF8) {
	    if ((res = GetFullPathNameW(filenameToWchar(el, FALSE), 32768, 
					wtmp, &wtmp2)) && res <= 32768) {
		if ((res = GetLongPathNameW(wtmp, wlongpath, 32768))
		    && res <= 32768) {
	    	    wcstoutf8(longpath, wlongpath, wcslen(wlongpath)+1);
		    if(fslash) R_UTF8fixslash(longpath);
	    	    result = mkCharCE(longpath, CE_UTF8);
		} else if(mustWork == 1) {
		    errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			      translateChar(el), 
			      formatError(GetLastError()));	
	    	} else {
	    	    wcstoutf8(tmp, wtmp, wcslen(wtmp)+1);
		    if(fslash) R_UTF8fixslash(tmp);
	    	    result = mkCharCE(tmp, CE_UTF8);
	    	    warn = 1;
	    	}
	    } else if(mustWork == 1) {
		errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			  translateChar(el), 
			  formatError(GetLastError()));	
	    } else {
		if (fslash) {
		    strcpy(tmp, translateCharUTF8(el));
		    R_UTF8fixslash(tmp);
	    	    result = mkCharCE(tmp, CE_UTF8);
		}
	    	warn = 1;
	    }
	    if (warn && (mustWork == NA_LOGICAL))
	    	warningcall(call, "path[%d]=\"%ls\": %s", i+1, 
			    filenameToWchar(el,FALSE), 
			    formatError(GetLastError()));
	} else {
	    if ((res = GetFullPathName(translateChar(el), MAX_PATH, tmp, &tmp2)) 
		&& res <= MAX_PATH) {
	    	if ((res = GetLongPathName(tmp, longpath, MAX_PATH))
		    && res <= MAX_PATH) {
		    if(fslash) R_fixslash(longpath);
	    	    result = mkChar(longpath);
		} else if(mustWork == 1) {
		    errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			      translateChar(el), 
			      formatError(GetLastError()));	
	    	} else {
		    if(fslash) R_fixslash(tmp);
	    	    result = mkChar(tmp);
	    	    warn = 1;
	    	}
	    } else if(mustWork == 1) {
		errorcall(call, "path[%d]=\"%s\": %s", i+1, 
			  translateChar(el), 
			  formatError(GetLastError()));	
	    } else {
		if (fslash) {
		    strcpy(tmp, translateChar(el));
		    R_fixslash(tmp);
		    result = mkChar(tmp);
		}
	    	warn = 1;
	    }
	    if (warn && (mustWork == NA_LOGICAL))
		warningcall(call, "path[%d]=\"%s\": %s", i+1, 
			    translateChar(el), 
			    formatError(GetLastError()));	
	}
	SET_STRING_ELT(ans, i, result);
    }
    UNPROTECT(1);
    return ans;
}
Exemplo n.º 25
0
// If we find a non-ASCII, non-NA, non-UTF8 encoding, we try to convert it to UTF8. That is, marked non-ascii/non-UTF8 encodings will always be checked in UTF8 locale. This seems to be the best fix I could think of to put the encoding issues to rest..
// Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues for default setup.
// Fix for #66, #69, #469 and #1293
// TODO: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings.
SEXP ENC2UTF8(SEXP s) {
    if (!IS_ASCII(s) && s != NA_STRING && !IS_UTF8(s))
        s = mkCharCE(translateCharUTF8(s), CE_UTF8);
    return (s);
}