/*
 * Apply the user's column-level overrides to the detected column types.
 *
 * type     : in/out array of ncol detected type codes; overwritten in place.
 * colNames : per-column (offset,length) pairs relative to `anchor`, or NULL.
 *            A non-positive length gets the default name "V<i>".
 * anchor   : base pointer that colNames[i].off is relative to.
 * ncol     : number of columns.
 *
 * In order, this function:
 *   1. builds colNamesSxp (stored in slot 1 of RCHK) when colNames != NULL;
 *   2. applies colClassesSxp, either a character vector of length 1 or ncol,
 *      or a named list mapping type names to column names/numbers;
 *   3. replaces CT_INT64 by readInt64As when that differs;
 *   4. marks columns CT_DROP according to dropSxp, or else keeps only the
 *      columns listed in selectSxp.
 * Negated type codes are used temporarily to detect duplicate column
 * references and are undone before returning.
 *
 * Always returns true; invalid input is reported through STOP / DTWARN.
 *
 * NOTE(review): when colNames == NULL, colNamesSxp stays R_NilValue yet the
 * name-based paths below still pass it to chmatch()/STRING_ELT() -- confirm
 * callers never combine a NULL header with name-based overrides.
 */
_Bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, int ncol)
{
  // use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c
  if (typeSize[CT_BOOL8_N]!=1) STOP("Internal error: typeSize[CT_BOOL8_N] != 1"); // # nocov
  if (typeSize[CT_STRING]!=8) STOP("Internal error: typeSize[CT_STRING] != 8"); // # nocov

  colNamesSxp = R_NilValue;
  if (colNames!=NULL) {
    SET_VECTOR_ELT(RCHK, 1, colNamesSxp=allocVector(STRSXP, ncol));
    for (int i=0; i<ncol; i++) {
      SEXP elem;
      if (colNames[i].len<=0) {
        char buff[12];  // 'V' + at most 10 digits + NUL
        snprintf(buff, sizeof buff, "V%d", i+1);
        elem = mkChar(buff);  // no PROTECT as passed immediately to SET_STRING_ELT
      } else {
        elem = mkCharLenCE(anchor+colNames[i].off, colNames[i].len, ienc);  // no PROTECT as passed immediately to SET_STRING_ELT
      }
      SET_STRING_ELT(colNamesSxp, i, elem);
    }
  }

  if (length(colClassesSxp)) {
    SEXP typeRName_sxp = PROTECT(allocVector(STRSXP, NUT));
    for (int i=0; i<NUT; i++) SET_STRING_ELT(typeRName_sxp, i, mkChar(typeRName[i]));
    if (isString(colClassesSxp)) {
      SEXP typeEnum_idx = PROTECT(chmatch(colClassesSxp, typeRName_sxp, NUT, FALSE));
      if (LENGTH(colClassesSxp)==1) {
        signed char newType = typeEnum[INTEGER(typeEnum_idx)[0]-1];
        if (newType == CT_DROP) STOP("colClasses='NULL' is not permitted; i.e. to drop all columns and load nothing");
        for (int i=0; i<ncol; i++) type[i]=newType;   // freadMain checks bump up only not down
      } else if (LENGTH(colClassesSxp)==ncol) {
        for (int i=0; i<ncol; i++) {
          if (STRING_ELT(colClassesSxp,i)==NA_STRING) continue;  // user is ok with inherent type for this column
          type[i] = typeEnum[INTEGER(typeEnum_idx)[i]-1];
        }
      } else {
        STOP("colClasses is an unnamed character vector but its length is %d. Must be length 1 or ncol (%d in this case) when unnamed. To specify types for a subset of columns you can either name the items with the column names or pass list() format to colClasses using column names or column numbers. See examples in ?fread.", LENGTH(colClassesSxp), ncol);
      }
      UNPROTECT(1); // typeEnum_idx
    } else {
      if (!isNewList(colClassesSxp)) STOP("CfreadR: colClasses is not type list");
      if (!length(getAttrib(colClassesSxp, R_NamesSymbol))) STOP("CfreadR: colClasses is type list but has no names");
      SEXP typeEnum_idx = PROTECT(chmatch(PROTECT(getAttrib(colClassesSxp, R_NamesSymbol)), typeRName_sxp, NUT, FALSE));
      for (int i=0; i<LENGTH(colClassesSxp); i++) {
        SEXP items;
        signed char thisType = typeEnum[INTEGER(typeEnum_idx)[i]-1];
        items = VECTOR_ELT(colClassesSxp,i);
        if (thisType == CT_DROP) {
          if (!isNull(dropSxp) || !isNull(selectSxp)) {
            if (dropSxp!=items) DTWARN("Ignoring the NULL item in colClasses= because select= or drop= has been used.");
            // package damr has a nice workaround for when NULL didn't work before v1.12.0: it sets drop=col_class$`NULL`. So allow that unambiguous case with no warning.
          } else {
            dropSxp = items;
          }
          continue;
        }
        SEXP itemsInt;
        if (isString(items)) itemsInt = PROTECT(chmatch(items, colNamesSxp, NA_INTEGER, FALSE));
        else                 itemsInt = PROTECT(coerceVector(items, INTSXP));
        // UNPROTECTed directly just after this for loop. No protecti++ here is correct.
        for (int j=0; j<LENGTH(items); j++) {
          int k = INTEGER(itemsInt)[j];
          if (k==NA_INTEGER) {
            if (isString(items)) STOP("Column name '%s' in colClasses[[%d]] not found", CHAR(STRING_ELT(items, j)),i+1);
            else STOP("colClasses[[%d]][%d] is NA", i+1, j+1);
          } else {
            if (k<1 || k>ncol) STOP("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]",k,i+1,j+1,ncol);
            k--;
            if (type[k]<0) STOP("Column '%s' appears more than once in colClasses", CHAR(STRING_ELT(colNamesSxp,k)));
            type[k] = -thisType;     // freadMain checks bump up only not down. Deliberately don't catch here to test freadMain; e.g. test 959
          }
        }
        UNPROTECT(1); // UNPROTECTing itemsInt inside loop to save protection stack
      }
      for (int i=0; i<ncol; i++) if (type[i]<0) type[i] *= -1;   // undo sign; was used to detect duplicates
      UNPROTECT(2);  // typeEnum_idx (+1 for its protect of getAttrib)
    }
    UNPROTECT(1);  // typeRName_sxp
  }

  if (readInt64As != CT_INT64) {
    for (int i=0; i<ncol; i++) if (type[i]==CT_INT64) type[i] = readInt64As;
  }

  if (length(dropSxp)) {
    SEXP itemsInt;
    if (isString(dropSxp)) itemsInt = PROTECT(chmatch(dropSxp, colNamesSxp, NA_INTEGER, FALSE));
    else                   itemsInt = PROTECT(coerceVector(dropSxp, INTSXP));
    for (int j=0; j<LENGTH(itemsInt); j++) {
      int k = INTEGER(itemsInt)[j];
      if (k==NA_INTEGER) {
        if (isString(dropSxp)) {
          DTWARN("Column name '%s' in 'drop' not found", CHAR(STRING_ELT(dropSxp, j)));
        } else {
          DTWARN("drop[%d] is NA", j+1);
        }
      } else {
        if (k<1 || k>ncol) {
          DTWARN("Column number %d (drop[%d]) is out of range [1,ncol=%d]",k,j+1,ncol);
        } else {
          // if (type[k-1] == CT_DROP) DTWARN("drop= contains duplicates");
          // NULL in colClasses didn't work between 1.11.0 and 1.11.8 so people have been using drop= to re-specify the NULL columns in colClasses. Now that NULL in colClasses works
          // from v1.12.0 there is no easy way to distinguish dups in drop= from drop overlapping with NULLs in colClasses. But it's unambiguous that it was intended to remove these
          // columns, so no need for warning.
          type[k-1] = CT_DROP;
        }
      }
    }
    UNPROTECT(1); // itemsInt
  } else if (length(selectSxp)) {
    SEXP tt;
    if (isString(selectSxp)) {
      // invalid cols check part of #1445 moved here (makes sense before reading the file)
      tt = PROTECT(chmatch(selectSxp, colNamesSxp, NA_INTEGER, FALSE));
      for (int i=0; i<length(selectSxp); i++) if (INTEGER(tt)[i]==NA_INTEGER)
        DTWARN("Column name '%s' not found in column name header (case sensitive), skipping.", CHAR(STRING_ELT(selectSxp, i)));
    } else {
      tt = PROTECT(selectSxp); // harmless superfluous PROTECT, for ease of balancing
    }
    for (int i=0; i<LENGTH(tt); i++) {
      int k = isInteger(tt) ? INTEGER(tt)[i] : (int)REAL(tt)[i];
      if (k == NA_INTEGER) continue;
      if (k<0) STOP("Column number %d (select[%d]) negative but should be in the range [1,ncol=%d]. Consider drop= for column exclusion.",k,i+1,ncol);
      if (k==0) STOP("select = 0 (select[%d]) has no meaning. All values of select should be in the range [1,ncol=%d].",i+1,ncol);
      if (k>ncol) STOP("Column number %d (select[%d]) is too large for this table, which only has %d columns.",k,i+1,ncol);
      if (type[k-1]<0) STOP("Column number %d ('%s') has been selected twice by select=", k, CHAR(STRING_ELT(colNamesSxp,k-1)));
      type[k-1] *= -1; // detect and error on duplicates on all types without calling duplicated() at all
    }
    for (int i=0; i<ncol; i++) {
      if (type[i]<0) type[i] *= -1;   // selected: restore the real type
      else type[i]=CT_DROP;           // not selected: drop
    }
    UNPROTECT(1); // tt
  }
  return true;
}

/*
 * "%*%" (op = 0), crossprod (op = 1) or tcrossprod (op = 2)
 *
 * call   : the call, used for error reporting by the "%*%" primitive.
 * op     : PRIMVAL(op) selects the operation as listed above.
 * arg_x  : left operand (numeric or complex vector/matrix).
 * arg_y  : right operand; for crossprod/tcrossprod a NULL y means y = x.
 * rho    : unused here.
 *
 * Vectors are promoted to row or column matrices so that the operands
 * conform where possible; otherwise "non-conformable arguments" is raised.
 * Both operands are coerced to a common mode (complex if either is complex,
 * else real).  The result carries dimnames (and their names) taken from the
 * operands when at least one usable dimnames component exists.
 *
 * The coerced operands are PROTECTed: coerceVector() may return freshly
 * allocated vectors, and the result and dimnames allocations that follow
 * could otherwise trigger a collection while they are still in use.
 */
SEXP attribute_hidden do_earg_matprod(SEXP call, SEXP op, SEXP arg_x, SEXP arg_y, SEXP rho)
{
    int ldx, ldy, nrx, ncx, nry, ncy, mode;
    SEXP x = arg_x, y = arg_y, xdims, ydims, ans;
    Rboolean sym;

    sym = isNull(y);
    if (sym && (PRIMVAL(op) > 0)) y = x;
    if ( !(isNumeric(x) || isComplex(x)) || !(isNumeric(y) || isComplex(y)) )
        errorcall(call, _("requires numeric/complex matrix/vector arguments"));

    xdims = getDimAttrib(x);
    ydims = getDimAttrib(y);
    ldx = length(xdims);
    ldy = length(ydims);

    if (ldx != 2 && ldy != 2) {         /* x and y non-matrices */
        if (PRIMVAL(op) == 0) {
            nrx = 1;
            ncx = LENGTH(x);
        }
        else {
            nrx = LENGTH(x);
            ncx = 1;
        }
        nry = LENGTH(y);
        ncy = 1;
    }
    else if (ldx != 2) {                /* x not a matrix */
        nry = INTEGER(ydims)[0];
        ncy = INTEGER(ydims)[1];
        nrx = 0;
        ncx = 0;
        if (PRIMVAL(op) == 0) {
            if (LENGTH(x) == nry) {     /* x as row vector */
                nrx = 1;
                ncx = nry; /* == LENGTH(x) */
            }
            else if (nry == 1) {        /* x as col vector */
                nrx = LENGTH(x);
                ncx = 1;
            }
        }
        else if (PRIMVAL(op) == 1) { /* crossprod() */
            if (LENGTH(x) == nry) {     /* x is a col vector */
                nrx = nry; /* == LENGTH(x) */
                ncx = 1;
            }
            /* else if (nry == 1) ... not being too tolerant
               to treat x as row vector, as t(x) *is* row vector */
        }
        else { /* tcrossprod */
            if (LENGTH(x) == ncy) {     /* x as row vector */
                nrx = 1;
                ncx = ncy; /* == LENGTH(x) */
            }
            else if (ncy == 1) {        /* x as col vector */
                nrx = LENGTH(x);
                ncx = 1;
            }
        }
    }
    else if (ldy != 2) {                /* y not a matrix */
        nrx = INTEGER(xdims)[0];
        ncx = INTEGER(xdims)[1];
        nry = 0;
        ncy = 0;
        if (PRIMVAL(op) == 0) {
            if (LENGTH(y) == ncx) {     /* y as col vector */
                nry = ncx;
                ncy = 1;
            }
            else if (ncx == 1) {        /* y as row vector */
                nry = 1;
                ncy = LENGTH(y);
            }
        }
        else if (PRIMVAL(op) == 1) { /* crossprod() */
            if (LENGTH(y) == nrx) {     /* y is a col vector */
                nry = nrx;
                ncy = 1;
            }
        }
        else { /* tcrossprod --         y is a col vector */
            nry = LENGTH(y);
            ncy = 1;
        }
    }
    else {                              /* x and y matrices */
        nrx = INTEGER(xdims)[0];
        ncx = INTEGER(xdims)[1];
        nry = INTEGER(ydims)[0];
        ncy = INTEGER(ydims)[1];
    }
    /* nr[ow](.) and nc[ol](.) are now defined for x and y */

    if (PRIMVAL(op) == 0) {
        /* primitive, so use call */
        if (ncx != nry)
            errorcall(call, _("non-conformable arguments"));
    }
    else if (PRIMVAL(op) == 1) {
        if (nrx != nry)
            error(_("non-conformable arguments"));
    }
    else {
        if (ncx != ncy)
            error(_("non-conformable arguments"));
    }

    if (isComplex(x) || isComplex(y))
        mode = CPLXSXP;
    else
        mode = REALSXP;
    /* Keep the (possibly freshly allocated) coerced operands alive across
       the allocations below. */
    PROTECT(x = coerceVector(x, mode));
    PROTECT(y = coerceVector(y, mode));

    if (PRIMVAL(op) == 0) {                     /* op == 0 : matprod() */

        PROTECT(ans = allocMatrix(mode, nrx, ncy));
        if (mode == CPLXSXP)
            cmatprod(COMPLEX(x), nrx, ncx,
                     COMPLEX(y), nry, ncy, COMPLEX(ans));
        else
            matprod(REAL(x), nrx, ncx,
                    REAL(y), nry, ncy, REAL(ans));

        PROTECT(xdims = getDimNamesAttrib(x));
        PROTECT(ydims = getDimNamesAttrib(y));

        if (xdims != R_NilValue || ydims != R_NilValue) {
            SEXP dimnames, dimnamesnames, dnx=R_NilValue, dny=R_NilValue;

            /* allocate dimnames and dimnamesnames */

            PROTECT(dimnames = allocVector(VECSXP, 2));
            PROTECT(dimnamesnames = allocVector(STRSXP, 2));
            if (xdims != R_NilValue) {
                if (ldx == 2 || ncx == 1) {
                    SET_VECTOR_ELT(dimnames, 0, VECTOR_ELT(xdims, 0));
                    dnx = getNamesAttrib(xdims);
                    if(!isNull(dnx))
                        SET_STRING_ELT(dimnamesnames, 0, STRING_ELT(dnx, 0));
                }
            }

/* Shared tail for "%*%" and crossprod(): fill the column dimnames from y,
 * then attach dimnames to `ans` only when at least one component is
 * non-NULL (we used to attach an all-NULL dimnames attribute; from 2.1.0
 * on we no longer do), and finally release dimnames and dimnamesnames. */
#define YDIMS_ET_CETERA                                                 \
            if (ydims != R_NilValue) {                                  \
                if (ldy == 2) {                                         \
                    SET_VECTOR_ELT(dimnames, 1, VECTOR_ELT(ydims, 1));  \
                    dny = getNamesAttrib(ydims);                        \
                    if(!isNull(dny))                                    \
                        SET_STRING_ELT(dimnamesnames, 1, STRING_ELT(dny, 1)); \
                } else if (nry == 1) {                                  \
                    SET_VECTOR_ELT(dimnames, 1, VECTOR_ELT(ydims, 0));  \
                    dny = getNamesAttrib(ydims);                        \
                    if(!isNull(dny))                                    \
                        SET_STRING_ELT(dimnamesnames, 1, STRING_ELT(dny, 0)); \
                }                                                       \
            }                                                           \
                                                                        \
            if (VECTOR_ELT(dimnames,0) != R_NilValue ||                 \
                VECTOR_ELT(dimnames,1) != R_NilValue) {                 \
                if (dnx != R_NilValue || dny != R_NilValue)             \
                    setAttrib(dimnames, R_NamesSymbol, dimnamesnames);  \
                setAttrib(ans, R_DimNamesSymbol, dimnames);             \
            }                                                           \
            UNPROTECT(2)

            YDIMS_ET_CETERA;
        }
    }

    else if (PRIMVAL(op) == 1) {        /* op == 1: crossprod() */

        PROTECT(ans = allocMatrix(mode, ncx, ncy));
        if (mode == CPLXSXP) {
            if(sym)
                ccrossprod(COMPLEX(x), nrx, ncx,
                           COMPLEX(x), nry, ncy, COMPLEX(ans));
            else
                ccrossprod(COMPLEX(x), nrx, ncx,
                           COMPLEX(y), nry, ncy, COMPLEX(ans));
        }
        else {
            if(sym)
                symcrossprod(REAL(x), nrx, ncx, REAL(ans));
            else
                crossprod(REAL(x), nrx, ncx,
                          REAL(y), nry, ncy, REAL(ans));
        }

        PROTECT(xdims = getDimNamesAttrib(x));
        if (sym)
            PROTECT(ydims = xdims);
        else
            PROTECT(ydims = getDimNamesAttrib(y));

        if (xdims != R_NilValue || ydims != R_NilValue) {
            SEXP dimnames, dimnamesnames, dnx=R_NilValue, dny=R_NilValue;

            /* allocate dimnames and dimnamesnames */

            PROTECT(dimnames = allocVector(VECSXP, 2));
            PROTECT(dimnamesnames = allocVector(STRSXP, 2));

            if (xdims != R_NilValue) {
                if (ldx == 2) {/* not nrx==1 : .. fixed, ihaka 2003-09-30 */
                    SET_VECTOR_ELT(dimnames, 0, VECTOR_ELT(xdims, 1));
                    dnx = getNamesAttrib(xdims);
                    if(!isNull(dnx))
                        SET_STRING_ELT(dimnamesnames, 0, STRING_ELT(dnx, 1));
                }
            }

            YDIMS_ET_CETERA;
        }

    }
    else {                                      /* op == 2: tcrossprod() */

        PROTECT(ans = allocMatrix(mode, nrx, nry));
        if (mode == CPLXSXP) {
            if(sym)
                tccrossprod(COMPLEX(x), nrx, ncx,
                            COMPLEX(x), nry, ncy, COMPLEX(ans));
            else
                tccrossprod(COMPLEX(x), nrx, ncx,
                            COMPLEX(y), nry, ncy, COMPLEX(ans));
        }
        else {
            if(sym)
                symtcrossprod(REAL(x), nrx, ncx, REAL(ans));
            else
                tcrossprod(REAL(x), nrx, ncx,
                           REAL(y), nry, ncy, REAL(ans));
        }

        PROTECT(xdims = getDimNamesAttrib(x));
        if (sym)
            PROTECT(ydims = xdims);
        else
            PROTECT(ydims = getDimNamesAttrib(y));

        if (xdims != R_NilValue || ydims != R_NilValue) {
            SEXP dimnames, dimnamesnames, dnx=R_NilValue, dny=R_NilValue;

            /* allocate dimnames and dimnamesnames */

            PROTECT(dimnames = allocVector(VECSXP, 2));
            PROTECT(dimnamesnames = allocVector(STRSXP, 2));

            if (xdims != R_NilValue) {
                if (ldx == 2) {
                    SET_VECTOR_ELT(dimnames, 0, VECTOR_ELT(xdims, 0));
                    dnx = getNamesAttrib(xdims);
                    if(!isNull(dnx))
                        SET_STRING_ELT(dimnamesnames, 0, STRING_ELT(dnx, 0));
                }
            }
            if (ydims != R_NilValue) {
                if (ldy == 2) {
                    SET_VECTOR_ELT(dimnames, 1, VECTOR_ELT(ydims, 0));
                    dny = getNamesAttrib(ydims);
                    if(!isNull(dny))
                        SET_STRING_ELT(dimnamesnames, 1, STRING_ELT(dny, 0));
                }
            }
            if (VECTOR_ELT(dimnames,0) != R_NilValue ||
                VECTOR_ELT(dimnames,1) != R_NilValue) {
                if (dnx != R_NilValue || dny != R_NilValue)
                    setAttrib(dimnames, R_NamesSymbol, dimnamesnames);
                setAttrib(ans, R_DimNamesSymbol, dimnames);
            }

            UNPROTECT(2);
        }
    }
    UNPROTECT(5); /* x, y, ans, xdims, ydims */
    return ans;
}

/*
 * Run a shell command.
 *
 * The first argument is the command (a valid non-empty string), the second
 * the logical `intern` flag.
 *
 * intern = FALSE: the command is run through R_system() and its return
 *   value comes back as a length-1 integer vector; R_Visible is cleared.
 * intern = TRUE : the command is opened with R_popen() and its output is
 *   returned as a character vector, one element per line read (trailing
 *   newline removed).  A non-zero exit status is attached as attribute
 *   "status" (plus "errmsg" when errno is set) and a warning is issued;
 *   status 127 is raised as an error.
 */
SEXP attribute_hidden do_system(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP tlist = R_NilValue;
    int intern = 0;

    checkArity(op, args);
    if (!isValidStringF(CAR(args)))
        error(_("non-empty character argument expected"));
    intern = asLogical(CADR(args));
    if (intern == NA_INTEGER)
        error(_("'intern' must be logical and not NA"));

    if (!intern) { /* intern = FALSE */
#ifdef HAVE_AQUA
        R_Busy(1);
#endif
        tlist = PROTECT(allocVector(INTSXP, 1));
        fflush(stdout);
        INTEGER(tlist)[0] = R_system(translateChar(STRING_ELT(CAR(args), 0)));
#ifdef HAVE_AQUA
        R_Busy(0);
#endif
        UNPROTECT(1);
        R_Visible = 0;
        return tlist;
    }
    else { /* intern = TRUE */
        FILE *pipe;
        char *mode = "r", line[INTERN_BUFSIZE];
        const char *cmd;
        int nlines, pos, status;
        SEXP str, result;

        /* tlist accumulates the lines as a pairlist, newest first; its
           protection slot is swapped each time the head changes */
        PROTECT(tlist);
        cmd = translateChar(STRING_ELT(CAR(args), 0));
        errno = 0; /* precaution */
        pipe = R_popen(cmd, mode);
        if (!pipe)
            error(_("cannot popen '%s', probable reason '%s'"),
                  cmd, strerror(errno));

        nlines = 0;
        while (fgets(line, INTERN_BUFSIZE, pipe)) {
            size_t len = strlen(line);
            if (len >= INTERN_BUFSIZE - 1)
                warning(_("line %d may be truncated in call to system(, intern = TRUE)"), nlines + 1);
            if (len > 0 && line[len - 1] == '\n')
                line[len - 1] = '\0'; /* chop the final newline */
            str = mkChar(line);
            UNPROTECT(1);
            PROTECT(tlist = CONS(str, tlist));
            nlines++;
        }

        status = pclose(pipe);
#ifdef HAVE_SYS_WAIT_H
        if (WIFEXITED(status)) status = WEXITSTATUS(status);
        else status = 0;
#else
        /* assume that this is shifted if a multiple of 256 */
        if ((status % 256) == 0) status = status/256;
#endif
        if ((status & 0xff) == 127) { /* 127, aka -1 */
            if (errno)
                error(_("error in running command: '%s'"), strerror(errno));
            else
                error(_("error in running command"));
        }
        else if (status) {
            if (errno)
                warningcall(R_NilValue,
                            _("running command '%s' had status %d and error message '%s'"),
                            cmd, status, strerror(errno));
            else
                warningcall(R_NilValue,
                            _("running command '%s' had status %d"),
                            cmd, status);
        }

        /* the pairlist is in reverse order, so fill from the back */
        result = PROTECT(allocVector(STRSXP, nlines));
        for (pos = nlines - 1; pos >= 0; pos--) {
            SET_STRING_ELT(result, pos, CAR(tlist));
            tlist = CDR(tlist);
        }
        if (status) {
            SEXP sym = install("status");
            setAttrib(result, sym, ScalarInteger(status));
            if (errno) {
                sym = install("errmsg");
                setAttrib(result, sym, mkString(strerror(errno)));
            }
        }
        UNPROTECT(2);
        return result;
    }
}

/*
 * Rank-revealing QR decomposition of a real matrix using LAPACK.
 *
 * Xin : a real (double) matrix; it is duplicated, not modified.
 * tl  : tolerance in [0, 1] on the reciprocal condition number of the
 *       triangular factor.
 *
 * Returns a list with components
 *   "qr"     : the decomposed copy of Xin as left by dgeqrf (and getGivens),
 *   "rank"   : the estimated rank,
 *   "qraux"  : the auxiliary vector of length min(n, p) from dgeqrf,
 *   "pivot"  : 1:p,
 *   "Givens" : list of the Givens results recorded while reducing the rank,
 * and attributes "useLAPACK" (TRUE) and "rcond" (last dtrcon estimate, or 0
 * when the matrix has a zero extent).
 *
 * While the dtrcon estimate is below `tol`, the smallest diagonal element
 * (in absolute value) of the leading rank x rank triangle is located; unless
 * it is already last, getGivens() is called for it, and the rank is reduced
 * by one.
 */
SEXP lapack_qr(SEXP Xin, SEXP tl)
{
    SEXP ans, Givens, Gcpy, nms, pivot, qraux, X;
    int i, n, nGivens = 0, p, trsz, *Xdims, rank;
    double rcond = 0., tol = asReal(tl), *work;

    /* logical (short-circuit) AND: both predicates are truth values */
    if (!(isReal(Xin) && isMatrix(Xin)))
        error(_("X must be a real (numeric) matrix"));
    if (tol < 0.) error(_("tol, given as %g, must be non-negative"), tol);
    if (tol > 1.) error(_("tol, given as %g, must be <= 1"), tol);

    ans = PROTECT(allocVector(VECSXP,5));
    SET_VECTOR_ELT(ans, 0, X = duplicate(Xin));
    Xdims = INTEGER(coerceVector(getAttrib(X, R_DimSymbol), INTSXP));
    n = Xdims[0]; p = Xdims[1];
    SET_VECTOR_ELT(ans, 2, qraux = allocVector(REALSXP, (n < p) ? n : p));
    SET_VECTOR_ELT(ans, 3, pivot = allocVector(INTSXP, p));
    for (i = 0; i < p; i++) INTEGER(pivot)[i] = i + 1;

    trsz = (n < p) ? n : p;     /* size of triangular part of decomposition */
    rank = trsz;
    /* At most rank - 1 rotations can be recorded.  Clamp at zero so that a
       matrix with a zero extent does not request a negative-length vector. */
    Givens = PROTECT(allocVector(VECSXP, (rank > 0) ? rank - 1 : 0));

    setAttrib(ans, R_NamesSymbol, nms = allocVector(STRSXP, 5));
    SET_STRING_ELT(nms, 0, mkChar("qr"));
    SET_STRING_ELT(nms, 1, mkChar("rank"));
    SET_STRING_ELT(nms, 2, mkChar("qraux"));
    SET_STRING_ELT(nms, 3, mkChar("pivot"));
    SET_STRING_ELT(nms, 4, mkChar("Givens"));

    if (n > 0 && p > 0) {
        int info, *iwork, lwork;
        double *xpt = REAL(X), tmp;

        /* workspace query, then the actual factorization */
        lwork = -1;
        F77_CALL(dgeqrf)(&n, &p, xpt, &n, REAL(qraux), &tmp, &lwork, &info);
        if (info)
            error(_("First call to dgeqrf returned error code %d"), info);
        lwork = (int) tmp;
        /* dtrcon below reuses `work` and needs 3 * trsz doubles */
        work = (double *) R_alloc((lwork < 3*trsz) ? 3*trsz : lwork,
                                  sizeof(double));
        F77_CALL(dgeqrf)(&n, &p, xpt, &n, REAL(qraux), work, &lwork, &info);
        if (info)
            error(_("Second call to dgeqrf returned error code %d"), info);

        iwork = (int *) R_alloc(trsz, sizeof(int));
        F77_CALL(dtrcon)("1", "U", "N", &rank, xpt, &n, &rcond,
                         work, iwork, &info);
        if (info)
            error(_("Lapack routine dtrcon returned error code %d"), info);

        while (rcond < tol) {   /* check diagonal elements */
            double minabs = (xpt[0] < 0.) ? -xpt[0]: xpt[0];
            int jmin = 0;
            for (i = 1; i < rank; i++) {
                double el = xpt[i*(n+1)];
                el = (el < 0.) ? -el: el;
                if (el < minabs) {
                    jmin = i;
                    minabs = el;
                }
            }
            if (jmin < (rank - 1)) {
                SET_VECTOR_ELT(Givens, nGivens,
                               getGivens(xpt, n, jmin, rank));
                nGivens++;
            }
            rank--;
            F77_CALL(dtrcon)("1", "U", "N", &rank, xpt, &n, &rcond,
                             work, iwork, &info);
            if (info)
                error(_("Lapack routine dtrcon returned error code %d"), info);
        }
    }

    /* return only the rotations that were actually recorded */
    SET_VECTOR_ELT(ans, 4, Gcpy = allocVector(VECSXP, nGivens));
    for (i = 0; i < nGivens; i++)
        SET_VECTOR_ELT(Gcpy, i, VECTOR_ELT(Givens, i));
    SET_VECTOR_ELT(ans, 1, ScalarInteger(rank));
    setAttrib(ans, install("useLAPACK"), ScalarLogical(1));
    setAttrib(ans, install("rcond"), ScalarReal(rcond));
    UNPROTECT(2);
    return ans;
}

/*
 * Remove the extents equal to 1 from the "dim" attribute of x, in place.
 *
 * - No "dim" attribute, or no extent equal to 1: x is returned untouched.
 * - At most one extent differs from 1: x becomes a plain vector.  "dim" and
 *   "dimnames" are removed and "names" is set from the dimnames component of
 *   the surviving extent; for a length-one result a names component is kept
 *   only if exactly one dimnames component is non-NULL.
 * - Otherwise x stays an array with the reduced "dim"; the matching dimnames
 *   components (and their names) are carried over, provided at least one
 *   retained component is non-NULL.
 *
 * Returns x.
 */
SEXP DropDims(SEXP x)
{
    SEXP dims, dimnames, newnames = R_NilValue;
    SEXP newdims, dnn, newnamesnames = R_NilValue;
    int d, ndims, nkeep, kept;

    PROTECT(x);
    dims = getDimAttrib(x);
    dimnames = getDimNamesAttrib(x);

    /* Nothing to drop without a "dim" attribute. */
    if (dims == R_NilValue) {
        UNPROTECT(1);
        return x;
    }
    ndims = LENGTH(dims);

    /* Count the extents that survive; if all do, there is nothing to drop. */
    nkeep = 0;
    for (d = 0; d < ndims; d++) {
        if (INTEGER(dims)[d] != 1)
            nkeep++;
    }
    if (nkeep == ndims) {
        UNPROTECT(1);
        return x;
    }

    if (nkeep <= 1) {
        /* The result is a vector.  If it has length one it is ambiguous
           which dimnames to use, so use one only if there is exactly one
           (as from R 2.7.0). */
        if (dimnames != R_NilValue) {
            if (XLENGTH(x) != 1) {
                for (d = 0; d < ndims; d++) {
                    if (INTEGER(dims)[d] != 1) {
                        newnames = VECTOR_ELT(dimnames, d);
                        break;
                    }
                }
            } else { /* drop all dims: keep names if unambiguous */
                int nnamed = 0;
                for (d = 0; d < ndims; d++) {
                    if (VECTOR_ELT(dimnames, d) != R_NilValue)
                        nnamed++;
                }
                if (nnamed == 1) {
                    for (d = 0; d < ndims; d++) {
                        newnames = VECTOR_ELT(dimnames, d);
                        if (newnames != R_NilValue)
                            break;
                    }
                }
            }
        }
        /* newnames is about to lose its only reference through x */
        PROTECT(newnames);
        setAttrib(x, R_DimNamesSymbol, R_NilValue);
        setAttrib(x, R_DimSymbol, R_NilValue);
        setAttrib(x, R_NamesSymbol, newnames);
        /* FIXME: the following is desirable, but pointless as long as
           subset.c & others have a contrary version that leaves the
           S4 class in, incorrectly, in the case of vectors.  JMC 3/3/09
           (i.e. clearing the class attribute and the S4 bit when x is an
           S4 object, since it is no longer a valid subclass of array or
           matrix). */
        UNPROTECT(2); /* newnames, x */
        return x;
    }

    /* We have a lower dimensional array. */
    dnn = getNamesAttrib(dimnames);
    PROTECT(newdims = allocVector(INTSXP, nkeep));
    kept = 0;
    for (d = 0; d < ndims; d++) {
        int extent = INTEGER(dims)[d];
        if (extent != 1)
            INTEGER(newdims)[kept++] = extent;
    }

    if (!isNull(dimnames)) {
        int havenames = 0;
        for (d = 0; d < ndims; d++) {
            if (INTEGER(dims)[d] != 1 &&
                VECTOR_ELT(dimnames, d) != R_NilValue)
                havenames = 1;
        }
        if (havenames) {
            PROTECT(newnames = allocVector(VECSXP, nkeep));
            PROTECT(newnamesnames = allocVector(STRSXP, nkeep));
            kept = 0;
            for (d = 0; d < ndims; d++) {
                if (INTEGER(dims)[d] == 1)
                    continue;
                if (!isNull(dnn))
                    SET_STRING_ELT(newnamesnames, kept, STRING_ELT(dnn, d));
                SET_VECTOR_ELT(newnames, kept, VECTOR_ELT(dimnames, d));
                kept++;
            }
        }
        else {
            /* every retained component is NULL: behave as if unnamed */
            dimnames = R_NilValue;
        }
    }

    /* keep the old dimnames (and hence dnn) alive while x is rewired */
    PROTECT(dimnames);
    setAttrib(x, R_DimNamesSymbol, R_NilValue);
    setAttrib(x, R_DimSymbol, newdims);
    if (dimnames != R_NilValue) {
        if (!isNull(dnn))
            setAttrib(newnames, R_NamesSymbol, newnamesnames);
        setAttrib(x, R_DimNamesSymbol, newnames);
        UNPROTECT(2); /* newnamesnames, newnames */
    }
    UNPROTECT(3); /* dimnames, newdims, x */
    return x;
}

/** Generate random strings
 *
 * Each output string consists of code points drawn independently (via
 * unif_rand()) from the character class given by the corresponding pattern.
 * `length` and `pattern` are recycled to `n`; a negative `n` or a negative
 * length is treated as 0, and a missing length or pattern yields NA.
 *
 * @param n single integer
 * @param length integer vector
 * @param pattern character vector
 * @return character vector
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-04)
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-05)
 *          Use StriContainerCharClass which now contains UnicodeSets;
 *          vectorized also over pattern
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_rand_strings(SEXP n, SEXP length, SEXP pattern)
{
   int n_val = stri__prepare_arg_integer_1_notNA(n, "n");
   PROTECT(length  = stri_prepare_arg_integer(length, "length"));
   PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));

   if (n_val < 0)
      n_val = 0; /* that's not NA for sure now */

   R_len_t length_len = LENGTH(length);
   if (length_len <= 0) {
      UNPROTECT(2);
      Rf_error(MSG__ARG_EXPECTED_NOT_EMPTY, "length");
   }
   else if (length_len > n_val || n_val % length_len != 0) {
      Rf_warning(MSG__WARN_RECYCLING_RULE2);
   }

   R_len_t pattern_len = LENGTH(pattern);
   if (pattern_len <= 0) {
      UNPROTECT(2);
      Rf_error(MSG__ARG_EXPECTED_NOT_EMPTY, "pattern");
   }
   else if (pattern_len > n_val || n_val % pattern_len != 0) {
      Rf_warning(MSG__WARN_RECYCLING_RULE2);
   }

   GetRNGstate();
   STRI__ERROR_HANDLER_BEGIN(2)
   StriContainerCharClass pattern_cont(pattern, max(n_val, pattern_len));
   StriContainerInteger   length_cont(length, max(n_val, length_len));

   // the buffer must hold the longest requested string
   const int* requested = INTEGER(length);
   R_len_t bufsize = 0;
   for (R_len_t pos = 0; pos < length_len; ++pos) {
      int cur = requested[pos];
      if (cur != NA_INTEGER && cur > bufsize)
         bufsize = cur;
   }
   bufsize *= 4; // 1 UChar32 -> max. 4 UTF-8 bytes
   String8buf buf(bufsize);
   char* bufdata = buf.data();

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(STRSXP, n_val));

   for (R_len_t i = 0; i < n_val; ++i) {
      if (length_cont.isNA(i) || pattern_cont.isNA(i)) {
         SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      int nchars = length_cont.get(i);
      if (nchars < 0)
         nchars = 0;

      const UnicodeSet* charset = &(pattern_cont.get(i));
      int32_t charset_size = charset->size();

      // draw nchars code points and append each one as UTF-8
      R_len_t j = 0;      // number of bytes written to bufdata so far
      UBool err = FALSE;
      R_len_t drawn = 0;
      while (drawn < nchars) {
         int32_t idx = (int32_t)floor(unif_rand()*(double)charset_size); /* 0..charset_size-1 */
         UChar32 c = charset->charAt(idx);
         if (c < 0)
            throw StriException(MSG__INTERNAL_ERROR);

         U8_APPEND((uint8_t*)bufdata, j, bufsize, c, err);
         if (err)
            throw StriException(MSG__INTERNAL_ERROR);
         ++drawn;
      }
      SET_STRING_ELT(ret, i, Rf_mkCharLenCE(bufdata, j, CE_UTF8));
   }

   PutRNGstate();
   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END({
      PutRNGstate();
   })
}

/*
 * Read a registry key into a named list, recursing into subkeys.
 *
 * hkey  : open key to read.
 * depth : remaining recursion depth; at depth <= 0 the string "<subkey>"
 *         is returned instead of the key's contents.
 * view  : 2 adds KEY_WOW64_32KEY and 3 adds KEY_WOW64_64KEY to the access
 *         mask used when opening subkeys; other values add nothing.
 *
 * The result lists the key's values first and its subkeys second, each
 * group ordered by name via orderVector1().  A value with an empty name is
 * labelled "(Default)".  Enumeration of either group stops at the first
 * API failure, leaving the remaining slots at their initial contents.
 */
static SEXP readRegistryKey(HKEY hkey, int depth, int view)
{
    int i, out = 0, namelen, *order;
    SEXP ans, nm, part, partnames, entry, perm;
    DWORD res, nsubkeys, maxsubkeylen, nval, maxvalnamlen, size;
    wchar_t *name;
    HKEY sub;
    REGSAM acc = KEY_READ;

    if (depth <= 0)
        return mkString("<subkey>");

    if (view == 2)
        acc |= KEY_WOW64_32KEY;
    else if (view == 3)
        acc |= KEY_WOW64_64KEY;

    res = RegQueryInfoKey(hkey, NULL, NULL, NULL,
                          &nsubkeys, &maxsubkeylen, NULL,
                          &nval, &maxvalnamlen, NULL, NULL, NULL);
    if (res != ERROR_SUCCESS)
        error("RegQueryInfoKey error code %d: '%s'", (int) res,
              formatError(res));

    /* one name buffer, sized for the longest value or subkey name */
    namelen = max(maxsubkeylen, maxvalnamlen) + 1;
    name = (wchar_t *) R_alloc(namelen, sizeof(wchar_t));

    PROTECT(ans = allocVector(VECSXP, nval + nsubkeys));
    PROTECT(nm = allocVector(STRSXP, nval+ nsubkeys));

    /* ---- values ---- */
    if (nval > 0) {
        PROTECT(part = allocVector(VECSXP, nval));
        PROTECT(partnames = allocVector(STRSXP, nval));
        for (i = 0; i < nval; i++) {
            size = namelen;
            res = RegEnumValueW(hkey, i, (LPWSTR) name, &size,
                                NULL, NULL, NULL, NULL);
            if (res != ERROR_SUCCESS)
                break;
            SET_VECTOR_ELT(part, i, readRegistryKey1(hkey, name));
            SET_STRING_ELT(partnames, i, mkCharUcs(name));
        }

        /* now sort by name */
        PROTECT(perm = allocVector(INTSXP, nval));
        order = INTEGER(perm);
        for (i = 0; i < nval; i++)
            order[i] = i;
        orderVector1(order, nval, partnames, TRUE, FALSE, R_NilValue);

        for (i = 0; i < nval; i++) {
            SET_VECTOR_ELT(ans, out, VECTOR_ELT(part, order[i]));
            entry = STRING_ELT(partnames, order[i]);
            if (LENGTH(entry))
                SET_STRING_ELT(nm, out, entry);
            else
                SET_STRING_ELT(nm, out, mkChar("(Default)"));
            out++;
        }
        UNPROTECT(3); /* perm, partnames, part */
    }

    /* ---- subkeys ---- */
    if (nsubkeys > 0) {
        PROTECT(part = allocVector(VECSXP, nsubkeys));
        PROTECT(partnames = allocVector(STRSXP, nsubkeys));
        for (i = 0; i < nsubkeys; i++) {
            size = namelen;
            res = RegEnumKeyExW(hkey, i, (LPWSTR) name, &size,
                                NULL, NULL, NULL, NULL);
            if (res != ERROR_SUCCESS)
                break;
            res = RegOpenKeyExW(hkey, (LPWSTR) name, 0, acc, &sub);
            if (res != ERROR_SUCCESS)
                break;
            SET_VECTOR_ELT(part, i, readRegistryKey(sub, depth-1, view));
            SET_STRING_ELT(partnames, i, mkCharUcs(name));
            RegCloseKey(sub);
        }

        /* now sort by name */
        PROTECT(perm = allocVector(INTSXP, nsubkeys));
        order = INTEGER(perm);
        for (i = 0; i < nsubkeys; i++)
            order[i] = i;
        orderVector1(order, nsubkeys, partnames, TRUE, FALSE, R_NilValue);

        for (i = 0; i < nsubkeys; i++) {
            SET_VECTOR_ELT(ans, out, VECTOR_ELT(part, order[i]));
            SET_STRING_ELT(nm, out, STRING_ELT(partnames, order[i]));
            out++;
        }
        UNPROTECT(3); /* perm, partnames, part */
    }

    setAttrib(ans, R_NamesSymbol, nm);
    UNPROTECT(2); /* nm, ans */
    return ans;
}

/*
 * Default method for the "[[" operator.
 *
 * args holds the object followed by the subscripts; the `drop` and `exact`
 * arguments are extracted from it first.  Partial matching of names is
 * permitted when `exact` is FALSE, and permitted with a warning when it is
 * NA (ExtractExactArg() then returns -1, which is passed on unchanged).
 *
 * - NULL object: returns NULL.
 * - S4 object: its data slot is used; an error if there is none.
 * - Environment: a single string subscript is looked up in that frame; a
 *   promise is evaluated, an unbound name gives NULL.
 * - One subscript: (recursive) vector indexing; for list-like objects a
 *   negative offset from get1index() gives NULL rather than an error.
 * - Several subscripts: array indexing; their number must equal the number
 *   of dimensions and each must lie within its extent.
 *
 * Returns the selected element (for atomic vectors a new length-one vector).
 */
SEXP attribute_hidden do_subset2_dflt(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP ans, dims, dimnames, indx, subs, x;
    int i, ndims, nsubs;
    int drop = 1, pok, exact = -1;
    int named_x;
    R_xlen_t offset = 0;

    PROTECT(args);
    ExtractDropArg(args, &drop);
    /* Is partial matching ok?  When the exact arg is NA, a warning is
       issued if partial matching occurs.
     */
    exact = ExtractExactArg(args);
    if (exact == -1)
        pok = exact;
    else
        pok = !exact;

    x = CAR(args);

    /* This code was intended for compatibility with S, */
    /* but in fact S does not do this. Will anyone notice? */

    if (x == R_NilValue) {
        UNPROTECT(1); /* args */
        return x;
    }

    /* Get the subscripting and dimensioning information */
    /* and check that any array subscripting is compatible. */

    subs = CDR(args);
    if(0 == (nsubs = length(subs)))
        errorcall(call, _("no index specified"));
    dims = getAttrib(x, R_DimSymbol);
    ndims = length(dims);
    if(nsubs > 1 && nsubs != ndims)
        errorcall(call, _("incorrect number of subscripts"));

    /* code to allow classes to extend environment */
    if(TYPEOF(x) == S4SXP) {
        x = R_getS4DataSlot(x, ANYSXP);
        if(x == R_NilValue)
            errorcall(call, _("this S4 class is not subsettable"));
    }
    PROTECT(x);

    /* split out ENVSXP for now */
    if( TYPEOF(x) == ENVSXP ) {
        if( nsubs != 1 || !isString(CAR(subs)) || length(CAR(subs)) != 1 )
            errorcall(call, _("wrong arguments for subsetting an environment"));
        ans = findVarInFrame(x, installTrChar(STRING_ELT(CAR(subs), 0)));
        if( TYPEOF(ans) == PROMSXP ) {
            PROTECT(ans);
            ans = eval(ans, R_GlobalEnv);
            UNPROTECT(1); /* ans */
        } else SET_NAMED(ans, 2);

        UNPROTECT(2); /* args, x */
        if(ans == R_UnboundValue)
            return(R_NilValue);
        if (NAMED(ans))
            SET_NAMED(ans, 2);
        return ans;
    }

    /* back to the regular program */
    if (!(isVector(x) || isList(x) || isLanguage(x)))
        errorcall(call, R_MSG_ob_nonsub, type2char(TYPEOF(x)));

    named_x = NAMED(x);  /* x may change below; save this now.  See PR#13411 */

    if(nsubs == 1) { /* vector indexing */
        SEXP thesub = CAR(subs);
        int len = length(thesub);

        if (len > 1) {
            /* recursive indexing: descend with all but the last subscript */
#ifdef SWITCH_TO_REFCNT
            if (IS_GETTER_CALL(call)) {
                /* this is (most likely) a getter call in a complex
                   assignment so we duplicate as needed. The original
                   x should have been duplicated if it might be
                   shared */
                if (MAYBE_SHARED(x))
                    error("getter call used outside of a complex assignment.");
                x = vectorIndex(x, thesub, 0, len-1, pok, call, TRUE);
            }
            else
                x = vectorIndex(x, thesub, 0, len-1, pok, call, FALSE);
#else
            x = vectorIndex(x, thesub, 0, len-1, pok, call, FALSE);
#endif
            named_x = NAMED(x);
            UNPROTECT(1); /* x */
            PROTECT(x);
        }

        SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol));
        offset = get1index(thesub, xnames,
                           xlength(x), pok, len > 1 ? len-1 : -1, call);
        UNPROTECT(1); /* xnames */
        if (offset < 0 || offset >= xlength(x)) {
            /* a bold attempt to get the same behaviour for $ and [[ */
            if (offset < 0 && (isNewList(x) ||
                               isExpression(x) ||
                               isList(x) ||
                               isLanguage(x))) {
                UNPROTECT(2); /* args, x */
                return R_NilValue;
            }
            else errorcall(call, R_MSG_subs_o_b);
        }
    } else { /* matrix indexing */
        /* Here we use the fact that: */
        /* CAR(R_NilValue) = R_NilValue */
        /* CDR(R_NilValue) = R_NilValue */

        int ndn; /* Number of dimnames. Unlikely to be anything but
                    0 or nsubs, but just in case... */

        PROTECT(indx = allocVector(INTSXP, nsubs));
        dimnames = getAttrib(x, R_DimNamesSymbol);
        ndn = length(dimnames);
        for (i = 0; i < nsubs; i++) {
            /* The length argument is the extent of dimension i (nsubs ==
               ndims on this branch), mirroring xlength(x) in the vector
               case; indx[i] itself is not yet initialised at this point. */
            INTEGER(indx)[i] = (int)
                get1index(CAR(subs),
                          (i < ndn) ? VECTOR_ELT(dimnames, i) : R_NilValue,
                          INTEGER(dims)[i], pok, -1, call);
            subs = CDR(subs);
            if (INTEGER(indx)[i] < 0 ||
                INTEGER(indx)[i] >= INTEGER(dims)[i])
                errorcall(call, R_MSG_subs_o_b);
        }
        /* column-major offset of the selected cell */
        offset = 0;
        for (i = (nsubs - 1); i > 0; i--)
            offset = (offset + INTEGER(indx)[i]) * INTEGER(dims)[i - 1];
        offset += INTEGER(indx)[0];
        UNPROTECT(1); /* indx */
    }

    if(isPairList(x)) {
#ifdef LONG_VECTOR_SUPPORT
        if (offset > R_SHORT_LEN_MAX)
            error("invalid subscript for pairlist");
#endif
        ans = CAR(nthcdr(x, (int) offset));
        if (named_x > NAMED(ans))
            SET_NAMED(ans, named_x);
    } else if(isVectorList(x)) {
        /* did unconditional duplication before 2.4.0 */
        ans = VECTOR_ELT(x, offset);
        if (named_x > NAMED(ans))
            SET_NAMED(ans, named_x);
    } else {
        ans = allocVector(TYPEOF(x), 1);
        switch (TYPEOF(x)) {
        case LGLSXP:
        case INTSXP:
            INTEGER(ans)[0] = INTEGER(x)[offset];
            break;
        case REALSXP:
            REAL(ans)[0] = REAL(x)[offset];
            break;
        case CPLXSXP:
            COMPLEX(ans)[0] = COMPLEX(x)[offset];
            break;
        case STRSXP:
            SET_STRING_ELT(ans, 0, STRING_ELT(x, offset));
            break;
        case RAWSXP:
            RAW(ans)[0] = RAW(x)[offset];
            break;
        default:
            UNIMPLEMENTED_TYPE("do_subset2", x);
        }
    }
    UNPROTECT(2); /* args, x */
    return ans;
}

/*
 * Non-linear minimization.
 *
 * .Internal(
 *	nlm(function(x) f(x, ...), p, hessian, typsize, fscale,
 *	    msg, ndigit, gradtol, stepmax, steptol, iterlim)
 *
 * The objective is evaluated once up front to find out whether its value
 * carries usable "gradient" and "hessian" attributes; optif9() then performs
 * the minimization.
 *
 * Returns a named list with components "minimum", "estimate", "gradient",
 * "hessian" (only when requested), "code" and "iterations".
 */
SEXP nlm(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP value, names, v, elt, gradSym, hessSym;

    double *x, *typsiz, fscale, gradtl, stepmx,
	steptol, *xpls, *gpls, fpls, *a, *wrk, dlt;

    int code, i, j, k, itnlim, method, iexp, omsg, msg,
	n, ndigit, iagflg, iahflg, want_hessian, itncnt;

    function_info *state;

    args = CDR(args);
    PrintDefaults();

    state = (function_info *) R_alloc(1, sizeof(function_info));

    /* the function to be minimized */

    v = CAR(args);
    if (!isFunction(v))
	error(_("attempt to minimize non-function"));
    PROTECT(state->R_fcall = lang2(v, R_NilValue));
    args = CDR(args);

    /* `p' : inital parameter value */

    n = 0;
    x = fixparam(CAR(args), &n);
    args = CDR(args);

    /* `hessian' : H. required? */

    want_hessian = asLogical(CAR(args));
    if (want_hessian == NA_LOGICAL)
	want_hessian = 0;
    args = CDR(args);

    /* `typsize' : typical size of parameter elements */

    typsiz = fixparam(CAR(args), &n);
    args = CDR(args);

    /* `fscale' : expected function size */

    fscale = asReal(CAR(args));
    if (ISNA(fscale))
	error(_("invalid NA value in parameter"));
    args = CDR(args);

    /* `msg' (bit pattern) */

    omsg = msg = asInteger(CAR(args));
    if (msg == NA_INTEGER)
	error(_("invalid NA value in parameter"));
    args = CDR(args);

    /* `ndigit' */

    ndigit = asInteger(CAR(args));
    if (ndigit == NA_INTEGER)
	error(_("invalid NA value in parameter"));
    args = CDR(args);

    /* `gradtol' */

    gradtl = asReal(CAR(args));
    if (ISNA(gradtl))
	error(_("invalid NA value in parameter"));
    args = CDR(args);

    /* `stepmax' */

    stepmx = asReal(CAR(args));
    if (ISNA(stepmx))
	error(_("invalid NA value in parameter"));
    args = CDR(args);

    /* `steptol' */

    steptol = asReal(CAR(args));
    if (ISNA(steptol))
	error(_("invalid NA value in parameter"));
    args = CDR(args);

    /* `iterlim' (def. 100) */

    itnlim = asInteger(CAR(args));
    if (itnlim == NA_INTEGER)
	error(_("invalid NA value in parameter"));

    state->R_env = rho;

    /* force one evaluation to check for the gradient and hessian */
    iagflg = 0;			/* No analytic gradient */
    iahflg = 0;			/* No analytic hessian */
    state->have_gradient = 0;
    state->have_hessian = 0;
    gradSym = install("gradient");
    hessSym = install("hessian");

    /* This vector is shared with all subsequent calls */
    v = allocVector(REALSXP, n);
    for (i = 0; i < n; i++)
	REAL(v)[i] = x[i];
    SETCADR(state->R_fcall, v);
    SET_NAMED(v, 2); // in case the functions try to alter it
    value = eval(state->R_fcall, state->R_env);

    v = getAttrib(value, gradSym);
    if (v != R_NilValue) {
	if (LENGTH(v) == n && (isReal(v) || isInteger(v))) {
	    iagflg = 1;
	    state->have_gradient = 1;
	    /* a hessian is only considered alongside a usable gradient */
	    v = getAttrib(value, hessSym);
	    if (v != R_NilValue) {
		if (LENGTH(v) == (n * n) && (isReal(v) || isInteger(v))) {
		    iahflg = 1;
		    state->have_hessian = 1;
		}
		else {
		    warning(_("hessian supplied is of the wrong length or mode, so ignored"));
		}
	    }
	}
	else {
	    warning(_("gradient supplied is of the wrong length or mode, so ignored"));
	}
    }

    if (((msg/4) % 2) && !iahflg) { /* skip check of analytic Hessian */
	msg -= 4;
    }
    if (((msg/2) % 2) && !iagflg) { /* skip check of analytic gradient */
	msg -= 2;
    }
    FT_init(n, FT_SIZE, state);

    /* Plug in the call to the optimizer here */

    method = 1;			/* Line Search */
    iexp = iahflg ? 0 : 1;	/* Function calls are expensive */
    dlt = 1.0;

    xpls = (double*)R_alloc(n, sizeof(double));
    gpls = (double*)R_alloc(n, sizeof(double));
    a = (double*)R_alloc(n*n, sizeof(double));
    wrk = (double*)R_alloc(8*n, sizeof(double));

    /*
     *	 Dennis + Schnabel Minimizer
     *
     *	  SUBROUTINE OPTIF9(NR,N,X,FCN,D1FCN,D2FCN,TYPSIZ,FSCALE,
     *	 +	   METHOD,IEXP,MSG,NDIGIT,ITNLIM,IAGFLG,IAHFLG,IPR,
     *	 +	   DLT,GRADTL,STEPMX,STEPTOL,
     *	 +	   XPLS,FPLS,GPLS,ITRMCD,A,WRK)
     *
     *
     *	 Note: I have figured out what msg does.
     *	 It is actually a sum of bit flags as follows
     *	   1 = don't check/warn for 1-d problems
     *	   2 = don't check analytic gradients
     *	   4 = don't check analytic hessians
     *	   8 = don't print start and end info
     *	  16 = print at every iteration
     *	 Using msg=9 is absolutely minimal
     *	 I think we always check gradients and hessians
     */

    optif9(n, n, x, (fcn_p) fcn, (fcn_p) Cd1fcn, (d2fcn_p) Cd2fcn,
	   state, typsiz, fscale, method, iexp, &msg, ndigit, itnlim,
	   iagflg, iahflg, dlt, gradtl, stepmx, steptol, xpls, &fpls,
	   gpls, &code, a, wrk, &itncnt);

    if (msg < 0)
	opterror(msg);
    if (code != 0 && (omsg&8) == 0)
	optcode(code);

    if (want_hessian) {
	PROTECT(value = allocVector(VECSXP, 6));
	PROTECT(names = allocVector(STRSXP, 6));
	fdhess(n, xpls, fpls, (fcn_p) fcn, state, a, n, &wrk[0], &wrk[n],
	       ndigit, typsiz);
	/* mirror one triangle of the n x n matrix onto the other */
	for (i = 0; i < n; i++)
	    for (j = 0; j < i; j++)
		a[i + j * n] = a[j + i * n];
    }
    else {
	PROTECT(value = allocVector(VECSXP, 5));
	PROTECT(names = allocVector(STRSXP, 5));
    }

    /* assemble the result; k walks over the slots */
    k = 0;

    SET_STRING_ELT(names, k, mkChar("minimum"));
    SET_VECTOR_ELT(value, k, ScalarReal(fpls));
    k++;

    SET_STRING_ELT(names, k, mkChar("estimate"));
    SET_VECTOR_ELT(value, k, elt = allocVector(REALSXP, n));
    for (i = 0; i < n; i++)
	REAL(elt)[i] = xpls[i];
    k++;

    SET_STRING_ELT(names, k, mkChar("gradient"));
    SET_VECTOR_ELT(value, k, elt = allocVector(REALSXP, n));
    for (i = 0; i < n; i++)
	REAL(elt)[i] = gpls[i];
    k++;

    if (want_hessian) {
	SET_STRING_ELT(names, k, mkChar("hessian"));
	SET_VECTOR_ELT(value, k, elt = allocMatrix(REALSXP, n, n));
	for (i = 0; i < n * n; i++)
	    REAL(elt)[i] = a[i];
	k++;
    }

    SET_STRING_ELT(names, k, mkChar("code"));
    SET_VECTOR_ELT(value, k, elt = allocVector(INTSXP, 1));
    INTEGER(elt)[0] = code;
    k++;

    /* added by Jim K Lindsey */
    SET_STRING_ELT(names, k, mkChar("iterations"));
    SET_VECTOR_ELT(value, k, elt = allocVector(INTSXP, 1));
    INTEGER(elt)[0] = itncnt;
    k++;

    setAttrib(value, R_NamesSymbol, names);
    UNPROTECT(3); /* names, value, state->R_fcall */
    return value;
}

/* Extract a sub-array from the array "x".
 *
 * x    : the array being subsetted; its "dim" attribute gives the extents.
 * s    : pairlist holding one subscript per dimension.  Each element is
 *        replaced in place by the result of int_arraySubscript().
 * call : call used when reporting errors through errorcall().
 * drop : if non-zero, DropDims() is applied to the result.
 *
 * Returns a newly allocated vector of the same type as "x" carrying a
 * "dim" attribute and, when "x" has dimnames, the subsetted dimnames.
 */
static SEXP ArraySubset(SEXP x, SEXP s, SEXP call, int drop)
{
    /* Remember the R_alloc watermark; it is restored before returning. */
    const void *vmaxsave = vmaxget();

    const int mode = TYPEOF(x);
    SEXP xdims = getAttrib(x, R_DimSymbol);
    const int k = length(xdims);	/* number of dimensions */
    SEXP cell, result, dimnames, dimnamesnames;

    /* Per-dimension scratch space. */
    int **subs = (int**)R_alloc(k, sizeof(int*));	/* subscript data   */
    int *indx = (int*)R_alloc(k, sizeof(int));		/* current position */
    int *bound = (int*)R_alloc(k, sizeof(int));		/* subscript length */
    R_xlen_t *offset = (R_xlen_t*)R_alloc(k, sizeof(R_xlen_t)); /* strides in x */

    /* Convert every subscript to integer form and compute the number */
    /* of elements in the result. */
    R_xlen_t n = 1;
    cell = s;
    for (int d = 0; d < k; d++, cell = CDR(cell)) {
	SETCAR(cell, int_arraySubscript(d, CAR(cell), xdims, x, call));
	bound[d] = LENGTH(CAR(cell));
	n *= bound[d];
    }
    PROTECT(result = allocVector(mode, n));

    cell = s;
    for (int d = 0; d < k; d++, cell = CDR(cell)) {
	indx[d] = 0;
	subs[d] = INTEGER(CAR(cell));
    }

    /* offset[d] is the distance in "x" between neighbours along dim d. */
    offset[0] = 1;
    for (int d = 1; d < k; d++)
	offset[d] = offset[d - 1] * INTEGER(xdims)[d - 1];

    /* Transfer the selected elements from "x" to "result". */
    for (R_xlen_t i = 0; i < n; i++) {
	/* Linear index into "x"; NA_INTEGER marks "some subscript is NA". */
	R_xlen_t ii = 0;
	for (int d = 0; d < k; d++) {
	    const int jj = subs[d][indx[d]];
	    if (jj == NA_INTEGER) {
		ii = NA_INTEGER;
		break;
	    }
	    if (jj < 1 || jj > INTEGER(xdims)[d])
		errorcall(call, R_MSG_subs_o_b);
	    ii += (jj - 1) * offset[d];
	}

	switch (mode) {
	case LGLSXP:
	    LOGICAL(result)[i] = (ii != NA_INTEGER) ? LOGICAL(x)[ii] : NA_LOGICAL;
	    break;
	case INTSXP:
	    INTEGER(result)[i] = (ii != NA_INTEGER) ? INTEGER(x)[ii] : NA_INTEGER;
	    break;
	case REALSXP:
	    REAL(result)[i] = (ii != NA_INTEGER) ? REAL(x)[ii] : NA_REAL;
	    break;
	case CPLXSXP:
	    if (ii != NA_INTEGER) {
		COMPLEX(result)[i] = COMPLEX(x)[ii];
	    } else {
		COMPLEX(result)[i].r = NA_REAL;
		COMPLEX(result)[i].i = NA_REAL;
	    }
	    break;
	case STRSXP:
	    SET_STRING_ELT(result, i,
			   (ii != NA_INTEGER) ? STRING_ELT(x, ii) : NA_STRING);
	    break;
	case VECSXP:
	    if (ii != NA_INTEGER)
		SET_VECTOR_ELT(result, i, VECTOR_ELT_FIX_NAMED(x, ii));
	    else
		SET_VECTOR_ELT(result, i, R_NilValue);
	    break;
	case RAWSXP:
	    RAW(result)[i] = (ii != NA_INTEGER) ? RAW(x)[ii] : (Rbyte) 0;
	    break;
	default:
	    errorcall(call, _("array subscripting not handled for this type"));
	    break;
	}

	/* Advance the multi-dimensional counter, first dimension fastest. */
	if (n > 1) {
	    int d = 0;
	    while (++indx[d] >= bound[d]) {
		indx[d] = 0;
		d = (d + 1) % k;
	    }
	}
    }

    /* The extents of the result are the subscript lengths. */
    SEXP newdim = PROTECT(allocVector(INTSXP, k));
    for (int d = 0; d < k; d++)
	INTEGER(newdim)[d] = bound[d];
    setAttrib(result, R_DimSymbol, newdim);
    UNPROTECT(1); /* newdim */

    /* The array elements have been transferred; now subset the */
    /* dimnames of "x" (if any) for the returned value. */
    dimnames = getAttrib(x, R_DimNamesSymbol);
    PROTECT(dimnamesnames = getAttrib(dimnames, R_NamesSymbol));
    if (dimnames != R_NilValue) {
	SEXP newnames = PROTECT(allocVector(VECSXP, k));
	if (TYPEOF(dimnames) == VECSXP) {
	    cell = s;
	    for (int d = 0; d < k; d++, cell = CDR(cell)) {
		if (bound[d] > 0) {
		    SET_VECTOR_ELT(newnames, d,
				   ExtractSubset(VECTOR_ELT(dimnames, d),
						 allocVector(STRSXP, bound[d]),
						 CAR(cell), call));
		} else {
		    /* 0-length dims have NULL dimnames */
		    SET_VECTOR_ELT(newnames, d, R_NilValue);
		}
	    }
	} else {
	    /* dimnames is not a generic vector: walk it as a pairlist. */
	    SEXP src = dimnames, dst = newnames;
	    cell = s;
	    for (int d = 0; d < k; d++) {
		SETCAR(dst, allocVector(STRSXP, bound[d]));
		SETCAR(dst, ExtractSubset(CAR(src), CAR(dst), CAR(cell), call));
		src = CDR(src);
		dst = CDR(dst);
		cell = CDR(cell);
	    }
	}
	setAttrib(newnames, R_NamesSymbol, dimnamesnames);
	setAttrib(result, R_DimNamesSymbol, newnames);
	UNPROTECT(1); /* newnames */
    }
    /* This was removed for matrices in 1998
       copyMostAttrib(x, result); */

    /* Free temporary memory */
    vmaxset(vmaxsave);
    if (drop)
	DropDims(result);
    UNPROTECT(2); /* dimnamesnames, result */
    return result;
}

/* Copy the elements of "x" selected by "indx" into "result".
 *
 * x      : source object (vector, list, expression or language object).
 * result : pre-allocated destination, filled in place and returned.
 * indx   : 1-based subscripts, either REALSXP or integer-like.
 * call   : call used for error reporting.
 *
 * Subscripts that are NA, non-finite or outside [1, length(x)] yield the
 * type's missing value (NULL for lists, 0 for raw).  If "x" is NULL it is
 * returned unchanged.
 */
static SEXP ExtractSubset(SEXP x, SEXP result, SEXP indx, SEXP call)
{
    const int mode = TYPEOF(x);
    const int mi = TYPEOF(indx);
    const R_xlen_t n = XLENGTH(indx);
    const R_xlen_t nx = xlength(x);
    SEXP cell = result;		/* cursor used for pairlist results */

    if (x == R_NilValue)
	return x;

    for (R_xlen_t i = 0; i < n; i++) {
	/* Zero-based position in "x", or NA_INTEGER. */
	R_xlen_t ii;
	if (mi == REALSXP) {
	    const double di = REAL(indx)[i];
	    ii = R_FINITE(di) ? (R_xlen_t) (di - 1) : NA_INTEGER;
	} else {
	    ii = INTEGER(indx)[i];
	    if (ii != NA_INTEGER)
		ii--;
	}

	/* NA_INTEGER < 0, so the last comparison is redundant but harmless. */
	const int ok = (0 <= ii && ii < nx && ii != NA_INTEGER);

	switch (mode) {
	case LGLSXP:
	    if (ok)
		LOGICAL(result)[i] = LOGICAL(x)[ii];
	    else
		LOGICAL(result)[i] = NA_INTEGER;
	    break;
	case INTSXP:
	    if (ok)
		INTEGER(result)[i] = INTEGER(x)[ii];
	    else
		INTEGER(result)[i] = NA_INTEGER;
	    break;
	case REALSXP:
	    if (ok)
		REAL(result)[i] = REAL(x)[ii];
	    else
		REAL(result)[i] = NA_REAL;
	    break;
	case CPLXSXP:
	    if (ok) {
		COMPLEX(result)[i] = COMPLEX(x)[ii];
	    } else {
		COMPLEX(result)[i].r = NA_REAL;
		COMPLEX(result)[i].i = NA_REAL;
	    }
	    break;
	case STRSXP:
	    if (ok)
		SET_STRING_ELT(result, i, STRING_ELT(x, ii));
	    else
		SET_STRING_ELT(result, i, NA_STRING);
	    break;
	case VECSXP:
	case EXPRSXP:
	    if (ok)
		SET_VECTOR_ELT(result, i, VECTOR_ELT_FIX_NAMED(x, ii));
	    else
		SET_VECTOR_ELT(result, i, R_NilValue);
	    break;
	case LISTSXP:
	    /* cannot happen: pairlists are coerced to lists */
	case LANGSXP:
#ifdef LONG_VECTOR_SUPPORT
	    if (ii > R_SHORT_LEN_MAX)
		error("invalid subscript for pairlist");
#endif
	    if (ok) {
		SEXP src = nthcdr(x, (int) ii);
		SETCAR(cell, CAR(src));
		SET_TAG(cell, TAG(src));
	    } else {
		SETCAR(cell, R_NilValue);
	    }
	    cell = CDR(cell);
	    break;
	case RAWSXP:
	    if (ok)
		RAW(result)[i] = RAW(x)[ii];
	    else
		RAW(result)[i] = (Rbyte) 0;
	    break;
	default:
	    errorcall(call, R_MSG_ob_nonsub, type2char(mode));
	}
    }
    return result;
}

/* Extract a sub-matrix from the matrix "x".
 *
 * x    : the matrix being subsetted.
 * s    : pairlist with the row subscript first and the column subscript
 *        second; both are replaced in place by their integer form.
 * call : call used for error reporting.
 * drop : if non-zero, DropDims() is applied to the result.
 *
 * Returns a new nrs x ncs matrix of the same type as "x" with subsetted
 * dimnames.  A cell whose row or column subscript is NA receives the
 * type's missing value.
 */
static SEXP MatrixSubset(SEXP x, SEXP s, SEXP call, int drop)
{
    const int nr = nrows(x);
    const int nc = ncols(x);
    const int type = TYPEOF(x);
    SEXP sr, sc, result;

    /* Note that "s" is protected on entry. */
    /* Storing the converted subscripts back into "s" keeps them protected. */
    SEXP dim = getAttrib(x, R_DimSymbol);
    sr = SETCAR(s, int_arraySubscript(0, CAR(s), dim, x, call));
    sc = SETCADR(s, int_arraySubscript(1, CADR(s), dim, x, call));
    const int nrs = LENGTH(sr);
    const int ncs = LENGTH(sc);

    /* Check this does not overflow: currently only possible on 32-bit */
    if ((double)nrs * (double)ncs > R_XLEN_T_MAX)
	error(_("dimensions would exceed maximum size of array"));
    PROTECT(sr);
    PROTECT(sc);
    result = allocVector(type, (R_xlen_t) nrs * (R_xlen_t) ncs);
    PROTECT(result);

    for (R_xlen_t i = 0; i < nrs; i++) {
	/* Zero-based row in "x", or NA_INTEGER. */
	R_xlen_t ii = INTEGER(sr)[i];
	if (ii != NA_INTEGER) {
	    if (ii < 1 || ii > nr)
		errorcall(call, R_MSG_subs_o_b);
	    ii--;
	}
	for (R_xlen_t j = 0; j < ncs; j++) {
	    /* Zero-based column in "x", or NA_INTEGER. */
	    R_xlen_t jj = INTEGER(sc)[j];
	    if (jj != NA_INTEGER) {
		if (jj < 1 || jj > nc)
		    errorcall(call, R_MSG_subs_o_b);
		jj--;
	    }

	    const R_xlen_t ij = i + j * nrs;	/* destination cell */

	    if (ii == NA_INTEGER || jj == NA_INTEGER) {
		/* Missing subscript: store the NA of the result type. */
		switch (type) {
		case LGLSXP:
		case INTSXP:
		    INTEGER(result)[ij] = NA_INTEGER;
		    break;
		case REALSXP:
		    REAL(result)[ij] = NA_REAL;
		    break;
		case CPLXSXP:
		    COMPLEX(result)[ij].r = NA_REAL;
		    COMPLEX(result)[ij].i = NA_REAL;
		    break;
		case STRSXP:
		    SET_STRING_ELT(result, ij, NA_STRING);
		    break;
		case VECSXP:
		    SET_VECTOR_ELT(result, ij, R_NilValue);
		    break;
		case RAWSXP:
		    RAW(result)[ij] = (Rbyte) 0;
		    break;
		default:
		    errorcall(call, _("matrix subscripting not handled for this type"));
		    break;
		}
		continue;
	    }

	    const R_xlen_t iijj = ii + jj * nr;	/* source cell */
	    switch (type) {
	    case LGLSXP:
		LOGICAL(result)[ij] = LOGICAL(x)[iijj];
		break;
	    case INTSXP:
		INTEGER(result)[ij] = INTEGER(x)[iijj];
		break;
	    case REALSXP:
		REAL(result)[ij] = REAL(x)[iijj];
		break;
	    case CPLXSXP:
		COMPLEX(result)[ij] = COMPLEX(x)[iijj];
		break;
	    case STRSXP:
		SET_STRING_ELT(result, ij, STRING_ELT(x, iijj));
		break;
	    case VECSXP:
		SET_VECTOR_ELT(result, ij, VECTOR_ELT_FIX_NAMED(x, iijj));
		break;
	    case RAWSXP:
		RAW(result)[ij] = RAW(x)[iijj];
		break;
	    default:
		errorcall(call, _("matrix subscripting not handled for this type"));
		break;
	    }
	}
    }

    if (nrs >= 0 && ncs >= 0) {
	/* Attach the dimensions of the result. */
	SEXP attr = PROTECT(allocVector(INTSXP, 2));
	INTEGER(attr)[0] = nrs;
	INTEGER(attr)[1] = ncs;
	setAttrib(result, R_DimSymbol, attr);
	UNPROTECT(1); /* attr */

	/* The matrix elements have been transferred.  Now we need to */
	/* transfer the attributes.  Most importantly, we need to subset */
	/* the dimnames of the returned value. */
	SEXP dimnames = getAttrib(x, R_DimNamesSymbol);
	SEXP dimnamesnames = PROTECT(getAttrib(dimnames, R_NamesSymbol));
	if (!isNull(dimnames)) {
	    SEXP rowsrc, colsrc;
	    SEXP newdimnames = PROTECT(allocVector(VECSXP, 2));
	    if (TYPEOF(dimnames) == VECSXP) {
		rowsrc = VECTOR_ELT(dimnames, 0);
		colsrc = VECTOR_ELT(dimnames, 1);
	    } else {
		rowsrc = CAR(dimnames);
		colsrc = CADR(dimnames);
	    }
	    SET_VECTOR_ELT(newdimnames, 0,
			   ExtractSubset(rowsrc, allocVector(STRSXP, nrs),
					 sr, call));
	    SET_VECTOR_ELT(newdimnames, 1,
			   ExtractSubset(colsrc, allocVector(STRSXP, ncs),
					 sc, call));
	    setAttrib(newdimnames, R_NamesSymbol, dimnamesnames);
	    setAttrib(result, R_DimNamesSymbol, newdimnames);
	    UNPROTECT(1); /* newdimnames */
	}
	UNPROTECT(1); /* dimnamesnames */
    }
    /*  Probably should not do this:
	copyMostAttrib(x, result); */
    if (drop)
	DropDims(result);
    UNPROTECT(3); /* sr, sc, result */
    return result;
}

/* A function for reading compact format files.
 * Use with the .Call interface function.
 * Written by Mikko Korpela
 *
 * filename: character vector; its first element is the path to read.
 *
 * Returns a list with 8 elements:
 *   [[1]] first year of all data, [[2]] last year of all data,
 *   [[3]] series IDs, [[4]] first year of each series,
 *   [[5]] last year of each series, [[6]] multiplier of each series,
 *   [[7]] numeric matrix of data (one column per series, NA padded),
 *   [[8]] comment lines found before the headers.
 *
 * Any problem with the file is reported through error().  Because error()
 * does not return, the file is always closed BEFORE it is called.
 */
SEXP rcompact(SEXP filename){
    char field_id, line[LINE_LENGTH], mplier_str[MPLIER_LENGTH], *found1,
        *found2, *found_leftpar, *found_dot, *found_rightpar, *found_tilde,
        *id_start, *old_point, *point, *point2, *endp, *tmp_name,
        *tmp_comment;
    int i, j, n, first_yr, last_yr, id_length, exponent, n_repeats,
        field_width, n_x_w, n_lines, remainder, idx, this_last,
        *i_first, *i_last;
    long int precision;
    size_t idx2;
    Rboolean n_found, divide;
    long long int read_int;
    double read_double, mplier, *r_mplier, *r_data;
    FILE *f;
    SEXP result, series_id, series_first, series_last, series_mplier,
        series_data, project_comments;
    rwlnode first, *this;
    commentnode comment_first, *comment_this;
    double divisor = 1; /* assign a value to avoid compiler nag */
    int n_content = 0;
    int n_comments = 0;
    Rboolean early_eof = FALSE;

    /* Open the file for reading */
    const char *fname = CHAR(STRING_ELT(filename, 0));
    f = fopen(fname, "r");
    if(f == NULL)
        error(_("Could not open file %s for reading"), fname);

    this = &first;                 /* current rwlnode */
    comment_this = &comment_first; /* current commentnode */
    n = 0;                         /* number of series */
    first_yr = R_INT_MAX;          /* the first year in all data */
    last_yr = R_INT_MIN;           /* the last year in all data */

    /* Each round of the loop reads a header line,
     * then the data lines of the corresponding series
     */
    while(fgets_eol(line, &n_content, LINE_LENGTH, f) != NULL){
        /* In the beginning of the file, if no ~ is found, we assume
         * the line is a comment. This is the same approach as in the
         * TRiCYCLE program.
         */
        while(strchr(line, '~') == NULL){
            if(n_content > 0){ /* Skip empty lines */
                if(n_comments == R_INT_MAX){
                    fclose(f);
                    error(_("Number of comments exceeds integer range"));
                }
                ++n_comments;
                tmp_comment = (char *) R_alloc(n_content+1, sizeof(char));
                strncpy(tmp_comment, line, n_content);
                tmp_comment[n_content] = '\0'; /* Null termination */
                comment_this->text = tmp_comment;
                comment_this->next =
                    (commentnode *) R_alloc(1, sizeof(commentnode));
                comment_this = comment_this->next;
            }
            if(fgets_eol(line, &n_content, LINE_LENGTH, f) == NULL){
                early_eof = TRUE;
                break;
            }
        }
        if(early_eof == TRUE)
            break;

        if(n == R_INT_MAX){
            fclose(f);
            error(_("Number of series exceeds integer range"));
        }

        /* A simple check to point out too long header
         * lines. Generally, if one line is too long, this function
         * will probably be unable to parse the next line. In that
         * case, finding the faulty line may be of some value. Of
         * course, if the input is generated by some program, lines
         * are expected to be short enough. Data edited by hand may be
         * a different case.
         */
        if(n_content > CONTENT_LENGTH){
            fclose(f);
            error(_("Series %d: Header line is too long (max length %d)"),
                  n+1, CONTENT_LENGTH);
        }

        n_found = FALSE;

        /* Find the first '=' character (N or I field) */
        found1 = strchr(line, '=');
        /* Not a header line, not a valid file */
        if(found1 == NULL){
            fclose(f);
            error(_("Series %d: No '=' found when header line was expected"),
                  n+1);
        }
        if(found1 == line){
            fclose(f);
            error(_("Series %d: No room for number before first '='"), n+1);
        }
        /* Convert the part left of the first '=' to an integer */
        read_int = strtoll(line, &endp, 10);
        if(endp != found1){
            fclose(f);
            error(_("Series %d: Only a number must be found right before 1st '='"),
                  n+1);
        }
        if(read_int > R_INT_MAX || read_int < R_INT_MIN){
            fclose(f);
            error(_("Series %d: Number %lld exceeds integer range"),
                  n+1, read_int);
        }
        /* We assume the field id is right after the '=' */
        field_id = toupper((unsigned char)(*(found1+1)));
        /* We allow N (n) and I (i) fields in either order */
        if(field_id == 'N'){
            n_found = TRUE;
            if(read_int <= 0){
                fclose(f);
                /* read_int is long long, so %lld is the matching format */
                error(_("Series %d: Length of series must be at least one (%lld seen)"),
                      n+1, read_int);
            }
            this->n = (int) read_int;
        } else if(field_id == 'I'){
            this->first_yr = (int) read_int;
        } else{
            fclose(f);
            error(_("Series %d: Unknown field id: %c"), n+1, *(found1+1));
        }
        /* Require space */
        if(*(found1+2) != ' '){
            fclose(f);
            error(_("Series %d: Space required between N and I fields"), n+1);
        }

        /* Find the second '=' character (I or N field) */
        found2 = strchr(found1+3, '=');
        if(found2 == NULL){
            fclose(f);
            error(_("Series %d: Second '=' missing"), n+1);
        }
        if(found2 == found1+3){
            fclose(f);
            error(_("Series %d: No room for number before second '='"), n+1);
        }
        read_int = strtoll(found1+3, &endp, 10);
        if(endp != found2){
            fclose(f);
            error(_("Series %d: Only a number must be found after first field, right before 2nd '='"),
                  n+1);
        }
        if(read_int > R_INT_MAX || read_int < R_INT_MIN){
            fclose(f);
            error(_("Series %d: Number %lld exceeds integer range"),
                  n+1, read_int);
        }
        field_id = toupper((unsigned char)(*(found2+1)));
        /* The second field must be the one NOT seen yet; otherwise one
         * of first_yr / n would be left uninitialized. */
        if(n_found == TRUE && field_id == 'I'){
            this->first_yr = (int) read_int;
        } else if(n_found == FALSE && field_id == 'N'){
            if(read_int <= 0){
                fclose(f);
                error(_("Series %d: Length of series must be at least one (%lld seen)"),
                      n+1, read_int);
            }
            this->n = (int) read_int;
        } else{
            fclose(f);
            error(_("Series %d: Unknown or doubled field id: %c"),
                  n+1, *(found2+1));
        }

        /* Check for overflow */
        if(this->first_yr > 1 && this->n - 1 > R_INT_MAX - this->first_yr){
            fclose(f);
            error(_("Series %d: Last year exceeds integer range"), n+1);
        }

        /* Update global first and last year */
        if(this->first_yr < first_yr)
            first_yr = this->first_yr;
        this_last = this->first_yr + (this->n - 1);
        if(this_last > last_yr)
            last_yr = this_last;

        point = found2+2;
        /* Require one space.  The series ID has not been read yet, so it
         * cannot be part of the message. */
        if(*point != ' '){
            fclose(f);
            error(_("Series %d: Space required before ID"), n+1);
        } else {
            ++point;
        }
        /* Skip further spaces */
        while(*point == ' ')
            ++point;

        /* Find last character of series id.  Do not search past the
         * string terminator when the line ends right here. */
        found_tilde = (*point != '\0') ? strchr(point+1, '~') : NULL;
        if(found_tilde == NULL || found_tilde < point + 2){
            fclose(f);
            error(_("Series %d: '~' not found in expected location"), n+1);
        }
        point2 = found_tilde - 1;
        while(*point2 != ' ' && point2 > point + 1)
            --point2;
        --point2;
        while(*point2 == ' ')
            --point2;

        /* Read series id */
        if(isprint((unsigned char)(*point))){
            id_start = point;
            ++point;
            while(point < point2 + 1){
                if(!isprint((unsigned char)(*point))){
                    fclose(f);
                    error(_("Series %d: Invalid character in series ID"),
                          n+1);
                }
                ++point;
            }
            id_length = (int)(point - id_start);
            tmp_name = (char *) R_alloc(id_length+1, sizeof(char));
            strncpy(tmp_name, id_start, id_length);
            tmp_name[id_length] = '\0'; /* Null termination */
            this->id = tmp_name;
        } else {
            fclose(f);
            error(_("Series %d: Alphanumeric series ID not found"), n+1);
        }

        /* Require space */
        if(*point != ' '){
            fclose(f);
            error(_("Series %d (%s): Space required after alphanumeric ID"),
                  n+1, this->id);
        }

        /* Read number format description, must be <exp>(<n>F<w>.<d>)~ */
        ++point;
        exponent = (int) strtol(point, &endp, 10);
        if(endp == point){
            fclose(f);
            error(_("Series %d (%s): Exponent not found"), n+1, this->id);
        }
        if(exponent < 0){
            exponent = -exponent;
            divide = TRUE;
        } else{
            divide = FALSE;
        }
        if(snprintf(mplier_str, MPLIER_LENGTH, "1e%d", exponent) >=
           MPLIER_LENGTH){
            fclose(f);
            error(_("Series %d (%s): Exponent has too many characters"),
                  n+1, this->id);
        }

        if(*endp != '('){
            fclose(f);
            error(_("Series %d (%s): Opening parenthesis required after exponent"),
                  n+1, this->id);
        }
        found_leftpar = endp;

        found_dot = strchr(found_leftpar+1, '.');
        if(found_dot == NULL){
            fclose(f);
            error(_("Series %d (%s): No dot found in number format description"),
                  n+1, this->id);
        }

        found_rightpar = strchr(found_dot+1, ')');
        if(found_rightpar == NULL){
            fclose(f);
            error(_("Series %d (%s): No closing parenthesis found"),
                  n+1, this->id);
        }

        if(divide == TRUE){
            divisor = strtod(mplier_str, NULL);
            mplier = 1 / divisor; /* Only for information purpose */
        } else{
            mplier = strtod(mplier_str, NULL);
        }
        this->mplier = mplier;

        point = found_leftpar+1;
        n_repeats = (int) strtol(point, &endp, 10);
        if(endp == point){
            fclose(f);
            error(_("Series %d (%s): Number of values per line not found"),
                  n+1, this->id);
        }
        if(n_repeats < 1){
            fclose(f);
            error(_("Series %d (%s): At least one value per line is needed"),
                  n+1, this->id);
        }
        if(n_repeats > CONTENT_LENGTH){
            fclose(f);
            error(_("Series %d (%s): Number of values per line (%d) > max line length (%d)"),
                  n+1, this->id, n_repeats, CONTENT_LENGTH);
        }
        if(*endp != 'F'){
            fclose(f);
            error(_("Series %d (%s): Only 'F' number format is supported"),
                  n+1, this->id);
        }

        point = endp+1;
        field_width = (int) strtol(point, &endp, 10);
        if(endp == point){
            fclose(f);
            error(_("Series %d (%s): Field width not found"),
                  n+1, this->id);
        }
        if(endp != found_dot){
            fclose(f);
            error(_("Series %d (%s): Field width and '.' must be adjacent"),
                  n+1, this->id);
        }
        if(field_width < 1){
            fclose(f);
            error(_("Series %d (%s): Field width must be at least one (%d seen)"),
                  n+1, this->id, field_width);
        }

        point = found_dot+1;
        precision = strtol(point, &endp, 10);
        if(endp == point){
            fclose(f);
            error(_("Series %d (%s): Number of decimals not found"),
                  n+1, this->id);
        }
        if(endp != found_rightpar){
            fclose(f);
            error(_("Series %d (%s): Number of decimals and ')' must be adjacent"),
                  n+1, this->id);
        }
        if(precision != 0L){
            fclose(f);
            error(_("Series %d (%s): No (implied) decimal places allowed in format"),
                  n+1, this->id);
        }

        n_x_w = n_repeats * field_width;
        if(n_x_w > CONTENT_LENGTH){
            fclose(f);
            error(_("Series %d (%s): Required line length %d exceeds the maximum %d"),
                  n+1, this->id, n_x_w, CONTENT_LENGTH);
        }

        /* Temporary storage for the data on the following lines */
        this->data = (double *) R_alloc(this->n, sizeof(double));
        /* Number of full-length lines (integer division truncates) */
        n_lines = this->n / n_repeats;
        /* Number of values on the (possible) left-over line */
        remainder = this->n - n_lines * n_repeats;

        /* Read the data (full lines) */
        idx = -n_repeats;
        for(i=0; i<n_lines; i++){
            if(fgets_eol(line, &n_content, LINE_LENGTH, f) == NULL){
                fclose(f);
                error(_("Series %d (%s): Unexpected end of file (%d data lines read)"),
                      n+1, this->id, i);
            }
            if((remainder > 0 || !feof(f)) && n_content > CONTENT_LENGTH){
                fclose(f);
                error(_("Series %d (%s): Data line %d is too long (max length %d)"),
                      n+1, this->id, i+1, CONTENT_LENGTH);
            }
            point = line + n_x_w;
            /* Jump to one past the last slot of this line; the fields
             * are then stored while walking backwards. */
            idx += n_repeats << 1;
            /* Read backwards */
            for(j=0; j<n_repeats; j++){
                *point = '\0'; /* overwrite is OK because number has been read */
                old_point = point;
                point -= field_width; /* pick a piece of field_width characters */
                read_double = strtod(point, &endp);
                if(endp != old_point){ /* numbers must be right aligned */
                    fclose(f);
                    error(_("Series %d (%s): Could not read number (data row %d, field %d).\nMalformed number or previous line too long."),
                          n+1, this->id, i+1, n_repeats-j);
                }
                /* Division by a precise number (integer value) is
                 * more accurate than multiplication with an
                 * approximate number. Example from R:
                 * > foo=seq(0,1,length.out=100)
                 * > length(which(foo/100!=foo*0.01))
                 * [1] 10
                 */
                if(divide == TRUE)
                    this->data[--idx] = read_double / divisor;
                else
                    this->data[--idx] = read_double * mplier;
            }
        }

        /* Read the data (possibly remaining shorter line) */
        if(remainder > 0){
            if(fgets_eol(line, &n_content, LINE_LENGTH, f) == NULL){
                fclose(f);
                error(_("Series %d (%s): Unexpected end of file (%d data lines read)"),
                      n+1, this->id, n_lines);
            }
            if(!feof(f) && n_content > CONTENT_LENGTH){
                fclose(f);
                error(_("Series %d (%s): Data line %d is too long (max length %d)"),
                      n+1, this->id, n_lines+1, CONTENT_LENGTH);
            }
            point = line + remainder * field_width;
            idx += n_repeats + remainder;
            for(j=0; j<remainder; j++){
                *point = '\0';
                old_point = point;
                point -= field_width;
                read_double = strtod(point, &endp);
                if(endp != old_point){
                    fclose(f);
                    error(_("Series %d (%s): Could not read number (data row %d, field %d).\nMalformed number or previous line too long."),
                          n+1, this->id, n_lines+1, remainder-j);
                }
                if(divide == TRUE)
                    this->data[--idx] = read_double / divisor;
                else
                    this->data[--idx] = read_double * mplier;
            }
        }

        /* Prepare for possible next round of the loop (next series) */
        this->next = (rwlnode *) R_alloc(1, sizeof(rwlnode));
        this = this->next;
        ++n;
    }

    if(ferror(f)){
        fclose(f);
        error(_("Error reading file %s"), fname);
    }

    /* Close the file (ignore return value) */
    fclose(f);

    if(n == 0)
        error(_("No data found in file %s"), fname);

    /* Transform the results to a list with 8 elements */
    PROTECT(result = allocVector(VECSXP, 8));

    /* [[1]] First year of all data */
    SET_VECTOR_ELT(result, 0, ScalarInteger(first_yr));
    /* [[2]] Last year of all data */
    SET_VECTOR_ELT(result, 1, ScalarInteger(last_yr));
    /* [[3]] Series ID */
    PROTECT(series_id = allocVector(STRSXP, n));
    /* [[4]] First year of series */
    PROTECT(series_first = allocVector(INTSXP, n));
    /* [[5]] Last year of series */
    PROTECT(series_last = allocVector(INTSXP, n));
    /* [[6]] Multiplier (precision) */
    PROTECT(series_mplier = allocVector(REALSXP, n));
    /* [[7]] Numeric data (ring widths) */
    PROTECT(series_data = allocMatrix(REALSXP, last_yr - first_yr + 1, n));
    /* [[8]] Project comments */
    PROTECT(project_comments = allocVector(STRSXP, n_comments));

    /* C access to the numeric R data structures.
     * - first two (scalars, i.e. vector of length one) already done
     * - SET_STRING_ELT is used for accessing the character vectors
     */
    i_first = INTEGER(series_first);
    i_last = INTEGER(series_last);
    r_mplier = REAL(series_mplier);
    r_data = REAL(series_data);

    /* idx2 is for indexing r_data.
     * The matrix series_data is stored in column-major order: We
     * proceed one series at a time, simply incrementing idx2 on each
     * (carefully planned) write to the array.
     */
    idx2 = 0;
    this = &first;
    for(i=0; i<n; i++){
        this_last = this->first_yr + (this->n - 1);
        SET_STRING_ELT(series_id, i, mkChar(this->id));
        i_first[i] = this->first_yr;
        i_last[i] = this_last;
        r_mplier[i] = this->mplier;
        /* Add NA to beginning */
        for(j=0; j < this->first_yr - first_yr; j++)
            r_data[idx2++] = NA_REAL;
        /* Add data to middle */
        for(j=0; j < this->n; j++)
            r_data[idx2++] = this->data[j];
        /* Add NA to end */
        for(j=0; j < last_yr - this_last; j++)
            r_data[idx2++] = NA_REAL;
        this = this->next;
    }

    comment_this = &comment_first;
    for(i=0; i<n_comments; i++){
        SET_STRING_ELT(project_comments, i, mkChar(comment_this->text));
        comment_this = comment_this->next;
    }

    SET_VECTOR_ELT(result, 7, project_comments);
    SET_VECTOR_ELT(result, 6, series_data);
    SET_VECTOR_ELT(result, 5, series_mplier);
    SET_VECTOR_ELT(result, 4, series_last);
    SET_VECTOR_ELT(result, 3, series_first);
    SET_VECTOR_ELT(result, 2, series_id);

    UNPROTECT(7);
    return(result);
}

/* Copy the rows parsed into one thread's row-major buffers to the result
 * columns of DT, starting at row ctx->DTi.
 *
 * Everything is read out of ctx into locals first.  String columns are
 * handled before the others, inside a single "omp critical" section;
 * the fixed-width columns (sizes 8, 4 and 1) are copied afterwards
 * outside of it.
 */
void pushBuffer(ThreadLocalFreadParsingContext *ctx)
{
  const void *buff8 = ctx->buff8;
  const void *buff4 = ctx->buff4;
  const void *buff1 = ctx->buff1;
  const char *anchor = ctx->anchor;
  int nRows = (int) ctx->nRows;
  size_t DTi = ctx->DTi;
  int rowSize8 = (int) ctx->rowSize8;
  int rowSize4 = (int) ctx->rowSize4;
  int rowSize1 = (int) ctx->rowSize1;
  int nStringCols = ctx->nStringCols;
  int nNonStringCols = ctx->nNonStringCols;

  // Pass 1: string columns. All of them are done in one critical section.
  if (nStringCols) {
    #pragma omp critical
    {
      int slot8 = 0;                      // index of this column among the 8-byte fields of a row
      const int stride8 = rowSize8 / 8;   // number of 8-byte fields per buffered row
      lenOff *fields8 = (lenOff*) buff8;
      int resj = -1;                      // column index in DT (dropped columns are not counted)
      int done = 0;                       // string columns handled so far
      for (int j=0; done<nStringCols && j<ncol; j++) {
        if (type[j] == CT_DROP) continue;
        resj++;
        if (type[j] == CT_STRING) {
          SEXP dest = VECTOR_ELT(DT, resj);
          lenOff *source = fields8 + slot8;
          for (int i=0; i<nRows; i++) {
            const int strLen = source->len;
            // strLen==0 leaves the destination element as it already is;
            // a negative length is stored as NA_STRING.
            if (strLen < 0) {
              SET_STRING_ELT(dest, DTi+i, NA_STRING);
            } else if (strLen > 0) {
              SET_STRING_ELT(dest, DTi+i, mkCharLenCE(anchor + source->off, strLen, ienc));
            }
            source += stride8;
          }
          done++;  // lets the loop stop once every string column has been seen
        }
        slot8 += (size[j] == 8);
      }
    }
  }

  // Pass 2: non-string columns. off8/off4/off1 are the byte positions of
  // column j within the first row of the respective row-major buffer.
  int off1 = 0, off4 = 0, off8 = 0;
  int resj = -1;
  int done = 0;
  for (int j=0; done<nNonStringCols && j<ncol; j++) {
    if (type[j]==CT_DROP) continue;
    const int thisSize = size[j];
    resj++;
    if (type[j]!=CT_STRING && type[j]>0) {
      switch (thisSize) {
      case 8: {
        char *dest = (char *)DATAPTR(VECTOR_ELT(DT, resj)) + DTi*8;
        char *src8 = (char*)buff8 + off8;
        for (int i=0; i<nRows; i++) {
          memcpy(dest, src8, 8);
          src8 += rowSize8;
          dest += 8;
        }
      } break;
      case 4: {
        char *dest = (char *)DATAPTR(VECTOR_ELT(DT, resj)) + DTi*4;
        char *src4 = (char*)buff4 + off4;
        for (int i=0; i<nRows; i++) {
          memcpy(dest, src4, 4);
          src4 += rowSize4;
          dest += 4;
        }
      } break;
      case 1: {
        if (type[j] > CT_BOOL8_L) STOP("Field size is 1 but the field is of type %d\n", type[j]);
        Rboolean *dest = (Rboolean *)((char *)DATAPTR(VECTOR_ELT(DT, resj)) + DTi*sizeof(Rboolean));
        char *src1 = (char*)buff1 + off1;
        for (int i=0; i<nRows; i++) {
          // widen the 1-byte value; INT8_MIN is the in-buffer NA marker
          const int8_t v = *(int8_t *)src1;
          if (v == INT8_MIN) *dest = NA_INTEGER;
          else *dest = v;
          src1 += rowSize1;
          dest++;
        }
      } break;
      default:
        STOP("Runtime error: unexpected field of size %d\n", thisSize);
      }
      done++;
    }
    // advance whichever offset matches this column's size
    off8 += (thisSize & 8);
    off4 += (thisSize & 4);
    off1 += (thisSize & 1);
  }
}

// Fit a CNV mixture model and return the results to R.
//
// Arguments are R objects: ncomp_a / nind_a (integer scalars), hyp
// (character, "H0" selects H0 and anything else H1), data (list with the
// signal, trait, batch, strata and starting values), offset_logit and the
// three design matrices (read through REAL()), control_parameters (list
// with "tol", "max.iter", "min.freq", "logP.outliers"), and mix_model /
// pi_model (integer scalars).
//
// Returns a list of length 2: a numeric matrix with nind*ncomp rows and
// 8 columns holding the values produced by the fitting routine, and a
// character scalar with the status string.
//
// NOTE(review): invalid arguments still terminate the process through
// exit(1), as before; converting these to R errors would be friendlier to
// an interactive session but changes how callers observe failures.
SEXP C_fitmodel (const SEXP ncomp_a, const SEXP nind_a, const SEXP hyp, const SEXP data, const SEXP offset_logit, const SEXP design_mean, const SEXP design_variance, const SEXP design_disease, const SEXP control_parameters, const SEXP mix_model, const SEXP pi_model)
{
  if (TYPEOF(hyp) != STRSXP) {cerr<<"hyp should be a character\n";exit(1);}
  if (TYPEOF(ncomp_a) != INTSXP) {cerr<<"Argument error - ncomp"<<endl;exit(1);}
  if (TYPEOF(nind_a) != INTSXP) {cerr<<"Argument error - nind"<<endl;exit(1);}
  if (TYPEOF(data) != VECSXP) {cerr<<"Argument error - data"<<endl;exit(1);}
  if (TYPEOF(control_parameters) != VECSXP) {cerr<<"Argument error - control_parameters"<<endl;exit(1);}
  if (TYPEOF(mix_model) != INTSXP) {cerr<<"Argument error - mix.model"<<endl;exit(1);}
  if (TYPEOF(pi_model) != INTSXP) {cerr<<"Argument error - pi.model"<<endl;exit(1);}

  const double tol = *REAL(getListElement(control_parameters, "tol"));
  const double max_iter = *REAL(getListElement(control_parameters, "max.iter"));
  const double min_freq = *REAL(getListElement(control_parameters, "min.freq"));
  const double logP_threshold = *REAL(getListElement(control_parameters, "logP.outliers"));

  const double * logit_offset_p = REAL(offset_logit);
  const double * mean_design = REAL(design_mean);
  const double * var_design = REAL(design_variance);
  const double * disease_design = REAL(design_disease);

  const int * mean_dims = INTEGER(getAttrib(design_mean, R_DimSymbol));
  const int * variance_dims = INTEGER(getAttrib(design_variance, R_DimSymbol));
  const int * disease_dims = INTEGER(getAttrib(design_disease, R_DimSymbol));

  const string test (CHAR ( STRING_ELT (hyp, 0)));

  ///////////// From here on all arguments are treated as read only
  const int ncomp = *INTEGER(ncomp_a);
  const int nind = *INTEGER(nind_a);
  const int ntotal = nind*ncomp;   // one entry per (individual, component) pair

  // Stratification for the association model is currently disabled.
  const int * strat_assoc = NULL;
  const int * strat_var = INTEGER(getListElement(data, "strata.var"));
  const int * strat_mean = INTEGER(getListElement(data, "strata.mean"));
  const int * cohort = INTEGER(getListElement(data, "batch"));

  const double * alpha_start = REAL(getListElement(data, "alpha.start"));
  const double * disease = REAL(getListElement(data, "trait"));
  const double * signal = REAL(getListElement(data, "signal"));
  const double * nu_start = REAL(getListElement(data, "nu.start"));
  const double * mean_start = REAL(getListElement(data, "mean.start"));
  const double * var_start = REAL(getListElement(data, "var.start"));

  // Count the distinct strata used for the variances.  Labels index a
  // fixed table of 500 counters, so both bounds must be checked.
  int nstrat_var = 0;
  vector<int> array_strat_var (500, 0);
  for (int i = 0; i < ntotal; i++) {
    if (strat_var[i] < 0) {cerr<<"Strata indices must be non-negative\n";exit(1);}
    if (strat_var[i] > 499) {cerr<<"No more than 500 strata are allowed\n";exit(1);}
    array_strat_var[ strat_var[i] ]++;
    if ( array_strat_var[ strat_var[i] ] == 1 ) nstrat_var++;
  }

  // Same for the means.
  int nstrat_mean = 0;
  vector<int> array_strat_mean (500, 0);
  for (int i = 0; i < ntotal; i++) {
    if (strat_mean[i] < 0) {cerr<<"Strata indices must be non-negative\n";exit(1);}
    if (strat_mean[i] > 499) {cerr<<"No more than 500 strata are allowed\n";exit(1);}
    array_strat_mean[ strat_mean[i] ]++;
    if ( array_strat_mean[ strat_mean[i] ] == 1 ) nstrat_mean++;
  }

  // Single stratum for the association model (see strat_assoc above).
  int nstrat_assoc = 1;

  // Which frequency model
  MODEL fit_model = DISEASE;
  if( *INTEGER(pi_model) == 0 ) fit_model = DISEASE;
  else if( *INTEGER(pi_model) == 1 || length(design_disease) <= 1 ) fit_model = HETERO;
  else if( *INTEGER(pi_model) == 2 ) fit_model = QT;
  else{
    // An unrecognized value is reported and the default (DISEASE) is kept.
    Rprintf("Invalid model parameter : %d \n",*INTEGER(pi_model) );
  }

  // Which hypothesis
  HYPOTHESIS h = (test == "H0") ? H0 : H1;

  // Component type: 1 -> fit_model_gaussian, 2 -> fit_model_t
  int component_type = *INTEGER(mix_model) <= 20 ? 1 : 2;

  CNV_signal * myCNV = new CNV_signal(nind, ncomp, cohort, signal, disease, mean_start, var_start, nu_start, alpha_start, logit_offset_p,
				      mean_design, var_design, disease_design,
				      mean_dims[1], variance_dims[1], disease_dims[1],
				      fit_model, h, logP_threshold, min_freq,
				      strat_var, nstrat_var,
				      strat_mean, nstrat_mean,
				      strat_assoc, nstrat_assoc);

  string status;
  vector<double> dpost;

  // Fit the required model
  if(component_type == 1){
    int signal_model = *INTEGER(mix_model) - 10;
    fit_model_gaussian( myCNV, dpost, status, max_iter, tol, signal_model);
  }
  if(component_type == 2){
    int signal_model = *INTEGER(mix_model) - 2000;
    fit_model_t( myCNV, dpost, status, max_iter, tol, signal_model );
  }

  // dpost and status own their contents, so the model object can be
  // released before any R allocation: an allocation failure below does
  // not return here and would otherwise leak it.
  delete myCNV;

  // Interface with R
  SEXP post, stat, ret;
  PROTECT(post = allocMatrix(REALSXP, ntotal, 8));
  double * dres = REAL(post);

  // Copy the fitted values.  Never write past the matrix, and never leave
  // cells uninitialized if fewer values than expected were produced.
  const size_t nres = (size_t) nind * (size_t) ncomp * 8;
  const size_t ncopy = dpost.size() < nres ? dpost.size() : nres;
  for (size_t i = 0; i < ncopy; i++) {
    dres[i] = dpost[i];
  }
  for (size_t i = ncopy; i < nres; i++) {
    dres[i] = NA_REAL;
  }

  PROTECT(stat=allocVector(STRSXP,1));   // Allocate storage for status string
  SET_STRING_ELT(stat, 0, mkChar( status.c_str() ));

  PROTECT( ret = allocVector(VECSXP, 2) );   // Allocate and fill return list
  SET_VECTOR_ELT(ret, 0, post);
  SET_VECTOR_ELT(ret, 1, stat);

  UNPROTECT(3);
  return ret;
}

/*
 * Given a minc filename, return a named R list describing the volume.
 *
 * Entries are appended in this order: volumeDataClass, volumeDataType,
 * spaceType, nDimensions, dimSizes, dimStarts, dimSteps, nFrames, dimNames,
 * dimUnits and -- only when a "time" dimension is present -- timeOffsets and
 * timeWidths.  The list is allocated with R_RTN_LIST_LEN slots; slots that
 * are not filled here are left as allocated.
 *
 * Only 3- and 4-dimensional volumes are accepted.  Every failure is raised
 * as an R error; once the volume has been opened it is closed again before
 * the error is raised.
 */
SEXP get_volume_info(SEXP filename) {

    mihandle_t      minc_volume;
    midimhandle_t   *dimensions;
    miclass_t       volume_class;
    mitype_t        volume_type;
    int             result, i;
    int             n_dimensions;
    misize_t        n_dimensions_misize_t;
    int             n_protects, list_index;
    int             n_frames;

    // variables to hold dim-related info
    misize_t    dim_sizes[MI2_MAX_VAR_DIMS];
    double      dim_starts[MI2_MAX_VAR_DIMS];
    double      dim_steps[MI2_MAX_VAR_DIMS];
    char        *dim_name;
    char        *dim_units;
    char        *space_type;
    Rboolean    time_dim_exists;
    static char *dimorder3d[] = { "zspace","yspace","xspace" };
    static char *dimorder4d[] = { "time", "zspace","yspace","xspace" };

    /* declare R datatypes */
    SEXP rtnList, listNames;
    SEXP xDimSizes, xDimNames, xDimUnits, xDimStarts, xDimSteps, xTimeWidths, xTimeOffsets;

/* error() does not return, so the volume has to be closed before raising. */
#define VOLUME_INFO_FAIL(...) \
    do { miclose_volume(minc_volume); error(__VA_ARGS__); } while (0)

    // start ...
    if ( R_DEBUG_mincIO ) Rprintf("get_volume_info: start ...\n");

    /* the filename is dereferenced several times below, so check it once */
    if ( !isString(filename) || length(filename) < 1 ) {
        error("get_volume_info: filename must be a character vector of length >= 1.\n");
    }

    /* do some initialization */
    for (i=0; i < MI2_MAX_VAR_DIMS; ++i){   // set dim info to zeros
        dim_sizes[i] = 0;
        dim_starts[i] = 0;
        dim_steps[i] = 0;
    }

    // frame-related init
    time_dim_exists = FALSE;
    n_frames = 0;
    n_protects = 0;     // counter of protected R variables

    /* init the return list (include list names) */
    PROTECT(rtnList=allocVector(VECSXP, R_RTN_LIST_LEN));
    PROTECT(listNames=allocVector(STRSXP, R_RTN_LIST_LEN));
    n_protects = n_protects +2;

    /* open the existing volume */
    result = miopen_volume(CHAR(STRING_ELT(filename,0)), MI2_OPEN_READ, &minc_volume);
    /* error on open? (nothing to close yet) */
    if (result != MI_NOERROR) {
        error("Error opening input file: %s.\n", CHAR(STRING_ELT(filename,0)));
    }

    /* set the apparent order to something conventional */
    // ... first need to get the number of dimensions
    if ( miget_volume_dimension_count(minc_volume, MI_DIMCLASS_ANY, MI_DIMATTR_ALL, &n_dimensions) != MI_NOERROR ){
        VOLUME_INFO_FAIL("Error returned from miget_volume_dimension_count.\n");
    }
    n_dimensions_misize_t = (misize_t) n_dimensions;

    // ... now set the order
    if ( R_DEBUG_mincIO ) Rprintf("Setting the apparent order for %d dimensions ... ", n_dimensions);
    if ( n_dimensions == 3 ) {
        result = miset_apparent_dimension_order_by_name(minc_volume, 3, dimorder3d);
    } else if ( n_dimensions == 4 ) {
        result = miset_apparent_dimension_order_by_name(minc_volume, 4, dimorder4d);
    } else {
        VOLUME_INFO_FAIL("Error file %s has %d dimensions and we can only deal with 3 or 4.\n", CHAR(STRING_ELT(filename,0)), n_dimensions);
    }
    if ( result != MI_NOERROR ) {
        VOLUME_INFO_FAIL("Error returned from miset_apparent_dimension_order_by_name while setting apparent order for %d dimensions.\n", n_dimensions);
    }
    if ( R_DEBUG_mincIO ) Rprintf("Done.\n");

    /* get the volume data class (the intended "real" values) */
    if ( miget_data_class(minc_volume, &volume_class) != MI_NOERROR ){
        VOLUME_INFO_FAIL("Error returned from miget_data_class.\n");
    }
    /* append to return list ... */
    list_index = 0;
    SET_VECTOR_ELT(rtnList, list_index, ScalarInteger(volume_class));
    SET_STRING_ELT(listNames, list_index, mkChar("volumeDataClass"));

    /* print the volume data type (as it is actually stored in the volume) */
    if ( miget_data_type(minc_volume, &volume_type) != MI_NOERROR ){
        VOLUME_INFO_FAIL("Error returned from miget_data_type.\n");
    }
    /* append to return list ... */
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, ScalarInteger(volume_type));
    SET_STRING_ELT(listNames, list_index, mkChar("volumeDataType"));

    /* retrieve the volume space type (talairach, native, etc).
     * The previous code raised when the call returned MI_NOERROR, i.e. on
     * success.  Rather than guess the return convention, success is judged by
     * whether a name was actually handed back.
     * NOTE(review): confirm against the libminc documentation whether the
     * return code can be relied on here. */
    space_type = NULL;
    (void) miget_space_name(minc_volume, &space_type);
    if ( space_type == NULL ) {
        VOLUME_INFO_FAIL("Error returned from miget_space_name.\n");
    }
    /* append to return list ... */
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, mkString(space_type));
    SET_STRING_ELT(listNames, list_index, mkChar("spaceType"));
    mifree_name(space_type);    // mkString copied it; release libminc's buffer

    /* retrieve the total number of dimensions in this volume */
    if ( miget_volume_dimension_count(minc_volume, MI_DIMCLASS_ANY, MI_DIMATTR_ALL, &n_dimensions) != MI_NOERROR ){
        VOLUME_INFO_FAIL("Error returned from miget_volume_dimension_count.\n");
    }
    /* append to return list ... */
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, ScalarInteger(n_dimensions));
    SET_STRING_ELT(listNames, list_index, mkChar("nDimensions"));

    /* load up dimension-related information */
    /* first allocate the R variables */
    PROTECT( xDimSizes=allocVector(INTSXP,n_dimensions) );
    PROTECT( xDimNames=allocVector(STRSXP,n_dimensions) );
    PROTECT( xDimUnits=allocVector(STRSXP,n_dimensions) );
    PROTECT( xDimStarts=allocVector(REALSXP,n_dimensions) );
    PROTECT( xDimSteps=allocVector(REALSXP,n_dimensions) );
    n_protects = n_protects +5;

    /* next, load up the midimension struct for all dimensions.
     * R_alloc raises an R error itself on failure and its memory is reclaimed
     * by R when the call ends, including after error(), so there is no
     * explicit free. */
    dimensions = (midimhandle_t *) R_alloc( n_dimensions, sizeof( midimhandle_t ) );
    result = miget_volume_dimensions(minc_volume, MI_DIMCLASS_ANY, MI_DIMATTR_ALL, MI_DIMORDER_APPARENT, n_dimensions, dimensions);
    // need to check against MI_ERROR, as "result" will contain nDimensions if OK
    if ( result == MI_ERROR ) {
        VOLUME_INFO_FAIL("Error code(%d) returned from miget_volume_dimensions.\n", result);
    }

    /* get the dimension sizes for all dimensions */
    result = miget_dimension_sizes(dimensions, n_dimensions_misize_t, dim_sizes);
    if ( result != MI_NOERROR ) {
        VOLUME_INFO_FAIL("Error returned from miget_dimension_sizes.\n");
    }
    /* add to R vector ... */
    for (i=0; i<n_dimensions; ++i){
        INTEGER(xDimSizes)[i] = (int) dim_sizes[i];
    }
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, xDimSizes);
    SET_STRING_ELT(listNames, list_index, mkChar("dimSizes"));

    /* get the dimension START values for all dimensions */
    result = miget_dimension_starts(dimensions, MI_ORDER_FILE, n_dimensions, dim_starts);
    if ( result == MI_ERROR ) {
        VOLUME_INFO_FAIL("Error returned from miget_dimension_starts.\n");
    }
    /* add to R vector ... */
    for (i=0; i<n_dimensions; ++i){
        REAL(xDimStarts)[i] = dim_starts[i];
    }
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, xDimStarts);
    SET_STRING_ELT(listNames, list_index, mkChar("dimStarts"));

    /* get the dimension STEP values for all dimensions */
    result = miget_dimension_separations(dimensions, MI_ORDER_FILE, n_dimensions, dim_steps);
    if ( result == MI_ERROR ) {
        VOLUME_INFO_FAIL("Error returned from miget_dimension_separations.\n");
    }
    /* add to R vector ... */
    for (i=0; i<n_dimensions; ++i){
        REAL(xDimSteps)[i] = dim_steps[i];
    }
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, xDimSteps);
    SET_STRING_ELT(listNames, list_index, mkChar("dimSteps"));

    /* Loop over the dimensions to grab the remaining info ... */
    for( i=0; i < n_dimensions; ++i ){
        /* The name and units strings are allocated by libminc, so each one is
         * released with mifree_name once R has copied it. */
        dim_name = NULL;
        result = miget_dimension_name(dimensions[i], &dim_name);
        if ( result == MI_ERROR || dim_name == NULL ) {
            VOLUME_INFO_FAIL("Error returned from miget_dimension_name.\n");
        }

        // do we have a time dimension?  With the apparent order set above,
        // "time" is dimension 0, hence dim_sizes[0].
        if ( !strcmp(dim_name, "time") ) {
            time_dim_exists = TRUE;
            n_frames = (int) dim_sizes[0];
        }

        // store the dimension name and units
        SET_STRING_ELT(xDimNames, i, mkChar(dim_name));
        mifree_name(dim_name);

        dim_units = NULL;
        result = miget_dimension_units(dimensions[i], &dim_units);
        if ( result == MI_ERROR || dim_units == NULL ) {
            VOLUME_INFO_FAIL("Error returned from miget_dimension_units.\n");
        }
        SET_STRING_ELT(xDimUnits, i, mkChar(dim_units));
        mifree_name(dim_units);
    }

    /* add number of frames to return list */
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, ScalarInteger(n_frames));
    SET_STRING_ELT(listNames, list_index, mkChar("nFrames"));

    // add dim names to return list
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, xDimNames);
    SET_STRING_ELT(listNames, list_index, mkChar("dimNames"));

    // add dim units
    list_index++;
    SET_VECTOR_ELT(rtnList, list_index, xDimUnits);
    SET_STRING_ELT(listNames, list_index, mkChar("dimUnits"));

    /* get the dimension OFFSETS values for the TIME dimension.
     * libminc writes straight into the R vectors (sized by n_frames) instead
     * of going through fixed-size stack arrays, so the frame count is not
     * capped.  Each vector is pre-filled with the 999.9 placeholder the stack
     * arrays used to carry. */
    if ( time_dim_exists ) {

        PROTECT( xTimeOffsets=allocVector(REALSXP,n_frames) );
        n_protects++;
        for (i=0; i < n_frames; ++i) {
            REAL(xTimeOffsets)[i] = 999.9;
        }
        result = miget_dimension_offsets(dimensions[0], n_frames, 0, REAL(xTimeOffsets));
        if ( result == MI_ERROR ) {
            VOLUME_INFO_FAIL("Error returned from miget_dimension_offsets.\n");
        }
        list_index++;
        SET_VECTOR_ELT(rtnList, list_index, xTimeOffsets);
        SET_STRING_ELT(listNames, list_index, mkChar("timeOffsets"));

        /* get the dimension WIDTH values for the TIME dimension */
        PROTECT( xTimeWidths=allocVector(REALSXP,n_frames) );
        n_protects++;
        for (i=0; i < n_frames; ++i) {
            REAL(xTimeWidths)[i] = 999.9;
        }
        result = miget_dimension_widths(dimensions[0], MI_ORDER_FILE, n_frames, 0, REAL(xTimeWidths));
        if ( result == MI_ERROR ) {
            VOLUME_INFO_FAIL("Error returned from miget_dimension_widths.\n");
        }
        list_index++;
        SET_VECTOR_ELT(rtnList, list_index, xTimeWidths);
        SET_STRING_ELT(listNames, list_index, mkChar("timeWidths"));
    }

#undef VOLUME_INFO_FAIL

    /* close volume */
    miclose_volume(minc_volume);

    /* attach the list component names to the list */
    setAttrib(rtnList, R_NamesSymbol, listNames);

    /* remove gc collection protection */
    UNPROTECT(n_protects);

    /* return */
    if ( R_DEBUG_mincIO ) Rprintf("get_volume_info: returning ...\n");
    return(rtnList);
}

/* C backend: compute the score component for each target node.
 *
 * The label in `score` is classified once (which per-node routine to call,
 * and whether a k * nparams penalty is subtracted), the extra arguments that
 * label needs are looked up in `extra_args`, and then one loop over `targets`
 * writes res[i].  A label that matches nothing leaves `res` untouched. */
void c_per_node_score(SEXP network, SEXP data, SEXP score, SEXP targets,
    SEXP extra_args, int debuglevel, double *res) {

  enum { FAM_NONE, FAM_DISCRETE, FAM_GAUSSIAN, FAM_CLG, FAM_DIRICHLET,
         FAM_WISHART } family = FAM_NONE;
  int i = 0, ntargets = length(targets);
  int penalized = 0;
  const char *label = CHAR(STRING_ELT(score, 0));
  double nparams = 0, *k = NULL, *counter = NULL;
  Rboolean sparse = FALSE;
  SEXP cur;
  SEXP iss = R_NilValue, prior = R_NilValue, beta = R_NilValue;
  SEXP phi = R_NilValue, expdata = R_NilValue;

  /* allocate dummy variable for the current node's label. */
  PROTECT(cur = allocVector(STRSXP, 1));

  /* classify the label and fetch only the extra arguments it uses. */
  if (strcmp(label, "loglik") == 0) {

    /* discrete log-likelihood score. */
    family = FAM_DISCRETE;

  }
  else if (strcmp(label, "loglik-g") == 0) {

    /* Gaussian log-likelihood score. */
    family = FAM_GAUSSIAN;

  }
  else if (strcmp(label, "loglik-cg") == 0) {

    /* Conditional Linear Gaussian log-likelihood score. */
    family = FAM_CLG;

  }
  else if ((strcmp(label, "aic") == 0) || (strcmp(label, "bic") == 0)) {

    /* AIC and BIC scores, discrete data. */
    family = FAM_DISCRETE;
    penalized = 1;

  }
  else if ((strcmp(label, "aic-g") == 0) || (strcmp(label, "bic-g") == 0)) {

    /* AIC and BIC scores, Gaussian data. */
    family = FAM_GAUSSIAN;
    penalized = 1;

  }
  else if ((strcmp(label, "aic-cg") == 0) || (strcmp(label, "bic-cg") == 0)) {

    /* AIC and BIC scores, Conditional Linear Gaussian data. */
    family = FAM_CLG;
    penalized = 1;

  }
  else if (strcmp(label, "bde") == 0) {

    /* Bayesian Dirichlet equivalent score (BDe). */
    family = FAM_DIRICHLET;
    iss = getListElement(extra_args, "iss");
    prior = getListElement(extra_args, "prior");
    beta = getListElement(extra_args, "beta");

  }
  else if (strcmp(label, "k2") == 0) {

    /* K2 label: the Dirichlet routine with every optional argument NULL. */
    family = FAM_DIRICHLET;

  }
  else if (strcmp(label, "bge") == 0) {

    /* Bayesian Gaussian equivalent score (BGe). */
    family = FAM_WISHART;
    iss = getListElement(extra_args, "iss");
    phi = getListElement(extra_args, "phi");
    prior = getListElement(extra_args, "prior");
    beta = getListElement(extra_args, "beta");

  }
  else if (strcmp(label, "bdes") == 0) {

    /* Bayesian Dirichlet equivalent sparse score (BDes). */
    family = FAM_DIRICHLET;
    sparse = TRUE;
    iss = getListElement(extra_args, "iss");

  }
  else if (strcmp(label, "mbde") == 0) {

    /* Mixture Bayesian Dirichlet equivalent score (mBDe). */
    family = FAM_DIRICHLET;
    iss = getListElement(extra_args, "iss");
    expdata = getListElement(extra_args, "exp");

  }

  if (penalized) {

    k = REAL(getListElement(extra_args, "k"));
    /* the log-likelihood routines report the parameter count through this. */
    counter = &nparams;

  }

  if (family != FAM_NONE) {

    for (i = 0; i < ntargets; i++) {

      SET_STRING_ELT(cur, 0, STRING_ELT(targets, i));
      DEBUG_BEFORE();

      switch (family) {

        case FAM_DISCRETE:
          res[i] = loglik_dnode(cur, network, data, counter, debuglevel);
          break;

        case FAM_GAUSSIAN:
          res[i] = loglik_gnode(cur, network, data, counter, debuglevel);
          break;

        case FAM_CLG:
          res[i] = loglik_cgnode(cur, network, data, counter, debuglevel);
          break;

        case FAM_DIRICHLET:
          res[i] = dirichlet_node(cur, network, data, iss, prior, beta,
                     expdata, sparse, debuglevel);
          break;

        case FAM_WISHART:
          res[i] = wishart_node(cur, network, data, iss, phi, prior, beta,
                     debuglevel);
          break;

        default:
          break;

      }

      if (penalized) {

        res[i] -= (*k) * nparams;

        if (debuglevel > 0)
          Rprintf(" > penalty is %lf x %.0lf = %lf.\n", *k, nparams, (*k) * nparams);

      }

    }

  }

  UNPROTECT(1);

}/*C_PER_NODE_SCORE*/

/*
 * Convert a Julia scalar to a length-one R vector.
 *
 *   Int8/16/32, UInt8/16        -> integer
 *   Int64, UInt32, UInt64       -> integer when inInt32Range() accepts the
 *                                  value, otherwise double
 *   Float32, Float64            -> double
 *   Bool                        -> logical
 *   UTF-8 / ASCII string        -> character
 *
 * Any other type yields R_NilValue.  The result is returned unprotected.
 */
static SEXP Julia_R_Scalar(jl_value_t *Var)
{
  SEXP ans = R_NilValue;
  double tmpfloat;

  //most common type is here
  if (jl_is_int32(Var))
  {
    ans = ScalarInteger(jl_unbox_int32(Var));
  }
  else if (jl_is_int64(Var))
  {
    tmpfloat = (double)jl_unbox_int64(Var);
    if (inInt32Range(tmpfloat))
      ans = ScalarInteger((int32_t)jl_unbox_int64(Var));
    else
      ans = ScalarReal(tmpfloat);
  }
  //more integer type
  else if (jl_is_uint32(Var))
  {
    tmpfloat = (double)jl_unbox_uint32(Var);
    if (inInt32Range(tmpfloat))
      ans = ScalarInteger((int32_t)jl_unbox_uint32(Var));
    else
      ans = ScalarReal(tmpfloat);
  }
  else if (jl_is_uint64(Var))
  {
    // must unbox as unsigned: the signed unbox turned values above
    // INT64_MAX into negative numbers
    tmpfloat = (double)jl_unbox_uint64(Var);
    if (inInt32Range(tmpfloat))
      ans = ScalarInteger((int32_t)jl_unbox_uint64(Var));
    else
      ans = ScalarReal(tmpfloat);
  }
  else if (jl_is_float64(Var))
  {
    ans = ScalarReal(jl_unbox_float64(Var));
  }
  else if (jl_is_float32(Var))
  {
    ans = ScalarReal(jl_unbox_float32(Var));
  }
  else if (jl_is_bool(Var))
  {
    ans = ScalarLogical(jl_unbox_bool(Var));
  }
  else if (jl_is_int8(Var))
  {
    ans = ScalarInteger(jl_unbox_int8(Var));
  }
  else if (jl_is_uint8(Var))
  {
    ans = ScalarInteger(jl_unbox_uint8(Var));
  }
  else if (jl_is_int16(Var))
  {
    ans = ScalarInteger(jl_unbox_int16(Var));
  }
  else if (jl_is_uint16(Var))
  {
    ans = ScalarInteger(jl_unbox_uint16(Var));
  }
  else if (jl_is_utf8_string(Var))
  {
    // ans must stay protected while mkCharCE allocates
    PROTECT(ans = allocVector(STRSXP, 1));
    SET_STRING_ELT(ans, 0, mkCharCE(jl_string_data(Var), CE_UTF8));
    UNPROTECT(1);
  }
  else if (jl_is_ascii_string(Var))
  {
    ans = ScalarString(mkChar(jl_string_data(Var)));
  }
  return ans;
}

/** Randomly permute the code points within each string
 *
 * Each element is decoded to code points, shuffled with a Fisher-Yates
 * pass driven by unif_rand(), and encoded back to UTF-8. NA elements stay
 * NA; an element that fails to decode gives a warning and NA.
 *
 * @param str character vector
 * @return character vector
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-04-04)
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 *
 * @version 1.2.5 (Marek Gagolewski, 2019-07-23)
 *    #319: Fixed overflow in `stri_rand_shuffle()`.
 */
SEXP stri_rand_shuffle(SEXP str)
{
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   R_len_t n = LENGTH(str);

   GetRNGstate();
   STRI__ERROR_HANDLER_BEGIN(1)
   StriContainerUTF8 input(str, n);

   // the longest element (in bytes) sizes both work buffers
   R_len_t capacity = 0;
   for (R_len_t idx = 0; idx < n; ++idx) {
      if (!input.isNA(idx)) {
         R_len_t nbytes = input.get(idx).length();
         if (capacity < nbytes)
            capacity = nbytes;
      }
   }

   std::vector<UChar32> codepoints(capacity); // never more code points than bytes
   String8buf encoded(capacity);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(STRSXP, n));

   for (R_len_t idx = 0; idx < n; ++idx) {
      if (input.isNA(idx)) {
         SET_STRING_ELT(ret, idx, NA_STRING);
         continue;
      }

      // decode; stop at the first invalid sequence (negative code point)
      UChar32 cp = (UChar32)0;
      const char* bytes = input.get(idx).c_str();
      R_len_t nbytes = input.get(idx).length();
      R_len_t pos = 0;
      R_len_t count = 0;
      while (pos < nbytes) {
         U8_NEXT(bytes, pos, nbytes, cp);
         codepoints[count++] = (int)cp;
         if (cp < 0)
            break;
      }

      if (cp < 0) {
         Rf_warning(MSG__INVALID_UTF8);
         SET_STRING_ELT(ret, idx, NA_STRING);
         continue;
      }

      // Fisher-Yates: swap slot lo with a slot drawn from lo..count-1
      for (R_len_t lo = 0; lo < count-1; ++lo) {
         R_len_t pick = (R_len_t)floor(unif_rand()*(double)(count-lo)+(double)lo);
         UChar32 held = codepoints[pick];
         codepoints[pick] = codepoints[lo];
         codepoints[lo] = held;
      }

      // encode the permuted code points back to UTF-8
      char* out = encoded.data();
      R_len_t outlen = 0;
      UBool err = FALSE;
      for (R_len_t q = 0; q < count && !err; ++q) {
         cp = codepoints[q];
         U8_APPEND((uint8_t*)out, outlen, capacity, cp, err);
      }

      if (err) throw StriException(MSG__INTERNAL_ERROR);

      SET_STRING_ELT(ret, idx, Rf_mkCharLenCE(out, outlen, CE_UTF8));
   }

   PutRNGstate();
   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END({
      PutRNGstate();
   })
}

/*
 * Convert a Julia array to an R array with the same dimensions.
 *
 * The element type is taken from the first element.  Returns R_NilValue for
 * an empty array or an element type not handled below.  The jl*_to_r macros
 * are defined elsewhere in this file; as used here they rely on the locals
 * `ans`, `dims`, `len` and `p` (NOTE(review): confirm the macros keep their
 * own PROTECT/UNPROTECT balanced, which both the old and new code assume).
 */
static SEXP Julia_R_MD(jl_value_t *Var)
{
  SEXP ans = R_NilValue;
  jl_value_t *val;

  // check the length first: reading element 0 of an empty array is invalid
  int len = jl_array_len(Var);
  if (len == 0)
    return ans;

  if (((jl_array_t *)Var)->ptrarray)
    val = jl_cellref(Var, 0);
  else
    val = jl_arrayref((jl_array_t *)Var, 0);

  //get Julia dims and set R array Dims
  int ndims = jl_array_ndims(Var);
  SEXP dims;
  // dims stays protected until every allocation that uses it is done
  PROTECT(dims = allocVector(INTSXP, ndims));
  for (size_t i = 0; i < (size_t)ndims; i++)
  {
    INTEGER(dims)[i] = jl_array_dim(Var, i);
  }

  if (jl_is_bool(val))
  {
    char *p = (char *) jl_array_data(Var);
    PROTECT(ans = allocArray(LGLSXP, dims));
    for (size_t i = 0; i < (size_t)len; i++)
      LOGICAL(ans)[i] = p[i];
    UNPROTECT(1);
  }
  else if (jl_is_int32(val))
  {
    int32_t *p = (int32_t *) jl_array_data(Var);
    jlint_to_r;
  }
  //int64
  else if (jl_is_int64(val))
  {
    int64_t *p = (int64_t *) jl_array_data(Var);
    jlbiggerint_to_r;
  }
  //more integer type
  else if (jl_is_int8(val))
  {
    int8_t *p = (int8_t *) jl_array_data(Var);
    jlint_to_r;
  }
  else if (jl_is_int16(val))
  {
    int16_t *p = (int16_t *) jl_array_data(Var);
    jlint_to_r;
  }
  else if (jl_is_uint8(val))
  {
    uint8_t *p = (uint8_t *) jl_array_data(Var);
    jlint_to_r;
  }
  else if (jl_is_uint16(val))
  {
    uint16_t *p = (uint16_t *) jl_array_data(Var);
    jlint_to_r;
  }
  else if (jl_is_uint32(val))
  {
    uint32_t *p = (uint32_t *) jl_array_data(Var);
    jlbiggerint_to_r;
  }
  else if (jl_is_uint64(val))
  {
    uint64_t *p = (uint64_t *) jl_array_data(Var);
    jlbiggerint_to_r;
  }
  //double
  else if (jl_is_float64(val))
  {
    double *p = (double *) jl_array_data(Var);
    jlfloat_to_r;
  }
  else if (jl_is_float32(val))
  {
    float *p = (float *) jl_array_data(Var);
    jlfloat_to_r;
  }
  //convert string array to STRSXP
  else if (jl_is_utf8_string(val))
  {
    PROTECT(ans = allocArray(STRSXP, dims));
    for (size_t i = 0; i < (size_t)len; i++)
      SET_STRING_ELT(ans, i, mkCharCE(jl_string_data(jl_cellref(Var, i)), CE_UTF8));
    UNPROTECT(1);
  }
  else if (jl_is_ascii_string(val))
  {
    PROTECT(ans = allocArray(STRSXP, dims));
    for (size_t i = 0; i < (size_t)len; i++)
      SET_STRING_ELT(ans, i, mkChar(jl_string_data(jl_cellref(Var, i))));
    UNPROTECT(1);
  }

  UNPROTECT(1); // dims
  return ans;
}

/*
 * Read the value `name` under the open key `hkey` and convert it to R:
 *
 *   REG_NONE                      -> NULL
 *   REG_DWORD(_BIG_ENDIAN)        -> integer(1)
 *   REG_SZ, REG_EXPAND_SZ         -> character(1)
 *   REG_MULTI_SZ                  -> character(n), one element per string
 *   REG_BINARY                    -> raw vector
 *   REG_LINK, REG_RESOURCE_LIST   -> placeholder string, with a warning
 *   anything else                 -> NULL, with a warning
 *
 * Returns NULL when the query itself fails.
 */
static SEXP readRegistryKey1(HKEY hkey, const wchar_t *name)
{
    /* PAD_BYTES zero bytes are kept free behind the data and cleared after
       the query.  Three wide chars guarantee two consecutive aligned wide
       NULs (what the REG_MULTI_SZ scan needs) even if the stored size is
       odd. */
    enum { STACK_BYTES = 10000, PAD_BYTES = 3 * sizeof(wchar_t) };

    SEXP ans = R_NilValue;
    LONG res;
    DWORD type, size0 = STACK_BYTES, size = size0;
    BYTE data[STACK_BYTES + PAD_BYTES], *d = data;

    res = RegQueryValueExW(hkey, name, NULL, &type, d, &size);
    /* grow the buffer tenfold until the value fits */
    while (res == ERROR_MORE_DATA) {
	size0 *= 10;
	size = size0;
	d = (BYTE *) R_alloc((size_t) size0 + PAD_BYTES, sizeof(char));
	res = RegQueryValueExW(hkey, name, NULL, &type, d, &size);
    }
    if (res != ERROR_SUCCESS) return ans;

    /* String data may have been stored without its terminating NUL(s), so
       terminate it ourselves before it is scanned as a wide string. */
    memset(d + size, 0, PAD_BYTES);

    switch(type) {
    case REG_NONE:
	/* NULL */
	break;
    case REG_DWORD:
	ans = allocVector(INTSXP, 1);
	memcpy(INTEGER(ans), d, 4);
	break;
    case REG_DWORD_BIG_ENDIAN:
    {
	/* reverse the four bytes */
	BYTE d4[4];
	int i;
	for(i = 0; i < 4; i++) d4[3-i] = d[i];
	ans = allocVector(INTSXP, 1);
	memcpy(INTEGER(ans), d4, 4);
	break;
    }
    case REG_SZ:
    case REG_EXPAND_SZ:
    {
	PROTECT(ans = allocVector(STRSXP, 1));
	SET_STRING_ELT(ans, 0, mkCharUcs((wchar_t *)d));
	UNPROTECT(1);
	break;
    }
    case REG_BINARY:
	ans = allocVector(RAWSXP, size);
	memcpy(RAW(ans), d, size);
	break;
    case REG_MULTI_SZ:
    {
	/* NUL-separated strings ended by an empty string:
	   count them first, then copy each one */
	int i, n;
	wchar_t *p = (wchar_t *)d;
	for (n = 0; *p; n++) { for(; *p; p++) {}; p++; }
	PROTECT(ans = allocVector(STRSXP, n));
	for (i = 0, p = (wchar_t *)d; i < n; i++) {
	    SET_STRING_ELT(ans, i, mkCharUcs(p));
	    for(; *p; p++) {};
	    p++;
	}
	UNPROTECT(1);
	break;
    }
    case REG_LINK:
	warning("unhandled key type %s\n", "REG_LINK");
	ans = mkString("<REG_LINK>");
	break;
    case REG_RESOURCE_LIST:
	warning("unhandled key type %s\n", "REG_RESOURCE_LIST");
	ans = mkString("<REG_RESOURCE_LIST>");
	break;
    default:
	/* DWORD is not int: cast so the argument matches %d */
	warning("unhandled key type %d\n", (int) type);
	break;
    }
    return ans;
}

/*
 * Convert a Julia array-with-missing-values to an R array, mapping flagged
 * elements to NA.
 *
 * Var is bound to the Julia global `Varname0tmp`; its values are read from
 * `Varname0tmp.data` and its missing mask from `bitunpack(Varname0tmp.na)`.
 * The element type is taken from the first data element.  Returns R_NilValue
 * when either expression yields nothing, when the data is empty, or when the
 * element type is not handled below.  The jl*_to_r_na macros are defined
 * elsewhere in this file; as used here they rely on the locals `ans`, `dims`,
 * `len`, `p` and `pNA` (NOTE(review): confirm the macros keep their own
 * PROTECT/UNPROTECT balanced, which both the old and new code assume).
 */
static SEXP Julia_R_MD_NA(jl_value_t *Var)
{
  SEXP ans = R_NilValue;
  char *strData = "Varname0tmp.data";
  char *strNA = "bitunpack(Varname0tmp.na)";
  jl_set_global(jl_main_module, jl_symbol("Varname0tmp"), (jl_value_t *)Var);
  jl_value_t *retData = jl_eval_string(strData);
  jl_value_t *retNA = jl_eval_string(strNA);

  // both results are dereferenced below; bail out if evaluation gave nothing
  if (retData == NULL || retNA == NULL)
    return ans;

  // check the length first: reading element 0 of an empty array is invalid
  int len = jl_array_len(retData);
  if (len == 0)
    return ans;

  jl_value_t *val;
  if (((jl_array_t *)retData)->ptrarray)
    val = jl_cellref(retData, 0);
  else
    val = jl_arrayref((jl_array_t *)retData, 0);

  int ndims = jl_array_ndims(retData);
  SEXP dims;
  // dims stays protected until every allocation that uses it is done
  PROTECT(dims = allocVector(INTSXP, ndims));
  for (size_t i = 0; i < (size_t)ndims; i++)
    INTEGER(dims)[i] = jl_array_dim(retData, i);

  //bool array
  char *pNA = (char *) jl_array_data(retNA);

  if (jl_is_bool(val))
  {
    char *p = (char *) jl_array_data(retData);
    PROTECT(ans = allocArray(LGLSXP, dims));
    for (size_t i = 0; i < (size_t)len; i++)
      if (pNA[i])
        LOGICAL(ans)[i] = NA_LOGICAL;
      else
        LOGICAL(ans)[i] = p[i];
    UNPROTECT(1);
  }
  else if (jl_is_int32(val))
  {
    int32_t *p = (int32_t *) jl_array_data(retData);
    jlint_to_r_na;
  }
  //int64
  else if (jl_is_int64(val))
  {
    int64_t *p = (int64_t *) jl_array_data(retData);
    jlbiggerint_to_r_na;
  }
  //more integer type
  else if (jl_is_int8(val))
  {
    int8_t *p = (int8_t *) jl_array_data(retData);
    jlint_to_r_na;
  }
  else if (jl_is_int16(val))
  {
    int16_t *p = (int16_t *) jl_array_data(retData);
    jlint_to_r_na;
  }
  else if (jl_is_uint8(val))
  {
    uint8_t *p = (uint8_t *) jl_array_data(retData);
    jlint_to_r_na;
  }
  else if (jl_is_uint16(val))
  {
    uint16_t *p = (uint16_t *) jl_array_data(retData);
    jlint_to_r_na;
  }
  else if (jl_is_uint32(val))
  {
    uint32_t *p = (uint32_t *) jl_array_data(retData);
    jlbiggerint_to_r_na;
  }
  else if (jl_is_uint64(val))
  {
    uint64_t *p = (uint64_t *) jl_array_data(retData);
    jlbiggerint_to_r_na;
  }
  //double
  else if (jl_is_float64(val))
  {
    double *p = (double *) jl_array_data(retData);
    jlfloat_to_r_na;
  }
  else if (jl_is_float32(val))
  {
    float *p = (float *) jl_array_data(retData);
    jlfloat_to_r_na;
  }
  //convert string array to STRSXP
  else if (jl_is_utf8_string(val))
  {
    PROTECT(ans = allocArray(STRSXP, dims));
    for (size_t i = 0; i < (size_t)len; i++)
      if (pNA[i])
        SET_STRING_ELT(ans, i, NA_STRING);
      else
        SET_STRING_ELT(ans, i, mkCharCE(jl_string_data(jl_cellref(retData, i)), CE_UTF8));
    UNPROTECT(1);
  }
  else if (jl_is_ascii_string(val))
  {
    PROTECT(ans = allocArray(STRSXP, dims));
    for (size_t i = 0; i < (size_t)len; i++)
      if (pNA[i])
        SET_STRING_ELT(ans, i, NA_STRING);
      else
        SET_STRING_ELT(ans, i, mkChar(jl_string_data(jl_cellref(retData, i))));
    UNPROTECT(1);
  }

  UNPROTECT(1); // dims
  return ans;
}

/* {{{ rberkeley_dbcursor_get */ SEXP rberkeley_dbcursor_get (SEXP _dbc, SEXP _key, SEXP _data, SEXP _flags, SEXP _n /* non-API flag */) { DBC *dbc; DBT key, data; u_int32_t flags; int i, n, ret, P=0; flags = (u_int32_t)INTEGER(_flags)[0]; n = (INTEGER(_n)[0] < 0) ? 100 : INTEGER(_n)[0]; /* this should be _all_ data */ dbc = R_ExternalPtrAddr(_dbc); if(R_ExternalPtrTag(_dbc) != install("DBC") || dbc == NULL) error("invalid 'dbc' handle"); memset(&key, 0, sizeof(DBT)); memset(&data, 0, sizeof(DBT)); SEXP Keys, Data, results; PROTECT(Keys = allocVector(VECSXP, n)); P++; PROTECT(Data = allocVector(VECSXP, n)); P++; PROTECT(results = allocVector(VECSXP, n)); P++; /* Two scenarios for DBcursor->get calls: (1) key and data are SPECIFIED <OR> key is SPECIFIED, data is EMPTY (2) key and data are EMPTY We must handle these seperately in order to return a sensible result */ if( (!isNull(_key) && !isNull(_data)) || !isNull(_key) ) { /* need to handle cases where multiple results can be returned. Possibly given that flag we can instead use the last if-else branch */ key.data = (unsigned char *)RAW(_key); key.size = length(_key); if(!isNull(_data)) { data.data = (unsigned char *)RAW(_data); data.size = length(_data); } ret = dbc->get(dbc, &key, &data, flags); if(ret == 0) { SEXP KeyData; PROTECT(KeyData = allocVector(VECSXP, 2));P++; SEXP rawkey; PROTECT(rawkey = allocVector(RAWSXP, key.size)); memcpy(RAW(rawkey), key.data, key.size); SET_VECTOR_ELT(KeyData, 0, rawkey); UNPROTECT(1); SEXP rawdata; PROTECT(rawdata = allocVector(RAWSXP, data.size)); memcpy(RAW(rawdata), data.data, data.size); SET_VECTOR_ELT(KeyData, 1, rawdata); UNPROTECT(1); SEXP KeyDataNames; PROTECT(KeyDataNames = allocVector(STRSXP,2)); P++; SET_STRING_ELT(KeyDataNames, 0, mkChar("key")); SET_STRING_ELT(KeyDataNames, 1, mkChar("data")); setAttrib(KeyData, R_NamesSymbol, KeyDataNames); SET_VECTOR_ELT(results, 0, KeyData); PROTECT(results = lengthgets(results, 1)); P++; } } else if(isNull(_key) && 
isNull(_data)) { for(i = 0; i < n; i++) { ret = dbc->get(dbc, &key, &data, flags); if(ret == 0) { SEXP KeyData; PROTECT(KeyData = allocVector(VECSXP, 2)); SEXP rawkey; PROTECT(rawkey = allocVector(RAWSXP, key.size)); memcpy(RAW(rawkey), key.data, key.size); SET_VECTOR_ELT(KeyData, 0, rawkey); SEXP rawdata; PROTECT(rawdata = allocVector(RAWSXP, data.size)); memcpy(RAW(rawdata), data.data, data.size); SET_VECTOR_ELT(KeyData, 1, rawdata); SEXP KeyDataNames; PROTECT(KeyDataNames = allocVector(STRSXP,2)); SET_STRING_ELT(KeyDataNames, 0, mkChar("key")); SET_STRING_ELT(KeyDataNames, 1, mkChar("data")); setAttrib(KeyData, R_NamesSymbol, KeyDataNames); SET_VECTOR_ELT(results, i, KeyData); UNPROTECT(4); /* KeyDataNames, rawdata, rawkey, KeyData */ } else { /* end of data */ if(i == 0) { /* no results */ UNPROTECT(P); return ScalarInteger(ret); } /* truncate the keys and data to the i-size found */ PROTECT(results = lengthgets(results, i)); P++; break; } } } UNPROTECT(P); return results; }

/* Copy the bases a/c/g/t/n found in src[from .. to-1] to dst in upper case,
   skipping every other character, and NUL-terminate dst.
   dst must have room for (to - from) + 1 bytes. */
static void gadem_copy_site_bases(const char *src, int from, int to, char *dst)
{
  int out = 0;

  for (int j = from; j < to; j++) {
    switch (src[j]) {
      case 'a': dst[out++] = 'A'; break;
      case 'c': dst[out++] = 'C'; break;
      case 'g': dst[out++] = 'G'; break;
      case 't': dst[out++] = 'T'; break;
      case 'n': dst[out++] = 'N'; break;
      default: break;
    }
  }
  dst[out] = '\0';
}

/*
 * Package one motif and its sites as an R list:
 *
 *   [[1]] consensus string (pwmConsensus)
 *   [[2]] 4 x pwmLen numeric matrix filled from opwm[position][base]
 *   [[3]] integer vector of length nsites; element 1 is 125, the rest 0
 *   [[4]] per-site list: sequence, strand ("+"/"-"), position, p-value,
 *         geneID of the sequence, 1-based sequence index
 *   [[5]] motif name "m<id>"
 *
 * A site whose rev field is '0' is read from seq and reported on "+" at
 * pos+1; any other site is read from rseq and reported on "-" at
 * seqLen-pos.  The site sequence is left as the empty string when pos is
 * negative.
 * NOTE(review): the old code assembled a partial string for negative
 * positions but never stored it; confirm whether that was intended.
 *
 * numSeq, logev, sdyad, numCycle, pvalueCutoff and maxpFactor are not used.
 */
SEXP print_result_R(Sites *site,int nsites,int numSeq,char **seq,char **rseq,int *seqLen,
                    double logev,double **opwm,int pwmLen,int id,char *sdyad,char *pwmConsensus,int numCycle,
                    double pvalueCutoff,double maxpFactor,int *geneID) {

  int i;
  int compt=0;
  char *site_bases;
  const char base[] = "m";
  char filename [ FILENAME_MAX ];

  SEXP PWM;
  SEXP seqConsencus;
  SEXP motifname2;
  SEXP returnData;
  SEXP LengthSequence;
  SEXP SequencesIdent;
  SEXP StrandIdent;
  SEXP AccessionIdent;
  SEXP PositionIdent;
  SEXP SeqIden;
  SEXP PValue;
  SEXP GADEMList;

  PROTECT(returnData=NEW_LIST(5));
  PROTECT(GADEMList=NEW_LIST(6));
  PROTECT(PWM=allocMatrix(REALSXP,4,pwmLen));
  PROTECT(seqConsencus=NEW_CHARACTER(1));
  PROTECT(motifname2=NEW_CHARACTER(1));
  PROTECT(SequencesIdent=NEW_CHARACTER(nsites));
  PROTECT(PositionIdent=NEW_INTEGER(nsites));
  PROTECT(SeqIden=NEW_INTEGER(nsites));
  PROTECT(StrandIdent=NEW_CHARACTER(nsites));
  PROTECT(AccessionIdent=NEW_INTEGER(nsites));
  PROTECT(PValue=NEW_NUMERIC(nsites));
  PROTECT(LengthSequence=NEW_INTEGER(nsites));

  /* A site contributes at most pwmLen bases, so one buffer of that size
     serves every site; R reclaims it when the call returns. */
  site_bases = R_alloc((size_t) pwmLen + 1, sizeof(char));

  for (i=0; i<nsites; i++) {
    const int s = site[i].seq;
    const int pos = site[i].pos;
    const int forward = (site[i].rev=='0');

    INTEGER(AccessionIdent)[i]=geneID[s];

    if (pos>=0) {
      /* the site is clipped at the end of its sequence */
      int stop = pos+pwmLen;
      if (seqLen[s]<stop) stop=seqLen[s];
      gadem_copy_site_bases(forward ? seq[s] : rseq[s], pos, stop, site_bases);
      SET_STRING_ELT(SequencesIdent,i,mkChar(site_bases));
    }

    /* always record the strand, not only when flanking bases follow */
    SET_STRING_ELT(StrandIdent,i,mkChar(forward ? "+" : "-"));
    INTEGER(SeqIden)[i]=s+1;
    INTEGER(PositionIdent)[i]=forward ? pos+1 : seqLen[s]-pos;
    NUMERIC_DATA(PValue)[i]=site[i].pvalue;
  }

  for (int aa=0;aa<pwmLen;aa++) {
    for(int bb=0;bb<4;bb++) {
      NUMERIC_DATA(PWM)[compt]=opwm[aa][bb];
      compt++;
    }
  }

  SET_STRING_ELT(seqConsencus,0,mkChar(pwmConsensus));

  /* integer vectors are not zeroed on allocation: give every element a
     defined value, and only write element 0 when it exists */
  for (i=0; i<nsites; i++) INTEGER(LengthSequence)[i]=0;
  if (nsites>0) INTEGER(LengthSequence)[0]=125;

  /* motif name: "m" followed by the motif id */
  snprintf(filename, sizeof filename, "%s%d", base, id);
  SET_STRING_ELT(motifname2,0,mkChar(filename));

  SET_VECTOR_ELT(returnData,0,seqConsencus);
  SET_VECTOR_ELT(returnData,2,LengthSequence);
  SET_VECTOR_ELT(returnData,4,motifname2);
  SET_VECTOR_ELT(returnData,1,PWM);

  SET_VECTOR_ELT(GADEMList,0,SequencesIdent);
  SET_VECTOR_ELT(GADEMList,1,StrandIdent);
  SET_VECTOR_ELT(GADEMList,2,PositionIdent);
  SET_VECTOR_ELT(GADEMList,3,PValue);
  SET_VECTOR_ELT(GADEMList,4,AccessionIdent);
  SET_VECTOR_ELT(GADEMList,5,SeqIden);

  SET_VECTOR_ELT(returnData,3,GADEMList);

  UNPROTECT(12);
  return (returnData);
}

/*
 * Build an nr x nc matrix of the same type as `arg_vals`.
 *
 *   arg_vals                  the data (atomic vector, list or expression)
 *   arg_snr, arg_snc          requested number of rows / columns
 *   arg_byrow                 logical; passed to the copy routines
 *   arg_dimnames              dimnames to attach, or NULL
 *   arg_miss_nr, arg_miss_nc  logical; TRUE when nrow / ncol was not given
 *
 * A missing extent is derived from the data length and the other extent
 * (both missing: one column holding all the data).  Data is copied in when
 * there is any; otherwise an atomic result is filled with NA (raw: 0).
 * Warnings are raised when the data length does not fit the extents.
 * `call`, `op` and `rho` are not used.
 */
SEXP attribute_hidden do_earg_matrix(SEXP call, SEXP op, SEXP arg_vals,
                                     SEXP arg_snr, SEXP arg_snc,
                                     SEXP arg_byrow, SEXP arg_dimnames,
                                     SEXP arg_miss_nr, SEXP arg_miss_nc,
                                     SEXP rho)
{
    SEXP vals, ans, snr, snc, dimnames;
    int nr = 1, nc = 1, byrow, miss_nr, miss_nc;
    R_xlen_t lendat;

    vals = arg_vals;
    switch(TYPEOF(vals)) {
	case LGLSXP:
	case INTSXP:
	case REALSXP:
	case CPLXSXP:
	case STRSXP:
	case RAWSXP:
	case EXPRSXP:
	case VECSXP:
	    break;
	default:
	    error(_("'data' must be of a vector type, was '%s'"),
		type2char(TYPEOF(vals)));
    }
    lendat = XLENGTH(vals);
    snr = arg_snr;
    snc = arg_snc;
    byrow = asLogical(arg_byrow);
    if (byrow == NA_INTEGER)
	error(_("invalid '%s' argument"), "byrow");
    dimnames = arg_dimnames;
    miss_nr = asLogical(arg_miss_nr);
    miss_nc = asLogical(arg_miss_nc);

    if (!miss_nr) {
	if (!isNumeric(snr)) error(_("non-numeric matrix extent"));
	nr = asInteger(snr);
	if (nr == NA_INTEGER)
	    error(_("invalid 'nrow' value (too large or NA)"));
	if (nr < 0)
	    error(_("invalid 'nrow' value (< 0)"));
    }
    if (!miss_nc) {
	if (!isNumeric(snc)) error(_("non-numeric matrix extent"));
	nc = asInteger(snc);
	if (nc == NA_INTEGER)
	    error(_("invalid 'ncol' value (too large or NA)"));
	if (nc < 0)
	    error(_("invalid 'ncol' value (< 0)"));
    }

    /* derive whichever extent was not supplied */
    if (miss_nr && miss_nc) {
	if (lendat > INT_MAX) error("data is too long");
	nr = (int) lendat;
    } else if (miss_nr) {
	if (lendat > (double) nc * INT_MAX) error("data is too long");
	// avoid division by zero
	if (nc == 0) {
	    if (lendat) error(_("nc = 0 for non-null data"));
	    else nr = 0;
	} else
	    nr = (int) ceil((double) lendat / (double) nc);
    } else if (miss_nc) {
	if (lendat > (double) nr * INT_MAX) error("data is too long");
	// avoid division by zero
	if (nr == 0) {
	    if (lendat) error(_("nr = 0 for non-null data"));
	    else nc = 0;
	} else
	    nc = (int) ceil((double) lendat / (double) nr);
    }

    if(lendat > 0) {
	R_xlen_t nrc = (R_xlen_t) nr * nc;
	if (lendat > 1 && nrc % lendat != 0) {
	    /* lendat is R_xlen_t, which need not be int: print it as
	       long long so the argument matches the conversion */
	    if (((lendat > nr) && (lendat / nr) * nr != lendat) ||
		((lendat < nr) && (nr / lendat) * lendat != nr))
		warning(_("data length [%lld] is not a sub-multiple or multiple of the number of rows [%d]"), (long long) lendat, nr);
	    else if (((lendat > nc) && (lendat / nc) * nc != lendat) ||
		     ((lendat < nc) && (nc / lendat) * lendat != nc))
		warning(_("data length [%lld] is not a sub-multiple or multiple of the number of columns [%d]"), (long long) lendat, nc);
	}
	else if ((lendat > 1) && (nrc == 0)){
	    warning(_("data length exceeds size of matrix"));
	}
    }

#ifndef LONG_VECTOR_SUPPORT
    if ((double)nr * (double)nc > INT_MAX)
	error(_("too many elements specified"));
#endif

    PROTECT(ans = allocMatrix(TYPEOF(vals), nr, nc));
    if(lendat) {
	if (isVector(vals))
	    copyMatrix(ans, vals, byrow);
	else
	    copyListMatrix(ans, vals, byrow);
    } else if (isVector(vals)) { /* fill with NAs */
	R_xlen_t N = (R_xlen_t) nr * nc, i;
	switch(TYPEOF(vals)) {
	case STRSXP:
	    for (i = 0; i < N; i++)
		SET_STRING_ELT(ans, i, NA_STRING);
	    break;
	case LGLSXP:
	    for (i = 0; i < N; i++)
		LOGICAL(ans)[i] = NA_LOGICAL;
	    break;
	case INTSXP:
	    for (i = 0; i < N; i++)
		INTEGER(ans)[i] = NA_INTEGER;
	    break;
	case REALSXP:
	    for (i = 0; i < N; i++)
		REAL(ans)[i] = NA_REAL;
	    break;
	case CPLXSXP:
	    {
		Rcomplex na_cmplx;
		na_cmplx.r = NA_REAL;
		na_cmplx.i = 0;
		for (i = 0; i < N; i++)
		    COMPLEX(ans)[i] = na_cmplx;
	    }
	    break;
	case RAWSXP:
	    memset(RAW(ans), 0, N);
	    break;
	default:
	    /* don't fill with anything */
	    ;
	}
    }
    if(!isNull(dimnames)&& length(dimnames) > 0)
	ans = dimnamesgets(ans, dimnames);
    UNPROTECT(1);
    return ans;
}

/*
 * Validate a user supplied settings list and convert it to a
 * `struct VALC_settings`.
 *
 *  set_list  either NULL (defaults from VALC_settings_init() are kept) or a
 *            named list of length 16 whose names match, in order, the
 *            set_names_default table below; anything else is an error
 *  env       must be an environment; used as settings.env when that field
 *            is still R_NilValue after processing set_list
 *
 * Every problem is reported through error().  Returns the populated struct.
 */
struct VALC_settings VALC_settings_vet(SEXP set_list, SEXP env) {
  struct VALC_settings settings = VALC_settings_init();
  R_xlen_t set_len = 16;

  if(TYPEOF(set_list) == VECSXP) {
    if(xlength(set_list) != set_len) {
      /* set_len is a signed R_xlen_t; cast so it matches %zu */
      error(
        "`vet/vetr` usage error: `settings` must be a list of length %zu.",
        (size_t) set_len
      );
    }
    SEXP set_names = PROTECT(getAttrib(set_list, R_NamesSymbol));
    if(set_names == R_NilValue || TYPEOF(set_names) != STRSXP) {
      error(
        "%s%s%s",
        "`vet/vetr` usage error: ",
        "argument `settings` must be a named list as produced ",
        "by `vetr_settings`."
      );
    }
    /* Position in this table == position read with VECTOR_ELT below */
    const char * set_names_default[] = {
      "type.mode",               /*  0 */
      "attr.mode",               /*  1 */
      "lang.mode",               /*  2 */
      "fun.mode",                /*  3 */
      "rec.mode",                /*  4 */
      "suppress.warnings",       /*  5 */
      "fuzzy.int.max.len",       /*  6 */
      "width",                   /*  7 */
      "env.depth.max",           /*  8 */
      "symb.sub.depth.max",      /*  9 */
      "symb.size.max",           /* 10 */
      "nchar.max",               /* 11 */
      "track.hash.content.size", /* 12 */
      "env",                     /* 13 */
      "result.list.size.init",   /* 14 */
      "result.list.size.max"     /* 15 */
    };
    SEXP set_names_def_sxp = PROTECT(allocVector(STRSXP, set_len));
    for(R_xlen_t i = 0; i < set_len; ++i) {
      SEXP chr_name = PROTECT(mkChar(set_names_default[i]));
      SET_STRING_ELT(set_names_def_sxp, i, chr_name);
      UNPROTECT(1);
    }
    /* The literal 16 is the third argument of R_compute_identical, not the
       list length; it only happens to equal set_len. */
    if(!R_compute_identical(set_names, set_names_def_sxp, 16)) {
      error(
        "%s%s",
        "`vet/vetr` usage error: argument `settings` names are not in format ",
        "produced by `vetr_settings`."
      );
    }
    set_names_def_sxp = R_NilValue;
    UNPROTECT(2);

    // check the scalar integers

    settings.type_mode =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 0), "type.mode", 0, 2);
    settings.attr_mode =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 1), "attr.mode", 0, 2);
    settings.lang_mode =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 2), "lang.mode", 0, 2);
    settings.fun_mode =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 3), "fun.mode", 0, 2);
    settings.rec_mode =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 4), "rec.mode", 0, 2);
    settings.fuzzy_int_max_len = VALC_is_scalar_int(
      VECTOR_ELT(set_list, 6), "fuzzy.int.max.len", INT_MIN, INT_MAX
    );
    settings.width =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 7), "width", -1, INT_MAX);
    settings.env_depth_max =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 8), "env.depth.max", -1, INT_MAX);
    settings.symb_sub_depth_max = VALC_is_scalar_int(
      VECTOR_ELT(set_list, 9), "symb.sub.depth.max", 0, INT_MAX
    );
    /* The names check above guarantees element 10 is `symb.size.max` and
       element 11 is `nchar.max`, so each field reads its own element. */
    settings.symb_size_max = VALC_is_scalar_int(
      VECTOR_ELT(set_list, 10), "symb.size.max", 0, INT_MAX
    );
    settings.nchar_max =
      VALC_is_scalar_int(VECTOR_ELT(set_list, 11), "nchar.max", 0, INT_MAX);
    settings.track_hash_content_size = VALC_is_scalar_int(
      VECTOR_ELT(set_list, 12), "track.hash.content.size", 0, INT_MAX
    );

    // Other checks

    SEXP sup_warn = VECTOR_ELT(set_list, 5);
    if(
      TYPEOF(sup_warn) != LGLSXP || xlength(sup_warn) != 1 ||
      asInteger(sup_warn) == NA_LOGICAL
    ) {
      error(
        "%s%s",
        "`vet/vetr` usage error: setting `suppress.warnings` must be TRUE ",
        "or FALSE"
      );
    }
    settings.suppress_warnings = asLogical(sup_warn);

    if(
      TYPEOF(VECTOR_ELT(set_list, 13)) != ENVSXP &&
      VECTOR_ELT(set_list, 13) != R_NilValue
    ) {
      error(
        "%s%s",
        "`vet/vetr` usage error: setting `env` must be an environment ",
        "or NULL"
      );
    }
    settings.env = VECTOR_ELT(set_list, 13);

    settings.result_list_size_init = VALC_is_scalar_int(
      VECTOR_ELT(set_list, 14), "result.list.size.init", 1, INT_MAX - 1
    );
    settings.result_list_size_max = VALC_is_scalar_int(
      VECTOR_ELT(set_list, 15), "result.list.size.max", 1, INT_MAX - 1
    );
  } else if (set_list != R_NilValue) {
    error(
      "%s (is %s).",
      "`vet/vetr` usage error: argument `settings` must be a list or NULL",
      type2char(TYPEOF(set_list))
    );
  }
  if(TYPEOF(env) != ENVSXP) {
    error("`vet/vetr` usage error: argument `env` must be an environment.");
  }
  /* Fall back to the supplied env when no environment was set above */
  if(settings.env == R_NilValue) settings.env = env;
  return settings;
}

/*
 * Transpose a vector, 1-d array or matrix.
 *
 * A vector without dims, or a 1-d array, of length n is treated as an
 * n x 1 matrix.  The result carries swapped dims, swapped dimnames (and
 * swapped names of dimnames, if any), plus whatever copyMostAttrib()
 * transfers from the argument.  Anything that is not a vector, has more
 * than two dims, or has an unsupported type raises
 * "argument is not a matrix".  op and rho are unused.
 */
SEXP attribute_hidden do_earg_transpose(SEXP call, SEXP op, SEXP arg_x, SEXP rho)
{
    SEXP a = arg_x, r, dims, dimnames;
    SEXP rnames = R_NilValue, cnames = R_NilValue;
    SEXP dimnamesnames = R_NilValue, ndimnamesnames;
    int ldim, ncol = 0, nrow = 0;
    R_xlen_t len = 0, dst, src, last;

    if (!isVector(a))
        goto not_matrix;

    dims = getDimAttrib(a);
    ldim = length(dims);
    if (ldim == 0) {
        /* plain vector: its names become the row names */
        len = nrow = LENGTH(a);
        ncol = 1;
        rnames = getNamesAttrib(a);
        dimnames = rnames; /* for isNull() below */
    } else if (ldim == 1) {
        len = nrow = LENGTH(a);
        ncol = 1;
        dimnames = getDimNamesAttrib(a);
        if (dimnames != R_NilValue) {
            rnames = VECTOR_ELT(dimnames, 0);
            dimnamesnames = getNamesAttrib(dimnames);
        }
    } else if (ldim == 2) {
        ncol = ncols(a);
        nrow = nrows(a);
        len = XLENGTH(a);
        dimnames = getDimNamesAttrib(a);
        if (dimnames != R_NilValue) {
            rnames = VECTOR_ELT(dimnames, 0);
            cnames = VECTOR_ELT(dimnames, 1);
            dimnamesnames = getNamesAttrib(dimnames);
        }
    } else {
        goto not_matrix;
    }

    PROTECT(r = allocVector(TYPEOF(a), len));
    last = len - 1;

    /* Walk the result in storage order (dst) while stepping through the
       source with stride nrow (src), wrapping around whenever src runs
       past the last element. */
#define TRANSPOSE_STEP                                  \
    for (dst = 0, src = 0; dst < len; dst++, src += nrow)
#define TRANSPOSE_WRAP                                  \
    if (src > last) src -= last
#define TRANSPOSE_RAW_COPY(ACCESS)                      \
    TRANSPOSE_STEP {                                    \
        TRANSPOSE_WRAP;                                 \
        ACCESS(r)[dst] = ACCESS(a)[src];                \
    }

    switch (TYPEOF(a)) {
    case LGLSXP:
    case INTSXP:
        TRANSPOSE_RAW_COPY(INTEGER)
        break;
    case REALSXP:
        TRANSPOSE_RAW_COPY(REAL)
        break;
    case CPLXSXP:
        TRANSPOSE_RAW_COPY(COMPLEX)
        break;
    case RAWSXP:
        TRANSPOSE_RAW_COPY(RAW)
        break;
    case STRSXP:
        TRANSPOSE_STEP {
            TRANSPOSE_WRAP;
            SET_STRING_ELT(r, dst, STRING_ELT(a, src));
        }
        break;
    case VECSXP:
        TRANSPOSE_STEP {
            TRANSPOSE_WRAP;
            SET_VECTOR_ELT(r, dst, VECTOR_ELT(a, src));
        }
        break;
    default:
        UNPROTECT(1);
        goto not_matrix;
    }

#undef TRANSPOSE_RAW_COPY
#undef TRANSPOSE_WRAP
#undef TRANSPOSE_STEP

    /* dims of the result: (ncol, nrow) */
    PROTECT(dims = allocVector(INTSXP, 2));
    INTEGER(dims)[0] = ncol;
    INTEGER(dims)[1] = nrow;
    setAttrib(r, R_DimSymbol, dims);
    UNPROTECT(1);

    /* R <= 2.2.0: dropped list(NULL,NULL) dimnames :
     * if(rnames != R_NilValue || cnames != R_NilValue) */
    if (!isNull(dimnames)) {
        PROTECT(dimnames = allocVector(VECSXP, 2));
        SET_VECTOR_ELT(dimnames, 0, cnames);
        SET_VECTOR_ELT(dimnames, 1, rnames);
        if (!isNull(dimnamesnames)) {
            /* NOTE(review): the swapped names are stored as CHARSXP
               elements of a generic vector and handed to setAttrib();
               presumably setAttrib() coerces this to a character
               vector -- confirm before changing the container type. */
            PROTECT(ndimnamesnames = allocVector(VECSXP, 2));
            SET_VECTOR_ELT(ndimnamesnames, 1, STRING_ELT(dimnamesnames, 0));
            if (ldim == 2)
                SET_VECTOR_ELT(ndimnamesnames, 0, STRING_ELT(dimnamesnames, 1));
            else
                SET_VECTOR_ELT(ndimnamesnames, 0, R_BlankString);
            setAttrib(dimnames, R_NamesSymbol, ndimnamesnames);
            UNPROTECT(1);
        }
        setAttrib(r, R_DimNamesSymbol, dimnames);
        UNPROTECT(1);
    }
    copyMostAttrib(a, r);
    UNPROTECT(1);
    return r;

 not_matrix:
    error(_("argument is not a matrix"));
    return call; /* never used; just for -Wall */
}

USER_OBJECT_ RS_JAVA(MethodConverter)(jobject obj, jclass type, JNIEnv *env, RSFromJavaConverter *converter) { int i = 0, k, n; int numSlots; USER_OBJECT_ ans, names; const char *tmp; jboolean isCopy; jstring jval; jclass klass; jobject jsig, jobj; ReflectanceMethodIDs *mids; boolean isMethod; jint modifier; if(ModifierStringID == NULL) initReflectanceMethods(env); /* Determine whether we have a constructor or method and set the method identifiers and number of slots appropriately. */ isMethod = VMENV IsSameObject(env, type, VMENV FindClass(env, "java/lang/reflect/Method")) == JNI_TRUE; if(isMethod) { mids = &MethodIDs; numSlots = 6; } else { mids = &ConstructorIDs; numSlots = 5; /* Drop out the */ } PROTECT(ans = NEW_LIST(numSlots)); PROTECT(names = NEW_CHARACTER(numSlots)); SET_VECTOR_ELT(ans, i, NEW_CHARACTER(1)); jval = VMENV CallObjectMethod(env, obj, mids->getName); tmp = VMENV GetStringUTFChars(env, jval, &isCopy); SET_STRING_ELT(VECTOR_ELT(ans, i), 0, COPY_TO_USER_STRING(tmp)); if(isCopy) VMENV ReleaseStringUTFChars(env, jval, tmp); SET_STRING_ELT(names, i, COPY_TO_USER_STRING("name")); i++; SET_VECTOR_ELT(ans, i, NEW_CHARACTER(1)); klass = VMENV CallObjectMethod(env, obj, mids->getClass); tmp = getClassName(env, klass, &isCopy); SET_STRING_ELT(VECTOR_ELT(ans, i), 0, COPY_TO_USER_STRING(tmp)); SET_STRING_ELT(names, i, COPY_TO_USER_STRING("Declaring class")); i++; jsig = VMENV CallObjectMethod(env, obj, mids->getParameters); n = VMENV GetArrayLength(env, jsig); SET_VECTOR_ELT(ans, i, NEW_CHARACTER(n)); for(k = 0; k < n ; k++) { jobj = VMENV GetObjectArrayElement(env, jsig, k); tmp = getClassName(env, jobj, &isCopy); SET_STRING_ELT(VECTOR_ELT(ans, i), k, COPY_TO_USER_STRING(tmp)); } SET_STRING_ELT(names, i, COPY_TO_USER_STRING("Parameters")); i++; SET_VECTOR_ELT(ans, i, NEW_INTEGER(1)); modifier = VMENV CallIntMethod(env, obj, mids->getModifiers); INTEGER_DATA(VECTOR_ELT(ans, i))[0] = modifier; SET_STRING_ELT(names, i, COPY_TO_USER_STRING("Modifiers")); { /* 
Now get the string that represents the modifier value. Do this by calling the static method toString(int) in the java.lang.reflect.Modifier class. We assume we have initialized the ModifierStringID method id earlier when getting all the method ids for the reflectance classes. */ USER_OBJECT_ tmpr; const char *modName; jstring jmodName; PROTECT(tmpr = NEW_CHARACTER(1)); jmodName = VMENV CallStaticObjectMethod(env, (jclass)VMENV FindClass(env, "java/lang/reflect/Modifier"), ModifierStringID, modifier); if(jmodName != NULL_JAVA_OBJECT) { modName = VMENV GetStringUTFChars(env, jmodName, &isCopy); SET_STRING_ELT(tmpr, 0, COPY_TO_USER_STRING(modName)); } SET_NAMES(VECTOR_ELT(ans, i), tmpr); UNPROTECT(1); } i++; jsig = VMENV CallObjectMethod(env, obj, mids->getExceptions); n = VMENV GetArrayLength(env, jsig); SET_VECTOR_ELT(ans, i, NEW_CHARACTER(n)); for(k = 0; k < n ; k++) { jobj = VMENV GetObjectArrayElement(env, jsig, k); tmp = getClassName(env, jobj, &isCopy); SET_STRING_ELT(VECTOR_ELT(ans, i), k, COPY_TO_USER_STRING(tmp)); } SET_STRING_ELT(names, i, COPY_TO_USER_STRING("Exceptions")); i++; if(isMethod) { SET_VECTOR_ELT(ans, i, NEW_CHARACTER(1)); klass = VMENV CallObjectMethod(env, obj, mids->getReturnType); tmp = getClassName(env, klass, &isCopy); SET_VECTOR_ELT(VECTOR_ELT(ans, i), 0, COPY_TO_USER_STRING(tmp)); SET_STRING_ELT(names, i, COPY_TO_USER_STRING("Return type")); i++; } SET_NAMES(ans, names); /* Now set the class to be "JavaMethod" */ UNPROTECT(2); return(ans); }

/*
 * Collect system and user information into a named character vector of
 * length 8: sysname, release, version, nodename, machine (from uname()),
 * login (from getlogin()), user and effective_user (from getpwuid() when
 * the corresponding HAVE_* macros are defined).  Entries that cannot be
 * determined are "unknown".  Returns R_NilValue if uname() fails.
 */
SEXP attribute_hidden do_sysinfo(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    static const char *const field_names[8] = {
        "sysname", "release", "version", "nodename",
        "machine", "login", "user", "effective_user"
    };
    SEXP ans, ansnames;
    struct utsname name;
    char *login;
    int k;

    checkArity(op, args);
    PROTECT(ans = allocVector(STRSXP, 8));
    if (uname(&name) == -1) {
        UNPROTECT(1);
        return R_NilValue;
    }

    /* uname() fields */
    SET_STRING_ELT(ans, 0, mkChar(name.sysname));
    SET_STRING_ELT(ans, 1, mkChar(name.release));
    SET_STRING_ELT(ans, 2, mkChar(name.version));
    SET_STRING_ELT(ans, 3, mkChar(name.nodename));
    SET_STRING_ELT(ans, 4, mkChar(name.machine));

    /* login name; getlogin() may return NULL */
    login = getlogin();
    SET_STRING_ELT(ans, 5, mkChar(login ? login : "unknown"));

    /* real user */
#if defined(HAVE_PWD_H) && defined(HAVE_GETPWUID) && defined(HAVE_GETUID)
    {
        struct passwd *pw = getpwuid(getuid());
        SET_STRING_ELT(ans, 6, mkChar(pw ? pw->pw_name : "unknown"));
    }
#else
    SET_STRING_ELT(ans, 6, mkChar("unknown"));
#endif

    /* effective user */
#if defined(HAVE_PWD_H) && defined(HAVE_GETPWUID) && defined(HAVE_GETEUID)
    {
        struct passwd *pw = getpwuid(geteuid());
        SET_STRING_ELT(ans, 7, mkChar(pw ? pw->pw_name : "unknown"));
    }
#else
    SET_STRING_ELT(ans, 7, mkChar("unknown"));
#endif

    /* attach the element names */
    PROTECT(ansnames = allocVector(STRSXP, 8));
    for (k = 0; k < 8; k++)
        SET_STRING_ELT(ansnames, k, mkChar(field_names[k]));
    setAttrib(ans, R_NamesSymbol, ansnames);
    UNPROTECT(2);
    return ans;
}

static SEXP stringSubscript(SEXP s, int ns, int nx, SEXP names, StringEltGetter strg, int *stretch, Rboolean in) { SEXP indx, indexnames; int i, j, nnames, sub, extra; int canstretch = *stretch; #ifdef USE_HASHING Rboolean usehashing = in && (ns * nx > 1000); #else Rboolean usehashing = FALSE; #endif PROTECT(s); PROTECT(names); PROTECT(indexnames = allocVector(STRSXP, ns)); nnames = nx; extra = nnames; /* Process each of the subscripts. First we compare with the names * on the vector and then (if there is no match) with each of the * previous subscripts, since (if assigning) we may have already * added an element of that name. (If we are not assigning, any * nonmatch will have given an error.) */ #ifdef USE_HASHING if(usehashing) { /* must be internal, so names contains a character vector */ PROTECT(indx = match(names, s, 0)); for (i = 0; i < ns; i++) SET_STRING_ELT(indexnames, i, R_NilValue); } else { #endif PROTECT(indx = allocVector(INTSXP, ns)); for (i = 0; i < ns; i++) { sub = 0; if (names != R_NilValue) { for (j = 0; j < nnames; j++) { SEXP names_j = strg(names, j); if (!in && TYPEOF(names_j) != CHARSXP) error(_("character vector element does not have type CHARSXP")); if (NonNullStringMatch(STRING_ELT(s, i), names_j)) { sub = j + 1; SET_STRING_ELT(indexnames, i, R_NilValue); break; } } } INTEGER(indx)[i] = sub; } #ifdef USE_HASHING } #endif for (i = 0; i < ns; i++) { sub = INTEGER(indx)[i]; if (sub == 0) { for (j = 0 ; j < i ; j++) if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) { sub = INTEGER(indx)[j]; SET_STRING_ELT(indexnames, i, STRING_ELT(s, j)); break; } } if (sub == 0) { if (!canstretch) error(_("subscript out of bounds")); extra += 1; sub = extra; SET_STRING_ELT(indexnames, i, STRING_ELT(s, i)); } INTEGER(indx)[i] = sub; } /* Ghastly hack! We attach the new names to the attribute */ /* slot on the returned subscript vector. */ if (extra != nnames) SET_ATTRIB(indx, indexnames); if (canstretch) *stretch = extra; UNPROTECT(4); return indx; }