SEXP readDataPorStream(SEXP porStream, SEXP what, SEXP s_n, SEXP s_types){ #ifdef DEBUG Rprintf("\n############################"); Rprintf("\n#readDataPorStream"); Rprintf("\n############################"); #endif porStreamBuf *b = get_porStreamBuf(porStream); int n = asInteger(s_n); #ifdef DEBUG Rprintf("\nRequired number of cases: %d",n); Rprintf("\nBuffer contents: |%s|",b->buf); Rprintf("\nLine: %d",b->line); Rprintf("\nPosition: %d",b->pos); Rprintf("\nBuffer remainder: %s",b->buf + b->pos); #endif PROTECT(s_types = coerceVector(s_types,INTSXP)); int nvar = length(s_types); int *types = INTEGER(s_types); SEXP x, y, data; char *charbuf; int charbuflen = 0; PROTECT(data = allocVector(VECSXP,nvar)); int i,j; for(j = 0; j < nvar; j++){ if(types[j]==0) SET_VECTOR_ELT(data,j,allocVector(REALSXP,n)); else { SET_VECTOR_ELT(data,j,allocVector(STRSXP,n)); if(types[j] > charbuflen) charbuflen = types[j]; } } charbuf = R_alloc(charbuflen+1,sizeof(char)); #ifdef DEBUG // PrintValue(data); #endif for(i = 0; i < n; i++){ if(atEndPorStream(b) || (b->pos < 80 && b->buf[b->pos] == 'Z')){ #ifdef DEBUG Rprintf("\nReached end of cases at i=%d",i); #endif int new_length = i; for(j = 0; j < nvar; j++){ x = VECTOR_ELT(data,j); SET_VECTOR_ELT(data,j,lengthgets(x,new_length)); } n = new_length; break; } #ifdef DEBUG Rprintf("\nCase number: %d\n",i); #endif for(j = 0; j < nvar; j++){ if(atEndPorStream(b)) { printPorStreamBuf(b); warning("\nPremature end of data"); break; } #ifdef DEBUG PrintValue(VECTOR_ELT(data,j)); #endif if(types[j]==0) REAL(VECTOR_ELT(data,j))[i] = readDoublePorStream1(b); else SET_STRING_ELT(VECTOR_ELT(data,j), i, mkChar(readCHARPorStream(b,charbuf,types[j]))); #ifdef DEBUG if(i<3 && types[j]>0) PrintValue(STRING_ELT(VECTOR_ELT(data,j),i)); #endif } } for(j = 0; j < nvar; j++){ x = VECTOR_ELT(what,j); y = VECTOR_ELT(data,j); copyMostAttrib(x,y); } UNPROTECT(2); return data; }

SEXP pathSearch(SEXP Rx, SEXP Ry, SEXP Romega, SEXP Rlambda, SEXP Rkappa, SEXP Rtheta, SEXP Rn_lambda, SEXP Rn_theta, SEXP Reps, SEXP Rmax, SEXP Rinmax, SEXP Rpenalty, SEXP Rinit) { const char *penalty; int i, j, l, n, p, max, iter[1], n_lambda, n_theta, inmax; double eps; double *lambda, *y, *x, *init, *in, kappa, *theta, *omega; void cdfit(), free_vec(); SEXP betahat, betatild, Riter, return_list; p = Rf_ncols(Rx); n = Rf_nrows(Rx); Rx = coerceVector(Rx, REALSXP); Ry = coerceVector(Ry, REALSXP); Romega = coerceVector(Romega, REALSXP); Rlambda = coerceVector(Rlambda, REALSXP); Rkappa = coerceVector(Rkappa, REALSXP); Rtheta = coerceVector(Rtheta, REALSXP); Rn_lambda = coerceVector(Rn_lambda, INTSXP); Rn_theta = coerceVector(Rn_theta, INTSXP); Reps = coerceVector(Reps, REALSXP); Rinit = coerceVector(Rinit, REALSXP); Rmax = coerceVector(Rmax, INTSXP); Rinmax = coerceVector(Rinmax, INTSXP); lambda = REAL(Rlambda); kappa = REAL(Rkappa)[0]; theta = REAL(Rtheta); penalty = CHAR(STRING_ELT(Rpenalty, 0)); n_lambda = INTEGER(Rn_lambda)[0]; n_theta = INTEGER(Rn_theta)[0]; eps = REAL(Reps)[0]; max = INTEGER(Rmax)[0]; inmax = INTEGER(Rinmax)[0]; x = REAL(Rx); y = REAL(Ry); omega = REAL(Romega); in = REAL(Rinit); init = vector(p); for (j = 0; j < p; j++) init[j] = in[j]; PROTECT(betahat = Rf_allocMatrix(REALSXP, p, n_lambda * n_theta)); PROTECT(betatild = Rf_allocMatrix(REALSXP, p, n_lambda * n_theta)); PROTECT(Riter = Rf_allocMatrix(INTSXP, n_lambda, n_theta)); PROTECT(return_list = Rf_allocVector(VECSXP, 6)); /*LASSO path along lambda*/ for (i = 0; i < n_lambda; i++){ for (l = 0; l < n_theta; l++) { cdfit(x, y, omega, init, lambda[i], 0.0, theta[l], "LASSO", eps, max, inmax, n, p, iter); for (j = 0; j < p; j++) REAL(betahat)[(i + l * n_lambda) * p + j] = init[j]; INTEGER(Riter)[i + l * n_lambda] = iter[0]; }} SET_VECTOR_ELT(return_list, 0, betahat); /*MCP path along kappa with n_lambda interim points*/ if (strcmp(penalty, "MCP") == 0) { for (i = 0; i < n_lambda; i++) { for (l = 0; l < n_theta; l++) { for (j = 0; j < p; j++) init[j] = REAL(betahat)[(i + l * n_lambda) * p + j]; cdfit(x, y, omega, init, lambda[i], kappa, theta[l], "MCP", eps, max, inmax, n, p, iter); for (j = 0; j < p; j++) REAL(betatild)[(i + l * n_lambda) * p + j] = init[j]; INTEGER(Riter)[i + l * n_lambda] = iter[0]; } } } SET_VECTOR_ELT(return_list, 1, betatild); SET_VECTOR_ELT(return_list, 2, Riter); SET_VECTOR_ELT(return_list, 3, Rlambda); SET_VECTOR_ELT(return_list, 4, Rkappa); SET_VECTOR_ELT(return_list, 5, Rtheta); UNPROTECT(4); free_vector(init); return(return_list); }

SEXP SP_PREFIX(Polygons_c)(SEXP pls, SEXP ID) { SEXP ans, labpt, Area, plotOrder, crds, pl, n, hole; int nps, i, pc=0, sumholes; double *areas, *areaseps, fuzz; int *po, *holes; SEXP valid; nps = length(pls); fuzz = R_pow(DOUBLE_EPS, (2.0/3.0)); areas = (double *) R_alloc((size_t) nps, sizeof(double)); areaseps = (double *) R_alloc((size_t) nps, sizeof(double)); holes = (int *) R_alloc((size_t) nps, sizeof(int)); for (i=0, sumholes=0; i<nps; i++) { areas[i] = NUMERIC_POINTER(GET_SLOT(VECTOR_ELT(pls, i), install("area")))[0]; holes[i] = LOGICAL_POINTER(GET_SLOT(VECTOR_ELT(pls, i), install("hole")))[0]; areaseps[i] = holes[i] ? areas[i] + fuzz : areas[i]; sumholes += holes[i]; } po = (int *) R_alloc((size_t) nps, sizeof(int)); if (nps > 1) { for (i=0; i<nps; i++) po[i] = i + R_OFFSET; revsort(areaseps, po, nps); } else { po[0] = 1; } if (sumholes == nps) { crds = GET_SLOT(VECTOR_ELT(pls, (po[0] - R_OFFSET)), install("coords")); PROTECT(n = NEW_INTEGER(1)); pc++; INTEGER_POINTER(n)[0] = INTEGER_POINTER(getAttrib(crds, R_DimSymbol))[0]; PROTECT(hole = NEW_LOGICAL(1)); pc++; LOGICAL_POINTER(hole)[0] = FALSE; pl = SP_PREFIX(Polygon_c)(crds, n, hole); /* bug 100417 Patrick Giraudoux */ holes[po[0] - R_OFFSET] = LOGICAL_POINTER(hole)[0]; SET_VECTOR_ELT(pls, (po[0] - R_OFFSET), pl); } PROTECT(ans = NEW_OBJECT(MAKE_CLASS("Polygons"))); pc++; SET_SLOT(ans, install("Polygons"), pls); SET_SLOT(ans, install("ID"), ID); PROTECT(Area = NEW_NUMERIC(1)); pc++; NUMERIC_POINTER(Area)[0] = 0.0; for (i=0; i<nps; i++) { NUMERIC_POINTER(Area)[0] += holes[i] ? 0.0 : fabs(areas[i]); } SET_SLOT(ans, install("area"), Area); PROTECT(plotOrder = NEW_INTEGER(nps)); pc++; for (i=0; i<nps; i++) INTEGER_POINTER(plotOrder)[i] = po[i]; SET_SLOT(ans, install("plotOrder"), plotOrder); PROTECT(labpt = NEW_NUMERIC(2)); pc++; NUMERIC_POINTER(labpt)[0] = NUMERIC_POINTER(GET_SLOT(VECTOR_ELT(pls, (po[0]-1)), install("labpt")))[0]; NUMERIC_POINTER(labpt)[1] = NUMERIC_POINTER(GET_SLOT(VECTOR_ELT(pls, (po[0]-1)), install("labpt")))[1]; SET_SLOT(ans, install("labpt"), labpt); PROTECT(valid = SP_PREFIX(Polygons_validate_c)(ans)); pc++; if (!isLogical(valid)) { UNPROTECT(pc); if (isString(valid)) error(CHAR(STRING_ELT(valid, 0))); else error("invalid Polygons object"); } UNPROTECT(pc); return(ans); }

SEXP findmzROI(SEXP mz, SEXP intensity, SEXP scanindex, SEXP mzrange, SEXP scanrange, SEXP lastscan, SEXP dev, SEXP minEntries, SEXP prefilter, SEXP noise) { double *pmz, *pintensity, mzrangeFrom,mzrangeTo; int i,*pscanindex, scanrangeFrom, scanrangeTo, ctScan, nmz, lastScan, inoise; int scerr = 0; // count of peak insertion errors, due to missing/bad centroidisation int perc, lp = -1; SEXP peaklist,entrylist,list_names,vmz,vmzmin,vmzmax,vscmin,vscmax,vlength,vintensity; pmz = REAL(mz); nmz = GET_LENGTH(mz); pintensity = REAL(intensity); pscanindex = INTEGER(scanindex); lastScan = INTEGER(lastscan)[0]; inoise = INTEGER(noise)[0]; pickOptions.dev = REAL(dev)[0]; pickOptions.minEntries = INTEGER(minEntries)[0]; pickOptions.minimumIntValues=INTEGER(prefilter)[0]; pickOptions.minimumInt=INTEGER(prefilter)[1]; mzrangeFrom = REAL(mzrange)[0]; mzrangeTo = REAL(mzrange)[1]; scanrangeFrom = INTEGER(scanrange)[0]; scanrangeTo = INTEGER(scanrange)[1]; struct mzROIStruct * mzROI = (struct mzROIStruct *) calloc(ROI_INIT_LENGTH, sizeof(struct mzROIStruct)); if (mzROI == NULL) error("findmzROI/calloc: buffer memory could not be allocated ! (%d bytes)\n",ROI_INIT_LENGTH * sizeof(struct mzROIStruct) ); struct mzROIStruct * mzval = (struct mzROIStruct *) calloc(MZVAL_INIT_LENGTH, sizeof(struct mzROIStruct)); if (mzval == NULL) error("findmzROI/calloc: buffer memory could not be allocated ! (%d bytes)\n",MZVAL_INIT_LENGTH * sizeof(struct mzROIStruct) ); mzLength.mzvalTotal = MZVAL_INIT_LENGTH; mzLength.mzROITotal = ROI_INIT_LENGTH; mzLength.mzval = 0; mzLength.mzROI = 0; struct scanBuf * scanbuf = &scbuf; scanbuf->thisScan = NULL; scanbuf->nextScan = NULL; scanbuf->thisScanLength = 0; scanbuf->nextScanLength = 0; char *names[N_NAMES] = {"mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity"}; PROTECT(list_names = allocVector(STRSXP, N_NAMES)); for(i = 0; i < N_NAMES; i++) SET_STRING_ELT(list_names, i, mkChar(names[i])); Rprintf(" %% finished: "); for (ctScan=scanrangeFrom;ctScan<=scanrangeTo;ctScan++) { perc = (int) (ctScan* 100)/scanrangeTo; if ((perc % 10) == 0 && (perc != lp)) { Rprintf("%d ",perc); lp = perc; } scanbuf=getScan(ctScan, pmz, pintensity, pscanindex,nmz,lastScan, scanbuf); if (scanbuf->thisScanLength > 0) { #ifdef DEBUG Rprintf("ScanLength %d ",scanbuf->thisScanLength); Rprintf("Scan Nr. %d of %d (%d %%) %d peaks -- working at %d m/z ROI's, %d ROI's completed.\n", ctScan, scanrangeTo, (int)100.0*ctScan/scanrangeTo,scanbuf->thisScanLength,mzLength.mzval,mzLength.mzROI); #endif int p; double fMass,lastMass=-1; double fInten; for (p=0;p < scanbuf->thisScanLength;p++) { fMass = scanbuf->thisScan[p].mz; fInten = scanbuf->thisScan[p].intensity; if (fMass < lastMass) error("m/z sort assumption violated ! (scan %d, p %d, current %2.4f (I=%2.2f), last %2.4f) \n",ctScan,p,fMass,fInten,lastMass); lastMass = fMass; if (fInten > inoise) mzval=insertpeak(fMass, fInten, scanbuf, ctScan, scanrangeTo, mzval, &mzLength, &pickOptions); } } mzROI=cleanup(ctScan,mzROI,mzval,&mzLength,&scerr,&pickOptions); R_FlushConsole(); } //for ctScan mzROI=cleanup(ctScan+1,mzROI,mzval,&mzLength,&scerr,&pickOptions); PROTECT(peaklist = allocVector(VECSXP, mzLength.mzROI)); int total = 0; for (i=0;i<mzLength.mzROI;i++) { PROTECT(entrylist = allocVector(VECSXP, N_NAMES)); PROTECT(vmz = NEW_NUMERIC(1)); PROTECT(vmzmin = NEW_NUMERIC(1)); PROTECT(vmzmax = NEW_NUMERIC(1)); PROTECT(vscmin = NEW_INTEGER(1)); PROTECT(vscmax = NEW_INTEGER(1)); PROTECT(vlength = NEW_INTEGER(1)); PROTECT(vintensity = NEW_INTEGER(1)); NUMERIC_POINTER(vmz)[0] = mzROI[i].mz; NUMERIC_POINTER(vmzmin)[0] = mzROI[i].mzmin; NUMERIC_POINTER(vmzmax)[0] = mzROI[i].mzmax; INTEGER_POINTER(vscmin)[0] = mzROI[i].scmin; INTEGER_POINTER(vscmax)[0] = mzROI[i].scmax; INTEGER_POINTER(vlength)[0] = mzROI[i].length; INTEGER_POINTER(vintensity)[0] = mzROI[i].intensity; SET_VECTOR_ELT(entrylist, 0, vmz); SET_VECTOR_ELT(entrylist, 1, vmzmin); SET_VECTOR_ELT(entrylist, 2, vmzmax); SET_VECTOR_ELT(entrylist, 3, vscmin); SET_VECTOR_ELT(entrylist, 4, vscmax); SET_VECTOR_ELT(entrylist, 5, vlength); SET_VECTOR_ELT(entrylist, 6, vintensity); setAttrib(entrylist, R_NamesSymbol, list_names); //attaching the vector names SET_VECTOR_ELT(peaklist, i, entrylist); UNPROTECT(N_NAMES + 1); //entrylist + values total++; } if (scerr > 0) Rprintf("Warning: There were %d peak data insertion problems. \n Please try lowering the \"ppm\" parameter.\n", scerr); Rprintf("\n %d m/z ROI's.\n", total); UNPROTECT(2); // peaklist,list_names // free(ptpeakbuf); if (scanbuf->thisScan != NULL) free(scanbuf->thisScan); if (scanbuf->thisScan != NULL) free(scanbuf->nextScan); free(mzval); free(mzROI); return(peaklist); }

SEXP hbin(SEXP x, SEXP y, SEXP swts, SEXP shape, SEXP size, SEXP rx, SEXP ry, SEXP bnd, SEXP n, SEXP doCellid){ /* Copyright 1991 Version Date: September 16, 1994 Programmer: Dan Carr, Conversion to C, triangulation, and Rapi Nicholas Lewin-Koh (2010) Indexing: Left to right, bottom to top bnd[0] rows, bnd[2] columns Input Vars: x,y the values of x and y xcm,ycm vectors for the center of mass of the returned hexagons shape the shape parameter for the hexagons cell cnt Output: cell ids for non empty cells, revised bnd(1) optionally also return cellid(1:n), and wcnt Copyright (2004) Nicholas Lewin-Koh and Martin Maechler */ int nc, nn; int i, i1, i2, iinc; int j1, j2, jinc; int L, ll, lmax, lat, tcell; double c1, c2, con1, con2, dist1, fsize; double sx, sy, xmin, ymin, xr, yr; uint keepID=0, doWeights=0; int prcnt=0; SEXP ans; SEXP cnt, cell, wcnt, cellid, xcm, ycm; if(LOGICAL(doCellid)[0]>0) keepID = 1; if(length(swts) > 0 || swts != R_NilValue) doWeights = 1; /*_______Alloc and protect the necessary result vectors, then set to 0_____________*/ lmax=INTEGER(bnd)[0]*INTEGER(bnd)[1]; PROTECT(cnt = allocVector(INTSXP, lmax)); prcnt++; PROTECT(cell = allocVector(INTSXP, lmax)); prcnt++; PROTECT(xcm = allocVector(REALSXP, lmax)); prcnt++; PROTECT(ycm = allocVector(REALSXP, lmax)); prcnt++; if(keepID > 0) PROTECT(cellid = allocVector(INTSXP, INTEGER(n)[0])); else PROTECT(cellid = allocVector(NILSXP, 1)); prcnt++; if(doWeights > 0)PROTECT(wcnt = allocVector(REALSXP, lmax)); else PROTECT(wcnt = allocVector(NILSXP, 1)); prcnt++; memset(INTEGER(cell),0,lmax*sizeof(int)); memset(INTEGER(cnt),0,lmax*sizeof(int)); memset(REAL(xcm),0,lmax*sizeof(double)); memset(REAL(ycm),0,lmax*sizeof(double)); if(doWeights>0) memset(REAL(wcnt),0,lmax*sizeof(double)); /*_______Constants for scaling the data_____________________________*/ fsize=INTEGER(size)[0]; nn=INTEGER(n)[0]; xmin = REAL(rx)[0]; ymin = REAL(ry)[0]; xr = REAL(rx)[1]-xmin; yr = REAL(ry)[1]-ymin; c1 = fsize/xr; c2 = (fsize*REAL(shape)[0])/(yr*sqrt(3.0)); jinc= INTEGER(bnd)[1]; lat=jinc+1; iinc= 2*jinc; con1 = 0.25; con2 = 1.0/3.0; /*_______Binning loop______________________________________________*/ for(i=0; i<nn; i++){ sx = c1 * (REAL(x)[i] - xmin); sy = c2 * (REAL(y)[i] - ymin); j1 = sx+.5; i1 = sy+.5; dist1 = (sx-j1)*(sx-j1)+ 3.0*(sy-i1)*(sy-i1); /* need floor in C for this, same effect as trunc*/ if(dist1 < con1) L = i1*iinc + j1 + 1; else if(dist1 > con2) L = floor(sy)*iinc + floor(sx) + lat; else{ j2 = sx; i2 = sy; if(dist1 <= ((sx - j2 - 0.5)*(sx - j2 - 0.5)) + 3.0*((sy - i2 - 0.5)*(sy - i2 - 0.5))) L = i1*iinc + j1 + 1; else L=i2*iinc+ j2+lat; } ll=L-1; INTEGER(cnt)[ll]++; if(doWeights > 0) REAL(wcnt)[ll] = REAL(wcnt)[ll] + REAL(swts)[i]; if (keepID > 0) INTEGER(cellid)[i] = L; REAL(xcm)[ll] = REAL(xcm)[ll] + (REAL(x)[i]-REAL(xcm)[ll])/INTEGER(cnt)[ll]; REAL(ycm)[ll] = REAL(ycm)[ll]+ (REAL(y)[i]-REAL(ycm)[ll])/INTEGER(cnt)[ll]; } /*_______Compression of output________________________________________*/ nc=-1; for(L=0;L<lmax;L++){ if(INTEGER(cnt)[L] > 0){ nc=nc+1; INTEGER(cell)[nc]=L+1; INTEGER(cnt)[nc]=INTEGER(cnt)[L]; REAL(xcm)[nc]=REAL(xcm)[L]; REAL(ycm)[nc]=REAL(ycm)[L]; } } INTEGER(n)[0]=nc+1; INTEGER(bnd)[0]=(INTEGER(cell)[nc]-1)/INTEGER(bnd)[1]+1; /* Output constructor */ PROTECT(ans = allocVector(VECSXP, 6)); prcnt++; SET_VECTOR_ELT(ans, 0, n); SET_VECTOR_ELT(ans, 1, cell); SET_VECTOR_ELT(ans, 2, cnt); SET_VECTOR_ELT(ans, 3, wcnt); SET_VECTOR_ELT(ans, 4, xcm); SET_VECTOR_ELT(ans, 5, ycm); UNPROTECT(prcnt); return(ans); }

/* rep_len(x, len), also used for rep.int() with scalar 'times' */ static SEXP rep3(SEXP s, R_xlen_t ns, R_xlen_t na) { R_xlen_t i, j; SEXP a; PROTECT(a = allocVector(TYPEOF(s), na)); // i % ns is slow, especially with long R_xlen_t switch (TYPEOF(s)) { case LGLSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; LOGICAL(a)[i++] = LOGICAL(s)[j++]; } break; case INTSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; INTEGER(a)[i++] = INTEGER(s)[j++]; } break; case REALSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; REAL(a)[i++] = REAL(s)[j++]; } break; case CPLXSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; COMPLEX(a)[i++] = COMPLEX(s)[j++]; } break; case RAWSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; RAW(a)[i++] = RAW(s)[j++]; } break; case STRSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; SET_STRING_ELT(a, i++, STRING_ELT(s, j++)); } break; case VECSXP: case EXPRSXP: for (i = 0, j = 0; i < na;) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (j >= ns) j = 0; SET_VECTOR_ELT(a, i++, lazy_duplicate(VECTOR_ELT(s, j++))); } break; default: UNIMPLEMENTED_TYPE("rep3", s); } UNPROTECT(1); return a; }

void countMatrixColumns(SEXP x, const column_type* columnTypes, SEXP dropPatternExpr, bool createDropPattern, size_t* result) { size_t numColumns = (size_t) rc_getLength(x); bool dropColumn; for (size_t i = 0; i < numColumns; ++i) { SEXP col = VECTOR_ELT(x, i); switch (columnTypes[i]) { case REAL_VECTOR: case INTEGER_VECTOR: case LOGICAL_VECTOR: { if (dropPatternExpr != R_NilValue) { if (createDropPattern) { dropColumn = numericVectorIsConstant(col, columnTypes[i]); SET_VECTOR_ELT(dropPatternExpr, i, rc_newLogical(1)); LOGICAL(VECTOR_ELT(dropPatternExpr, i))[0] = dropColumn ? TRUE : FALSE; } else { dropColumn = LOGICAL(VECTOR_ELT(dropPatternExpr, i))[0]; } if (!dropColumn) *result += 1; } else { *result += 1; } } break; case REAL_MATRIX: { double* colData = REAL(col); int* dims = INTEGER(rc_getDims(col)); size_t numRows = dims[0], numCols = dims[1]; if (dropPatternExpr != R_NilValue) { if (createDropPattern) { SET_VECTOR_ELT(dropPatternExpr, i, rc_newLogical(numCols)); int* dropPattern = LOGICAL(VECTOR_ELT(dropPatternExpr, i)); for (size_t j = 0; j < numCols; ++j) { dropColumn = misc_vectorIsConstant(colData + j * numRows, numRows); dropPattern[j] = dropColumn; if (!dropColumn) *result += 1; } } else { int* dropPattern = LOGICAL(VECTOR_ELT(dropPatternExpr, i)); for (size_t j = 0; j < numCols; ++j) if (dropPattern[j] == 0) *result += 1; } } else { *result += numCols; } } break; case INTEGER_MATRIX: case LOGICAL_MATRIX: { int* colData = INTEGER(col); int* dims = INTEGER(rc_getDims(col)); size_t numRows = dims[0], numCols = dims[1]; if (dropPatternExpr != R_NilValue) { if (createDropPattern) { SET_VECTOR_ELT(dropPatternExpr, i, rc_newLogical(numCols)); int* dropPattern = LOGICAL(VECTOR_ELT(dropPatternExpr, i)); for (size_t j = 0; j < numCols; ++j) { dropColumn = integerVectorIsConstant(colData + j * numRows, numRows); dropPattern[j] = dropColumn; if (!dropColumn) *result += 1; } } else { int* dropPattern = LOGICAL(VECTOR_ELT(dropPatternExpr, i)); for (size_t j = 0; j < numCols; ++j) if (dropPattern[j] == 0) *result += 1; } } else { *result += numCols; } } break; case FACTOR: { SEXP levelsExpr = rc_getLevels(col); size_t numLevels = (size_t) rc_getLength(levelsExpr); if (dropPatternExpr != R_NilValue) { int* factorInstanceCounts; if (createDropPattern) { SET_VECTOR_ELT(dropPatternExpr, i, rc_newInteger(numLevels)); factorInstanceCounts = INTEGER(VECTOR_ELT(dropPatternExpr, i)); tableFactor(col, factorInstanceCounts); } else { factorInstanceCounts = INTEGER(VECTOR_ELT(dropPatternExpr, i)); } size_t numLevelsPerFactor = 0; for (size_t j = 0; j < numLevels; ++j) if (factorInstanceCounts[j] > 0) ++numLevelsPerFactor; if (numLevelsPerFactor == 2) { *result += 1; } else if (numLevelsPerFactor > 2) { *result += numLevelsPerFactor; } } else { *result += (numLevels <= 2 ? 1 : numLevels); } } default: break; } } }

SEXP TASCB_min(SEXP vect, SEXP tau, SEXP numberofSamples, SEXP sigma){ int value_count, sigma_count; mgs_result mgs_res; quant_result q_res; dbl_array *vector, *vect_sorted, *s; dbl_matrix *H_Mat1, *H_Mat2; calc_V_result_tri v_res; final_result_tri f_res; SEXP result, binarized_vector, threshold1, threshold2, p_value, other_results, names; SEXP smoothed, zerocrossing, deriv, steps, H1, H2, index, v_vec, meanlist, smoothedX; value_count = length(vect); sigma_count = length(sigma); PROTECT(result = allocVector(VECSXP, 5)); PROTECT(names = allocVector(VECSXP, 5)); SET_VECTOR_ELT(names,0, mkChar("binarized_vector")); SET_VECTOR_ELT(names,1, mkChar("threshold1")); SET_VECTOR_ELT(names,2, mkChar("threshold2")); SET_VECTOR_ELT(names,3, mkChar("p_value")); SET_VECTOR_ELT(names,4, mkChar("other_results")); setAttrib(result, R_NamesSymbol, names); UNPROTECT(1); PROTECT(other_results = allocVector(VECSXP, 10)); PROTECT(names = allocVector(VECSXP, 10)); SET_VECTOR_ELT(names,0, mkChar("smoothed")); SET_VECTOR_ELT(names,1, mkChar("zerocrossing")); SET_VECTOR_ELT(names,2, mkChar("deriv")); SET_VECTOR_ELT(names,3, mkChar("steps")); SET_VECTOR_ELT(names,4, mkChar("H_Mat1")); SET_VECTOR_ELT(names,5, mkChar("H_Mat2")); SET_VECTOR_ELT(names,6, mkChar("index")); SET_VECTOR_ELT(names,7, mkChar("v_vec")); SET_VECTOR_ELT(names,8, mkChar("smoothedX")); SET_VECTOR_ELT(names,9, mkChar("meanlist")); setAttrib(other_results, R_NamesSymbol, names); UNPROTECT(1); PROTECT(smoothed = allocMatrix(REALSXP, value_count - 1, sigma_count)); PROTECT(zerocrossing = allocMatrix(INTSXP, (int)ceil((double)value_count / 2.0), sigma_count)); PROTECT(deriv = allocVector(REALSXP, value_count - 1)); mgs_res.smoothed = init_dbl_matrix(REAL(smoothed), sigma_count, value_count - 1, 0); mgs_res.zerocrossing = init_int_matrix(INTEGER(zerocrossing), sigma_count, (int)ceil((double)value_count / 2.0), 0); mgs_res.deriv = init_dbl_array(REAL(deriv), value_count - 1, 0); vector = init_dbl_array(REAL(vect), value_count, 1); vect_sorted = init_dbl_array(0, value_count, 0); s = init_dbl_array(REAL(sigma), sigma_count, 1); b = init_dbl_matrix(0, sigma_count, mgs_res.deriv->length, 0); b_returned = init_int_matrix(0, sigma_count, mgs_res.deriv->length, 0); memcpy(vect_sorted->values, REAL(vect), vect_sorted->length * sizeof(double)); qsort(vect_sorted->values, vect_sorted->length, sizeof(double), comp); mgs(&mgs_res, vect_sorted, s); q_res.steps = init_int_matrix(0, sigma_count, (int)ceil((double)value_count / 2.0), 0); q_res.index = init_int_array(0, sigma_count, 0); q_res.greatest_index_ind = 0; q_res.greatest_steps_col = 0; q_res.greatest_steps_row = 0; getQuantizations(&q_res, &mgs_res); PROTECT(steps = allocMatrix(INTSXP, q_res.greatest_steps_col, q_res.greatest_steps_row)); PROTECT(H1 = allocMatrix(REALSXP, q_res.greatest_steps_col, q_res.greatest_steps_row)); PROTECT(H2 = allocMatrix(REALSXP, q_res.greatest_steps_col, q_res.greatest_steps_row)); PROTECT(index = allocVector(INTSXP, q_res.greatest_index_ind)); cut_int_matrix(q_res.steps, INTEGER(steps), 0, q_res.greatest_steps_row - 1, 0, q_res.greatest_steps_col - 1); cut_int_array(q_res.index, INTEGER(index), 0, q_res.greatest_index_ind - 1); H_Mat1 = init_dbl_matrix(REAL(H1), q_res.greatest_steps_row, q_res.greatest_steps_col, 0); H_Mat2 = init_dbl_matrix(REAL(H2), q_res.greatest_steps_row, q_res.greatest_steps_col, 0); PROTECT(v_vec = allocMatrix(INTSXP, q_res.index->length, 2)); PROTECT(smoothedX = allocMatrix(REALSXP, mgs_res.smoothed->cols + 1, q_res.index->length)); PROTECT(meanlist = allocMatrix(REALSXP, vect_sorted->length, q_res.index->length + 1)); v_res.v = init_int_matrix(INTEGER(v_vec), q_res.index->length, 2, 0); v_res.meanlist = init_dbl_matrix(REAL(meanlist), q_res.index->length + 1, vect_sorted->length, 0); v_res.smoothedX = init_dbl_matrix(REAL(smoothedX), q_res.index->length, mgs_res.smoothed->cols + 1, 0); calc_V_Scalespace_tri_min(&v_res, &mgs_res, &q_res, H_Mat1, H_Mat2, vect_sorted); PROTECT(binarized_vector = allocVector(INTSXP, value_count)); PROTECT(threshold1 = allocVector(REALSXP, 1)); PROTECT(threshold2 = allocVector(REALSXP, 1)); PROTECT(p_value = allocVector(REALSXP, 1)); f_res.binarized_vector = init_int_array(INTEGER(binarized_vector), value_count, 0); f_res.p = REAL(p_value); f_res.threshold1 = REAL(threshold1); f_res.threshold2 = REAL(threshold2); calc_final_results_tri_min(&f_res, v_res.v, vector, vect_sorted, *REAL(tau), *INTEGER(numberofSamples)); SET_VECTOR_ELT(other_results, 0, smoothed); SET_VECTOR_ELT(other_results, 1, zerocrossing); SET_VECTOR_ELT(other_results, 2, deriv); SET_VECTOR_ELT(other_results, 3, steps); SET_VECTOR_ELT(other_results, 4, H1); SET_VECTOR_ELT(other_results, 5, H2); SET_VECTOR_ELT(other_results, 6, index); SET_VECTOR_ELT(other_results, 7, v_vec); SET_VECTOR_ELT(other_results, 8, smoothedX); SET_VECTOR_ELT(other_results, 9, meanlist); SET_VECTOR_ELT(result, 0, binarized_vector); SET_VECTOR_ELT(result, 1, threshold1); SET_VECTOR_ELT(result, 2, threshold2); SET_VECTOR_ELT(result, 3, p_value); SET_VECTOR_ELT(result, 4, other_results); destroy_dbl_matrix(mgs_res.smoothed); destroy_int_matrix(mgs_res.zerocrossing); destroy_dbl_array(mgs_res.deriv); destroy_dbl_array(vector); destroy_dbl_array(vect_sorted); destroy_dbl_array(s); destroy_dbl_matrix(b); destroy_int_matrix(b_returned); b = 0; b_returned = 0; destroy_dbl_matrix(H_Mat1); destroy_dbl_matrix(H_Mat2); destroy_int_matrix(q_res.steps); destroy_int_array(q_res.index); destroy_int_matrix(v_res.v); destroy_dbl_matrix(v_res.meanlist); destroy_dbl_matrix(v_res.smoothedX); destroy_int_array(f_res.binarized_vector); UNPROTECT(16); return result; }

SEXP elsa(SEXP v, SEXP nc, SEXP nr, SEXP nclass, SEXP rr, SEXP cc) { int nProtected=0; int c, row, col, ngb, q, nnr, nnc, nrow, ncol, cellnr, ncl, n; double e, w, s, xi, qq, count, a; R_len_t i, j; SEXP ans; PROTECT(ans = NEW_LIST(2)); ++nProtected; int *xrr, *xcc; double *xv; nrow=INTEGER(nr)[0]; ncol=INTEGER(nc)[0]; ncl=INTEGER(nclass)[0]; n=length(v); SET_VECTOR_ELT(ans, 0, NEW_NUMERIC(n)); SET_VECTOR_ELT(ans, 1, NEW_NUMERIC(n)); PROTECT(v = coerceVector(v, REALSXP)); ++nProtected; PROTECT(rr = coerceVector(rr, INTSXP)); ++nProtected; PROTECT(cc = coerceVector(cc, INTSXP)); ++nProtected; ngb=length(rr); xv=REAL(v); xrr=INTEGER(rr); xcc=INTEGER(cc); for (c=0;c < n;c++) { R_CheckUserInterrupt(); xi=xv[c]; if (!R_IsNA(xi)) { row = (c / ncol) + 1; col = (c + 1) - ((row - 1) * ncol); double xn[ngb]; q=-1; for (i=0; i < ngb; i++) { nnr= row + xrr[i]; nnc = col + xcc[i]; if ((nnr > 0) & (nnr <= nrow) & (nnc > 0) & (nnc <= ncol)) { cellnr = ((nnr - 1) * ncol) + nnc; if (!R_IsNA(xv[(cellnr-1)])) { q+=1; xn[q]=xv[(cellnr-1)]; } } } // sort for (i=0;i <= (q-1);i++) { for (j=i+1;j <= q;j++) { if (xn[i] > xn[j]) { a=xn[i]; xn[i]=xn[j]; xn[j]=a; } } } //------ a=xn[0]; count=1; e=0; qq=q+1; for (i=1;i <= q;i++) { if (xn[i] != a) { e = e + ((count / qq) * log2(count / qq)); a=xn[i]; count=1; } else { count+=1; } } e = e + ((count / qq) * log2(count / qq)); w=0; for (i=0; i <= q;i++) { w = w + fabs(xn[i] - xi); } w = w / ((qq - 1) * (ncl - 1)); if (qq > ncl) { s = log2(ncl); } else { s = log2(qq); } NUMERIC_POINTER(VECTOR_ELT(ans, 0))[c] = -e / s; //xans[c] = (-e * w) / s; NUMERIC_POINTER(VECTOR_ELT(ans, 1))[c] = w; } else { //xans[c]=R_NaReal; NUMERIC_POINTER(VECTOR_ELT(ans, 0))[c] = R_NaReal; NUMERIC_POINTER(VECTOR_ELT(ans, 1))[c] = R_NaReal; } } UNPROTECT(nProtected); return(ans); }

/* NOTE: For environments serialize.c calls this function to find if there is a class attribute in order to reconstruct the object bit if needed. This means the function cannot use OBJECT(vec) == 0 to conclude that the class attribute is R_NilValue. If you want to rewrite this function to use such a pre-test, be sure to adjust serialize.c accordingly. LT */ SEXP attribute_hidden getAttrib0(SEXP vec, SEXP name) { SEXP s; int len, i, any; if (name == R_NamesSymbol) { if(isVector(vec) || isList(vec) || isLanguage(vec)) { s = getAttrib(vec, R_DimSymbol); if(TYPEOF(s) == INTSXP && length(s) == 1) { s = getAttrib(vec, R_DimNamesSymbol); if(!isNull(s)) { SET_NAMED(VECTOR_ELT(s, 0), 2); return VECTOR_ELT(s, 0); } } } if (isList(vec) || isLanguage(vec)) { len = length(vec); PROTECT(s = allocVector(STRSXP, len)); i = 0; any = 0; for ( ; vec != R_NilValue; vec = CDR(vec), i++) { if (TAG(vec) == R_NilValue) SET_STRING_ELT(s, i, R_BlankString); else if (isSymbol(TAG(vec))) { any = 1; SET_STRING_ELT(s, i, PRINTNAME(TAG(vec))); } else error(_("getAttrib: invalid type (%s) for TAG"), type2char(TYPEOF(TAG(vec)))); } UNPROTECT(1); if (any) { if (!isNull(s)) SET_NAMED(s, 2); return (s); } return R_NilValue; } } /* This is where the old/new list adjustment happens. */ for (s = ATTRIB(vec); s != R_NilValue; s = CDR(s)) if (TAG(s) == name) { if (name == R_DimNamesSymbol && TYPEOF(CAR(s)) == LISTSXP) { SEXP _new, old; int i; _new = allocVector(VECSXP, length(CAR(s))); old = CAR(s); i = 0; while (old != R_NilValue) { SET_VECTOR_ELT(_new, i++, CAR(old)); old = CDR(old); } SET_NAMED(_new, 2); return _new; } SET_NAMED(CAR(s), 2); return CAR(s); } return R_NilValue; }

SEXP TASCA_min(SEXP vect, SEXP tau, SEXP numberofsamples){ //get the lengths //int i,j,sum,sum_tot; //int bytes = 0; int value_count = length(vect); int vc_m1 = value_count - 1; int vc_m2 = vc_m1 - 1; dbl_array *vector, *vect_sorted, *Q_Max_vec; dbl_matrix *Cc_Mat, *Q_Mat, *H_Mat1, *H_Mat2; int_matrix *Ind_Mat, *P_Mat, *v_vec; final_result_tri f_res; //sort the vect into vect_sorted vector = init_dbl_array(REAL(vect), value_count, 1); vect_sorted = init_dbl_array(0, value_count, 0); memcpy(vect_sorted->values, vector->values, vect_sorted->length * sizeof(double)); qsort(vect_sorted->values, vect_sorted->length, sizeof(double), comp); //name the required SEXP Objects SEXP result, binarized_vector, threshold1, threshold2, p_value, other_results, Cc, Ind, P, Q, H1, H2, Q_max, v, Names; //allocate memory for saving calculated values alloc_Accelerator_Memory(value_count); alloc_Accelerator_Memory_tri_min(value_count); //allocate the final result and set the names of the entries PROTECT(result = allocVector(VECSXP, 5)); PROTECT(Names = allocVector(VECSXP, 5)); SET_VECTOR_ELT(Names,0, mkChar("binarized_vector")); SET_VECTOR_ELT(Names,1, mkChar("threshold1")); SET_VECTOR_ELT(Names,2, mkChar("threshold2")); SET_VECTOR_ELT(Names,3, mkChar("p_value")); SET_VECTOR_ELT(Names,4, mkChar("other_results")); setAttrib(result, R_NamesSymbol, Names); UNPROTECT(1); PROTECT(other_results = allocVector(VECSXP, 8)); PROTECT(Names = allocVector(VECSXP, 8)); SET_VECTOR_ELT(Names,0, mkChar("Cc")); SET_VECTOR_ELT(Names,1, mkChar("Ind")); SET_VECTOR_ELT(Names,2, mkChar("P_Mat")); SET_VECTOR_ELT(Names,3, mkChar("Q_Mat")); SET_VECTOR_ELT(Names,4, mkChar("H_Mat1")); SET_VECTOR_ELT(Names,5, mkChar("H_Mat2")); SET_VECTOR_ELT(Names,6, mkChar("maximal_Qs")); SET_VECTOR_ELT(Names,7, mkChar("v_vec")); setAttrib(other_results, R_NamesSymbol, Names); UNPROTECT(1); //allocate memory for result matrices and vectors and set the matrix values to zero (because they aren't //all overwritten by the functions) PROTECT(binarized_vector = allocVector(INTSXP, value_count)); PROTECT(threshold1 = allocVector(REALSXP, 1)); PROTECT(threshold2 = allocVector(REALSXP, 1)); PROTECT(p_value = allocVector(REALSXP, 1)); PROTECT(Cc = allocMatrix(REALSXP, vc_m1, vc_m1)); PROTECT(Ind = allocMatrix(INTSXP, vc_m1, vc_m2)); PROTECT(P = allocMatrix(INTSXP, vc_m2, vc_m2)); PROTECT(Q = allocMatrix(REALSXP, vc_m2, vc_m2)); PROTECT(H1 = allocMatrix(REALSXP, vc_m2, vc_m2)); PROTECT(H2 = allocMatrix(REALSXP, vc_m2, vc_m2)); PROTECT(Q_max = allocVector(REALSXP, vc_m2)); PROTECT(v = allocMatrix(INTSXP, vc_m2, 2)); Cc_Mat = init_dbl_matrix(REAL(Cc), vc_m1, vc_m1, 0); Ind_Mat = init_int_matrix(INTEGER(Ind), vc_m2, vc_m1, 0); P_Mat = init_int_matrix(INTEGER(P), vc_m2, vc_m2, 0); v_vec = init_int_matrix(INTEGER(v), vc_m2, 2, 0); Q_Max_vec = init_dbl_array(REAL(Q_max), vc_m2, 0); Q_Mat = init_dbl_matrix(REAL(Q), vc_m2, vc_m2, 0); H_Mat1 = init_dbl_matrix(REAL(H1), vc_m2, vc_m2, 0); H_Mat2 = init_dbl_matrix(REAL(H2), vc_m2, vc_m2, 0); f_res.binarized_vector = init_int_array(INTEGER(binarized_vector), value_count, 0); f_res.p = REAL(p_value); f_res.threshold1 = REAL(threshold1); f_res.threshold2 = REAL(threshold2); //start the computation of the entries of all matrices calc_First_Cost_Matrix_Line(Cc_Mat, vect_sorted); calc_RestCc_and_Ind_Matrices(Cc_Mat, Ind_Mat, vect_sorted); calc_P_Matrix(P_Mat, Ind_Mat); calc_V_tri_min(v_vec, Q_Max_vec, Q_Mat, H_Mat1, H_Mat2, P_Mat, vect_sorted); //free the memory for calculated values free_Accelerator_Memory(); free_Accelerator_Memory_tri_min(); //calculate the final three results calc_final_results_tri_min(&f_res, v_vec, vector, vect_sorted, *REAL(tau), *INTEGER(numberofsamples)); //free(vect_sorted); destroy_dbl_array(vector); destroy_dbl_array(vect_sorted); destroy_dbl_matrix(Cc_Mat); destroy_int_matrix(Ind_Mat); destroy_int_matrix(P_Mat); destroy_int_matrix(v_vec); destroy_dbl_array(Q_Max_vec); destroy_dbl_matrix(Q_Mat); destroy_dbl_matrix(H_Mat1); destroy_dbl_matrix(H_Mat2); destroy_int_array(f_res.binarized_vector); //assign the computed elements to the final result SET_VECTOR_ELT(other_results,0, Cc); SET_VECTOR_ELT(other_results,1, Ind); SET_VECTOR_ELT(other_results,2, P); SET_VECTOR_ELT(other_results,3, Q); SET_VECTOR_ELT(other_results,4, H1); SET_VECTOR_ELT(other_results,5, H2); SET_VECTOR_ELT(other_results,6, Q_max); SET_VECTOR_ELT(other_results,7, v); SET_VECTOR_ELT(result,0, binarized_vector); SET_VECTOR_ELT(result,1, threshold1); SET_VECTOR_ELT(result,2, threshold2); SET_VECTOR_ELT(result,3, p_value); SET_VECTOR_ELT(result,4, other_results); UNPROTECT(14); return result; }

/* remove one variable in each highly-correlated pair. */ SEXP dedup (SEXP data, SEXP threshold, SEXP complete, SEXP debug) { int i = 0, j = 0, k = 0, dropped = 0, nc = 0; int debuglevel = isTRUE(debug); double *mean = NULL, *sse = NULL, *xx = NULL, *yy = NULL; double cur_mean[2], cur_sse[2]; double tol = MACHINE_TOL, t = NUM(threshold); long double sum = 0; SEXP result, colnames; gdata dt = { 0 }; /* extract the columns from the data frame. */ dt = gdata_from_SEXP(data, 0); meta_init_flags(&(dt.m), 0, complete, R_NilValue); meta_copy_names(&(dt.m), 0, data); /* set up the vectors for the pairwise complete observations. */ xx = Calloc1D(dt.m.nobs, sizeof(double)); yy = Calloc1D(dt.m.nobs, sizeof(double)); if (debuglevel > 0) Rprintf("* caching means and variances.\n"); mean = Calloc1D(dt.m.ncols, sizeof(double)); sse = Calloc1D(dt.m.ncols, sizeof(double)); /* cache the mean and variance of complete variables. */ for (j = 0; j < dt.m.ncols; j++) { if (!dt.m.flag[j].complete) continue; mean[j] = c_mean(dt.col[j], dt.m.nobs); sse[j] = c_sse(dt.col[j], mean[j], dt.m.nobs); }/*FOR*/ /* main loop. */ for (j = 0; j < dt.m.ncols - 1; j++) { /* skip variables already flagged for removal. */ if (dt.m.flag[j].drop) continue; if (debuglevel > 0) Rprintf("* looking at %s with %d variables still to check.\n", dt.m.names[j], dt.m.ncols - (j + 1)); for (k = j + 1; k < dt.m.ncols; k++) { /* skip variables already flagged for removal. */ if (dt.m.flag[k].drop) continue; if (dt.m.flag[j].complete && dt.m.flag[k].complete) { /* use the cached means and variances. */ cur_mean[0] = mean[j]; cur_mean[1] = mean[k]; cur_sse[0] = sse[j]; cur_sse[1] = sse[k]; /* compute the covariance. */ for (i = 0, sum = 0; i < dt.m.nobs; i++) sum += (dt.col[j][i] - cur_mean[0]) * (dt.col[k][i] - cur_mean[1]); }/*THEN*/ else { for (i = 0, nc = 0; i < dt.m.nobs; i++) { if (ISNAN(dt.col[j][i]) || ISNAN(dt.col[k][i])) continue; xx[nc] = dt.col[j][i]; yy[nc++] = dt.col[k][i]; }/*FOR*/ /* if there are no complete observations, take the variables to be * independent. */ if (nc == 0) continue; cur_mean[0] = c_mean(xx, nc); cur_mean[1] = c_mean(yy, nc); cur_sse[0] = c_sse(xx, cur_mean[0], nc); cur_sse[1] = c_sse(yy, cur_mean[1], nc); /* compute the covariance. */ for (i = 0, sum = 0; i < nc; i++) sum += (xx[i] - cur_mean[0]) * (yy[i] - cur_mean[1]); }/*ELSE*/ /* safety check against "divide by zero" errors. */ if ((cur_sse[0] < tol) || (cur_sse[1] < tol)) sum = 0; else sum /= sqrt(cur_sse[0] * cur_sse[1]); /* test the correlation against the threshold. */ if (fabsl(sum) > t) { if (debuglevel > 0) Rprintf("%s is collinear with %s, dropping %s with COR = %.4Lf\n", dt.m.names[j], dt.m.names[k], dt.m.names[k], sum); /* flag the variable for removal. */ dt.m.flag[k].drop = TRUE; dropped++; }/*THEN*/ }/*FOR*/ }/*FOR*/ /* set up the return value. */ PROTECT(result = allocVector(VECSXP, dt.m.ncols - dropped)); PROTECT(colnames = allocVector(STRSXP, dt.m.ncols - dropped)); for (j = 0, k = 0; j < dt.m.ncols; j++) if (!dt.m.flag[j].drop) { SET_STRING_ELT(colnames, k, mkChar(dt.m.names[j])); SET_VECTOR_ELT(result, k++, VECTOR_ELT(data, j)); }/*THEN*/ setAttrib(result, R_NamesSymbol, colnames); /* make it a data frame. */ minimal_data_frame(result); Free1D(mean); Free1D(sse); Free1D(xx); Free1D(yy); FreeGDT(dt, FALSE); UNPROTECT(2); return result; }/*DEDUP*/

SEXP scdd_f(SEXP m, SEXP h, SEXP roworder, SEXP adjacency, SEXP inputadjacency, SEXP incidence, SEXP inputincidence) { int i, j, k; GetRNGstate(); if (! isMatrix(m)) error("'m' must be matrix"); if (! isLogical(h)) error("'h' must be logical"); if (! isString(roworder)) error("'roworder' must be character"); if (! isLogical(adjacency)) error("'adjacency' must be logical"); if (! isLogical(inputadjacency)) error("'inputadjacency' must be logical"); if (! isLogical(incidence)) error("'incidence' must be logical"); if (! isLogical(inputincidence)) error("'inputincidence' must be logical"); if (LENGTH(h) != 1) error("'h' must be scalar"); if (LENGTH(roworder) != 1) error("'roworder' must be scalar"); if (LENGTH(adjacency) != 1) error("'adjacency' must be scalar"); if (LENGTH(inputadjacency) != 1) error("'inputadjacency' must be scalar"); if (LENGTH(incidence) != 1) error("'incidence' must be scalar"); if (LENGTH(inputincidence) != 1) error("'inputincidence' must be scalar"); if (! isReal(m)) error("'m' must be double"); SEXP m_dim; PROTECT(m_dim = getAttrib(m, R_DimSymbol)); int nrow = INTEGER(m_dim)[0]; int ncol = INTEGER(m_dim)[1]; UNPROTECT(1); #ifdef BLATHER printf("nrow = %d\n", nrow); printf("ncol = %d\n", ncol); #endif /* BLATHER */ if ((! LOGICAL(h)[0]) && nrow <= 0) error("no rows in 'm', not allowed for V-representation"); if (ncol <= 2) error("no cols in m[ , - c(1, 2)]"); for (i = 0; i < nrow * ncol; i++) if (! R_finite(REAL(m)[i])) error("'m' not finite-valued"); for (i = 0; i < nrow; i++) { double foo = REAL(m)[i]; if (! (foo == 0.0 || foo == 1.0)) error("column one of 'm' not zero-or-one valued"); } if (! LOGICAL(h)[0]) for (i = nrow; i < 2 * nrow; i++) { double foo = REAL(m)[i]; if (! (foo == 0.0 || foo == 1.0)) error("column two of 'm' not zero-or-one valued"); } ddf_set_global_constants(); myfloat value; ddf_init(value); ddf_MatrixPtr mf = ddf_CreateMatrix(nrow, ncol - 1); /* note our matrix has one more column than Fukuda's */ /* representation */ if(LOGICAL(h)[0]) mf->representation = ddf_Inequality; else mf->representation = ddf_Generator; mf->numbtype = ddf_Real; /* linearity */ for (i = 0; i < nrow; i++) { double foo = REAL(m)[i]; if (foo == 1.0) set_addelem(mf->linset, i + 1); /* note conversion from zero-origin to one-origin indexing */ } /* matrix */ for (j = 1, k = nrow; j < ncol; j++) for (i = 0; i < nrow; i++, k++) { ddf_set_d(value, REAL(m)[k]); ddf_set(mf->matrix[i][j - 1], value); /* note our matrix has one more column than Fukuda's */ } ddf_RowOrderType strategy = ddf_LexMin; const char *row_str = CHAR(STRING_ELT(roworder, 0)); if(strcmp(row_str, "maxindex") == 0) strategy = ddf_MaxIndex; else if(strcmp(row_str, "minindex") == 0) strategy = ddf_MinIndex; else if(strcmp(row_str, "mincutoff") == 0) strategy = ddf_MinCutoff; else if(strcmp(row_str, "maxcutoff") == 0) strategy = ddf_MaxCutoff; else if(strcmp(row_str, "mixcutoff") == 0) strategy = ddf_MixCutoff; else if(strcmp(row_str, "lexmin") == 0) strategy = ddf_LexMin; else if(strcmp(row_str, "lexmax") == 0) strategy = ddf_LexMax; else if(strcmp(row_str, "randomrow") == 0) strategy = ddf_RandomRow; else error("roworder not recognized"); ddf_ErrorType err = ddf_NoError; ddf_PolyhedraPtr poly = ddf_DDMatrix2Poly2(mf, strategy, &err); if (poly->child != NULL && poly->child->CompStatus == ddf_InProgress) { ddf_FreeMatrix(mf); ddf_FreePolyhedra(poly); ddf_clear(value); ddf_free_global_constants(); error("Computation failed, floating-point arithmetic problem\n"); } if (err != ddf_NoError) { rrf_WriteErrorMessages(err); ddf_FreeMatrix(mf); ddf_FreePolyhedra(poly); ddf_clear(value); ddf_free_global_constants(); error("failed"); } ddf_MatrixPtr aout = NULL; if (poly->representation == ddf_Inequality) aout = ddf_CopyGenerators(poly); else if (poly->representation == ddf_Generator) aout = ddf_CopyInequalities(poly); else error("Cannot happen! poly->representation no good\n"); if (aout == NULL) error("Cannot happen! aout no good\n"); int mrow = aout->rowsize; int mcol = aout->colsize; if (mcol + 1 != ncol) error("Cannot happen! computed matrix has wrong number of columns"); #ifdef BLATHER printf("mrow = %d\n", mrow); printf("mcol = %d\n", mcol); #endif /* BLATHER */ SEXP bar; PROTECT(bar = allocMatrix(REALSXP, mrow, ncol)); /* linearity output */ for (i = 0; i < mrow; i++) if (set_member(i + 1, aout->linset)) REAL(bar)[i] = 1.0; else REAL(bar)[i] = 0.0; /* note conversion from zero-origin to one-origin indexing */ /* matrix output */ for (j = 1, k = mrow; j < ncol; j++) for (i = 0; i < mrow; i++, k++) { double ax = ddf_get_d(aout->matrix[i][j - 1]); /* note our matrix has one more column than Fukuda's */ REAL(bar)[k] = ax; } int nresult = 1; SEXP baz_adj = NULL; if (LOGICAL(adjacency)[0]) { ddf_SetFamilyPtr sout = ddf_CopyAdjacency(poly); PROTECT(baz_adj = rrf_WriteSetFamily(sout)); ddf_FreeSetFamily(sout); nresult++; } SEXP baz_inp_adj = NULL; if (LOGICAL(inputadjacency)[0]) { ddf_SetFamilyPtr sout = ddf_CopyInputAdjacency(poly); PROTECT(baz_inp_adj = rrf_WriteSetFamily(sout)); ddf_FreeSetFamily(sout); nresult++; } SEXP baz_inc = NULL; if (LOGICAL(incidence)[0]) { ddf_SetFamilyPtr sout = ddf_CopyIncidence(poly); PROTECT(baz_inc = rrf_WriteSetFamily(sout)); ddf_FreeSetFamily(sout); nresult++; } SEXP baz_inp_inc = NULL; if (LOGICAL(inputincidence)[0]) { ddf_SetFamilyPtr sout = ddf_CopyInputIncidence(poly); PROTECT(baz_inp_inc = rrf_WriteSetFamily(sout)); ddf_FreeSetFamily(sout); nresult++; } SEXP result, resultnames; PROTECT(result = allocVector(VECSXP, nresult)); PROTECT(resultnames = allocVector(STRSXP, nresult)); SET_STRING_ELT(resultnames, 0, mkChar("output")); SET_VECTOR_ELT(result, 0, bar); int iresult = 1; if (baz_adj) { SET_STRING_ELT(resultnames, iresult, mkChar("adjacency")); SET_VECTOR_ELT(result, iresult, baz_adj); iresult++; } if (baz_inp_adj) { SET_STRING_ELT(resultnames, iresult, mkChar("inputadjacency")); SET_VECTOR_ELT(result, iresult, baz_inp_adj); iresult++; } if (baz_inc) { SET_STRING_ELT(resultnames, iresult, mkChar("incidence")); SET_VECTOR_ELT(result, iresult, baz_inc); iresult++; } if (baz_inp_inc) { SET_STRING_ELT(resultnames, iresult, mkChar("inputincidence")); SET_VECTOR_ELT(result, iresult, baz_inp_inc); iresult++; } namesgets(result, resultnames); if (aout->objective != ddf_LPnone) error("Cannot happen! aout->objective != ddf_LPnone\n"); ddf_FreeMatrix(aout); ddf_FreeMatrix(mf); ddf_FreePolyhedra(poly); ddf_clear(value); ddf_free_global_constants(); PutRNGstate(); UNPROTECT(2 + nresult); return result; }

SEXP readSlicePorStream(SEXP porStream, SEXP what, SEXP s_vars, SEXP s_cases, SEXP s_types){ porStreamBuf *b = get_porStreamBuf(porStream); PROTECT(s_vars = coerceVector(s_vars,LGLSXP)); PROTECT(s_cases = coerceVector(s_cases,LGLSXP)); PROTECT(s_types = coerceVector(s_types,INTSXP)); int nvar = length(s_types); int ncases = length(s_cases); int *types = INTEGER(s_types); if(LENGTH(s_vars)!=nvar) error("\'s_vars\' argument has wrong length"); int ii,i,j,k, m=0, n = 0; for(j = 0; j < nvar; j++) m+=LOGICAL(s_vars)[j]; for(i = 0; i < ncases; i++) n+=LOGICAL(s_cases)[i]; SEXP x, y, data; char *charbuf; int charbuflen = 0; PROTECT(data = allocVector(VECSXP,m)); k = 0; for(j = 0; j < nvar; j++){ if(types[j] > charbuflen) charbuflen = types[j]; if(LOGICAL(s_vars)[j]){ if(types[j]==0) SET_VECTOR_ELT(data,k,allocVector(REALSXP,n)); else { SET_VECTOR_ELT(data,k,allocVector(STRSXP,n)); } k++; } } charbuf = R_alloc(charbuflen+1,sizeof(char)); ii = 0; for(i = 0; i < ncases; i++){ if(atEndPorStream(b) || (b->pos < 80 && b->buf[b->pos] == 'Z')){ int new_length = ii; for(j = 0; j < m; j++){ x = VECTOR_ELT(data,j); SET_VECTOR_ELT(data,j,lengthgets(x,new_length)); } n = new_length; break; } if(LOGICAL(s_cases)[i]){ k = 0; for(j = 0; j < nvar; j++){ if(atEndPorStream(b)) { printPorStreamBuf(b); warning("\nPremature end of data"); } if(types[j]==0){ if(LOGICAL(s_vars)[j]){ REAL(VECTOR_ELT(data,k))[ii] = readDoublePorStream1(b); k++; } else { readDoublePorStream1(b); } } else { if(LOGICAL(s_vars)[j]){ SET_STRING_ELT(VECTOR_ELT(data,k), ii, mkChar(readCHARPorStream(b,charbuf,types[j]))); k++; } else { readCHARPorStream(b,charbuf,types[j]); } } } ii++; } else { for(j = 0; j < nvar; j++){ if(atEndPorStream(b)) { printPorStreamBuf(b); error("\nPremature end of data"); } if(types[j]==0) readDoublePorStream1(b); else readCHARPorStream(b,charbuf,types[j]); } } } k = 0; for(j = 0; j < nvar; j++){ if(LOGICAL(s_vars)[j]){ x = VECTOR_ELT(what,j); y = VECTOR_ELT(data,k); copyMostAttrib(x,y); k++; } } UNPROTECT(4); return data; }

/* mean strength for p-values and score deltas. */ static void mean_strength_overall(SEXP *mean_df, SEXP strength, SEXP nodes, int nrows, int nstr, SEXP ref_hash, double *w) { int i = 0, j = 0, *t = NULL; double *mstr = NULL, *cur_strength = NULL; long double cumw = 0; SEXP mean_str, cur, cur_hash, try; /* allocate the strength accumulator vector. */ PROTECT(mean_str = allocVector(REALSXP, nrows)); SET_VECTOR_ELT(*mean_df, 2, mean_str); mstr = REAL(mean_str); memset(mstr, '\0', nrows * sizeof(double)); for (i = 0; i < nstr; i++) { /* move to the next object. */ cur = VECTOR_ELT(strength, i); /* get the strength values from the bn.strength object. */ cur_strength = REAL(VECTOR_ELT(cur, 2)); /* get the arc IDs to use to correctly match strengths. */ PROTECT(cur_hash = arc_hash(cur, nodes, FALSE, FALSE)); /* match the current arc IDs to the reference arc IDs. */ PROTECT(try = match(ref_hash, cur_hash, 0)); t = INTEGER(try); for (j = 0; j < nrows; j++) mstr[t[j] - 1] += w[i] * cur_strength[j]; /* update the total weight mass. */ cumw += w[i]; UNPROTECT(2); }/*FOR*/ /* rescale by the total weight mass. */ for (j = 0; j < nrows; j++) mstr[j] /= cumw; UNPROTECT(1); }/*MEAN_STRENGTH_OVERALL*/ /* mean strength for bootstrap probabilities. */ static void mean_strength_direction(SEXP *mean_df, SEXP strength, SEXP nodes, int nrows, int nstr, SEXP ref_hash, double *w) { int i = 0, j = 0, *t = NULL, nnodes = length(nodes); double *mstr = NULL, *mdir = NULL, *cur_strength = NULL, *cur_dir = NULL; double fwd = 0, bkwd = 0; long double cumw = 0; SEXP mean_str, mean_dir, cur, cur_hash, try; /* allocate vectors for strength and direction. */ PROTECT(mean_str = allocVector(REALSXP, nrows)); SET_VECTOR_ELT(*mean_df, 2, mean_str); mstr = REAL(mean_str); memset(mstr, '\0', nrows * sizeof(double)); PROTECT(mean_dir = allocVector(REALSXP, nrows)); SET_VECTOR_ELT(*mean_df, 3, mean_dir); mdir = REAL(mean_dir); memset(mdir, '\0', nrows * sizeof(double)); for (i = 0; i < nstr; i++) { /* move to the next object. */ cur = VECTOR_ELT(strength, i); /* get the strength and direction values from the bn.strength object. */ cur_strength = REAL(VECTOR_ELT(cur, 2)); cur_dir = REAL(VECTOR_ELT(cur, 3)); /* get the arc IDs to use to correctly match strengths. */ PROTECT(cur_hash = arc_hash(cur, nodes, FALSE, FALSE)); /* match the current arc IDs to the reference arc IDs. */ PROTECT(try = match(ref_hash, cur_hash, 0)); t = INTEGER(try); for (j = 0; j < nrows; j++) mstr[t[j] - 1] += w[i] * (cur_strength[j] * cur_dir[j]); /* update the total weight mass. */ cumw += w[i]; UNPROTECT(2); }/*FOR*/ /* rescale by the total weight mass. */ for (j = 0; j < nrows; j++) mstr[j] /= cumw; /* split arc strength from direction strength. */ for (i = 0; i < nnodes; i++) { for (j = i + 1; j < nnodes; j++) { fwd = mstr[CMC(j, i, nnodes) - i - 1]; bkwd = mstr[CMC(i, j, nnodes) - j]; mstr[CMC(j, i, nnodes) - i - 1] = mstr[CMC(i, j, nnodes) - j] = fwd + bkwd; if (bkwd + fwd > 0) { mdir[CMC(j, i, nnodes) - i - 1] = fwd / (fwd + bkwd); mdir[CMC(i, j, nnodes) - j] = bkwd / (fwd + bkwd); }/*THEN*/ else { mdir[CMC(j, i, nnodes) - i - 1] = mdir[CMC(i, j, nnodes) - j] = 0; }/*ELSE*/ }/*FOR*/ }/*FOR*/ UNPROTECT(2); }/*MEAN_STRENGTH_DIRECTION*/ /* average multiple bn.strength objects, with weights. */ SEXP mean_strength(SEXP strength, SEXP nodes, SEXP weights) { int nstr = length(weights), ncols = 0, nrows = 0; double *w = REAL(weights); const char *m = NULL; SEXP ref, ref_hash, mean_df, method; /* initialize the result using the first bn.strength object as a reference. */ ref = VECTOR_ELT(strength, 0); ncols = length(ref); nrows = length(VECTOR_ELT(ref, 0)); PROTECT(mean_df = allocVector(VECSXP, ncols)); setAttrib(mean_df, R_NamesSymbol, getAttrib(ref, R_NamesSymbol)); SET_VECTOR_ELT(mean_df, 0, VECTOR_ELT(ref, 0)); SET_VECTOR_ELT(mean_df, 1, VECTOR_ELT(ref, 1)); /* make it a data frame */ minimal_data_frame(mean_df); /* compute the arc IDs to match arcs of later bn.strength objects. */ PROTECT(ref_hash = arc_hash(ref, nodes, FALSE, FALSE)); /* switch backend according to how the strengths were computed. */ method = getAttrib(ref, BN_MethodSymbol); m = CHAR(STRING_ELT(method, 0)); if ((strcmp(m, "score") == 0) || (strcmp(m, "test") == 0)) mean_strength_overall(&mean_df, strength, nodes, nrows, nstr, ref_hash, w); else if (strcmp(m, "bootstrap") == 0) mean_strength_direction(&mean_df, strength, nodes, nrows, nstr, ref_hash, w); UNPROTECT(2); return mean_df; }/*MEAN_STRENGTH*/

SEXP elsa_vector(SEXP v, SEXP nb, SEXP nclass) { int nProtected=0; int ncl, n, q, ngb, a; double e, w, s, qq, count,xi; R_len_t i, j, c; SEXP ans; PROTECT(ans = NEW_LIST(2)); ++nProtected; double *xv; ncl=INTEGER(nclass)[0]; n=length(v); PROTECT(v = coerceVector(v, REALSXP)); ++nProtected; SET_VECTOR_ELT(ans, 0, NEW_NUMERIC(n)); SET_VECTOR_ELT(ans, 1, NEW_NUMERIC(n)); xv=REAL(v); for (c=0;c < n;c++) { R_CheckUserInterrupt(); xi=xv[c]; if (!R_IsNA(xi)) { ngb = length(VECTOR_ELT(nb,c)); double xn[ngb+1]; q=-1; for (i=0;i < ngb;i++) { a=xv[INTEGER_POINTER(VECTOR_ELT(nb,c))[i] - 1]; if (!R_IsNA(a)) { q+=1; xn[i]=a; } } q+=1; xn[q]=xi; // sort for (i=0;i <= (q-1);i++) { for (j=i+1;j <= q;j++) { if (xn[i] > xn[j]) { a=xn[i]; xn[i]=xn[j]; xn[j]=a; } } } //------ a=xn[0]; count=1; e=0; qq=q+1; for (i=1;i <= q;i++) { if (xn[i] != a) { e = e + ((count / qq) * log2(count / qq)); a=xn[i]; count=1; } else { count+=1; } } e = e + ((count / qq) * log2(count / qq)); w=0; for (i=0; i <= q;i++) { w = w + fabs(xn[i] - xi); } w = w / ((qq - 1) * (ncl - 1)); if (qq > ncl) { s = log2(ncl); } else { s = log2(qq); } NUMERIC_POINTER(VECTOR_ELT(ans, 0))[c] = -e / s; //xans[c] = (-e * w) / s; NUMERIC_POINTER(VECTOR_ELT(ans, 1))[c] = w; } else { NUMERIC_POINTER(VECTOR_ELT(ans, 0))[c] = R_NaReal; NUMERIC_POINTER(VECTOR_ELT(ans, 1))[c] = R_NaReal; } } UNPROTECT(nProtected); return(ans); }

/* rep.int(x, times) for a vector times */ static SEXP rep2(SEXP s, SEXP ncopy) { R_xlen_t i, na, nc, n; int j; SEXP a, t; PROTECT(t = coerceVector(ncopy, INTSXP)); nc = xlength(ncopy); na = 0; for (i = 0; i < nc; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); if (INTEGER(t)[i] == NA_INTEGER || INTEGER(t)[i] < 0) error(_("invalid '%s' value"), "times"); na += INTEGER(t)[i]; } /* R_xlen_t ni = NINTERRUPT, ratio; if(nc > 0) { ratio = na/nc; // average no of replications if (ratio > 1000U) ni = 1000U; } */ PROTECT(a = allocVector(TYPEOF(s), na)); n = 0; switch (TYPEOF(s)) { case LGLSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); for (j = 0; j < INTEGER(t)[i]; j++) LOGICAL(a)[n++] = LOGICAL(s)[i]; } break; case INTSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); for (j = 0; j < INTEGER(t)[i]; j++) INTEGER(a)[n++] = INTEGER(s)[i]; } break; case REALSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); for (j = 0; j < INTEGER(t)[i]; j++) REAL(a)[n++] = REAL(s)[i]; } break; case CPLXSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); for (j = 0; j < INTEGER(t)[i]; j++) COMPLEX(a)[n++] = COMPLEX(s)[i]; } break; case STRSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); for (j = 0; j < INTEGER(t)[i]; j++) SET_STRING_ELT(a, n++, STRING_ELT(s, i)); } break; case VECSXP: case EXPRSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); SEXP elt = lazy_duplicate(VECTOR_ELT(s, i)); for (j = 0; j < INTEGER(t)[i]; j++) SET_VECTOR_ELT(a, n++, elt); if (j > 1) SET_NAMED(elt, 2); } break; case RAWSXP: for (i = 0; i < nc; i++) { // if ((i+1) % ni == 0) R_CheckUserInterrupt(); for (j = 0; j < INTEGER(t)[i]; j++) RAW(a)[n++] = RAW(s)[i]; } break; default: UNIMPLEMENTED_TYPE("rep2", s); } UNPROTECT(2); return a; }

SEXP attribute_hidden do_mapply(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP f = CAR(args), varyingArgs = CADR(args), constantArgs = CADDR(args); int m, zero = 0; R_xlen_t *lengths, *counters, longest = 0; m = length(varyingArgs); SEXP vnames = PROTECT(getAttrib(varyingArgs, R_NamesSymbol)); Rboolean named = vnames != R_NilValue; lengths = (R_xlen_t *) R_alloc(m, sizeof(R_xlen_t)); for (int i = 0; i < m; i++) { SEXP tmp1 = VECTOR_ELT(varyingArgs, i); lengths[i] = xlength(tmp1); if (isObject(tmp1)) { // possibly dispatch on length() /* Cache the .Primitive: unclear caching is worthwhile. */ static SEXP length_op = NULL; if (length_op == NULL) length_op = R_Primitive("length"); // DispatchOrEval() needs 'args' to be a pairlist SEXP ans, tmp2 = PROTECT(list1(tmp1)); if (DispatchOrEval(call, length_op, "length", tmp2, rho, &ans, 0, 1)) lengths[i] = (R_xlen_t) (TYPEOF(ans) == REALSXP ? REAL(ans)[0] : asInteger(ans)); UNPROTECT(1); } if (lengths[i] == 0) zero++; if (lengths[i] > longest) longest = lengths[i]; } if (zero && longest) error(_("zero-length inputs cannot be mixed with those of non-zero length")); counters = (R_xlen_t *) R_alloc(m, sizeof(R_xlen_t)); memset(counters, 0, m * sizeof(R_xlen_t)); SEXP mindex = PROTECT(allocVector(VECSXP, m)); SEXP nindex = PROTECT(allocVector(VECSXP, m)); /* build a call like f(dots[[1]][[4]], dots[[2]][[4]], dots[[3]][[4]], d=7) */ SEXP fcall = R_NilValue; // -Wall if (constantArgs == R_NilValue) ; else if (isVectorList(constantArgs)) fcall = VectorToPairList(constantArgs); else error(_("argument 'MoreArgs' of 'mapply' is not a list")); PROTECT_INDEX fi; PROTECT_WITH_INDEX(fcall, &fi); Rboolean realIndx = longest > INT_MAX; SEXP Dots = install("dots"); for (int j = m - 1; j >= 0; j--) { SET_VECTOR_ELT(mindex, j, ScalarInteger(j + 1)); SET_VECTOR_ELT(nindex, j, allocVector(realIndx ? REALSXP : INTSXP, 1)); SEXP tmp1 = PROTECT(lang3(R_Bracket2Symbol, Dots, VECTOR_ELT(mindex, j))); SEXP tmp2 = PROTECT(lang3(R_Bracket2Symbol, tmp1, VECTOR_ELT(nindex, j))); REPROTECT(fcall = LCONS(tmp2, fcall), fi); UNPROTECT(2); if (named && CHAR(STRING_ELT(vnames, j))[0] != '\0') SET_TAG(fcall, installTrChar(STRING_ELT(vnames, j))); } REPROTECT(fcall = LCONS(f, fcall), fi); SEXP ans = PROTECT(allocVector(VECSXP, longest)); for (int i = 0; i < longest; i++) { for (int j = 0; j < m; j++) { counters[j] = (++counters[j] > lengths[j]) ? 1 : counters[j]; if (realIndx) REAL(VECTOR_ELT(nindex, j))[0] = (double) counters[j]; else INTEGER(VECTOR_ELT(nindex, j))[0] = (int) counters[j]; } SEXP tmp = eval(fcall, rho); if (NAMED(tmp)) tmp = duplicate(tmp); SET_VECTOR_ELT(ans, i, tmp); } for (int j = 0; j < m; j++) if (counters[j] != lengths[j]) warning(_("longer argument not a multiple of length of shorter")); UNPROTECT(5); return ans; }

/* rep(), allowing for both times and each */ static SEXP rep4(SEXP x, SEXP times, R_xlen_t len, int each, R_xlen_t nt) { SEXP a; R_xlen_t lx = xlength(x); R_xlen_t i, j, k, k2, k3, sum; // faster code for common special case if (each == 1 && nt == 1) return rep3(x, lx, len); PROTECT(a = allocVector(TYPEOF(x), len)); switch (TYPEOF(x)) { case LGLSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); LOGICAL(a)[i] = LOGICAL(x)[(i/each) % lx]; } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { LOGICAL(a)[k2++] = LOGICAL(x)[i]; if(k2 == len) goto done; } } } break; case INTSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); INTEGER(a)[i] = INTEGER(x)[(i/each) % lx]; } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { INTEGER(a)[k2++] = INTEGER(x)[i]; if(k2 == len) goto done; } } } break; case REALSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); REAL(a)[i] = REAL(x)[(i/each) % lx]; } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { REAL(a)[k2++] = REAL(x)[i]; if(k2 == len) goto done; } } } break; case CPLXSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); COMPLEX(a)[i] = COMPLEX(x)[(i/each) % lx]; } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { COMPLEX(a)[k2++] = COMPLEX(x)[i]; if(k2 == len) goto done; } } } break; case STRSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); SET_STRING_ELT(a, i, STRING_ELT(x, (i/each) % lx)); } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { SET_STRING_ELT(a, k2++, STRING_ELT(x, i)); if(k2 == len) goto done; } } } break; case VECSXP: case EXPRSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); SET_VECTOR_ELT(a, i, VECTOR_ELT(x, (i/each) % lx)); } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { SET_VECTOR_ELT(a, k2++, VECTOR_ELT(x, i)); if(k2 == len) goto done; } } } break; case RAWSXP: if(nt == 1) for(i = 0; i < len; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); RAW(a)[i] = RAW(x)[(i/each) % lx]; } else { for(i = 0, k = 0, k2 = 0; i < lx; i++) { // if ((i+1) % NINTERRUPT == 0) R_CheckUserInterrupt(); for(j = 0, sum = 0; j < each; j++) sum += INTEGER(times)[k++]; for(k3 = 0; k3 < sum; k3++) { RAW(a)[k2++] = RAW(x)[i]; if(k2 == len) goto done; } } } break; default: UNIMPLEMENTED_TYPE("rep4", x); } done: UNPROTECT(1); return a; }

/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */ SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference, SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) { int nnodes = length(nodes), i = 0, j = 0; int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug); int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL; double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL; double *mp = REAL(maxp), *np = REAL(nparents); SEXP bestop; /* allocate and initialize the return value (use FALSE as a canary value). */ PROTECT(bestop = allocVector(VECSXP, 3)); setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to")); /* allocate and initialize a dummy FALSE object. */ SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE)); /* allocate buffers for c_has_path(). */ path = Calloc1D(nnodes, sizeof(int)); scratch = Calloc1D(nnodes, sizeof(int)); /* save pointers to the numeric/integer matrices. */ cache_value = REAL(cache); ad = INTEGER(added); am = INTEGER(amat); w = INTEGER(wlmat); b = INTEGER(blmat); if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0; i < nnodes * nnodes; i++) counter += ad[i]; Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to add one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (ad[CMC(i, j, nnodes)] == 0) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)]; if (debuglevel > 0) { Rprintf(" > trying to add %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ /* this score delta is the best one at the moment, so add the arc if it * does not introduce cycles in the graph. */ if (temp - max > tol) { if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch, FALSE)) { if (debuglevel > 0) Rprintf(" > not adding, introduces cycles in the graph.\n"); continue; }/*THEN*/ if (debuglevel > 0) Rprintf(" @ adding %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "set", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0, counter = 0; i < nnodes * nnodes; i++) counter += am[i] * (1 - w[i]); Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to remove one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, nnodes)] == 0) continue; /* whitelisted arcs are not to be removed, ever. */ if (w[CMC(i, j, nnodes)] == 1) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)]; if (debuglevel > 0) { Rprintf(" > trying to remove %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - max > tol) { if (debuglevel > 0) Rprintf(" @ removing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "drop", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0, counter = 0; i < nnodes; i++) for (j = 0; j < nnodes; j++) counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]); Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to reverse one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, nnodes)] == 0) continue; /* don't reverse an arc if the one in the opposite direction is * blacklisted, ever. */ if (b[CMC(j, i, nnodes)] == 1) continue; /* do not reverse an arc if that means violating the limit on the * maximum number of parents. */ if (np[i] >= *mp) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)]; /* nuke small values and negative zeroes. */ if (fabs(temp) < tol) temp = 0; if (debuglevel > 0) { Rprintf(" > trying to reverse %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - max > tol) { if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch, FALSE)) { if (debuglevel > 0) Rprintf(" > not reversing, introduces cycles in the graph.\n"); continue; }/*THEN*/ if (debuglevel > 0) Rprintf(" @ reversing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "reverse", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* both nodes' reference scores must be updated. */ update = 2; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ /* update the reference scores. */ REAL(reference)[to] += cache_value[CMC(from, to, nnodes)]; if (update == 2) REAL(reference)[from] += cache_value[CMC(to, from, nnodes)]; Free1D(path); Free1D(scratch); UNPROTECT(1); return bestop; }/*HC_OPT_STEP*/

/* using thernau's habit: name a S object "charlie2" and the pointer ** to the contents of the object "charlie"; the latter is ** used in the computations */ SEXP netfastpinter( SEXP efac2, SEXP edims2, SEXP ecut2, SEXP expect2, SEXP x2, SEXP y2,SEXP ys2, SEXP status2, SEXP times2) { int i,j,k; int n, edim, ntime; double **x; double *data2, *si, *sitt; double **ecut, *etemp; double hazard, hazspi; /*cum hazard over an interval, also weigthed hazard */ double thiscell, etime, time, et2; int indx, indx2; double wt; int *efac, *edims, *status; double *expect, *y,*ys, *times; SEXP rlist, rlistnames; /*my declarations*/ SEXP yidli2, dnisi2,yisi2,yidlisi2,yi2,dni2,dnisisq2,yisitt2,yidlisitt2,yidlisiw2; double *yidli, *dnisi,*yisi,*yidlisi,*yi,*dni,*dnisisq, *yisitt,*yidlisitt,*yidlisiw; /* ** copies of input arguments */ efac = INTEGER(efac2); edims = INTEGER(edims2); edim = LENGTH(edims2); expect= REAL(expect2); n = LENGTH(y2); /*number of individuals */ x = dmatrix(REAL(x2), n, edim); y = REAL(y2); /*follow-up times*/ ys = REAL(ys2); status = INTEGER(status2); /* status */ times = REAL(times2); ntime = LENGTH(times2); /*length of times for reportint */ /* scratch space */ data2 = (double *)ALLOC(edim+1, sizeof(double)); si = (double *)ALLOC(n, sizeof(double)); /*Si for each individual - this is a pointer, the values are called using s[i]*/ sitt = (double *)ALLOC(n, sizeof(double)); /*Si at the beg. of the interval for each individual */ /* ** Set up ecut index as a ragged array */ ecut = (double **)ALLOC(edim, sizeof(double *)); etemp = REAL(ecut2); for (i=0; i<edim; i++) { ecut[i] = etemp; if (efac[i]==0) etemp += edims[i]; else if(efac[i] >1) etemp += 1 + (efac[i]-1)*edims[i]; } /* ** Create output arrays */ PROTECT(yidli2 = allocVector(REALSXP, ntime)); /*sum Yi dLambdai for each time* - length=length(times2)*/ yidli = REAL(yidli2); PROTECT(dnisi2 = allocVector(REALSXP, ntime)); /*sum dNi/Si for each time* - length=length(times2)*/ dnisi = REAL(dnisi2); PROTECT(yisi2 = allocVector(REALSXP, ntime)); /*sum Yi/Si for each time* - length=length(times2)*/ yisi = REAL(yisi2); PROTECT(yisitt2 = allocVector(REALSXP, ntime)); /*add tt*/ yisitt = REAL(yisitt2); PROTECT(yidlisi2 = allocVector(REALSXP, ntime)); /*sum yi/Si dLambdai for each time* - length=length(times2)*/ yidlisi = REAL(yidlisi2); PROTECT(yidlisitt2 = allocVector(REALSXP, ntime)); /*add tt*/ yidlisitt = REAL(yidlisitt2); PROTECT(yidlisiw2 = allocVector(REALSXP, ntime)); /*add w*/ yidlisiw = REAL(yidlisiw2); PROTECT(yi2 = allocVector(REALSXP, ntime)); /* sum yi at each time*/ yi = REAL(yi2); PROTECT(dni2 = allocVector(REALSXP, ntime)); /*sum Yi dLambdai for each time* - length=length(times2)*/ dni = REAL(dni2); PROTECT(dnisisq2 = allocVector(REALSXP, ntime)); /*sum yi/Si dLambdai for each time* - length=length(times2)*/ dnisisq = REAL(dnisisq2); /*initialize Si values*/ for (i=0; i<n; i++) { si[i] =1; sitt[i] =1; } /*initialize output values*/ for (j=0; j<ntime; j++) { yidli[j] =0; dnisi[j] =0; yisi[j]=0; yisitt[j]=0; yidlisi[j]=0; yidlisitt[j]=0; yidlisiw[j]=0; yi[j]=0; dni[j]=0; dnisisq[j]=0; } time =0; for (j=0; j<ntime ; j++) { /* loop in time */ thiscell = times[j] - time; /* compute for each individual*/ for (i=0; i<n; i++) { if(y[i]>= times[j]){ // if still at risk /* ** initialize */ for (k=0; k<edim; k++){ data2[k] = x[k][i]; /* the individual's values of demographic variables at time 0 */ if (efac[k] !=1) data2[k] += time; /* add time to time changing variables */ } /* ** add up hazard */ /* expected calc ** The wt parameter only comes into play for older style US rate ** tables, where pystep does interpolation. ** Each call to pystep moves up to the next 'boundary' in the ** expected table, data2 contains our current position therein */ etime = thiscell; hazard =0; hazspi=0; //integration of haz/si while (etime >0) { et2 = pystep(edim, &indx, &indx2, &wt, data2, efac, edims, ecut, etime, 1); hazspi+= et2* expect[indx]/(si[i]*exp(-hazard)); //add the integrated part if (wt <1) hazard+= et2*(wt*expect[indx] +(1-wt)*expect[indx2]); else hazard+= et2* expect[indx]; for (k=0; k<edim; k++) if (efac[k] !=1) data2[k] += et2; etime -= et2; } sitt[i] = si[i]; // si at the beginning of the interval si[i] = si[i]* exp(-hazard); if(ys[i]<=times[j]){ // if start of observation before this time yisi[j]+=1/si[i]; yisitt[j]+=1/sitt[i]; yidlisi[j]+=hazard/si[i]; yidlisitt[j]+=hazard/sitt[i]; yidlisiw[j]+=hazspi; yidli[j]+=hazard; yi[j]+=1; if(y[i]==times[j]){ dnisi[j]+=status[i]/si[i]; dni[j]+=status[i]; dnisisq[j]+=status[i]/(si[i]*si[i]); } // if this person died at this time } // if start of observation before this time } // if still at risk }// loop through individuals time += thiscell; }// loop through times /* ** package the output */ PROTECT(rlist = allocVector(VECSXP, 10)); //number of variables SET_VECTOR_ELT(rlist,0, dnisi2); SET_VECTOR_ELT(rlist,1, yisi2); SET_VECTOR_ELT(rlist,2, yidlisi2); SET_VECTOR_ELT(rlist,3, dnisisq2); SET_VECTOR_ELT(rlist,4, yi2); SET_VECTOR_ELT(rlist,5, dni2); SET_VECTOR_ELT(rlist,6, yidli2); SET_VECTOR_ELT(rlist,7, yisitt2); /*added tt*/ SET_VECTOR_ELT(rlist,8, yidlisitt2); /*added tt*/ SET_VECTOR_ELT(rlist,9, yidlisiw2); /*added w*/ PROTECT(rlistnames= allocVector(STRSXP, 10)); //number of variables SET_STRING_ELT(rlistnames, 0, mkChar("dnisi")); SET_STRING_ELT(rlistnames, 1, mkChar("yisi")); SET_STRING_ELT(rlistnames, 2, mkChar("yidlisi")); SET_STRING_ELT(rlistnames, 3, mkChar("dnisisq")); SET_STRING_ELT(rlistnames, 4, mkChar("yi")); SET_STRING_ELT(rlistnames, 5, mkChar("dni")); SET_STRING_ELT(rlistnames, 6, mkChar("yidli")); SET_STRING_ELT(rlistnames, 7, mkChar("yisitt")); /*added tt*/ SET_STRING_ELT(rlistnames, 8, mkChar("yidlisitt")); /*added tt*/ SET_STRING_ELT(rlistnames, 9, mkChar("yidlisiw")); /*added w*/ setAttrib(rlist, R_NamesSymbol, rlistnames); unprotect(12); /*number of variables + 2*/ return(rlist); }

SEXP R_pncount(SEXP R_x, SEXP R_t, SEXP R_s, SEXP R_o, SEXP R_v) { int i, j, c, f, l, k, n, nr, np, ni, e; int *x, *o = NULL; double s, t = 0; SEXP px, ix, pt, it; SEXP r, pr, ir, pl, il, rs, rc, rl, pi; #ifdef _TIME_H clock_t t5, t4, t3, t2, t1, t0; t1 = t0 = clock(); if (LOGICAL(R_v)[0] == TRUE) { if (LOGICAL(R_o)[0] == TRUE) Rprintf("reducing ... "); else Rprintf("preparing ... "); } #endif if (!inherits(R_x, "ngCMatrix")) error("'x' not of class ngCMatrix"); if (!inherits(R_t, "ngCMatrix")) error("'t' not of class ngCMatrix"); if (INTEGER(GET_SLOT(R_x, install("Dim")))[0] != INTEGER(GET_SLOT(R_t, install("Dim")))[0]) error("the number of rows of 'x' and 't' do not conform"); if (TYPEOF(R_s) != LGLSXP) error("'s' not of type logical"); if (TYPEOF(R_o) != LGLSXP) error("'o' not of type logical"); if (TYPEOF(R_v) != LGLSXP) error("'v' not of type logical"); nr = INTEGER(GET_SLOT(R_x, install("Dim")))[0]; px = GET_SLOT(R_x, install("p")); ix = GET_SLOT(R_x, install("i")); pt = GET_SLOT(R_t, install("p")); it = GET_SLOT(R_t, install("i")); pb = INTEGER(PROTECT(allocVector(INTSXP, nr+1))); if (LOGICAL(R_o)[0] == TRUE) { SEXP pz, iz; o = INTEGER(PROTECT(allocVector(INTSXP, nr))); memset(o, 0, sizeof(int) * nr); for (k = 0; k < LENGTH(it); k++) o[INTEGER(it)[k]]++; memset(pb, 0, sizeof(int) * nr); for (k = 0; k < LENGTH(ix); k++) pb[INTEGER(ix)[k]] = 1; n = c = 0; for (k = 0; k < nr; k++) { if (pb[k]) n += o[k]; else { o[k] = -1; c++; } pb[k] = k; } R_qsort_int_I(o, pb, 1, nr); for (k = 0; k < nr; k++) o[pb[k]] = (k < c) ? -1 : k; PROTECT(iz = allocVector(INTSXP, LENGTH(ix))); f = 0; for (i = 1; i < LENGTH(px); i++) { l = INTEGER(px)[i]; if (f == l) continue; for (k = f; k < l; k++) INTEGER(iz)[k] = o[INTEGER(ix)[k]]; R_isort(INTEGER(iz)+f, l-f); f = l; } ix = iz; PROTECT(pz = allocVector(INTSXP, LENGTH(pt))); PROTECT(iz = allocVector(INTSXP, n)); f = n = INTEGER(pz)[0] = 0; for (i = 1; i < LENGTH(pt); i++) { l = INTEGER(pt)[i]; if (f < l) { for (k = f, f = n; k < l; k++) if ((j = o[INTEGER(it)[k]]) > -1) INTEGER(iz)[n++] = j; R_isort(INTEGER(iz)+f, n-f); f = l; } INTEGER(pz)[i] = n; } pt = pz; ni = LENGTH(it); it = iz; if (LOGICAL(R_s)[0] == FALSE) memcpy(o, pb, sizeof(int) * nr); #ifdef _TIME_H t1 = clock(); if (LOGICAL(R_v)[0] == TRUE) { Rprintf("%i indexes, dropped %i (%.2f) items [%.2fs]\n", LENGTH(ix) + ni, c, 1 - (double) n / ni, ((double) t1 - t0) / CLOCKS_PER_SEC); Rprintf("preparing ... "); } #endif } cpn = apn = npn = 0; if (nb != NULL) nbfree(); nb = (PN **) malloc(sizeof(PN *) * (nr+1)); if (nb == NULL) error("pointer array allocation failed"); k = nr; nb[k] = NULL; while (k-- > 0) nb[k] = pnadd(nb[k+1], &k, 1); if (npn) { nbfree(); error("node allocation failed"); } np = ni = 0; f = 0; for (i = 1; i < LENGTH(px); i++) { l = INTEGER(px)[i]; n = l-f; if (n == 0) continue; x = INTEGER(ix)+f; pnadd(nb[*x], x, n); if (LOGICAL(R_s)[0] == FALSE && n > 1) { if (n > 2) { memcpy(pb, x, sizeof(int) * n); for (k = 0; k < n-1; k++) { if (k > 0) { j = pb[0]; pb[0] = pb[k]; pb[k] = j; } pnadd(nb[pb[1]], pb+1, n-1); } } np += n; ni += n * (n-1); } if (npn) { nbfree(); error("node allocation failed"); } f = l; R_CheckUserInterrupt(); } #ifdef _TIME_H t2 = clock(); if (LOGICAL(R_v)[0] == TRUE) { Rprintf("%i itemsets, created %i (%.2f) nodes [%.2fs]\n", 2 * np + LENGTH(px) - 1, apn, (double) apn / cpn, ((double) t2 - t1) / CLOCKS_PER_SEC); Rprintf("counting ... "); } #endif cpn = npn = 0; f = 0; for (i = 1; i < LENGTH(pt); i++) { l = INTEGER(pt)[i]; n = l-f; if (n == 0) continue; x = INTEGER(it)+f; pncount(nb[*x], x, n); f = l; R_CheckUserInterrupt(); } #ifdef _TIME_H t3 = clock(); if (LOGICAL(R_v)[0] == TRUE) { Rprintf("%i transactions, processed %i (%.2f) nodes [%.2fs]\n", LENGTH(pt) - 1, cpn, (double) npn / cpn, ((double) t3 - t2) / CLOCKS_PER_SEC); Rprintf("writing ... "); } #endif if (LOGICAL(R_s)[0] == TRUE) { PROTECT(r = allocVector(INTSXP, LENGTH(px)-1)); /* warnings */ pl = il = pr = ir = rs = rc = rl = pi = (SEXP)0; } else { SEXP o, p; PROTECT(r = allocVector(VECSXP, 6)); SET_VECTOR_ELT(r, 0, (o = NEW_OBJECT(MAKE_CLASS("ngCMatrix")))); SET_SLOT(o, install("p"), (pl = allocVector(INTSXP, np+1))); SET_SLOT(o, install("i"), (il = allocVector(INTSXP, ni))); SET_SLOT(o, install("Dim"), (p = allocVector(INTSXP, 2))); INTEGER(p)[0] = nr; INTEGER(p)[1] = np; SET_VECTOR_ELT(r, 1, (o = NEW_OBJECT(MAKE_CLASS("ngCMatrix")))); SET_SLOT(o, install("p"), (pr = allocVector(INTSXP, np+1))); SET_SLOT(o, install("i"), (ir = allocVector(INTSXP, np))); SET_SLOT(o, install("Dim"), (p = allocVector(INTSXP, 2))); INTEGER(p)[0] = nr; INTEGER(p)[1] = np; SET_VECTOR_ELT(r, 2, (rs = allocVector(REALSXP, np))); SET_VECTOR_ELT(r, 3, (rc = allocVector(REALSXP, np))); SET_VECTOR_ELT(r, 4, (rl = allocVector(REALSXP, np))); SET_VECTOR_ELT(r, 5, (pi = allocVector(INTSXP, np))); INTEGER(pl)[0] = INTEGER(pr)[0] = np = ni = 0; t = (double) LENGTH(pt)-1; } cpn = npn = 0; e = LENGTH(pt) - 1; f = 0; for (i = 1; i < LENGTH(px); i++) { l = INTEGER(px)[i]; n = l-f; if (n == 0) { if (LOGICAL(R_s)[0] == TRUE) INTEGER(r)[i-1] = e; continue; } x = INTEGER(ix)+f; c = pnget(nb[*x], x, n); if (LOGICAL(R_s)[0] == TRUE) INTEGER(r)[i-1] = c; else if (n > 1) { s = c / t; memcpy(pb, x, sizeof(int) * n); for (k = 0; k < n; k++) { if (k > 0) { j = pb[0]; pb[0] = pb[k]; pb[k] = j; } INTEGER(pi)[np] = i; /* itemset index */ REAL(rs)[np] = s; REAL(rc)[np] = c / (double) pnget(nb[pb[1]], pb+1, n-1); REAL(rl)[np] = REAL(rc)[np] / pnget(nb[pb[0]], pb, 1) * t; INTEGER(ir)[np++] = pb[0]; INTEGER(pr)[np] = np; for (j = 1; j < n; j++) INTEGER(il)[ni++] = pb[j]; INTEGER(pl)[np] = ni; } } f = l; R_CheckUserInterrupt(); } nbfree(); if (apn) error("node deallocation imbalance %i", apn); #ifdef _TIME_H t4 = clock(); if (LOGICAL(R_v)[0] == TRUE) { if (LOGICAL(R_s)[0] == FALSE) Rprintf("%i rules, ", np); else Rprintf("%i counts, ", LENGTH(px)-1); Rprintf("processed %i (%.2f) nodes [%.2fs]\n", cpn, (double) npn / cpn, ((double) t4 - t3) / CLOCKS_PER_SEC); } #endif if (LOGICAL(R_o)[0] == TRUE) { if (LOGICAL(R_s)[0] == FALSE) { #ifdef _TIME_H if (LOGICAL(R_v)[0] == TRUE) Rprintf("recoding ... "); #endif f = 0; for (i = 1; i < LENGTH(pl); i++) { l = INTEGER(pl)[i]; if (f == l) continue; for (k = f; k < l; k++) INTEGER(il)[k] = o[INTEGER(il)[k]]; R_isort(INTEGER(il)+f, l-f); f = l; } for (k = 0; k < LENGTH(ir); k++) INTEGER(ir)[k] = o[INTEGER(ir)[k]]; #ifdef _TIME_H t5 = clock(); if (LOGICAL(R_v)[0] == TRUE) Rprintf(" %i indexes [%.2fs]\n", LENGTH(il) + LENGTH(ir), ((double) t5 - t4) / CLOCKS_PER_SEC); #endif } UNPROTECT(6); } else UNPROTECT(2); return r; }

SEXP R_ocr_boundingBoxes(SEXP filename, SEXP r_vars, SEXP r_level, SEXP r_names) { SEXP ans = R_NilValue; int i; tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); if(api->Init(NULL, "eng")) { PROBLEM "could not intialize tesseract engine." ERROR; } Pix *image = pixRead(CHAR(STRING_ELT(filename, 0))); api->SetImage(image); SEXP r_optNames = GET_NAMES(r_vars); for(i = 0; i < Rf_length(r_vars); i++) api->SetVariable(CHAR(STRING_ELT(r_optNames, i)), CHAR(STRING_ELT(r_vars, i))); api->Recognize(0); tesseract::ResultIterator* ri = api->GetIterator(); tesseract::PageIteratorLevel level = (tesseract::PageIteratorLevel) INTEGER(r_level)[0]; //RIL_WORD; if(ri != 0) { int n = 1, i; while(ri->Next(level)) n++; ri = api->GetIterator(); SEXP names, tmp; PROTECT(names = NEW_CHARACTER(n)); PROTECT(ans = NEW_LIST(n)); i = 0; int x1, y1, x2, y2; do { const char* word = ri->GetUTF8Text(level); float conf = ri->Confidence(level); ri->BoundingBox(level, &x1, &y1, &x2, &y2); SET_STRING_ELT(names, i, Rf_mkChar(word)); SET_VECTOR_ELT(ans, i, tmp = NEW_NUMERIC(5)); REAL(tmp)[0] = conf; REAL(tmp)[1] = x1; REAL(tmp)[2] = y1; REAL(tmp)[3] = x2; REAL(tmp)[4] = y2; SET_NAMES(tmp, r_names); delete[] word; i++; } while (ri->Next(level)); SET_NAMES(ans, names); UNPROTECT(2); } pixDestroy(&image); return(ans); }

SEXP R_pnrindex(SEXP R_x, SEXP R_v) { int i, j, k, f, l, m, n, nr; int *x; SEXP px, ix; SEXP r, is, ir, il; #ifdef _TIME_H clock_t t2, t1 = clock(); #endif if (!inherits(R_x, "ngCMatrix") && !inherits(R_x, "sgCMatrix")) error("'x' not of class ngCMatrix"); if (TYPEOF(R_v) != LGLSXP) error("'v' not of type logical"); #ifdef _TIME_H if (LOGICAL(R_v)[0] == TRUE) Rprintf("processing ... "); #endif nr = INTEGER(GET_SLOT(R_x, install("Dim")))[0]; px = GET_SLOT(R_x, install("p")); ix = GET_SLOT(R_x, install("i")); cpn = apn = npn = 0; if (nb != NULL) nbfree(); nb = (PN **) malloc(sizeof(PN *) * (nr+1)); if (nb == NULL) error("pointer array allocation failed"); k = nr; nb[k] = NULL; while (k-- > 0) nb[k] = pnadd(nb[k+1], &k, 1); if (npn) { nbfree(); error("node allocation failed"); } m = k = 0; f = 0; for (i = 1; i < LENGTH(px); i++) { l = INTEGER(px)[i]; n = l-f; if (n == 0) continue; x = INTEGER(ix)+f; pnadd(nb[*x], x, n); if (npn) { nbfree(); error("node allocation failed"); } if (nq->count == 0) nq->count = i; if (n > 1) m += n; if (n > k) k = n; f = l; R_CheckUserInterrupt(); } PROTECT(r = allocVector(VECSXP, 3)); SET_VECTOR_ELT(r, 0, (is = allocVector(INTSXP, m))); SET_VECTOR_ELT(r, 1, (il = allocVector(INTSXP, m))); SET_VECTOR_ELT(r, 2, (ir = allocVector(INTSXP, m))); pb = INTEGER(PROTECT(allocVector(INTSXP, k+1))); cpn = npn = 0; m = 0; f = 0; for (i = 1; i < LENGTH(px); i++) { l = INTEGER(px)[i]; n = l-f; if (n == 0) continue; if (n > 1) { x = INTEGER(ix)+f; memcpy(pb, x, sizeof(int) * n); for (k = 0; k < n; k++) { if (k > 0) { j = pb[0]; pb[0] = pb[k]; pb[k] = j; } INTEGER(is)[m] = i; INTEGER(il)[m] = pnget(nb[pb[1]], pb+1, n-1); INTEGER(ir)[m] = pnget(nb[pb[0]], pb, 1); m++; } } f = l; R_CheckUserInterrupt(); } nbfree(); if (apn) error("node deallocation imbalance %i", apn); #ifdef _TIME_H t2 = clock(); if (LOGICAL(R_v)[0] == TRUE) Rprintf(" %i itemsets, %i rules [%.2fs]\n", LENGTH(px) - 1, m, ((double) t2-t1) / CLOCKS_PER_SEC); #endif UNPROTECT(2); return r; }

SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env) { int nwhat, nret, nc, nr, m, k, lastm, need; Rboolean blank_skip, field_skip = FALSE; int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often char *line, *buf; regex_t blankline, contline, trailblank, regline, eblankline; regmatch_t regmatch[1]; SEXP file, what, what2, retval, retval2, dims, dimnames; Rconnection con = NULL; Rboolean wasopen, is_eblankline; RCNTXT cntxt; SEXP fold_excludes; Rboolean field_fold = TRUE, has_fold_excludes; const char *field_name; int offset = 0; /* -Wall */ checkArity(op, args); file = CAR(args); con = getConnection(asInteger(file)); wasopen = con->isopen; if(!wasopen) { if(!con->open(con)) error(_("cannot open the connection")); /* Set up a context which will close the connection on error */ begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv, R_NilValue, R_NilValue); cntxt.cend = &con_cleanup; cntxt.cenddata = con; } if(!con->canread) error(_("cannot read from this connection")); args = CDR(args); PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */ nwhat = LENGTH(what); dynwhat = (nwhat == 0); args = CDR(args); PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP)); has_fold_excludes = (LENGTH(fold_excludes) > 0); buf = (char *) malloc(buflen); if(!buf) error(_("could not allocate memory for 'read.dcf'")); nret = 20; /* it is easier if we first have a record per column */ PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret)); /* These used to use [:blank:] but that can match \xa0 as part of a UTF-8 character (and is nbspace on Windows). */ tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB & REG_EXTENDED); tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED); tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED); tre_regcomp(®line, "^[^:]+:[[:blank:]]*", REG_EXTENDED); tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED); k = 0; lastm = -1; /* index of the field currently being recorded */ blank_skip = TRUE; void *vmax = vmaxget(); while((line = Rconn_getline2(con))) { if(strlen(line) == 0 || tre_regexecb(&blankline, line, 0, 0, 0) == 0) { /* A blank line. The first one after a record ends a new * record, subsequent ones are skipped */ if(!blank_skip) { k++; if(k > nret - 1){ nret *= 2; PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret)); transferVector(retval2, retval); UNPROTECT_PTR(retval); retval = retval2; } blank_skip = TRUE; lastm = -1; field_skip = FALSE; field_fold = TRUE; } } else { blank_skip = FALSE; if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) { /* A continuation line: wrong if at the beginning of a record. */ if((lastm == -1) && !field_skip) { line[20] = '\0'; error(_("Found continuation line starting '%s ...' at begin of record."), line); } if(lastm >= 0) { need = (int) strlen(CHAR(STRING_ELT(retval, lastm + nwhat * k))) + 2; if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) { is_eblankline = TRUE; } else { is_eblankline = FALSE; if(field_fold) { offset = regmatch[0].rm_eo; /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } else { offset = 0; } need += (int) strlen(line + offset); } if(buflen < need) { char *tmp = (char *) realloc(buf, need); if(!tmp) { free(buf); error(_("could not allocate memory for 'read.dcf'")); } else buf = tmp; buflen = need; } strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k))); strcat(buf, "\n"); if(!is_eblankline) strcat(buf, line + offset); SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf)); } } else { if(tre_regexecb(®line, line, 1, regmatch, 0) == 0) { for(m = 0; m < nwhat; m++){ whatlen = (int) strlen(CHAR(STRING_ELT(what, m))); if(strlen(line) > whatlen && line[whatlen] == ':' && strncmp(CHAR(STRING_ELT(what, m)), line, whatlen) == 0) { /* An already known field we are recording. */ lastm = m; field_skip = FALSE; field_name = CHAR(STRING_ELT(what, lastm)); if(has_fold_excludes) { field_fold = field_is_foldable_p(field_name, fold_excludes); } if(field_fold) { offset = regmatch[0].rm_eo; /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } else { offset = 0; } SET_STRING_ELT(retval, m + nwhat * k, mkChar(line + offset)); break; } else { /* This is a field, but not one prespecified */ lastm = -1; field_skip = TRUE; } } if(dynwhat && (lastm == -1)) { /* A previously unseen field and we are * recording all fields */ field_skip = FALSE; PROTECT(what2 = allocVector(STRSXP, nwhat+1)); PROTECT(retval2 = allocMatrixNA(STRSXP, nrows(retval)+1, ncols(retval))); if(nwhat > 0) { copyVector(what2, what); for(nr = 0; nr < nrows(retval); nr++){ for(nc = 0; nc < ncols(retval); nc++){ SET_STRING_ELT(retval2, nr+nc*nrows(retval2), STRING_ELT(retval, nr+nc*nrows(retval))); } } } UNPROTECT_PTR(retval); UNPROTECT_PTR(what); retval = retval2; what = what2; /* Make sure enough space was used */ need = (int) (Rf_strchr(line, ':') - line + 1); if(buflen < need){ char *tmp = (char *) realloc(buf, need); if(!tmp) { free(buf); error(_("could not allocate memory for 'read.dcf'")); } else buf = tmp; buflen = need; } strncpy(buf, line, Rf_strchr(line, ':') - line); buf[Rf_strchr(line, ':') - line] = '\0'; SET_STRING_ELT(what, nwhat, mkChar(buf)); nwhat++; /* lastm uses C indexing, hence nwhat - 1 */ lastm = nwhat - 1; field_name = CHAR(STRING_ELT(what, lastm)); if(has_fold_excludes) { field_fold = field_is_foldable_p(field_name, fold_excludes); } offset = regmatch[0].rm_eo; if(field_fold) { /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(line + offset)); } } else { /* Must be a regular line with no tag ... */ line[20] = '\0'; error(_("Line starting '%s ...' is malformed!"), line); } } } } vmaxset(vmax); if(!wasopen) {endcontext(&cntxt); con->close(con);} free(buf); tre_regfree(&blankline); tre_regfree(&contline); tre_regfree(&trailblank); tre_regfree(®line); tre_regfree(&eblankline); if(!blank_skip) k++; /* and now transpose the whole matrix */ PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what))); copyMatrix(retval2, retval, 1); PROTECT(dimnames = allocVector(VECSXP, 2)); PROTECT(dims = allocVector(INTSXP, 2)); INTEGER(dims)[0] = k; INTEGER(dims)[1] = LENGTH(what); SET_VECTOR_ELT(dimnames, 1, what); setAttrib(retval2, R_DimSymbol, dims); setAttrib(retval2, R_DimNamesSymbol, dimnames); UNPROTECT(6); return(retval2); }

/* * Given nx x values, ny y values, nx*ny z values, * and nl cut-values in z ... * ... produce a list of contour lines: * list of sub-lists * sub-list = x vector, y vector, and cut-value. */ SEXP GEcontourLines(double *x, int nx, double *y, int ny, double *z, double *levels, int nl) { const void *vmax; int i, nlines, len; double atom, zmin, zmax; SEGP* segmentDB; SEXP container, mainlist, templist; /* * "tie-breaker" values */ zmin = DBL_MAX; zmax = DBL_MIN; for (i = 0; i < nx * ny; i++) if (R_FINITE(z[i])) { if (zmax < z[i]) zmax = z[i]; if (zmin > z[i]) zmin = z[i]; } if (zmin >= zmax) { if (zmin == zmax) warning(_("all z values are equal")); else warning(_("all z values are NA")); return R_NilValue; } /* change to 1e-3, reconsidered because of PR#897 * but 1e-7, and even 2*DBL_EPSILON do not prevent inf.loop in contour(). * maybe something like 16 * DBL_EPSILON * (..). * see also max_contour_segments above */ atom = 1e-3 * (zmax - zmin); /* * Create a "container" which is a list with only 1 element. * The element is the list of lines that will be built up. * I create the container because this allows me to PROTECT * the container once here and then UNPROTECT it at the end of * this function and, as long as I always work with * VECTOR_ELT(container, 0) and SET_VECTOR_ELT(container, 0) * in functions called from here, I don't need to worry about * protectin the list that I am building up. * Why bother? Because the list I am building can potentially * grow and it's awkward to get the PROTECTs/UNPROTECTs right * when you're in a loop and growing a list. */ container = PROTECT(allocVector(VECSXP, 1)); /* * Create "large" list (will trim excess at the end if necesary) */ SET_VECTOR_ELT(container, 0, allocVector(VECSXP, CONTOUR_LIST_STEP)); nlines = 0; /* * Add lines for each contour level */ for (i = 0; i < nl; i++) { /* * The vmaxget/set is to manage the memory that gets * R_alloc'ed in the creation of the segmentDB structure */ vmax = vmaxget(); /* * Generate a segment database */ segmentDB = contourLines(x, nx, y, ny, z, levels[i], atom); /* * Add lines to the list based on the segment database */ nlines = addContourLines(x, nx, y, ny, z, levels[i], atom, segmentDB, nlines, container); vmaxset(vmax); } /* * Trim the list of lines to the appropriate length. */ len = LENGTH(VECTOR_ELT(container, 0)); if (nlines < len) { mainlist = VECTOR_ELT(container, 0); templist = PROTECT(allocVector(VECSXP, nlines)); for (i=0; i<nlines; i++) SET_VECTOR_ELT(templist, i, VECTOR_ELT(mainlist, i)); mainlist = templist; UNPROTECT(1); /* UNPROTECT templist */ } else mainlist = VECTOR_ELT(container, 0); UNPROTECT(1); /* UNPROTECT container */ return mainlist; }

SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) { R_len_t i, j, k=0, maxlen=0, zerolen=0, anslen; SEXP li, thisi, ans; SEXPTYPE type, maxtype=0; Rboolean coerce = FALSE; if (!isNewList(l)) error("l must be a list."); if (!length(l)) return(duplicate(l)); if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0] == NA_LOGICAL) error("ignore.empty should be logical TRUE/FALSE."); if (length(fill) != 1) error("fill must be NULL or length=1 vector."); R_len_t ln = LENGTH(l); Rboolean ignore = LOGICAL(ignoreArg)[0]; // preprocessing R_len_t *len = (R_len_t *)R_alloc(ln, sizeof(R_len_t)); for (i=0; i<ln; i++) { li = VECTOR_ELT(l, i); if (!isVectorAtomic(li) && !isNull(li)) error("Item %d of list input is not an atomic vector", i+1); len[i] = length(li); if (len[i] > maxlen) maxlen = len[i]; zerolen += (len[i] == 0); if (isFactor(li)) { maxtype = STRSXP; } else { type = TYPEOF(li); if (type > maxtype) maxtype = type; } } // coerce fill to maxtype fill = PROTECT(coerceVector(fill, maxtype)); // allocate 'ans' ans = PROTECT(allocVector(VECSXP, maxlen)); anslen = (!ignore) ? ln : (ln - zerolen); for (i=0; i<maxlen; i++) { SET_VECTOR_ELT(ans, i, thisi=allocVector(maxtype, anslen) ); } // transpose for (i=0; i<ln; i++) { if (ignore && !len[i]) continue; li = VECTOR_ELT(l, i); if (TYPEOF(li) != maxtype) { coerce = TRUE; if (!isFactor(li)) li = PROTECT(coerceVector(li, maxtype)); else li = PROTECT(asCharacterFactor(li)); } switch (maxtype) { case INTSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); INTEGER(thisi)[k] = (j < len[i]) ? INTEGER(li)[j] : INTEGER(fill)[0]; } break; case LGLSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); LOGICAL(thisi)[k] = (j < len[i]) ? LOGICAL(li)[j] : LOGICAL(fill)[0]; } break; case REALSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); REAL(thisi)[k] = (j < len[i]) ? REAL(li)[j] : REAL(fill)[0]; } break; case STRSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); SET_STRING_ELT(thisi, k, (j < len[i]) ? STRING_ELT(li, j) : STRING_ELT(fill, 0)); } break; default : error("Unsupported column type '%s'", type2char(maxtype)); } if (coerce) { coerce = FALSE; UNPROTECT(1); } k++; } UNPROTECT(2); return(ans); }

/* * Store the list of segments for a single level in the SEXP * list that will be returned to the user */ static int addContourLines(double *x, int nx, double *y, int ny, double *z, double zc, double atom, SEGP* segmentDB, int nlines, SEXP container) { double xend, yend; int i, ii, j, jj, ns, dir, nc; SEGP seglist, seg, s, start, end; SEXP ctr, level, xsxp, ysxp, names; /* Begin following contours. */ /* 1. Grab a segment */ /* 2. Follow its tail */ /* 3. Follow its head */ /* 4. Save the contour */ for (i = 0; i < nx - 1; i++) for (j = 0; j < ny - 1; j++) { while ((seglist = segmentDB[i + j * nx])) { ii = i; jj = j; start = end = seglist; segmentDB[i + j * nx] = seglist->next; xend = seglist->x1; yend = seglist->y1; while ((dir = ctr_segdir(xend, yend, x, y, &ii, &jj, nx, ny))) { segmentDB[ii + jj * nx] = ctr_segupdate(xend, yend, dir, TRUE,/* = tail */ segmentDB[ii + jj * nx], &seg); if (!seg) break; end->next = seg; end = seg; xend = end->x1; yend = end->y1; } end->next = NULL; /* <<< new for 1.2.3 */ ii = i; jj = j; xend = seglist->x0; yend = seglist->y0; while ((dir = ctr_segdir(xend, yend, x, y, &ii, &jj, nx, ny))) { segmentDB[ii + jj * nx] = ctr_segupdate(xend, yend, dir, FALSE,/* ie. head */ segmentDB[ii+jj*nx], &seg); if (!seg) break; seg->next = start; start = seg; xend = start->x0; yend = start->y0; } /* ns := #{segments of polyline} -- need to allocate */ s = start; ns = 0; /* max_contour_segments: prevent inf.loop (shouldn't be needed) */ while (s && ns < max_contour_segments) { ns++; s = s->next; } if(ns == max_contour_segments) warning(_("contour(): circular/long seglist -- set %s > %d?"), "options(\"max.contour.segments\")", max_contour_segments); /* * "write" the contour locations into the list of contours */ ctr = PROTECT(allocVector(VECSXP, 3)); level = PROTECT(allocVector(REALSXP, 1)); xsxp = PROTECT(allocVector(REALSXP, ns + 1)); ysxp = PROTECT(allocVector(REALSXP, ns + 1)); REAL(level)[0] = zc; SET_VECTOR_ELT(ctr, CONTOUR_LIST_LEVEL, level); s = start; REAL(xsxp)[0] = s->x0; REAL(ysxp)[0] = s->y0; ns = 1; while (s->next && ns < max_contour_segments) { s = s->next; REAL(xsxp)[ns] = s->x0; REAL(ysxp)[ns++] = s->y0; } REAL(xsxp)[ns] = s->x1; REAL(ysxp)[ns] = s->y1; SET_VECTOR_ELT(ctr, CONTOUR_LIST_X, xsxp); SET_VECTOR_ELT(ctr, CONTOUR_LIST_Y, ysxp); /* * Set the names attribute for the contour * So that users can extract components using * meaningful names */ PROTECT(names = allocVector(STRSXP, 3)); SET_STRING_ELT(names, 0, mkChar("level")); SET_STRING_ELT(names, 1, mkChar("x")); SET_STRING_ELT(names, 2, mkChar("y")); setAttrib(ctr, R_NamesSymbol, names); /* * We're about to add another line to the list ... */ nlines += 1; nc = LENGTH(VECTOR_ELT(container, 0)); if (nlines == nc) /* Where does this get UNPROTECTed? */ SET_VECTOR_ELT(container, 0, growList(VECTOR_ELT(container, 0))); SET_VECTOR_ELT(VECTOR_ELT(container, 0), nlines - 1, ctr); UNPROTECT(5); } } return nlines; }

SEXP SP_PREFIX(comment2comm)(SEXP obj) { SEXP ans, comment; int pc=0, ns, i, j, jj, k, nc; char s[15], *buf; int *c, *nss, *co, *coo; PROTECT(comment = getAttrib(obj, install("comment"))); pc++; if (comment == R_NilValue) { UNPROTECT(pc); return(R_NilValue); } nc = length(STRING_ELT(comment, 0)); if (nc < 1) error("comment2comm: empty string comment"); buf = (char *) R_alloc((size_t) (nc+1), sizeof(char)); strcpy(buf, CHAR(STRING_ELT(comment, 0))); i = 0; ns = 0; while (buf[i] != '\0') { if (buf[i] == ' ') ns++; i++; } k = (int) strlen(buf); nss = (int *) R_alloc((size_t) (ns+1), sizeof(int)); c = (int *) R_alloc((size_t) (ns+1), sizeof(int)); i = 0; j = 0; while (buf[i] != '\0') { if (buf[i] == ' ') { nss[j] = i; j++; } i++; } nss[(ns)] = k; s[0] = '\0'; if (nss[0] > 15) error("comment2comm: buffer overflow"); strncpy(s, &buf[0], (size_t) nss[0]); s[nss[0]] = '\0'; c[0] = atoi(s); for (i=0; i<ns; i++) { k = nss[(i+1)]-(nss[i]+1); if (k > 15) error("comment2comm: buffer overflow"); strncpy(s, &buf[(nss[i]+1)], (size_t) k); s[k] = '\0'; c[i+1] = atoi(s); } for (i=0, k=0; i<(ns+1); i++) if (c[i] == 0) k++; PROTECT(ans = NEW_LIST((k))); pc++; co = (int *) R_alloc((size_t) k, sizeof(int)); coo = (int *) R_alloc((size_t) k, sizeof(int)); for (i=0; i<k; i++) co[i] = 1; for (i=0, j=0; i<(ns+1); i++) if (c[i] == 0) coo[j++] = i + R_OFFSET; for (i=0; i<k; i++) for (j=0; j<(ns+1); j++) if ((c[j]) == coo[i]) co[i]++; for (i=0; i<k; i++) SET_VECTOR_ELT(ans, i, NEW_INTEGER(co[i])); for (i=0; i<k; i++) { jj = 0; INTEGER_POINTER(VECTOR_ELT(ans, i))[jj++] = coo[i]; if (co[i] > 1) { for (j=0; j<(ns+1); j++) if (c[j] == coo[i]) INTEGER_POINTER(VECTOR_ELT(ans, i))[jj++] = j + R_OFFSET; } } UNPROTECT(pc); return(ans); }

/* The idea is to produce a transformation for this viewport which * will take any location in INCHES and turn it into a location on the * Device in INCHES. * The reason for working in INCHES is because we want to be able to * do rotations as part of the transformation. * If "incremental" is true, then we just work from the "current" * values of the parent. Otherwise, we have to recurse and recalculate * everything from scratch. */ void calcViewportTransform(SEXP vp, SEXP parent, Rboolean incremental, pGEDevDesc dd) { int i, j; double vpWidthCM, vpHeightCM, rotationAngle; double parentWidthCM, parentHeightCM; double xINCHES, yINCHES; double xadj, yadj; double parentAngle; LViewportLocation vpl; LViewportContext vpc, parentContext; R_GE_gcontext gc, parentgc; LTransform thisLocation, thisRotation, thisJustification, thisTransform; LTransform tempTransform, parentTransform, transform; SEXP currentWidthCM, currentHeightCM, currentRotation; SEXP currentTransform; /* This should never be true when we are doing an incremental * calculation */ if (isNull(parent)) { /* We have a top-level viewport; the parent is the device */ getDeviceSize(dd, &parentWidthCM, &parentHeightCM); /* For a device the transform is the identity transform */ identity(parentTransform); /* For a device, xmin=0, ymin=0, xmax=1, ymax=1, and */ parentContext.xscalemin = 0; parentContext.yscalemin = 0; parentContext.xscalemax = 1; parentContext.yscalemax = 1; /* FIXME: How do I figure out the device fontsize ? * From ps.options etc, ... ? * FIXME: How do I figure out the device lineheight ?? * FIXME: How do I figure out the device cex ?? * FIXME: How do I figure out the device font ?? * FIXME: How do I figure out the device fontfamily ?? */ parentgc.ps = 10; parentgc.lineheight = 1.2; parentgc.cex = 1; parentgc.fontface = 1; parentgc.fontfamily[0] = '\0'; /* The device is not rotated */ parentAngle = 0; fillViewportLocationFromViewport(vp, &vpl); } else { /* Get parent transform (etc ...) * If necessary, recalculate the parent transform (etc ...) */ if (!incremental) calcViewportTransform(parent, viewportParent(parent), 0, dd); /* Get information required to transform viewport location */ parentWidthCM = REAL(viewportWidthCM(parent))[0]; parentHeightCM = REAL(viewportHeightCM(parent))[0]; parentAngle = REAL(viewportRotation(parent))[0]; for (i=0; i<3; i++) for (j=0; j<3; j++) parentTransform[i][j] = REAL(viewportTransform(parent))[i +3*j]; fillViewportContextFromViewport(parent, &parentContext); /* * Don't get gcontext from parent because the most recent * previous gpar setting may have come from a gTree * So we look at this viewport's parentgpar slot instead * * WAS gcontextFromViewport(parent, &parentgc); */ gcontextFromgpar(viewportParentGPar(vp), 0, &parentgc, dd); /* In order for the vp to get its vpl from a layout * it must have specified a layout.pos and the parent * must have a layout * FIXME: Actually, in addition, layout.pos.row and * layout.pos.col must be valid for the layout */ if ((isNull(viewportLayoutPosRow(vp)) && isNull(viewportLayoutPosCol(vp))) || isNull(viewportLayout(parent))) fillViewportLocationFromViewport(vp, &vpl); else if (checkPosRowPosCol(vp, parent)) calcViewportLocationFromLayout(viewportLayoutPosRow(vp), viewportLayoutPosCol(vp), parent, &vpl); } /* NOTE that we are not doing a transformLocn here because * we just want locations and dimensions (in INCHES) relative to * the parent, NOT relative to the device. */ /* First, convert the location of the viewport into CM */ xINCHES = transformXtoINCHES(vpl.x, 0, parentContext, &parentgc, parentWidthCM, parentHeightCM, dd); yINCHES = transformYtoINCHES(vpl.y, 0, parentContext, &parentgc, parentWidthCM, parentHeightCM, dd); /* Calculate the width and height of the viewport in CM too * so that any viewports within this one can do transformations */ vpWidthCM = transformWidthtoINCHES(vpl.width, 0, parentContext, &parentgc, parentWidthCM, parentHeightCM, dd)*2.54; vpHeightCM = transformHeighttoINCHES(vpl.height, 0, parentContext, &parentgc, parentWidthCM, parentHeightCM, dd)*2.54; /* Fall out if location or size are non-finite */ if (!R_FINITE(xINCHES) || !R_FINITE(yINCHES) || !R_FINITE(vpWidthCM) || !R_FINITE(vpHeightCM)) error(_("Non-finite location and/or size for viewport")); /* Determine justification required */ justification(vpWidthCM, vpHeightCM, vpl.hjust, vpl.vjust, &xadj, &yadj); /* Next, produce the transformation to add the location of * the viewport to the location. */ /* Produce transform for this viewport */ translation(xINCHES, yINCHES, thisLocation); if (viewportAngle(vp) != 0) rotation(viewportAngle(vp), thisRotation); else identity(thisRotation); translation(xadj/2.54, yadj/2.54, thisJustification); /* Position relative to origin of rotation THEN rotate. */ multiply(thisJustification, thisRotation, tempTransform); /* Translate to bottom-left corner. */ multiply(tempTransform, thisLocation, thisTransform); /* Combine with parent's transform */ multiply(thisTransform, parentTransform, transform); /* Sum up the rotation angles */ rotationAngle = parentAngle + viewportAngle(vp); /* Finally, allocate the rows and columns for this viewport's * layout if it has one */ if (!isNull(viewportLayout(vp))) { fillViewportContextFromViewport(vp, &vpc); gcontextFromViewport(vp, &gc, dd); calcViewportLayout(vp, vpWidthCM, vpHeightCM, vpc, &gc, dd); } /* Record all of the answers in the viewport * (the layout calculations are done within calcViewportLayout) */ PROTECT(currentWidthCM = ScalarReal(vpWidthCM)); PROTECT(currentHeightCM = ScalarReal(vpHeightCM)); PROTECT(currentRotation = ScalarReal(rotationAngle)); PROTECT(currentTransform = allocMatrix(REALSXP, 3, 3)); for (i=0; i<3; i++) for (j=0; j<3; j++) REAL(currentTransform)[i + 3*j] = transform[i][j]; SET_VECTOR_ELT(vp, PVP_WIDTHCM, currentWidthCM); SET_VECTOR_ELT(vp, PVP_HEIGHTCM, currentHeightCM); SET_VECTOR_ELT(vp, PVP_ROTATION, currentRotation); SET_VECTOR_ELT(vp, PVP_TRANS, currentTransform); UNPROTECT(4); }