RcppExport SEXP rthhist(SEXP x_, SEXP nbins_, SEXP nch_, SEXP nthreads) { Rcpp::NumericVector x(x_); const int n = x.size(); const int nbins = INTEGER(nbins_)[0]; const int nch = INTEGER(nch_)[0]; floublevec dx(x.begin(), x.end()); Rcpp::IntegerVector bincounts(nbins); Rcpp::NumericVector R_left(1); Rcpp::NumericVector R_binwidth(1); #if RTH_OMP omp_set_num_threads(INT(nthreads)); #elif RTH_TBB tbb::task_scheduler_init init(INT(nthreads)); #endif // determine binwidth etc. thrust::pair<floubleveciter, floubleveciter> mm = thrust::minmax_element(dx.begin(), dx.end()); flouble left = *(mm.first), right = *(mm.second); flouble binwidth = (right - left) / nbins; // form matrix of bin counts, one row per chunk intvec dbincounts(nch*nbins); // the heart of the computation, a for_each() loop, one iteration per // chunk thrust::counting_iterator<int> seqa(0); thrust::counting_iterator<int> seqb = seqa + nch; thrust::for_each(seqa,seqb, do1chunk(dx.begin(),dbincounts.begin( ),n,nbins,nch,left,binwidth)); // copy result to host and combine the subhistograms int hbincounts[nch*nbins]; thrust::copy(dbincounts.begin(),dbincounts.end(),hbincounts); int binnum,chunknum; for (binnum = 0; binnum < nbins; binnum++) { int sum = 0; for (chunknum = 0; chunknum < nch; chunknum++) sum += hbincounts[chunknum*nbins + binnum]; bincounts[binnum] = sum; } REAL(R_left)[0] = (double) left; REAL(R_binwidth)[0] = (double) binwidth; return Rcpp::List::create(Rcpp::Named("counts") = bincounts, Rcpp::Named("left") = R_left, Rcpp::Named("binwidth") = R_binwidth); }
// data x has lower, upper bds given in ub_; count computation will be // divided into nch_ chunks, performed by nthreads threads RcppExport SEXP rthtable(SEXP x_, SEXP lb_, SEXP ub_, SEXP nch_, SEXP nthreads) { // housekeeping regarding input data Rcpp::IntegerMatrix x(x_); // x is the data matrix, one observation per row (but stored in // column-major order); each column is a separate variable, i.e. // a separate dimension in the output table intvec dx(x.begin(), x.end()); // input data copied to device const int nobsv = x.nrow(); // number of observations const int ndim = x.ncol(); // e.g. 2 for an m x n table // configuration of output table // // R tables are stored in a generalized column-major format; e.g. // for an m x n x k table, row index varies most rapidly, then column // index and finally layer index // // set up the ndim lower- and upper-bound pairs; variable in // dimension i of the table takes on values in lb[i} through ub[i] std::vector<int> lbstd = Rcpp::as<std::vector<int> >(lb_); // lower bounds on data vals std::vector<int> ubstd = Rcpp::as<std::vector<int> >(ub_); // upper bounds on data vals int *lb = &lbstd[0]; int *ub = &ubstd[0]; // find number of table cells, ncells, and the range for each // variable, in lurng int ncells = 1,i; int lurng[ndim]; // "lower-upper range" for (i = 0; i < ndim; i++) { lurng[i] = ub[i] - lb[i] + 1; ncells *= lurng[i]; } // counts for the table; will eventually be the output Rcpp::IntegerVector rvalcounts(ncells); // partitioning of work const int nch = INTEGER(nch_)[0]; #if RTH_OMP omp_set_num_threads(INT(nthreads)); #elif RTH_TBB // tbb::task_scheduler_init init(INT(nthreads)); // for unknown reasons, this code does not work under TBB return Rcpp::wrap(1); #endif // form matrix of cell counts, one row per chunk, row-major order intvec dcellcounts(nch*ncells); // products needed to compute linear cell indices int bases[ndim],prod=1; for (i = 0; i < ndim; i++) { bases[i] = prod; prod *= lurng[i]; }; // the heart of the computation, a for_each() loop, one iteration per // chunk (chunking should improve cache performance) thrust::counting_iterator<int> seqa(0); thrust::counting_iterator<int> seqb = seqa + nch; thrust::for_each(seqa,seqb, do1chunk(dx.begin(),dcellcounts.begin(),lb,lurng,bases, nobsv,ndim,ncells,nch)); // copy result to host and combine the subhistograms int hvalcounts[nch*ncells]; thrust::copy(dcellcounts.begin(),dcellcounts.end(),hvalcounts); int cellnum,chunknum; for (cellnum = 0; cellnum < ncells; cellnum++) { int sum = 0; for (chunknum = 0; chunknum < nch; chunknum++) sum += hvalcounts[chunknum*ncells + cellnum]; rvalcounts[cellnum] = sum; } return rvalcounts; }
extern "C" SEXP rthhist(SEXP x, SEXP nbins_, SEXP nch_, SEXP nthreads) { const int n = LENGTH(x); const int nbins = INTEGER(nbins_)[0]; const int nch = INTEGER(nch_)[0]; floublevec dx(REAL(x), REAL(x)+n); SEXP bincounts, R_left, R_binwidth; PROTECT(bincounts = allocVector(INTSXP, nbins)); PROTECT(R_left = allocVector(REALSXP, 1)); PROTECT(R_binwidth = allocVector(REALSXP, 1)); SEXP ret, retnames; RTH_GEN_NTHREADS(nthreads); // determine binwidth etc. thrust::pair<floubleveciter, floubleveciter> mm = thrust::minmax_element(dx.begin(), dx.end()); flouble left = *(mm.first), right = *(mm.second); flouble binwidth = (right - left) / nbins; // form matrix of bin counts, one row per chunk intvec dbincounts(nch*nbins); // the heart of the computation, a for_each() loop, one iteration per // chunk thrust::counting_iterator<int> seqa(0); thrust::counting_iterator<int> seqb = seqa + nch; thrust::for_each(seqa,seqb, do1chunk(dx.begin(),dbincounts.begin( ), n, nbins, nch, left, binwidth)); // copy result to host and combine the subhistograms int hbincounts[nch*nbins]; thrust::copy(dbincounts.begin(), dbincounts.end(), hbincounts); int binnum,chunknum; for (binnum = 0; binnum < nbins; binnum++) { int sum = 0; for (chunknum = 0; chunknum < nch; chunknum++) sum += hbincounts[chunknum*nbins + binnum]; INTEGER(bincounts)[binnum] = sum; } REAL(R_left)[0] = (double) left; REAL(R_binwidth)[0] = (double) binwidth; PROTECT(retnames = allocVector(STRSXP, 3)); SET_STRING_ELT(retnames, 0, mkChar("counts")); SET_STRING_ELT(retnames, 1, mkChar("left")); SET_STRING_ELT(retnames, 2, mkChar("binwidth")); PROTECT(ret = allocVector(VECSXP, 3)); SET_VECTOR_ELT(ret, 0, bincounts); SET_VECTOR_ELT(ret, 1, R_left); SET_VECTOR_ELT(ret, 2, R_binwidth); setAttrib(ret, R_NamesSymbol, retnames); UNPROTECT(5); return ret; }