// replacement for bip maybe more error tolerant slightly slower // import: edge matrix, number of tips // export: Descendants(x, 1:max(x$edge), "all") // [[Rcpp::export]] List bipCPP(IntegerMatrix orig, int nTips) { IntegerVector parent = orig( _, 0); IntegerVector children = orig( _, 1); int m = max(parent), j=0; // create list for results std::vector< std::vector<int> > out(m) ; std::vector<int> y; for(int i = 0; i<nTips; i++){ out[i].push_back(i + 1L); } for(int i = 0; i<parent.size(); i++){ j = parent[i] - 1L; if(children[i] > nTips){ y = out[children[i] - 1L]; out[j].insert( out[j].end(), y.begin(), y.end() ); } else out[j].push_back(children[i]); } for(int i=0; i<m; ++i){ sort(out[i].begin(), out[i].end()); } return wrap(out); // return the list }
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) : data(data_), visitors(), visitor_names(names) { CharacterVector data_names = vec_names_or_empty(data); IntegerVector indices = names.match_in_table(data_names); int n = indices.size(); for (int i = 0; i < n; i++) { int pos = indices[i]; if (pos == NA_INTEGER) { bad_col(names[i], "is unknown"); } SubsetVectorVisitor* v = subset_visitor(data[pos - 1], data_names[pos - 1]); visitors.push_back(v); } }
//[[Rcpp::export]] NumericMatrix partitionTree(IntegerVector parent, IntegerVector order, NumericVector weight, NumericVector height) { NumericMatrix rect(parent.size(), 4); int i; std::vector<Node*> nodes = createHierarchy(as< std::vector<int> >(parent), as< std::vector<int> >(order), as< std::vector<double> >(weight), as< std::vector<double> >(height)); for (i = 0; i < nodes.size(); ++i) { nodes[i]->sortChildren(); } Node* startNode = nodes[0]->getRoot(); icicleLayout(startNode, 0, 0); for (i = 0; i < nodes.size(); ++i) { rect(i, 0) = nodes[i]->bounds.x; rect(i, 1) = nodes[i]->bounds.y; rect(i, 2) = nodes[i]->bounds.width; rect(i, 3) = nodes[i]->bounds.height; delete nodes[i]; } return rect; }
// **********************************************************// // Calculate Dyadic statistics // // **********************************************************// // [[Rcpp::export]] List Dyadic(List history, IntegerVector node, int sender) { int nIP = history.size(); int A = node.size(); List IPmat(nIP); for (int IP = 0; IP < nIP; IP++) { NumericMatrix dyadicmat_IP(A, 6); List historyIP = history[IP]; NumericVector dyadic(6); for (int receiver = 0; receiver < A; receiver++) { if (receiver != sender) { for (unsigned int l = 0; l < 3; l++) { NumericMatrix historyIP_l = historyIP[l]; dyadic[l] = historyIP_l(sender, receiver); dyadic[l+3] = historyIP_l(receiver, sender); } dyadicmat_IP(receiver, _) = dyadic; } } IPmat[IP] = dyadicmat_IP; } return IPmat; }
// [[Rcpp::export]] double statPhist_C(IntegerVector haps, IntegerVector strata, NumericMatrix hapDist) { // function declarations IntegerMatrix table2D(IntegerVector, IntegerVector); NumericVector colSumC(NumericMatrix); LogicalVector hapsGood = !is_na(haps); LogicalVector strataGood = !is_na(strata); LogicalVector toUse = hapsGood & strataGood; haps = haps[toUse]; strata = strata[toUse]; // Extract summary values IntegerMatrix strataHapFreq = table2D(haps, strata); IntegerVector strataFreq = wrap(colSumC(wrap(strataHapFreq))); double ssWP = ssWPCalc(strataFreq, strataHapFreq, hapDist); double ssAP = ssAPCalc(strataFreq, strataHapFreq, hapDist); ssAP = ssAP - ssWP; // Calculate average sample size correction for among strata variance // Eqn 9a in paper, but modified as in Table 8.2.1.1 from Arlequin v3.5.1 manual // (denominator is sum{I} - 1) int numSamples = sum(strataFreq); int numStrata = strataFreq.size(); NumericVector n2(numStrata); for(int i = 0; i < n2.size(); i++) n2[i] = pow(strataFreq[i], 2) / numSamples; double n = (numSamples - sum(n2)) / (numStrata - 1); // Calculate variance components (Table 1) // Set MSD (SSD / df) equal to expected MSD double Vc = ssWP / (numSamples - numStrata); double Vb = ((ssAP / (numStrata - 1)) - Vc) / n; double est(Vb / (Vb + Vc)); if(std::isnan(est)) est = NA_REAL; return est; }
//[[Rcpp::export]] DataFrame pathAttr(DataFrame paths, int ngroups) { LogicalVector solid(ngroups, true); LogicalVector constant(ngroups, true); int currentGroup, currentIndex, i; IntegerVector group = paths["group"]; NumericVector alpha = paths["edge_alpha"]; NumericVector width = paths["edge_width"]; IntegerVector lty = paths["edge_linetype"]; CharacterVector colour = paths["edge_colour"]; currentGroup = group[0]; currentIndex = 0; for (i = 1; i < group.size(); ++i) { if (group[i] == currentGroup) { if (solid[currentIndex]) { solid[currentIndex] = lty[i] == 1 && lty[i] == lty[i-1]; } if (constant[currentIndex]) { constant[currentIndex] = alpha[i] == alpha[i-1] && width[i] == width[i-1] && lty[i] == lty[i-1] && colour[i] == colour[i-1]; } } else { currentGroup = group[i]; ++currentIndex; } } return DataFrame::create( Named("solid") = solid, Named("constant") = constant ); }
//' Determine the most frequently occurring value in an integer vector //' //' @param x integer vector //' @return integer mode of x //' // [[Rcpp::export]] int mode(IntegerVector x) { std::map<int,int> counts; int mode = NA_INTEGER; int modeCount = -1; for (int i = 0; i < x.size(); i++){ if (!IntegerVector::is_na(x[i])) { if(counts.count(x[i]) > 0) { counts[x[i]] += 1; } else { counts[x[i]] = 1; } } } for(auto pair: counts) { if (pair.second > modeCount) { modeCount = pair.second; mode = pair.first; } } return mode ; }
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const IntegerVector& indices) : data(data_), visitors(), visitor_names() { CharacterVector data_names = vec_names_or_empty(data); int n = indices.size(); for (int i = 0; i < n; i++) { int pos = indices[i]; check_range_one_based(pos, data.size()); const SymbolString& name = data_names[pos - 1]; SubsetVectorVisitor* v = subset_visitor(data[pos - 1], name); visitors.push_back(v); visitor_names.push_back(name); } }
// [[Rcpp::export]] NumericVector maskUV(NumericMatrix U, NumericMatrix V, IntegerVector is, IntegerVector js) { // Get the length of the entries list and the rank of UV' int l = is.size(); int r = U.ncol(); // Initialize the output vector to all zeros NumericVector maskUV(l,0.0); // Loop over non-zero entries and compute output vector int i = is(1)-1; int j = js(1)-1; for(int n = 0; n < l; n++) { i = is(n)-1; // subtract 1 since R arrays start at 1 j = js(n)-1; maskUV(n) = 0; for(int k = 0; k < r; k++) { maskUV(n) += U(i,k)*V(j,k); } } return maskUV; }
void transformCppIndexes(IntegerVector& indexes) { if (!Rf_isNull(indexes) && indexes.size() > 0) { std::transform(indexes.begin(), indexes.end(), indexes.begin(), std::bind2nd(std::plus<int>(), 1)); } }
// [[Rcpp::export]] List starvingforager_eventNM( int L, //Lattice dim int t_term, //Terminal time double alpha, //Resource growth rate double K, //Resource carrying capacity double sigma, //Starvation rate double rho, //Recovery rate double lambda, //Growth rate double mu, //Mortality rate IntegerVector ind_vec, //Initial vector of states IntegerVector loc_vec //Initial vector of locations ) { //Dimension of the lattice int dim = 2; //Lattice size double size = pow(L-2,dim); //Initial time double t = 0; double max; double min; //Output Lists List ind_out(1); List loc_out(1); NumericVector t_out(1); //The initial state ind_out(0) = ind_vec; loc_out(0) = loc_vec; t_out(0) = 0; //ind_vec: the vector of individual states... 0 = resource, 1=starver, 2=full //pos_vec: the vector of individual locations //Initial count of how many resouces, starvers, and full in this timestep?? //Count the number of individual R + S + F int tot = ind_vec.size(); double R = 0.L; double S = 0.L; double F = 0.L; // double Rp; // double Sp; // double Fp; for (int i=0;i<tot;i++) { if (ind_vec(i) == 0) { R = R + 1.L; } if (ind_vec(i) == 1) { S = S + 1.L; } if (ind_vec(i) == 2) { F = F + 1.L; } } //R,S,F are thus densities over the landscape of size 'size' // R = R/size; // S = S/size; // F = F/size; double R_pr_line; double S_pr_line; double F_pr_line; //Iterate over time //The loop stops when t > t_term-1... and will record the last value int tic = 1; while (t < (t_term-1)) { //Construct probability lines, which are a function of R, S, F //Grow <-----> Consumed R_pr_line = (alpha*(K-R))/((alpha*(K-R)) + (F + S)); //R_pr_line(1) = R_pr_line(0) + ((F + S)/((alpha*(K-R)) + (F + S) + Dr)); //R_pr_line(2) = R_pr_line(1) + (Dr/((alpha*(K-R)) + (F + S) + Dr)); //Recover <-----> Mortality S_pr_line = (rho*R)/(rho*R + mu); //S_pr_line(1) = S_pr_line(0) + (mu/(rho*R + mu + Ds)); //S_pr_line(2) = S_pr_line(1) + (Ds/(rho*R + mu + Ds)); //Grow <-----> Starve F_pr_line = lambda/(lambda+sigma*(K-R)); //F_pr_line(1) = F_pr_line(0) + ((sigma*(K-R))/(lambda+sigma*(K-R)+Df)); //F_pr_line(2) = F_pr_line(1) + (Df/(lambda+sigma*(K-R)+Df)); //Initiate variables double dt; //Randomly select an individual (R,S,F) with probability 1/N //ind thus represents the POSITION of the individual //Update total number of individuals tot = ind_vec.size(); max = (double)(tot - 1); min = 0.L; int id = min + (rand() % (int)(max - min + 1)); int state; int location; double draw_event; //If ind is a resource... if (ind_vec(id) == 0) { state = 0; location = loc_vec(id); //Draw a random event //Grow, become consumed or move? draw_event = ((double) rand() / (RAND_MAX)); //Grow if (draw_event < R_pr_line) { //Append a new resource to the END of the vector ind_vec.push_back(state); //Append the resource's location to the END of the vector loc_vec.push_back(location); //Update Tally R = R + 1; //(1.L/size); } //Become consumed!!!! if ((draw_event >= R_pr_line) && (draw_event < 1.L)) { //Remove the consumed resource from the state vector ind_vec.erase(id); //Remove the consumed resource form the location vector loc_vec.erase(id); //Update Tally R = R - 1; //(1.L/size); } dt = 1.L/((alpha*(K-R)) + (F + S)); } //If ind is a starver... if (ind_vec(id) == 1) { state = 1; location = loc_vec(id); //Draw a random event //Recover, die, or move?? draw_event = ((double) rand() / (RAND_MAX)); //Recover if (draw_event < S_pr_line) { //Update the state from starver to full ind_vec(id) = 2; //Update Tally S = S - 1; //(1.L/size); F = F + 1; //(1.L/size); } //Die if ((draw_event >= S_pr_line) && (draw_event < 1.L)) { //Remove the consumed resource from the state vector ind_vec.erase(id); //Remove the consumed resource form the location vector loc_vec.erase(id); //Update Tally S = S - 1; //(1.L/size); } dt = 1.L/(rho*R + mu); } //If ind is Full... if (ind_vec(id) == 2) { state = 2; location = loc_vec(id); //Draw a random event //Grow, starve, or move? draw_event = ((double) rand() / (RAND_MAX)); //Grow if (draw_event < F_pr_line) { //Append a new resource to the END of the vector ind_vec.push_back(state); //Append the resource's location to the END of the vector loc_vec.push_back(location); F = F + 1; //(1.L/size); } //Starve if ((draw_event >= F_pr_line) && (draw_event < 1.L)) { //Update the state from full to starver ind_vec(id) = 1; //Update Tally F = F - 1; //(1.L/size); S = S + 1; //(1.L/size); } dt = 1.L/(lambda+sigma*(K-R)); } //Advance time t = t + dt; //Rcout << "t = " << dt << std::endl; //Update output ind_out.push_back(ind_vec); loc_out.push_back(loc_vec); t_out.push_back(t); tic = tic + 1; } //end while loop over t List cout(3); cout(0) = ind_out; cout(1) = loc_out; cout(2) = t_out; return(cout); }
IntegerVector integer_initializer_list(){ IntegerVector x = {0,1,2,3} ; for( int i=0; i<x.size(); i++) x[i] = x[i]*2 ; return x ; }
//[[Rcpp::depends("RcppArmadillo")]] // [[Rcpp::export]] arma::vec fdlmLogLik(arma::mat Y, arma::mat xFix, arma::mat xDyn, double nFactors, Rcpp::List hyperparms, arma::mat Y_NA, IntegerVector whichNA, NumericVector Beta, NumericVector Lambda, arma::mat psi, double discW) { int T = Y.n_rows; int q = Y.n_cols; int pDyn = xDyn.n_cols; int pFix = xFix.n_cols; int r = pDyn*q; int N = psi.n_rows; arma::cube Beta_(Beta.begin(), pFix, q, N, false); arma::cube Lambda_(Lambda.begin(), q, nFactors, N, false); std::cout << "\nrodou ate Beta_ e Lambda_\n"; double constants = -(q/2)*log2pi; arma::vec nloglik(N); arma::mat V(q, q); arma::mat VInv(q, q); std::cout << "\nrodou ate depois de V e VInv\n"; arma::vec mm(r); arma::mat CC(r, r); std::cout << "\nrodou ate depois de mm e CC.\n"; arma::vec aa(r); arma::mat RR(r, r); arma::vec ff(q); arma::mat QQ(q, q); std::cout << "\nrodou ate depois de ff e QQ.\n"; arma::mat FF; arma::vec ee; arma::mat AA; arma::mat I_q = arma::eye(q, q); std::cout << "\nrodou ate depois de diagmat.\n"; //arma::vec z; //double rootisum; arma::mat Eigvec; arma::vec eigval; mm = as<arma::vec>(hyperparms["m0"]); CC = as<arma::mat>(hyperparms["C0"]); arma::mat Yn(T, q); arma::mat En(T, q); Yn = Y; //arma::vec nloglik_chol(N); std::cout << "\nrodou ate antes do for.\n"; for(int n = 0; n < N; n++){ double loglik = 0.0; //double loglik_chol = 0.0; if(whichNA.size()>0){ Yn.elem(as<arma::uvec>(whichNA)) = arma::trans(Y_NA.row(n)); } En = Yn - xFix * Beta_.slice(n); V = Lambda_.slice(n) * arma::trans(Lambda_.slice(n)) + arma::diagmat(psi.row(n)); VInv = arma::inv(symmatu(V)); for(int k = 1; k < T+1; k++){ //evolucao aa = mm; RR = CC/discW; //predicao FF = kron(I_q, xDyn.row(k-1)); ff = arma::trans(FF) * aa; QQ = arma::symmatu(arma::trans(FF) * RR * FF + V); //verossimilhanca /* arma::mat rooti = arma::trans(arma::inv(trimatu(arma::chol(QQ)))); double rootisum = arma::sum(log(rooti.diag())); arma::vec z = rooti * arma::trans( En.row(k-1) - ff.t()) ; loglik_chol += constants - 0.5 * arma::sum(z%z) + rootisum; */ arma::eig_sym(eigval, Eigvec, QQ); double rootisum = -0.5*arma::sum(log(eigval)); arma::vec z = arma::trans( (En.row(k-1) - ff.t()) * Eigvec * diagmat(1/sqrt(eigval)) ); loglik += constants - 0.5*arma::sum(z%z) + rootisum; //atualizacao //CC = RR - RR * FF * arma::inv(QQ) * arma::trans(FF) * RR; CC = arma::inv(FF * VInv * arma::trans(FF) + arma::inv(RR)); //arma::eig_sym(eigval, Eigvec, FF * VInv * arma::trans(FF)); AA = CC * FF * VInv; mm = aa + AA*(arma::trans(En.row(k-1) - ff.t())); } nloglik(n) = loglik; //nloglik_chol(n) = loglik_chol; } return nloglik; }
// [[Rcpp::export]] SEXP combine_vars(CharacterVector vars, ListOf<IntegerVector> xs) { VarList selected(vars.size()); if (xs.size() == 0) return IntegerVector::create(); // Workaround bug in ListOf<>; can't access attributes SEXP raw_names = Rf_getAttrib(xs, Rf_mkString("names")); CharacterVector xs_names; if (raw_names == R_NilValue) { xs_names = CharacterVector(xs.size(), "" ); } else { xs_names = raw_names ; } // If first component is negative, pre-fill with existing vars if (vector_sign(xs[0]) == -1) { for (int j = 0; j < vars.size(); ++j) { selected.add(j + 1, vars[j]); } } for (int i = 0; i < xs.size(); ++i) { IntegerVector x = xs[i]; if (x.size() == 0) continue; int sign = vector_sign(x); if (sign == 0) stop("Each argument must yield either positive or negative integers"); if (sign == 1) { bool group_named = xs_names[i] != ""; bool has_names = x.attr("names") != R_NilValue; if (group_named) { if (x.size() == 1) { selected.update(x[0], xs_names[i]); } else { // If the group is named, children are numbered sequentially for (int j = 0; j < x.size(); ++j) { std::stringstream out; out << xs_names[i] << j + 1; selected.update(x[j], out.str()); } } } else if (has_names) { CharacterVector names = x.names() ; for (int j = 0; j < x.size(); ++j) { selected.update(x[j], names[j]); } } else { for (int j = 0; j < x.size(); ++j) { int pos = x[j]; if (pos < 1 || pos > vars.size()) stop("Position must be between 0 and n"); // Add default name, if not all ready present if (!selected.has(pos)) selected.update(pos, vars[pos - 1]); } } } else { for (int j = 0; j < x.size(); ++j) { selected.remove(-x[j]); } } } return selected; }
// [[Rcpp::export]] List arrange_impl( DataFrame data, List args, DataDots dots ){ check_valid_colnames(data) ; assert_all_white_list(data) ; // special case arrange() with no arguments for grouped data if( dots.size() == 0 && is<GroupedDataFrame>(data) ){ DataFrame labels( data.attr( "labels" ) ); OrderVisitors o(labels) ; IntegerVector index = o.apply() ; // reorganize labels = DataFrameVisitors( labels, labels.names() ).subset( index, labels.attr("class") ); ListOf<IntegerVector> indices( data.attr("indices") ) ; int ngroups = indices.size() ; List new_indices(ngroups) ; IntegerVector master_index(data.nrows()) ; for( int i=0; i<ngroups; i++){ new_indices[index[i]] = indices[i] ; } IntegerVector group_sizes = data.attr("group_sizes") ; IntegerVector new_group_sizes(ngroups); for( int i=0, k=0; i<ngroups; i++){ IntegerVector idx = new_indices[i] ; IntegerVector new_group_index = seq(k, k + idx.size() - 1 ); for( int j=0; j<idx.size(); j++, k++){ master_index[k] = idx[j] ; } new_indices[i] = new_group_index ; new_group_sizes[i] = idx.size() ; } DataFrame res = DataFrameVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ; res.attr( "labels" ) = labels ; res.attr( "indices" ) = new_indices ; res.attr( "vars" ) = data.attr("vars" ) ; res.attr( "group_sizes" ) = new_group_sizes ; res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ; res.attr( "drop" ) = data.attr("drop") ; return res ; } if( dots.size() == 0 || data.nrows() == 0) return data ; int nargs = dots.size() ; if( is<GroupedDataFrame>(data) ){ nargs += GroupedDataFrame(data).nvars() ; } List variables(nargs) ; LogicalVector ascending(nargs) ; int k = 0 ; if( is<GroupedDataFrame>(data) ){ GroupedDataFrame gdf(data); for( ; k< gdf.nvars(); k++) { ascending[k] = true ; String s = PRINTNAME(gdf.symbol(k)); variables[k] = data[s] ; } } for(int i=0; k<nargs; i++, k++){ Shelter<SEXP> __ ; SEXP call = args[dots.expr_index(i)] ; bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ; CallProxy call_proxy(is_desc ? CADR(call) : call, data, dots.envir(i)) ; SEXP v = __(call_proxy.eval()) ; if( !white_list(v) || TYPEOF(v) == VECSXP ){ std::stringstream ss ; ss << "cannot arrange column of class '" << get_single_class(v) << "'" ; stop(ss.str()) ; } if( Rf_length(v) != data.nrows() ){ std::stringstream s ; s << "incorrect size (" << Rf_length(v) << "), expecting :" << data.nrows() ; stop(s.str()) ; } variables[k] = v ; ascending[k] = !is_desc ; } OrderVisitors o(variables, ascending, nargs) ; IntegerVector index = o.apply() ; DataFrameVisitors visitors( data, data.names() ) ; List res = visitors.subset(index, data.attr("class") ) ; SET_ATTRIB(res, strip_group_attributes(res)); return res ; }
// [[Rcpp::export]] double CramersV_C(IntegerVector x,IntegerVector y, bool Bias_Cor){ //Counts the frequency std::map<std::pair<int, int>, int> counts_xy; std::map<int,int> counts_x; std::map<int,int> counts_y; int n = x.size(); int i=0,j=0; //Read vectors for (i = 0; i <= n-1; ++i) { counts_x[x[i]]++; counts_y[y[i]]++; counts_xy[std::make_pair(x[i],y[i])]++; } //Calculates Chi Square Stats int unique_x = counts_x.size() -1, unique_y = counts_y.size() -1; IntegerVector uni_x = unique(x); IntegerVector uni_y = unique(y); double Exy = 0.0; double chisq=0.0; int Oxy = 0; int a=0,b=0,c=0,d=0; if (unique_x==0 || unique_y==0) { // only 1 unique value then return 1 return 1; } else if (unique_x==1 && unique_y==1){ // if there is only 2 unique value then use special method. if (counts_xy.find(std::make_pair(uni_x[0],uni_y[0])) != counts_xy.end()) { a = counts_xy.find(std::make_pair(uni_x[0],uni_y[0]))->second; } if (counts_xy.find(std::make_pair(uni_x[0],uni_y[1])) != counts_xy.end()) { b = counts_xy.find(std::make_pair(uni_x[0],uni_y[1]))->second; } if (counts_xy.find(std::make_pair(uni_x[1],uni_y[0])) != counts_xy.end()) { c = counts_xy.find(std::make_pair(uni_x[1],uni_y[0]))->second; } if (counts_xy.find(std::make_pair(uni_x[1],uni_y[1])) != counts_xy.end()) { d = counts_xy.find(std::make_pair(uni_x[1],uni_y[1]))->second; } Exy = counts_x.find(uni_x[0])->second * counts_x.find(uni_x[1])->second * counts_y.find(uni_y[0])->second * counts_y.find(uni_y[1])->second; chisq = (a*d-b*c)* n / Exy; } else { for (i=0;i<=unique_x;++i){ for (j=0;j<=unique_y;++j){ Exy = (double)counts_x.find(uni_x[i])->second * (double)counts_y.find(uni_y[j])->second / (double)n ; if (counts_xy.find(std::make_pair(uni_x[i],uni_y[j])) != counts_xy.end()) { Oxy = counts_xy.find(std::make_pair(uni_x[i],uni_y[j]))->second; } else { Oxy=0; } chisq = chisq +(Oxy - Exy)*(Oxy - Exy)/Exy; } } } if (Bias_Cor) { chisq = std::max((double)0,chisq-(double)(unique_x)*(unique_y)/(n-1) ); unique_x = unique_x - (unique_x*unique_x) /(n-1); unique_y = unique_y - (unique_y*unique_y) /(n-1); } return std::sqrt(abs(chisq)/((double)n * std::min(unique_x, unique_y))); }
//' Wear Time Classification //' //' Classifies wear time vs. non-wear time based on a vector of accelerometer //' count values. //' //' If \code{nci = FALSE}, the algorithm uses a moving window to go through //' every possible interval of length \code{window} in \code{counts}. Any //' interval in which no more than \code{tol} counts are non-zero, and those //' are still < \code{tol.upper}, is classified as non-wear time. //' //' If \code{nci = TRUE}, non-wear time is classified according to the algorithm //' used in the NCI's SAS programs. Briefly, this algorithm defines a non-wear //' period as an interval of length \code{window} that starts with a count value //' of 0, does not contain any periods with \code{(tol + 1)} consecutive //' non-zero count values, and does not contain any counts > \code{tol.upper}. //' If these criteria are met, the non-wear period continues until there are //' \code{(tol + 1)} consecutive non-zero count values or a single count value > //' \code{tol.upper}. //' //' //' @param counts Integer vector with accelerometer count values. //' //' @param window Integer value specifying minimum length of a non-wear //' period. //' //' @param tol Integer value specifying tolerance for non-wear algorithm, i.e. //' number of seconds/minutes with non-zero counts allowed during a non-wear //' interval. //' //' @param tol_upper Integer value specifying maximum count value for a //' second/minute with non-zero counts during a non-wear interval. //' //' @param nci Logical value for whether to use algorithm from NCI's SAS //' programs. See \bold{Details}. //' //' @param days_distinct Logical value for whether to treat each day of data as //' distinct, as opposed to analyzing the entire monitoring period as one //' continuous segment. For minute-to-minute counts, strongly recommend setting //' to \code{FALSE} to correctly classify time near midnight. //' //' @param units_day Integer value specifying how many data point are in a day. //' Typically either 1440 or 86400 depending on whether count values are //' minute-to-minute or second-to-second. //' //' //' @return Integer vector with 1's for valid wear time and 0's for non-wear //' time. //' //' //' @references //' National Cancer Institute. Risk factor monitoring and methods: SAS programs //' for analyzing NHANES 2003-2004 accelerometer data. Available at: //' \url{http://riskfactor.cancer.gov/tools/nhanes_pam}. Accessed Aug. 19, 2018. //' //' Acknowledgment: This material is based upon work supported by the National //' Science Foundation Graduate Research Fellowship under Grant No. DGE-0940903. //' //' //' @examples //' # Load accelerometer data for first 5 participants in NHANES 2003-2004 //' data(unidata) //' //' # Get data from ID number 21005 //' counts.part1 <- unidata[unidata[, "seqn"] == 21005, "paxinten"] //' //' # Identify periods of valid wear time //' weartime.flag <- weartime(counts = counts.part1) //' //' //' @export // [[Rcpp::export]] IntegerVector weartime(IntegerVector counts, int window = 60, int tol = 0, int tol_upper = 99, bool nci = false, bool days_distinct = false, int units_day = 1440) { // Get length(counts) and initialize output vector starting with all 1's int n = counts.size(); IntegerVector out(n, 1); // Use appropriate version of algorithm given days_distinct, tol, and nci if (! days_distinct) { if (tol == 0) { int zeros = 0; for (int b = 0; b < n; ++b) { if (counts[b] == 0) zeros +=1; else { if (zeros >= window) for (int c = b - zeros; c < b; ++c) out[c] = 0; zeros = 0; } if (b == n - 1 && zeros >= window) for (int d = b - zeros + 1; d < b + 1; ++d) out[d] = 0; } } else if (tol > 0) { if (! nci) { IntegerVector status(n); for (int b = 0; b < n; ++b) { int counts_b = counts[b]; if (counts_b == 0) status[b] = 0; else if (counts_b <= tol_upper) status[b] = 1; else if (counts_b > tol_upper) status[b] = tol + 1; } int sum = 0; for (int c = 0; c < window; ++c) sum += status[c]; if (sum <= tol) for (int d = 0; d < window; ++d) out[d] = 0; for (int e = window; e < n; ++e) { sum = sum - status[e - window] + status[e]; if (sum <= tol) for (int f = e - window + 1; f <= e; ++f) out[f] = 0; } } else if (nci) { int zeros = 0; int tolcount = 0; int flag = 0; for (int b = 0; b < n; ++b) { int counts_b = counts[b]; if (zeros == 0 && counts_b != 0) continue; if (counts_b == 0) { zeros += 1; tolcount = 0; } else if (counts_b > 0 && counts_b <= tol_upper) { zeros += 1; tolcount += 1; } else if (counts[b]>tol_upper) { zeros += 1; tolcount += 1; flag = 1; } if (tolcount > tol || flag == 1 || b == n - 1) { if (zeros - tolcount >= window) for (int c = b - zeros + 1; c < b - tolcount + 1; ++c) out[c] = 0; zeros = 0; tolcount = 0; flag = 0; } } } } } else { if (tol == 0) { int zeros = 0; for (int b = 0; b < n; ++b) { if (counts[b] == 0) zeros +=1; else { if (zeros >= window) for (int c = b - zeros; c < b; ++c) out[c] = 0; zeros = 0; } if ((b == n-1 || (b + 1) % units_day == 0) && zeros >= window) for (int d = b - zeros + 1; d < b + 1; ++d) out[d] = 0; if ((b + 1) % units_day == 0) zeros = 0; } } else if (tol > 0) { if (! nci) { IntegerVector status(n); for (int b = 0; b < n; ++b) { int counts_b = counts[b]; if (counts_b == 0) status[b] = 0; else if (counts_b <= tol_upper) status[b] = 1; else if (counts_b > tol_upper) status[b] = tol + 1; } int sum = 0; for (int c = 0; c < window; ++c) sum += status[c]; if (sum <= tol) for (int d = 0; d < window; ++d) out[d] = 0; for (int e = window; e < n; ++e) { sum = sum - status[e - window] + status[e]; if (sum <= tol && e % units_day > window - 2) for (int f = e - window + 1; f <= e; ++f) out[f] = 0; } } else if (nci) { int zeros = 0; int tolcount = 0; int flag = 0; for (int b = 0; b < n; ++b) { int counts_b = counts[b]; if (zeros == 0 && counts_b != 0) continue; if (counts_b == 0) { zeros += 1; tolcount = 0; } else if (counts_b > 0 && counts_b <= tol_upper) { zeros += 1; tolcount += 1; } else if (counts_b > tol_upper) { zeros += 1; tolcount += 1; flag = 1; } if (tolcount > tol || flag == 1 || b == n - 1 || (b + 1) % units_day == 0) { if (zeros-tolcount>=window) for (int c = b-zeros+1; c < b-tolcount+1; ++c) out[c] = 0; zeros = 0; tolcount = 0; flag = 0; } } } } } // Return output vector return(out); }
//' @export // [[Rcpp::export]] List ensemble_FHMM(int n_chains, NumericMatrix Y, NumericMatrix w, NumericVector transition_probs, double alpha, int K, int k, int n, double h, int radius, int max_iter, int burnin, int thin, bool estimate_marginals, bool parallel_tempering, bool crossovers, NumericVector temperatures, int swap_type, int swaps_burnin, int swaps_freq, IntegerVector which_chains, IntegerVector subsequence, IntegerVector x, int nrows_crossover, bool HB_sampling, int nrows_gibbs, IntegerMatrix all_combs, bool update_pars){ // initialise ensemble of n_chains Ensemble_Factorial ensemble(n_chains, K, k, n, alpha, h, radius, nrows_crossover, HB_sampling, nrows_gibbs, all_combs); ensemble.set_temperatures(temperatures); ensemble.initialise_pars(w, transition_probs, x, Y.nrow()); ensemble.update_emission_probs(Y); int index; int n_chains_out = which_chains.size(); int trace_length = (max_iter - burnin + (thin - 1)) / thin; int list_length = n_chains_out * trace_length; List tr_x(list_length), tr_X(list_length), tr_pi(list_length), tr_A(list_length), tr_mu(list_length), tr_sigma2(list_length), tr_alpha(list_length), tr_switching_prob(list_length), tr_loglik(list_length), tr_loglik_cond(list_length); List tr_crossovers(trace_length); Timer timer; nanotime_t t0, t1; t0 = timer.now(); for(int iter = 1; iter <= max_iter; iter++){ ensemble.update_x(); ensemble.update_A(); if(update_pars){ ensemble.update_mu(Y); } ensemble.update_emission_probs(Y); if(crossovers && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){ ensemble.do_crossover(); } if((iter > burnin) && ((iter-1) % thin == 0)){ index = (iter - burnin - 1)/thin; ensemble.copy_values_to_trace(which_chains, tr_x, tr_X, tr_pi, tr_A, tr_mu, tr_sigma2, tr_alpha, tr_loglik, tr_loglik_cond, tr_switching_prob, index, subsequence); if(crossovers && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){ tr_crossovers[index] = ensemble.get_crossovers(); } } if(iter % 1000 == 0) printf("iter %d\n", iter); } //ensemble.scale_marginals(max_iter, burnin); //ListOf<NumericMatrix> tr_marginal_distr = ensemble.get_copy_of_marginals(which_chains); t1 = timer.now(); return List::create(Rcpp::Named("trace_x") = tr_x, Rcpp::Named("trace_X") = tr_X, Rcpp::Named("trace_pi") = tr_pi, Rcpp::Named("trace_A") = tr_A, Rcpp::Named("trace_mu") = tr_mu, Rcpp::Named("trace_sigma2") = tr_sigma2, Rcpp::Named("trace_alpha") = tr_alpha, Rcpp::Named("log_posterior") = tr_loglik, Rcpp::Named("log_posterior_cond") = tr_loglik_cond, Rcpp::Named("switching_prob") = tr_switching_prob, //Rcpp::Named("marginal_distr") = tr_marginal_distr, //Rcpp::Named("acceptance_ratio") = ensemble.get_acceptance_ratio(), Rcpp::Named("timer") = t1-t0, Rcpp::Named("crossovers") = tr_crossovers); }
SEXP slice_not_grouped(const DataFrame& df, const LazyDots& dots) { CharacterVector names = df.names(); const Lazy& lazy = dots[0]; Call call(lazy.expr()); CallProxy proxy(call, df, lazy.env()); int nr = df.nrows(); IntegerVector test = check_filter_integer_result(proxy.eval()); int n = test.size(); // count the positive and negatives CountIndices counter(nr, test); // just positives -> one based subset if (counter.is_positive()) { int n_pos = counter.get_n_positive(); std::vector<int> idx(n_pos); int j=0; for (int i=0; i<n_pos; i++) { while (test[j] > nr || test[j] == NA_INTEGER) j++; idx[i] = test[j++] - 1; } return subset(df, idx, df.names(), classes_not_grouped()); } // special case where only NA if (counter.get_n_negative() == 0) { std::vector<int> indices; DataFrame res = subset(df, indices, df.names(), classes_not_grouped()); return res; } // just negatives (out of range is dealt with early in CountIndices). std::set<int> drop; for (int i=0; i<n; i++) { if (test[i] != NA_INTEGER) drop.insert(-test[i]); } int n_drop = drop.size(); std::vector<int> indices(nr - n_drop); std::set<int>::const_iterator drop_it = drop.begin(); int i = 0, j = 0; while (drop_it != drop.end()) { int next_drop = *drop_it - 1; while (j < next_drop) { indices[i++] = j++; } j++; ++drop_it; } while (i < nr - n_drop) { indices[i++] = j++; } DataFrame res = subset(df, indices, df.names(), classes_not_grouped()); return res; }
//' Tabulate methylation patterns. //' //' Tabulate methylation patterns of a given \code{size} from the elements of //' the \code{z} slot of a \code{\link{SimulatedBS}} object. //' //' @param readID an integer vector of read IDs; the \code{readID} column of //' an element of the \code{z} slot of a \code{\link{SimulatedBS}} object. //' @param z an integer vector of methylation states; the \code{z} column of //' an element of the \code{z} slot of a \code{\link{SimulatedBS}} object. //' @param pos an integer vector of the positions of methylation loci sequenced //' by each read; the \code{pos} column of an element of the \code{z} slots of //' a \code{\link{SimulatedBS}} object. //' @param size an integer greater than 1 specifying the size of the m-tuples //' to be created. //' //' @return A list of tabulated methylation patterns. The name of each list //' element is a comma-separated string of positions for that m-tuple and the //' value of each element is a vector of associated counts (the order of this //' vector is identical to that given by //' \code{MethylationTuples:::\link[MethylationTuples]{.makeMethPatNames}}). //' //' @note \strong{WARNING}: Only adjacent m-tuples, with respect to the //' sequenced methylation loci, are created. This means that (A) the list will //' contain unobserved m-tuples (i.e., those with all counts set to zero) and //' (B) that it will be possibly with paired-end reads to create m-tuples with //' NIC > 0. //' \strong{WARNING}: The special case where \code{size} = 1 is handled //' separately by tabulating with the \code{data.table} package. //' //' @keywords internal //' // [[Rcpp::export(".tabulatez")]] std::map<std::string, std::vector<int> > tabulatez(IntegerVector readID, IntegerVector z, IntegerVector pos, int size) { // Argument checks. if (readID.size() != z.size()) { Rcpp::stop("length(readID) != length(z)"); } if (readID.size() != pos.size()) { Rcpp::stop("length(readID) != length(pos)"); } if (size < 2) { Rcpp::stop("size < 2."); } // Variable initialisations // mtuples is a map where the values are the key is the co-ordinates of the // m-tuple (pos1,pos2,...,posm) and the values are the counts of each // methylation pattern. std::map<std::string, std::vector<int> > mtuples; std::stringstream mtuples_key; std::vector<int> mtuples_value(pow(2, size)); // Initialise the map of all adjacent m-tuples IntegerVector unique_pos = clone(unique(pos)); std::sort(unique_pos.begin(), unique_pos.end()); int n = unique_pos.size() - size + 1; for (int i = 0; i < n; i++) { mtuples_key << unique_pos[i] << ","; for (int j = (i + 1); j < (i + size - 1); j++) { mtuples_key << unique_pos[j] << ","; } mtuples_key << unique_pos[(i + size - 1)]; mtuples[mtuples_key.str()] = mtuples_value; mtuples_key.str(std::string()); } // Fill the map with the counts of each methylation pattern int k = 0; int N = readID.size() - size + 1; int idx; int idx0 = (pow(2, size) - 1); while (k < N) { // Check that these 'size' positions are from the same read. if (readID[k] == readID[k + size - 1]) { // idx converts the methylation pattern, e.g., (1, 1), to the index of // the corresponding element in mtuples_value. idx = idx0; for (int l = k; l < (k + size); l++) { idx -= z[l] * pow(2, size - (l - k) - 1); } mtuples_key << pos[k] << ","; for (int m = (k + 1); m < (k + size - 1); m++) { mtuples_key << pos[m] << ","; } mtuples_key << pos[(k + size - 1)]; mtuples[mtuples_key.str()][idx] += 1; mtuples_key.str(std::string()); k += 1; } else { // Can jump the k-index because we know if the read doesn't contain a // tuple then the next positions in the read can't either. k += (size - 1); } } return mtuples; }
// [[Rcpp::export]] List splitDate(IntegerVector inn, // Starttimes - base data IntegerVector out, // Endtimes - base data IntegerVector event, // Event at end of interval 0/1 - base data IntegerVector mergevar, // Merge variable, multiple records can have same pnr - base data IntegerVector seq, // Vector of date values to split by IntegerVector varname // Value to be added to each split date (such as birthdate) ) { std::vector<int> Omergevar; Omergevar.reserve(mergevar.size()*seq.size()); std::vector<int> Oinn; // Starttimes output Oinn.reserve(mergevar.size()*seq.size()); std::vector<int> Oout; // Endtimes output Oout.reserve(mergevar.size()*seq.size()); std::vector<int> Oevent; // Event at end 0/1 Oevent.reserve(mergevar.size()*seq.size()); std::vector<int> Ovalue; // Value for output 0.1,2... Ovalue.reserve(mergevar.size()*seq.size()); std::vector<int> seq_plus(seq.size()); // seq+value for each case for (int i=0; i<mergevar.size(); i++){ // Loop along base data; int seq_num=0; // Number in seq of the first record to create for each record in input for (int j=0; j<seq.size(); j++) seq_plus[j]=seq(j)+varname[i]; // Final vector to split by defined by sum of vector and varname for (int ii=0; ii<seq.size(); ii++){// Create records - loop through seq seq_num++;// next seq starts with one if(seq_plus[ii]>=out(i)){//Seq_plus values >= record, output record and break Omergevar.push_back(mergevar(i)); Oinn.push_back(inn(i)); Ovalue.push_back(seq_num-1); Oout.push_back(out(i)); Oevent.push_back(event(i)); break; //Done with base record } else if(inn(i)>seq_plus[ii] && ii==(seq.size()-1)){//past seq AND last seq - output and break Omergevar.push_back(mergevar(i)); Oinn.push_back(inn(i)); Ovalue.push_back(seq_num); // final seq-value Oout.push_back(out(i)); Oevent.push_back(event(i)); break; } else if(inn(i)<seq_plus[ii] && out(i)>seq_plus[ii]){ //split situation - duration at least 1 day Omergevar.push_back(mergevar(i)); Oinn.push_back(inn(i)); Ovalue.push_back(seq_num-1); //value prior to seq Oout.push_back(seq_plus[ii]); Oevent.push_back(0); // no event // and reset start of base record ready for next value in seq_plus inn(i)=seq_plus[ii]; if(ii==seq.size()-1){ Omergevar.push_back(mergevar(i)); Oinn.push_back(inn(i)); Ovalue.push_back(seq_num); Oout.push_back(out(i)); Oevent.push_back(event(i)); break; } } else if(out(i)==seq_plus[ii] && event(i)==1){ // Also split with zero record in case of event Omergevar.push_back(mergevar(i)); Oinn.push_back(inn(i)); Ovalue.push_back(seq_num-1); //value prior to seq Oout.push_back(seq_plus[ii]); Oevent.push_back(0); // no event // and reset start of base record ready for next value in seq_plus inn(i)=seq_plus[ii]; if(ii==seq.size()-1){ // output of last record Omergevar.push_back(mergevar(i)); Oinn.push_back(inn(i)); Ovalue.push_back(seq_num); Oout.push_back(out(i)); Oevent.push_back(event(i)); break; } } } //end seq-loop } // end base-loop return (Rcpp::List::create(Rcpp::Named("pnrnum")=Omergevar, Rcpp::Named("inn") = Oinn, Rcpp::Named("out") = Oout, Rcpp::Named("event") = Oevent, Rcpp::Named("value") = Ovalue)); }
SEXP concatenate(const DataFrame& x, IntegerVector ind, bool factorsAsStrings) { int nrow = x.nrows(); int n_ind = ind.size(); // We coerce up to the 'max type' if necessary, using the fact // that R's SEXPTYPEs are also ordered in terms of 'precision' // Note: we convert factors to characters if necessary int max_type = 0; int ctype = 0; for (int i = 0; i < n_ind; ++i) { if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) { ctype = STRSXP; } else { ctype = TYPEOF(x[ind[i]]); } max_type = ctype > max_type ? ctype : max_type; } debug(printf("Max type of value variables is %s\n", Rf_type2char(max_type))); Armor<SEXP> tmp; Shield<SEXP> output(Rf_allocVector(max_type, nrow * n_ind)); for (int i = 0; i < n_ind; ++i) { // a 'tmp' pointer to the current column being iterated over, or // a coerced version if necessary if (TYPEOF(x[ind[i]]) == max_type) { tmp = x[ind[i]]; } else if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) { tmp = Rf_asCharacterFactor(x[ind[i]]); } else { tmp = Rf_coerceVector(x[ind[i]], max_type); } switch (max_type) { case INTSXP: DO_CONCATENATE(int); case REALSXP: DO_CONCATENATE(double); case LGLSXP: DO_CONCATENATE(int); case CPLXSXP: DO_CONCATENATE(Rcomplex); case STRSXP: { for (int j = 0; j < nrow; ++j) { SET_STRING_ELT(output, i * nrow + j, STRING_ELT(tmp, j)); } break; } case VECSXP: { for (int j = 0; j < nrow; ++j) { SET_VECTOR_ELT(output, i * nrow + j, VECTOR_ELT(tmp, j)); } break; } default: stop("Unsupported type (%s)", Rf_type2char(max_type)); } } return output; }
// [[Rcpp::export]] List melt_dataframe(const DataFrame& data, const IntegerVector& id_ind, const IntegerVector& measure_ind, String variable_name, String value_name, SEXP attrTemplate, bool factorsAsStrings, bool valueAsFactor, bool variableAsFactor) { int nrow = data.nrows(); CharacterVector data_names = as<CharacterVector>(data.attr("names")); int n_id = id_ind.size(); debug(Rprintf("n_id == %i\n", n_id)); int n_measure = measure_ind.size(); debug(Rprintf("n_measure == %i\n", n_measure)); // Don't melt if the value variables are non-atomic for (int i = 0; i < n_measure; ++i) { if (!Rf_isVector(data[measure_ind[i]])) { stop("Can't gather non-vector column %i", measure_ind[i] + 1); } } // The output should be a data.frame with: // number of columns == number of id vars + 'variable' + 'value', // with number of rows == data.nrow() * number of value vars List output = no_init(n_id + 2); // First, allocate the ID variables // we repeat each ID vector n_measure times for (int i = 0; i < n_id; ++i) { SEXP object = data[id_ind[i]]; std::string var_name = std::string(data_names[id_ind[i]]); output[i] = rep_(object, n_measure, var_name); } // Now, we assign the 'variable' and 'value' columns // 'variable' is made up of repeating the names of the 'measure' variables, // each nrow times. We want this to be a factor as well. CharacterVector id_names = no_init(n_measure); for (int i = 0; i < n_measure; ++i) { id_names[i] = data_names[measure_ind[i]]; } if (variableAsFactor) { output[n_id] = make_variable_column_factor(id_names, nrow); } else { output[n_id] = make_variable_column_character(id_names, nrow); } // 'value' is made by concatenating each of the 'value' variables output[n_id + 1] = concatenate(data, measure_ind, factorsAsStrings); if (!Rf_isNull(attrTemplate)) { Rf_copyMostAttrib(attrTemplate, output[n_id + 1]); } // Make the List more data.frame like // Set the row names output.attr("row.names") = IntegerVector::create(IntegerVector::get_na(), -(nrow * n_measure)); // Set the names CharacterVector out_names = no_init(n_id + 2); for (int i = 0; i < n_id; ++i) { out_names[i] = data_names[id_ind[i]]; } out_names[n_id] = variable_name; out_names[n_id + 1] = value_name; output.attr("names") = out_names; // Set the class output.attr("class") = "data.frame"; return output; }
int mergeSort(IntegerVector &v) { IntegerVector temp(v.size()); return mergeSortRek(v,0,v.size(),temp); }
// Function to generate adjacency graph and count clusters // [[Rcpp::export]] int countpartitions(List aList) { //Takes an adjacency list, //The vector of subset nodes //The number of subset nodes //initialize connCompVec //Initialize visited indices IntegerVector visitedInd(aList.size()); int indexVisit = 0; //Initialize connected components IntegerVector currConnComp(aList.size()); //Initialize the number of connected components int numConnComp = 0; //Loop over nodes for(int i = 0; i < aList.size(); i++){ //If i has not been visited... if(visitedInd[i] == 0){ //List i as visited visitedInd[i] = 1; //Increase the number of connected components numConnComp++; //Add i to the connected component list currConnComp[indexVisit] = i; //increase index visit indexVisit++; //Count the number of nodes in the current connected component int nodeCount = indexVisit - 1; //Initialize a stopping variable: int toStop = 0; //While we don't stop while(toStop == 0){ //get the neighbors of the next current comp IntegerVector listNeighs = aList[currConnComp[nodeCount]]; //If listNeighs does not have length zero... int listLength = listNeighs.size(); if(listLength > 0){ //Add nodes of listLength to currConnComp //and mark nodes as visited for(int j = 0; j < listLength; j++){ if( visitedInd[listNeighs[j]] == 0){ currConnComp[indexVisit] = listNeighs[j]; visitedInd[listNeighs[j]] = 1; //Increment indexVisit indexVisit++; } } } //Increment nodeCount nodeCount++; //If currConnComp[nodeCount] is zero, then we must have new connected component //Also stop if we have too many guys. if(nodeCount == aList.size()){ toStop = 1; } else if(currConnComp[nodeCount] == 0 ){ toStop = 1; } } } } return numConnComp; }
RcppModelData::RcppModelData( ModelType _modelType, const IntegerVector& _pid, const NumericVector& _y, const NumericVector& _z, const NumericVector& _time, const NumericVector& dxv, // dense const IntegerVector& siv, // sparse const IntegerVector& spv, const NumericVector& sxv, const IntegerVector& iiv, // indicator const IntegerVector& ipv, bool useTimeAsOffset ) : ModelData( _modelType, _pid, _y, _z, _time, bsccs::make_shared<loggers::RcppProgressLogger>(), bsccs::make_shared<loggers::RcppErrorHandler>() ) { if (useTimeAsOffset) { // offset // real_vector* r = new real_vector(); RealVectorPtr r = make_shared<RealVector>(); push_back(NULL, r, DENSE); r->assign(offs.begin(), offs.end()); // TODO Should not be necessary with shared_ptr setHasOffsetCovariate(true); getColumn(0).add_label(-1); } // Convert dense int nCovariates = static_cast<int>(dxv.size() / y.size()); for (int i = 0; i < nCovariates; ++i) { push_back( static_cast<IntegerVector::iterator>(NULL), static_cast<IntegerVector::iterator>(NULL), dxv.begin() + i * y.size(), dxv.begin() + (i + 1) * y.size(), DENSE); getColumn(getNumberOfColumns() - 1).add_label(getNumberOfColumns() - (getHasOffsetCovariate() ? 1 : 0)); } // Convert sparse nCovariates = spv.size() - 1; for (int i = 0; i < nCovariates; ++i) { int begin = spv[i]; int end = spv[i + 1]; push_back( siv.begin() + begin, siv.begin() + end, sxv.begin() + begin, sxv.begin() + end, SPARSE); getColumn(getNumberOfColumns() - 1).add_label(getNumberOfColumns() - (getHasOffsetCovariate() ? 1 : 0)); } // Convert indicator nCovariates = ipv.size() - 1; for (int i = 0; i < nCovariates; ++i) { int begin = ipv[i]; int end = ipv[i + 1]; push_back( iiv.begin() + begin, iiv.begin() + end, static_cast<NumericVector::iterator>(NULL), static_cast<NumericVector::iterator>(NULL), INDICATOR); getColumn(getNumberOfColumns() - 1).add_label(getNumberOfColumns() - (getHasOffsetCovariate() ? 1 : 0)); } this->nRows = y.size(); // Clean out PIDs std::vector<int>& cpid = getPidVectorRef(); if (cpid.size() == 0) { for (size_t i = 0; i < nRows; ++i) { cpid.push_back(i); // TODO These are not necessary; remove. } nPatients = nRows; } else { int currentCase = 0; int currentPID = cpid[0]; cpid[0] = currentCase; for (size_t i = 1; i < pid.size(); ++i) { int nextPID = cpid[i]; if (nextPID != currentPID) { currentCase++; currentPID = nextPID; } cpid[i] = currentCase; } nPatients = currentCase + 1; } }
void crossover2_mat(IntegerMatrix X, IntegerMatrix Y, int t, int n, IntegerVector which_rows){ int m = which_rows.size(); for(int i=t+1; i<n; i++){ crossover_one_column(X, Y, i, which_rows, m); } }
// [[Rcpp::export(".interp_genoprob_onechr")]] NumericVector interp_genoprob_onechr(const NumericVector& genoprob, const NumericVector& map, const IntegerVector& pos_index) { // get dimensions if(Rf_isNull(genoprob.attr("dim"))) throw std::invalid_argument("genoprob should be a 3d array but has no dim attribute"); const IntegerVector& d = genoprob.attr("dim"); if(d.size() != 3) throw std::invalid_argument("genoprob should be a 3d array"); const int n_ind = d[0]; const int n_gen = d[1]; const int matsize = n_ind * n_gen; const int n_pos = map.size(); if(pos_index.size() != n_pos) { throw std::invalid_argument("Need length(map) == length(pos_index)"); } NumericVector result(n_ind*n_gen*n_pos); result.attr("dim") = Dimension(n_ind, n_gen, n_pos); // find position to the left that has genoprobs IntegerVector left_index(n_pos); int last = -1; for(int pos=0; pos<n_pos; pos++) { if(pos_index[pos] >= 0) last = pos; left_index[pos] = last; } // find position to the right that has genoprobs IntegerVector right_index(n_pos); last = -1; for(int pos=n_pos-1; pos>=0; pos--) { if(pos_index[pos] >= 0) last = pos; right_index[pos] = last; } // copy or interpolate for(int pos=0; pos<n_pos; pos++) { if(pos_index[pos] >= 0) { // in the old genoprobs std::copy(genoprob.begin()+(pos_index[pos]*matsize), genoprob.begin()+((pos_index[pos]+1)*matsize), result.begin()+(pos*matsize)); } else { double p,q; if(left_index[pos] < 0) { // off end to left p = 0.0; q = 1.0; } else if(right_index[pos] < 0) { // off end to right p = 1.0; q = 0.0; } else { double left_pos = map[left_index[pos]]; double right_pos = map[right_index[pos]]; p = (right_pos - map[pos])/(right_pos - left_pos); q = (map[pos] - left_pos)/(right_pos - left_pos); } for(int ind=0; ind<n_ind; ind++) { for(int gen=0; gen<n_gen; gen++) { result[ind + gen*n_ind + pos*matsize] = 0.0; if(p > 0) result[ind + gen*n_ind + pos*matsize] += (p*genoprob[ind + gen*n_ind + pos_index[left_index[pos]]*matsize]); if(q > 0) result[ind + gen*n_ind + pos*matsize] += (q*genoprob[ind + gen*n_ind + pos_index[right_index[pos]]*matsize]); } } } } return result; }
// [[Rcpp::export]] List arrange_impl( DataFrame data, LazyDots dots ){ if( data.size() == 0 ) return data ; check_valid_colnames(data) ; assert_all_white_list(data) ; // special case arrange() with no arguments for grouped data if( dots.size() == 0 && is<GroupedDataFrame>(data) ){ GroupedDataFrame gdata(data) ; data = gdata.data() ; DataFrame labels( data.attr( "labels" ) ); OrderVisitors o(labels) ; IntegerVector index = o.apply() ; // reorganize labels = DataFrameSubsetVisitors( labels, labels.names() ).subset( index, labels.attr("class") ); ListOf<IntegerVector> indices( data.attr("indices") ) ; int ngroups = indices.size() ; List new_indices(ngroups) ; IntegerVector master_index(data.nrows()) ; for( int i=0; i<ngroups; i++){ new_indices[index[i]] = indices[i] ; } IntegerVector group_sizes = data.attr("group_sizes") ; IntegerVector new_group_sizes(ngroups); for( int i=0, k=0; i<ngroups; i++){ IntegerVector idx = new_indices[i] ; IntegerVector new_group_index = seq(k, k + idx.size() - 1 ); for( int j=0; j<idx.size(); j++, k++){ master_index[k] = idx[j] ; } new_indices[i] = new_group_index ; new_group_sizes[i] = idx.size() ; } DataFrame res = DataFrameSubsetVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ; res.attr( "labels" ) = labels ; res.attr( "indices" ) = new_indices ; res.attr( "vars" ) = data.attr("vars" ) ; res.attr( "group_sizes" ) = new_group_sizes ; res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ; res.attr( "drop" ) = data.attr("drop") ; return res ; } if( dots.size() == 0 || data.nrows() == 0) return data ; int nargs = dots.size() ; if( is<GroupedDataFrame>(data) ){ nargs += GroupedDataFrame(data).nvars() ; } List variables(nargs) ; LogicalVector ascending(nargs) ; int k = 0 ; if( is<GroupedDataFrame>(data) ){ GroupedDataFrame gdf(data); for( ; k< gdf.nvars(); k++) { ascending[k] = true ; String s = PRINTNAME(gdf.symbol(k)); variables[k] = data[s] ; } } for(int i=0; k<nargs; i++, k++){ const Lazy& lazy = dots[i] ; Shield<SEXP> call_( lazy.expr() ) ; SEXP call = call_ ; bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ; CallProxy call_proxy(is_desc ? CADR(call) : call, data, lazy.env()) ; Shield<SEXP> v(call_proxy.eval()) ; if( !white_list(v) ){ stop( "cannot arrange column of class '%s'", get_single_class(v) ) ; } if( Rf_inherits(v, "data.frame" ) ){ DataFrame df(v) ; int nr = df.nrows() ; if( nr != data.nrows() ){ stop( "data frame column with incompatible number of rows (%d), expecting : %d", nr, data.nrows() ); } } else if( Rf_isMatrix(v) ) { SEXP dim = Rf_getAttrib(v, Rf_install( "dim" ) ) ; int nr = INTEGER(dim)[0] ; if( nr != data.nrows() ){ stop( "matrix column with incompatible number of rows (%d), expecting : ", nr, data.nrows() ) ; } } else { if( Rf_length(v) != data.nrows() ){ stop( "incorrect size (%d), expecting : %d", Rf_length(v), data.nrows() ) ; } } variables[k] = v ; ascending[k] = !is_desc ; } OrderVisitors o(variables, ascending, nargs) ; IntegerVector index = o.apply() ; DataFrameSubsetVisitors visitors( data, data.names() ) ; List res = visitors.subset(index, data.attr("class") ) ; if( is<GroupedDataFrame>(data) ){ // so that all attributes are recalculated (indices ... ) // see the lazyness feature in GroupedDataFrame // if we don't do that, we get the values of the un-arranged data // set for free from subset (#1064) res.attr("labels") = R_NilValue ; res.attr( "vars" ) = data.attr("vars" ) ; return GroupedDataFrame(res).data() ; } SET_ATTRIB(res, strip_group_attributes(res)); return res ; }
//' @export // [[Rcpp::export]] List ensemble_discrete(int n_chains, IntegerVector y, double alpha, int k, int s, int n, int max_iter, int burnin, int thin, bool estimate_marginals, bool fixed_pars, bool parallel_tempering, bool crossovers, NumericVector temperatures, int swap_type, int swaps_burnin, int swaps_freq, NumericMatrix B, IntegerVector which_chains, IntegerVector subsequence){ // initialise ensemble of n_chains Ensemble_Discrete ensemble(n_chains, k, s, n, alpha, fixed_pars); // initialise transition matrices for all chains in the ensemble ensemble.initialise_pars(); if(fixed_pars){ ensemble.initialise_pars(B); } // parallel tempering initilisation if(parallel_tempering){ ensemble.activate_parallel_tempering(temperatures); } // initialise x ensemble.update_x(y, false); int index; int n_chains_out = which_chains.size(); int trace_length = (max_iter - burnin + (thin - 1)) / thin; int list_length = n_chains_out * trace_length; List tr_x(list_length), tr_pi(list_length), tr_A(list_length), tr_B(list_length), tr_switching_prob(list_length), tr_loglik(list_length), tr_loglik_cond(list_length), tr_alpha(list_length); Timer timer; nanotime_t t0, t1, t2, t3; NumericVector comp_times(3); for(int iter = 1; iter <= max_iter; iter++){ t0 = timer.now(); ensemble.update_pars(y); t1 = timer.now(); ensemble.update_x(y, estimate_marginals && (iter > burnin)); t2 = timer.now(); if(crossovers && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){ ensemble.do_crossover(); } if(parallel_tempering && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){ if(swap_type == 0) ensemble.swap_everything(); if(swap_type == 1) ensemble.swap_pars(); if(swap_type == 2) ensemble.swap_x(); } t3 = timer.now(); if((iter > burnin) && ((iter-1) % thin == 0)){ index = (iter - burnin - 1)/thin; ensemble.copy_values_to_trace(which_chains, tr_x, tr_pi, tr_A, tr_B, tr_alpha, tr_loglik, tr_loglik_cond, tr_switching_prob, index, subsequence); comp_times += 1.0/trace_length * NumericVector::create(t1-t0, t2-t1, t3-t2); comp_times[0] += 1.0/trace_length * (t1 - t0); comp_times[1] += 1.0/trace_length * (t2 - t1); if((iter-1) % swaps_freq == 0){ comp_times[2] += 1.0/trace_length * swaps_freq * (t3 - t2); } } if(iter % 1000 == 0) printf("iter %d\n", iter); } comp_times.attr("names") = CharacterVector::create("update pars", "update x", "swap/crossover"); ensemble.scale_marginals(max_iter, burnin); ListOf<NumericMatrix> tr_marginal_distr = ensemble.get_copy_of_marginals(which_chains); return List::create(Rcpp::Named("trace_x") = tr_x, Rcpp::Named("trace_pi") = tr_pi, Rcpp::Named("trace_A") = tr_A, Rcpp::Named("trace_B") = tr_B, Rcpp::Named("trace_alpha") = tr_alpha, Rcpp::Named("log_posterior") = tr_loglik, Rcpp::Named("log_posterior_cond") = tr_loglik_cond, Rcpp::Named("switching_prob") = tr_switching_prob, Rcpp::Named("marginal_distr") = tr_marginal_distr, Rcpp::Named("acceptance_ratio") = ensemble.get_acceptance_ratio(), Rcpp::Named("timer") = comp_times); }