Ejemplo n.º 1
0
// replacement for bip maybe more error tolerant slightly slower
// import: edge matrix, number of tips
// export: Descendants(x, 1:max(x$edge), "all")
// [[Rcpp::export]]
List bipCPP(IntegerMatrix orig, int nTips) {
    IntegerVector parent = orig( _, 0);
    IntegerVector children = orig( _, 1);
    int m = max(parent), j=0;
    // create list for results
    std::vector< std::vector<int> > out(m) ;
    std::vector<int> y;
    for(int i = 0; i<nTips; i++){
        out[i].push_back(i + 1L);
    }
    for(int i = 0; i<parent.size(); i++){
        j = parent[i] - 1L;
        if(children[i] > nTips){ 
            y = out[children[i] - 1L];
            out[j].insert( out[j].end(), y.begin(), y.end() );
        }
        else out[j].push_back(children[i]);
    }
    for(int i=0; i<m; ++i){
        sort(out[i].begin(), out[i].end());
    }
    return wrap(out);    // return the list
}
Ejemplo n.º 2
0
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{

  CharacterVector data_names = vec_names_or_empty(data);
  IntegerVector indices = names.match_in_table(data_names);

  int n = indices.size();
  for (int i = 0; i < n; i++) {

    int pos = indices[i];
    if (pos == NA_INTEGER) {
      bad_col(names[i], "is unknown");
    }

    SubsetVectorVisitor* v = subset_visitor(data[pos - 1], data_names[pos - 1]);
    visitors.push_back(v);

  }

}
Ejemplo n.º 3
0
//[[Rcpp::export]]
NumericMatrix partitionTree(IntegerVector parent, IntegerVector order, NumericVector weight, NumericVector height) {
    NumericMatrix rect(parent.size(), 4);
    int i;

    std::vector<Node*> nodes = createHierarchy(as< std::vector<int> >(parent), as< std::vector<int> >(order), as< std::vector<double> >(weight), as< std::vector<double> >(height));

    for (i = 0; i < nodes.size(); ++i) {
        nodes[i]->sortChildren();
    }

    Node* startNode = nodes[0]->getRoot();
    icicleLayout(startNode, 0, 0);

    for (i = 0; i < nodes.size(); ++i) {
        rect(i, 0) = nodes[i]->bounds.x;
        rect(i, 1) = nodes[i]->bounds.y;
        rect(i, 2) = nodes[i]->bounds.width;
        rect(i, 3) = nodes[i]->bounds.height;
        delete nodes[i];
    }

    return rect;
}
Ejemplo n.º 4
0
// **********************************************************//
//                  Calculate Dyadic statistics              //
// **********************************************************//
// [[Rcpp::export]]
List Dyadic(List history, IntegerVector node, int sender) {
  int nIP = history.size();
  int A = node.size();
  List IPmat(nIP);  
  
  for (int IP = 0; IP < nIP; IP++) {
  	 NumericMatrix dyadicmat_IP(A, 6);
  	 List historyIP = history[IP];
     NumericVector dyadic(6); 
     for (int receiver = 0; receiver < A; receiver++) {
          if (receiver != sender) {
        for (unsigned int l = 0; l < 3; l++) {
    		NumericMatrix historyIP_l = historyIP[l];
    		dyadic[l] = historyIP_l(sender, receiver);
    		dyadic[l+3] = historyIP_l(receiver, sender);
    	}
      dyadicmat_IP(receiver, _) = dyadic;
    }
     }
    IPmat[IP] = dyadicmat_IP;
  } 
  return IPmat;
}  
Ejemplo n.º 5
0
// [[Rcpp::export]]
double statPhist_C(IntegerVector haps, IntegerVector strata, NumericMatrix hapDist) {
  // function declarations
  IntegerMatrix table2D(IntegerVector, IntegerVector);
  NumericVector colSumC(NumericMatrix);
  
  LogicalVector hapsGood = !is_na(haps);
  LogicalVector strataGood = !is_na(strata);
  LogicalVector toUse = hapsGood & strataGood;
  haps = haps[toUse];
  strata = strata[toUse];
  
  // Extract summary values
  IntegerMatrix strataHapFreq = table2D(haps, strata);
  IntegerVector strataFreq = wrap(colSumC(wrap(strataHapFreq)));

  double ssWP = ssWPCalc(strataFreq, strataHapFreq, hapDist);
  double ssAP = ssAPCalc(strataFreq, strataHapFreq, hapDist);
  ssAP = ssAP - ssWP;
  
  // Calculate average sample size correction for among strata variance 
  //   Eqn 9a in paper, but modified as in Table 8.2.1.1 from Arlequin v3.5.1 manual
  //   (denominator is sum{I} - 1)
  int numSamples = sum(strataFreq);
  int numStrata = strataFreq.size();
  NumericVector n2(numStrata);
  for(int i = 0; i < n2.size(); i++) n2[i] = pow(strataFreq[i], 2) / numSamples;
  double n = (numSamples - sum(n2)) / (numStrata - 1);
  
  // Calculate variance components (Table 1)
  //   Set MSD (SSD / df) equal to expected MSD
  double Vc = ssWP / (numSamples - numStrata);
  double Vb = ((ssAP / (numStrata - 1)) - Vc) / n;
  double est(Vb / (Vb + Vc));
  
  if(std::isnan(est)) est = NA_REAL;
  return est;
}
Ejemplo n.º 6
0
//[[Rcpp::export]]
DataFrame pathAttr(DataFrame paths, int ngroups) {
    LogicalVector solid(ngroups, true);
    LogicalVector constant(ngroups, true);

    int currentGroup, currentIndex, i;
    IntegerVector group = paths["group"];
    NumericVector alpha = paths["edge_alpha"];
    NumericVector width = paths["edge_width"];
    IntegerVector lty = paths["edge_linetype"];
    CharacterVector colour = paths["edge_colour"];

    currentGroup = group[0];
    currentIndex = 0;

    for (i = 1; i < group.size(); ++i) {
        if (group[i] == currentGroup) {
            if (solid[currentIndex]) {
                solid[currentIndex] = lty[i] == 1 && lty[i] == lty[i-1];
            }
            if (constant[currentIndex]) {
                constant[currentIndex] = alpha[i] == alpha[i-1] &&
                    width[i] == width[i-1] &&
                    lty[i] == lty[i-1] &&
                    colour[i] == colour[i-1];
            }
        } else {
            currentGroup = group[i];
            ++currentIndex;
        }
    }

    return DataFrame::create(
        Named("solid") = solid,
        Named("constant") = constant
    );
}
Ejemplo n.º 7
0
//' Determine the most frequently occurring value in an integer vector
//'
//' @param x integer vector
//' @return integer mode of x
//' 
// [[Rcpp::export]]
int mode(IntegerVector x) {
    std::map<int,int> counts;
    int mode = NA_INTEGER;
    int modeCount = -1;
    
    for (int i = 0; i < x.size(); i++){
        if (!IntegerVector::is_na(x[i])) {
            if(counts.count(x[i]) > 0) {
                counts[x[i]] += 1;
            } else {
                counts[x[i]] = 1;
            }
        }
    }
    
    for(auto pair: counts) {
        if (pair.second > modeCount) {
            modeCount = pair.second;
            mode = pair.first;
        }
    }
    
    return mode ;
}
Ejemplo n.º 8
0
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const IntegerVector& indices) :
  data(data_),
  visitors(),
  visitor_names()
{

  CharacterVector data_names = vec_names_or_empty(data);

  int n = indices.size();
  for (int i = 0; i < n; i++) {

    int pos = indices[i];

    check_range_one_based(pos, data.size());

    const SymbolString& name = data_names[pos - 1];

    SubsetVectorVisitor* v = subset_visitor(data[pos - 1], name);
    visitors.push_back(v);
    visitor_names.push_back(name);

  }

}
Ejemplo n.º 9
0
// [[Rcpp::export]]
NumericVector maskUV(NumericMatrix U, NumericMatrix V, IntegerVector is, IntegerVector js)
{
	// Get the length of the entries list and the rank of UV'
	int l = is.size();
	int r = U.ncol();
	
	// Initialize the output vector to all zeros
	NumericVector maskUV(l,0.0);
	
	// Loop over non-zero entries and compute output vector
	int i = is(1)-1;
	int j = js(1)-1;
	for(int n = 0; n < l; n++)
	{
		i = is(n)-1; // subtract 1 since R arrays start at 1
		j = js(n)-1;
		maskUV(n) = 0;
		for(int k = 0; k < r; k++)
		{
			maskUV(n) += U(i,k)*V(j,k);
		}
	}
	return maskUV;
}
void transformCppIndexes(IntegerVector& indexes) {
  if (!Rf_isNull(indexes) && indexes.size() > 0) {
    std::transform(indexes.begin(), indexes.end(), indexes.begin(),
                   std::bind2nd(std::plus<int>(), 1));  
  }
}
// [[Rcpp::export]]
List starvingforager_eventNM(
int L,          //Lattice dim
int t_term,     //Terminal time
double alpha,   //Resource growth rate
double K,       //Resource carrying capacity
double sigma,   //Starvation rate
double rho,     //Recovery rate
double lambda,  //Growth rate
double mu,      //Mortality rate
IntegerVector ind_vec, //Initial vector of states
IntegerVector loc_vec //Initial vector of locations
) {
    //Dimension of the lattice
    int dim = 2;
    //Lattice size
    double size = pow(L-2,dim);
    //Initial time
    double t = 0;

    double max;
    double min;


    //Output Lists
    List ind_out(1);
    List loc_out(1);
    NumericVector t_out(1);
    //The initial state
    ind_out(0) = ind_vec;
    loc_out(0) = loc_vec;
    t_out(0) = 0;
    //ind_vec: the vector of individual states... 0 = resource, 1=starver, 2=full
    //pos_vec: the vector of individual locations

    //Initial count of how many resouces, starvers, and full in this timestep??
    //Count the number of individual R + S + F
    int tot = ind_vec.size();
    double R = 0.L;
    double S = 0.L;
    double F = 0.L;
    // double Rp;
    // double Sp;
    // double Fp;
    for (int i=0;i<tot;i++) {
        if (ind_vec(i) == 0) {
            R = R + 1.L;
        }
        if (ind_vec(i) == 1) {
            S = S + 1.L;
        }
        if (ind_vec(i) == 2) {
            F = F + 1.L;
        }
    }

    //R,S,F are thus densities over the landscape of size 'size'
    // R = R/size;
    // S = S/size;
    // F = F/size;

    double R_pr_line;
    double S_pr_line;
    double F_pr_line;

    //Iterate over time
    //The loop stops when t > t_term-1... and will record the last value
    int tic = 1;
    while (t < (t_term-1)) {

        //Construct probability lines, which are a function of R, S, F

        //Grow <-----> Consumed
        R_pr_line = (alpha*(K-R))/((alpha*(K-R)) + (F + S));
        //R_pr_line(1) = R_pr_line(0) + ((F + S)/((alpha*(K-R)) + (F + S) + Dr));
        //R_pr_line(2) = R_pr_line(1) + (Dr/((alpha*(K-R)) + (F + S) + Dr));

        //Recover <-----> Mortality
        S_pr_line = (rho*R)/(rho*R + mu);
        //S_pr_line(1) = S_pr_line(0) + (mu/(rho*R + mu + Ds));
        //S_pr_line(2) = S_pr_line(1) + (Ds/(rho*R + mu + Ds));

        //Grow <-----> Starve
        F_pr_line = lambda/(lambda+sigma*(K-R));
        //F_pr_line(1) = F_pr_line(0) + ((sigma*(K-R))/(lambda+sigma*(K-R)+Df));
        //F_pr_line(2) = F_pr_line(1) + (Df/(lambda+sigma*(K-R)+Df));


        //Initiate variables
        double dt;

        //Randomly select an individual (R,S,F) with probability 1/N
        //ind thus represents the POSITION of the individual
        //Update total number of individuals
        tot = ind_vec.size();
        max = (double)(tot - 1);
        min = 0.L;
        int id = min + (rand() % (int)(max - min + 1));

        int state;
        int location;

        double draw_event;
        //If ind is a resource...
        if (ind_vec(id) == 0) {
            state = 0;
            location = loc_vec(id);

            //Draw a random event
            //Grow, become consumed or move?
            draw_event = ((double) rand() / (RAND_MAX));

            //Grow
            if (draw_event < R_pr_line) {
                //Append a new resource to the END of the vector
                ind_vec.push_back(state);
                //Append the resource's location to the END of the vector
                loc_vec.push_back(location);
                //Update Tally
                R = R + 1; //(1.L/size);
            }
            //Become consumed!!!!
            if ((draw_event >= R_pr_line) && (draw_event < 1.L)) {
                //Remove the consumed resource from the state vector
                ind_vec.erase(id);
                //Remove the consumed resource form the location vector
                loc_vec.erase(id);
                //Update Tally
                R = R - 1; //(1.L/size);
            }
            dt = 1.L/((alpha*(K-R)) + (F + S));
        }
        //If ind is a starver...
        if (ind_vec(id) == 1) {
            state = 1;
            location = loc_vec(id);

            //Draw a random event
            //Recover, die, or move??
            draw_event = ((double) rand() / (RAND_MAX));

            //Recover
            if (draw_event < S_pr_line) {
                //Update the state from starver to full
                ind_vec(id) = 2;
                //Update Tally
                S = S - 1; //(1.L/size);
                F = F + 1; //(1.L/size);
            }
            //Die
            if ((draw_event >= S_pr_line) && (draw_event < 1.L)) {
                //Remove the consumed resource from the state vector
                ind_vec.erase(id);
                //Remove the consumed resource form the location vector
                loc_vec.erase(id);
                //Update Tally
                S = S - 1; //(1.L/size);
            }
            dt = 1.L/(rho*R + mu);
        }
        //If ind is Full...
        if (ind_vec(id) == 2) {
            state = 2;
            location = loc_vec(id);

            //Draw a random event
            //Grow, starve, or move?
            draw_event = ((double) rand() / (RAND_MAX));

            //Grow
            if (draw_event < F_pr_line) {
                //Append a new resource to the END of the vector
                ind_vec.push_back(state);
                //Append the resource's location to the END of the vector
                loc_vec.push_back(location);
                F = F + 1; //(1.L/size);
            }
            //Starve
            if ((draw_event >= F_pr_line) && (draw_event < 1.L)) {
                //Update the state from full to starver
                ind_vec(id) = 1;
                //Update Tally
                F = F - 1; //(1.L/size);
                S = S + 1; //(1.L/size);
            }
            dt = 1.L/(lambda+sigma*(K-R));
        }

        //Advance time
        t = t + dt;
        //Rcout << "t = " << dt << std::endl;
        //Update output
        ind_out.push_back(ind_vec);
        loc_out.push_back(loc_vec);
        t_out.push_back(t);
        tic = tic + 1;

    } //end while loop over t

    List cout(3);
    cout(0) = ind_out;
    cout(1) = loc_out;
    cout(2) = t_out;
    return(cout);

}
Ejemplo n.º 12
0
 IntegerVector integer_initializer_list(){
     IntegerVector x = {0,1,2,3} ;
 	for( int i=0; i<x.size(); i++) x[i] = x[i]*2 ;
 	return x ;
 }
Ejemplo n.º 13
0
//[[Rcpp::depends("RcppArmadillo")]]
// [[Rcpp::export]]
arma::vec fdlmLogLik(arma::mat Y, arma::mat xFix, arma::mat xDyn, double nFactors, 
Rcpp::List hyperparms, 
arma::mat Y_NA, IntegerVector whichNA, NumericVector Beta, NumericVector Lambda, 
arma::mat psi, double discW)
{
  int T = Y.n_rows;
  int q = Y.n_cols;
  int pDyn = xDyn.n_cols;
  int pFix = xFix.n_cols;
  int r = pDyn*q;
  int N = psi.n_rows;
  
  arma::cube Beta_(Beta.begin(), pFix, q, N, false);
  arma::cube Lambda_(Lambda.begin(), q, nFactors, N, false);
  std::cout << "\nrodou ate Beta_ e Lambda_\n";
  
  double constants = -(q/2)*log2pi;
  arma::vec nloglik(N);
  
  arma::mat V(q, q);
  arma::mat VInv(q, q);
  std::cout << "\nrodou ate depois de V e VInv\n";
  
  arma::vec mm(r);
  arma::mat CC(r, r);
  std::cout << "\nrodou ate depois de mm e CC.\n";
  arma::vec aa(r);
  arma::mat RR(r, r);
  arma::vec ff(q);
  arma::mat QQ(q, q);
  std::cout << "\nrodou ate depois de ff e QQ.\n";
  arma::mat FF;
  arma::vec ee;
  arma::mat AA;
  arma::mat I_q = arma::eye(q, q);
  std::cout << "\nrodou ate depois de diagmat.\n";
  
  //arma::vec z;
  //double rootisum;
  arma::mat Eigvec;
  arma::vec eigval;
  mm = as<arma::vec>(hyperparms["m0"]);
  CC = as<arma::mat>(hyperparms["C0"]);
  
  
  arma::mat Yn(T, q);
  arma::mat En(T, q);
  Yn = Y;
  
  //arma::vec nloglik_chol(N);
  
  std::cout << "\nrodou ate antes do for.\n";
  for(int n = 0; n < N; n++){
    double loglik = 0.0;
    //double loglik_chol = 0.0;  
    
    if(whichNA.size()>0){
      Yn.elem(as<arma::uvec>(whichNA)) = arma::trans(Y_NA.row(n));  
    }
    
    En = Yn - xFix * Beta_.slice(n);
    V = Lambda_.slice(n) * arma::trans(Lambda_.slice(n)) + arma::diagmat(psi.row(n));
    VInv = arma::inv(symmatu(V));
    
    for(int k = 1; k < T+1; k++){
    //evolucao
    aa = mm;
    RR = CC/discW;
    
    //predicao
    FF = kron(I_q, xDyn.row(k-1));
    ff = arma::trans(FF) * aa;
    QQ = arma::symmatu(arma::trans(FF) * RR * FF + V);
    
    //verossimilhanca
    /*
    arma::mat rooti = arma::trans(arma::inv(trimatu(arma::chol(QQ))));
    double rootisum = arma::sum(log(rooti.diag()));
    arma::vec z = rooti * arma::trans( En.row(k-1) - ff.t()) ;    
    loglik_chol += constants - 0.5 * arma::sum(z%z) + rootisum; 
    */
    arma::eig_sym(eigval, Eigvec, QQ);
    double rootisum = -0.5*arma::sum(log(eigval));
    arma::vec z = arma::trans( (En.row(k-1) - ff.t()) * Eigvec * diagmat(1/sqrt(eigval)) );
    loglik += constants - 0.5*arma::sum(z%z) + rootisum;
    
    //atualizacao
    //CC = RR - RR * FF * arma::inv(QQ) * arma::trans(FF) * RR;
    CC = arma::inv(FF * VInv * arma::trans(FF) + arma::inv(RR));
    //arma::eig_sym(eigval, Eigvec, FF * VInv * arma::trans(FF));
    AA = CC * FF * VInv;
    mm = aa + AA*(arma::trans(En.row(k-1) - ff.t()));
    }
    nloglik(n) = loglik;
    //nloglik_chol(n) = loglik_chol;
  }
  
  return nloglik;
}
Ejemplo n.º 14
0
// [[Rcpp::export]]
SEXP combine_vars(CharacterVector vars, ListOf<IntegerVector> xs) {
  VarList selected(vars.size());
  if (xs.size() == 0)
    return IntegerVector::create();

  // Workaround bug in ListOf<>; can't access attributes
  SEXP raw_names = Rf_getAttrib(xs, Rf_mkString("names"));
  CharacterVector xs_names;
  if (raw_names == R_NilValue) {
    xs_names = CharacterVector(xs.size(), "" );
  } else {
    xs_names = raw_names ;
  }

  // If first component is negative, pre-fill with existing vars
  if (vector_sign(xs[0]) == -1) {
    for (int j = 0; j < vars.size(); ++j) {
      selected.add(j + 1, vars[j]);
    }
  }

  for (int i = 0; i < xs.size(); ++i) {
    IntegerVector x = xs[i];
    if (x.size() == 0) continue;

    int sign = vector_sign(x);

    if (sign == 0)
      stop("Each argument must yield either positive or negative integers");

    if (sign == 1) {
      bool group_named = xs_names[i] != "";
      bool has_names = x.attr("names") != R_NilValue;
      if (group_named) {
        if (x.size() == 1) {
          selected.update(x[0], xs_names[i]);
        } else {
          // If the group is named, children are numbered sequentially
          for (int j = 0; j < x.size(); ++j) {
            std::stringstream out;
            out << xs_names[i] << j + 1;
            selected.update(x[j], out.str());
          }
        }
      } else if (has_names) {
        CharacterVector names = x.names() ;
        for (int j = 0; j < x.size(); ++j) {
          selected.update(x[j], names[j]);
        }
      } else {
        for (int j = 0; j < x.size(); ++j) {
          int pos = x[j];
          if (pos < 1 || pos > vars.size())
            stop("Position must be between 0 and n");

          // Add default name, if not all ready present
          if (!selected.has(pos))
            selected.update(pos, vars[pos - 1]);
        }
      }
    } else {
      for (int j = 0; j < x.size(); ++j) {
        selected.remove(-x[j]);
      }
    }
  }

  return selected;
}
Ejemplo n.º 15
0
// [[Rcpp::export]]
List arrange_impl( DataFrame data, List args, DataDots dots ){
    check_valid_colnames(data) ;
    assert_all_white_list(data) ;
    
    // special case arrange() with no arguments for grouped data
    if( dots.size() == 0 && is<GroupedDataFrame>(data) ){
        DataFrame labels( data.attr( "labels" ) );
        OrderVisitors o(labels) ;
        IntegerVector index = o.apply() ;
        
        // reorganize
        labels = DataFrameVisitors( labels, labels.names() ).subset( index, labels.attr("class") );
        
        ListOf<IntegerVector> indices( data.attr("indices") ) ;
        int ngroups = indices.size() ;
        List new_indices(ngroups) ;
        IntegerVector master_index(data.nrows()) ;
        for( int i=0; i<ngroups; i++){
            new_indices[index[i]] = indices[i] ;
        }
        IntegerVector group_sizes = data.attr("group_sizes") ;
        IntegerVector new_group_sizes(ngroups); 
        for( int i=0, k=0; i<ngroups; i++){
            IntegerVector idx = new_indices[i] ;
            IntegerVector new_group_index = seq(k, k + idx.size() - 1 );
            for( int j=0; j<idx.size(); j++, k++){
                master_index[k] = idx[j] ;    
            }
            new_indices[i] = new_group_index ;
            new_group_sizes[i] = idx.size() ;
        }
        
        DataFrame res = DataFrameVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ;
        res.attr( "labels" )  = labels ;
        res.attr( "indices" ) = new_indices ;
        res.attr( "vars"    ) = data.attr("vars" ) ;
        res.attr( "group_sizes" ) = new_group_sizes ;
        res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ;
        res.attr( "drop" ) = data.attr("drop") ;
        return res ;
    }
    
    if( dots.size() == 0 || data.nrows() == 0) return data ;
    
    int nargs = dots.size() ;
    if( is<GroupedDataFrame>(data) ){
        nargs += GroupedDataFrame(data).nvars() ;
    }
    
    List variables(nargs) ;
    LogicalVector ascending(nargs) ;
    
    int k = 0 ;
    if( is<GroupedDataFrame>(data) ){
        GroupedDataFrame gdf(data);
        for( ; k< gdf.nvars(); k++) {
            ascending[k] = true ;
            
            String s = PRINTNAME(gdf.symbol(k));
            variables[k] = data[s] ; 
        }
    }            
    
    for(int i=0; k<nargs; i++, k++){
        Shelter<SEXP> __ ;
    
        SEXP call = args[dots.expr_index(i)] ;
        bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ;
        
        CallProxy call_proxy(is_desc ? CADR(call) : call, data, dots.envir(i)) ;
        
        SEXP v = __(call_proxy.eval()) ;
        if( !white_list(v) || TYPEOF(v) == VECSXP ){
            std::stringstream ss ;
            ss << "cannot arrange column of class '"
               << get_single_class(v) 
               << "'" ;
            stop(ss.str()) ;
        }
        
        if( Rf_length(v) != data.nrows() ){
            std::stringstream s ;
            s << "incorrect size ("
              << Rf_length(v)
              << "), expecting :"
              << data.nrows() ;
            stop(s.str()) ;
        }
        variables[k] = v ;
        ascending[k] = !is_desc ;
    }
    OrderVisitors o(variables, ascending, nargs) ;
    IntegerVector index = o.apply() ;
    
    DataFrameVisitors visitors( data, data.names() ) ;
    List res = visitors.subset(index, data.attr("class") ) ;
    SET_ATTRIB(res, strip_group_attributes(res));
    return res ;
}
Ejemplo n.º 16
0
// [[Rcpp::export]]
double CramersV_C(IntegerVector x,IntegerVector y, bool Bias_Cor){
  //Counts the frequency
  std::map<std::pair<int, int>, int> counts_xy;
  std::map<int,int> counts_x;
  std::map<int,int> counts_y;

  int n = x.size();
  int i=0,j=0;
  
  //Read vectors
  for (i = 0; i <= n-1; ++i) {
    counts_x[x[i]]++;
    counts_y[y[i]]++;
    counts_xy[std::make_pair(x[i],y[i])]++;
  }
  
  //Calculates Chi Square Stats
  int unique_x = counts_x.size() -1, unique_y = counts_y.size() -1;
  IntegerVector uni_x = unique(x);
  IntegerVector uni_y = unique(y);
  
  double Exy = 0.0;
  double chisq=0.0;
  int Oxy = 0;
  int a=0,b=0,c=0,d=0;
  
  
  if (unique_x==0 || unique_y==0) {
    // only 1 unique value then return 1
    return 1;
  }
  else if (unique_x==1 && unique_y==1){
    // if there is only 2 unique value then use special method.
    if (counts_xy.find(std::make_pair(uni_x[0],uni_y[0])) != counts_xy.end()) {
      a = counts_xy.find(std::make_pair(uni_x[0],uni_y[0]))->second;
    }
    if (counts_xy.find(std::make_pair(uni_x[0],uni_y[1])) != counts_xy.end()) {
      b = counts_xy.find(std::make_pair(uni_x[0],uni_y[1]))->second;
    }
    if (counts_xy.find(std::make_pair(uni_x[1],uni_y[0])) != counts_xy.end()) {
      c = counts_xy.find(std::make_pair(uni_x[1],uni_y[0]))->second;
    }
    if (counts_xy.find(std::make_pair(uni_x[1],uni_y[1])) != counts_xy.end()) {
      d = counts_xy.find(std::make_pair(uni_x[1],uni_y[1]))->second;
    }
    
    Exy = counts_x.find(uni_x[0])->second * counts_x.find(uni_x[1])->second * counts_y.find(uni_y[0])->second * counts_y.find(uni_y[1])->second;
    chisq = (a*d-b*c)* n / Exy;
  }
  else {
    for (i=0;i<=unique_x;++i){
      for (j=0;j<=unique_y;++j){
        Exy = (double)counts_x.find(uni_x[i])->second * (double)counts_y.find(uni_y[j])->second / (double)n ;
        if (counts_xy.find(std::make_pair(uni_x[i],uni_y[j])) != counts_xy.end()) {
          Oxy = counts_xy.find(std::make_pair(uni_x[i],uni_y[j]))->second;
        }
        else {
          Oxy=0;
        }
        chisq  = chisq +(Oxy - Exy)*(Oxy - Exy)/Exy;
       }
    }
  }
  
  if (Bias_Cor) {
    chisq = std::max((double)0,chisq-(double)(unique_x)*(unique_y)/(n-1) );
    unique_x = unique_x - (unique_x*unique_x) /(n-1);
    unique_y = unique_y - (unique_y*unique_y) /(n-1);
  }
  return std::sqrt(abs(chisq)/((double)n * std::min(unique_x, unique_y)));
}
Ejemplo n.º 17
0
//' Wear Time Classification
//' 
//' Classifies wear time vs. non-wear time based on a vector of accelerometer 
//' count values.
//' 
//' If \code{nci = FALSE}, the algorithm uses a moving window to go through 
//' every possible interval of length \code{window} in \code{counts}. Any 
//' interval in which no more than \code{tol} counts are non-zero, and those 
//' are still < \code{tol.upper}, is classified as non-wear time. 
//' 
//' If \code{nci = TRUE}, non-wear time is classified according to the algorithm 
//' used in the NCI's SAS programs. Briefly, this algorithm defines a non-wear 
//' period as an interval of length \code{window} that starts with a count value 
//' of 0, does not contain any periods with \code{(tol + 1)} consecutive 
//' non-zero count values, and does not contain any counts > \code{tol.upper}. 
//' If these criteria are met, the non-wear period continues until there are 
//' \code{(tol + 1)} consecutive non-zero count values or a single count value > 
//' \code{tol.upper}.
//' 
//' 
//' @param counts Integer vector with accelerometer count values.
//'
//' @param window Integer value specifying minimum length of a non-wear 
//' period.
//' 
//' @param tol Integer value specifying tolerance for non-wear algorithm, i.e. 
//' number of seconds/minutes with non-zero counts allowed during a non-wear 
//' interval.
//' 
//' @param tol_upper Integer value specifying maximum count value for a 
//' second/minute with non-zero counts during a non-wear interval.
//' 
//' @param nci Logical value for whether to use algorithm from NCI's SAS 
//' programs. See \bold{Details}.
//' 
//' @param days_distinct Logical value for whether to treat each day of data as 
//' distinct, as opposed to analyzing the entire monitoring period as one 
//' continuous segment. For minute-to-minute counts, strongly recommend setting 
//' to \code{FALSE} to correctly classify time near midnight.
//' 
//' @param units_day Integer value specifying how many data point are in a day. 
//' Typically either 1440 or 86400 depending on whether count values are 
//' minute-to-minute or second-to-second.
//' 
//' 
//' @return Integer vector with 1's for valid wear time and 0's for non-wear 
//' time.
//' 
//' 
//' @references 
//' National Cancer Institute. Risk factor monitoring and methods: SAS programs 
//' for analyzing NHANES 2003-2004 accelerometer data. Available at: 
//' \url{http://riskfactor.cancer.gov/tools/nhanes_pam}. Accessed Aug. 19, 2018.
//' 
//' Acknowledgment: This material is based upon work supported by the National 
//' Science Foundation Graduate Research Fellowship under Grant No. DGE-0940903.
//' 
//' 
//' @examples
//' # Load accelerometer data for first 5 participants in NHANES 2003-2004
//' data(unidata)
//' 
//' # Get data from ID number 21005
//' counts.part1 <- unidata[unidata[, "seqn"] == 21005, "paxinten"]
//' 
//' # Identify periods of valid wear time
//' weartime.flag <- weartime(counts = counts.part1)
//' 
//' 
//' @export
// [[Rcpp::export]]
IntegerVector weartime(IntegerVector counts, int window = 60, int tol = 0, 
                       int tol_upper = 99, bool nci = false, 
                       bool days_distinct = false, int units_day = 1440) {
  
  // Get length(counts) and initialize output vector starting with all 1's
  int n = counts.size();
  IntegerVector out(n, 1);
  
  // Use appropriate version of algorithm given days_distinct, tol, and nci
  if (! days_distinct) {
    
    if (tol == 0) {
      
      int zeros = 0;
      for (int b = 0; b < n; ++b) {
        if (counts[b] == 0) zeros +=1;
        else {
          if (zeros >= window)
            for (int c = b - zeros; c < b; ++c) out[c] = 0;
          zeros = 0;
        }
        if (b == n - 1 && zeros >= window)
          for (int d = b - zeros + 1; d < b + 1; ++d) out[d] = 0;
      }
      
    }
    else if (tol > 0) {
      
      if (! nci) {
        
        IntegerVector status(n);
        for (int b = 0; b < n; ++b) {
          int counts_b = counts[b];
          if (counts_b == 0) status[b] = 0;
          else if (counts_b <= tol_upper) status[b] = 1;
          else if (counts_b > tol_upper) status[b] = tol + 1;
        }
        int sum = 0;
        for (int c = 0; c < window; ++c) 
          sum += status[c];
        if (sum <= tol)
          for (int d = 0; d < window; ++d) out[d] = 0;
        for (int e = window; e < n; ++e) {
          sum = sum - status[e - window] + status[e];
          if (sum <= tol)
            for (int f = e - window + 1; f <= e; ++f) out[f] = 0;
        }
        
      }            
      else if (nci) {
        
        int zeros = 0;
        int tolcount = 0;
        int flag = 0;
        for (int b = 0; b < n; ++b) {
          int counts_b = counts[b];
          if (zeros == 0 && counts_b != 0) continue;
          if (counts_b == 0) {
            zeros += 1;
            tolcount = 0;
          }
          else if (counts_b > 0 && counts_b <= tol_upper) {
            zeros += 1;
            tolcount += 1;
          }
          else if (counts[b]>tol_upper) {
            zeros += 1;
            tolcount += 1;
            flag = 1;
          }
          if (tolcount > tol || flag == 1 || b == n - 1) {
            if (zeros - tolcount >= window)
              for (int c = b - zeros + 1; c < b - tolcount + 1; ++c) out[c] = 0;
            zeros = 0;
            tolcount = 0;
            flag = 0;
          }
        }
        
      }
    }
  }
  else {
    
    if (tol == 0) {
      
      int zeros = 0;
      for (int b = 0; b < n; ++b) {
        if (counts[b] == 0) zeros +=1;
        else {
          if (zeros >= window)
            for (int c = b - zeros; c < b; ++c) out[c] = 0;
          zeros = 0;
        }
        if ((b == n-1 || (b + 1) % units_day == 0) && zeros >= window)
          for (int d = b - zeros + 1; d < b + 1; ++d) out[d] = 0;
        if ((b + 1) % units_day == 0) zeros = 0;
      }
      
    }                
    else if (tol > 0) {
      
      if (! nci) {
        
        IntegerVector status(n);
        for (int b = 0; b < n; ++b) {
          int counts_b = counts[b];
          if (counts_b == 0) status[b] = 0;
          else if (counts_b <= tol_upper) status[b] = 1;
          else if (counts_b > tol_upper) status[b] = tol + 1;
        }
        int sum = 0;
        for (int c = 0; c < window; ++c)
          sum += status[c];
        if (sum <= tol)
          for (int d = 0; d < window; ++d) out[d] = 0;
        for (int e = window; e < n; ++e) {
          sum = sum - status[e - window] + status[e];
          if (sum <= tol && e % units_day > window - 2)
            for (int f = e - window + 1; f <= e; ++f) out[f] = 0;
        }
        
      }                
      else if (nci) {
        
        int zeros = 0;
        int tolcount = 0;
        int flag = 0;
        for (int b = 0; b < n; ++b) {
          int counts_b = counts[b];
          if (zeros == 0 && counts_b != 0) continue;
          if (counts_b == 0) {
            zeros += 1;
            tolcount = 0;
          }
          else if (counts_b > 0 && counts_b <= tol_upper) {
            zeros += 1;
            tolcount += 1;
          }
          else if (counts_b > tol_upper) {
            zeros += 1;
            tolcount += 1;
            flag = 1;
          }
          if (tolcount > tol || flag == 1 || b == n - 1 || (b + 1) % units_day == 0) {
            if (zeros-tolcount>=window)
              for (int c = b-zeros+1; c < b-tolcount+1; ++c) out[c] = 0;
            zeros = 0;
            tolcount = 0;
            flag = 0;
          }
        }
        
      }
    }
  }
  
  // Return output vector
  return(out);
}
Ejemplo n.º 18
0
//' @export
// [[Rcpp::export]]
List ensemble_FHMM(int n_chains, NumericMatrix Y, NumericMatrix w, NumericVector transition_probs, double alpha, 
                   int K, int k, int n, double h, int radius, 
                   int max_iter, int burnin, int thin, 
                   bool estimate_marginals, bool parallel_tempering, bool crossovers, 
                   NumericVector temperatures, int swap_type, int swaps_burnin, int swaps_freq, 
                   IntegerVector which_chains, IntegerVector subsequence, IntegerVector x, 
                   int nrows_crossover, bool HB_sampling, int nrows_gibbs, IntegerMatrix all_combs, 
                   bool update_pars){
  
  // initialise ensemble of n_chains
  Ensemble_Factorial ensemble(n_chains, K, k, n, alpha, h, radius, nrows_crossover, HB_sampling, nrows_gibbs, all_combs);
  
  ensemble.set_temperatures(temperatures);
  
  ensemble.initialise_pars(w, transition_probs, x, Y.nrow());
  ensemble.update_emission_probs(Y);
  
  int index;
  int n_chains_out = which_chains.size();
  int trace_length = (max_iter - burnin + (thin - 1)) / thin;
  int list_length = n_chains_out * trace_length;
  List tr_x(list_length), tr_X(list_length), tr_pi(list_length), tr_A(list_length), tr_mu(list_length), tr_sigma2(list_length), tr_alpha(list_length), tr_switching_prob(list_length), tr_loglik(list_length), tr_loglik_cond(list_length);
  List tr_crossovers(trace_length);
  
  Timer timer;
  nanotime_t t0, t1;
  t0 = timer.now();
  for(int iter = 1; iter <= max_iter; iter++){
    
    ensemble.update_x();
    ensemble.update_A();
    if(update_pars){
      ensemble.update_mu(Y);
    }
    ensemble.update_emission_probs(Y);
    
    if(crossovers && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){
      ensemble.do_crossover();
    }
    
    if((iter > burnin) && ((iter-1) % thin == 0)){
      index = (iter - burnin - 1)/thin;
      ensemble.copy_values_to_trace(which_chains, tr_x, tr_X, tr_pi, tr_A, tr_mu, tr_sigma2, tr_alpha, tr_loglik, tr_loglik_cond, tr_switching_prob, index, subsequence);
      if(crossovers && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){
        tr_crossovers[index] = ensemble.get_crossovers();  
      }
    }
    if(iter % 1000 == 0) printf("iter %d\n", iter);
  }
  
  //ensemble.scale_marginals(max_iter, burnin);
  //ListOf<NumericMatrix> tr_marginal_distr = ensemble.get_copy_of_marginals(which_chains);
  
  t1 = timer.now();
  return List::create(Rcpp::Named("trace_x") = tr_x, 
                      Rcpp::Named("trace_X") = tr_X,
                      Rcpp::Named("trace_pi") = tr_pi,
                      Rcpp::Named("trace_A") = tr_A,
                      Rcpp::Named("trace_mu") = tr_mu,
                      Rcpp::Named("trace_sigma2") = tr_sigma2,
                      Rcpp::Named("trace_alpha") = tr_alpha,
                      Rcpp::Named("log_posterior") = tr_loglik,
                      Rcpp::Named("log_posterior_cond") = tr_loglik_cond,
                      Rcpp::Named("switching_prob") = tr_switching_prob,
                      //Rcpp::Named("marginal_distr") = tr_marginal_distr, 
                      //Rcpp::Named("acceptance_ratio") = ensemble.get_acceptance_ratio(), 
                      Rcpp::Named("timer") = t1-t0, 
                      Rcpp::Named("crossovers") = tr_crossovers);
  
}
Ejemplo n.º 19
0
SEXP slice_not_grouped(const DataFrame& df, const LazyDots& dots) {
  CharacterVector names = df.names();

  const Lazy& lazy = dots[0];
  Call call(lazy.expr());
  CallProxy proxy(call, df, lazy.env());
  int nr = df.nrows();

  IntegerVector test = check_filter_integer_result(proxy.eval());

  int n = test.size();

  // count the positive and negatives
  CountIndices counter(nr, test);

  // just positives -> one based subset
  if (counter.is_positive()) {
    int n_pos = counter.get_n_positive();
    std::vector<int> idx(n_pos);
    int j=0;
    for (int i=0; i<n_pos; i++) {
      while (test[j] > nr || test[j] == NA_INTEGER) j++;
      idx[i] = test[j++] - 1;
    }

    return subset(df, idx, df.names(), classes_not_grouped());
  }

  // special case where only NA
  if (counter.get_n_negative() == 0) {
    std::vector<int> indices;
    DataFrame res = subset(df, indices, df.names(), classes_not_grouped());
    return res;
  }

  // just negatives (out of range is dealt with early in CountIndices).
  std::set<int> drop;
  for (int i=0; i<n; i++) {
    if (test[i] != NA_INTEGER)
      drop.insert(-test[i]);
  }
  int n_drop = drop.size();
  std::vector<int> indices(nr - n_drop);
  std::set<int>::const_iterator drop_it = drop.begin();

  int i = 0, j = 0;
  while (drop_it != drop.end()) {
    int next_drop = *drop_it - 1;
    while (j < next_drop) {
      indices[i++] = j++;
    }
    j++;
    ++drop_it;
  }
  while (i < nr - n_drop) {
    indices[i++] = j++;
  }

  DataFrame res = subset(df, indices, df.names(), classes_not_grouped());
  return res;

}
Ejemplo n.º 20
0
//' Tabulate methylation patterns.
//'
//' Tabulate methylation patterns of a given \code{size} from the elements of
//' the \code{z} slot of a \code{\link{SimulatedBS}} object.
//'
//' @param readID an integer vector of read IDs; the \code{readID} column of
//' an element of the \code{z} slot of a \code{\link{SimulatedBS}} object.
//' @param z an integer vector of methylation states; the \code{z} column of
//' an element of the \code{z} slot of a \code{\link{SimulatedBS}} object.
//' @param pos an integer vector of the positions of methylation loci sequenced
//' by each read; the \code{pos} column of an element of the \code{z} slots of
//' a \code{\link{SimulatedBS}} object.
//' @param size an integer greater than 1 specifying the size of the m-tuples
//' to be created.
//'
//' @return A list of tabulated methylation patterns. The name of each list
//' element is a comma-separated string of positions for that m-tuple and the
//' value of each element is a vector of associated counts (the order of this
//' vector is identical to that given by
//' \code{MethylationTuples:::\link[MethylationTuples]{.makeMethPatNames}}).
//'
//' @note \strong{WARNING}: Only adjacent m-tuples, with respect to the
//' sequenced methylation loci, are created. This means that (A) the list will
//' contain unobserved m-tuples (i.e., those with all counts set to zero) and
//' (B) that it will be possibly with paired-end reads to create m-tuples with
//' NIC > 0.
//' \strong{WARNING}: The special case where \code{size} = 1 is handled
//' separately by tabulating with the \code{data.table} package.
//'
//' @keywords internal
//'
// [[Rcpp::export(".tabulatez")]]
std::map<std::string, std::vector<int> > tabulatez(IntegerVector readID,
                                                   IntegerVector z,
                                                   IntegerVector pos,
                                                   int size) {

  // Argument checks.
  if (readID.size() != z.size()) {
    Rcpp::stop("length(readID) != length(z)");
  }
  if (readID.size() != pos.size()) {
    Rcpp::stop("length(readID) != length(pos)");
  }
  if (size < 2) {
    Rcpp::stop("size < 2.");
  }

  // Variable initialisations
  // mtuples is a map where the values are the key is the co-ordinates of the
  // m-tuple (pos1,pos2,...,posm) and the values are the counts of each
  // methylation pattern.
  std::map<std::string, std::vector<int> > mtuples;
  std::stringstream mtuples_key;
  std::vector<int> mtuples_value(pow(2, size));

  // Initialise the map of all adjacent m-tuples
  IntegerVector unique_pos = clone(unique(pos));
  std::sort(unique_pos.begin(), unique_pos.end());
  int n = unique_pos.size() - size + 1;
  for (int i = 0; i < n; i++) {
    mtuples_key << unique_pos[i] << ",";
    for (int j = (i + 1); j < (i + size - 1); j++) {
      mtuples_key << unique_pos[j] << ",";
    }
    mtuples_key << unique_pos[(i + size - 1)];
    mtuples[mtuples_key.str()] = mtuples_value;
    mtuples_key.str(std::string());
  }

  // Fill the map with the counts of each methylation pattern
  int k = 0;
  int N = readID.size() - size + 1;
  int idx;
  int idx0 = (pow(2, size) - 1);
  while (k < N) {
    // Check that these 'size' positions are from the same read.
    if (readID[k] == readID[k + size - 1]) {
      // idx converts the methylation pattern, e.g., (1, 1), to the index of
      // the corresponding element in mtuples_value.
      idx = idx0;
      for (int l = k; l < (k + size); l++) {
        idx -= z[l] * pow(2, size - (l - k) - 1);
      }
      mtuples_key << pos[k] << ",";
      for (int m = (k + 1); m < (k + size - 1); m++) {
        mtuples_key << pos[m] << ",";
      }
      mtuples_key << pos[(k + size - 1)];
      mtuples[mtuples_key.str()][idx] += 1;
      mtuples_key.str(std::string());
      k += 1;
    } else {
      // Can jump the k-index because we know if the read doesn't contain a
      // tuple then the next positions in the read can't either.
      k += (size - 1);
    }
  }

  return mtuples;
}
Ejemplo n.º 21
0
// [[Rcpp::export]]
List splitDate(IntegerVector inn, // Starttimes - base data
               IntegerVector out, // Endtimes - base data
               IntegerVector event, // Event at end of interval 0/1 - base data
               IntegerVector mergevar, // Merge variable, multiple records can have same pnr - base data
               IntegerVector seq, // Vector of date values to split by
               IntegerVector varname // Value to be added to each split date (such as birthdate)     
) 
{ 
  std::vector<int> Omergevar;  
  Omergevar.reserve(mergevar.size()*seq.size());
  std::vector<int> Oinn;  // Starttimes output
  Oinn.reserve(mergevar.size()*seq.size());
  std::vector<int> Oout; // Endtimes output
  Oout.reserve(mergevar.size()*seq.size());
  std::vector<int> Oevent; // Event at end 0/1
  Oevent.reserve(mergevar.size()*seq.size());
  std::vector<int> Ovalue; // Value for output 0.1,2...
  Ovalue.reserve(mergevar.size()*seq.size());
  std::vector<int> seq_plus(seq.size()); // seq+value for each case
  
  
  for (int i=0; i<mergevar.size(); i++){ // Loop along base data;
    int seq_num=0; // Number in seq of the first record to create for each record in input
    for (int j=0; j<seq.size(); j++)  seq_plus[j]=seq(j)+varname[i]; // Final vector to split by defined by sum of vector and varname
    for (int ii=0; ii<seq.size(); ii++){// Create records - loop through seq
      seq_num++;// next seq starts with one
      if(seq_plus[ii]>=out(i)){//Seq_plus values >= record, output record and break
        Omergevar.push_back(mergevar(i));
        Oinn.push_back(inn(i));
        Ovalue.push_back(seq_num-1);
        Oout.push_back(out(i));
        Oevent.push_back(event(i));
        break; //Done with base record
      }   
      else
        if(inn(i)>seq_plus[ii] && ii==(seq.size()-1)){//past seq AND last seq - output and break
          Omergevar.push_back(mergevar(i));
          Oinn.push_back(inn(i));
          Ovalue.push_back(seq_num); // final seq-value
          Oout.push_back(out(i));
          Oevent.push_back(event(i)); 
          break;
        } 
        else
          if(inn(i)<seq_plus[ii] && out(i)>seq_plus[ii]){ //split situation - duration at least 1 day
            Omergevar.push_back(mergevar(i));
            Oinn.push_back(inn(i));
            Ovalue.push_back(seq_num-1); //value prior to seq
            Oout.push_back(seq_plus[ii]);
            Oevent.push_back(0); // no event
            // and reset start of base record ready for next value in seq_plus
            inn(i)=seq_plus[ii];
            if(ii==seq.size()-1){
              Omergevar.push_back(mergevar(i));
              Oinn.push_back(inn(i));
              Ovalue.push_back(seq_num); 
              Oout.push_back(out(i));
              Oevent.push_back(event(i));
              break;
            }  
          }
          else
            if(out(i)==seq_plus[ii] && event(i)==1){ // Also split with zero record in case of event 
              Omergevar.push_back(mergevar(i));
              Oinn.push_back(inn(i));
              Ovalue.push_back(seq_num-1); //value prior to seq
              Oout.push_back(seq_plus[ii]);
              Oevent.push_back(0); // no event
              // and reset start of base record ready for next value in seq_plus
              inn(i)=seq_plus[ii];
              if(ii==seq.size()-1){ // output of last record
                Omergevar.push_back(mergevar(i));
                Oinn.push_back(inn(i));
                Ovalue.push_back(seq_num); 
                Oout.push_back(out(i));
                Oevent.push_back(event(i)); 
                break;
              }
            } 
    }  //end seq-loop 
  } // end base-loop
  return (Rcpp::List::create(Rcpp::Named("pnrnum")=Omergevar, 
                             Rcpp::Named("inn") = Oinn,
                             Rcpp::Named("out") = Oout,
                             Rcpp::Named("event") = Oevent,
                             Rcpp::Named("value") = Ovalue));  
}
Ejemplo n.º 22
0
SEXP concatenate(const DataFrame& x, IntegerVector ind, bool factorsAsStrings) {

  int nrow = x.nrows();
  int n_ind = ind.size();

  // We coerce up to the 'max type' if necessary, using the fact
  // that R's SEXPTYPEs are also ordered in terms of 'precision'
  // Note: we convert factors to characters if necessary
  int max_type = 0;
  int ctype = 0;
  for (int i = 0; i < n_ind; ++i) {

    if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) {
      ctype = STRSXP;
    } else {
      ctype = TYPEOF(x[ind[i]]);
    }
    max_type = ctype > max_type ? ctype : max_type;
  }

  debug(printf("Max type of value variables is %s\n", Rf_type2char(max_type)));

  Armor<SEXP> tmp;
  Shield<SEXP> output(Rf_allocVector(max_type, nrow * n_ind));
  for (int i = 0; i < n_ind; ++i) {

    // a 'tmp' pointer to the current column being iterated over, or
    // a coerced version if necessary
    if (TYPEOF(x[ind[i]]) == max_type) {
      tmp = x[ind[i]];
    } else if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) {
      tmp = Rf_asCharacterFactor(x[ind[i]]);
    } else {
      tmp = Rf_coerceVector(x[ind[i]], max_type);
    }

    switch (max_type) {
      case INTSXP:
        DO_CONCATENATE(int);
      case REALSXP:
        DO_CONCATENATE(double);
      case LGLSXP:
        DO_CONCATENATE(int);
      case CPLXSXP:
        DO_CONCATENATE(Rcomplex);
      case STRSXP: {
        for (int j = 0; j < nrow; ++j) {
          SET_STRING_ELT(output, i * nrow + j, STRING_ELT(tmp, j));
        }
        break;
      }
      case VECSXP: {
        for (int j = 0; j < nrow; ++j) {
          SET_VECTOR_ELT(output, i * nrow + j, VECTOR_ELT(tmp, j));
        }
        break;
      }
    default:
      stop("Unsupported type (%s)", Rf_type2char(max_type));
    }
  }

  return output;
}
Ejemplo n.º 23
0
// [[Rcpp::export]]
List melt_dataframe(const DataFrame& data,
                    const IntegerVector& id_ind,
                    const IntegerVector& measure_ind,
                    String variable_name,
                    String value_name,
                    SEXP attrTemplate,
                    bool factorsAsStrings,
                    bool valueAsFactor,
                    bool variableAsFactor) {

  int nrow = data.nrows();

  CharacterVector data_names = as<CharacterVector>(data.attr("names"));

  int n_id = id_ind.size();
  debug(Rprintf("n_id == %i\n", n_id));

  int n_measure = measure_ind.size();
  debug(Rprintf("n_measure == %i\n", n_measure));

  // Don't melt if the value variables are non-atomic
  for (int i = 0; i < n_measure; ++i) {
    if (!Rf_isVector(data[measure_ind[i]])) {
      stop("Can't gather non-vector column %i", measure_ind[i] + 1);
    }
  }

  // The output should be a data.frame with:
  // number of columns == number of id vars + 'variable' + 'value',
  // with number of rows == data.nrow() * number of value vars
  List output = no_init(n_id + 2);

  // First, allocate the ID variables
  // we repeat each ID vector n_measure times
  for (int i = 0; i < n_id; ++i) {
    SEXP object = data[id_ind[i]];
    std::string var_name = std::string(data_names[id_ind[i]]);
    output[i] = rep_(object, n_measure, var_name);
  }

  // Now, we assign the 'variable' and 'value' columns

  // 'variable' is made up of repeating the names of the 'measure' variables,
  // each nrow times. We want this to be a factor as well.
  CharacterVector id_names = no_init(n_measure);
  for (int i = 0; i < n_measure; ++i) {
    id_names[i] = data_names[measure_ind[i]];
  }
  if (variableAsFactor) {
    output[n_id] = make_variable_column_factor(id_names, nrow);
  } else {
    output[n_id] = make_variable_column_character(id_names, nrow);
  }

  // 'value' is made by concatenating each of the 'value' variables
  output[n_id + 1] = concatenate(data, measure_ind, factorsAsStrings);
  if (!Rf_isNull(attrTemplate)) {
    Rf_copyMostAttrib(attrTemplate, output[n_id + 1]);
  }

  // Make the List more data.frame like

  // Set the row names
  output.attr("row.names") =
      IntegerVector::create(IntegerVector::get_na(), -(nrow * n_measure));

  // Set the names
  CharacterVector out_names = no_init(n_id + 2);
  for (int i = 0; i < n_id; ++i) {
    out_names[i] = data_names[id_ind[i]];
  }
  out_names[n_id] = variable_name;
  out_names[n_id + 1] = value_name;
  output.attr("names") = out_names;

  // Set the class
  output.attr("class") = "data.frame";

  return output;
}
Ejemplo n.º 24
0
int mergeSort(IntegerVector &v)
{
  IntegerVector temp(v.size());
  return mergeSortRek(v,0,v.size(),temp);
}
Ejemplo n.º 25
0
// Function to generate adjacency graph and count clusters
// [[Rcpp::export]]
int countpartitions(List aList) 
{   

  //Takes an adjacency list,
  //The vector of subset nodes
  //The number of subset nodes
						
  //initialize connCompVec   
  //Initialize visited indices
  IntegerVector visitedInd(aList.size());
  int indexVisit = 0;
  
  //Initialize connected components
  IntegerVector currConnComp(aList.size());

  //Initialize the number of connected components
  int numConnComp = 0;
  
  //Loop over nodes
  for(int i = 0; i < aList.size(); i++){
    
    //If i has not been visited...
    if(visitedInd[i] == 0){
      
      //List i as visited
      visitedInd[i] = 1;

      //Increase the number of connected components
      numConnComp++;

      //Add i to the connected component list
      currConnComp[indexVisit] = i;
      
      //increase index visit
      indexVisit++;
      
      //Count the number of nodes in the current connected component
      int nodeCount = indexVisit - 1;
      
      //Initialize a stopping variable:
      int toStop = 0;

      //While we don't stop
      while(toStop == 0){
	
	//get the neighbors of the next current comp
	IntegerVector listNeighs = aList[currConnComp[nodeCount]];
	
	//If listNeighs does not have length zero...
	int listLength = listNeighs.size();
	if(listLength > 0){
	  
	  //Add nodes of listLength to currConnComp
	  //and mark nodes as visited
	  for(int j = 0; j < listLength; j++){
	    if( visitedInd[listNeighs[j]] == 0){
	      currConnComp[indexVisit] = listNeighs[j];
	      visitedInd[listNeighs[j]] = 1;

	      //Increment indexVisit
	      indexVisit++;
	    }
	  }
	}
	
	//Increment nodeCount
	nodeCount++;

	//If currConnComp[nodeCount] is zero, then we must have new connected component
	//Also stop if we have too many guys.
	if(nodeCount == aList.size()){
	  toStop = 1;
	}
	else if(currConnComp[nodeCount] == 0 ){
	  toStop = 1;
	}
      }
    }
  }
  
  return numConnComp;
  
}
Ejemplo n.º 26
0
RcppModelData::RcppModelData(
        ModelType _modelType,
		const IntegerVector& _pid,
		const NumericVector& _y,
		const NumericVector& _z,
		const NumericVector& _time,
		const NumericVector& dxv, // dense
		const IntegerVector& siv, // sparse
		const IntegerVector& spv,
		const NumericVector& sxv,
		const IntegerVector& iiv, // indicator
		const IntegerVector& ipv,
		bool useTimeAsOffset
		) : ModelData(
                _modelType,
				_pid,
				_y,
				_z,
				_time,
				bsccs::make_shared<loggers::RcppProgressLogger>(),
				bsccs::make_shared<loggers::RcppErrorHandler>()
				) {
	if (useTimeAsOffset) {
	    // offset
//        real_vector* r = new real_vector();
        RealVectorPtr r = make_shared<RealVector>();
        push_back(NULL, r, DENSE);   
        r->assign(offs.begin(), offs.end()); // TODO Should not be necessary with shared_ptr
        setHasOffsetCovariate(true);
	    getColumn(0).add_label(-1);
	}			
				
	// Convert dense
	int nCovariates = static_cast<int>(dxv.size() / y.size());
	for (int i = 0; i < nCovariates; ++i) {
		push_back(
				static_cast<IntegerVector::iterator>(NULL), static_cast<IntegerVector::iterator>(NULL),
				dxv.begin() + i * y.size(), dxv.begin() + (i + 1) * y.size(),
				DENSE);
		getColumn(getNumberOfColumns() - 1).add_label(getNumberOfColumns() - (getHasOffsetCovariate() ? 1 : 0));
	}

	// Convert sparse
	nCovariates = spv.size() - 1;
	for (int i = 0; i < nCovariates; ++i) {

		int begin = spv[i];
		int end = spv[i + 1];

		push_back(
				siv.begin() + begin, siv.begin() + end,
				sxv.begin() + begin, sxv.begin() + end,
				SPARSE);
        getColumn(getNumberOfColumns() - 1).add_label(getNumberOfColumns() - (getHasOffsetCovariate() ? 1 : 0));				
	}

	// Convert indicator
	nCovariates = ipv.size() - 1;
	for (int i = 0; i < nCovariates; ++i) {

		int begin = ipv[i];
		int end = ipv[i + 1];

		push_back(
				iiv.begin() + begin, iiv.begin() + end,
				static_cast<NumericVector::iterator>(NULL), static_cast<NumericVector::iterator>(NULL),
				INDICATOR);
        getColumn(getNumberOfColumns() - 1).add_label(getNumberOfColumns() - (getHasOffsetCovariate() ? 1 : 0));				
	}

	this->nRows = y.size();
	
	// Clean out PIDs
	std::vector<int>& cpid = getPidVectorRef();
	
	if (cpid.size() == 0) {
	    for (size_t i = 0; i < nRows; ++i) {
	        cpid.push_back(i); // TODO These are not necessary; remove.
	    }
	    nPatients = nRows;
	} else {
    	int currentCase = 0;
    	int currentPID = cpid[0];
    	cpid[0] = currentCase;
    	for (size_t i = 1; i < pid.size(); ++i) {
    	    int nextPID = cpid[i];
    	    if (nextPID != currentPID) {
	            currentCase++;
	            currentPID = nextPID;
    	    }
	        cpid[i] = currentCase;
    	}
        nPatients = currentCase + 1;
    }    
}
Ejemplo n.º 27
0
void crossover2_mat(IntegerMatrix X, IntegerMatrix Y, int t, int n, IntegerVector which_rows){
  int m = which_rows.size();
  for(int i=t+1; i<n; i++){
    crossover_one_column(X, Y, i, which_rows, m);
  }
}
Ejemplo n.º 28
0
// [[Rcpp::export(".interp_genoprob_onechr")]]
NumericVector interp_genoprob_onechr(const NumericVector& genoprob,
                                     const NumericVector& map,
                                     const IntegerVector& pos_index)
{
    // get dimensions
    if(Rf_isNull(genoprob.attr("dim")))
        throw std::invalid_argument("genoprob should be a 3d array but has no dim attribute");
    const IntegerVector& d = genoprob.attr("dim");
    if(d.size() != 3)
        throw std::invalid_argument("genoprob should be a 3d array");
    const int n_ind = d[0];
    const int n_gen = d[1];
    const int matsize = n_ind * n_gen;
    const int n_pos = map.size();
    if(pos_index.size() != n_pos) {
        throw std::invalid_argument("Need length(map) == length(pos_index)");
    }

    NumericVector result(n_ind*n_gen*n_pos);
    result.attr("dim") = Dimension(n_ind, n_gen, n_pos);

    // find position to the left that has genoprobs
    IntegerVector left_index(n_pos);
    int last = -1;
    for(int pos=0; pos<n_pos; pos++) {
        if(pos_index[pos] >= 0) last = pos;
        left_index[pos] = last;
    }

    // find position to the right that has genoprobs
    IntegerVector right_index(n_pos);
    last = -1;
    for(int pos=n_pos-1; pos>=0; pos--) {
        if(pos_index[pos] >= 0) last = pos;
        right_index[pos] = last;
    }

    // copy or interpolate
    for(int pos=0; pos<n_pos; pos++) {
        if(pos_index[pos] >= 0) { // in the old genoprobs
            std::copy(genoprob.begin()+(pos_index[pos]*matsize),
                      genoprob.begin()+((pos_index[pos]+1)*matsize),
                      result.begin()+(pos*matsize));
        }
        else {
            double p,q;
            if(left_index[pos] < 0) { // off end to left
                p = 0.0;
                q = 1.0;
            }
            else if(right_index[pos] < 0) { // off end to right
                p = 1.0;
                q = 0.0;
            }
            else {
                double left_pos =  map[left_index[pos]];
                double right_pos = map[right_index[pos]];
                p = (right_pos - map[pos])/(right_pos - left_pos);
                q = (map[pos] - left_pos)/(right_pos - left_pos);
            }

            for(int ind=0; ind<n_ind; ind++) {
                for(int gen=0; gen<n_gen; gen++) {
                    result[ind + gen*n_ind + pos*matsize] = 0.0;
                    if(p > 0)
                        result[ind + gen*n_ind + pos*matsize] +=
                            (p*genoprob[ind + gen*n_ind + pos_index[left_index[pos]]*matsize]);
                    if(q > 0)
                        result[ind + gen*n_ind + pos*matsize] +=
                            (q*genoprob[ind + gen*n_ind + pos_index[right_index[pos]]*matsize]);
                }
            }
        }
    }

    return result;
}
Ejemplo n.º 29
0
// [[Rcpp::export]]
List arrange_impl( DataFrame data, LazyDots dots ){
    if( data.size() == 0 ) return data ;
    check_valid_colnames(data) ;
    assert_all_white_list(data) ;

    // special case arrange() with no arguments for grouped data
    if( dots.size() == 0 && is<GroupedDataFrame>(data) ){
        GroupedDataFrame gdata(data) ;
        data = gdata.data() ;
        DataFrame labels( data.attr( "labels" ) );
        OrderVisitors o(labels) ;
        IntegerVector index = o.apply() ;

        // reorganize
        labels = DataFrameSubsetVisitors( labels, labels.names() ).subset( index, labels.attr("class") );

        ListOf<IntegerVector> indices( data.attr("indices") ) ;
        int ngroups = indices.size() ;
        List new_indices(ngroups) ;
        IntegerVector master_index(data.nrows()) ;
        for( int i=0; i<ngroups; i++){
            new_indices[index[i]] = indices[i] ;
        }
        IntegerVector group_sizes = data.attr("group_sizes") ;
        IntegerVector new_group_sizes(ngroups);
        for( int i=0, k=0; i<ngroups; i++){
            IntegerVector idx = new_indices[i] ;
            IntegerVector new_group_index = seq(k, k + idx.size() - 1 );
            for( int j=0; j<idx.size(); j++, k++){
                master_index[k] = idx[j] ;
            }
            new_indices[i] = new_group_index ;
            new_group_sizes[i] = idx.size() ;
        }

        DataFrame res = DataFrameSubsetVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ;
        res.attr( "labels" )  = labels ;
        res.attr( "indices" ) = new_indices ;
        res.attr( "vars"    ) = data.attr("vars" ) ;
        res.attr( "group_sizes" ) = new_group_sizes ;
        res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ;
        res.attr( "drop" ) = data.attr("drop") ;
        return res ;
    }

    if( dots.size() == 0 || data.nrows() == 0) return data ;

    int nargs = dots.size() ;
    if( is<GroupedDataFrame>(data) ){
        nargs += GroupedDataFrame(data).nvars() ;
    }
    List variables(nargs) ;
    LogicalVector ascending(nargs) ;

    int k = 0 ;
    if( is<GroupedDataFrame>(data) ){
        GroupedDataFrame gdf(data);
        for( ; k< gdf.nvars(); k++) {
            ascending[k] = true ;

            String s = PRINTNAME(gdf.symbol(k));
            variables[k] = data[s] ;
        }
    }

    for(int i=0; k<nargs; i++, k++){
        const Lazy& lazy = dots[i] ;

        Shield<SEXP> call_( lazy.expr() ) ;
        SEXP call = call_ ;
        bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ;

        CallProxy call_proxy(is_desc ? CADR(call) : call, data, lazy.env()) ;

        Shield<SEXP> v(call_proxy.eval()) ;
        if( !white_list(v) ){
            stop( "cannot arrange column of class '%s'", get_single_class(v) ) ;
        }

        if( Rf_inherits(v, "data.frame" ) ){
            DataFrame df(v) ;
            int nr = df.nrows() ;
            if( nr != data.nrows() ){
                stop( "data frame column with incompatible number of rows (%d), expecting : %d", nr, data.nrows() );
            }
        } else if( Rf_isMatrix(v) ) {
            SEXP dim = Rf_getAttrib(v, Rf_install( "dim" ) ) ;
            int nr = INTEGER(dim)[0] ;
            if( nr != data.nrows() ){
                stop( "matrix column with incompatible number of rows (%d), expecting : ", nr, data.nrows() ) ;
            }
        } else {
            if( Rf_length(v) != data.nrows() ){
                stop( "incorrect size (%d), expecting : %d", Rf_length(v), data.nrows() ) ;
            }
        }
        variables[k] = v ;
        ascending[k] = !is_desc ;
    }
    OrderVisitors o(variables, ascending, nargs) ;
    IntegerVector index = o.apply() ;

    DataFrameSubsetVisitors visitors( data, data.names() ) ;
    List res = visitors.subset(index, data.attr("class") ) ;

    if( is<GroupedDataFrame>(data) ){
        // so that all attributes are recalculated (indices ... )
        // see the lazyness feature in GroupedDataFrame
        // if we don't do that, we get the values of the un-arranged data
        // set for free from subset (#1064)
        res.attr("labels") = R_NilValue ;
        res.attr( "vars" )  = data.attr("vars" ) ;
        return GroupedDataFrame(res).data() ;
    }
    SET_ATTRIB(res, strip_group_attributes(res));
    return res ;
}
Ejemplo n.º 30
0
//' @export
// [[Rcpp::export]]
List ensemble_discrete(int n_chains, IntegerVector y, double alpha, int k, int s, int n, 
                       int max_iter, int burnin, int thin, 
                       bool estimate_marginals, bool fixed_pars, bool parallel_tempering, bool crossovers, 
                       NumericVector temperatures, int swap_type, int swaps_burnin, int swaps_freq, NumericMatrix B, 
                       IntegerVector which_chains, IntegerVector subsequence){
  
  // initialise ensemble of n_chains
  Ensemble_Discrete ensemble(n_chains, k, s, n, alpha, fixed_pars);
  
  // initialise transition matrices for all chains in the ensemble
  ensemble.initialise_pars();
  if(fixed_pars){
    ensemble.initialise_pars(B);
  }
  
  // parallel tempering initilisation
  if(parallel_tempering){
    ensemble.activate_parallel_tempering(temperatures);
  }
  
  // initialise x
  ensemble.update_x(y, false);
  
  int index;
  int n_chains_out = which_chains.size();
  int trace_length = (max_iter - burnin + (thin - 1)) / thin;
  int list_length = n_chains_out * trace_length;
  List tr_x(list_length), tr_pi(list_length), tr_A(list_length), tr_B(list_length), tr_switching_prob(list_length), tr_loglik(list_length), tr_loglik_cond(list_length), tr_alpha(list_length);
  
  Timer timer;
  nanotime_t t0, t1, t2, t3;
  NumericVector comp_times(3);
  for(int iter = 1; iter <= max_iter; iter++){
    t0 = timer.now();
    ensemble.update_pars(y);
    t1 = timer.now();
    ensemble.update_x(y, estimate_marginals && (iter > burnin));
    t2 = timer.now();
    
    if(crossovers && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){
      ensemble.do_crossover();
    }
    if(parallel_tempering && (iter > swaps_burnin) && ((iter-1) % swaps_freq == 0)){
      if(swap_type == 0) ensemble.swap_everything();
      if(swap_type == 1) ensemble.swap_pars();
      if(swap_type == 2) ensemble.swap_x();
    }
    t3 = timer.now();
    
    if((iter > burnin) && ((iter-1) % thin == 0)){
      index = (iter - burnin - 1)/thin;
      ensemble.copy_values_to_trace(which_chains, tr_x, tr_pi, tr_A, tr_B, tr_alpha, tr_loglik, tr_loglik_cond, tr_switching_prob, index, subsequence);
      comp_times += 1.0/trace_length * NumericVector::create(t1-t0, t2-t1, t3-t2);
      comp_times[0] += 1.0/trace_length * (t1 - t0);
      comp_times[1] += 1.0/trace_length * (t2 - t1);
      if((iter-1) % swaps_freq == 0){
        comp_times[2] += 1.0/trace_length * swaps_freq * (t3 - t2);
      }
    }
    if(iter % 1000 == 0) printf("iter %d\n", iter);
  }
  comp_times.attr("names") = CharacterVector::create("update pars", "update x", "swap/crossover");
  
  ensemble.scale_marginals(max_iter, burnin);
  ListOf<NumericMatrix> tr_marginal_distr = ensemble.get_copy_of_marginals(which_chains);
  
  return List::create(Rcpp::Named("trace_x") = tr_x,
                      Rcpp::Named("trace_pi") = tr_pi,
                      Rcpp::Named("trace_A") = tr_A,
                      Rcpp::Named("trace_B") = tr_B,
                      Rcpp::Named("trace_alpha") = tr_alpha,
                      Rcpp::Named("log_posterior") = tr_loglik,
                      Rcpp::Named("log_posterior_cond") = tr_loglik_cond,
                      Rcpp::Named("switching_prob") = tr_switching_prob,
                      Rcpp::Named("marginal_distr") = tr_marginal_distr, 
                      Rcpp::Named("acceptance_ratio") = ensemble.get_acceptance_ratio(), 
                      Rcpp::Named("timer") = comp_times);
  
}