//'@title Decode the genetic representation of the edge assignment. //'@description Implementation is based on pseudo-code provided by [Julia Handl 2004]. //'@param clusters Integervector containing genetic representation of edges. //'@return Integervector containing the assigned cluster for each point. //'@references 2004 Julia Handl - Multiobjective clustering with automatic //'determination of the number of clusters. //' //'@examples //'clusters = c(2,3,1,5,4) //'decodeC(clusters) //' //'@export // [[Rcpp::export]] IntegerVector decodeC(IntegerVector clusters) { int currentCluster = 1; IntegerVector clusterAssignment(clusters.length()); IntegerVector previous(clusters.length()); for(int i = 0; i < clusters.length(); i++){ clusterAssignment(i) = -1; } for(int i = 0; i < clusters.length(); i++){ int ctr = 0; if(clusterAssignment(i) == -1){ clusterAssignment(i) = currentCluster; int neighbour = clusters(i) - 1; previous(ctr) = i; ctr += 1; while(clusterAssignment(neighbour) == -1){ previous(ctr) = neighbour; clusterAssignment(neighbour) = currentCluster; neighbour = clusters(neighbour) - 1; ctr += 1; } if(clusterAssignment(neighbour) != currentCluster){ ctr -= 1; while(ctr >= 0){ clusterAssignment(previous(ctr)) = clusterAssignment(neighbour); ctr -= 1; } }else{ currentCluster += 1; } } } return(clusterAssignment); }
NumericMatrix calc_term_sim_mat( IntegerVector anc_start, IntegerVector anc_stop, IntegerVector ancestors, NumericVector info, IntegerVector terms1, IntegerVector terms2 ) { NumericMatrix result(terms1.length(), terms2.length()); for (int i1 = 0; i1 < terms1.length(); i1++) { for (int i2 = 0; i2 < terms2.length(); i2++) { result(i1, i2) = 0.0; int t1 = terms1[i1]; int t2 = terms2[i2]; int cur_t2_anc_ind = anc_start[t2]; for (int a1_ind = anc_start[t1]; a1_ind < anc_stop[t1]; a1_ind++) { int a1 = ancestors[a1_ind]; while ((cur_t2_anc_ind < (anc_stop[t2]-1)) && (ancestors[cur_t2_anc_ind] < a1)) { cur_t2_anc_ind++; } if (ancestors[cur_t2_anc_ind] == a1) { result(i1, i2) = info[a1]; break; } } } } return result; }
// [[Rcpp::export]] List c_knnangles(NumericMatrix x, int k, IntegerVector from, IntegerVector to) { int i, j, l, m, pos, f, t; int n = x.nrow(); int dim = x.ncol(); double dx, dy, dz, d, ang, nnd; NumericVector angles(n); NumericVector angles2(n); NumericVector nndists(n); NumericVector knnd(k); IntegerVector knni(k); for(i=0; i < from.length(); i++) { f = from[i]-1; for(l=0; l< k; l++){ knnd[l] = DBL_MAX; knni[l] = -1; } for(j=0; j < to.length(); j++) { t = to[j]-1; if(t!=f) { d=0; for(l=0; l < dim; l++) d += pow(x(f,l)-x(t,l), 2); pos = k; for(l=0; l < k; l++){ if (d < knnd[l]){ pos = l; break; } } if(pos < k){ for(m=k-1; m > pos; m--){ knnd[m] = knnd[m-1]; knni[m] = knni[m-1]; } knnd[pos] = d; knni[pos] = t; } } } t = knni[k-1]; nnd = sqrt(knnd[k-1]); dx = x(t,0) - x(f,0); dy = x(t,1) - x(f,1); ang = atan2(dy, dx); if(ang<0) ang = 2*PI+ang; angles[f] = ang; nndists[f] = nnd; if(dim==3){ dz = x(t,2) - x(f,2); ang = acos(dz/nnd); angles2[f] = ang; } } if(dim==3) return List::create(angles, angles2, nndists); return List::create(angles, nndists); }
IntegerVector C(IntegerVector a, IntegerVector b){ int lena = a.length(); int lenb = b.length(); IntegerVector out(lena + lenb); for(int i = 0; i < lena+lenb; i++){ if(i < lena){ out(i) = a(i); } else { out(i) = b(i-lena); } } return(out); }
//'@title Zero-based Rcpp implementation of \code{which} for integer vectors. //'@param x Integer to be found. //'@param v IntegerVector in which to find \code{x}. //'@return indexVector IntegerVector containing all zero-based indexes of //'appearences of \code{x} in \code{v}. //' //'@examples //'x = 42; v = 40:44 //'whichC(x,v) //' //'@seealso //'which //' //'@export // [[Rcpp::export]] List whichCList(int x, IntegerVector v) { //Initially this vector has the same size as v and is later resized List indexVector(v.length()); int index = 0; for(int i = 0; i < v.length(); i++){ if(v(i) == x){ indexVector[index] = i + 1; index += 1; } } indexVector = resizeList(indexVector, index); return(indexVector); }
//' FITS image writer //' //' Writes a vector, matrix or 3D array to a FITS file as an image. //' The data is written to the primary HDU. //' // [[Rcpp::export]] int gv_writefits_img(NumericVector img, CharacterVector fits_name, CharacterVector hdu_name = "") { IntegerVector dim; if (!img.hasAttribute("dim")) { REprintf("ERROR: image has not been dimensioned.\n"); return 1; } dim = img.attr("dim"); if (dim.length() > 3) { REprintf("ERROR: dimension of more than 3 unsupported.\n"); return 1; } fitsfile *pfits=NULL; int err=0; std::string fname = as<std::string>(fits_name[0]); fits_create_file(&pfits, (char *) fname.c_str(), &err); if (err) { gv_print_fits_err(err); return err; } #ifdef GV_DEBUG Rcout << "Number of dim: " << dim.length() << std::endl; for (int i=0; i<dim.length(); i++) { Rcout << "Dim[" << i << "]: " << dim[i] << std::endl; } Rcout << "Number of elements: " << img.length() << std::endl; double *p = &(*img.begin()); for (int i=0; i<img.length(); i++) { Rcout << "*(p+" << i << ") = " << *(p+i) << std::endl; } #endif long longdim[3], startpix[3] = {1,1,1}; // default start for (int i=0; i<dim.length(); i++) longdim[i] = (long) dim[i]; // start writing to file fits_create_img(pfits, DOUBLE_IMG, dim.length(), longdim, &err); fits_write_pix(pfits, TDOUBLE, startpix, img.length(), &(*img.begin()), &err); fits_close_file(pfits, &err); return err; }
IntegerVector stratified_sample_int( IntegerVector strata_sizes, IntegerVector strata_sample_sizes ) { int num_strata = strata_sizes.length(); int total_sample_size = 0; int total_items = 0; for (int i = 0; i < num_strata; i++) { total_sample_size += strata_sample_sizes[i]; total_items += strata_sizes[i]; } IntegerVector result(total_sample_size); int set_from = 0; int min_val = 0; for (int i = 0; i < num_strata; i++) { set_sample( result, set_from, set_from + strata_sample_sizes[i], min_val, min_val + strata_sizes[i] ); set_from += strata_sample_sizes[i]; min_val += strata_sizes[i]; } return result; }
double sim_p( std::string type, RObject term_sets_data, bool use_mean, IntegerVector group, int min_its, int max_its, double signif, double dismiss ) { ReduceSim r(use_mean ? &add : &worst, use_mean ? &by_size : &identity, use_mean ? 0.0 : INFINITY); GroupSim* sim_mat = sim_matrix_from_data(type, r, term_sets_data); double sim = sim_mat->groupsim(group); simple_sampler simple = simple_sampler(sim_mat->population_size(), group.length()); Sampler* sampler = &simple; double p_val = p( sampler, sim_mat, sim, min_its, max_its, signif, dismiss ); delete sim_mat; return p_val; }
// [[Rcpp::export]] DoubleVector rcpp_correlate(IntegerVector& trial_id, DoubleVector& noise, DoubleVector& rho_vec) { double rho = rho_vec[0]; for(int i=1; i < trial_id.length(); i++) { if(trial_id[i-1] == trial_id[i]) noise[i] = noise[i-1]*rho + noise[i]*sqrt(1-pow(rho,2)); } return Rcpp::wrap(noise); }
CharacterVector reencode_factor(IntegerVector x) { CharacterVector levels(reencode_char(get_levels(x))); CharacterVector ret(x.length()); R_xlen_t nlevels = levels.length(); R_xlen_t len = x.length(); for (R_xlen_t i = 0; i < len; ++i) { int xi = x[i]; if (xi <= 0 || xi > nlevels) ret[i] = NA_STRING; else ret[i] = levels[xi - 1]; } return ret; }
double VectorSim::groupsim(IntegerVector group) { double agg = reducer.sim0; int n = group.length(); for (int i = 0; i < n; i++) { double sim = vec[group[i]]; agg = reducer.reduce(agg, sim); } return reducer.norm(agg, n); }
bool next(){ ++rpos; if (rpos == rlens[run]){ //end of the run, go to the next ++run; rpos = 0; if (run == rlens.length()) valid = false; return valid; } valid = true; return valid; }
//' Forward-backward algorithm //' //' Forward-backward algorithm using the scaling technique. //' That's more stable (and maybe even faster) than the method with the logarithm. //' Warning: this function overwrites the lliks matrix. //' @param initP matrix of initial probabilities: each column corresponds to a sequence //' @param trans transition matrix (rows are previous state, columns are next state) //' @param lliks matrix with emission probabilities for each datapoint and each state. //' Columns are datapoints and rows are states. //' @param seqlens length of each subsequence of datapoints (set this to ncol(lliks) //' if there is only one sequence). //' @param posteriors the posteriors matrix where the posteriors will be written. //' its value when the function is called does not matter, but it needs to have //' the right dimensions (rows are states and columns are observations). //' @param nthreads number of threads used. Sequences of observations are //' processed independently by different threads (if \code{length(seqlens) > 1}). //' @return a list with the following arguments: //' \item{posteriors}{posterior probability of being in a certain state for a certain datapoint. //' Same matrix used as input argument.} //' \item{tot_llik}{total log-likelihood of the data given the hmm model.} //' \item{new_trans}{update for the transition probabilities (it is already normalized).} //' @export // [[Rcpp::export]] List forward_backward(NumericMatrix initP, NumericMatrix trans, NumericMatrix lliks, IntegerVector seqlens, NumericMatrix posteriors, int nthreads=1){ int nmod = initP.nrow(); double totlen = Rcpp::sum(seqlens); if (nmod != trans.nrow() || nmod != trans.ncol() || nmod != lliks.nrow() || nmod != posteriors.nrow()) Rcpp::stop("Unable to figure out the number of models"); if (((double) lliks.ncol()) != totlen || ((double)posteriors.ncol()) != totlen) Rcpp::stop("Seqence lengths don't match with the provided matrices"); if (initP.ncol() != seqlens.length()) Rcpp::stop("'initP' must have as many columns as the number of sequences"); NumericMatrix newTrans(trans.nrow(), trans.ncol()); NumericMatrix newInitP(initP.nrow(), initP.ncol()); double tot_llik = fb_core(asMat(initP), asMat(trans), asMat(lliks), asVec(seqlens), asMat(posteriors), asMat(newTrans), asMat(newInitP), nthreads); return List::create(_("posteriors")=posteriors, _("tot_llik")=tot_llik, _("new_trans")=newTrans, _("new_initP")=newInitP); }
NumericMatrix Zdbcomparepairwisemc(IntegerVector db, int nloci, int njobs, int job) { //expects as.vector(t(db)) as argument db NumericMatrix M(nloci+1,nloci+1); unsigned long dblen = db.length(); int plen = 2*nloci; std::vector<int> pi; //profile i pi.resize(plen); int m; int a,b,c,d; bool ac,ad,bc,bd; int mf=0,mp=0; //full matches, partial matches int i=0,j=0; //positions in character ver int ki=0,kj=0; // id of profile i,j unsigned long dbn = dblen/plen; //now read profile by profile i = (job-1)*plen; for(ki=(job-1);ki<(dbn-1);ki+=njobs){ for(m=0;m<plen;m++){ pi[m] = db[i+m]; //read profile i } j = i+plen; // next profile is located plen bytes ahead of profile i //cycle through all profiles j>i, so read rest of the character vector for(kj=ki+1;kj<(dbn);kj++){ mf=0;mp=0; //read profiles i and j @ all loci and compare for(m=0;m<(plen);m+=2){ a = pi[m]; b = pi[m+1]; //alleles of person i c = db[j+m]; d = db[j+m+1]; //alleles of j // compare alleles ac = a==c; ad = a==d; bc = b==c; bd = b==d; // check for partial or full match if ((ac^bd)|(ad^bc)) mp++; if ((ac&bd)|(ad&bc)) mf++; } // count the matches M(mf,mp)++; j+=plen; // next profile is plen bytes ahead } i += njobs*plen; } return(M); }
double SimMatrix::groupsim( IntegerVector group ) { double agg = reducer.sim0; int n = group.length(); for (int row = 1; row < n; row++) for (int col = 0; col < row; col++) { double sim = pairsim(group[row], group[col]); agg = reducer.reduce(agg, sim); } return reducer.norm(agg, ((double)(n * (n - 1)) * 0.5)); }
// [[Rcpp::export]] NumericMatrix c_pairwise_dist_angle_subset(NumericMatrix x, IntegerVector from, IntegerVector to) { int i, j, ind, l, t, f; double dx, dy, dz; int dim = x.ncol(); double d, ang; int nfrom = from.length(); int nto = to.length(); // Upper triangle no longer meaningful structure, so just a storage. NumericMatrix val( nfrom * nto, dim); ind = 0; for(i=0; i < nfrom; i++) { f = from[i]-1; for(j=0; j < nto; j++) { t = to[j]-1; if(f!=t){ d=0; for(l=0; l < dim; l++) d += pow(x(f,l)-x(t,l), 2); d = sqrt(d); dx = x(f,0) - x(t,0); dy = x(f,1) - x(t,1); ang = atan2(dy, dx); if(ang<0) ang = 2*PI+ang; // upper triangle location val(ind,0) = d; val(ind,1) = ang; if(dim==3){ dz = x(f,2) - x(t,2); ang = acos(dz/d); val(ind,2) = ang; } ind++; } } } return val; }
// Function for pair indexing // [[Rcpp::export]] List indexToCoord(IntegerVector V, const int N){ std::vector<int> rows; std::vector<int> cols; for(int i = 0; i < V.length(); i++){ int J = (V[i] - 1) / N + 1; cols.push_back(J); int I = (V[i] - 1) % N + 1; rows.push_back(I); } return List::create( _["feat1"] = rows, _["feat2"] = cols ); }
// [[Rcpp::export]] List coldataFeather(const List& feather, const IntegerVector& indexes) { auto table = getTableFromFeather(feather); int n = indexes.length(), p = table->num_rows(); List out(n), names(n); for (int i = 0; i < n; ++i) { auto col = getColumn(*table, indexes[i] - 1); names[i] = Rf_mkCharCE(col->name().c_str(), CE_UTF8); out[i] = toSEXP(col); } out.attr("names") = names; out.attr("row.names") = IntegerVector::create(NA_INTEGER, -p); out.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame"); return out; }
//parses the GR object. void parseRegions(std::vector<GArray>& container, RObject& gr, samfile_t* in){ if (not gr.inherits("GRanges")) stop("must provide a GRanges object"); IntegerVector starts = as<IntegerVector>(as<RObject>(gr.slot("ranges")).slot("start")); IntegerVector lens = as<IntegerVector>(as<RObject>(gr.slot("ranges")).slot("width")); RObject chrsRle = as<RObject>(gr.slot("seqnames")); RObject strandsRle = as<RObject>(gr.slot("strand")); RleIter chrs(chrsRle); RleIter strands(strandsRle); container.reserve(container.size() + starts.length()); Iint e_starts = starts.end(); Iint i_starts = starts.begin(); Iint i_lens = lens.begin(); int lastStrandRun = -1; int strand = -1; int lastChrsRun = -1; int rid = -1; for (; i_starts < e_starts; ++i_starts, ++i_lens, chrs.next(), strands.next()){ //if new run, update chromosome if (lastChrsRun != chrs.run){ lastChrsRun = chrs.run; rid = getRefId(in, chrs.getValue()); if (rid == -1) stop("chromosome " + (std::string)chrs.getValue() + " not present in the bam file"); } //if new run, update strand if (lastStrandRun != strands.run){ lastStrandRun = strands.run; const std::string& s = strands.getValue(); if (s == "-"){ strand = -1; } else if (s == "+"){ strand = +1; } else { strand = 0; } } container.push_back(GArray(rid, *i_starts - 1, *i_lens, strand)); } }
//--------------------------------------------------------------------- // Lewis's routine to compute s and s^2 "running sums" (look-up tables) // Matrix I/O in column major format // // ff is the "flattened" f(u,v) in column major order // // nz_idxs are the indices of the LUTs that are not by definition = 0. // If we know where they are we don't have to use if's in the for loop // to find them. // They are assumed to be in 0-index (offset indexing) format. // Is there a way to just compute them instead of using expand.grid and // which on the R side?? // //--------------------------------------------------------------------- // [[Rcpp::export]] NumericMatrix lewis(int num_svals, IntegerVector nz_idxs, NumericVector ff, int offset1, int offset2, int offset3) { //s LUT goes in col 0, s^2 LUT goes in col 1: NumericMatrix svals(num_svals,2); for(int i = 0; i<nz_idxs.length(); i++) { int sidx1 = nz_idxs(i) - offset1; int sidx2 = nz_idxs(i) - offset2; int sidx3 = nz_idxs(i) - offset3; // s(u,v) = f(u,v) + s(u-1,v) + s(u,v-1) - s(u-1,v-1): svals(nz_idxs(i),0) = ff(i) + svals(sidx1,0) + svals(sidx2,0) - svals(sidx3,0); // s^2(u,v) = f^2(u,v) + s^2(u-1,v) + s^2(u,v-1) - s^2(u-1,v-1): svals(nz_idxs(i),1) = pow(ff(i),2) + svals(sidx1,1) + svals(sidx2,1) - svals(sidx3,1); } return svals; }
IntegerVector sbSequenceCpp(int T, int b_av, int length){ IntegerVector index_sequence(2*T); for(int j = 0; j < 2*T; j++){ int k; if(j >= T){ k = j-T; } else { k = j; } index_sequence[j] = k+1; } IntegerVector sequence = rep(IntegerVector::get_na(), length + T); IntegerVector temp = seq(1,T); // to sample from int current = -1; while(current < (length-1)){ int start = as<int>(Csample(temp)); // int start = as<int>(sample_(temp, 1, false)); NumericVector b_num = rgeom(1, 1.0/b_av) + 1.0; int b_int = as<int>(b_num); IntegerVector _to_set = seq(current+1,current+b_int); IntegerVector _idx = seq(start-1, start-1 + b_int - 1); // replace with NA (when index out of bound error) for(int i = 0; i < _idx.length(); i++){ if(_idx[i] >= index_sequence.length() || _to_set[i] >= sequence.length()){ // do nothing } else { sequence[_to_set[i]] = index_sequence[_idx[i]]; } } current = current + b_int; } return sequence[Range(0,length-1)]; }
void print_vec(const IntegerVector &vec) { for(int i = 0; i <vec.length(); i++) { cout<<vec[i]<< " "; } cout<<endl; }
//' @export // [[Rcpp::export]] List blockSizeCalibrate(NumericMatrix ret, IntegerVector b_vec = IntegerVector::create(1, 3, 6, 10), double alpha = 0.05, int M = 199, int K = 1000, int b_av = 5, int T_start = 50) { int b_len = b_vec.length(); NumericVector emp_reject_probs = rep(0.0, b_len); double Delta_hat = sharpeRatioDiff(ret); NumericVector ret1 = ret(_,0); NumericVector ret2 = ret(_,1); int T = ret1.length(); NumericMatrix Var_data(T_start + T, 2); Var_data(0,_) = ret(0,_); IntegerVector range1 = seq(1, T-1); IntegerVector range2 = seq(0, T-2); NumericVector intercept = rep(1.0, ret1.length()); List fit1 = fastLm(ret1[range1], cbindCpp(intercept[range2], cbindCpp(ret1[range2],ret2[range2]))); List fit2 = fastLm(ret2[range1], cbindCpp(intercept[range2], cbindCpp(ret1[range2],ret2[range2]))); NumericVector coef1 = as<NumericVector>(wrap(fit1["coef"])); NumericVector coef2 = as<NumericVector>(wrap(fit2["coef"])); NumericMatrix resid_mat = cbindCpp(as<NumericVector>(fit1["resid"]), as<NumericVector>(fit2["resid"])); for(int k = 0; k < K; k++){ // create resid_mat_star NumericMatrix resid_mat_star(T_start + T, resid_mat.cols()); // fill with NA by default int xsize = resid_mat_star.nrow() * resid_mat_star.ncol(); for (int i = 0; i < xsize; i++) { resid_mat_star[i] = NumericMatrix::get_na(); } // fill first row with 0 for(int c = 0; c < resid_mat_star.ncol(); c++) resid_mat_star(0,c) = 0.0; IntegerVector index = sbSequenceCpp(T-1, b_av, T_start + T - 1) - 1; for(int j = 0; j < index.length(); j++){ // handling NA values if( IntegerVector::is_na(index(j)) ){ // do nothing } else if( (j+1) > resid_mat_star.nrow() ){ // do nothing } else if( index[j] > resid_mat.nrow() ){ // do nothing } else { resid_mat_star(j+1,_) = resid_mat(index[j],_); } } for(int t = 1; t < (T_start + T); t++){ Var_data(t,0) = coef1[0] + coef1[1]*Var_data(t-1,0) + coef1[2]*Var_data(t-1,1) + resid_mat_star(t,0); Var_data(t,1) = coef2[0] + coef2[1]*Var_data(t-1,0) + coef2[2]*Var_data(t-1,1) + resid_mat_star(t,1); } NumericMatrix Var_data_trunc = Var_data(Range(T_start,T_start+T-1), Range(0,Var_data.ncol()-1)); for(int j = 0; j < b_len; j++){ List bTI = bootTimeInference(Var_data_trunc, b_vec[j], M, Delta_hat); double p_value = as<double>(wrap(bTI["p.Value"])); if(p_value <= alpha) emp_reject_probs[j] = emp_reject_probs[j] + 1; } } emp_reject_probs = emp_reject_probs/(double)K; Environment env = Environment::base_namespace(); Function order = env["order"]; IntegerVector b_order = order(abs(emp_reject_probs - alpha)); int b_opt = as<int>(wrap(b_vec(b_order[0]-1))); NumericMatrix b_vec_with_probs = rbindCpp(as<NumericVector>(wrap(b_vec)), emp_reject_probs); return(List::create( _["Empirical.Rejection.Probs"] = b_vec_with_probs, _["b.optimal"] = b_opt )); }
// [[Rcpp::export]] Rcpp::XPtr<SimulatedGenealogy> fwpopsim_fixed_genealogy(int G, IntegerVector H0, int pop_size, List mutmodel, bool progress, bool trace, bool cleanup_haplotypes = true, bool cleanup_lineages = true, bool plot = false, bool all_pairs = false, int random_pairs = 0, bool continue_to_one_founder = false) { Function Rprint("print"); int mutation_model_type = as<int>(mutmodel["modeltype"]); NumericMatrix mutpars = mutmodel["mutpars"]; int loci = H0.length(); /****************************************************************************/ /* PRINT PARAMETERS */ /****************************************************************************/ if (trace) { Rcout << "#--- G ------------------------#" << std::endl; Rcout << G << std::endl; Rcout << std::endl; Rcout << "#--- r -----------------------#" << std::endl; Rcout << loci << std::endl; Rcout << std::endl; Rcout << "#--- H0 -----------------------#" << std::endl; Rprint(H0); Rcout << std::endl; Rcout << "#--- pop size------------------#" << std::endl; Rprint(pop_size); Rcout << std::endl; Rcout << "#--- mutmodel -----------------#" << std::endl; Rprint(mutmodel); } /****************************************************************************/ /* DETERMINE MUTATION MODEL */ /****************************************************************************/ MutationModel* mutation_model = NULL; SMM mutation_smm; LMM mutation_lmm; EMM mutation_emm; if (mutation_model_type == 1) { mutation_smm = SMM(mutpars); mutation_model = &mutation_smm; } else if (mutation_model_type == 2) { mutation_lmm = LMM(mutpars); mutation_model = &mutation_lmm; } else if (mutation_model_type == 3) { mutation_emm = EMM(mutpars); mutation_model = &mutation_emm; } else { throw std::invalid_argument("The mutation model was not recognized!"); } /****************************************************************************/ /* INITIAL POPULATION */ /****************************************************************************/ std::vector<Individual*> init_population(pop_size); std::vector<Individual*> population(pop_size); int id = 0; std::vector<int> H0vec(loci); for (int locus = 0; locus < loci; locus++) { H0vec[locus] = H0[locus]; } for (int individual = 0; individual < pop_size; ++individual) { Individual* ind = new Individual(++id, 0, individual, NULL, H0vec); population[individual] = ind; init_population[individual] = ind; } int founders_with_descendants = 0; //for (int generation = 1; generation <= G; generation++) { //for (int generation = 1; generation <= G && (!continue_to_one_founder || (continue_to_one_founder && founders_with_descendants == 1)); generation++) { //for (int generation = 1; generation <= G && (continue_to_one_founder && founders_with_descendants == 1); generation++) { for (int generation = 1; generation <= G || (continue_to_one_founder && founders_with_descendants != 1); generation++) { if (trace) { Rcout << "===============================================" << std::endl; Rcout << "Generation " << generation << " (out of " << G << ")" << std::endl; Rcout << "===============================================" << std::endl; } std::vector<Individual*> new_population(pop_size); // FIXME: Smarter data structure! (Hash)Map? std::vector<int> founders(pop_size); for (int individual = 0; individual < pop_size; individual++) { int parent_index = (int)(pop_size*Rf_runif(0, 1)); Individual* parent = population[parent_index]; std::vector<int> haplotype = parent->get_haplotype(); if (trace) { Rcout << "Before mutation: " << sprint_vector(haplotype) << std::endl; } for (int locus = 0; locus < loci; locus++) { double locus_mut_prob[3]; mutation_model->mutation_table(haplotype[locus], locus, locus_mut_prob); double mut_down = locus_mut_prob[0]; double mut_up_cum = locus_mut_prob[0] + locus_mut_prob[1]; double u = Rf_runif(0, 1); if (u <= mut_down) { haplotype[locus] -= 1; } else if (u <= mut_up_cum) { haplotype[locus] += 1; } } if (trace) { Rcout << "After mutation: " << sprint_vector(haplotype) << std::endl; Rcout << std::endl; } Individual* ind = new Individual(++id, generation, individual, parent, haplotype); new_population[individual] = ind; parent->add_child(ind); founders[ind->get_founder_id()] += 1; } /* for (individual in pop_tree) */ if (cleanup_haplotypes) { for (int individual = 0; individual < pop_size; individual++) { population[individual]->cleanup_haplotype(); } } if (cleanup_lineages) { for (int individual = 0; individual < pop_size; individual++) { Individual::cleanup_lineage(population[individual]); } } population = new_population; founders_with_descendants = 0; for (int individual = 0; individual < pop_size; individual++) { if (founders[individual] > 0) { founders_with_descendants++; } } if (progress && !trace) { Rcout << "Generation " << generation << " / " << G << " done (" << founders_with_descendants << " founders left)\r" << std::flush; } } /* for (generation in 1:G) */ if (progress && !trace) { Rcout << std::endl; } SimulatedGenealogy* simres = new SimulatedGenealogy(population, init_population); if (all_pairs) { std::vector<int> gs_all = all_pairwise_MRCA(population); double mean_all = std::accumulate(gs_all.begin(), gs_all.end(), 0.0) / gs_all.size(); Rcout << "MRCA mean is " << mean_all << " generations back based on all pairs of individuals" << std::endl; } if (random_pairs > 0) { std::vector<int> gs_sample = sample_pairwise_MRCA(population, random_pairs); double mean_sample = std::accumulate(gs_sample.begin(), gs_sample.end(), 0.0) / gs_sample.size(); Rcout << "MRCA mean is " << mean_sample << " generations back based on sample of " << gs_sample.size() << " random pairs" << std::endl; } if (plot) { std::ofstream outfile; std::ostringstream dotstream; genealogy_to_dot(init_population, dotstream, cleanup_lineages); outfile.open("tmp-proto.dot", std::ios::out | std::ios::trunc ); outfile << dotstream.str(); outfile.close(); //////////// int idx1 = 1; int idx2 = 8; Individual* i1 = population[idx1]; Individual* i2 = population[idx2]; std::vector<Individual*> lineage_ids; lineage_ids.push_back(i1); lineage_ids.push_back(i2); Individual* mrca = find_MRCA_with_lineage(i1, i2, lineage_ids); int g = i1->get_generation() - mrca->get_generation(); Rcout << "MRCA is " << g << " generations back" << std::endl; //Rcout << "MRCA is " << (i1->get_generation() - find_MRCA(i1, i2)->get_generation()) << " generations back" << std::endl; std::vector<int> mark_ids; for (auto &node: lineage_ids) { mark_ids.push_back(node->get_id()); } //mark_ids.push_back(3); //mark_ids.push_back(50); std::ostringstream dotstream_marked; genealogy_to_dot(init_population, dotstream_marked, cleanup_lineages, mark_ids); outfile.open("tmp-proto-marked.dot", std::ios::out | std::ios::trunc ); outfile << dotstream_marked.str(); outfile.close(); } //http://www.r-bloggers.com/external-pointers-with-rcpp/ //http://r.789695.n4.nabble.com/reinterpreting-externalptr-in-R-td4653908.html Rcpp::XPtr<SimulatedGenealogy> res(simres); /* CharacterVector classes = res.attr("class") ; classes.push_back("myclass") ; res.attr("class") = classes; */ res.attr("class") = CharacterVector::create("fwsim_fixed_sim_genealogy", "externalptr"); return(res); }
List Zdbcomparepairwisemctrackhits(IntegerVector db, int nloci, int hit, int njobs, int job) { //expects as.vector(t(db)) as argument db NumericMatrix M(nloci+1,nloci+1); unsigned long dblen = db.length(); int plen = 2*nloci; std::vector<int> pi; //profile i pi.resize(plen); int m; int a,b,c,d; bool ac,ad,bc,bd; int mf=0,mp=0; //full matches, partial matches int i=0,j=0; //positions in character ver int ki=0,kj=0; // id of profile i,j unsigned long dbn = dblen/plen; // keep track of matching profiles (if hit>0) std::vector<int> hitid1; std::vector<int> hitid2; std::vector<int> hitf; // # matches std::vector<int> hitp; // partials //now read profile by profile i = (job-1)*plen; for(ki=(job-1);ki<(dbn-1);ki+=njobs){ for(m=0;m<plen;m++){ pi[m] = db[i+m]; //read profile i } j = i+plen; // next profile is located plen bytes ahead of profile i //cycle through all profiles j>i, so read rest of the character vector for(kj=ki+1;kj<(dbn);kj++){ mf=0;mp=0; //read profiles i and j @ all loci and compare for(m=0;m<(plen);m+=2){ a = pi[m]; b = pi[m+1]; //alleles of person i c = db[j+m]; d = db[j+m+1]; //alleles of j // compare alleles ac = a==c; ad = a==d; bc = b==c; bd = b==d; // check for partial or full match if ((ac^bd)|(ad^bc)) mp++; if ((ac&bd)|(ad&bc)) mf++; } // count the matches M(mf,mp)++; //keep track of matching profiles (?) if (mf>=hit){ hitid1.push_back(ki+1); hitid2.push_back(kj+1); hitf.push_back(mf); hitp.push_back(mp); } j+=plen; // next profile is plen bytes ahead } i += njobs*plen; } return List::create( _["M"] = M, _["hits"] = DataFrame::create( _("id1")= hitid1, _("id2") = hitid2, _("match") = hitf, _("partial") = hitp ) ) ; }
// Assign values // [[Rcpp::export(name = ".assignValues")]] NumericMatrix assignValues_cpp(int val, IntegerVector ad, NumericMatrix mtx) { for (int i = 0; i < ad.length(); i++){ mtx[ad[i]-1] = val; } return(mtx); }