// [[Rcpp::export]] Rcpp::List subsetCounts(Rcpp::IntegerVector counts, Rcpp::IntegerVector start, Rcpp::IntegerVector width, Rcpp::LogicalVector strand){ if (start.length() != width.length() || start.length() != strand.length()) Rcpp::stop("provided vectors have different lengths..."); int nr = start.length(); int len = counts.length(); int tot = 0; int* S = start.begin(); int* W = width.begin(); for (int i = 0; i < nr; ++i){ int s = S[i] - 1; int w = W[i]; if (s < 0) Rcpp::stop("negative start positions are invalid"); if (s + w > len) Rcpp::stop("range exceeds the lengths of the counts vector"); tot += w; } Rcpp::IntegerVector res(tot); Rcpp::IntegerVector nstart(nr); Rcpp::IntegerVector nend(nr); int* R = res.begin(); int* C = counts.begin(); int* ST = strand.begin(); int* NS = nstart.begin(); int* NE = nend.begin(); int currpos = 0; for (int i = 0; i < nr; ++i){ NS[i] = currpos + 1; int w = W[i]; if (ST[i]) std::copy(C + S[i]-1, C + S[i]-1 + w, R + currpos); else std::reverse_copy(C + S[i]-1, C + S[i]-1 + w, R + currpos); currpos += w; NE[i] = currpos; } return List::create(_("counts")=res, _("starts")=nstart, _("ends")=nend); }
// [[Rcpp::export]] Rcpp::NumericVector seqC(double from_, double to_, double by_ = 1.0) { int adjust = std::pow(10, std::ceil(std::log10(10 / by_)) - 1); int from = adjust * from_; int to = adjust * to_; int by = adjust * by_; std::size_t n = ((to - from) / by) + 1; Rcpp::IntegerVector res = Rcpp::rep(from, n); add_multiple ftor(by); std::transform(res.begin(), res.end(), res.begin(), ftor); return Rcpp::NumericVector(res) / adjust; }
// [[Rcpp::export]] Rcpp::IntegerMatrix quantileNorm(Rcpp::IntegerMatrix mat, Rcpp::IntegerVector ref, int nthreads=1, int seed=13){ if (mat.nrow() != ref.length()) Rcpp::stop("incompatible arrays..."); if (!std::is_sorted(ref.begin(), ref.end())) Rcpp::stop("ref must be sorted"); int ncol = mat.ncol(); int nrow = mat.nrow(); //allocate new matrix Rcpp::IntegerMatrix res(nrow, ncol); Mat<int> oldmat = asMat(mat); Mat<int> newmat = asMat(res); Vec<int> ref2 = asVec(ref); //allocate a seed for each column std::seed_seq sseq{seed}; std::vector<std::uint32_t> seeds(ncol); sseq.generate(seeds.begin(), seeds.end()); #pragma omp parallel num_threads(nthreads) { std::vector<std::pair<int, int> > storage(nrow);//pairs <value, index> #pragma omp for for (int col = 0; col < ncol; ++col){ std::mt19937 gen(seeds[col]); qtlnorm(oldmat.getCol(col), ref2, newmat.getCol(col), storage, gen); } } res.attr("dimnames") = mat.attr("dimnames"); return res; }
// Calculate mk = sum_i I(M(ti)=k), k=1, ..., M with m0=0; // where h=(h0, h1, ..., hM) with h0=0 and d=(d0, d1, ..., dM) with d0=0, dM=R_PosInf void Getmk(Rcpp::IntegerVector& mk, const Rcpp::IntegerVector& Mt){ int n = Mt.size(); std::fill(mk.begin(), mk.end(), 0); for (int i=0; i<n; ++i){ int k = Mt[i]; mk[k] +=1; } }
// [[Rcpp::export]] Rcpp::IntegerVector countInSubset(Rcpp::IntegerVector counts, Rcpp::IntegerVector start, Rcpp::IntegerVector width){ if (start.length() != width.length()) Rcpp::stop("provided vectors have different lengths..."); int nr = start.length(); int len = counts.length(); Rcpp::IntegerVector res(nr); int* R = res.begin(); int* C = counts.begin(); int* S = start.begin(); int* W = width.begin(); for (int i = 0; i < nr; ++i){ int s = S[i] - 1; int w = W[i]; if (s < 0) Rcpp::stop("negative start positions are invalid"); if (s + w > len) Rcpp::stop("range exceeds the lengths of the counts vector"); R[i] = sum(C + s, w); } return res; }
// Construct a permutation from vv after checking that it really is one:
// every element must lie in [0, n) and no value may occur twice, where n is
// the length of vv. Throws runtime_error otherwise.
Permutation::Permutation(Rcpp::IntegerVector &vv) : d_perm(vv), n(vv.size()) {
    // seen[v] becomes true once the value v has been encountered.
    std::vector<bool> seen(n, false);
    const int *values = vv.begin();
    for (int pos = 0; pos < n; ++pos) {
        const int value = values[pos];
        if (value < 0 || n <= value) {
            throw runtime_error("permutation elements must be in [0,n)");
        }
        if (seen[value]) {
            throw runtime_error("permutation is not a permutation");
        }
        seen[value] = true;
    }
}
// Count the markers contained in a list of integer vectors.
//
// preClusterResults_: R list whose elements are converted to integer vectors.
// noDuplicates:       output; set to true when no marker value occurs more
//                     than once across all list elements, false otherwise.
//
// Returns the total number of markers, duplicates included.
R_xlen_t countPreClusterMarkers(SEXP preClusterResults_, bool& noDuplicates)
{
	Rcpp::List preClusterResults = preClusterResults_;

	// Flatten every list element into one vector.
	std::vector<int> allMarkers;
	for(Rcpp::List::iterator group = preClusterResults.begin(); group != preClusterResults.end(); ++group)
	{
		Rcpp::IntegerVector groupMarkers = *group;
		allMarkers.insert(allMarkers.end(), groupMarkers.begin(), groupMarkers.end());
	}
	const R_xlen_t total = allMarkers.size();

	// After sorting, any duplicated value shows up as two equal neighbours.
	std::sort(allMarkers.begin(), allMarkers.end());
	noDuplicates = std::adjacent_find(allMarkers.begin(), allMarkers.end()) == allMarkers.end();
	return total;
}
// Build a symmetric cluster-by-cluster matrix of average dissimilarities.
//
// data:               packed triangular array of level indices; the entry for
//                     the marker pair (x, y) with x >= y is read from
//                     data[x*(x+1)/2 + y]. The value 255 marks NA.
// levels:             numeric value associated with each level index.
// size:               not used by this function.
// clusters_:          integer vector of cluster ids; the smallest id must be 1.
// start:              offset into currentPermutation of the first marker
//                     described by clusters_.
// currentPermutation: maps position (i + start) to the marker index used to
//                     address `data`.
//
// Returns a maxCluster x maxCluster numeric matrix whose (r, c) entry is the
// mean of levels[...] over all marker pairs (one from cluster r, one from
// cluster c).
// Throws std::runtime_error when clusters_ is empty, does not start at 1,
// reaches past the end of currentPermutation, or when `data` holds NA (255)
// or a level index outside `levels`.
SEXP constructDissimilarityMatrixInternal(unsigned char* data, std::vector<double>& levels, int size, SEXP clusters_, int start, const std::vector<int>& currentPermutation)
{
	Rcpp::IntegerVector clusters = Rcpp::as<Rcpp::IntegerVector>(clusters_);
	// min_element / max_element return end() for an empty range, which must
	// not be dereferenced.
	if(clusters.size() == 0)
	{
		throw std::runtime_error("Input clusters cannot be empty");
	}
	if(start < 0 || static_cast<R_xlen_t>(start) + clusters.size() > static_cast<R_xlen_t>(currentPermutation.size()))
	{
		throw std::runtime_error("Input clusters extend past the end of the permutation");
	}

	const int minCluster = *std::min_element(clusters.begin(), clusters.end());
	const int maxCluster = *std::max_element(clusters.begin(), clusters.end());
	if(minCluster != 1)
	{
		throw std::runtime_error("Clusters must have consecutive indices starting at 1");
	}

	// Marker indices belonging to each cluster (cluster k is stored at k-1).
	std::vector<std::vector<int> > groupIndices(maxCluster);
	for(R_xlen_t i = 0; i < clusters.size(); i++)
	{
		groupIndices[clusters[i]-1].push_back(currentPermutation[i + start]);
	}

	// table[l] counts how often level l occurs for the current cluster pair.
	std::vector<int> table(levels.size());
	const int nLevels = static_cast<int>(table.size());
	Rcpp::NumericMatrix result(maxCluster, maxCluster);
	for(int rowCluster = 1; rowCluster <= maxCluster; rowCluster++)
	{
		const std::vector<int>& rowIndices = groupIndices[rowCluster-1];
		for(int columnCluster = 1; columnCluster <= rowCluster; columnCluster++)
		{
			const std::vector<int>& columnIndices = groupIndices[columnCluster-1];
			std::fill(table.begin(), table.end(), 0);
			for(std::vector<int>::const_iterator columnMarker = columnIndices.begin(); columnMarker != columnIndices.end(); columnMarker++)
			{
				for(std::vector<int>::const_iterator rowMarker = rowIndices.begin(); rowMarker != rowIndices.end(); rowMarker++)
				{
					// Only one triangle is stored, so order the pair as x >= y.
					int x = *rowMarker, y = *columnMarker;
					if(x < y) std::swap(x, y);
					const int byte = data[x *(x + (R_xlen_t)1)/(R_xlen_t)2 + y];
					if(byte == 255) throw std::runtime_error("Values of NA not allowed");
					// Anything else outside `levels` would write past the end of `table`.
					if(byte >= nLevels) throw std::runtime_error("Value in data is not a valid index into levels");
					table[byte]++;
				}
			}
			double sum = 0;
			for(int level = 0; level < nLevels; level++) sum += table[level] * levels[level];
			// The matrix is symmetric, so fill both triangles at once.
			result(rowCluster-1, columnCluster-1) = result(columnCluster-1, rowCluster-1) = sum / (columnIndices.size() * rowIndices.size());
		}
	}
	return result;
}
// [[Rcpp::export]] NumericVector avg_rank(Rcpp::NumericVector x) { R_xlen_t sz = x.size(); Rcpp::IntegerVector w = Rcpp::seq(0, sz - 1); std::sort(w.begin(), w.end(), Comparator(x)); Rcpp::NumericVector r = Rcpp::no_init_vector(sz); R_xlen_t n; #pragma omp parallel for for (int i = 0; i < sz; i += n) { n = 1; while (i + n < sz && x[w[i]] == x[w[i + n]]) ++n; #pragma omp parallel for for (R_xlen_t k = 0; k < n; k++) { r[w[i + k]] = i + (n + 1) / 2.; } } return r; }
// [[Rcpp::export]] Rcpp::CharacterMatrix read_body_gz(std::string x, Rcpp::NumericVector stats, int nrows = -1, int skip = 0, Rcpp::IntegerVector cols = 0, int convertNA = 1, int verbose = 1) { // NA matrix for unexpected results. Rcpp::StringMatrix na_matrix(1,1); na_matrix(0,0) = NA_STRING; /* * Manage cols vector. * The first eight (1-based) columns are mandatory. * We can ensure they are there by adding them, * sorting and removing adjacent non-identical values. */ // for( int i=9; i >= 1; i-- ){ for( int i=8; i >= 1; i-- ){ cols.push_front(i); } cols.sort(); // Remove duplicate values using a set. std::set<int> s( cols.begin(), cols.end() ); cols.assign( s.begin(), s.end() ); cols = cols - 1; // R is 1-based, C is 0-based. // Initialize matrix for body data. // old: Rcpp::CharacterMatrix gt(stats[2], stats[3]); int row_num = 0; if( ( nrows == -1 ) & ( skip == 0 ) ){ nrows = stats[2]; } else if ( ( nrows != -1 ) & ( skip == 0 ) ){ // nrows = nrows; } else if ( ( nrows == -1 ) & ( skip > 0) ){ nrows = stats[2] - skip; } else if ( ( nrows != -1 ) & ( skip > 0) ){ // nrows = nrows; } else { Rcpp::Rcerr << "failed to calculate return matrix geometry."; return na_matrix; } Rcpp::CharacterMatrix gt( nrows, cols.size() ); // if ( nrows > -1 & skip == 0 ){ // row_num = nrows; // } else if ( nrows == -1 & skip > 0 ){ // row_num = stats[2] - skip; // } else { // row_num = stats[2]; // } // Rcpp::CharacterMatrix gt( row_num, cols.size() ); row_num = 0; if( verbose == 1 ){ Rcpp::Rcout << "Character matrix gt created.\n"; Rcpp::Rcout << "Character matrix gt rows: "; Rcpp::Rcout << gt.rows(); Rcpp::Rcout << "\n"; Rcpp::Rcout << "Character matrix gt cols: "; Rcpp::Rcout << gt.cols(); Rcpp::Rcout << "\n"; Rcpp::Rcout << "skip: "; Rcpp::Rcout << skip; Rcpp::Rcout << "\n"; Rcpp::Rcout << "nrows: "; Rcpp::Rcout << nrows; Rcpp::Rcout << "\n"; Rcpp::Rcout << "row_num: "; Rcpp::Rcout << row_num; Rcpp::Rcout << "\n"; Rcpp::Rcout << "\n"; } // Create filehandle and open. 
gzFile file; file = gzopen (x.c_str(), "r"); if (! file) { Rcpp::Rcerr << "gzopen of " << x << " failed: " << strerror (errno) << ".\n"; return na_matrix; } // Because the last line may be incomplete, // We'll typically omit it from processing and // concatenate it to the first line. // But first we'll have to initialize it. std::string lastline = ""; // String vector to store the header (^#CHROM...). std::vector<std::string> header_vec; // variant counter. int var_num = 0; // Scroll through buffers. while (1) { Rcpp::checkUserInterrupt(); int err; // Slurp in a buffer. int bytes_read; char buffer[LENGTH]; bytes_read = gzread (file, buffer, LENGTH - 1); buffer[bytes_read] = '\0'; // Terminate the buffer. std::string mystring(reinterpret_cast<char*>(buffer)); // Recast buffer as a string. mystring = lastline + mystring; // Concatenate last line to the buffer // Delimit into lines. std::vector < std::string > svec; // Initialize vector of strings for parsed buffer. char split = '\n'; // Must be single quotes! vcfRCommon::strsplit(mystring, svec, split); /* svec should now contain a vector of strings, one string for each line where the last line may be incomplete. We can now process each line except the last. */ // Scroll through lines. unsigned int i = 0; for(i = 0; i < svec.size() - 1; i++){ // Check and remove carriage returns (Windows). if( svec[i][ svec[i].size()-1] == '\r' ){ svec[i].erase( svec[i].size() - 1 ); } if(svec[i][0] == '#' && svec[i][1] == '#'){ // Meta line, ignore. } else if(svec[i][0] == '#' && svec[i][1] == 'C'){ // Process header. // Rcpp::Rcout << svec[i].substr(0,40) << "\n\n"; char header_split = '\t'; vcfRCommon::strsplit(svec[i], header_vec, header_split); // Subset the header to select columns. std::vector<std::string> header_vec2( cols.size() ); for(int j=0; j<cols.size(); j++){ header_vec2[j] = header_vec[ cols[j] ]; } header_vec = header_vec2; } else { // Variant line. 
if ( ( var_num >= skip ) & ( row_num < nrows ) ){ proc_body_line(gt, row_num, svec[i], cols, convertNA); row_num++; // Return matrix row number. } var_num++; // Input row number. if(var_num % nreport == 0 && verbose == 1){ Rcpp::Rcout << "\rProcessed variant " << var_num; } } } // Processed all lines of current buffer. // Keep the last line so we can append it to //the beginning of the next buffer. lastline = svec[svec.size() - 1]; // Rcpp::Rcout << "line-2:" << svec[svec.size() - 2].substr(0,40) << "|<-\n"; // Rcpp::Rcout << "line-1:" << svec[svec.size() - 1].substr(0,40) << "|<-\n"; // Rcpp::Rcout << "\n"; /* * We can bail out early if we have read nrows. * Before we do we need to check that: * 1) we have read in nrows * 2) we have processed the header * (important when nrows is small) * 3) we actually have a line (when buffer ends at the end of a line). */ if( ( row_num >= nrows ) & ( lastline[0] != '#' ) & ( lastline.size() > 0 ) ){ // Rcpp::Rcout << "\nBreaking!\n"; // Rcpp::Rcout << "lastline: " << lastline.substr(0,40) << "\n"; break; } // Check for EOF or errors. if (bytes_read < LENGTH - 1) { if (gzeof (file)) { break; } else { const char * error_string; error_string = gzerror (file, & err); if (err) { Rcpp::Rcerr << "Error: " << error_string << ".\n"; return na_matrix; } } } // Return to top of loop and process another buffer. } // Close while. // Close filehandle. gzclose (file); // Rcpp::Rcout << "\n\n>>---<< Made it: file close! >>---<<\n\n"; // Rcpp::Rcout << "header_vec.size(): " << header_vec.size() << "\n"; if( stats[1] == 0 ){ if( verbose == 1 ){ Rcpp::Rcout << "Warning: no header information was found! Data contains no sample names!\n"; } } else { if( header_vec.size() == (unsigned)gt.ncol() ){ header_vec[0] = "CHROM"; gt.attr("dimnames") = Rcpp::List::create(Rcpp::CharacterVector::create(), header_vec); } else { if( verbose == 1 ){ Rcpp::Rcout << "Warning: no header information found!\n"; } } } // Rcpp::Rcout << "\n\n>>---<< Made it! 
>>---<<\n\n"; if(verbose == 1){ Rcpp::Rcout << "\rProcessed variant: " << var_num; Rcpp::Rcout << "\nAll variants processed\n"; } // Rcpp::DataFrame df1 = Rcpp::DataFrame::create(gt); // Rcpp::DataFrame df1(gt); // df1.names() = header_vec; // if(verbose == 1){ // Rcpp::Rcout << "Rcpp::DataFrame created.\n"; // } return gt; }
// [[Rcpp::export]] Rcpp::IntegerVector sample_int(int n, int min, int max) { Rcpp::IntegerVector pool = Rcpp::seq(min, max); std::random_shuffle(pool.begin(), pool.end()); return pool[Rcpp::Range(0, n - 1)]; }
//[[Rcpp::export]] int nRoots (Rcpp::IntegerVector ances) { int ans = std::count (ances.begin(), ances.end(), 0); return ans; }
//[[Rcpp::export]] Rcpp::IntegerVector getAllNodesSafe (Rcpp::IntegerMatrix edge) { Rcpp::IntegerVector ans = Rcpp::as_vector(edge); Rcpp::IntegerVector tmp = Rcpp::unique(ans); std::sort(tmp.begin(), tmp.end()); return tmp; }
// Return an ascending-sorted copy of x; x itself is left untouched because
// the sort runs on a clone.
// See http://gallery.rcpp.org/articles/sorting/
Rcpp::IntegerVector stl_sort(Rcpp::IntegerVector x) {
    Rcpp::IntegerVector sorted = clone(x);
    std::sort(sorted.begin(), sorted.end());
    return sorted;
}
// Return the 0-based position of the first element of vec equal to val,
// or the length of vec when val does not occur.
int index(const int val, Rcpp::IntegerVector vec) {
    const int length = vec.size();
    int pos = 0;
    while (pos < length && vec[pos] != val) {
        ++pos;
    }
    return pos;
}