//[[Rcpp::export]] DataFrame reldist_impl(GroupedDataFrame x, GroupedDataFrame y) { std::vector<float> rel_distances ; std::vector<int> indices_x ; DataFrame df_x = x.data() ; PairedGroupApply(x, y, reldist_grouped, std::ref(indices_x), std::ref(rel_distances)); DataFrame subset_x = DataFrameSubsetVisitors(df_x, names(df_x)).subset(indices_x, "data.frame"); auto ncol_x = subset_x.size() ; CharacterVector names(ncol_x + 1) ; CharacterVector names_x = subset_x.attr("names") ; List out(ncol_x + 1) ; // x names, data for( int i=0; i<ncol_x; i++) { names[i] = names_x[i] ; out[i] = subset_x[i] ; } out[ncol_x] = rel_distances ; names[ncol_x] = "reldist" ; out.attr("names") = names ; out.attr("class") = classes_not_grouped() ; auto nrows = subset_x.nrows() ; set_rownames(out, nrows) ; return out ; }
//[[Rcpp::export]] DataFrame intersect_impl(GroupedDataFrame x, GroupedDataFrame y, const std::string& suffix_x = ".x", const std::string& suffix_y = ".y") { // indices for subsetting std::vector<int> indices_x ; std::vector<int> indices_y ; // overlap sizes std::vector<int> overlap_sizes ; auto data_x = x.data() ; auto data_y = y.data() ; // set up interval trees for each chromosome and apply intersect_group GroupApply(x, y, intersect_group, std::ref(indices_x), std::ref(indices_y), std::ref(overlap_sizes)); DataFrame subset_x = DataFrameSubsetVisitors(data_x, names(data_x)).subset(indices_x, "data.frame"); DataFrame subset_y = DataFrameSubsetVisitors(data_y, names(data_y)).subset(indices_y, "data.frame"); auto ncol_x = subset_x.size() ; auto ncol_y = subset_y.size() ; CharacterVector names(ncol_x + ncol_y) ; CharacterVector names_x = subset_x.attr("names") ; CharacterVector names_y = subset_y.attr("names") ; // replacing y chrom with overlap, same number of cols List out(ncol_x + ncol_y) ; // x names, data for (int i = 0; i < ncol_x; i++) { auto name_x = as<std::string>(names_x[i]) ; if (name_x != "chrom") { name_x += suffix_x ; } names[i] = name_x ; out[i] = subset_x[i] ; } // y names, data for (int i = 0; i < ncol_y; i++) { auto name_y = as<std::string>(names_y[i]) ; if (name_y == "chrom") continue ; name_y += suffix_y ; names[i + ncol_x - 1] = name_y ; out[i + ncol_x - 1] = subset_y[i] ; } // overlaps out[ncol_x + ncol_y - 1] = overlap_sizes ; names[ncol_x + ncol_y - 1] = ".overlap" ; out.attr("names") = names ; out.attr("class") = classes_not_grouped() ; auto nrows = subset_x.nrows() ; set_rownames(out, nrows) ; return out ; }
//[[Rcpp::export]] DataFrame flank_impl(DataFrame inputTable, DataFrame genome, double both = 0, double left = 0, double right = 0, bool fraction = false, bool strand = false, bool trim = false) { // Warnings if (both == 0 & left == 0 & right == 0) stop("specify one of both, left, right"); if (both != 0 & (left != 0 || right != 0)) stop("ambiguous side spec for bed_flank"); std::vector<std::string> TableNames = inputTable.names(); int TableLen = TableNames.size(); bool strandTest = false; for (int i = 0; i < TableLen; i++) if (TableNames[i] == "strand") strandTest = true; if (strand == true & strandTest == false) stop("expected strand column"); // Set both if (both > 0) left = right = both; // Set input and output vectors std::vector<std::string> chroms = inputTable["chrom"]; std::vector<int> startCoords = inputTable["start"]; std::vector<int> endCoords = inputTable["end"]; int N = startCoords.size(); std::vector<int> coordSize(N); std::vector<int> idxOut; std::vector<double> startOut; std::vector<double> endOut; // Create unordered map for chrom sizes genome_map_t chroMap = makeChromSizes(genome); for (int i = 0; i < N; i++) { int leftstart; int leftend; int rightstart; int rightend; // strand if (strand == true) { std::vector<std::string> strands = inputTable["strand"]; // strand, fraction if (fraction == true) { coordSize[i] = endCoords[i] - startCoords[i]; if (strands[i] == "+") { leftstart = startCoords[i] - coordSize[i] * left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + coordSize[i] * right; } else { leftstart = endCoords[i]; leftend = endCoords[i] + coordSize[i] * left; rightstart = startCoords[i] - coordSize[i] * right; rightend = startCoords[i]; } // strand, no fraction } else { if (strands[i] == "+") { leftstart = startCoords[i] - left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + right; } else { leftstart = endCoords[i]; leftend = endCoords[i] + left; rightstart = startCoords[i] - right; rightend = startCoords[i]; } } // no strand } else { // no strand, fraction if (fraction == true) { coordSize[i] = endCoords[i] - startCoords[i]; leftstart = startCoords[i] - coordSize[i] * left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + coordSize[i] * right; // no strand, no fraction } else { leftstart = startCoords[i] - left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + right; } } // Compare new intervals to chrom sizes std::string chrom = chroms[i]; int chrSize = chroMap[chrom]; if (left > 0 & leftstart > 0 & leftend <= chrSize) { startOut.push_back (leftstart); endOut.push_back (leftend); idxOut.push_back (i); } else if (trim == true & leftstart > 0 & leftend > chrSize) { startOut.push_back (leftstart); endOut.push_back (chrSize); idxOut.push_back (i); } else if (trim == true & leftstart <= 0 & leftend <= chrSize) { startOut.push_back (1); endOut.push_back (leftend); idxOut.push_back (i); } else if (trim == true & leftstart <= 0 & leftend > chrSize) { startOut.push_back (1); endOut.push_back (chrSize); idxOut.push_back (i); } if (right > 0 & rightstart > 0 & rightend <= chrSize) { startOut.push_back (rightstart); endOut.push_back (rightend); idxOut.push_back (i); } else if (trim == true & rightstart > 0 & rightend > chrSize) { startOut.push_back (rightstart); endOut.push_back (chrSize); idxOut.push_back (i); } else if (trim == true & rightstart <= 0 & rightend <= chrSize) { startOut.push_back (1); endOut.push_back (rightend); idxOut.push_back (i); } else if (trim == true & rightstart <= 0 & rightend > chrSize) { startOut.push_back (1); endOut.push_back (chrSize); idxOut.push_back (i); } } // Write new DataFrame DataFrame outTable = DataFrameSubsetVisitors(inputTable, names(inputTable)).subset(idxOut, "data.frame"); outTable["start"] = startOut; outTable["end"] = endOut; return outTable; }