void wigTestMaker(char *outDir)
/* wigTestMaker - Create outDir, make it the current directory, and then run
 * every test-file generator so that all output lands inside it.  The file
 * names passed below are relative, which is why the directory switch must
 * come first. */
{
/* The same value is handed to both contig helpers below.
 * NOTE(review): presumably the two files must agree on it - confirm. */
const int contigArg = 666666;

/* Set up the output directory. */
makeDir(outDir);
setCurrentDir(outDir);

/* Size table for the regular test files. */
makeChromSizes("chrom.sizes", 100);

/* Empty inputs. */
makeEmpty("empty.wig");
makeEmptyFixed("emptyFixed.wig");
makeEmptyVar("emptyVar.wig");

/* Short inputs. */
makeShortFixed("shortFixed.wig");
makeShortVar("shortVar.wig");
makeShortBed("shortBed.wig");

/* Sine-of-sine inputs; only the last argument differs between the three. */
makeSineSineFixed("sineSineFixed.wig", 1000, 500, 10, 8);
makeSineSineVar("sineSineVar.wig", 1000, 500, 10, 4);
makeSineSineBed("sineSineBed.wig", 1000, 500, 10, 2);

/* Remaining single-file cases. */
makeIncreasing("increasing.wig");
makeMixem("mixem.wig");

/* Contig size table and the matching wig file. */
makeContigSizes("contig.sizes", contigArg);
makeManyContigs("contigs.wig", contigArg);
}
//[[Rcpp::export]] DataFrame flank_impl(DataFrame inputTable, DataFrame genome, double both = 0, double left = 0, double right = 0, bool fraction = false, bool strand = false, bool trim = false) { // Warnings if (both == 0 & left == 0 & right == 0) stop("specify one of both, left, right"); if (both != 0 & (left != 0 || right != 0)) stop("ambiguous side spec for bed_flank"); std::vector<std::string> TableNames = inputTable.names(); int TableLen = TableNames.size(); bool strandTest = false; for (int i = 0; i < TableLen; i++) if (TableNames[i] == "strand") strandTest = true; if (strand == true & strandTest == false) stop("expected strand column"); // Set both if (both > 0) left = right = both; // Set input and output vectors std::vector<std::string> chroms = inputTable["chrom"]; std::vector<int> startCoords = inputTable["start"]; std::vector<int> endCoords = inputTable["end"]; int N = startCoords.size(); std::vector<int> coordSize(N); std::vector<int> idxOut; std::vector<double> startOut; std::vector<double> endOut; // Create unordered map for chrom sizes genome_map_t chroMap = makeChromSizes(genome); for (int i = 0; i < N; i++) { int leftstart; int leftend; int rightstart; int rightend; // strand if (strand == true) { std::vector<std::string> strands = inputTable["strand"]; // strand, fraction if (fraction == true) { coordSize[i] = endCoords[i] - startCoords[i]; if (strands[i] == "+") { leftstart = startCoords[i] - coordSize[i] * left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + coordSize[i] * right; } else { leftstart = endCoords[i]; leftend = endCoords[i] + coordSize[i] * left; rightstart = startCoords[i] - coordSize[i] * right; rightend = startCoords[i]; } // strand, no fraction } else { if (strands[i] == "+") { leftstart = startCoords[i] - left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + right; } else { leftstart = endCoords[i]; leftend = endCoords[i] + left; rightstart = startCoords[i] - 
right; rightend = startCoords[i]; } } // no strand } else { // no strand, fraction if (fraction == true) { coordSize[i] = endCoords[i] - startCoords[i]; leftstart = startCoords[i] - coordSize[i] * left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + coordSize[i] * right; // no strand, no fraction } else { leftstart = startCoords[i] - left; leftend = startCoords[i]; rightstart = endCoords[i]; rightend = endCoords[i] + right; } } // Compare new intervals to chrom sizes std::string chrom = chroms[i]; int chrSize = chroMap[chrom]; if (left > 0 & leftstart > 0 & leftend <= chrSize) { startOut.push_back (leftstart); endOut.push_back (leftend); idxOut.push_back (i); } else if (trim == true & leftstart > 0 & leftend > chrSize) { startOut.push_back (leftstart); endOut.push_back (chrSize); idxOut.push_back (i); } else if (trim == true & leftstart <= 0 & leftend <= chrSize) { startOut.push_back (1); endOut.push_back (leftend); idxOut.push_back (i); } else if (trim == true & leftstart <= 0 & leftend > chrSize) { startOut.push_back (1); endOut.push_back (chrSize); idxOut.push_back (i); } if (right > 0 & rightstart > 0 & rightend <= chrSize) { startOut.push_back (rightstart); endOut.push_back (rightend); idxOut.push_back (i); } else if (trim == true & rightstart > 0 & rightend > chrSize) { startOut.push_back (rightstart); endOut.push_back (chrSize); idxOut.push_back (i); } else if (trim == true & rightstart <= 0 & rightend <= chrSize) { startOut.push_back (1); endOut.push_back (rightend); idxOut.push_back (i); } else if (trim == true & rightstart <= 0 & rightend > chrSize) { startOut.push_back (1); endOut.push_back (chrSize); idxOut.push_back (i); } } // Write new DataFrame DataFrame outTable = DataFrameSubsetVisitors(inputTable, names(inputTable)).subset(idxOut, "data.frame"); outTable["start"] = startOut; outTable["end"] = endOut; return outTable; }
//[[Rcpp::export]] DataFrame complement_impl(GroupedDataFrame gdf, DataFrame genome) { genome_map_t chrom_sizes = makeChromSizes(genome) ; DataFrame df = gdf.data() ; IntegerVector starts = df["start"] ; IntegerVector ends = df["end"] ; CharacterVector chroms = df["chrom"] ; std::vector<std::string> chroms_out ; std::vector<int> starts_out ; std::vector<int> ends_out ; int ngroups = gdf.ngroups() ; GroupedDataFrame::group_iterator git = gdf.group_begin() ; for (int i = 0; i < ngroups; ++i, ++git) { SlicingIndex indices = *git ; int ni = indices.size() ; int start, end ; int last_end = 1 ; // get chrom from first index auto chrom = as<std::string>(chroms[indices[0]]) ; for (int j = 0; j < ni; ++j) { start = starts[indices[j]] ; end = ends[indices[j]] ; if (j == 0) { if (start == 1) { last_end = end ; continue ; } else { chroms_out.push_back(chrom) ; starts_out.push_back(1) ; ends_out.push_back(start) ; } } else { chroms_out.push_back(chrom) ; starts_out.push_back(last_end) ; ends_out.push_back(start) ; } last_end = end; } auto chrom_size = chrom_sizes[chrom] ; if (last_end < chrom_size) { chroms_out.push_back(chrom) ; starts_out.push_back(last_end) ; ends_out.push_back(chrom_size) ; } } return DataFrame::create(_("chrom") = chroms_out, _("start") = starts_out, _("end") = ends_out, _("stringsAsFactors") = false) ; }