コード例 #1
0
ファイル: reldist.cpp プロジェクト: jayhesselberth/valr
// Assemble an ungrouped data frame made of the rows of `x` selected through
// PairedGroupApply / reldist_grouped, with one extra trailing column named
// "reldist" that carries the values collected in `rel_distances`.
//
// x, y : grouped data frames handed to PairedGroupApply.
// Returns the selected x rows (all original x columns) plus "reldist".
//[[Rcpp::export]]
DataFrame reldist_impl(GroupedDataFrame x, GroupedDataFrame y) {

  // Both vectors are passed by reference to reldist_grouped, which is
  // expected to append to them (presumably one entry per reported x row --
  // confirm against reldist_grouped).
  std::vector<int> indices_x ;
  std::vector<float> rel_distances ;

  DataFrame df_x = x.data() ;
  PairedGroupApply(x, y, reldist_grouped,
                   std::ref(indices_x), std::ref(rel_distances)) ;

  // keep only the x rows that were reported
  DataFrame subset_x =
    DataFrameSubsetVisitors(df_x, names(df_x)).subset(indices_x, "data.frame") ;

  auto ncol_x = subset_x.size() ;
  CharacterVector names_x = subset_x.attr("names") ;

  // one slot per x column, plus a final slot for the distances
  List out(ncol_x + 1) ;
  CharacterVector out_names(ncol_x + 1) ;

  // trailing column: relative distances
  out_names[ncol_x] = "reldist" ;
  out[ncol_x] = rel_distances ;

  // leading columns: x data copied over under the same names
  for (int col = 0; col < ncol_x; ++col) {
    out[col] = subset_x[col] ;
    out_names[col] = names_x[col] ;
  }

  // dress the list up as a plain (ungrouped) data frame
  out.attr("names") = out_names ;
  out.attr("class") = classes_not_grouped() ;

  auto n_rows = subset_x.nrows() ;
  set_rownames(out, n_rows) ;

  return out ;

}
コード例 #2
0
ファイル: intersect.cpp プロジェクト: sheridar/valr
// Build an ungrouped data frame describing pairs of x / y rows reported by
// intersect_group (applied through GroupApply).
//
// x, y     : grouped data frames handed to GroupApply.
// suffix_x : appended to every x column name except "chrom".
// suffix_y : appended to every y column name; y's "chrom" column is dropped.
//
// Output columns, in order: all x columns, all y columns other than "chrom",
// then ".overlap" holding the values collected in `overlap_sizes`.
//[[Rcpp::export]]
DataFrame intersect_impl(GroupedDataFrame x, GroupedDataFrame y,
                         const std::string& suffix_x = ".x",
                         const std::string& suffix_y = ".y") {

  // indices for subsetting
  std::vector<int> indices_x ;
  std::vector<int> indices_y ;

  // overlap sizes
  std::vector<int> overlap_sizes ;

  auto data_x = x.data() ;
  auto data_y = y.data() ;

  // apply intersect_group to the groups of x and y; the three vectors are
  // passed by reference so that it can append to them
  GroupApply(x, y, intersect_group, std::ref(indices_x), std::ref(indices_y), std::ref(overlap_sizes));

  DataFrame subset_x = DataFrameSubsetVisitors(data_x, names(data_x)).subset(indices_x, "data.frame");
  DataFrame subset_y = DataFrameSubsetVisitors(data_y, names(data_y)).subset(indices_y, "data.frame");

  auto ncol_x = subset_x.size() ;
  auto ncol_y = subset_y.size() ;

  CharacterVector names_x = subset_x.attr("names") ;
  CharacterVector names_y = subset_y.attr("names") ;

  // Number of y columns that survive (everything except "chrom"). Counting
  // them, instead of assuming exactly one leading "chrom" column, keeps the
  // output layout correct wherever "chrom" sits in y.
  auto nkeep_y = ncol_y ;
  nkeep_y = 0 ;
  for (int i = 0; i < ncol_y; i++) {
    if (as<std::string>(names_y[i]) != "chrom") {
      ++nkeep_y ;
    }
  }

  // x columns + kept y columns + one ".overlap" column
  auto ncol_out = ncol_x + nkeep_y + 1 ;

  CharacterVector out_names(ncol_out) ;
  List out(ncol_out) ;

  // x names, data
  for (int i = 0; i < ncol_x; i++) {
    auto name_x = as<std::string>(names_x[i]) ;
    if (name_x != "chrom") {
      name_x += suffix_x ;
    }
    out_names[i] = name_x ;
    out[i] = subset_x[i] ;
  }

  // y names, data: written through a running cursor so that skipping
  // "chrom" never overwrites an x column or leaves a gap
  auto pos = ncol_x ;
  for (int i = 0; i < ncol_y; i++) {
    auto name_y = as<std::string>(names_y[i]) ;

    if (name_y == "chrom") continue ;

    name_y += suffix_y ;

    out_names[pos] = name_y ;
    out[pos] = subset_y[i] ;
    ++pos ;
  }

  // overlaps go in the last slot
  out[pos] = overlap_sizes ;
  out_names[pos] = ".overlap" ;

  out.attr("names") = out_names ;
  out.attr("class") = classes_not_grouped() ;
  auto nrows = subset_x.nrows() ;
  set_rownames(out, nrows) ;

  return out ;

}
コード例 #3
0
ファイル: bed_flank.cpp プロジェクト: sheridar/valr
// Compute flanking intervals for every row of `inputTable`.
//
// inputTable : data frame with "chrom", "start" and "end" columns (and a
//              "strand" column when `strand` is true).
// genome     : data frame turned into a chrom -> size lookup by makeChromSizes.
// both       : flank size applied to both sides (exclusive with left/right).
// left/right : flank size on each side; a side is only reported when its
//              size is > 0.
// fraction   : when true, flank sizes are multiplied by the interval width.
// strand     : when true, rows whose strand is not "+" have left and right
//              flanks swapped to the opposite ends of the interval.
// trim       : when true, flanks that run past position 1 or the chromosome
//              size are clipped instead of dropped.
//
// Returns the rows of `inputTable` that produced a flank (a row appears twice
// when both flanks are reported), with "start" / "end" replaced by the flank
// coordinates. Calls stop() on an invalid side specification or when
// `strand` is requested but no "strand" column exists.
//[[Rcpp::export]]
DataFrame flank_impl(DataFrame inputTable, DataFrame genome,
                     double both = 0, double left = 0, double right = 0,
                     bool fraction = false, bool strand = false, bool trim = false) {

  // Argument validation
  if (both == 0 && left == 0 && right == 0)
    stop("specify one of both, left, right");

  if (both != 0 && (left != 0 || right != 0))
    stop("ambiguous side spec for bed_flank");

  std::vector<std::string> tableNames = inputTable.names();
  bool hasStrand = false;

  for (const std::string& name : tableNames) {
    if (name == "strand") {
      hasStrand = true;
      break;
    }
  }

  if (strand && !hasStrand)
    stop("expected strand column");


  // Set both
  if (both > 0) left = right = both;


  // Input columns
  std::vector<std::string> chroms = inputTable["chrom"];
  std::vector<int> startCoords    = inputTable["start"];
  std::vector<int> endCoords      = inputTable["end"];

  // Load the strand column once, up front, rather than re-copying the whole
  // column for every row.
  std::vector<std::string> strands;
  if (strand) {
    std::vector<std::string> strandCol = inputTable["strand"];
    strands.swap(strandCol);
  }

  const int N = startCoords.size();

  // Output: row index into inputTable plus the new coordinates
  std::vector<int> idxOut;
  std::vector<double> startOut;
  std::vector<double> endOut;


  // Create lookup for chrom sizes
  genome_map_t chroMap = makeChromSizes(genome);


  // Record one flank for row `idx` if it was requested (size > 0) and either
  // lies inside [1, chrSize] or may be clipped to it because trim is on.
  auto emitFlank = [&](int idx, double size, int flankStart, int flankEnd, int chrSize) {
    // A side with no positive size was not requested; never report it, even
    // when trimming.
    if (!(size > 0)) return;

    const bool startOk = flankStart > 0;
    const bool endOk   = flankEnd <= chrSize;

    if (!(startOk && endOk) && !trim) return;

    startOut.push_back(startOk ? flankStart : 1);
    endOut.push_back(endOk ? flankEnd : chrSize);
    idxOut.push_back(idx);
  };


  for (int i = 0; i < N; i++) {

    const int start = startCoords[i];
    const int end   = endCoords[i];

    // Flank extents in bases; scaled by interval width in fraction mode.
    double leftExtent  = left;
    double rightExtent = right;

    if (fraction) {
      const int width = end - start;
      leftExtent  = width * left;
      rightExtent = width * right;
    }

    // Anything other than "+" is treated as the reverse strand.
    const bool reverse = strand && strands[i] != "+";

    int leftstart;
    int leftend;
    int rightstart;
    int rightend;

    // Conversions to int truncate toward zero.
    if (!reverse) {
      leftstart  = static_cast<int>(start - leftExtent);
      leftend    = start;
      rightstart = end;
      rightend   = static_cast<int>(end + rightExtent);

    } else {
      leftstart  = end;
      leftend    = static_cast<int>(end + leftExtent);
      rightstart = static_cast<int>(start - rightExtent);
      rightend   = start;
    }


    // Compare new intervals to chrom sizes
    // NOTE(review): if genome_map_t is a std::unordered_map, operator[]
    // yields 0 for a chrom missing from `genome` -- confirm that is intended.
    std::string chrom = chroms[i];
    int chrSize = chroMap[chrom];

    emitFlank(i, left,  leftstart,  leftend,  chrSize);
    emitFlank(i, right, rightstart, rightend, chrSize);
  }


  // Write new DataFrame
  DataFrame outTable = DataFrameSubsetVisitors(inputTable, names(inputTable)).subset(idxOut, "data.frame");

  outTable["start"] = startOut;
  outTable["end"] = endOut;

  return outTable;
}