Exemplo n.º 1
0
//[[Rcpp::export]]
DataFrame closest_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y,
                       IntegerVector grp_idx_x,
                       IntegerVector grp_idx_y,
                       const std::string& suffix_x,
                       const std::string& suffix_y) {

  DataFrame df_x = x.data() ;
  DataFrame df_y = y.data() ;

  // for subsetting / return df
  std::vector<int> indices_x ;
  std::vector<int> indices_y ;
  std::vector<int> overlap_sizes ;
  std::vector<int> distance_sizes ;

  // set up interval trees for each chromosome and apply closest_grouped
  GroupApply(x, y, grp_idx_x, grp_idx_y,
             closest_grouped, std::ref(indices_x), std::ref(indices_y),
             std::ref(overlap_sizes), std::ref(distance_sizes));

  DataFrame subset_x = subset_dataframe(df_x, indices_x) ;
  DataFrame subset_y = subset_dataframe(df_y, indices_y) ;

  DataFrameBuilder out;
  // x names, data
  out.add_df(subset_x, suffix_x, false) ;

  // y names, data
  out.add_df(subset_y, suffix_y, true) ;

  // overlaps and distances
  out.add_vec(".overlap", wrap(overlap_sizes)) ;
  out.add_vec(".dist", wrap(distance_sizes)) ;

  auto nrows = subset_x.nrows() ;
  auto res = out.format_df(nrows) ;
  return res ;

}
Exemplo n.º 2
0
//[[Rcpp::export]]
DataFrame intersect_impl(GroupedDataFrame x, GroupedDataFrame y,
                         const std::string& suffix_x = ".x",
                         const std::string& suffix_y = ".y") {

  // indices for subsetting
  std::vector<int> indices_x ;
  std::vector<int> indices_y ;

  // overlap sizes
  std::vector<int> overlap_sizes ;

  auto data_x = x.data() ;
  auto data_y = y.data() ;

  // set up interval trees for each chromosome and apply intersect_group
  GroupApply(x, y, intersect_group, std::ref(indices_x), std::ref(indices_y), std::ref(overlap_sizes));

  DataFrame subset_x = DataFrameSubsetVisitors(data_x, names(data_x)).subset(indices_x, "data.frame");
  DataFrame subset_y = DataFrameSubsetVisitors(data_y, names(data_y)).subset(indices_y, "data.frame");

  auto ncol_x = subset_x.size() ;
  auto ncol_y = subset_y.size() ;

  CharacterVector names(ncol_x + ncol_y) ;
  CharacterVector names_x = subset_x.attr("names") ;
  CharacterVector names_y = subset_y.attr("names") ;

  // replacing y chrom with overlap, same number of cols
  List out(ncol_x + ncol_y) ;

  // x names, data
  for (int i = 0; i < ncol_x; i++) {
    auto name_x = as<std::string>(names_x[i]) ;
    if (name_x != "chrom") {
      name_x += suffix_x ;
    }
    names[i] = name_x ;
    out[i] = subset_x[i] ;
  }

  // y names, data
  for (int i = 0; i < ncol_y; i++) {
    auto name_y = as<std::string>(names_y[i]) ;

    if (name_y == "chrom") continue ;

    name_y += suffix_y ;

    names[i + ncol_x - 1] = name_y ;
    out[i + ncol_x - 1] = subset_y[i] ;
  }

  // overlaps
  out[ncol_x + ncol_y - 1] = overlap_sizes ;
  names[ncol_x + ncol_y - 1] = ".overlap" ;

  out.attr("names") = names ;
  out.attr("class") = classes_not_grouped() ;
  auto nrows = subset_x.nrows() ;
  set_rownames(out, nrows) ;

  return out ;

}