Exemplo n.º 1
// Packs a logical vector into a raw (byte) vector, one bit per element.
// Element i of `indx` is stored in bit (i % 8) of byte (i / 8), i.e. the
// least-significant bit of each byte holds the lowest index. A bit is set
// only when indx(i) == 1; any other value leaves it at 0.
// The number of input elements is attached to the result as the "bitlen"
// attribute.
// NOTE(review): this excerpt does not show the braces that close the loop
// and the function.
Rcpp::RawVector toBitVec(Rcpp::LogicalVector indx) {
	unsigned nBit = indx.size();
	// Number of bytes needed to hold nBit bits, rounded up.
	// NOTE(review): the division is carried out in single-precision float,
	// which cannot represent every integer above 2^24; an integer form such
	// as (nBit + 7) / 8 would avoid rounding for very long inputs - confirm.
	unsigned nByte = ceil(float(nBit)/8);
	Rcpp::RawVector bytes(nByte);//default are all 0s
	bytes.attr("bitlen") = nBit;

    unsigned byteIndex, bitIndex ;
    for(unsigned i = 0 ; i < nBit; i++) {
        // Locate the target byte and the bit position inside it.
        byteIndex = i / 8;
        bitIndex = i % 8;
        if(indx(i) == 1)
			// `<<` binds tighter than `|`, so this ORs the byte with (1 << bitIndex).
			bytes[byteIndex] = bytes[byteIndex] | 1 << bitIndex;
    return bytes;
Exemplo n.º 2
// [[Rcpp::export]]
Rcpp::DataFrame gt_to_popsum(Rcpp::DataFrame var_info, Rcpp::CharacterMatrix gt) {
  // Calculate popgen summaries for the sample.
  // var_info should contain columns named 'CHROM', 'POS', 'mask' and possibly others.
  //
  // For every row of `gt` whose `mask` entry is TRUE this computes:
  //   nsample       - number of non-NA genotypes in the row,
  //   allele_counts - per-allele counts joined into a comma delimited string,
  //   Hes           - 1 minus the sum of powered allele frequencies (see Stats below),
  //   Nes           - 1 / (1 - He).
  // NOTE(review): this excerpt is truncated - several closing braces, the body
  // of the while loop and the end of the return statement are not shown.
  Rcpp::LogicalVector   mask = var_info["mask"];
  // All result vectors are sized from `mask`, while the loop below runs over
  // gt.nrow(); presumably the two lengths are expected to match - TODO confirm.
  Rcpp::IntegerVector   nsample(mask.size());
  Rcpp::StringVector    allele_counts(mask.size());
  Rcpp::NumericVector   Hes(mask.size());
  Rcpp::NumericVector   Nes(mask.size());
  int i = 0;
  int j = 0;
  int k = 0;
  for(i=0; i < gt.nrow(); i++){ // Iterate over variants (rows)
    if(mask[i] == TRUE){
      // Per-allele counters; starts with a single slot (index 0) set to 0.
      std::vector<int> myalleles (1,0);
      for(j=0; j < gt.ncol(); j++){ // Iterate over samples (columns)
        if(gt(i, j) != NA_STRING){
          nsample[i]++;  // Increment sample count.

          // Count alleles per sample.
          // gtsplit is defined outside this excerpt; presumably it splits the
          // genotype string into integer allele indices - TODO confirm.
          std::vector < int > intv = gtsplit(as<std::string>(gt(i, j)));
          for(k=0; k<intv.size(); k++){
            // NOTE(review): size() - 1 is unsigned, so intv[k] is converted to
            // unsigned for this comparison; a negative allele index would
            // compare as a very large value.
            while(myalleles.size() - 1 < intv[k]){
              // We have more alleles than exist in the vector myalleles.

      // Concatenate allele counts into a comma delimited string.
      // NOTE(review): the buffer is a fixed 50 bytes and sprintf does no
      // bounds checking, so many alleles or large counts can overflow it.
      // `n` receives sprintf's return value but is not read in the visible code.
      int n;
      char buffer [50];
      n = sprintf (buffer, "%d", myalleles[0]);
      for(j=1; j < myalleles.size(); j++){
        // NOTE(review): `buffer` is both the destination and a "%s" source;
        // sprintf with overlapping source and destination is undefined behaviour.
        n=sprintf (buffer, "%s,%d", buffer, myalleles[j]);
      allele_counts[i] = buffer;

      // Sum all alleles.
      int nalleles = myalleles[0];
      for(j=1; j < myalleles.size(); j++){
        nalleles = nalleles + myalleles[j];

      // Stats.
      // He = 1 - sum_j (count_j / nalleles) ^ myalleles.size().
      // NOTE(review): the exponent is the number of allele slots rather than
      // a fixed 2 - confirm this is the intended statistic.
      // NOTE(review): if nalleles is 0 the ratio is 0/0 (NaN).
      double He = 1;
      He = He - pow(double(myalleles[0])/double(nalleles), myalleles.size());
      for(j=1; j < myalleles.size(); j++){
        He = He - pow(double(myalleles[j])/double(nalleles), myalleles.size());
      Hes[i] = He;
      // NOTE(review): divides by zero when He == 1.
      Nes[i] = 1/(1-He);
    } else { // Missing variant (row=NA)
      nsample[i] = NA_INTEGER;     

  // The result is built from var_info; the remaining arguments are cut off in
  // this excerpt.
  return Rcpp::DataFrame::create(var_info, 
Exemplo n.º 3
//' @export
// [[Rcpp::export(name=".gt_to_popsum")]]
Rcpp::DataFrame gt_to_popsum(Rcpp::DataFrame var_info, Rcpp::CharacterMatrix gt) {
  // Calculate popgen summaries for the sample.
  // var_info should contain columns named 'CHROM', 'POS', 'mask' and possibly others.
  //
  // For every row of `gt` whose `mask` entry is TRUE this computes:
  //   nsample       - number of non-NA genotypes in the row,
  //   allele_counts - per-allele counts joined into a comma delimited string,
  //   Hes           - 1 minus the sum of powered allele frequencies (see Stats below),
  //   Nes           - 1 / (1 - He).
  // NOTE(review): this excerpt is truncated - several closing braces, the body
  // of the while loop and the end of the return statement are not shown.
  Rcpp::LogicalVector   mask = var_info["mask"];
  // All result vectors are sized from `mask`, while the loop below runs over
  // gt.nrow(); presumably the two lengths are expected to match - TODO confirm.
  Rcpp::IntegerVector   nsample(mask.size());
  Rcpp::StringVector    allele_counts(mask.size());
  Rcpp::NumericVector   Hes(mask.size());
  Rcpp::NumericVector   Nes(mask.size());
  int i = 0;
  int j = 0;
//  unsigned int j = 0;
  unsigned int k = 0;
  for(i=0; i < gt.nrow(); i++){ // Iterate over variants (rows)
    if(mask[i] == TRUE){
      // Per-allele counters; starts with a single slot (index 0) set to 0.
      std::vector<int> myalleles (1,0);
      for(j=0; j < gt.ncol(); j++){ // Iterate over samples (columns)
        if(gt(i, j) != NA_STRING){
          nsample[i]++;  // Increment sample count.
//          Rcout << "gt: " << gt(i, j) << "\n";
          // Count alleles per sample.

          // vcfRCommon::gtsplit is defined outside this excerpt; presumably it
          // fills gt_vector with the allele fields of gt2 - TODO confirm.
          int unphased_as_na = 0; // 0 == FALSE
          std::vector < std::string > gt_vector;
          std::string gt2 = as<std::string>(gt(i,j));
          vcfRCommon::gtsplit( gt2, gt_vector, unphased_as_na );
//          Rcout << "gt_vector.size: " << gt_vector.size() << "\n";

          for(k=0; k<gt_vector.size(); k++){
            // NOTE(review): std::stoi throws std::invalid_argument when the
            // field is not numeric; whether gtsplit can return such fields
            // (e.g. a missing-allele marker) is not visible here - verify.
            int myAllele = std::stoi(gt_vector[k]);
//            Rcout << "  " << myAllele;
//            // If this genotype had an allele we did not previously observe
            // we'll have to grow the vector.
            // NOTE(review): size() - 1 is unsigned, so myAllele is converted to
            // unsigned for this comparison; a negative value would compare as
            // a very large one.
            while(myalleles.size() - 1 < myAllele){
//          Rcout << "\n\n";

      // Concatenate allele counts into a comma delimited string.
      // NOTE(review): the buffer is a fixed 50 bytes and sprintf does no
      // bounds checking, so many alleles or large counts can overflow it.
      char buffer [50];
//      int n;
//      n=sprintf(buffer, "%d", myalleles[0]);
      sprintf(buffer, "%d", myalleles[0]);
      // The (unsigned) casts below make the comparison with size() unsigned on
      // both sides.
      for(j=1; (unsigned)j < myalleles.size(); j++){
//        n=sprintf (buffer, "%s,%d", buffer, myalleles[j]);
        // NOTE(review): `buffer` is both the destination and a "%s" source;
        // sprintf with overlapping source and destination is undefined behaviour.
        sprintf (buffer, "%s,%d", buffer, myalleles[j]);
      allele_counts[i] = buffer;

      // Sum all alleles.
      int nalleles = myalleles[0];
      for(j=1; (unsigned)j < myalleles.size(); j++){
        nalleles = nalleles + myalleles[j];

      // Stats.
      // He = 1 - sum_j (count_j / nalleles) ^ myalleles.size().
      // NOTE(review): the exponent is the number of allele slots rather than
      // a fixed 2 - confirm this is the intended statistic.
      // NOTE(review): if nalleles is 0 the ratio is 0/0 (NaN).
      double He = 1;
      He = He - pow(double(myalleles[0])/double(nalleles), myalleles.size());
      for(j=1; (unsigned)j < myalleles.size(); j++){
        He = He - pow(double(myalleles[j])/double(nalleles), myalleles.size());
      Hes[i] = He;
      // NOTE(review): divides by zero when He == 1.
      Nes[i] = 1/(1-He);
    } else { // Missing variant (row=NA)
      nsample[i] = NA_INTEGER;     

  // The result is built from var_info; the remaining arguments are cut off in
  // this excerpt.
  return Rcpp::DataFrame::create(var_info, 