Beispiel #1
0
// Extract a set of ranges from `counts` and concatenate them into one vector.
//
// counts: vector the ranges index into.
// start:  1-based start position of each range.
// width:  number of elements in each range.
// strand: per-range flag; a non-zero value copies the range in its original
//         order, a zero value copies it reversed.
//
// Returns a list with
//   "counts": the concatenated ranges,
//   "starts": 1-based position in "counts" where each range begins,
//   "ends":   1-based, inclusive position in "counts" where each range ends.
//
// Calls Rcpp::stop() when start/width/strand differ in length, when a start
// is below 1, when a width is negative, or when a range runs past the end of
// `counts`.
// [[Rcpp::export]]
Rcpp::List subsetCounts(Rcpp::IntegerVector counts, Rcpp::IntegerVector start, Rcpp::IntegerVector width, Rcpp::LogicalVector strand){
	if (start.length() != width.length() || start.length() != strand.length()) Rcpp::stop("provided vectors have different lengths...");
	const int nr = start.length();
	const int len = counts.length();
	int* S = start.begin(); int* W = width.begin();

	// First pass: validate every range and work out the size of the result.
	int tot = 0;
	for (int i = 0; i < nr; ++i){
		const int w = W[i];
		// Test the raw start value: computing S[i] - 1 first could overflow.
		if (S[i] < 1) Rcpp::stop("negative start positions are invalid");
		// A negative width would hand std::copy an inverted range below.
		if (w < 0) Rcpp::stop("negative widths are invalid");
		// Same condition as (start - 1) + w > len, arranged so it cannot overflow.
		if (w > len - (S[i] - 1)) Rcpp::stop("range exceeds the lengths of the counts vector");
		tot += w;
	}

	Rcpp::IntegerVector res(tot);
	Rcpp::IntegerVector nstart(nr);
	Rcpp::IntegerVector nend(nr);
	int* R = res.begin(); int* C = counts.begin(); int* ST = strand.begin();
	int* NS = nstart.begin(); int* NE = nend.begin();

	// Second pass: copy each range to its slot and record where it landed.
	int currpos = 0;
	for (int i = 0; i < nr; ++i){
		const int w = W[i];
		int* const first = C + (S[i] - 1);
		int* const last = first + w;
		NS[i] = currpos + 1;
		if (ST[i]) std::copy(first, last, R + currpos);
		else std::reverse_copy(first, last, R + currpos);
		currpos += w;
		NE[i] = currpos;
	}
	return Rcpp::List::create(Rcpp::Named("counts")=res, Rcpp::Named("starts")=nstart, Rcpp::Named("ends")=nend);
}
Beispiel #2
0
// Return the 1-based positions of the elements of `row` that test as true
// (i.e. are non-zero) in a C++ condition.
Rcpp::IntegerVector get_which(Rcpp::LogicalVector row) {
	// Count the hits first so the result can be allocated at its final size.
	int hits = 0;
	for (int pos = 0; pos < row.length(); ++pos) {
		hits += row(pos) ? 1 : 0;
	}

	Rcpp::IntegerVector ret(hits);
	int slot = 0;
	for (int pos = 0; pos < row.length(); ++pos) {
		if (!row(pos)) {
			continue;
		}
		ret(slot) = pos + 1; // R is 1-based
		++slot;
	}
	return ret;
}
Beispiel #3
0
// Validate an XML document against an XML Schema.
//
// doc:    the document to validate.
// schema: the document holding the schema definition.
//
// Returns a length-one logical vector that is TRUE when
// xmlSchemaValidateDoc() returns 0 and FALSE otherwise. The character vector
// handed to handleSchemaError during schema parsing and validation is
// attached as the "errors" attribute (presumably one message per reported
// problem -- see handleSchemaError).
// [[Rcpp::export]]
Rcpp::LogicalVector doc_validate(XPtrDoc doc, XPtrDoc schema) {
  xmlLineNumbersDefault(1);

  // Resolve both pointers before any libxml2 object is allocated.
  // checked_get() may throw (presumably on a NULL external pointer); doing it
  // up front means such a throw cannot leak the contexts created below.
  xmlDocPtr schema_doc = schema.checked_get();
  xmlDocPtr target_doc = doc.checked_get();

  Rcpp::CharacterVector vec;

  // Parse the schema, routing parser diagnostics to `vec`.
  xmlSchemaParserCtxtPtr cptr = xmlSchemaNewDocParserCtxt(schema_doc);
  xmlSchemaSetParserStructuredErrors(cptr, handleSchemaError, &vec);
  xmlSchemaPtr sptr = xmlSchemaParse(cptr);

  // Validate the document, routing validation diagnostics to the same vector.
  xmlSchemaValidCtxtPtr vptr = xmlSchemaNewValidCtxt(sptr);
  xmlSchemaSetValidStructuredErrors(vptr, handleSchemaError, &vec);
  const bool valid = (0 == xmlSchemaValidateDoc(vptr, target_doc));

  // Release the libxml2 objects before building the R result, so a failed
  // R allocation cannot leave them behind.
  xmlSchemaFreeParserCtxt(cptr);
  xmlSchemaFreeValidCtxt(vptr);
  xmlSchemaFree(sptr);

  Rcpp::LogicalVector out;
  out.push_back(valid);
  out.attr("errors") = vec;
  return out;
}
Beispiel #4
0
// Pack a logical vector into a raw vector, one bit per element.
//
// Element i is stored in bit (i % 8) of byte (i / 8), least significant bit
// first. A bit is set only when the element equals 1; every other value
// leaves the bit at 0. The number of input elements is attached to the
// result as the "bitlen" attribute.
//[[Rcpp::export]]
Rcpp::RawVector toBitVec(Rcpp::LogicalVector indx) {
	const unsigned nBit = indx.size();
	// Integer ceiling division. Going through float (as ceil(float(nBit)/8))
	// loses precision for large lengths and can yield too few bytes.
	const unsigned nByte = nBit / 8 + (nBit % 8 != 0 ? 1 : 0);
	Rcpp::RawVector bytes(nByte);//default are all 0s
	bytes.attr("bitlen") = nBit;

	for(unsigned i = 0 ; i < nBit; i++) {
		const unsigned byteIndex = i / 8;
		const unsigned bitIndex = i % 8;
		if(indx(i) == 1)
			bytes[byteIndex] = bytes[byteIndex] | (1u << bitIndex);
	}
	return bytes;
}
Beispiel #5
0
// Calculate popgen summaries for each variant of the sample.
//
// var_info: data frame describing the variants. It should contain columns
//           named 'CHROM', 'POS', 'mask' and possibly others; only 'mask'
//           is read here.
// gt:       character matrix of genotypes, variants in rows and samples in
//           columns.
//
// Returns var_info with four added columns:
//   n:             number of non-NA genotypes in the row (NA_INTEGER for rows
//                  whose mask is not TRUE),
//   Allele_counts: comma delimited counts per allele index, starting at 0,
//   He:            1 minus the sum of powered allele frequencies (see the
//                  review note in the body),
//   Ne:            1 / (1 - He).
//
// Calls Rcpp::stop() when gt has more rows than mask has elements, or when
// gtsplit() yields a negative allele index.
// [[Rcpp::export]]
Rcpp::DataFrame gt_to_popsum(Rcpp::DataFrame var_info, Rcpp::CharacterMatrix gt) {
  Rcpp::LogicalVector   mask = var_info["mask"];
  Rcpp::IntegerVector   nsample(mask.size());
  Rcpp::StringVector    allele_counts(mask.size());
  Rcpp::NumericVector   Hes(mask.size());
  Rcpp::NumericVector   Nes(mask.size());

  // All result vectors are sized from mask, so extra rows in gt would be
  // read and written out of bounds.
  if(gt.nrow() > mask.size()){
    Rcpp::stop("gt has more rows than var_info");
  }

  for(int i=0; i < gt.nrow(); i++){ // Iterate over variants (rows)
    if(mask[i] == TRUE){
      // myalleles[a] holds how many times allele index a was observed.
      std::vector<int> myalleles (1,0);
      for(int j=0; j < gt.ncol(); j++){ // Iterate over samples (columns)
        if(gt(i, j) != NA_STRING){
          nsample[i]++;  // Increment sample count.

          // Count alleles per sample.
          std::vector < int > intv = gtsplit(as<std::string>(gt(i, j)));
          for(std::size_t k=0; k < intv.size(); k++){
            const int allele = intv[k];
            // A negative index cannot be stored; compared against an
            // unsigned size it would also look like a huge value.
            if(allele < 0){
              Rcpp::stop("negative allele index in genotype");
            }
            // Grow the vector when this allele has not been observed before.
            if(static_cast<std::size_t>(allele) >= myalleles.size()){
              myalleles.resize(static_cast<std::size_t>(allele) + 1, 0);
            }
            myalleles[allele]++;
          }
        }
      }

      // Concatenate allele counts into a comma delimited string. Each number
      // is formatted on its own and appended to a std::string, so the result
      // can have any length and no buffer is both source and destination.
      std::string counts_str;
      char numbuf [32];
      for(std::size_t a=0; a < myalleles.size(); a++){
        if(a > 0){
          counts_str += ',';
        }
        snprintf (numbuf, sizeof(numbuf), "%d", myalleles[a]);
        counts_str += numbuf;
      }
      allele_counts[i] = counts_str;

      // Sum all alleles.
      int nalleles = 0;
      for(std::size_t a=0; a < myalleles.size(); a++){
        nalleles = nalleles + myalleles[a];
      }

      // Stats.
      // NOTE(review): the exponent is the number of allele slots, not a
      // fixed 2 -- confirm this is the intended definition.
      double He = 1;
      for(std::size_t a=0; a < myalleles.size(); a++){
        He = He - pow(double(myalleles[a])/double(nalleles), myalleles.size());
      }
      Hes[i] = He;
      Nes[i] = 1/(1-He);
    } else { // Missing variant (row=NA)
      nsample[i] = NA_INTEGER;
    }
  }

  return Rcpp::DataFrame::create(var_info,
      _["n"]=nsample,
      _["Allele_counts"]=allele_counts,
      _["He"]=Hes,
      _["Ne"]=Nes
  );
}
Beispiel #6
0
// Calculate popgen summaries for each variant of the sample.
//
// var_info: data frame describing the variants. It should contain columns
//           named 'CHROM', 'POS', 'mask' and possibly others; only 'mask'
//           is read here.
// gt:       character matrix of genotypes, variants in rows and samples in
//           columns.
//
// Returns var_info with four added columns:
//   n:             number of non-NA genotypes in the row (NA_INTEGER for rows
//                  whose mask is not TRUE),
//   Allele_counts: comma delimited counts per allele index, starting at 0,
//   He:            1 minus the sum of powered allele frequencies (see the
//                  review note in the body),
//   Ne:            1 / (1 - He).
//
// Calls Rcpp::stop() when gt has more rows than mask has elements, or when a
// genotype contains a negative allele index. std::stoi throws when an allele
// token is not an integer.
//' @export
// [[Rcpp::export(name=".gt_to_popsum")]]
Rcpp::DataFrame gt_to_popsum(Rcpp::DataFrame var_info, Rcpp::CharacterMatrix gt) {
  Rcpp::LogicalVector   mask = var_info["mask"];
  Rcpp::IntegerVector   nsample(mask.size());
  Rcpp::StringVector    allele_counts(mask.size());
  Rcpp::NumericVector   Hes(mask.size());
  Rcpp::NumericVector   Nes(mask.size());

  // All result vectors are sized from mask, so extra rows in gt would be
  // read and written out of bounds.
  if(gt.nrow() > mask.size()){
    Rcpp::stop("gt has more rows than var_info");
  }

  for(int i=0; i < gt.nrow(); i++){ // Iterate over variants (rows)
    if(mask[i] == TRUE){
      // myalleles[a] holds how many times allele index a was observed.
      std::vector<int> myalleles (1,0);
      for(int j=0; j < gt.ncol(); j++){ // Iterate over samples (columns)
        if(gt(i, j) != NA_STRING){
          nsample[i]++;  // Increment sample count.

          // Split the genotype into its allele tokens.
          int unphased_as_na = 0; // 0 == FALSE
          std::vector < std::string > gt_vector;
          std::string gt2 = as<std::string>(gt(i,j));
          vcfRCommon::gtsplit( gt2, gt_vector, unphased_as_na );

          // Count alleles per sample.
          for(std::size_t k=0; k < gt_vector.size(); k++){
            const int myAllele = std::stoi(gt_vector[k]);
            // A negative index cannot be stored; compared against an
            // unsigned size it would also look like a huge value.
            if(myAllele < 0){
              Rcpp::stop("negative allele index in genotype");
            }
            // If this genotype had an allele we did not previously observe
            // we'll have to grow the vector.
            if(static_cast<std::size_t>(myAllele) >= myalleles.size()){
              myalleles.resize(static_cast<std::size_t>(myAllele) + 1, 0);
            }
            myalleles[myAllele]++;
          }
        }
      }

      // Concatenate allele counts into a comma delimited string. Building a
      // std::string removes the fixed-size buffer and the sprintf call that
      // used the same buffer as both source and destination.
      std::string counts_str = std::to_string(myalleles[0]);
      for(std::size_t a=1; a < myalleles.size(); a++){
        counts_str += ',';
        counts_str += std::to_string(myalleles[a]);
      }
      allele_counts[i] = counts_str;

      // Sum all alleles.
      int nalleles = 0;
      for(std::size_t a=0; a < myalleles.size(); a++){
        nalleles = nalleles + myalleles[a];
      }

      // Stats.
      // NOTE(review): the exponent is the number of allele slots, not a
      // fixed 2 -- confirm this is the intended definition.
      double He = 1;
      for(std::size_t a=0; a < myalleles.size(); a++){
        He = He - pow(double(myalleles[a])/double(nalleles), myalleles.size());
      }
      Hes[i] = He;
      Nes[i] = 1/(1-He);
    } else { // Missing variant (row=NA)
      nsample[i] = NA_INTEGER;
    }
  }

  return Rcpp::DataFrame::create(var_info,
      _["n"]=nsample,
      _["Allele_counts"]=allele_counts,
      _["He"]=Hes,
      _["Ne"]=Nes
  );
}