Ejemplo n.º 1
0
// Runs qtlnorm() on every column of `mat` against the reference vector `ref`
// and returns the results in a freshly allocated matrix of the same shape.
//
//   mat      input matrix; its row count must equal the length of `ref`.
//   ref      reference values; must already be sorted in ascending order.
//   nthreads number of OpenMP threads requested for the column loop.
//   seed     base seed; expanded through std::seed_seq into one 32-bit seed
//            per column, so each column gets its own std::mt19937.
//
// Stops with an R error when the sizes disagree or `ref` is not sorted.
// The "dimnames" attribute of `mat` is copied onto the result.
// [[Rcpp::export]]
Rcpp::IntegerMatrix quantileNorm(Rcpp::IntegerMatrix mat, Rcpp::IntegerVector ref, int nthreads=1, int seed=13){
    if (!(mat.nrow() == ref.length())) Rcpp::stop("incompatible arrays...");
    if (!std::is_sorted(ref.begin(), ref.end())) Rcpp::stop("ref must be sorted");
    const int nRows = mat.nrow();
    const int nCols = mat.ncol();

    // Output matrix plus the lightweight views handed to qtlnorm.
    Rcpp::IntegerMatrix res(nRows, nCols);
    Mat<int> srcView = asMat(mat);
    Mat<int> dstView = asMat(res);
    Vec<int> refView = asVec(ref);

    // Derive one seed per column from the user-supplied seed.
    std::vector<std::uint32_t> columnSeeds(nCols);
    std::seed_seq seeder{seed};
    seeder.generate(columnSeeds.begin(), columnSeeds.end());

    #pragma omp parallel num_threads(nthreads)
    {
        // Scratch buffer of <value, index> pairs, declared inside the
        // parallel region so each thread works on its own copy.
        std::vector<std::pair<int, int> > scratch(nRows);
        #pragma omp for
        for (int c = 0; c < nCols; ++c){
            std::mt19937 rng(columnSeeds[c]);
            qtlnorm(srcView.getCol(c), refView, dstView.getCol(c), scratch, rng);
        }
    }

    res.attr("dimnames") = mat.attr("dimnames");
    return res;
}
Ejemplo n.º 2
0
// Calculate mk = sum_i I(M(ti)=k): after the call, mk[k] holds the number of
// entries of Mt that are equal to k. mk is reset to zero before counting.
// Original note: k = 1, ..., M with m0 = 0 (presumably Mt only holds values in
// 1..M, so mk[0] stays 0 -- confirm against the caller); h = (h0, ..., hM)
// with h0 = 0 and d = (d0, ..., dM) with d0 = 0, dM = R_PosInf.
// Each element of Mt is used directly as an index into mk.
void Getmk(Rcpp::IntegerVector& mk, const Rcpp::IntegerVector& Mt){
  std::fill(mk.begin(), mk.end(), 0);
  for (Rcpp::IntegerVector::const_iterator it = Mt.begin(); it != Mt.end(); ++it){
    mk[*it] += 1;
  }
}
Ejemplo n.º 3
0
// Builds an arithmetic sequence from `from_` towards `to_` in steps of `by_`.
//
// The three arguments are multiplied by a power-of-ten factor (`adjust`) and
// truncated to int, the sequence is generated on those integers with the
// add_multiple functor, and the result is divided by `adjust` again.
//
//   from_  first value of the sequence.
//   to_    upper limit of the sequence.
//   by_    step; must be a positive, finite number.
//
// Stops with an R error when `by_` is not positive and finite, when the
// scaled step truncates to zero, or when `to_` lies so far below `from_`
// that the element count would be negative.
// NOTE(review): steps that are not representable after scaling (e.g. 0.25,
// which scales to 2.5 and truncates to 2) are still truncated -- confirm
// callers only pass steps such as 1, 0.1, 0.01.
// [[Rcpp::export]]
Rcpp::NumericVector seqC(double from_, double to_, double by_ = 1.0) {
  // log10 of a non-positive or infinite ratio yields NaN/inf, and converting
  // that to int is undefined behaviour.
  if (!R_FINITE(by_) || !(by_ > 0)) {
    Rcpp::stop("'by_' must be a positive, finite number");
  }

  int adjust = std::pow(10, std::ceil(std::log10(10 / by_)) - 1);
  // For by_ >= 10 the power is 0.1 or smaller and truncates to 0, which used
  // to make `by` zero and the division below trap. No scaling is needed then.
  if (adjust < 1) adjust = 1;

  int from = adjust * from_;
  int to = adjust * to_;
  int by = adjust * by_;
  if (by < 1) {
    Rcpp::stop("'by_' is too small to be scaled to an integer step");
  }

  // Keep the count signed until it is known to be non-negative; casting a
  // negative value to an unsigned size requests an enormous allocation.
  const int count = ((to - from) / by) + 1;
  if (count < 0) {
    Rcpp::stop("wrong sign in 'by_' argument");
  }
  std::size_t n = count;

  Rcpp::IntegerVector res = Rcpp::rep(from, n);
  add_multiple ftor(by);

  std::transform(res.begin(), res.end(), res.begin(), ftor);
  return Rcpp::NumericVector(res) / adjust;
}
Ejemplo n.º 4
0
// Counts the marker indices contained in a list of integer vectors.
//
//   preClusterResults_  R list whose elements are converted to integer vectors.
//   noDuplicates        output flag: set to true when no value occurs more
//                       than once across all list elements, false otherwise.
//
// Returns the total number of values, duplicates included.
R_xlen_t countPreClusterMarkers(SEXP preClusterResults_, bool& noDuplicates)
{
	Rcpp::List groups = preClusterResults_;

	// Flatten every list element into one vector.
	std::vector<int> allMarkers;
	for(Rcpp::List::iterator group = groups.begin(); group != groups.end(); ++group)
	{
		Rcpp::IntegerVector groupMarkers = *group;
		allMarkers.insert(allMarkers.end(), groupMarkers.begin(), groupMarkers.end());
	}
	const R_xlen_t totalCount = allMarkers.size();

	// Sort, then collapse runs of equal values to count the distinct ones.
	std::sort(allMarkers.begin(), allMarkers.end());
	const R_xlen_t distinctCount = std::distance(allMarkers.begin(), std::unique(allMarkers.begin(), allMarkers.end()));

	noDuplicates = (totalCount == distinctCount);
	return totalCount;
}
Ejemplo n.º 5
0
// Builds a symmetric maxCluster x maxCluster matrix of average between-cluster
// values.
//
//   data                packed lower-triangular byte matrix: the entry for the
//                       pair (x, y) with x >= y is read from
//                       data[x * (x + 1) / 2 + y]. Each byte is an index into
//                       `levels`; the value 255 marks NA.
//   levels              numeric value associated with each byte code.
//   size                not used by this function.
//   clusters_           integer vector of 1-based cluster ids; the ids must be
//                       consecutive and start at 1.
//   start               offset into currentPermutation of the first clustered
//                       marker.
//   currentPermutation  currentPermutation[start + i] is the marker index that
//                       belongs to clusters[i].
//
// For every pair of clusters the byte codes of all (row marker, column marker)
// pairs are tabulated, weighted by `levels`, and divided by the number of
// pairs. Returns the resulting Rcpp::NumericMatrix.
//
// Throws std::runtime_error when the clusters are empty or not consecutive
// from 1, when start/clusters do not fit inside currentPermutation, when an NA
// byte (255) is met, or when a byte has no entry in `levels`.
SEXP constructDissimilarityMatrixInternal(unsigned char* data, std::vector<double>& levels, int size, SEXP clusters_, int start, const std::vector<int>& currentPermutation)
{
	Rcpp::IntegerVector clusters = Rcpp::as<Rcpp::IntegerVector>(clusters_);
	// min_element/max_element return end() for an empty range; dereferencing
	// that is undefined, so reject empty input first.
	if(clusters.size() == 0)
	{
		throw std::runtime_error("Input clusters cannot be empty");
	}
	const int minCluster = *std::min_element(clusters.begin(), clusters.end());
	const int maxCluster = *std::max_element(clusters.begin(), clusters.end());
	if(minCluster != 1)
	{
		throw std::runtime_error("Clusters must have consecutive indices starting at 1");
	}
	if(start < 0 || static_cast<std::size_t>(start) + static_cast<std::size_t>(clusters.size()) > currentPermutation.size())
	{
		throw std::runtime_error("Input start and clusters exceed the length of the permutation");
	}

	// groupIndices[c - 1] lists the marker indices assigned to cluster c.
	std::vector<std::vector<int> > groupIndices(maxCluster);
	for(R_xlen_t i = 0; i < clusters.size(); i++)
	{
		groupIndices[clusters[i]-1].push_back(currentPermutation[i + start]);
	}
	// A gap in the cluster ids leaves an empty group, which would turn the
	// average below into 0/0.
	for(std::size_t group = 0; group < groupIndices.size(); group++)
	{
		if(groupIndices[group].empty())
		{
			throw std::runtime_error("Clusters must have consecutive indices starting at 1");
		}
	}

	// table[b] counts how often byte code b occurs for the current cluster pair.
	std::vector<int> table(levels.size());

	Rcpp::NumericMatrix result(maxCluster, maxCluster);
	for(int rowCluster = 1; rowCluster <= maxCluster; rowCluster++)
	{
		const std::vector<int>& rowIndices = groupIndices[rowCluster-1];
		for(int columnCluster = 1; columnCluster <= rowCluster; columnCluster++)
		{
			const std::vector<int>& columnIndices = groupIndices[columnCluster-1];
			std::fill(table.begin(), table.end(), 0);
			for(std::vector<int>::const_iterator columnMarker = columnIndices.begin(); columnMarker != columnIndices.end(); ++columnMarker)
			{
				for(std::vector<int>::const_iterator rowMarker = rowIndices.begin(); rowMarker != rowIndices.end(); ++rowMarker)
				{
					// Only the lower triangle is stored, so order the pair as x >= y.
					R_xlen_t x = *rowMarker, y = *columnMarker;
					if(x < y) std::swap(x, y);
					const int byte = data[x * (x + 1) / 2 + y];
					if(byte == 255) throw std::runtime_error("Values of NA not allowed");
					if(static_cast<std::size_t>(byte) >= table.size())
					{
						throw std::runtime_error("Value in data has no corresponding entry in levels");
					}
					table[byte]++;
				}
			}
			double sum = 0;
			for(std::size_t i = 0; i < table.size(); i++) sum += table[i] * levels[i];
			const double average = sum / (columnIndices.size() * rowIndices.size());
			result(rowCluster-1, columnCluster-1) = result(columnCluster-1, rowCluster-1) = average;
		}
	}
	return result;
}
Ejemplo n.º 6
0
// Computes 1-based ranks of `x`, giving tied values the average of the ranks
// they span.
//
//   x  numeric vector to rank; ordering is defined by Comparator(x).
//
// Returns a numeric vector of the same length where element j is the rank of
// x[j]. An empty input yields an empty result.
//
// The loop is deliberately sequential: each tie group starts where the
// previous one ended (the stride `n` is computed inside the body), so the
// iterations depend on each other and cannot be an OpenMP parallel-for.
// [[Rcpp::export]]
NumericVector avg_rank(Rcpp::NumericVector x)
{
    const R_xlen_t sz = x.size();
    if (sz == 0) {
        // Rcpp::seq(0, -1) would throw; nothing to rank.
        return Rcpp::NumericVector(0);
    }

    // w holds the indices of x in sorted order.
    Rcpp::IntegerVector w = Rcpp::seq(0, sz - 1);
    std::sort(w.begin(), w.end(), Comparator(x));

    Rcpp::NumericVector r = Rcpp::no_init_vector(sz);
    R_xlen_t i = 0;
    while (i < sz) {
        // n = length of the run of equal values starting at sorted position i.
        R_xlen_t n = 1;
        while (i + n < sz && x[w[i]] == x[w[i + n]]) ++n;

        // The run occupies ranks i+1 .. i+n, whose mean is i + (n + 1) / 2.
        const double rank = i + (n + 1) / 2.;
        for (R_xlen_t k = 0; k < n; k++) {
            r[w[i + k]] = rank;
        }
        i += n;
    }

    return r;
}
Ejemplo n.º 7
0
// [[Rcpp::export]]
Rcpp::CharacterMatrix read_body_gz(std::string x,
                                   Rcpp::NumericVector stats,
                                   int nrows = -1,
                                   int skip = 0,
                                   Rcpp::IntegerVector cols = 0,
                                   int convertNA = 1,
                                   int verbose = 1) {

  // NA matrix for unexpected results.
  Rcpp::StringMatrix na_matrix(1,1);
  na_matrix(0,0) = NA_STRING;
  
  
  /*
   * Manage cols vector.
   * The first eight (1-based) columns are mandatory.
   * We can ensure they are there by adding them,
   * sorting and removing adjacent non-identical values.
   */
//  for( int i=9; i >= 1; i-- ){
  for( int i=8; i >= 1; i-- ){
    cols.push_front(i);
  }
  cols.sort();

  // Remove duplicate values using a set.
  std::set<int> s( cols.begin(), cols.end() );
  cols.assign( s.begin(), s.end() );

  cols = cols - 1; // R is 1-based, C is 0-based.

  
  // Initialize matrix for body data.
  // old: Rcpp::CharacterMatrix gt(stats[2], stats[3]);
  int row_num = 0;
  

  if( ( nrows == -1 ) & ( skip == 0 ) ){
    nrows = stats[2];
  } else if ( ( nrows != -1 ) & ( skip == 0 ) ){
    // nrows = nrows;
  } else if ( ( nrows == -1 ) & ( skip > 0) ){
    nrows = stats[2] - skip;
  } else if ( ( nrows != -1 ) & ( skip > 0) ){
    // nrows = nrows;
  } else {
    Rcpp::Rcerr << "failed to calculate return matrix geometry.";
    return na_matrix;
  }
  Rcpp::CharacterMatrix gt( nrows, cols.size() );
  
//  if ( nrows > -1 & skip == 0 ){
//    row_num = nrows;
//  } else if ( nrows == -1 & skip > 0 ){
//    row_num = stats[2] - skip;
//  } else {
//    row_num = stats[2];    
//  }
//  Rcpp::CharacterMatrix gt( row_num, cols.size() );

  row_num = 0;
  
  if( verbose == 1 ){
    Rcpp::Rcout << "Character matrix gt created.\n";
    Rcpp::Rcout << "Character matrix gt rows: ";  Rcpp::Rcout << gt.rows();
    Rcpp::Rcout << "\n";
    Rcpp::Rcout << "Character matrix gt cols: ";  Rcpp::Rcout << gt.cols();
    Rcpp::Rcout << "\n";
    Rcpp::Rcout << "skip: ";  Rcpp::Rcout << skip;
    Rcpp::Rcout << "\n";
    Rcpp::Rcout << "nrows: ";  Rcpp::Rcout << nrows;
    Rcpp::Rcout << "\n";
    Rcpp::Rcout << "row_num: ";  Rcpp::Rcout << row_num;
    Rcpp::Rcout << "\n";
    Rcpp::Rcout << "\n";
  }

  
  // Create filehandle and open.
  gzFile file;
  file = gzopen (x.c_str(), "r");
  if (! file) {
    Rcpp::Rcerr << "gzopen of " << x << " failed: " << strerror (errno) << ".\n";
    return na_matrix;
  }


  // Because the last line may be incomplete,
  // We'll typically omit it from processing and
  // concatenate it to the first line.
  // But first we'll have to initialize it.
  std::string lastline = "";
  
  // String vector to store the header (^#CHROM...).
  std::vector<std::string> header_vec;
  
  // variant counter.  
  int var_num = 0;


  // Scroll through buffers.
  while (1) {
    Rcpp::checkUserInterrupt();
    int err;

    // Slurp in a buffer.
    int bytes_read;
    char buffer[LENGTH];
    bytes_read = gzread (file, buffer, LENGTH - 1);
    buffer[bytes_read] = '\0'; // Terminate the buffer.

    std::string mystring(reinterpret_cast<char*>(buffer));  // Recast buffer as a string.
    mystring = lastline + mystring; // Concatenate last line to the buffer
    
    // Delimit into lines.
    std::vector < std::string > svec;  // Initialize vector of strings for parsed buffer.
    char split = '\n'; // Must be single quotes!
    vcfRCommon::strsplit(mystring, svec, split);
    
    /* 
    svec should now contain a vector of strings,
    one string for each line
    where the last line may be incomplete.
    We can now process each line except the last.
    */

    // Scroll through lines.
    unsigned int i = 0;
    for(i = 0; i < svec.size() - 1; i++){
      
      // Check and remove carriage returns (Windows).
      if( svec[i][ svec[i].size()-1] == '\r' ){
        svec[i].erase( svec[i].size() - 1 );
      }

      if(svec[i][0] == '#' && svec[i][1] == '#'){
        // Meta line, ignore.
      } else if(svec[i][0] == '#' && svec[i][1] == 'C'){
        // Process header.
//        Rcpp::Rcout << svec[i].substr(0,40) << "\n\n";
        char header_split = '\t';
        vcfRCommon::strsplit(svec[i], header_vec, header_split);
        
        // Subset the header to select columns.
        std::vector<std::string> header_vec2( cols.size() );
        for(int j=0; j<cols.size(); j++){
          header_vec2[j] = header_vec[ cols[j] ];
        }
        header_vec = header_vec2;
      } else {
        // Variant line.

        if ( ( var_num >= skip ) & ( row_num < nrows ) ){
          proc_body_line(gt, row_num, svec[i], cols, convertNA);
          row_num++; // Return matrix row number.
        }
        var_num++; // Input row number.


        if(var_num % nreport == 0 && verbose == 1){
          Rcpp::Rcout << "\rProcessed variant " << var_num;
        }
      }   
    }

    
    // Processed all lines of current buffer.
    // Keep the last line so we can append it to 
    //the beginning of the next buffer.
    lastline = svec[svec.size() - 1];

//    Rcpp::Rcout << "line-2:" << svec[svec.size() - 2].substr(0,40) << "|<-\n";
//    Rcpp::Rcout << "line-1:" << svec[svec.size() - 1].substr(0,40) << "|<-\n";
//    Rcpp::Rcout << "\n";

      
    /*
     * We can bail out early if we have read nrows.
     * Before we do we need to check that:
     * 1) we have read in nrows
     * 2) we have processed the header
     * (important when nrows is small)
     * 3) we actually have a line (when buffer ends at the end of a line).
     */
    if( ( row_num >= nrows ) & ( lastline[0] != '#' ) & ( lastline.size() > 0 ) ){
//        Rcpp::Rcout << "\nBreaking!\n";
//        Rcpp::Rcout << "lastline: " << lastline.substr(0,40) << "\n";
        break;
    }


    // Check for EOF or errors.
    if (bytes_read < LENGTH - 1) {
      if (gzeof (file)) {
        break;
      }
      else {
        const char * error_string;
        error_string = gzerror (file, & err);
        if (err) {
          Rcpp::Rcerr << "Error: " << error_string << ".\n";
          return na_matrix;
        }
      }
    }
    
  // Return to top of loop and process another buffer.
  } // Close while.
  
  // Close filehandle.
  gzclose (file);

//  Rcpp::Rcout << "\n\n>>---<< Made it: file close! >>---<<\n\n";
//  Rcpp::Rcout << "header_vec.size(): " << header_vec.size() << "\n";
  
  
  if( stats[1] == 0 ){
    if( verbose == 1 ){
      Rcpp::Rcout << "Warning: no header information was found! Data contains no sample names!\n";
    }
  } else {

    if( header_vec.size() == (unsigned)gt.ncol() ){
      header_vec[0] = "CHROM";
      gt.attr("dimnames") = Rcpp::List::create(Rcpp::CharacterVector::create(), header_vec);
    } else {
      if( verbose == 1 ){
        Rcpp::Rcout << "Warning: no header information found!\n";
      }
    }
  }

//  Rcpp::Rcout << "\n\n>>---<< Made it! >>---<<\n\n";

    
  if(verbose == 1){
    Rcpp::Rcout << "\rProcessed variant: " << var_num;
    Rcpp::Rcout << "\nAll variants processed\n";
  }

//  Rcpp::DataFrame df1 = Rcpp::DataFrame::create(gt);
//  Rcpp::DataFrame df1(gt);
//  df1.names() = header_vec;
//  if(verbose == 1){
//    Rcpp::Rcout << "Rcpp::DataFrame created.\n";
//  }
  

  return gt;
}
Ejemplo n.º 8
0
// Draws `n` distinct integers uniformly at random from min..max (inclusive).
//
//   n    number of values to draw; must satisfy 0 <= n <= max - min + 1.
//   min  smallest candidate value.
//   max  largest candidate value.
//
// Returns an integer vector of length `n`; `n == 0` gives an empty vector.
// Stops with an R error when `n` is outside the valid range.
//
// Randomness comes from R's generator (R::unif_rand), so results follow
// set.seed(). The wrapper generated by Rcpp::export provides the required
// RNGScope; a direct C++ caller must hold an Rcpp::RNGScope itself.
// std::random_shuffle is avoided because it was removed in C++17.
// [[Rcpp::export]]
Rcpp::IntegerVector sample_int(int n, int min, int max) {
    Rcpp::IntegerVector pool = Rcpp::seq(min, max);
    const R_xlen_t size = pool.size();
    if (n < 0 || n > size) {
        Rcpp::stop("'n' must be between 0 and the number of values in min..max");
    }
    if (n == 0) {
        // Range(0, -1) is not a valid range, so return the empty result directly.
        return Rcpp::IntegerVector(0);
    }

    // Partial Fisher-Yates shuffle: only the first n slots need to be settled.
    for (R_xlen_t i = 0; i < n; ++i) {
        R_xlen_t j = i + static_cast<R_xlen_t>(R::unif_rand() * (size - i));
        if (j >= size) j = size - 1; // guard against floating-point rounding
        const int held = pool[i];
        pool[i] = pool[j];
        pool[j] = held;
    }
    return pool[Rcpp::Range(0, n - 1)];
}
Ejemplo n.º 9
0
// Returns how many entries of `ances` are equal to 0.
//[[Rcpp::export]]
int nRoots (Rcpp::IntegerVector ances) {
    return std::count(ances.begin(), ances.end(), 0);
}
Ejemplo n.º 10
0
// Returns the distinct values found anywhere in `edge`, sorted in ascending
// order.
//[[Rcpp::export]]
Rcpp::IntegerVector getAllNodesSafe (Rcpp::IntegerMatrix edge) {
    // Flatten the matrix, then keep one copy of each value.
    Rcpp::IntegerVector nodes = Rcpp::unique(Rcpp::as_vector(edge));
    std::sort(nodes.begin(), nodes.end());
    return nodes;
}
Ejemplo n.º 11
0
// Returns an ascending-sorted copy of `x`; the input vector is cloned first,
// so the caller's data is left untouched.
// See http://gallery.rcpp.org/articles/sorting/
Rcpp::IntegerVector stl_sort(Rcpp::IntegerVector x) {
  Rcpp::IntegerVector sorted = Rcpp::clone(x);
  std::sort(sorted.begin(), sorted.end());
  return sorted;
}
Ejemplo n.º 12
0
int index(const int val, Rcpp::IntegerVector vec) {
	int ind = std::find(vec.begin(), vec.end(), val) - vec.begin();
	return ind;
}