// [[Rcpp::export]] Rcpp::IntegerMatrix quantileNorm(Rcpp::IntegerMatrix mat, Rcpp::IntegerVector ref, int nthreads=1, int seed=13){ if (mat.nrow() != ref.length()) Rcpp::stop("incompatible arrays..."); if (!std::is_sorted(ref.begin(), ref.end())) Rcpp::stop("ref must be sorted"); int ncol = mat.ncol(); int nrow = mat.nrow(); //allocate new matrix Rcpp::IntegerMatrix res(nrow, ncol); Mat<int> oldmat = asMat(mat); Mat<int> newmat = asMat(res); Vec<int> ref2 = asVec(ref); //allocate a seed for each column std::seed_seq sseq{seed}; std::vector<std::uint32_t> seeds(ncol); sseq.generate(seeds.begin(), seeds.end()); #pragma omp parallel num_threads(nthreads) { std::vector<std::pair<int, int> > storage(nrow);//pairs <value, index> #pragma omp for for (int col = 0; col < ncol; ++col){ std::mt19937 gen(seeds[col]); qtlnorm(oldmat.getCol(col), ref2, newmat.getCol(col), storage, gen); } } res.attr("dimnames") = mat.attr("dimnames"); return res; }
void CEnv::SetData(Rcpp::IntegerMatrix x_, Rcpp::IntegerMatrix mcz_) { int J = x_.nrow(); int n = x_.ncol(); int nZeroMC = mcz_.ncol(); if (mcz_.nrow() != J) { // no mcz nZeroMC = 0; } intvec x = Rcpp::as<intvec>(x_); intvec mcz = Rcpp::as<intvec>(mcz_); Rcpp::List something(x_.attr("levels")); intvec levels(something.size()); for (unsigned int i = 0; i < levels.size(); i++) { SEXP exp = something[i]; Rcpp::CharacterVector v(exp); levels[i] = v.size(); //Rprintf( "%d\n", levels[i]) ; } SetData(x, J, n, mcz, nZeroMC, levels); }
// [[Rcpp::export]] Rcpp::NumericMatrix CramersV_DF(Rcpp::IntegerMatrix dm, bool Bias_Cor) { int iCol = dm.ncol(); int i=0,j=0; Rcpp::NumericMatrix ResCV(iCol,iCol); for (i=0;i<iCol;i++){ for (j=i;j<iCol;j++){ if(i==j) {ResCV(i,j)=1;} else { ResCV(i,j) = CramersV_C((IntegerVector)dm(_,i),(IntegerVector)dm(_,j),(bool)Bias_Cor); ResCV(j,i) = ResCV(i,j); } } } Rcpp::List dimnms = Rcpp::List::create(VECTOR_ELT(dm.attr("dimnames"), 1), VECTOR_ELT(dm.attr("dimnames"), 1)); ResCV.attr("dimnames")=dimnms; return ResCV; }
/* This function specifically checks whether the observed data is consistent with the *pedigree*. It assumes that every observed value in the finals is already valid - That is, every observed value contained in the finals is also listed as a possibility in the hetData object */ void estimateRFCheckFunnels(Rcpp::IntegerMatrix finals, Rcpp::IntegerMatrix founders, Rcpp::List hetData, Rcpp::S4 pedigree, std::vector<int>& intercrossingGenerations, std::vector<std::string>& warnings, std::vector<std::string>& errors, std::vector<funnelType>& allFunnels, std::vector<funnelType>& lineFunnels) { Rcpp::CharacterVector pedigreeLineNames = Rcpp::as<Rcpp::CharacterVector>(pedigree.slot("lineNames")); //We make a copy of the pedigree line names and sort it (otherwise the std::find relating to pedigreeLineNames is prohibitive) std::vector<pedigreeLineStruct> sortedLineNames; sortPedigreeLineNames(pedigreeLineNames, sortedLineNames); Rcpp::IntegerVector mother = Rcpp::as<Rcpp::IntegerVector>(pedigree.slot("mother")); Rcpp::IntegerVector father = Rcpp::as<Rcpp::IntegerVector>(pedigree.slot("father")); bool warnImproperFunnels = Rcpp::as<bool>(pedigree.slot("warnImproperFunnels")); Rcpp::CharacterVector finalNames = Rcpp::as<Rcpp::CharacterVector>(Rcpp::as<Rcpp::List>(finals.attr("dimnames"))[0]); Rcpp::CharacterVector markerNames = Rcpp::as<Rcpp::CharacterVector>(Rcpp::as<Rcpp::List>(finals.attr("dimnames"))[1]); int nFinals = finals.nrow(), nFounders = founders.nrow(), nMarkers = finals.ncol(); if(nFounders != 2 && nFounders != 4 && nFounders != 8 && nFounders != 16) { throw std::runtime_error("Number of founders must be 2, 4, 8, or 16"); } xMajorMatrix<int> foundersToMarkerAlleles(nFounders, nFounders, nMarkers, -1); for(int markerCounter = 0; markerCounter < nMarkers; markerCounter++) { Rcpp::IntegerMatrix currentMarkerHetData = hetData(markerCounter); for(int founderCounter1 = 0; founderCounter1 < nFounders; founderCounter1++) { for(int founderCounter2 = 0; founderCounter2 < nFounders; founderCounter2++) { int markerAllele1 = founders(founderCounter1, markerCounter); int markerAllele2 = founders(founderCounter2, markerCounter); for(int hetDataRowCounter = 0; hetDataRowCounter < currentMarkerHetData.nrow(); hetDataRowCounter++) { if(markerAllele1 == currentMarkerHetData(hetDataRowCounter, 0) && markerAllele2 == currentMarkerHetData(hetDataRowCounter, 1)) { foundersToMarkerAlleles(founderCounter1, founderCounter2, markerCounter) = currentMarkerHetData(hetDataRowCounter, 2); } } } } } std::vector<long> individualsToCheckFunnels; for(long finalCounter = 0; finalCounter < nFinals; finalCounter++) { individualsToCheckFunnels.clear(); std::string currentLineName = Rcpp::as<std::string>(finalNames(finalCounter)); std::vector<pedigreeLineStruct>::iterator findLineName = std::lower_bound(sortedLineNames.begin(), sortedLineNames.end(), pedigreeLineStruct(currentLineName, -1)); if(findLineName == sortedLineNames.end() || findLineName->lineName != currentLineName) { std::stringstream ss; ss << "Unable to find line number " << finalCounter << " named " << finalNames(finalCounter) << " in pedigree"; throw std::runtime_error(ss.str().c_str()); } int pedigreeRow = findLineName->index; //This vector lists all the founders that are ancestors of the current line. This may comprise any number - E.g. if we have an AIC line descended from funnels 1,2,1,2 and 2,3,2,3 then this vector is going it contain 1,2,3 std::vector<int> representedFounders; if(intercrossingGenerations[finalCounter] == 0) { individualsToCheckFunnels.push_back(pedigreeRow); } else { try { getAICParentLines(mother, father, pedigreeRow, intercrossingGenerations[finalCounter], individualsToCheckFunnels); } catch(...) { std::stringstream ss; ss << "Error while attempting to trace intercrossing lines for line " << finalNames(finalCounter); errors.push_back(ss.str()); goto nextLine; } } //Now we know the lines for which we need to check the funnels from the pedigree (note: We don't necessarily have genotype data for all of these, it's purely a pedigree check) //Fixed length arrays to store funnels. If we have less than 16 founders then part of this is garbage and we don't use that bit.... funnelType funnel, copiedFunnel; for(std::vector<long>::iterator i = individualsToCheckFunnels.begin(); i != individualsToCheckFunnels.end(); i++) { try { getFunnel(*i, mother, father, &(funnel.val[0]), nFounders); } catch(...) { std::stringstream ss; ss << "Attempting to trace pedigree for line " << finalNames(finalCounter) << ": Unable to get funnel for line " << pedigreeLineNames(*i); errors.push_back(ss.str()); goto nextLine; } //insert these founders into the vector containing all the represented founders representedFounders.insert(representedFounders.end(), &(funnel.val[0]), &(funnel.val[0]) + nFounders); //Copy the funnel memcpy(&copiedFunnel, &funnel, sizeof(funnelType)); std::sort(&(copiedFunnel.val[0]), &(copiedFunnel.val[0]) + nFounders); if(std::unique(&(copiedFunnel.val[0]), &(copiedFunnel.val[0]) + nFounders) != &(copiedFunnel.val[0]) + nFounders) { //If we have intercrossing generations then having repeated founders is an error. Otherwise if warnImproperFunnels is true it's still a warning. if(intercrossingGenerations[finalCounter] != 0 || warnImproperFunnels) { std::stringstream ss; ss << "Funnel for line " << pedigreeLineNames(*i) << " contained founders {" << funnel.val[0]; if(nFounders == 2) { ss << ", " << funnel.val[1] << "}"; } else if(nFounders == 4) { ss << ", " << funnel.val[1] << ", " << funnel.val[2] << ", " << funnel.val[3] << "}"; } else if(nFounders == 8) { ss << ", " << funnel.val[1] << ", " << funnel.val[2] << ", " << funnel.val[3] << ", " << funnel.val[4] << ", " << funnel.val[5] << ", " << funnel.val[6] << ", " << funnel.val[7]<< "}"; } else if (nFounders == 16) { ss << ", " << funnel.val[1] << ", " << funnel.val[2] << ", " << funnel.val[3] << ", " << funnel.val[4] << ", " << funnel.val[5] << ", " << funnel.val[6] << ", " << funnel.val[7] << ", " << funnel.val[8] << ", " << funnel.val[9] << ", " << funnel.val[10] << ", " << funnel.val[11] << ", " << funnel.val[12] << ", " << funnel.val[13] << ", " << funnel.val[14] << ", " << funnel.val[15] << "}"; } //In this case it's an error if(intercrossingGenerations[finalCounter] != 0) { ss << ". Repeated founders are only allowed with zero generations of intercrossing"; errors.push_back(ss.str()); } //But if we have zero intercrossing generations then it's only a warning else { ss << ". Did you intend to use all " << nFounders << " founders?"; warnings.push_back(ss.str()); } } } allFunnels.push_back(funnel); } //remove duplicates in representedFounders std::sort(representedFounders.begin(), representedFounders.end()); representedFounders.erase(std::unique(representedFounders.begin(), representedFounders.end()), representedFounders.end()); //Try and check for inconsistent generations of selfing for(std::vector<int>::iterator i = representedFounders.begin(); i != representedFounders.end(); i++) { if(*i > nFounders) { std::stringstream ss; ss << "Error in pedigree for line number " << finalCounter << " named " << finalNames(finalCounter) << ". Inconsistent number of generations of intercrossing"; errors.push_back(ss.str()); goto nextLine; } } //Not having all the founders in the input funnels is more serious if it causes the observed marker data to be impossible. So check for this. for(int markerCounter = 0; markerCounter < nMarkers; markerCounter++) { bool okMarker = false; //If observed value is an NA then than's ok, continue int value = finals(finalCounter, markerCounter); if(value == NA_INTEGER) continue; for(std::vector<int>::iterator founderIterator1 = representedFounders.begin(); founderIterator1 != representedFounders.end(); founderIterator1++) { for(std::vector<int>::iterator founderIterator2 = representedFounders.begin(); founderIterator2 != representedFounders.end(); founderIterator2++) { //Note that founderIterator comes from representedFounders, which comes from getFunnel - Which returns values starting at 1, not 0. So we have to subtract one. if(finals(finalCounter, markerCounter) == foundersToMarkerAlleles((*founderIterator1)-1, (*founderIterator2)-1, markerCounter)) { okMarker = true; break; } } } if(!okMarker) { std::stringstream ss; ss << "Data for marker " << markerNames(markerCounter) << " is impossible for individual " << finalNames(finalCounter) << " with given pedigree"; errors.push_back(ss.str()); if(errors.size() > 1000) return; goto nextLine; } } //In this case individualsToCheckFunnels contains one element => getFunnel was only called once => we can reuse the funnel variable if(intercrossingGenerations[finalCounter] == 0) { orderFunnel(&(funnel.val[0]), nFounders); lineFunnels.push_back(funnel); } else { //Add a dummy value in lineFunnel for(int i = 0; i < 16; i++) funnel.val[i] = 0; lineFunnels.push_back(funnel); } nextLine: ; } }
// [[Rcpp::export]] XPtrImage magick_image_readbitmap_native(Rcpp::IntegerMatrix x){ Rcpp::IntegerVector dims(x.attr("dim")); return magick_image_bitmap(x.begin(), Magick::CharPixel, 4, dims[1], dims[0]); }