//[[Rcpp::export]] extern "C" SEXP notbayesEstimation(SEXP Xs, SEXP Ys, SEXP Zs, SEXP Vs) { Rcpp::NumericVector Xr(Xs); Rcpp::NumericMatrix Yr(Ys); Rcpp::NumericMatrix Zr(Zs); Rcpp::NumericMatrix Vr(Vs); int n = Yr.nrow(), k = Yr.ncol(); int l = Vr.nrow(), m = Vr.ncol(); arma::mat x(Xr.begin(),n,k,false); //fit2$sigma arma::mat y(Yr.begin(),n,k,false); //fit2b$stdev.unscaled arma::mat z(Zr.begin(),1,1,false); //min.variance.factor arma::mat v(Vr.begin(),l,m,false); //fit2$df.residual arma::mat sda(n,1); arma::vec dof(n); arma::vec sd(n); sd = sqrt( (y%x)%(y%x) + as_scalar(z)); sda = sd/(x%y); dof = v; Rcpp::NumericVector Sd = Rcpp::wrap(sd); Rcpp::NumericVector Sda = Rcpp::wrap(sda); Rcpp::NumericVector DOF = Rcpp::wrap(dof); Sd.names() = Rcpp::List(Yr.attr("dimnames"))[0]; Sda.names() = Rcpp::List(Yr.attr("dimnames"))[0]; return Rcpp::List::create( Rcpp::Named("SD") = Sd, Rcpp::Named("DOF") = DOF, Rcpp::Named("sd.alpha") = Sda); }
// [[Rcpp::export]] Rcpp::NumericMatrix infoContentMethod_cpp( Rcpp::StringVector& id1_, Rcpp::StringVector& id2_, Rcpp::List& anc_, Rcpp::NumericVector& ic_, const std::string& method_, const std::string& ont_ ) { go_dist_func_t* go_dist; // Resnik does not consider how distant the terms are from their common ancestor. // Lin and Jiang take that distance into account. if (method_ == "Resnik") { go_dist = &go_dist_Resnik; } else if (method_ == "Lin") { go_dist = &go_dist_Lin; } else if (method_ == "Jiang") { go_dist = &go_dist_Jiang; } else if (method_ == "Rel") { go_dist = &go_dist_Rel; } else { throw std::runtime_error( "Unknown GO distance method" ); } typedef std::string term_id_t; typedef std::set<term_id_t> term_set_t; // calculate the maximum IC and build the map of normalized IC typedef std::map<term_id_t, double> ic_map_t; ic_map_t normIcMap; // more specific term, larger IC value. // Normalized, all divide the most informative IC. // all IC values range from 0(root node) to 1(most specific node) double mic = NA_REAL; { Rcpp::StringVector icNames( ic_.names() ); for (std::size_t i=0; i < ic_.size(); i++ ) { const double cic = ic_[i]; if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; if ( Rcpp::NumericVector::is_na( mic ) || mic < cic ) mic = cic; } LOG_DEBUG( "mic=" << mic ); for (std::size_t i=0; i < ic_.size(); i++ ) { const double cic = ic_[i]; if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; normIcMap.insert( std::make_pair( (std::string) icNames[i], cic / mic ) ); } } // set root node IC to 0 if(ont_ == "DO") { normIcMap["DOID:4"] = 0; } else { normIcMap["all"] = 0; } // convert anc_ into map of sets typedef std::map<term_id_t, term_set_t> anc_map_t; anc_map_t ancMap; { Rcpp::StringVector goTerms( anc_.names() ); for (std::size_t i=0; i < anc_.size(); i++ ) { const std::vector<std::string> ancVec = Rcpp::as<std::vector<std::string> >( anc_[i] ); term_set_t ancestors( ancVec.begin(), ancVec.end() ); // term itself is also considered an ancestor ancestors.insert( (std::string)goTerms[i] ); ancMap.insert( std::make_pair( (std::string) goTerms[i], ancestors ) ); } } Rcpp::NumericMatrix res( id1_.size(), id2_.size() ); res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ ); for ( std::size_t i = 0; i < id1_.size(); i++ ) { const std::string id1_term = (std::string)id1_[i]; const ic_map_t::const_iterator iIcIt = normIcMap.find( id1_term ); if ( iIcIt != normIcMap.end() && iIcIt->second != 0 ) { const double iIc = iIcIt->second; LOG_DEBUG( "ic[" << id1_term << "]=" << iIc ); const anc_map_t::const_iterator iAncsIt = ancMap.find( id1_term ); for ( std::size_t j = 0; j < id2_.size(); j++ ) { const std::string id2_term = (std::string)id2_[j]; const ic_map_t::const_iterator jIcIt = normIcMap.find( id2_term ); if ( jIcIt != normIcMap.end() && jIcIt->second != 0 ) { const anc_map_t::const_iterator jAncsIt = ancMap.find( id2_term ); // find common ancestors term_set_t commonAncs; if ( iAncsIt != ancMap.end() && jAncsIt != ancMap.end() ) { std::set_intersection( iAncsIt->second.begin(), iAncsIt->second.end(), jAncsIt->second.begin(), jAncsIt->second.end(), std::inserter( commonAncs, commonAncs.end() ) ); } LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() ); // Information Content of the most informative common ancestor (MICA) double mica = 0; for ( term_set_t::const_iterator termIt = commonAncs.begin(); termIt != commonAncs.end(); ++termIt ) { ic_map_t::const_iterator ancIcIt = normIcMap.find( *termIt ); if ( ancIcIt != normIcMap.end() && mica < ancIcIt->second ) mica = ancIcIt->second; } LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica ); res(i,j) = go_dist( mica, iIc, jIcIt->second, mic ); } else { res(i,j) = NA_REAL; } } } else { for ( std::size_t j = 0; j < id2_.size(); j++ ) { res(i,j) = NA_REAL; } } } return ( res ); }
///' Calculate the network properties, data matrix not provided ///' ///' @details ///' \subsection{Input expectations:}{ ///' Note that this function expects all inputs to be sensible, as checked by ///' the R function 'checkUserInput' and processed by 'networkProperties'. ///' ///' These requirements are: ///' \itemize{ ///' \item{'net' is a square matrix, and its rownames are identical to its ///' column names.} ///' \item{'moduleAssigments' is a named character vector, where the names ///' represent node labels found in the discovery dataset. Unlike ///' 'PermutationProcedure', these may include nodes that are not ///' present in 'data' and 'net'.} ///' \item{The module labels specified in 'modules' must occur in ///' 'moduleAssignments'.} ///' } ///' } ///' ///' @param net adjacency matrix of network edge weights between all pairs of ///' nodes in the dataset in which to calculate the network properties. ///' @param moduleAssignments a named character vector containing the module ///' each node belongs to in the discovery dataset. ///' @param modules a character vector of modules for which to calculate the ///' network properties for. ///' ///' @return a list containing the summary profile, node contribution, module ///' coherence, weighted degree, and average edge weight for each 'module'. ///' ///' @keywords internal // [[Rcpp::export]] Rcpp::List NetPropsNoData ( Rcpp::NumericMatrix net, Rcpp::CharacterVector moduleAssignments, Rcpp::CharacterVector modules ) { // convert the colnames / rownames to C++ equivalents const std::vector<std::string> nodeNames (Rcpp::as<std::vector<std::string>>(colnames(net))); unsigned int nNodes = net.ncol(); R_CheckUserInterrupt(); /* Next, we need to create two mappings: * - From node IDs to indices in the dataset of interest * - From modules to node IDs * - From modules to only node IDs present in the dataset of interest */ const namemap nodeIdxMap = MakeIdxMap(nodeNames); const stringmap modNodeMap = MakeModMap(moduleAssignments); const stringmap modNodePresentMap = MakeModMap(moduleAssignments, nodeIdxMap); // What modules do we actually want to analyse? const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules)); R_CheckUserInterrupt(); // Calculate the network properties for each module std::string mod; // iterators unsigned int mNodesPresent, mNodes; arma::uvec nodeIdx, propIdx, nodeRank; namemap propIdxMap; std::vector<std::string> modNodeNames; arma::vec WD; // results containers double avgWeight; Rcpp::NumericVector degree; // for casting to R equivalents Rcpp::List results; // final storage container for (auto mi = mods.begin(); mi != mods.end(); ++mi) { // What nodes are in this module? // modNodeNames = names(moduleAssignments[moduleAssignments == mod]) mod = *mi; modNodeNames = GetModNodeNames(mod, modNodeMap); // initialise results containers with NA values for nodes not present in // the dataset we're calculating the network properties in. degree = Rcpp::NumericVector(modNodeNames.size(), NA_REAL); avgWeight = NA_REAL; degree.names() = modNodeNames; // Create a mapping between node names and the result vectors propIdxMap = MakeIdxMap(modNodeNames); // Get just the indices of nodes that are present in the requested dataset nodeIdx = GetNodeIdx(mod, modNodePresentMap, nodeIdxMap); mNodesPresent = nodeIdx.n_elem; // And a mapping of those nodes to the initialised vectors propIdx = GetNodeIdx(mod, modNodePresentMap, propIdxMap); mNodes = propIdx.n_elem; // Calculate the properties if the module has nodes in the test dataset if (nodeIdx.n_elem > 0) { // sort the node indices for sequential memory access nodeRank = SortNodes(nodeIdx.memptr(), mNodesPresent); WD = WeightedDegree(net.begin(), nNodes, nodeIdx.memptr(), mNodesPresent); WD = WD(nodeRank); // reorder results avgWeight = AverageEdgeWeight(WD.memptr(), WD.n_elem); R_CheckUserInterrupt(); // Fill the results vectors appropriately Fill(degree, WD.memptr(), mNodesPresent, propIdx.memptr(), mNodes); } results.push_back( Rcpp::List::create( Rcpp::Named("degree") = degree, Rcpp::Named("avgWeight") = avgWeight ) ); } results.names() = mods; return(results); }
//[[Rcpp::export]] Rcpp::List checkTreeCpp(Rcpp::S4 obj, Rcpp::List opts) { std::string err, wrn; Rcpp::IntegerMatrix ed = obj.slot("edge"); int nrow = ed.nrow(); Rcpp::IntegerVector ances = getAnces(ed); //Rcpp::IntegerVector desc = getDesc(ed); int nroots = nRoots(ances); bool rooted = nroots > 0; Rcpp::NumericVector edLength = obj.slot("edge.length"); Rcpp::CharacterVector edLengthNm = edLength.names(); Rcpp::CharacterVector label = obj.slot("label"); Rcpp::CharacterVector labelNm = label.names(); Rcpp::CharacterVector edLabel = obj.slot("edge.label"); Rcpp::CharacterVector edLabelNm = edLabel.names(); Rcpp::IntegerVector allnodesSafe = getAllNodesSafe(ed); Rcpp::IntegerVector allnodesFast = getAllNodesFast(ed, rooted); int nEdLength = edLength.size(); int nLabel = label.size(); int nEdLabel = edLabel.size(); int nEdges = nrow; bool hasEdgeLength = !all_naC(edLength); // check tips int ntipsSafe = nTipsSafe(ances); int ntipsFast = nTipsFastCpp(ances); bool testnTips = ntipsFast == ntipsSafe; if (! testnTips) { err.append("Tips incorrectly labeled. "); } //check internal nodes bool testNodes = Rcpp::all(allnodesSafe == allnodesFast).is_true() && // is both ways comparison needed? Rcpp::all(allnodesFast == allnodesSafe).is_true(); if (! testNodes) { err.append("Nodes incorrectly labeled. "); } // check edge lengths if (hasEdgeLength) { if (nEdLength != nEdges) { err.append("Number of edge lengths do not match number of edges. "); } // if (nb_naC(edLength) > nroots) { // not enough! -- best done in R // err.append("Only the root should have NA as an edge length. "); // } if (getRange(edLength, TRUE)[0] < 0) { err.append("Edge lengths must be non-negative. "); } Rcpp::CharacterVector edgeLblSupp = edgeIdCpp(ed, "all"); Rcpp::CharacterVector edgeLblDiff = Rcpp::setdiff(edLengthNm, edgeLblSupp); if ( edgeLblDiff.size() != 0 ) { err.append("Edge lengths incorrectly labeled. "); } } // check label names Rcpp::CharacterVector chrLabelNm = Rcpp::as<Rcpp::CharacterVector>(allnodesFast); int j = 0; while (j < nroots) { //remove root(s) chrLabelNm.erase(0); j++; } bool testLabelNm = isLabelName(labelNm, chrLabelNm); if (!testLabelNm) { err.append("Tip and node labels must be a named vector, the names must match the node IDs. "); err.append("Use tipLabels<- and/or nodeLabels<- to update them. "); } // check that tips have labels Rcpp::CharacterVector tiplabel(ntipsFast); std::copy (label.begin(), label.begin()+ntipsFast, tiplabel.begin()); bool emptyTipLabel = is_true(any(Rcpp::is_na(tiplabel))); if ( emptyTipLabel ) { err.append("All tips must have a label."); } // check edgeLabels Rcpp::CharacterVector chrEdgeLblNm = edgeIdCpp(ed, "all"); bool testEdgeLblNm = isLabelName(edLabelNm, chrEdgeLblNm); if (!testEdgeLblNm) { err.append("Edge labels are not labelled correctly. Use the function edgeLabels<- to update them. "); } // make sure that tips and node labels are unique if (hasDuplicatedLabelsCpp(label)) { std::string labOpt = opts["allow.duplicated.labels"]; if (labOpt == "fail") { err.append("Labels are not unique. "); } if (labOpt == "warn") { wrn.append("Labels are not unique. "); } } // check for polytomies if (hasPolytomy(ances)) { std::string msgPoly = "Tree includes polytomies. "; std::string polyOpt = opts["poly"]; if (polyOpt == "fail") { err.append(msgPoly); } if (polyOpt == "warn") { wrn.append(msgPoly); } } // check number of roots if (nroots > 1) { std::string msgRoot = "Tree has more than one root. "; std::string rootOpt = opts["multiroot"]; if (rootOpt == "fail") { err.append(msgRoot); } if (rootOpt == "warn") { wrn.append(msgRoot); } } // check for singletons if (hasSingleton(ances)) { std::string msgSing = "Tree contains singleton nodes. "; std::string singOpt = opts["singleton"]; if (singOpt == "fail") { err.append(msgSing); } if (singOpt == "warn") { wrn.append(msgSing); } } return Rcpp::List::create(err, wrn); }