Пример #1
0
//[[Rcpp::export]]
extern "C" SEXP notbayesEstimation(SEXP Xs, SEXP Ys, SEXP Zs, SEXP Vs) {
Rcpp::NumericVector Xr(Xs);
Rcpp::NumericMatrix Yr(Ys);
Rcpp::NumericMatrix Zr(Zs);
Rcpp::NumericMatrix Vr(Vs);
int n = Yr.nrow(), k = Yr.ncol();
int l = Vr.nrow(), m = Vr.ncol();
arma::mat x(Xr.begin(),n,k,false); //fit2$sigma
arma::mat y(Yr.begin(),n,k,false); //fit2b$stdev.unscaled
arma::mat z(Zr.begin(),1,1,false); //min.variance.factor
arma::mat v(Vr.begin(),l,m,false); //fit2$df.residual


arma::mat sda(n,1);
arma::vec dof(n);
arma::vec sd(n);

sd  = sqrt(  (y%x)%(y%x) + as_scalar(z));
sda = sd/(x%y);
dof = v;
Rcpp::NumericVector Sd = Rcpp::wrap(sd);
Rcpp::NumericVector Sda = Rcpp::wrap(sda);
Rcpp::NumericVector DOF = Rcpp::wrap(dof);
   Sd.names() = Rcpp::List(Yr.attr("dimnames"))[0];
   Sda.names() = Rcpp::List(Yr.attr("dimnames"))[0];
 
return Rcpp::List::create( Rcpp::Named("SD") = Sd,
                           Rcpp::Named("DOF") =  DOF,
                           Rcpp::Named("sd.alpha") = Sda);
}
Пример #2
0
// [[Rcpp::export]]
Rcpp::NumericMatrix infoContentMethod_cpp(
    Rcpp::StringVector&  id1_,
    Rcpp::StringVector&  id2_,
    Rcpp::List&          anc_,
    Rcpp::NumericVector& ic_,
    const std::string&   method_,
    const std::string&   ont_
) {
    go_dist_func_t* go_dist;
    // Resnik does not consider how distant the terms are from their common ancestor.
    //  Lin and Jiang take that distance into account.
    if (method_ == "Resnik") {
        go_dist = &go_dist_Resnik;
    }
    else if (method_ == "Lin") {
        go_dist = &go_dist_Lin;
    }
    else if (method_ == "Jiang") {
        go_dist = &go_dist_Jiang;
    }
    else if (method_ == "Rel") {
        go_dist = &go_dist_Rel;
    }
    else {
        throw std::runtime_error( "Unknown GO distance method" );
    }

    typedef std::string term_id_t;
    typedef std::set<term_id_t> term_set_t;

    // calculate the maximum IC and build the map of normalized IC
    typedef std::map<term_id_t, double> ic_map_t;
    ic_map_t normIcMap;
    // more specific term, larger IC value.
    // Normalized, all divide the most informative IC.
    // all IC values range from 0(root node) to 1(most specific node)
    double mic = NA_REAL;
    {
        Rcpp::StringVector icNames( ic_.names() );
        for (std::size_t i=0; i < ic_.size(); i++ ) {
            const double cic = ic_[i];
            if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue;
            if ( Rcpp::NumericVector::is_na( mic ) || mic < cic ) mic = cic;
        }
        LOG_DEBUG( "mic=" << mic );
        for (std::size_t i=0; i < ic_.size(); i++ ) {
            const double cic = ic_[i];
            if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue;
            normIcMap.insert( std::make_pair( (std::string) icNames[i], cic / mic ) );
        }
    }

    // set root node IC to 0
    if(ont_ == "DO") {
        normIcMap["DOID:4"] = 0;
    } else {
        normIcMap["all"] = 0;
    }

    // convert anc_ into map of sets
    typedef std::map<term_id_t, term_set_t> anc_map_t;
    anc_map_t ancMap;
    {
        Rcpp::StringVector goTerms( anc_.names() );
        for (std::size_t i=0; i < anc_.size(); i++ ) {
            const std::vector<std::string> ancVec = Rcpp::as<std::vector<std::string> >( anc_[i] );
            term_set_t ancestors( ancVec.begin(), ancVec.end() );
            // term itself is also considered an ancestor
            ancestors.insert( (std::string)goTerms[i] );
            ancMap.insert( std::make_pair( (std::string) goTerms[i], ancestors ) );
        }
    }

    Rcpp::NumericMatrix res( id1_.size(), id2_.size() );
    res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ );
    for ( std::size_t i = 0; i < id1_.size(); i++ ) {
        const std::string id1_term = (std::string)id1_[i];
        const ic_map_t::const_iterator iIcIt = normIcMap.find( id1_term );
        if ( iIcIt != normIcMap.end() && iIcIt->second != 0 ) {
            const double iIc = iIcIt->second;
            LOG_DEBUG( "ic[" << id1_term << "]=" << iIc );
            const anc_map_t::const_iterator iAncsIt = ancMap.find( id1_term );
            for ( std::size_t j = 0; j < id2_.size(); j++ ) {
                const std::string id2_term = (std::string)id2_[j];
                const ic_map_t::const_iterator jIcIt = normIcMap.find( id2_term );
                if ( jIcIt != normIcMap.end() && jIcIt->second != 0 ) {
                    const anc_map_t::const_iterator jAncsIt = ancMap.find( id2_term );
                    // find common ancestors
                    term_set_t commonAncs;
                    if ( iAncsIt != ancMap.end() && jAncsIt != ancMap.end() ) {
                        std::set_intersection( iAncsIt->second.begin(), iAncsIt->second.end(),
                                               jAncsIt->second.begin(), jAncsIt->second.end(),
                                               std::inserter( commonAncs, commonAncs.end() ) );
                    }
                    LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() );

                    // Information Content of the most informative common ancestor (MICA)
                    double mica = 0;
                    for ( term_set_t::const_iterator termIt = commonAncs.begin(); termIt != commonAncs.end(); ++termIt ) {
                        ic_map_t::const_iterator ancIcIt = normIcMap.find( *termIt );
                        if ( ancIcIt != normIcMap.end() && mica < ancIcIt->second ) mica = ancIcIt->second;
                    }
                    LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica );
                    res(i,j) = go_dist( mica, iIc, jIcIt->second, mic );
                } else {
                    res(i,j) = NA_REAL;
                }
            }
        } else {
            for ( std::size_t j = 0; j < id2_.size(); j++ ) {
                res(i,j) = NA_REAL;
            }
        }
    }
    return ( res );
}
Пример #3
0
///' Calculate the network properties, data matrix not provided
///' 
///' @details
///' \subsection{Input expectations:}{
///'   Note that this function expects all inputs to be sensible, as checked by
///'   the R function 'checkUserInput' and processed by 'networkProperties'. 
///'   
///'   These requirements are:
///'   \itemize{
///'   \item{'net' is a square matrix, and its rownames are identical to its 
///'         column names.}
///'   \item{'moduleAssigments' is a named character vector, where the names
///'         represent node labels found in the discovery dataset. Unlike 
///'         'PermutationProcedure', these may include nodes that are not 
///'         present in 'data' and 'net'.}
///'   \item{The module labels specified in 'modules' must occur in 
///'         'moduleAssignments'.}
///'   }
///' }
///' 
///' @param net adjacency matrix of network edge weights between all pairs of 
///'   nodes in the dataset in which to calculate the network properties.
///' @param moduleAssignments a named character vector containing the module 
///'   each node belongs to in the discovery dataset. 
///' @param modules a character vector of modules for which to calculate the 
///'   network properties for.
///' 
///' @return a list containing the summary profile, node contribution, module
///'   coherence, weighted degree, and average edge weight for each 'module'.
///'   
///' @keywords internal
// [[Rcpp::export]]
Rcpp::List NetPropsNoData (
    Rcpp::NumericMatrix net, 
    Rcpp::CharacterVector moduleAssignments,
    Rcpp::CharacterVector modules
) {
  // convert the colnames / rownames to C++ equivalents
  const std::vector<std::string> nodeNames (Rcpp::as<std::vector<std::string>>(colnames(net)));
  unsigned int nNodes = net.ncol();
  
  R_CheckUserInterrupt(); 
  
  /* Next, we need to create two mappings:
  *  - From node IDs to indices in the dataset of interest
  *  - From modules to node IDs
  *  - From modules to only node IDs present in the dataset of interest
  */
  const namemap nodeIdxMap = MakeIdxMap(nodeNames);
  const stringmap modNodeMap = MakeModMap(moduleAssignments);
  const stringmap modNodePresentMap = MakeModMap(moduleAssignments, nodeIdxMap);
  
  // What modules do we actually want to analyse?
  const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules));
  
  R_CheckUserInterrupt(); 
  
  // Calculate the network properties for each module
  std::string mod; // iterators
  unsigned int mNodesPresent, mNodes;
  arma::uvec nodeIdx, propIdx, nodeRank;
  namemap propIdxMap;
  std::vector<std::string> modNodeNames; 
  arma::vec WD; // results containers
  double avgWeight; 
  Rcpp::NumericVector degree; // for casting to R equivalents
  Rcpp::List results; // final storage container
  for (auto mi = mods.begin(); mi != mods.end(); ++mi) {
    // What nodes are in this module?
    // modNodeNames = names(moduleAssignments[moduleAssignments == mod])
    mod = *mi;
    modNodeNames = GetModNodeNames(mod, modNodeMap);
    
    // initialise results containers with NA values for nodes not present in
    // the dataset we're calculating the network properties in.
    degree = Rcpp::NumericVector(modNodeNames.size(), NA_REAL);
    avgWeight = NA_REAL;
    degree.names() = modNodeNames;
    
    // Create a mapping between node names and the result vectors
    propIdxMap = MakeIdxMap(modNodeNames);
    
    // Get just the indices of nodes that are present in the requested dataset
    nodeIdx = GetNodeIdx(mod, modNodePresentMap, nodeIdxMap);
    mNodesPresent = nodeIdx.n_elem;
    
    // And a mapping of those nodes to the initialised vectors
    propIdx = GetNodeIdx(mod, modNodePresentMap, propIdxMap);
    mNodes = propIdx.n_elem;

    // Calculate the properties if the module has nodes in the test dataset
    if (nodeIdx.n_elem > 0) {
      // sort the node indices for sequential memory access
      nodeRank = SortNodes(nodeIdx.memptr(), mNodesPresent);
      
      WD = WeightedDegree(net.begin(), nNodes, nodeIdx.memptr(), mNodesPresent);
      WD = WD(nodeRank); // reorder results
      
      avgWeight = AverageEdgeWeight(WD.memptr(), WD.n_elem);
      R_CheckUserInterrupt(); 
      
      // Fill the results vectors appropriately
      Fill(degree, WD.memptr(), mNodesPresent, propIdx.memptr(), mNodes);
    }

    results.push_back(
      Rcpp::List::create(
        Rcpp::Named("degree") = degree,
        Rcpp::Named("avgWeight") = avgWeight
      )
    );
  }
  results.names() = mods;
  
  return(results);
}
Пример #4
0
//[[Rcpp::export]]
Rcpp::List checkTreeCpp(Rcpp::S4 obj, Rcpp::List opts) {
  
    std::string err, wrn;
    Rcpp::IntegerMatrix ed = obj.slot("edge");
    int nrow = ed.nrow();
    Rcpp::IntegerVector ances = getAnces(ed);
    //Rcpp::IntegerVector desc = getDesc(ed);
    int nroots = nRoots(ances);
    bool rooted = nroots > 0;
    Rcpp::NumericVector edLength = obj.slot("edge.length");
    Rcpp::CharacterVector edLengthNm = edLength.names();
    Rcpp::CharacterVector label = obj.slot("label");
    Rcpp::CharacterVector labelNm = label.names();
    Rcpp::CharacterVector edLabel = obj.slot("edge.label");
    Rcpp::CharacterVector edLabelNm = edLabel.names();
    Rcpp::IntegerVector allnodesSafe = getAllNodesSafe(ed);
    Rcpp::IntegerVector allnodesFast = getAllNodesFast(ed, rooted);
    int nEdLength = edLength.size();
    int nLabel = label.size();
    int nEdLabel = edLabel.size();
    int nEdges = nrow;
    bool hasEdgeLength = !all_naC(edLength);

    // check tips
    int ntipsSafe = nTipsSafe(ances);
    int ntipsFast = nTipsFastCpp(ances);
    bool testnTips = ntipsFast == ntipsSafe;
    if (! testnTips) {
	err.append("Tips incorrectly labeled. ");
    }

    //check internal nodes
    bool testNodes = Rcpp::all(allnodesSafe == allnodesFast).is_true() && // is both ways comparison needed?
    	Rcpp::all(allnodesFast == allnodesSafe).is_true();
    if (! testNodes) {
    	err.append("Nodes incorrectly labeled. ");
    }

    // check edge lengths
    if (hasEdgeLength) {	
    	if (nEdLength != nEdges) {
    	    err.append("Number of edge lengths do not match number of edges. ");
    	}
    	// if (nb_naC(edLength) > nroots) { // not enough!  -- best done in R
    	//     err.append("Only the root should have NA as an edge length. ");
    	// }
    	if (getRange(edLength, TRUE)[0] < 0) {
    	    err.append("Edge lengths must be non-negative. ");
    	}
    	Rcpp::CharacterVector edgeLblSupp = edgeIdCpp(ed, "all");
	Rcpp::CharacterVector edgeLblDiff = Rcpp::setdiff(edLengthNm, edgeLblSupp);
    	if ( edgeLblDiff.size() != 0 ) {
    	    err.append("Edge lengths incorrectly labeled. ");
    	}	    
    }
    
    // check label names
    Rcpp::CharacterVector chrLabelNm = Rcpp::as<Rcpp::CharacterVector>(allnodesFast);
    int j = 0;
    while (j < nroots) { //remove root(s)
    	chrLabelNm.erase(0); 
    	j++;
    }
    bool testLabelNm = isLabelName(labelNm, chrLabelNm);
    if (!testLabelNm) {
    	err.append("Tip and node labels must be a named vector, the names must match the node IDs. ");
    	err.append("Use tipLabels<- and/or nodeLabels<- to update them. ");
    }
    
    // check that tips have labels
    Rcpp::CharacterVector tiplabel(ntipsFast);
    std::copy (label.begin(), label.begin()+ntipsFast, tiplabel.begin());
    bool emptyTipLabel = is_true(any(Rcpp::is_na(tiplabel)));
    if ( emptyTipLabel ) {
    	err.append("All tips must have a label.");
    }

    // check edgeLabels
    Rcpp::CharacterVector chrEdgeLblNm = edgeIdCpp(ed, "all");
    bool testEdgeLblNm = isLabelName(edLabelNm, chrEdgeLblNm);
    if (!testEdgeLblNm) {
    	err.append("Edge labels are not labelled correctly. Use the function edgeLabels<- to update them. ");
    }

    // make sure that tips and node labels are unique
    if (hasDuplicatedLabelsCpp(label)) {
	std::string labOpt = opts["allow.duplicated.labels"];
	if (labOpt == "fail") {
	    err.append("Labels are not unique. ");
	}
	if (labOpt == "warn") {
	    wrn.append("Labels are not unique. ");
	}
    }

    // check for polytomies
    if (hasPolytomy(ances)) {
	std::string msgPoly = "Tree includes polytomies. ";
	std::string polyOpt = opts["poly"];
	if (polyOpt == "fail") {
	    err.append(msgPoly);
	}
	if (polyOpt == "warn") {
	    wrn.append(msgPoly);
	}
    }

    // check number of roots
    if (nroots > 1) {
	std::string msgRoot = "Tree has more than one root. ";
	std::string rootOpt = opts["multiroot"];
	if (rootOpt == "fail") {
	    err.append(msgRoot);
	}
	if (rootOpt == "warn") {
	    wrn.append(msgRoot);
	}
    }

    // check for singletons
    if (hasSingleton(ances)) {
	std::string msgSing = "Tree contains singleton nodes. ";
	std::string singOpt = opts["singleton"];
	if (singOpt == "fail") {
	    err.append(msgSing);
	}
	if (singOpt == "warn") {
	    wrn.append(msgSing);
	}
    }

    return Rcpp::List::create(err, wrn);
}