Beispiel #1
0
//' @rdname convert
//' @keywords internal manip
// [[Rcpp::export]]
Rcpp::List icd9DecimalToPartsCpp(const Rcpp::CharacterVector icd9Decimal, const Rcpp::String minorEmpty) {
	Rcpp::CharacterVector majors;
	Rcpp::CharacterVector minors;
	int ilen = icd9Decimal.length();

	if (ilen == 0) {
		return Rcpp::List::create(Rcpp::_["major"] =
				Rcpp::CharacterVector::create(), Rcpp::_["minor"] =
						Rcpp::CharacterVector::create());
	}

	for (Rcpp::CharacterVector::const_iterator it = icd9Decimal.begin();
			it != icd9Decimal.end(); ++it) {
		Rcpp::String strna = *it;
		if (strna == NA_STRING || strna == "") {
			majors.push_back(NA_STRING);
			minors.push_back(NA_STRING);
			continue;
		}
		// TODO: Rcpp::Rcpp::String doesn't implement many functions, so using STL. A FAST way
		// would be to use Rcpp::String's function get_cstring, and recode the trim
		// functions to take const char *. This would avoid the type change AND be
		// faster trimming.
		std::string thiscode = Rcpp::as<std::string>(*it);
		thiscode = strimCpp(thiscode); // This updates 'thisccode' by reference, no copy
		std::size_t pos = thiscode.find(".");
		// substring parts
		std::string majorin;
		Rcpp::String minorout;
		if (pos != std::string::npos) {
			majorin = thiscode.substr(0, pos);
			minorout = thiscode.substr(pos + 1);
		} else {
			majorin = thiscode;
			minorout = minorEmpty;
		}
		majors.push_back(icd9AddLeadingZeroesMajorSingle(majorin));
		minors.push_back(minorout);
	}
	return Rcpp::List::create(Rcpp::_["major"] = majors, Rcpp::_["minor"] =
			minors);
}
	bool isUndirectedGraphNEL(SEXP graph_sexp)
	{
		Rcpp::S4 graph_s4;
		try
		{
			graph_s4 = Rcpp::as<Rcpp::S4>(graph_sexp);
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Input graph must be an S4 object");
		}
		if(Rcpp::as<std::string>(graph_s4.attr("class")) != "graphNEL")
		{
			throw std::runtime_error("Input graph must have class graphNEL");
		}
		
		Rcpp::RObject nodes_obj;
		try
		{
			nodes_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("nodes"));
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Error extracting slot nodes");
		}

		Rcpp::RObject edges_obj;
		try
		{
			edges_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("edgeL"));
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Error extracting slot edgeL");
		}

		Rcpp::CharacterVector nodeNames;
		try
		{
			nodeNames = Rcpp::as<Rcpp::CharacterVector>(nodes_obj);
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Slot nodes of input graph must be a character vector");
		}
		{
			std::vector<std::string> uniqueNodeNames = Rcpp::as<std::vector<std::string> >(nodeNames);
			std::sort(uniqueNodeNames.begin(), uniqueNodeNames.end());
			uniqueNodeNames.erase(std::unique(uniqueNodeNames.begin(), uniqueNodeNames.end()), uniqueNodeNames.end());
			if((std::size_t)uniqueNodeNames.size() != (std::size_t)nodeNames.size())
			{
				throw std::runtime_error("Node names of input graph were not unique");
			}
		}
		int nVertices = nodeNames.size();

		Rcpp::List edges_list;
		try
		{
			edges_list = Rcpp::as<Rcpp::List>(edges_obj);
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Slot edgeL of input graph must be a list");
		}
		Rcpp::CharacterVector edges_list_names = Rcpp::as<Rcpp::CharacterVector>(edges_list.attr("names"));

		Context::inputGraph graphRef = Context::inputGraph(nVertices);
		for(int i = 0; i < edges_list.size(); i++)
		{
			int nodeIndex = std::distance(nodeNames.begin(), std::find(nodeNames.begin(), nodeNames.end(), edges_list_names(i)));
			Rcpp::List subList;
			Rcpp::CharacterVector subListNames;
			try
			{
				subList = Rcpp::as<Rcpp::List>(edges_list(i));
				subListNames = Rcpp::as<Rcpp::CharacterVector>(subList.attr("names"));
			}
			catch(Rcpp::not_compatible&)
			{
				throw std::runtime_error("Slot edgeL of input graph had an invalid format");
			}
			if(std::find(subListNames.begin(), subListNames.end(), "edges") == subListNames.end())
			{
				throw std::runtime_error("Slot edgeL of input graph had an invalid format");
			}
			Rcpp::NumericVector targetIndicesThisNode;
			try
			{
				targetIndicesThisNode = Rcpp::as<Rcpp::NumericVector>(subList("edges"));
			}
			catch(Rcpp::not_compatible&)
			{
				throw std::runtime_error("Slot edgeL of input graph had an invalid format");
			}
			for(int j = 0; j < targetIndicesThisNode.size(); j++)
			{
				boost::add_edge((std::size_t)nodeIndex, (std::size_t)((int)targetIndicesThisNode(j)-1), graphRef);
			}
		}
		Context::inputGraph::edge_iterator current, end;
		boost::tie(current, end) = boost::edges(graphRef);
		for(; current != end; current++)
		{
			int source = boost::source(*current, graphRef), target = boost::target(*current, graphRef);
			if(!boost::edge(target, source, graphRef).second)
			{
				return false;
			}
		}
		return true;
	}
	void convertGraphNEL(SEXP graph_sexp, context::inputGraph& graphRef)
	{
		Rcpp::S4 graph_s4;
		try
		{
			graph_s4 = Rcpp::as<Rcpp::S4>(graph_sexp);
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Input graph must be an S4 object");
		}
		if(Rcpp::as<std::string>(graph_s4.attr("class")) != "graphNEL")
		{
			throw std::runtime_error("Input graph must have class graphNEL");
		}
		
		Rcpp::RObject nodes_obj;
		try
		{
			nodes_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("nodes"));
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Error extracting slot nodes");
		}

		Rcpp::RObject edges_obj;
		try
		{
			edges_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("edgeL"));
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Error extracting slot edgeL");
		}

		Rcpp::CharacterVector nodeNames;
		try
		{
			nodeNames = Rcpp::as<Rcpp::CharacterVector>(nodes_obj);
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Slot nodes of input graph must be a character vector");
		}
		{
			std::vector<std::string> uniqueNodeNames = Rcpp::as<std::vector<std::string> >(nodeNames);
			std::sort(uniqueNodeNames.begin(), uniqueNodeNames.end());
			uniqueNodeNames.erase(std::unique(uniqueNodeNames.begin(), uniqueNodeNames.end()), uniqueNodeNames.end());
			if((std::size_t)uniqueNodeNames.size() != (std::size_t)nodeNames.size())
			{
				throw std::runtime_error("Node names of input graph were not unique");
			}
		}
		int nVertices = nodeNames.size();

		Rcpp::List edges_list;
		try
		{
			edges_list = Rcpp::as<Rcpp::List>(edges_obj);
		}
		catch(Rcpp::not_compatible&)
		{
			throw std::runtime_error("Slot edgeL of input graph must be a list");
		}
		Rcpp::CharacterVector edges_list_names = Rcpp::as<Rcpp::CharacterVector>(edges_list.attr("names"));

		graphRef = context::inputGraph(nVertices);
		int edgeIndexCounter = 0;
		for(int i = 0; i < edges_list.size(); i++)
		{
			int nodeIndex = std::distance(nodeNames.begin(), std::find(nodeNames.begin(), nodeNames.end(), edges_list_names(i)));
			Rcpp::List subList;
			Rcpp::CharacterVector subListNames;
			try
			{
				subList = Rcpp::as<Rcpp::List>(edges_list(i));
				subListNames = Rcpp::as<Rcpp::CharacterVector>(subList.attr("names"));
			}
			catch(Rcpp::not_compatible&)
			{
				throw std::runtime_error("Slot edgeL of input graph had an invalid format");
			}
			if(std::find(subListNames.begin(), subListNames.end(), "edges") == subListNames.end())
			{
				throw std::runtime_error("Slot edgeL of input graph had an invalid format");
			}
			Rcpp::NumericVector targetIndicesThisNode;
			try
			{
				targetIndicesThisNode = Rcpp::as<Rcpp::NumericVector>(subList("edges"));
			}
			catch(Rcpp::not_compatible&)
			{
				throw std::runtime_error("Slot edgeL of input graph had an invalid format");
			}
			for(int j = 0; j < targetIndicesThisNode.size(); j++)
			{
				boost::add_edge((std::size_t)nodeIndex, (std::size_t)((int)targetIndicesThisNode(j)-1), edgeIndexCounter, graphRef);
				edgeIndexCounter++;
			}
		}
	}
Beispiel #4
0
// [[Rcpp::export]]
Rcpp::CharacterVector icd9MajMinToCode(const Rcpp::CharacterVector major,
		const Rcpp::CharacterVector minor, bool isShort) {
#ifdef ICD9_DEBUG_TRACE
  Rcpp::Rcout << "icd9MajMinToCode: major.size() = " << major.size()
			<< " and minor.size() = " << minor.size() << "\n";
#endif

	if (major.size() != minor.size())
		Rcpp::stop("major and minor lengths differ");

#ifdef ICD9_DEBUG_TRACE
	Rcpp::Rcout << "major and minor are the same?\n";
#endif

	Rcpp::CharacterVector out; // wish I could reserve space for this
	Rcpp::CharacterVector::const_iterator j = major.begin();
	Rcpp::CharacterVector::const_iterator n = minor.begin();

	for (; j != major.end() && n != minor.end(); ++j, ++n) {
		Rcpp::String mjrelem = *j;
		if (mjrelem == NA_STRING) {
			out.push_back(NA_STRING);
			continue;
		}
		// work around Rcpp bug with push_front: convert to string just for this
		// TODO: try to do this with C string instead
		const char* smj_c = mjrelem.get_cstring();
		std::string smj = std::string(smj_c);
		switch (strlen(smj_c)) {
		case 0:
			out.push_back(NA_STRING);
			continue;
		case 1:
			if (!icd9IsASingleVE(smj_c)) {
				smj.insert(0, "00");
			}
			break;
		case 2:
			if (!icd9IsASingleVE(smj_c)) {
				smj.insert(0, "0");
			} else {
				smj.insert(1, "0");
			}
			// default: // major is 3 (or more) chars already
		}
		Rcpp::String mnrelem = *n;
		if (mnrelem == NA_STRING) {
			//out.push_back(mjrelem);
			out.push_back(smj);
			continue;
		}
		// this can probably be done more quickly:
		//std::string smj(mjrelem);
		if (!isShort && mnrelem != "") {
			smj.append(".");
		}
		smj.append(mnrelem);
		out.push_back(smj);

	}
	// ?slow step somewhere around here, with use of Rcpp::String, maybe in the wrapping? Maybe in the multiple push_back calls

	//return wrap(out);
	return out;
}
Beispiel #5
0
//[[Rcpp::export]]
Rcpp::List checkTreeCpp(Rcpp::S4 obj, Rcpp::List opts) {
  
    std::string err, wrn;
    Rcpp::IntegerMatrix ed = obj.slot("edge");
    int nrow = ed.nrow();
    Rcpp::IntegerVector ances = getAnces(ed);
    //Rcpp::IntegerVector desc = getDesc(ed);
    int nroots = nRoots(ances);
    bool rooted = nroots > 0;
    Rcpp::NumericVector edLength = obj.slot("edge.length");
    Rcpp::CharacterVector edLengthNm = edLength.names();
    Rcpp::CharacterVector label = obj.slot("label");
    Rcpp::CharacterVector labelNm = label.names();
    Rcpp::CharacterVector edLabel = obj.slot("edge.label");
    Rcpp::CharacterVector edLabelNm = edLabel.names();
    Rcpp::IntegerVector allnodesSafe = getAllNodesSafe(ed);
    Rcpp::IntegerVector allnodesFast = getAllNodesFast(ed, rooted);
    int nEdLength = edLength.size();
    int nLabel = label.size();
    int nEdLabel = edLabel.size();
    int nEdges = nrow;
    bool hasEdgeLength = !all_naC(edLength);

    // check tips
    int ntipsSafe = nTipsSafe(ances);
    int ntipsFast = nTipsFastCpp(ances);
    bool testnTips = ntipsFast == ntipsSafe;
    if (! testnTips) {
	err.append("Tips incorrectly labeled. ");
    }

    //check internal nodes
    bool testNodes = Rcpp::all(allnodesSafe == allnodesFast).is_true() && // is both ways comparison needed?
    	Rcpp::all(allnodesFast == allnodesSafe).is_true();
    if (! testNodes) {
    	err.append("Nodes incorrectly labeled. ");
    }

    // check edge lengths
    if (hasEdgeLength) {	
    	if (nEdLength != nEdges) {
    	    err.append("Number of edge lengths do not match number of edges. ");
    	}
    	// if (nb_naC(edLength) > nroots) { // not enough!  -- best done in R
    	//     err.append("Only the root should have NA as an edge length. ");
    	// }
    	if (getRange(edLength, TRUE)[0] < 0) {
    	    err.append("Edge lengths must be non-negative. ");
    	}
    	Rcpp::CharacterVector edgeLblSupp = edgeIdCpp(ed, "all");
	Rcpp::CharacterVector edgeLblDiff = Rcpp::setdiff(edLengthNm, edgeLblSupp);
    	if ( edgeLblDiff.size() != 0 ) {
    	    err.append("Edge lengths incorrectly labeled. ");
    	}	    
    }
    
    // check label names
    Rcpp::CharacterVector chrLabelNm = Rcpp::as<Rcpp::CharacterVector>(allnodesFast);
    int j = 0;
    while (j < nroots) { //remove root(s)
    	chrLabelNm.erase(0); 
    	j++;
    }
    bool testLabelNm = isLabelName(labelNm, chrLabelNm);
    if (!testLabelNm) {
    	err.append("Tip and node labels must be a named vector, the names must match the node IDs. ");
    	err.append("Use tipLabels<- and/or nodeLabels<- to update them. ");
    }
    
    // check that tips have labels
    Rcpp::CharacterVector tiplabel(ntipsFast);
    std::copy (label.begin(), label.begin()+ntipsFast, tiplabel.begin());
    bool emptyTipLabel = is_true(any(Rcpp::is_na(tiplabel)));
    if ( emptyTipLabel ) {
    	err.append("All tips must have a label.");
    }

    // check edgeLabels
    Rcpp::CharacterVector chrEdgeLblNm = edgeIdCpp(ed, "all");
    bool testEdgeLblNm = isLabelName(edLabelNm, chrEdgeLblNm);
    if (!testEdgeLblNm) {
    	err.append("Edge labels are not labelled correctly. Use the function edgeLabels<- to update them. ");
    }

    // make sure that tips and node labels are unique
    if (hasDuplicatedLabelsCpp(label)) {
	std::string labOpt = opts["allow.duplicated.labels"];
	if (labOpt == "fail") {
	    err.append("Labels are not unique. ");
	}
	if (labOpt == "warn") {
	    wrn.append("Labels are not unique. ");
	}
    }

    // check for polytomies
    if (hasPolytomy(ances)) {
	std::string msgPoly = "Tree includes polytomies. ";
	std::string polyOpt = opts["poly"];
	if (polyOpt == "fail") {
	    err.append(msgPoly);
	}
	if (polyOpt == "warn") {
	    wrn.append(msgPoly);
	}
    }

    // check number of roots
    if (nroots > 1) {
	std::string msgRoot = "Tree has more than one root. ";
	std::string rootOpt = opts["multiroot"];
	if (rootOpt == "fail") {
	    err.append(msgRoot);
	}
	if (rootOpt == "warn") {
	    wrn.append(msgRoot);
	}
    }

    // check for singletons
    if (hasSingleton(ances)) {
	std::string msgSing = "Tree contains singleton nodes. ";
	std::string singOpt = opts["singleton"];
	if (singOpt == "fail") {
	    err.append(msgSing);
	}
	if (singOpt == "warn") {
	    wrn.append(msgSing);
	}
    }

    return Rcpp::List::create(err, wrn);
}