//' @rdname convert //' @keywords internal manip // [[Rcpp::export]] Rcpp::List icd9DecimalToPartsCpp(const Rcpp::CharacterVector icd9Decimal, const Rcpp::String minorEmpty) { Rcpp::CharacterVector majors; Rcpp::CharacterVector minors; int ilen = icd9Decimal.length(); if (ilen == 0) { return Rcpp::List::create(Rcpp::_["major"] = Rcpp::CharacterVector::create(), Rcpp::_["minor"] = Rcpp::CharacterVector::create()); } for (Rcpp::CharacterVector::const_iterator it = icd9Decimal.begin(); it != icd9Decimal.end(); ++it) { Rcpp::String strna = *it; if (strna == NA_STRING || strna == "") { majors.push_back(NA_STRING); minors.push_back(NA_STRING); continue; } // TODO: Rcpp::Rcpp::String doesn't implement many functions, so using STL. A FAST way // would be to use Rcpp::String's function get_cstring, and recode the trim // functions to take const char *. This would avoid the type change AND be // faster trimming. std::string thiscode = Rcpp::as<std::string>(*it); thiscode = strimCpp(thiscode); // This updates 'thisccode' by reference, no copy std::size_t pos = thiscode.find("."); // substring parts std::string majorin; Rcpp::String minorout; if (pos != std::string::npos) { majorin = thiscode.substr(0, pos); minorout = thiscode.substr(pos + 1); } else { majorin = thiscode; minorout = minorEmpty; } majors.push_back(icd9AddLeadingZeroesMajorSingle(majorin)); minors.push_back(minorout); } return Rcpp::List::create(Rcpp::_["major"] = majors, Rcpp::_["minor"] = minors); }
bool isUndirectedGraphNEL(SEXP graph_sexp) { Rcpp::S4 graph_s4; try { graph_s4 = Rcpp::as<Rcpp::S4>(graph_sexp); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Input graph must be an S4 object"); } if(Rcpp::as<std::string>(graph_s4.attr("class")) != "graphNEL") { throw std::runtime_error("Input graph must have class graphNEL"); } Rcpp::RObject nodes_obj; try { nodes_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("nodes")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Error extracting slot nodes"); } Rcpp::RObject edges_obj; try { edges_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("edgeL")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Error extracting slot edgeL"); } Rcpp::CharacterVector nodeNames; try { nodeNames = Rcpp::as<Rcpp::CharacterVector>(nodes_obj); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot nodes of input graph must be a character vector"); } { std::vector<std::string> uniqueNodeNames = Rcpp::as<std::vector<std::string> >(nodeNames); std::sort(uniqueNodeNames.begin(), uniqueNodeNames.end()); uniqueNodeNames.erase(std::unique(uniqueNodeNames.begin(), uniqueNodeNames.end()), uniqueNodeNames.end()); if((std::size_t)uniqueNodeNames.size() != (std::size_t)nodeNames.size()) { throw std::runtime_error("Node names of input graph were not unique"); } } int nVertices = nodeNames.size(); Rcpp::List edges_list; try { edges_list = Rcpp::as<Rcpp::List>(edges_obj); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot edgeL of input graph must be a list"); } Rcpp::CharacterVector edges_list_names = Rcpp::as<Rcpp::CharacterVector>(edges_list.attr("names")); Context::inputGraph graphRef = Context::inputGraph(nVertices); for(int i = 0; i < edges_list.size(); i++) { int nodeIndex = std::distance(nodeNames.begin(), std::find(nodeNames.begin(), nodeNames.end(), edges_list_names(i))); Rcpp::List subList; Rcpp::CharacterVector subListNames; try { subList = Rcpp::as<Rcpp::List>(edges_list(i)); subListNames = Rcpp::as<Rcpp::CharacterVector>(subList.attr("names")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot edgeL of input graph had an invalid format"); } if(std::find(subListNames.begin(), subListNames.end(), "edges") == subListNames.end()) { throw std::runtime_error("Slot edgeL of input graph had an invalid format"); } Rcpp::NumericVector targetIndicesThisNode; try { targetIndicesThisNode = Rcpp::as<Rcpp::NumericVector>(subList("edges")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot edgeL of input graph had an invalid format"); } for(int j = 0; j < targetIndicesThisNode.size(); j++) { boost::add_edge((std::size_t)nodeIndex, (std::size_t)((int)targetIndicesThisNode(j)-1), graphRef); } } Context::inputGraph::edge_iterator current, end; boost::tie(current, end) = boost::edges(graphRef); for(; current != end; current++) { int source = boost::source(*current, graphRef), target = boost::target(*current, graphRef); if(!boost::edge(target, source, graphRef).second) { return false; } } return true; }
void convertGraphNEL(SEXP graph_sexp, context::inputGraph& graphRef) { Rcpp::S4 graph_s4; try { graph_s4 = Rcpp::as<Rcpp::S4>(graph_sexp); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Input graph must be an S4 object"); } if(Rcpp::as<std::string>(graph_s4.attr("class")) != "graphNEL") { throw std::runtime_error("Input graph must have class graphNEL"); } Rcpp::RObject nodes_obj; try { nodes_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("nodes")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Error extracting slot nodes"); } Rcpp::RObject edges_obj; try { edges_obj = Rcpp::as<Rcpp::RObject>(graph_s4.slot("edgeL")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Error extracting slot edgeL"); } Rcpp::CharacterVector nodeNames; try { nodeNames = Rcpp::as<Rcpp::CharacterVector>(nodes_obj); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot nodes of input graph must be a character vector"); } { std::vector<std::string> uniqueNodeNames = Rcpp::as<std::vector<std::string> >(nodeNames); std::sort(uniqueNodeNames.begin(), uniqueNodeNames.end()); uniqueNodeNames.erase(std::unique(uniqueNodeNames.begin(), uniqueNodeNames.end()), uniqueNodeNames.end()); if((std::size_t)uniqueNodeNames.size() != (std::size_t)nodeNames.size()) { throw std::runtime_error("Node names of input graph were not unique"); } } int nVertices = nodeNames.size(); Rcpp::List edges_list; try { edges_list = Rcpp::as<Rcpp::List>(edges_obj); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot edgeL of input graph must be a list"); } Rcpp::CharacterVector edges_list_names = Rcpp::as<Rcpp::CharacterVector>(edges_list.attr("names")); graphRef = context::inputGraph(nVertices); int edgeIndexCounter = 0; for(int i = 0; i < edges_list.size(); i++) { int nodeIndex = std::distance(nodeNames.begin(), std::find(nodeNames.begin(), nodeNames.end(), edges_list_names(i))); Rcpp::List subList; Rcpp::CharacterVector subListNames; try { subList = Rcpp::as<Rcpp::List>(edges_list(i)); subListNames = Rcpp::as<Rcpp::CharacterVector>(subList.attr("names")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot edgeL of input graph had an invalid format"); } if(std::find(subListNames.begin(), subListNames.end(), "edges") == subListNames.end()) { throw std::runtime_error("Slot edgeL of input graph had an invalid format"); } Rcpp::NumericVector targetIndicesThisNode; try { targetIndicesThisNode = Rcpp::as<Rcpp::NumericVector>(subList("edges")); } catch(Rcpp::not_compatible&) { throw std::runtime_error("Slot edgeL of input graph had an invalid format"); } for(int j = 0; j < targetIndicesThisNode.size(); j++) { boost::add_edge((std::size_t)nodeIndex, (std::size_t)((int)targetIndicesThisNode(j)-1), edgeIndexCounter, graphRef); edgeIndexCounter++; } } }
// [[Rcpp::export]] Rcpp::CharacterVector icd9MajMinToCode(const Rcpp::CharacterVector major, const Rcpp::CharacterVector minor, bool isShort) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "icd9MajMinToCode: major.size() = " << major.size() << " and minor.size() = " << minor.size() << "\n"; #endif if (major.size() != minor.size()) Rcpp::stop("major and minor lengths differ"); #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "major and minor are the same?\n"; #endif Rcpp::CharacterVector out; // wish I could reserve space for this Rcpp::CharacterVector::const_iterator j = major.begin(); Rcpp::CharacterVector::const_iterator n = minor.begin(); for (; j != major.end() && n != minor.end(); ++j, ++n) { Rcpp::String mjrelem = *j; if (mjrelem == NA_STRING) { out.push_back(NA_STRING); continue; } // work around Rcpp bug with push_front: convert to string just for this // TODO: try to do this with C string instead const char* smj_c = mjrelem.get_cstring(); std::string smj = std::string(smj_c); switch (strlen(smj_c)) { case 0: out.push_back(NA_STRING); continue; case 1: if (!icd9IsASingleVE(smj_c)) { smj.insert(0, "00"); } break; case 2: if (!icd9IsASingleVE(smj_c)) { smj.insert(0, "0"); } else { smj.insert(1, "0"); } // default: // major is 3 (or more) chars already } Rcpp::String mnrelem = *n; if (mnrelem == NA_STRING) { //out.push_back(mjrelem); out.push_back(smj); continue; } // this can probably be done more quickly: //std::string smj(mjrelem); if (!isShort && mnrelem != "") { smj.append("."); } smj.append(mnrelem); out.push_back(smj); } // ?slow step somewhere around here, with use of Rcpp::String, maybe in the wrapping? Maybe in the multiple push_back calls //return wrap(out); return out; }
//[[Rcpp::export]] Rcpp::List checkTreeCpp(Rcpp::S4 obj, Rcpp::List opts) { std::string err, wrn; Rcpp::IntegerMatrix ed = obj.slot("edge"); int nrow = ed.nrow(); Rcpp::IntegerVector ances = getAnces(ed); //Rcpp::IntegerVector desc = getDesc(ed); int nroots = nRoots(ances); bool rooted = nroots > 0; Rcpp::NumericVector edLength = obj.slot("edge.length"); Rcpp::CharacterVector edLengthNm = edLength.names(); Rcpp::CharacterVector label = obj.slot("label"); Rcpp::CharacterVector labelNm = label.names(); Rcpp::CharacterVector edLabel = obj.slot("edge.label"); Rcpp::CharacterVector edLabelNm = edLabel.names(); Rcpp::IntegerVector allnodesSafe = getAllNodesSafe(ed); Rcpp::IntegerVector allnodesFast = getAllNodesFast(ed, rooted); int nEdLength = edLength.size(); int nLabel = label.size(); int nEdLabel = edLabel.size(); int nEdges = nrow; bool hasEdgeLength = !all_naC(edLength); // check tips int ntipsSafe = nTipsSafe(ances); int ntipsFast = nTipsFastCpp(ances); bool testnTips = ntipsFast == ntipsSafe; if (! testnTips) { err.append("Tips incorrectly labeled. "); } //check internal nodes bool testNodes = Rcpp::all(allnodesSafe == allnodesFast).is_true() && // is both ways comparison needed? Rcpp::all(allnodesFast == allnodesSafe).is_true(); if (! testNodes) { err.append("Nodes incorrectly labeled. "); } // check edge lengths if (hasEdgeLength) { if (nEdLength != nEdges) { err.append("Number of edge lengths do not match number of edges. "); } // if (nb_naC(edLength) > nroots) { // not enough! -- best done in R // err.append("Only the root should have NA as an edge length. "); // } if (getRange(edLength, TRUE)[0] < 0) { err.append("Edge lengths must be non-negative. "); } Rcpp::CharacterVector edgeLblSupp = edgeIdCpp(ed, "all"); Rcpp::CharacterVector edgeLblDiff = Rcpp::setdiff(edLengthNm, edgeLblSupp); if ( edgeLblDiff.size() != 0 ) { err.append("Edge lengths incorrectly labeled. "); } } // check label names Rcpp::CharacterVector chrLabelNm = Rcpp::as<Rcpp::CharacterVector>(allnodesFast); int j = 0; while (j < nroots) { //remove root(s) chrLabelNm.erase(0); j++; } bool testLabelNm = isLabelName(labelNm, chrLabelNm); if (!testLabelNm) { err.append("Tip and node labels must be a named vector, the names must match the node IDs. "); err.append("Use tipLabels<- and/or nodeLabels<- to update them. "); } // check that tips have labels Rcpp::CharacterVector tiplabel(ntipsFast); std::copy (label.begin(), label.begin()+ntipsFast, tiplabel.begin()); bool emptyTipLabel = is_true(any(Rcpp::is_na(tiplabel))); if ( emptyTipLabel ) { err.append("All tips must have a label."); } // check edgeLabels Rcpp::CharacterVector chrEdgeLblNm = edgeIdCpp(ed, "all"); bool testEdgeLblNm = isLabelName(edLabelNm, chrEdgeLblNm); if (!testEdgeLblNm) { err.append("Edge labels are not labelled correctly. Use the function edgeLabels<- to update them. "); } // make sure that tips and node labels are unique if (hasDuplicatedLabelsCpp(label)) { std::string labOpt = opts["allow.duplicated.labels"]; if (labOpt == "fail") { err.append("Labels are not unique. "); } if (labOpt == "warn") { wrn.append("Labels are not unique. "); } } // check for polytomies if (hasPolytomy(ances)) { std::string msgPoly = "Tree includes polytomies. "; std::string polyOpt = opts["poly"]; if (polyOpt == "fail") { err.append(msgPoly); } if (polyOpt == "warn") { wrn.append(msgPoly); } } // check number of roots if (nroots > 1) { std::string msgRoot = "Tree has more than one root. "; std::string rootOpt = opts["multiroot"]; if (rootOpt == "fail") { err.append(msgRoot); } if (rootOpt == "warn") { wrn.append(msgRoot); } } // check for singletons if (hasSingleton(ances)) { std::string msgSing = "Tree contains singleton nodes. "; std::string singOpt = opts["singleton"]; if (singOpt == "fail") { err.append(msgSing); } if (singOpt == "warn") { wrn.append(msgSing); } } return Rcpp::List::create(err, wrn); }