Rcpp::CharacterVector parse_gb_qualifiers( 
    const std::vector<std::string>& qualifiers )
{
    int begin_, end_;
    std::vector<std::string> names, values;
    names.reserve( qualifiers.size() );
    values.reserve( qualifiers.size() );
    std::vector<std::string>::const_iterator s_it = std::begin(qualifiers);
    for ( ; s_it != std::end( qualifiers ); s_it++ ) {
        begin_ = s_it->find_first_not_of("=");
        end_ = s_it->find_first_of( "=", begin_ );
        names.push_back( s_it->substr( begin_, end_ ) );
        values.push_back( trim( s_it->substr(end_ + 1, s_it->length() - end_), "\"") );
    }
    Rcpp::CharacterVector Values = Rcpp::CharacterVector( std::begin(values), std::end(values) );
    Values.names() = names;
    return Values; 
}
Beispiel #2
0
//[[Rcpp::export]]
Rcpp::List checkTreeCpp(Rcpp::S4 obj, Rcpp::List opts) {
  
    std::string err, wrn;
    Rcpp::IntegerMatrix ed = obj.slot("edge");
    int nrow = ed.nrow();
    Rcpp::IntegerVector ances = getAnces(ed);
    //Rcpp::IntegerVector desc = getDesc(ed);
    int nroots = nRoots(ances);
    bool rooted = nroots > 0;
    Rcpp::NumericVector edLength = obj.slot("edge.length");
    Rcpp::CharacterVector edLengthNm = edLength.names();
    Rcpp::CharacterVector label = obj.slot("label");
    Rcpp::CharacterVector labelNm = label.names();
    Rcpp::CharacterVector edLabel = obj.slot("edge.label");
    Rcpp::CharacterVector edLabelNm = edLabel.names();
    Rcpp::IntegerVector allnodesSafe = getAllNodesSafe(ed);
    Rcpp::IntegerVector allnodesFast = getAllNodesFast(ed, rooted);
    int nEdLength = edLength.size();
    int nLabel = label.size();
    int nEdLabel = edLabel.size();
    int nEdges = nrow;
    bool hasEdgeLength = !all_naC(edLength);

    // check tips
    int ntipsSafe = nTipsSafe(ances);
    int ntipsFast = nTipsFastCpp(ances);
    bool testnTips = ntipsFast == ntipsSafe;
    if (! testnTips) {
	err.append("Tips incorrectly labeled. ");
    }

    //check internal nodes
    bool testNodes = Rcpp::all(allnodesSafe == allnodesFast).is_true() && // is both ways comparison needed?
    	Rcpp::all(allnodesFast == allnodesSafe).is_true();
    if (! testNodes) {
    	err.append("Nodes incorrectly labeled. ");
    }

    // check edge lengths
    if (hasEdgeLength) {	
    	if (nEdLength != nEdges) {
    	    err.append("Number of edge lengths do not match number of edges. ");
    	}
    	// if (nb_naC(edLength) > nroots) { // not enough!  -- best done in R
    	//     err.append("Only the root should have NA as an edge length. ");
    	// }
    	if (getRange(edLength, TRUE)[0] < 0) {
    	    err.append("Edge lengths must be non-negative. ");
    	}
    	Rcpp::CharacterVector edgeLblSupp = edgeIdCpp(ed, "all");
	Rcpp::CharacterVector edgeLblDiff = Rcpp::setdiff(edLengthNm, edgeLblSupp);
    	if ( edgeLblDiff.size() != 0 ) {
    	    err.append("Edge lengths incorrectly labeled. ");
    	}	    
    }
    
    // check label names
    Rcpp::CharacterVector chrLabelNm = Rcpp::as<Rcpp::CharacterVector>(allnodesFast);
    int j = 0;
    while (j < nroots) { //remove root(s)
    	chrLabelNm.erase(0); 
    	j++;
    }
    bool testLabelNm = isLabelName(labelNm, chrLabelNm);
    if (!testLabelNm) {
    	err.append("Tip and node labels must be a named vector, the names must match the node IDs. ");
    	err.append("Use tipLabels<- and/or nodeLabels<- to update them. ");
    }
    
    // check that tips have labels
    Rcpp::CharacterVector tiplabel(ntipsFast);
    std::copy (label.begin(), label.begin()+ntipsFast, tiplabel.begin());
    bool emptyTipLabel = is_true(any(Rcpp::is_na(tiplabel)));
    if ( emptyTipLabel ) {
    	err.append("All tips must have a label.");
    }

    // check edgeLabels
    Rcpp::CharacterVector chrEdgeLblNm = edgeIdCpp(ed, "all");
    bool testEdgeLblNm = isLabelName(edLabelNm, chrEdgeLblNm);
    if (!testEdgeLblNm) {
    	err.append("Edge labels are not labelled correctly. Use the function edgeLabels<- to update them. ");
    }

    // make sure that tips and node labels are unique
    if (hasDuplicatedLabelsCpp(label)) {
	std::string labOpt = opts["allow.duplicated.labels"];
	if (labOpt == "fail") {
	    err.append("Labels are not unique. ");
	}
	if (labOpt == "warn") {
	    wrn.append("Labels are not unique. ");
	}
    }

    // check for polytomies
    if (hasPolytomy(ances)) {
	std::string msgPoly = "Tree includes polytomies. ";
	std::string polyOpt = opts["poly"];
	if (polyOpt == "fail") {
	    err.append(msgPoly);
	}
	if (polyOpt == "warn") {
	    wrn.append(msgPoly);
	}
    }

    // check number of roots
    if (nroots > 1) {
	std::string msgRoot = "Tree has more than one root. ";
	std::string rootOpt = opts["multiroot"];
	if (rootOpt == "fail") {
	    err.append(msgRoot);
	}
	if (rootOpt == "warn") {
	    wrn.append(msgRoot);
	}
    }

    // check for singletons
    if (hasSingleton(ances)) {
	std::string msgSing = "Tree contains singleton nodes. ";
	std::string singOpt = opts["singleton"];
	if (singOpt == "fail") {
	    err.append(msgSing);
	}
	if (singOpt == "warn") {
	    wrn.append(msgSing);
	}
    }

    return Rcpp::List::create(err, wrn);
}