// Final step of the run; always returns true.
//
// When `reportStats` is set, prints counts of the non-terminal, terminal and
// total nodes in `directlyIncludedNodes` to otCLI.out.  In verbose mode the OTT
// ids of all of those nodes are additionally listed on otCLI.out and the ids of
// the non-terminal ones on otCLI.err.
//
// Otherwise every taxonomy node that is not in `includedNodes` is counted as
// pruned; those whose parent *is* included are removed from the taxonomy with
// pruneAndDelete.  The resulting tree is written as newick to otCLI.out and the
// pruned counts go to otCLI.err.
bool summarize(OTCLI &otCLI) override {
    if (reportStats) {
        OttIdSet allIds;
        OttIdSet internalIds;
        std::size_t numInternal = 0;
        for (auto taxon : directlyIncludedNodes) {
            const auto ottId = taxon->getOttId();
            allIds.insert(ottId);
            if (taxon->isTip()) {
                continue;
            }
            ++numInternal;
            internalIds.insert(ottId);
        }
        const auto numTotal = directlyIncludedNodes.size();
        otCLI.out << numInternal << " non-terminal taxa in OTT that are mapped by at least 1 input.\n"
                  << (numTotal - numInternal) << " terminal taxa in OTT that are mapped by at least 1 input\n"
                  << numTotal << " total taxa in OTT that are mapped by at least 1 input.\n";
        if (otCLI.verbose) {
            otCLI.out << "total included OTT Ids\n";
            for (const auto & id : allIds) {
                otCLI.out << id << '\n';
            }
            otCLI.err << "non-terminal OTT Ids\n";
            for (const auto & id : internalIds) {
                otCLI.err << id << '\n';
            }
        }
        return true;
    }

    assert(taxonomy != nullptr && !includedNodes.empty());
    std::size_t prunedTips = 0;
    std::size_t prunedInternals = 0;
    // Nodes are only collected while iterating; the tree is modified afterwards
    // so that the traversal never walks a tree that is being changed.
    std::set<RootedTreeNodeNoData *> subtreeRoots;
    for (auto nd : iter_node(*taxonomy)) {
        const RootedTreeNodeNoData * constNd = nd;
        if (contains(includedNodes, constNd)) {
            continue;
        }
        if (constNd->isTip()) {
            ++prunedTips;
        } else {
            ++prunedInternals;
        }
        // Only the excluded nodes hanging directly off an included parent are
        // handed to pruneAndDelete.
        if (contains(includedNodes, constNd->getParent())) {
            subtreeRoots.insert(nd);
        }
    }
    for (auto doomed : subtreeRoots) {
        pruneAndDelete(*taxonomy, doomed);
    }
    writeTreeAsNewick(otCLI.out, *taxonomy);
    otCLI.out << '\n';
    otCLI.err << prunedTips << " terminal taxa pruned\n";
    otCLI.err << prunedInternals << " non-terminal taxa pruned\n";
    return true;
}
// Tries to add the phylo statement `ps` to every tree listed in
// `byIncCardinality` (pairs of <overlap of the include group with the tree,
// tree pointer>).
//
// checkCanAddIngroupOverlappingPhyloStatementToGraph is consulted first; if it
// rejects the statement nothing is modified and false is returned.  Otherwise
// it has filled three parallel containers with one entry per tree, in the order
// of `byIncCardinality`:
//   nonTrivMRCAs          - the node at which the statement is to be attached
//   shouldResolveVec      - true if that node must first be resolved
//   shouldCreateDeeperVec - true if the tree needs a new, deeper root
// Each tree is then modified accordingly and true is returned.
bool RootedForest<T, U>::addIngroupOverlappingPhyloStatementToGraph(const std::list<OverlapFTreePair<T, U> > & byIncCardinality,
                                                                    const PhyloStatement &ps) {
    std::list<node_type * > nonTrivMRCAs;
    OttIdSet attachedElsewhere;
    std::vector<bool> shouldResolveVec;
    std::vector<bool> shouldCreateDeeperVec;
    if (!checkCanAddIngroupOverlappingPhyloStatementToGraph(byIncCardinality, ps, nonTrivMRCAs, attachedElsewhere, shouldResolveVec, shouldCreateDeeperVec)) {
        return false;
    }
    novelAcceptedPSInOrder.push_back(ps); //TMP DEBUGGING
    // all non trivial overlapping trees have approved this split...
    auto ntmIt = begin(nonTrivMRCAs);
    auto srIt = begin(shouldResolveVec);
    auto scdIt = begin(shouldCreateDeeperVec);
    unsigned i = 0;
    // A band is created at most once (on the first tree, and only when more
    // than one tree overlaps); the same pointer is then passed for every tree.
    InterTreeBand<T> * itbp = nullptr;
    for (const auto & incPair : byIncCardinality) {
        LOG(DEBUG) << " addIngroupOverlappingPhyloStatementToGraph mod for loop round " << ++i;
        debugInvariantsCheck();
        tree_type * f = incPair.second;
        assert(ntmIt != nonTrivMRCAs.end());
        assert(srIt != shouldResolveVec.end());
        assert(scdIt != shouldCreateDeeperVec.end());
        node_type * includeGroupA = *ntmIt++;
        const bool addNode = *srIt++;
        // All three iterators walk per-tree data in lock-step.  scdIt used to be
        // dereferenced without being advanced, so every tree saw the first
        // tree's "create deeper root" flag.
        const bool shouldCreateDeeperRoot = *scdIt++;
        if (addNode) {
            includeGroupA = f->resolveToCreateCladeOfIncluded(includeGroupA, ps);
            assert(getTreeForNode(includeGroupA) == f);
            LOG(DEBUG) << " back from resolveToCreateCladeOfIncluded for loop round " << i;
            debugInvariantsCheck();
        } else if (shouldCreateDeeperRoot) {
            f->createDeeperRoot();
            includeGroupA = f->getRoot();
            assert(getTreeForNode(includeGroupA) == f);
            LOG(DEBUG) << " back from createDeeperRoot for loop round " << i;
            debugInvariantsCheck();
        } else {
            assert(getTreeForNode(includeGroupA) == f);
        }
        if (byIncCardinality.size() > 1 && itbp == nullptr) {
            itbp = _createNewBand(*f, *includeGroupA, ps);
        }
        auto connectedHere = f->addPhyloStatementAtNode(ps, includeGroupA, attachedElsewhere, itbp);
        // Whatever was connected in this tree counts as attached elsewhere for
        // the trees handled in later rounds.
        if (!connectedHere.empty()) {
            attachedElsewhere.insert(begin(connectedHere), end(connectedHere));
        }
        dbWriteOttSet(" includeGroupA...desIds ", includeGroupA->getData().desIds);
        LOG(DEBUG) << " back from addPhyloStatementAtNode for loop round " << i;
        debugInvariantsCheck();
    }
    LOG(DEBUG) << " addIngroupOverlappingPhyloStatementToGraph exit true " << i;
    return true;
}
bool culledAndCompleteIncompatWRTLeafSet(const OttIdSet & culled, const OttIdSet & complete, const OttIdSet & leafSet) { //TMP this could be more efficient. See areCompatibleDesIdSets const OttIdSet inter = set_intersection_as_set(culled, complete); if (inter.empty()) { return false; } if (inter == culled) { return false; } const OttIdSet compCulled = set_intersection_as_set(complete, leafSet); return (inter != compCulled); }
// Handles one subproblem tree.
//
// In `pruneInpTreesNotSynth` mode the tree is written as newick to
// <outDir>/<otCLI.currentFilename>.  If that file cannot be opened an error is
// written to otCLI.err and false is returned (previously the failure went
// unnoticed and true was returned).
// NOTE(review): assumes callers treat a false return as "processing failed" -
// confirm against the driver that invokes this method.
//
// In the default mode, for every leaf of `tree`:
//   - its OTT id is recorded in `subproblemTipIds` if the leaf has one;
//   - if the synth tree has a node for that id, the node and (via
//     insertAncestorsToParaphyleticSet) its ancestors are added to
//     `includedNodes`;
//   - otherwise the id is expected to be a non-tip node of the taxonomy
//     (checked with assert only) and a warning is written to otCLI.err.
// Returns true in this mode.
bool processSubproblemTree(OTCLI & otCLI, const TreeMappedWithSplits & tree) {
    if (pruneInpTreesNotSynth) {
        const std::string path = outDir + std::string("/") + otCLI.currentFilename;
        std::ofstream outstream(path.c_str());
        if (!outstream) {
            // Writing to a stream that failed to open would silently drop the tree.
            otCLI.err << "Error: could not open \"" << path << "\" for writing.\n";
            return false;
        }
        writeTreeAsNewick(outstream, tree);
        return true;
    }
    for (const auto nd : iter_leaf_const(tree)) {
        auto ottId = nd->getOttId();
        if (nd->hasOttId()) {
            subproblemTipIds.insert(ottId);
        }
        auto synthNode = synthTree->getData().getNodeForOttId(ottId);
        if (synthNode != nullptr) {
            // Ancestors only need to be added the first time a node is seen.
            if (!contains(includedNodes, synthNode)) {
                includedNodes.insert(synthNode);
                insertAncestorsToParaphyleticSet(synthNode, includedNodes);
            }
        } else {
            auto taxoNode = taxonomy->getData().getNodeForOttId(ottId);
            assert(taxoNode != nullptr);
            assert(!taxoNode->isTip());
            otCLI.err << "Warning ott" << ottId << " was is an internal node that was a tip in the subproblem, but is not found in the tree being pruned.\n";
        }
    }
    return true;
}
// Finds the trees of this forest whose included OTT ids overlap `inc`.
//
// For every such tree a pair <ids shared with `inc`, pointer to the tree> is
// produced.  The pairs are first bucketed by the number of shared ids and the
// buckets are then flattened into the returned list by consumeMapToList
// (presumably in ascending order of overlap size, as that is the key order of
// the bucket map - confirm against consumeMapToList).
std::list<OverlapFTreePair<T, U> > RootedForest<T, U>::getSortedOverlappingTrees(const OttIdSet &inc) {
    using OverlapPair = OverlapFTreePair<T, U>;
    std::map<std::size_t, std::list<OverlapPair> > bucketsBySize;
    for (auto & entry : trees) {
        tree_type * const treePtr = &(entry.second);
        const OttIdSet shared = set_intersection_as_set(treePtr->getIncludedOttIds(), inc);
        if (shared.empty()) {
            continue; // trees without any overlap are not reported
        }
        bucketsBySize[shared.size()].push_back(OverlapPair(shared, treePtr));
    }
    std::list<OverlapPair> sorted;
    consumeMapToList(bucketsBySize, sorted);
    return sorted;
}
// Checks, tree by tree, whether the phylo statement `ps` can be added to each
// tree in `byIncCardinality` (pairs of <overlap of the include group with the
// tree, tree pointer>).  Returns false as soon as one tree rejects it.
//
// Output parameters (one entry appended per tree that passes the check, in the
// order of `byIncCardinality`):
//   nonTrivMRCAs          - node at which the statement should be attached
//   shouldResolveVec      - true if that node has descendants from ps.excludeGroup
//   shouldCreateDeeperVec - true if the search walked past the root of the tree
// `attachedElsewhere` receives the overlap ids of every tree that is examined,
// including a tree that ends up rejecting the statement.
bool RootedForest<T, U>::checkCanAddIngroupOverlappingPhyloStatementToGraph(
        const std::list<OverlapFTreePair<T, U> > & byIncCardinality,
        const PhyloStatement &ps,
        std::list<node_type * > & nonTrivMRCAs,
        OttIdSet & attachedElsewhere,
        std::vector<bool> & shouldResolveVec,
        std::vector<bool> & shouldCreateDeeperVec) const {
    for (const auto & overlapAndTree : byIncCardinality) {
        const auto & sharedIngroup = overlapAndTree.first;
        tree_type * const ftree = overlapAndTree.second;
        attachedElsewhere.insert(sharedIngroup.begin(), sharedIngroup.end());

        node_type * anchor = ftree->getMRCA(sharedIngroup);
        assert(anchor != nullptr);
        assert(getTreeForNode(anchor) == ftree);
        if (anchor->isTip()) {
            // this can happen if the overlap is one taxon.
            anchor = anchor->getParent();
            assert(anchor != nullptr);
            assert(getTreeForNode(anchor) == ftree);
        }
        // If any of the ingroup are specifically excluded, then we have move deeper in the tree.
        // TMP this could be more efficient and avoid the while loop.
        while (ftree->anyExcludedAtNode(anchor, ps.includeGroup)) {
            if (ftree->anyIncludedAtNode(anchor, ps.excludeGroup)
                || ftree->anyPhantomNodesAtNode(anchor, ps.includeGroup)) {
                return false;
            }
            anchor = anchor->getParent();
            if (anchor == nullptr) {
                break; // walked off the top of the tree
            }
            assert(getTreeForNode(anchor) == ftree);
        }

        // Running out of ancestors means the statement can only be attached at
        // the root, with a deeper root requested.
        const bool forceDeeperRoot = (anchor == nullptr);
        OttIdSet excInc;
        if (forceDeeperRoot) {
            anchor = ftree->getRoot();
            assert(getTreeForNode(anchor) == ftree);
        } else {
            excInc = set_intersection_as_set(anchor->getData().desIds, ps.excludeGroup);
            if (debuggingOutputEnabled) {
                LOG(DEBUG) << " addPhyloStatementToGraph search for an ancestor of ...";
                dbWriteOttSet(" addPhyloStatementToGraph search for an ancestor of: ", sharedIngroup);
                dbWriteOttSet(" wanted to avoid = ", ps.excludeGroup);
                dbWriteOttSet(" found a node with desIds: ", anchor->getData().desIds);
                dbWriteOttSet(" which includes the excludegroup members: ", excInc);
            }
            if (!canBeResolvedToDisplayIncExcGroup(anchor, ps.includeGroup, excInc)) {
                return false; // the MRCA of the includeGroup had interdigitated members of the excludeGroup
            }
        }
        nonTrivMRCAs.push_back(anchor);
        shouldResolveVec.push_back(!excInc.empty());
        shouldCreateDeeperVec.push_back(forceDeeperRoot);
    }
    return true;
}