Пример #1
0
    // Final reporting step.
    //
    // When `reportStats` is false: every node of `*taxonomy` that is not in
    // `includedNodes` is counted as pruned; those whose parent *is* in
    // `includedNodes` are handed to pruneAndDelete. The remaining tree is
    // written as Newick to otCLI.out and the pruned counts go to otCLI.err.
    //
    // When `reportStats` is true: counts of non-terminal / terminal / total
    // nodes in `directlyIncludedNodes` are written to otCLI.out, and in verbose
    // mode the collected OTT ids are listed as well (all ids on otCLI.out,
    // non-terminal ids on otCLI.err).
    //
    // Always returns true.
    bool summarize(OTCLI &otCLI) override {
        if (!reportStats) {
            assert(taxonomy != nullptr && !includedNodes.empty());
            std::size_t prunedTips = 0;
            std::size_t prunedInternals = 0;
            // Collect first, delete afterwards, so the tree is not modified
            // while it is being traversed.
            std::set<RootedTreeNodeNoData *> subtreeRootsToDrop;
            for (auto nd : iter_node(*taxonomy)) {
                const RootedTreeNodeNoData * asConst = nd;
                if (contains(includedNodes, asConst)) {
                    continue;
                }
                // Only the topmost excluded node of each excluded subtree is queued.
                if (contains(includedNodes, asConst->getParent())) {
                    subtreeRootsToDrop.insert(nd);
                }
                if (asConst->isTip()) {
                    ++prunedTips;
                } else {
                    ++prunedInternals;
                }
            }
            for (auto doomed : subtreeRootsToDrop) {
                pruneAndDelete(*taxonomy, doomed);
            }
            writeTreeAsNewick(otCLI.out, *taxonomy);
            otCLI.out << '\n';
            otCLI.err << prunedTips << " terminal taxa pruned\n";
            otCLI.err << prunedInternals << " non-terminal taxa pruned\n";
            return true;
        }

        OttIdSet allMappedIds;
        OttIdSet internalMappedIds;
        std::size_t internalCount = 0;
        for (auto mapped : directlyIncludedNodes) {
            if (!mapped->isTip()) {
                ++internalCount;
                internalMappedIds.insert(mapped->getOttId());
            }
            allMappedIds.insert(mapped->getOttId());
        }
        const auto totalCount = directlyIncludedNodes.size();
        otCLI.out << internalCount << " non-terminal taxa in OTT that are mapped by at least 1 input.\n";
        otCLI.out << (totalCount - internalCount) << " terminal taxa in OTT that are mapped by at least 1 input\n";
        otCLI.out << totalCount << " total taxa in OTT that are mapped by at least 1 input.\n";
        if (otCLI.verbose) {
            otCLI.out << "total included OTT Ids\n";
            for (const auto & id : allMappedIds) {
                otCLI.out << id << '\n';
            }
            otCLI.err << "non-terminal OTT Ids\n";
            for (const auto & id : internalMappedIds) {
                otCLI.err << id << '\n';
            }
        }
        return true;
    }
Пример #2
0
// Adds the phylo statement `ps` to every tree listed in `byIncCardinality`.
//
// First asks checkCanAddIngroupOverlappingPhyloStatementToGraph whether all of
// the overlapping trees can accept the statement; if not, returns false without
// touching the trees. Otherwise, for each (overlap, tree) pair in order:
//   - picks the attachment node recorded by the check, optionally resolving a
//     new clade (resolveToCreateCladeOfIncluded) or creating a deeper root
//     (createDeeperRoot) as the per-tree flags dictate;
//   - creates a single inter-tree band the first time through when more than
//     one tree is involved;
//   - calls addPhyloStatementAtNode and merges the ids it reports into
//     `attachedElsewhere` for the trees that follow.
// Returns true once every tree has been processed.
bool RootedForest<T, U>::addIngroupOverlappingPhyloStatementToGraph(const std::list<OverlapFTreePair<T, U> > & byIncCardinality,
                                                                   const PhyloStatement &ps) {
    std::list<node_type * > nonTrivMRCAs;
    OttIdSet attachedElsewhere;
    std::vector<bool> shouldResolveVec;
    std::vector<bool> shouldCreateDeeperVec;
    if (!checkCanAddIngroupOverlappingPhyloStatementToGraph(byIncCardinality, ps, nonTrivMRCAs, attachedElsewhere, shouldResolveVec, shouldCreateDeeperVec)) {
        return false;
    }
    novelAcceptedPSInOrder.push_back(ps); //TMP DEBUGGING
    // all non trivial overlapping trees have approved this split...
    // The three containers below were filled in lock-step by the check (one
    // entry per tree), so their iterators must advance together.
    auto ntmIt = begin(nonTrivMRCAs);
    auto srIt = begin(shouldResolveVec);
    auto scdIt = begin(shouldCreateDeeperVec);
    unsigned i = 0;
    InterTreeBand<T> * itbp = nullptr;
    for (const auto & incPair : byIncCardinality) {
        LOG(DEBUG) << "   addIngroupOverlappingPhyloStatementToGraph mod for loop round " << ++i;
        debugInvariantsCheck();
        tree_type * f = incPair.second;
        assert(ntmIt != nonTrivMRCAs.end());
        assert(srIt != shouldResolveVec.end());
        assert(scdIt != shouldCreateDeeperVec.end());
        node_type * includeGroupA = *ntmIt++;
        const bool addNode = *srIt++;
        // Previously `*scdIt` was read without advancing, so every tree after
        // the first reused the first tree's flag.
        const bool shouldCreateDeeperRoot = *scdIt++;
        if (addNode) {
            includeGroupA = f->resolveToCreateCladeOfIncluded(includeGroupA, ps);
            assert(getTreeForNode(includeGroupA) == f);
            LOG(DEBUG) << "   back from resolveToCreateCladeOfIncluded for loop round " << i;
            debugInvariantsCheck();
        } else if (shouldCreateDeeperRoot) {
            f->createDeeperRoot();
            includeGroupA = f->getRoot();
            assert(getTreeForNode(includeGroupA) == f);
            LOG(DEBUG) << "   back from createDeeperRoot for loop round " << i;
            debugInvariantsCheck();
        } else {
            assert(getTreeForNode(includeGroupA) == f);
        }
        // A band is only needed when the statement spans more than one tree;
        // it is created once and then shared by all subsequent trees.
        if (byIncCardinality.size() > 1 && itbp == nullptr) {
            itbp = _createNewBand(*f, *includeGroupA, ps);
        }
        auto connectedHere = f->addPhyloStatementAtNode(ps, includeGroupA, attachedElsewhere, itbp);
        if (!connectedHere.empty()) {
            attachedElsewhere.insert(begin(connectedHere), end(connectedHere));
        }
        dbWriteOttSet(" includeGroupA...desIds ", includeGroupA->getData().desIds);
        LOG(DEBUG) << "   back from addPhyloStatementAtNode for loop round " << i;
        debugInvariantsCheck();
    }
    LOG(DEBUG) << "   addIngroupOverlappingPhyloStatementToGraph exit true " << i;
    return true;
}
Пример #3
0
bool culledAndCompleteIncompatWRTLeafSet(const OttIdSet & culled,
                                                const OttIdSet & complete,
                                                const OttIdSet & leafSet) {
    //TMP this could be more efficient. See areCompatibleDesIdSets
    const OttIdSet inter = set_intersection_as_set(culled, complete);
    if (inter.empty()) {
        return false;
    }
    if (inter == culled) {
        return false;
    }
    const OttIdSet compCulled = set_intersection_as_set(complete, leafSet);
    return (inter != compCulled);
}
 // Handles one subproblem tree.
 //
 // When `pruneInpTreesNotSynth` is set, the tree is written as Newick to
 // `outDir`/`otCLI.currentFilename`; returns false (after reporting on
 // otCLI.err) if that file cannot be opened, true otherwise.
 //
 // Otherwise each leaf is examined: its OTT id is recorded in
 // `subproblemTipIds` when the leaf has one, and the matching node of
 // `synthTree` (if any) is added to `includedNodes` together with its
 // ancestors via insertAncestorsToParaphyleticSet. Leaves with no match in
 // `synthTree` are looked up in `taxonomy` and produce a warning on
 // otCLI.err. Returns true.
 bool processSubproblemTree(OTCLI & otCLI, const TreeMappedWithSplits & tree) {
     if (pruneInpTreesNotSynth) {
         std::string path = outDir + std::string("/") + otCLI.currentFilename;
         std::ofstream outstream(path.c_str());
         // Do not silently report success when the output file cannot be created.
         if (!outstream.good()) {
             otCLI.err << "Error: could not open \"" << path << "\" for writing.\n";
             return false;
         }
         writeTreeAsNewick(outstream, tree);
         return true;
     }
     for (const auto nd : iter_leaf_const(tree)) {
         auto ottId = nd->getOttId();
         if (nd->hasOttId()) {
             subproblemTipIds.insert(ottId);
         }
         auto synthNode = synthTree->getData().getNodeForOttId(ottId);
         if (synthNode != nullptr) {
             // Ancestors only need to be added the first time a node is seen.
             if (!contains(includedNodes, synthNode)) {
                 includedNodes.insert(synthNode);
                 insertAncestorsToParaphyleticSet(synthNode, includedNodes);
             }
         } else {
             // A tip missing from the tree being pruned is expected to be an
             // internal node of the taxonomy.
             auto taxoNode = taxonomy->getData().getNodeForOttId(ottId);
             assert(taxoNode != nullptr);
             assert(!taxoNode->isTip());
             otCLI.err << "Warning ott" << ottId << " was is an internal node that was a tip in the subproblem, but is not found in the tree being pruned.\n";
         }
     }
     return true;
 }
Пример #5
0
// Collects every tree in `trees` whose included OTT ids intersect `inc`.
// Each hit is stored as an (intersection, tree pointer) pair, bucketed by the
// size of the intersection, and the buckets are then flattened into a single
// list by consumeMapToList (presumably in ascending overlap size, following
// std::map key order -- confirm against consumeMapToList).
std::list<OverlapFTreePair<T, U> > RootedForest<T, U>::getSortedOverlappingTrees(const OttIdSet &inc) {
    using TreeOverlap = OverlapFTreePair<T, U>;
    std::map<std::size_t, std::list<TreeOverlap> > bucketsBySize;
    for (auto & idAndTree : trees) {
        tree_type * candidate = &(idAndTree.second);
        const OttIdSet & idsInTree = candidate->getIncludedOttIds();
        const OttIdSet shared = set_intersection_as_set(idsInTree, inc);
        if (shared.empty()) {
            continue; // no overlap with this tree
        }
        bucketsBySize[shared.size()].push_back(TreeOverlap(shared, candidate));
    }
    std::list<TreeOverlap> flattened;
    consumeMapToList(bucketsBySize, flattened);
    return flattened;
}
Пример #6
0
// Decides whether `ps` can be added to every tree in `byIncCardinality` and
// records, per tree, how it would be attached.
//
// For each (overlap, tree) pair the overlap ids are merged into
// `attachedElsewhere`, and an attachment node is located: the MRCA of the
// overlap (or its parent when the MRCA is a tip), moved rootward while the
// tree reports excluded include-group members at that node. Returns false as
// soon as any tree vetoes the statement. On success one entry per tree has been
// appended, in step, to:
//   nonTrivMRCAs          - the chosen attachment node,
//   shouldResolveVec      - true when exclude-group ids sit below that node,
//   shouldCreateDeeperVec - true when the walk ran past the root.
// Note that the output arguments may be partially filled when false is returned.
bool RootedForest<T, U>::checkCanAddIngroupOverlappingPhyloStatementToGraph(
            const std::list<OverlapFTreePair<T, U> > & byIncCardinality,
            const PhyloStatement &ps,
            std::list<node_type * > & nonTrivMRCAs,
            OttIdSet & attachedElsewhere,
            std::vector<bool> & shouldResolveVec,
            std::vector<bool> & shouldCreateDeeperVec) const {
    for (const auto & overlapAndTree : byIncCardinality) {
        const auto & sharedIngroup = overlapAndTree.first;
        tree_type * ftree = overlapAndTree.second;
        attachedElsewhere.insert(sharedIngroup.begin(), sharedIngroup.end());

        node_type * anchor = ftree->getMRCA(sharedIngroup);
        assert(anchor != nullptr);
        assert(getTreeForNode(anchor) == ftree);
        if (anchor->isTip()) {
            // this can happen if the overlap is one taxon.
            anchor = anchor->getParent();
            assert(anchor != nullptr);
            assert(getTreeForNode(anchor) == ftree);
        }
        // If any of the ingroup are specifically excluded, then we have move deeper in the tree.
        // TMP this could be more efficient and avoid the while loop.
        while (ftree->anyExcludedAtNode(anchor, ps.includeGroup)) {
            // Either condition is a veto for the whole statement.
            if (ftree->anyIncludedAtNode(anchor, ps.excludeGroup)
                || ftree->anyPhantomNodesAtNode(anchor, ps.includeGroup)) {
                return false;
            }
            anchor = anchor->getParent();
            if (anchor == nullptr) {
                break; // walked past the root
            }
            assert(getTreeForNode(anchor) == ftree);
        }

        const bool walkedPastRoot = (anchor == nullptr);
        OttIdSet excludedBelowAnchor;
        if (walkedPastRoot) {
            anchor = ftree->getRoot();
            assert(getTreeForNode(anchor) == ftree);
        } else {
            excludedBelowAnchor = set_intersection_as_set(anchor->getData().desIds, ps.excludeGroup);
            if (debuggingOutputEnabled) {
                LOG(DEBUG) << "     addPhyloStatementToGraph search for an ancestor of ..."; 
                dbWriteOttSet(" addPhyloStatementToGraph search for an ancestor of:  ", sharedIngroup);
                dbWriteOttSet(" wanted to avoid =  ", ps.excludeGroup);
                dbWriteOttSet(" found a node with desIds:  ", anchor->getData().desIds);
                dbWriteOttSet(" which includes the excludegroup members:  ", excludedBelowAnchor);
            }
            if (!canBeResolvedToDisplayIncExcGroup(anchor, ps.includeGroup, excludedBelowAnchor)) {
                return false; // the MRCA of the includeGroup had interdigitated members of the excludeGroup
            }
        }
        shouldCreateDeeperVec.push_back(walkedPastRoot);
        shouldResolveVec.push_back(!excludedBelowAnchor.empty());
        nonTrivMRCAs.push_back(anchor);
    }
    return true;
}