Example #1
0
// note: takes smart pointer as it maybe added to the results
static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) {
    const Genome *parent = mappedSeg->getGenome()->getParent();
    assert(parent != NULL);
    hal_size_t added = 0;
    if (mappedSeg->isTop() == true) {
        BottomSegmentIteratorPtr botSegIt = parent->getBottomSegmentIterator();
        TopSegmentIteratorPtr topSegIt = mappedSeg->targetAsTop();
        if (topSegIt->tseg()->hasParent() == true && topSegIt->getLength() >= minLength &&
            (doDupes == true || topSegIt->tseg()->isCanonicalParalog() == true)) {
            botSegIt->toParent(topSegIt);
            mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(botSegIt));
            results.push_back(mappedSeg);
            ++added;
        }
    } else {
        hal_index_t rightCutoff = mappedSeg->getEndPosition();
        BottomSegmentIteratorPtr botSegIt = mappedSeg->targetAsBottom();
        hal_index_t startOffset = (hal_index_t)botSegIt->getStartOffset();
        hal_index_t endOffset = (hal_index_t)botSegIt->getEndOffset();
        TopSegmentIteratorPtr topSegIt = mappedSeg->getGenome()->getTopSegmentIterator();
        topSegIt->toParseUp(botSegIt);
        do {
            TopSegmentIteratorPtr newTopSegIt = topSegIt->clone();

            // we map the new target back to see how the offsets have
            // changed.  these changes are then applied to the source segment
            // as deltas
            BottomSegmentIteratorPtr backBotSegIt = botSegIt->clone();
            backBotSegIt->toParseDown(newTopSegIt);
            hal_index_t startBack = (hal_index_t)backBotSegIt->getStartOffset();
            hal_index_t endBack = (hal_index_t)backBotSegIt->getEndOffset();
            assert(startBack >= startOffset);
            assert(endBack >= endOffset);
            SegmentIteratorPtr newSourceSegIt = mappedSeg->sourceClone();
            hal_index_t startDelta = startBack - startOffset;
            hal_index_t endDelta = endBack - endOffset;
            assert((hal_index_t)newSourceSegIt->getLength() > startDelta + endDelta);
            newSourceSegIt->slice(newSourceSegIt->getStartOffset() + startDelta, newSourceSegIt->getEndOffset() + endDelta);

            MappedSegmentPtr newMappedSeg(new MappedSegment(newSourceSegIt, newTopSegIt));

            assert(newMappedSeg->isTop() == true);
            assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

            added += mapUp(newMappedSeg, results, doDupes, minLength);
            // stupid that we have to make this check but odn't want to
            // make fundamental api change now
            if (topSegIt->getEndPosition() != rightCutoff) {
                topSegIt->toRight(rightCutoff);
            } else {
                break;
            }
        } while (true);
    }
    return added;
}
Example #2
0
// Set top segments to be equal width and so that segment 1, 2, 3,
// etc. corresponds to parent segment 1, 2, 3, etc.
void setTopSegments(Genome *genome, hal_size_t width) {
    TopSegmentIteratorPtr topIt = genome->getTopSegmentIterator();
    hal_size_t n = genome->getNumTopSegments();
    hal_index_t startPos = 0;
    for (; topIt->getArrayIndex() < n; topIt->toRight(), startPos += width) {
        topIt->setCoordinates(startPos, width);
        topIt->tseg()->setParentIndex(topIt->getArrayIndex());
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(NULL_INDEX);
        topIt->tseg()->setNextParalogyIndex(NULL_INDEX);
    }
}
Example #3
0
int MappedSegment::boundComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) {
    int res = 0;
    bool flip = s2->getReversed();
    if (flip) {
        s2->toReverse();
    }

    if (s1->isTop() && !s2->isTop()) {
        BottomSegmentIteratorPtr bot = std::dynamic_pointer_cast<BottomSegmentIterator>(s2);
        hal_index_t lb = bot->bseg()->getTopParseIndex();
        hal_index_t ub = lb;
        if ((hal_size_t)bot->getArrayIndex() < bot->getGenome()->getNumBottomSegments() - 1) {
            bot = bot->clone();
            bot->slice(0, 0);
            bot->toRight();
            ub = bot->bseg()->getTopParseIndex();
        }
        if (s1->getArrayIndex() < lb) {
            res = -1;
        } else if (s1->getArrayIndex() > ub) {
            res = 1;
        }
    } else if (!s1->isTop() && s2->isTop()) {
        TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(s2);
        hal_index_t lb = top->tseg()->getBottomParseIndex();
        hal_index_t ub = lb;
        if ((hal_size_t)top->getArrayIndex() < top->getGenome()->getNumTopSegments() - 1) {
            top = top->clone();
            top->slice(0, 0);
            top->toRight();
            ub = top->tseg()->getBottomParseIndex();
        }
        if (s1->getArrayIndex() < lb) {
            res = -1;
        } else if (s1->getArrayIndex() > ub) {
            res = 1;
        }
    }

    if (flip) {
        s2->toReverse();
    }

    return res;
}
void GappedBottomSegmentIterator::toRightNextUngapped(TopSegmentIteratorPtr topSeqIt) const {
    while (topSeqIt->tseg()->hasParent() == false && topSeqIt->getLength() <= _gapThreshold) {
        if ((!topSeqIt->getReversed() && topSeqIt->getTopSegment()->isLast()) ||
            (topSeqIt->getReversed() && topSeqIt->getTopSegment()->isFirst())) {
            break;
        }
        topSeqIt->toRight();
    }
}
Example #5
0
void GenomeCopyTest::checkCallBack(const Alignment *alignment) {
    // FIXME: halAlignment->open() fails miserably but
    // openHalAlignmentReadOnly works? Probably some state isn't cleared
    // on close.
    AlignmentPtr tmp(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS));
    _secondAlignment = tmp;
    const Genome *ancGenome = alignment->openGenome("AncGenome");
    CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
    CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 0);
    CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 700000);
    const MetaData *ancMeta = ancGenome->getMetaData();
    CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy");
    const Genome *leafGenome = alignment->openGenome("LeafGenome1");
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]);
    }
    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex());
        CuAssertTrue(_testCase, topIt->getLength() == 1);
        CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3);
        CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true);
        CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5);
        if (topIt->getArrayIndex() != 6) {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6);
        } else {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7);
        }
    }
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex());
        CuAssertTrue(_testCase, botIt->getLength() == 1);
        CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3);
        CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true);
        CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5);
    }

    const Genome *copyRootGenome = _secondAlignment->openGenome("copyRootGenome");
    const Genome *copyLeafGenome = _secondAlignment->openGenome("LeafGenome1");
    CuAssertTrue(_testCase, copyRootGenome->getName() == "copyRootGenome");
    CuAssertTrue(_testCase, copyRootGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, copyRootGenome->getNumTopSegments() == 0);
    CuAssertTrue(_testCase, copyRootGenome->getNumBottomSegments() == 700000);
    CuAssertTrue(_testCase, copyLeafGenome->getName() == "LeafGenome1");
    CuAssertTrue(_testCase, copyLeafGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, copyLeafGenome->getNumTopSegments() == 5000);
    CuAssertTrue(_testCase, copyLeafGenome->getNumBottomSegments() == 0);
    const MetaData *copyMeta = copyRootGenome->getMetaData();
    CuAssertTrue(_testCase, copyMeta->get("Young") == "Jeezy");
    n = copyRootGenome->getSequenceLength();
    dnaIt = copyRootGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]);
    }
    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex());
        CuAssertTrue(_testCase, topIt->getLength() == 1);
        CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3);
        CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true);
        CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5);
        if (topIt->getArrayIndex() != 6) {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6);
        } else {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7);
        }
    }
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex());
        CuAssertTrue(_testCase, botIt->getLength() == 1);
        CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3);
        CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true);
        CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5);
    }

    _secondAlignment->close();
    remove(_path.c_str());
}
Example #6
0
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    hal_size_t alignmentSize = alignment->getNumGenomes();
    CuAssertTrue(_testCase, alignmentSize == 0);

    // Hacky: Need a different alignment to test copying the bottom
    // segments correctly.  (the names of a node's children are used
    // when copying bottom segments, and two genomes can't have the same
    // name in the same alignment)
    _path = getTempFile();
    _secondAlignment =
        AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);
    Genome *leafGenome = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);
    // This genome will test copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence, copyMetadata
    Genome *copyRootGenome = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *copyLeafGenome = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    MetaData *ancMeta = ancGenome->getMetaData();
    ancMeta->set("Young", "Jeezy");

    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    ancGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    leafGenome->setDimensions(seqVec);
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    n = leafGenome->getSequenceLength();
    dnaIt = leafGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(topIt->getArrayIndex(), 1);
        topIt->tseg()->setParentIndex(3);
        topIt->tseg()->setParentReversed(true);
        topIt->tseg()->setBottomParseIndex(5);
        if (topIt->getArrayIndex() != 6) {
            topIt->tseg()->setNextParalogyIndex(6);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(botIt->getArrayIndex(), 1);
        botIt->bseg()->setChildIndex(0, 3);
        botIt->bseg()->setChildReversed(0, true);
        botIt->bseg()->setTopParseIndex(5);
    }

    seqVec[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    copyRootGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    copyLeafGenome->setDimensions(seqVec);
    string copySeq = "TAG";
    dnaIt = copyRootGenome->getDnaIterator();
    n = copyRootGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    dnaIt = copyLeafGenome->getDnaIterator();
    n = copyLeafGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(7, 8);
        topIt->tseg()->setParentIndex(9);
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(11);
        if (topIt->getArrayIndex() != 12) {
            topIt->tseg()->setNextParalogyIndex(12);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(6, 7);
        botIt->bseg()->setChildIndex(0, 8);
        botIt->bseg()->setChildReversed(0, false);
        botIt->bseg()->setTopParseIndex(10);
    }

    ancGenome->copy(copyRootGenome);
    leafGenome->copy(copyLeafGenome);
    _secondAlignment->close();
}
Example #7
0
// note: takes smart pointer as it maybe added to the results
static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) {
    hal_size_t added = 0;
    if (mappedSeg->isTop() == true) {
        SegmentIteratorPtr target = mappedSeg->getTargetIteratorPtr();
        SegmentIteratorPtr source = mappedSeg->getSourceIteratorPtr();
        TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(target);
        TopSegmentIteratorPtr topCopy = top->clone();
        do {
            // FIXME: why isn't clone() polymorphic?
            SegmentIteratorPtr newSource;
            if (source->isTop()) {
                newSource = std::dynamic_pointer_cast<TopSegmentIterator>(source)->clone();
            } else {
                newSource = std::dynamic_pointer_cast<BottomSegmentIterator>(source)->clone();
            }
            TopSegmentIteratorPtr newTop = topCopy->clone();
            MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, newTop));
            assert(newMappedSeg->getGenome() == mappedSeg->getGenome());
            assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());
            results.push_back(newMappedSeg);
            ++added;
            if (topCopy->tseg()->hasNextParalogy()) {
                topCopy->toNextParalogy();
            }
        } while (topCopy->tseg()->hasNextParalogy() == true && topCopy->getLength() >= minLength &&
                 topCopy->getArrayIndex() != top->getArrayIndex());
    } else if (mappedSeg->getGenome()->getParent() != NULL) {
        hal_index_t rightCutoff = mappedSeg->getEndPosition();
        BottomSegmentIteratorPtr bottom = mappedSeg->targetAsBottom();
        hal_index_t startOffset = (hal_index_t)bottom->getStartOffset();
        hal_index_t endOffset = (hal_index_t)bottom->getEndOffset();
        TopSegmentIteratorPtr top = mappedSeg->getGenome()->getTopSegmentIterator();
        top->toParseUp(bottom);
        do {
            TopSegmentIteratorPtr topNew = top->clone();

            // we map the new target back to see how the offsets have
            // changed.  these changes are then applied to the source segment
            // as deltas
            BottomSegmentIteratorPtr bottomBack = bottom->clone();
            bottomBack->toParseDown(topNew);
            hal_index_t startBack = (hal_index_t)bottomBack->getStartOffset();
            hal_index_t endBack = (hal_index_t)bottomBack->getEndOffset();
            assert(startBack >= startOffset);
            assert(endBack >= endOffset);
            SegmentIteratorPtr newSource = mappedSeg->sourceClone();
            hal_index_t startDelta = startBack - startOffset;
            hal_index_t endDelta = endBack - endOffset;
            assert((hal_index_t)newSource->getLength() > startDelta + endDelta);
            newSource->slice(newSource->getStartOffset() + startDelta, newSource->getEndOffset() + endDelta);

            MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, topNew));

            assert(newMappedSeg->isTop() == true);
            assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

            added += mapSelf(newMappedSeg, results, minLength);
            // stupid that we have to make this check but odn't want to
            // make fundamental api change now
            if (top->getEndPosition() != rightCutoff) {
                top->toRight(rightCutoff);
            } else {
                break;
            }
        } while (true);
    }
    return added;
}