Exemplo n.º 1
0
void TopSegmentIteratorToSiteTest::checkGenome(const Genome *genome) {
    TopSegmentIteratorPtr ti = genome->getTopSegmentIterator();
    for (hal_index_t pos = 0; pos < (hal_index_t)genome->getSequenceLength(); ++pos) {
        ti->toSite(pos);
        CuAssertTrue(_testCase, ti->getStartPosition() == pos);
        CuAssertTrue(_testCase, ti->getLength() == 1);
        ti->toSite(pos, false);
        CuAssertTrue(_testCase, pos >= ti->getStartPosition() && pos < ti->getStartPosition() + (hal_index_t)ti->getLength());
        CuAssertTrue(_testCase, ti->getLength() == ti->getTopSegment()->getLength());
    }
}
Exemplo n.º 2
0
void TopSegmentIteratorParseTest::checkCallBack(const Alignment *alignment) {
    BottomSegmentIteratorPtr bi;
    TopSegmentIteratorPtr ti;

    // case 1
    const Genome *case1 = alignment->openGenome("case1");
    ti = case1->getTopSegmentIterator();
    bi = case1->getBottomSegmentIterator();
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
    CuAssertTrue(_testCase, bi->getLength() == ti->getLength());
    bi->slice(3, 1);
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getLength() == bi->getBottomSegment()->getLength() - 4);

    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
    CuAssertTrue(_testCase, bi->getLength() == ti->getLength());

    // case 2
    const Genome *case2 = alignment->openGenome("case2");
    ti = case2->getTopSegmentIterator();
    bi = case2->getBottomSegmentIterator(1);
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
    bi->slice(1, 1);
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());

    // case 3
    const Genome *case3 = alignment->openGenome("case3");
    ti = case3->getTopSegmentIterator();
    bi = case3->getBottomSegmentIterator();
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
    bi->slice(2, 1);
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());

    // case 4
    const Genome *case4 = alignment->openGenome("case4");
    ti = case4->getTopSegmentIterator();
    bi = case4->getBottomSegmentIterator(1);
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
    bi->slice(2, 2);
    ti->toParseUp(bi);
    CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
}
/**
 * Move topSeqIt to the right while it sits on a segment that has no parent
 * and whose length does not exceed _gapThreshold.
 *
 * Stops without moving further when the iterator is on the boundary segment
 * for its orientation: the last segment when not reversed, the first segment
 * when reversed.
 */
void GappedBottomSegmentIterator::toRightNextUngapped(TopSegmentIteratorPtr topSeqIt) const {
    for (;;) {
        // A segment with a parent, or one longer than the threshold, ends the scan.
        if (topSeqIt->tseg()->hasParent() || topSeqIt->getLength() > _gapThreshold) {
            return;
        }
        // Nothing further to the right in the direction of travel.
        const bool onBoundary =
            topSeqIt->getReversed() ? topSeqIt->getTopSegment()->isFirst() : topSeqIt->getTopSegment()->isLast();
        if (onBoundary) {
            return;
        }
        topSeqIt->toRight();
    }
}
Exemplo n.º 4
0
// Map a segment toward the parent genome and append what maps to `results`.
//
// mappedSeg is passed as a smart pointer because, in the top-segment case,
// the very same object may be appended to `results`.
//
// - Target is a top segment: if it has a parent, is at least minLength long
//   and (doDupes is set or it is the canonical paralog), the target is
//   replaced by the parent's bottom segment and mappedSeg is appended.
// - Target is a bottom segment: it is parsed up into the top segments of the
//   same genome, and mapUp() recurses on a new MappedSegment for each one up
//   to the original end position.
//
// Returns the number of segments appended to `results`.
static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) {
    const Genome *parent = mappedSeg->getGenome()->getParent();
    assert(parent != NULL);

    if (mappedSeg->isTop()) {
        BottomSegmentIteratorPtr parentBot = parent->getBottomSegmentIterator();
        TopSegmentIteratorPtr targetTop = mappedSeg->targetAsTop();
        const bool mappable = targetTop->tseg()->hasParent() && targetTop->getLength() >= minLength &&
                              (doDupes || targetTop->tseg()->isCanonicalParalog());
        if (!mappable) {
            return 0;
        }
        parentBot->toParent(targetTop);
        mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(parentBot));
        results.push_back(mappedSeg);
        return 1;
    }

    hal_size_t added = 0;
    hal_index_t rightCutoff = mappedSeg->getEndPosition();
    BottomSegmentIteratorPtr targetBot = mappedSeg->targetAsBottom();
    hal_index_t startOffset = static_cast<hal_index_t>(targetBot->getStartOffset());
    hal_index_t endOffset = static_cast<hal_index_t>(targetBot->getEndOffset());
    TopSegmentIteratorPtr parseTop = mappedSeg->getGenome()->getTopSegmentIterator();
    parseTop->toParseUp(targetBot);

    for (;;) {
        TopSegmentIteratorPtr topPiece = parseTop->clone();

        // Parse the new top target back down to find out how far its
        // offsets moved relative to the original bottom target; those
        // differences are then applied to a copy of the source as deltas.
        BottomSegmentIteratorPtr roundTrip = targetBot->clone();
        roundTrip->toParseDown(topPiece);
        hal_index_t startBack = static_cast<hal_index_t>(roundTrip->getStartOffset());
        hal_index_t endBack = static_cast<hal_index_t>(roundTrip->getEndOffset());
        assert(startBack >= startOffset);
        assert(endBack >= endOffset);
        SegmentIteratorPtr sourcePiece = mappedSeg->sourceClone();
        hal_index_t startDelta = startBack - startOffset;
        hal_index_t endDelta = endBack - endOffset;
        assert((hal_index_t)sourcePiece->getLength() > startDelta + endDelta);
        sourcePiece->slice(sourcePiece->getStartOffset() + startDelta, sourcePiece->getEndOffset() + endDelta);

        MappedSegmentPtr pieceSeg(new MappedSegment(sourcePiece, topPiece));

        assert(pieceSeg->isTop() == true);
        assert(pieceSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

        added += mapUp(pieceSeg, results, doDupes, minLength);

        // The end position has to be tested explicitly before advancing;
        // avoiding this check would need a fundamental API change.
        if (parseTop->getEndPosition() == rightCutoff) {
            break;
        }
        parseTop->toRight(rightCutoff);
    }
    return added;
}
Exemplo n.º 5
0
void TopSegmentSimpleIteratorTest::checkCallBack(const Alignment *alignment) {
    const Genome *ancGenome = alignment->openGenome("Anc0");
    CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == _topSegments.size());
    TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0);
    for (size_t i = 0; i < ancGenome->getNumTopSegments(); ++i) {
        CuAssertTrue(_testCase, (size_t)tsIt->getTopSegment()->getArrayIndex() == i);
        _topSegments[i].compareTo(tsIt, _testCase);
        tsIt->toRight();
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    for (hal_index_t i = ancGenome->getNumTopSegments() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i);
        _topSegments[i].compareTo(tsIt, _testCase);
        tsIt->toLeft();
    }

    tsIt = ancGenome->getTopSegmentIterator(0);
    tsIt->slice(0, tsIt->getLength() - 1);
    for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toRight(tsIt->getStartPosition() + 1);
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    tsIt->slice(tsIt->getLength() - 1, 0);
    for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toLeft(tsIt->getStartPosition() - 1);
    }

    tsIt = ancGenome->getTopSegmentIterator(0);
    tsIt->toReverse();
    CuAssertTrue(_testCase, tsIt->getReversed() == true);
    tsIt->slice(tsIt->getLength() - 1, 0);
    for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toLeft(tsIt->getStartPosition() + 1);
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    tsIt->toReverse();
    tsIt->slice(0, tsIt->getLength() - 1);
    for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toRight(tsIt->getStartPosition() - 1);
    }
}
Exemplo n.º 6
0
void GenomeCopyTest::checkCallBack(const Alignment *alignment) {
    // FIXME: halAlignment->open() fails miserably but
    // openHalAlignmentReadOnly works? Probably some state isn't cleared
    // on close.
    AlignmentPtr tmp(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS));
    _secondAlignment = tmp;
    const Genome *ancGenome = alignment->openGenome("AncGenome");
    CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
    CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 0);
    CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 700000);
    const MetaData *ancMeta = ancGenome->getMetaData();
    CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy");
    const Genome *leafGenome = alignment->openGenome("LeafGenome1");
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]);
    }
    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex());
        CuAssertTrue(_testCase, topIt->getLength() == 1);
        CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3);
        CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true);
        CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5);
        if (topIt->getArrayIndex() != 6) {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6);
        } else {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7);
        }
    }
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex());
        CuAssertTrue(_testCase, botIt->getLength() == 1);
        CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3);
        CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true);
        CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5);
    }

    const Genome *copyRootGenome = _secondAlignment->openGenome("copyRootGenome");
    const Genome *copyLeafGenome = _secondAlignment->openGenome("LeafGenome1");
    CuAssertTrue(_testCase, copyRootGenome->getName() == "copyRootGenome");
    CuAssertTrue(_testCase, copyRootGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, copyRootGenome->getNumTopSegments() == 0);
    CuAssertTrue(_testCase, copyRootGenome->getNumBottomSegments() == 700000);
    CuAssertTrue(_testCase, copyLeafGenome->getName() == "LeafGenome1");
    CuAssertTrue(_testCase, copyLeafGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, copyLeafGenome->getNumTopSegments() == 5000);
    CuAssertTrue(_testCase, copyLeafGenome->getNumBottomSegments() == 0);
    const MetaData *copyMeta = copyRootGenome->getMetaData();
    CuAssertTrue(_testCase, copyMeta->get("Young") == "Jeezy");
    n = copyRootGenome->getSequenceLength();
    dnaIt = copyRootGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]);
    }
    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex());
        CuAssertTrue(_testCase, topIt->getLength() == 1);
        CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3);
        CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true);
        CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5);
        if (topIt->getArrayIndex() != 6) {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6);
        } else {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7);
        }
    }
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex());
        CuAssertTrue(_testCase, botIt->getLength() == 1);
        CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3);
        CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true);
        CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5);
    }

    _secondAlignment->close();
    remove(_path.c_str());
}
Exemplo n.º 7
0
// Map a segment onto its own genome (paralogs) and append the results.
//
// mappedSeg is taken as a smart pointer; in this function it is never pushed
// onto `results` itself -- only newly built MappedSegments are.
//
// - Target is a top segment: a new MappedSegment (cloned source, cloned
//   target) is appended for the target, then the next-paralogy links are
//   followed, appending one more per step, until there is no further
//   paralogy, the segment is shorter than minLength, or the walk is back at
//   the starting array index.
// - Target is a bottom segment and the genome has a parent: the target is
//   parsed up into top segments of the same genome and mapSelf() recurses on
//   a new MappedSegment for each one up to the original end position.
// - Otherwise nothing is appended.
//
// Returns the number of segments appended to `results`.
static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) {
    hal_size_t added = 0;

    if (mappedSeg->isTop()) {
        SegmentIteratorPtr targetIt = mappedSeg->getTargetIteratorPtr();
        SegmentIteratorPtr sourceIt = mappedSeg->getSourceIteratorPtr();
        TopSegmentIteratorPtr startTop = std::dynamic_pointer_cast<TopSegmentIterator>(targetIt);
        TopSegmentIteratorPtr cursor = startTop->clone();
        bool keepWalking = false;
        do {
            // FIXME: why isn't clone() polymorphic?
            SegmentIteratorPtr sourceCopy;
            if (sourceIt->isTop()) {
                sourceCopy = std::dynamic_pointer_cast<TopSegmentIterator>(sourceIt)->clone();
            } else {
                sourceCopy = std::dynamic_pointer_cast<BottomSegmentIterator>(sourceIt)->clone();
            }
            TopSegmentIteratorPtr targetCopy = cursor->clone();
            MappedSegmentPtr paralogSeg(new MappedSegment(sourceCopy, targetCopy));
            assert(paralogSeg->getGenome() == mappedSeg->getGenome());
            assert(paralogSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());
            results.push_back(paralogSeg);
            ++added;

            if (cursor->tseg()->hasNextParalogy()) {
                cursor->toNextParalogy();
            }
            // Evaluated on the segment the cursor is on after the step above.
            keepWalking = cursor->tseg()->hasNextParalogy() && cursor->getLength() >= minLength &&
                          cursor->getArrayIndex() != startTop->getArrayIndex();
        } while (keepWalking);
        return added;
    }

    if (mappedSeg->getGenome()->getParent() == NULL) {
        return added;
    }

    hal_index_t rightCutoff = mappedSeg->getEndPosition();
    BottomSegmentIteratorPtr targetBot = mappedSeg->targetAsBottom();
    hal_index_t startOffset = static_cast<hal_index_t>(targetBot->getStartOffset());
    hal_index_t endOffset = static_cast<hal_index_t>(targetBot->getEndOffset());
    TopSegmentIteratorPtr parseTop = mappedSeg->getGenome()->getTopSegmentIterator();
    parseTop->toParseUp(targetBot);

    for (;;) {
        TopSegmentIteratorPtr topPiece = parseTop->clone();

        // Parse the new top target back down to find out how far its
        // offsets moved relative to the original bottom target; those
        // differences are then applied to a copy of the source as deltas.
        BottomSegmentIteratorPtr roundTrip = targetBot->clone();
        roundTrip->toParseDown(topPiece);
        hal_index_t startBack = static_cast<hal_index_t>(roundTrip->getStartOffset());
        hal_index_t endBack = static_cast<hal_index_t>(roundTrip->getEndOffset());
        assert(startBack >= startOffset);
        assert(endBack >= endOffset);
        SegmentIteratorPtr sourcePiece = mappedSeg->sourceClone();
        hal_index_t startDelta = startBack - startOffset;
        hal_index_t endDelta = endBack - endOffset;
        assert((hal_index_t)sourcePiece->getLength() > startDelta + endDelta);
        sourcePiece->slice(sourcePiece->getStartOffset() + startDelta, sourcePiece->getEndOffset() + endDelta);

        MappedSegmentPtr pieceSeg(new MappedSegment(sourcePiece, topPiece));

        assert(pieceSeg->isTop() == true);
        assert(pieceSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

        added += mapSelf(pieceSeg, results, minLength);

        // The end position has to be tested explicitly before advancing;
        // avoiding this check would need a fundamental API change.
        if (parseTop->getEndPosition() == rightCutoff) {
            break;
        }
        parseTop->toRight(rightCutoff);
    }
    return added;
}