예제 #1
0
/**
 * Build the fixture alignment for the top-segment gap test.
 *
 * Creates a root genome "parent1" with a single leaf "child1".  Both get
 * three sequences, each with 5 segments of length two, and parent bottom
 * segment i is initially wired to child top segment i.  Three spots are
 * then rewired: two insertions (top segments 8 and 10) and one top segment
 * (2) that merely loses its parent.
 */
void TopSegmentIsGapTest::createCallBack(Alignment *alignment) {
    const size_t numSequences = 3;

    Genome *parentGenome = alignment->addRootGenome("parent1");
    Genome *childGenome = alignment->addLeafGenome("child1", "parent1", 1);

    // Identical dimensions for both genomes.
    vector<Sequence::Info> seqVec;
    seqVec.reserve(numSequences);
    for (size_t seqIdx = 0; seqIdx < numSequences; ++seqIdx) {
        seqVec.push_back(Sequence::Info("Sequence" + std::to_string(seqIdx), 10, 5, 5));
    }
    parentGenome->setDimensions(seqVec);
    childGenome->setDimensions(seqVec);

    // Parent side: bottom segment i covers [2i, 2i + 2) and points at child
    // segment i, not reversed.  The struct is reused across iterations.
    BottomSegmentStruct bottomFields;
    for (BottomSegmentIteratorPtr botIt = parentGenome->getBottomSegmentIterator(); !botIt->atEnd();
         botIt->toRight()) {
        const auto segIndex = botIt->getBottomSegment()->getArrayIndex();
        bottomFields.set(segIndex * 2, 2);
        bottomFields._children.clear();
        bottomFields._children.push_back(pair<hal_size_t, bool>(segIndex, false));
        bottomFields.applyTo(botIt);
    }

    // Child side: top segment i covers [2i, 2i + 2) with parent index i.
    // The struct is reused across iterations.
    TopSegmentStruct topFields;
    for (TopSegmentIteratorPtr topIt = childGenome->getTopSegmentIterator(); !topIt->atEnd(); topIt->toRight()) {
        const auto segIndex = topIt->getTopSegment()->getArrayIndex();
        topFields.set(segIndex * 2, 2, segIndex);
        topFields.applyTo(topIt);
    }

    // Insertion in the middle (8th top segment): parent segment 8 is
    // re-pointed at child 9, child 8 loses its parent, and child 9 now
    // claims parent 8.
    {
        BottomSegmentIteratorPtr botIt = parentGenome->getBottomSegmentIterator(8);
        TopSegmentIteratorPtr topIt = childGenome->getTopSegmentIterator(8);
        assert(botIt->getBottomSegment()->getChildIndex(0) == 8 && topIt->getTopSegment()->getParentIndex() == 8);
        botIt->getBottomSegment()->setChildIndex(0, 9);
        topIt->getTopSegment()->setParentIndex(NULL_INDEX);
        topIt->toRight();
        topIt->getTopSegment()->setParentIndex(8);
    }

    // Insertion at the beginning (10th top segment): same rewiring, one
    // segment further along for the parent/child pair 10 and 11.
    {
        BottomSegmentIteratorPtr botIt = parentGenome->getBottomSegmentIterator(10);
        TopSegmentIteratorPtr topIt = childGenome->getTopSegmentIterator(10);
        assert(botIt->getBottomSegment()->getChildIndex(0) == 10 && topIt->getTopSegment()->getParentIndex() == 10);
        botIt->getBottomSegment()->setChildIndex(0, 11);
        topIt->getTopSegment()->setParentIndex(NULL_INDEX);
        topIt->toRight();
        topIt->getTopSegment()->setParentIndex(10);
    }

    // Just having a null parent is not enough for an insertion: only the
    // child's parent index is cleared here, the parent still points at it.
    {
        BottomSegmentIteratorPtr botIt = parentGenome->getBottomSegmentIterator(2);
        TopSegmentIteratorPtr topIt = childGenome->getTopSegmentIterator(2);
        assert(botIt->getBottomSegment()->getChildIndex(0) == 2 && topIt->getTopSegment()->getParentIndex() == 2);
        topIt->getTopSegment()->setParentIndex(NULL_INDEX);
    }
}
예제 #2
0
/**
 * Verify that per-sequence segment iterators agree with genome-level ones.
 *
 * Opens "AncGenome", checks the sequence count, then for every sequence walks
 * its top and bottom segments and asserts that each one has the same array
 * index as the segment fetched from the genome at (i - 1) * 100 + j, where i
 * is the sequence's array index and j the segment's offset in the sequence.
 *
 * @param alignment alignment that contains the "AncGenome" fixture.
 */
void SequenceIteratorTest::checkCallBack(const Alignment *alignment) {
    const Genome *ancGenome = alignment->openGenome("AncGenome");

    hal_size_t numSequences = ancGenome->getNumSequences();
    // Equality comparison (not assignment), so a wrong sequence count
    // actually fails the test.
    CuAssertTrue(_testCase, numSequences == 1000);

    for (SequenceIteratorPtr seqIt = ancGenome->getSequenceIterator(); not seqIt->atEnd(); seqIt->toNext()) {
        const Sequence *seq = seqIt->getSequence();
        hal_size_t i = seq->getArrayIndex();

        // NOTE(review): (i - 1) * 100 wraps around for i == 0 if hal_size_t is
        // unsigned; presumably sequence 0 has no segments in the fixture so
        // the inner loops never run for it -- confirm against createCallBack.
        TopSegmentIteratorPtr tsIt = seq->getTopSegmentIterator();
        hal_size_t numTopSegments = seq->getNumTopSegments();
        for (hal_size_t j = 0; j < numTopSegments; ++j) {
            TopSegmentIteratorPtr gtsIt = ancGenome->getTopSegmentIterator((i - 1) * 100 + j);
            const TopSegment *gsTopSegment = gtsIt->getTopSegment();
            const TopSegment *sqTopSegment = tsIt->getTopSegment();

            CuAssertTrue(_testCase, gsTopSegment->getArrayIndex() == sqTopSegment->getArrayIndex());
            tsIt->toRight();
        }

        // Same cross-check for the bottom segments of this sequence.
        BottomSegmentIteratorPtr bsIt = seq->getBottomSegmentIterator();
        hal_size_t numBottomSegments = seq->getNumBottomSegments();
        for (hal_size_t j = 0; j < numBottomSegments; ++j) {
            BottomSegmentIteratorPtr gbsIt = ancGenome->getBottomSegmentIterator((i - 1) * 100 + j);
            const BottomSegment *gsBottomSegment = gbsIt->getBottomSegment();
            const BottomSegment *sqBottomSegment = bsIt->getBottomSegment();

            CuAssertTrue(_testCase, gsBottomSegment->getArrayIndex() == sqBottomSegment->getArrayIndex());
            bsIt->toRight();
        }
    }
}
/**
 * Move topSeqIt to the right until it no longer sits on a gap.
 *
 * A segment counts as a gap here when it has no parent and its length is at
 * most _gapThreshold.  The walk also stops, without moving, when the
 * iterator is on the last segment (forward orientation) or the first segment
 * (reversed orientation), even if that segment is itself a gap.
 *
 * @param topSeqIt iterator to advance; it is moved in place.
 */
void GappedBottomSegmentIterator::toRightNextUngapped(TopSegmentIteratorPtr topSeqIt) const {
    for (;;) {
        const bool onGap = !topSeqIt->tseg()->hasParent() && topSeqIt->getLength() <= _gapThreshold;
        if (!onGap) {
            return;
        }

        // In reversed orientation the boundary is the first segment,
        // otherwise it is the last one.
        const bool atBoundary =
            topSeqIt->getReversed() ? topSeqIt->getTopSegment()->isFirst() : topSeqIt->getTopSegment()->isLast();
        if (atBoundary) {
            return;
        }

        topSeqIt->toRight();
    }
}
예제 #4
0
void TopSegmentSequenceTest::checkCallBack(const Alignment *alignment) {
    const Genome *ancGenome = alignment->openGenome("Anc0");
    TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(100);
    CuAssertTrue(_testCase, tsIt->getTopSegment()->getStartPosition() == 500);
    CuAssertTrue(_testCase, tsIt->getTopSegment()->getLength() == 9);
    string seq;
    tsIt->getString(seq);
    CuAssertTrue(_testCase, seq == "CACACATTC");
    tsIt->toReverse();
    tsIt->getString(seq);
    CuAssertTrue(_testCase, seq == "GAATGTGTG");
}
예제 #5
0
void TopSegmentSimpleIteratorTest::checkCallBack(const Alignment *alignment) {
    const Genome *ancGenome = alignment->openGenome("Anc0");
    CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == _topSegments.size());
    TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0);
    for (size_t i = 0; i < ancGenome->getNumTopSegments(); ++i) {
        CuAssertTrue(_testCase, (size_t)tsIt->getTopSegment()->getArrayIndex() == i);
        _topSegments[i].compareTo(tsIt, _testCase);
        tsIt->toRight();
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    for (hal_index_t i = ancGenome->getNumTopSegments() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i);
        _topSegments[i].compareTo(tsIt, _testCase);
        tsIt->toLeft();
    }

    tsIt = ancGenome->getTopSegmentIterator(0);
    tsIt->slice(0, tsIt->getLength() - 1);
    for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toRight(tsIt->getStartPosition() + 1);
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    tsIt->slice(tsIt->getLength() - 1, 0);
    for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toLeft(tsIt->getStartPosition() - 1);
    }

    tsIt = ancGenome->getTopSegmentIterator(0);
    tsIt->toReverse();
    CuAssertTrue(_testCase, tsIt->getReversed() == true);
    tsIt->slice(tsIt->getLength() - 1, 0);
    for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toLeft(tsIt->getStartPosition() + 1);
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    tsIt->toReverse();
    tsIt->slice(0, tsIt->getLength() - 1);
    for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toRight(tsIt->getStartPosition() - 1);
    }
}
예제 #6
0
/**
 * Build the fixture for the simple top-segment iterator test.
 *
 * Adds root genome "Anc0" with nine leaves "Leaf0".."Leaf8", gives the root
 * a single sequence, then fills _topSegments with randomised segment records
 * whose lengths and start positions tile the genome evenly, and writes each
 * record into the corresponding top segment.
 *
 * @param alignment alignment to populate.
 */
void TopSegmentSimpleIteratorTest::createCallBack(Alignment *alignment) {
    Genome *ancGenome = alignment->addRootGenome("Anc0", 0);

    const size_t numChildren = 9;
    for (size_t childIdx = 0; childIdx != numChildren; ++childIdx) {
        alignment->addLeafGenome("Leaf" + std::to_string(childIdx), "Anc0", 0.1);
    }

    vector<Sequence::Info> seqVec(1, Sequence::Info("Sequence", 1000000, 5000, 10000));
    ancGenome->setDimensions(seqVec);

    CuAssertTrue(_testCase, ancGenome->getNumChildren() == numChildren);

    // Random fields first, then overwrite length/start so the segments tile
    // the genome without gaps.
    _topSegments.clear();
    const auto segmentCount = ancGenome->getNumTopSegments();
    for (size_t segIdx = 0; segIdx < segmentCount; ++segIdx) {
        TopSegmentStruct record;
        record.setRandom();
        record._length = ancGenome->getSequenceLength() / segmentCount;
        record._startPosition = segIdx * record._length;
        _topSegments.push_back(record);
    }

    // Write every record into the genome, checking the iterator visits the
    // segments in array order.
    TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0);
    size_t expectedIndex = 0;
    while (!tsIt->atEnd()) {
        CuAssertTrue(_testCase, static_cast<size_t>(tsIt->getTopSegment()->getArrayIndex()) == expectedIndex);
        _topSegments[expectedIndex].applyTo(tsIt);
        tsIt->toRight();
        ++expectedIndex;
    }
}
예제 #7
0
/**
 * Assert that the segment under an iterator matches this record.
 *
 * Compares length, start position, next-paralogy index, parent index and
 * bottom-parse index.  The parent-reversed flag is not compared.
 *
 * @param it       iterator positioned on the top segment to check.
 * @param testCase CuTest case that receives the assertions.
 */
void TopSegmentStruct::compareTo(TopSegmentIteratorPtr it, CuTest *testCase) const {
    const TopSegment *actual = it->getTopSegment();

    // Geometry.
    CuAssertTrue(testCase, actual->getLength() == _length);
    CuAssertTrue(testCase, actual->getStartPosition() == _startPosition);

    // Links to other segments.
    CuAssertTrue(testCase, actual->getNextParalogyIndex() == _nextParalogyIndex);
    CuAssertTrue(testCase, actual->getParentIndex() == _parentIndex);
    CuAssertTrue(testCase, actual->getBottomParseIndex() == _bottomParseIndex);
}
예제 #8
0
/**
 * Copy every field of this record into the segment under an iterator.
 *
 * Writes coordinates, next-paralogy index, parent index, parent-reversed
 * flag and bottom-parse index, in that order.
 *
 * @param it iterator positioned on the top segment to overwrite.
 */
void TopSegmentStruct::applyTo(TopSegmentIteratorPtr it) const {
    TopSegment &target = *it->getTopSegment();

    target.setCoordinates(_startPosition, _length);
    target.setNextParalogyIndex(_nextParalogyIndex);
    target.setParentIndex(_parentIndex);
    target.setParentReversed(_parentReversed);
    target.setBottomParseIndex(_bottomParseIndex);
}
예제 #9
0
파일: halGenome.cpp 프로젝트: dayin1989/hal
/**
 * Recompute the parse indices that link this genome's top and bottom
 * segment arrays.
 *
 * Does nothing when the genome has no parent or no children.  Otherwise the
 * two segment arrays are swept left to right in lock step: a bottom segment
 * whose start falls inside the current top segment gets that top segment's
 * array index as its top-parse index, and a top segment whose start falls
 * inside the current bottom segment gets that bottom segment's array index
 * as its bottom-parse index.
 */
void Genome::fixParseInfo()
{
  // NOTE(review): presumably only genomes with both a parent and children
  // carry both segment arrays -- confirm.
  if (getParent() == NULL || getNumChildren() == 0)
  {
    return;
  }
  
  // copied from CactusHalConverter::updateRootParseInfo() in
  // cactus2hal/src/cactusHalConverter.cpp 
  BottomSegmentIteratorPtr bottomIterator = 
    getBottomSegmentIterator();
  TopSegmentIteratorPtr topIterator = getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bend = getBottomSegmentEndIterator();
  TopSegmentIteratorConstPtr tend = getTopSegmentEndIterator();
  while (bottomIterator != bend && topIterator != tend)
  {
    bool bright = false;
    bool tright = false;
    BottomSegment* bseg = bottomIterator->getBottomSegment();
    TopSegment* tseg = topIterator->getTopSegment();
    // Half-open intervals [start, endidx) for the current pair.
    hal_index_t bstart = bseg->getStartPosition();
    hal_index_t bendidx = bstart + (hal_index_t)bseg->getLength();
    hal_index_t tstart = tseg->getStartPosition();
    hal_index_t tendidx = tstart + (hal_index_t)tseg->getLength();

    // Bottom segment starts inside the current top segment.
    if (bstart >= tstart && bstart < tendidx)
    {
      bseg->setTopParseIndex(tseg->getArrayIndex());
    }
    // Advance the bottom iterator once its segment ends no later than the
    // top one; zero-length segments are always skipped.
    if (bendidx <= tendidx || bstart == bendidx)
    {
      bright = true;
    }
        
    // Top segment starts inside the current bottom segment.
    if (tstart >= bstart && tstart < bendidx)
    {
      tseg->setBottomParseIndex(bseg->getArrayIndex());
    }
    // Same advancing rule for the top iterator.
    if (tendidx <= bendidx || tstart == tendidx)
    {
      tright = true;
    }

    // At least one side must move, otherwise the sweep would never end.
    assert(bright || tright);
    if (bright)
    {
      bottomIterator->toRight();
    }
    if (tright)
    {
      topIterator->toRight();
    }
  }
}
예제 #10
0
/**
 * Build the fixture for the top-segment sequence test.
 *
 * Adds root genome "Anc0" with one sequence, writes the DNA "CACACATTC" at
 * position 500, and sets top segment 100 to cover exactly that range
 * (start 500, length 9).
 *
 * @param alignment alignment to populate.
 */
void TopSegmentSequenceTest::createCallBack(Alignment *alignment) {
    Genome *genome = alignment->addRootGenome("Anc0", 0);

    vector<Sequence::Info> dimensions(1, Sequence::Info("Sequence", 1000000, 5000, 700000));
    genome->setDimensions(dimensions);

    genome->setSubString("CACACATTC", 500, 9);

    // The iterator is kept in a named variable while its segment is edited.
    TopSegmentIteratorPtr segIt = genome->getTopSegmentIterator(100);
    segIt->getTopSegment()->setCoordinates(500, 9);
}
예제 #11
0
void TopSegmentIteratorToSiteTest::checkGenome(const Genome *genome) {
    TopSegmentIteratorPtr ti = genome->getTopSegmentIterator();
    for (hal_index_t pos = 0; pos < (hal_index_t)genome->getSequenceLength(); ++pos) {
        ti->toSite(pos);
        CuAssertTrue(_testCase, ti->getStartPosition() == pos);
        CuAssertTrue(_testCase, ti->getLength() == 1);
        ti->toSite(pos, false);
        CuAssertTrue(_testCase, pos >= ti->getStartPosition() && pos < ti->getStartPosition() + (hal_index_t)ti->getLength());
        CuAssertTrue(_testCase, ti->getLength() == ti->getTopSegment()->getLength());
    }
}
/**
 * Build the fixture for the simple gapped-segment iterator test.
 *
 * Starts from the alignment produced by addIdenticalParentChild and then
 * walks the first child's top segments and the root's bottom segments in
 * parallel, marking every second pair (odd positions 1, 3, 5, ...) as
 * reversed on both sides.
 *
 * @param alignment alignment to populate.
 */
void GappedSegmentSimpleIteratorTest::createCallBack(AlignmentPtr alignment)
{
  addIdenticalParentChild(alignment, 2, 100, 5);
  Genome* rootGenome = alignment->openGenome(alignment->getRootName());
  Genome* firstChild = rootGenome->getChild(0);
  TopSegmentIteratorPtr topIt = firstChild->getTopSegmentIterator();
  BottomSegmentIteratorPtr botIt = rootGenome->getBottomSegmentIterator();

  for (int position = 0; topIt != firstChild->getTopSegmentEndIterator();
       ++position)
  {
    const bool oddPosition = (position % 2) != 0;
    if (oddPosition)
    {
      topIt->getTopSegment()->setParentReversed(true);
      botIt->getBottomSegment()->setChildReversed(0, true);
    }
    // Both iterators advance together so the pairs stay aligned.
    topIt->toRight();
    botIt->toRight();
  }
}