/**
 * Build the alignment used by the top-segment gap test.
 *
 * Creates the root genome "parent1" and its leaf "child1" with identical
 * dimensions, links segment i of the parent to segment i of the child, and
 * then rewires the links around segments 8, 10 and 2 to set up the cases
 * under test.
 */
void TopSegmentIsGapTest::createCallBack(Alignment *alignment) {
    const size_t sequenceCount = 3;
    vector<Sequence::Info> dimensions(sequenceCount);

    BottomSegmentIteratorPtr bottomIt;
    BottomSegmentStruct bottomData;
    TopSegmentIteratorPtr topIt;
    TopSegmentStruct topData;

    Genome *parent1 = alignment->addRootGenome("parent1");
    Genome *child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Two genomes with three sequences each; every sequence has 5 segments
    // of length two.  Initially segment i of the parent is aligned with
    // segment i of the child.
    size_t seqIdx = 0;
    while (seqIdx < sequenceCount) {
        string seqName = "Sequence" + std::to_string(seqIdx);
        dimensions[seqIdx] = Sequence::Info(seqName, 10, 5, 5);
        ++seqIdx;
    }
    parent1->setDimensions(dimensions);
    child1->setDimensions(dimensions);

    // Parent side: bottom segment i covers [2i, 2i + 2) and points at child i.
    bottomIt = parent1->getBottomSegmentIterator();
    while (!bottomIt->atEnd()) {
        const auto idx = bottomIt->getBottomSegment()->getArrayIndex();
        bottomData.set(idx * 2, 2);
        bottomData._children.clear();
        bottomData._children.push_back(pair<hal_size_t, bool>(idx, false));
        bottomData.applyTo(bottomIt);
        bottomIt->toRight();
    }

    // Child side: top segment i covers [2i, 2i + 2) and points at parent i.
    topIt = child1->getTopSegmentIterator();
    while (!topIt->atEnd()) {
        const auto idx = topIt->getTopSegment()->getArrayIndex();
        topData.set(idx * 2, 2, idx);
        topData.applyTo(topIt);
        topIt->toRight();
    }

    // Insertion in the middle (8th top segment): top segment 8 loses its
    // parent, and parent segment 8 is paired with top segment 9 instead.
    bottomIt = parent1->getBottomSegmentIterator(8);
    topIt = child1->getTopSegmentIterator(8);
    assert(bottomIt->getBottomSegment()->getChildIndex(0) == 8 && topIt->getTopSegment()->getParentIndex() == 8);
    bottomIt->getBottomSegment()->setChildIndex(0, 9);
    topIt->getTopSegment()->setParentIndex(NULL_INDEX);
    topIt->toRight();
    topIt->getTopSegment()->setParentIndex(8);

    // Insertion at the beginning (10th top segment): same rewiring, one
    // segment further along, for segments 10 and 11.
    bottomIt = parent1->getBottomSegmentIterator(10);
    topIt = child1->getTopSegmentIterator(10);
    assert(bottomIt->getBottomSegment()->getChildIndex(0) == 10 && topIt->getTopSegment()->getParentIndex() == 10);
    bottomIt->getBottomSegment()->setChildIndex(0, 11);
    topIt->getTopSegment()->setParentIndex(NULL_INDEX);
    topIt->toRight();
    topIt->getTopSegment()->setParentIndex(10);

    // A null parent on its own is not enough to count as an insertion:
    // only the child side of segment 2 is unlinked here.
    bottomIt = parent1->getBottomSegmentIterator(2);
    topIt = child1->getTopSegmentIterator(2);
    assert(bottomIt->getBottomSegment()->getChildIndex(0) == 2 && topIt->getTopSegment()->getParentIndex() == 2);
    topIt->getTopSegment()->setParentIndex(NULL_INDEX);
}
// Build the fixture for the mapped-segment duplication test: one parent
// genome holding a single 3-base bottom segment, and two children that are
// each split into three 3-base top segments.
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment)
{
  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);

  // Dimensions: the parent has only a bottom segment, the children only
  // top segments.
  vector<Sequence::Info> dims(1);
  dims[0] = Sequence::Info("Sequence", 3, 0, 1);
  parent->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  child1->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  child2->setDimensions(dims);

  parent->setString("CCC");
  child1->setString("CCCTACGTG");
  child2->setString("CCCTACGTG");

  // Parent bottom segment: the edge to the first child is reversed, the
  // edge to the second child is not.
  BottomSegmentStruct bottomData;
  bottomData.set(0, 3);
  bottomData._children.push_back(pair<hal_size_t, bool>(0, true));
  bottomData._children.push_back(pair<hal_size_t, bool>(0, false));
  BottomSegmentIteratorPtr bottomIt = parent->getBottomSegmentIterator();
  bottomData.applyTo(bottomIt);

  TopSegmentStruct topData;

  // child1: all three segments map (reversed) to parent segment 0.
  // NOTE(review): the last argument is presumably the next-paralogy index,
  // which would chain the segments 0 -> 1 -> 2 -> 0 -- confirm against
  // TopSegmentStruct::set.
  TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator();
  topData.set(0, 3, 0, true, NULL_INDEX, 1);
  topData.applyTo(topIt);

  topIt->toRight();
  topData.set(3, 3, 0, true, NULL_INDEX, 2);
  topData.applyTo(topIt);

  topIt->toRight();
  topData.set(6, 3, 0, true, NULL_INDEX, 0);
  topData.applyTo(topIt);

  // child2: only the first segment has a parent; the other two are
  // unaligned.
  topIt = child2->getTopSegmentIterator();
  topData.set(0, 3, 0, false);
  topData.applyTo(topIt);

  topIt->toRight();
  topData.set(3, 3, NULL_INDEX, true);
  topData.applyTo(topIt);

  topIt->toRight();
  topData.set(6, 3, NULL_INDEX, false);
  topData.applyTo(topIt);
}
void TopSegmentSimpleIteratorTest::checkCallBack(const Alignment *alignment) {
    const Genome *ancGenome = alignment->openGenome("Anc0");
    CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == _topSegments.size());
    TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0);
    for (size_t i = 0; i < ancGenome->getNumTopSegments(); ++i) {
        CuAssertTrue(_testCase, (size_t)tsIt->getTopSegment()->getArrayIndex() == i);
        _topSegments[i].compareTo(tsIt, _testCase);
        tsIt->toRight();
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    for (hal_index_t i = ancGenome->getNumTopSegments() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i);
        _topSegments[i].compareTo(tsIt, _testCase);
        tsIt->toLeft();
    }

    tsIt = ancGenome->getTopSegmentIterator(0);
    tsIt->slice(0, tsIt->getLength() - 1);
    for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toRight(tsIt->getStartPosition() + 1);
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    tsIt->slice(tsIt->getLength() - 1, 0);
    for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toLeft(tsIt->getStartPosition() - 1);
    }

    tsIt = ancGenome->getTopSegmentIterator(0);
    tsIt->toReverse();
    CuAssertTrue(_testCase, tsIt->getReversed() == true);
    tsIt->slice(tsIt->getLength() - 1, 0);
    for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toLeft(tsIt->getStartPosition() + 1);
    }
    tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1);
    tsIt->toReverse();
    tsIt->slice(0, tsIt->getLength() - 1);
    for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) {
        CuAssertTrue(_testCase, tsIt->getLength() == 1);
        CuAssertTrue(_testCase, tsIt->getStartPosition() == i);
        tsIt->toRight(tsIt->getStartPosition() - 1);
    }
}
/**
 * Create genome "Anc0" with nine leaf children and fill its top segments.
 *
 * The expected segment data is generated into _topSegments (random fields,
 * equal-width contiguous coordinates) and then written to the genome in
 * array order.
 */
void TopSegmentSimpleIteratorTest::createCallBack(Alignment *alignment) {
    Genome *anc = alignment->addRootGenome("Anc0", 0);

    const size_t leafCount = 9;
    size_t leaf = 0;
    while (leaf < leafCount) {
        alignment->addLeafGenome("Leaf" + std::to_string(leaf), "Anc0", 0.1);
        ++leaf;
    }

    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 1000000, 5000, 10000);
    anc->setDimensions(dims);

    CuAssertTrue(_testCase, anc->getNumChildren() == leafCount);

    // Expected data: randomise every field, then overwrite the coordinates
    // so the segments tile the sequence with equal widths.
    _topSegments.clear();
    size_t segIdx = 0;
    while (segIdx < anc->getNumTopSegments()) {
        TopSegmentStruct expected;
        expected.setRandom();
        expected._length = anc->getSequenceLength() / anc->getNumTopSegments();
        expected._startPosition = segIdx * expected._length;
        _topSegments.push_back(expected);
        ++segIdx;
    }

    // Write the expected data into the genome, one segment at a time.
    TopSegmentIteratorPtr writer = anc->getTopSegmentIterator(0);
    size_t written = 0;
    while (!writer->atEnd()) {
        CuAssertTrue(_testCase, (size_t)writer->getTopSegment()->getArrayIndex() == written);
        _topSegments[written].applyTo(writer);
        writer->toRight();
        ++written;
    }
}
// Build an identical parent/child pair, then call makeInversion(ti, 5) at
// every second multiple-of-five segment index (5, 15, 25, ...), skipping
// any index that is not before the last bottom segment of the parent.
void GappedSegmentSimpleIteratorTest2::createCallBack(AlignmentPtr alignment)
{
  addIdenticalParentChild(alignment, 2, 100, 5);
  Genome* parent = alignment->openGenome(alignment->getRootName());
  Genome* child = parent->getChild(0);
  TopSegmentIteratorPtr topIt = child->getTopSegmentIterator();
  BottomSegmentIteratorPtr bottomIt = parent->getBottomSegmentIterator();

  // Toggled at each multiple of five; starts true so the block beginning
  // at index 0 is left untouched.
  bool invertBlock = true;
  for (hal_index_t idx = 0; topIt != child->getTopSegmentEndIterator(); ++idx)
  {
    const bool blockStart = (idx % 5 == 0);
    if (blockStart)
    {
      invertBlock = !invertBlock;
      if (invertBlock && idx < (hal_index_t)(parent->getNumBottomSegments() - 1))
      {
        makeInversion(topIt, 5);
      }
    }

    // The two iterators are advanced in lock step.
    topIt->toRight();
    bottomIt->toRight();
  }
}
Exemple #6
0
/**
 * Check that iterating segments through each sequence of "AncGenome" visits
 * the same segments as indexing them through the genome.
 *
 * For every sequence, the j-th top (and bottom) segment reached through the
 * sequence iterator must have the same array index as the genome-level
 * segment at (i - 1) * 100 + j, where i is the sequence's array index.
 */
void SequenceIteratorTest::checkCallBack(const Alignment *alignment) {
    const Genome *ancGenome = alignment->openGenome("AncGenome");

    hal_size_t numSequences = ancGenome->getNumSequences();
    // Compare rather than assign: the previous `numSequences = 1000` was
    // always true and silently overwrote the value read from the genome.
    CuAssertTrue(_testCase, numSequences == 1000);

    for (SequenceIteratorPtr seqIt = ancGenome->getSequenceIterator(); not seqIt->atEnd(); seqIt->toNext()) {
        const Sequence *seq = seqIt->getSequence();
        hal_size_t i = seq->getArrayIndex();

        // NOTE(review): the (i - 1) * 100 offset assumes sequence 0 owns no
        // segments and every later sequence owns 100 of each kind -- confirm
        // against the matching createCallBack.  i is unsigned, so a
        // non-empty sequence 0 would wrap around here.
        TopSegmentIteratorPtr tsIt = seq->getTopSegmentIterator();
        hal_size_t numTopSegments = seq->getNumTopSegments();
        for (hal_size_t j = 0; j < numTopSegments; ++j) {
            TopSegmentIteratorPtr gtsIt = ancGenome->getTopSegmentIterator((i - 1) * 100 + j);
            const TopSegment *gsTopSegment = gtsIt->getTopSegment();
            const TopSegment *sqTopSegment = tsIt->getTopSegment();

            CuAssertTrue(_testCase, gsTopSegment->getArrayIndex() == sqTopSegment->getArrayIndex());
            tsIt->toRight();
        }

        // Same comparison for the bottom segments of this sequence.
        BottomSegmentIteratorPtr bsIt = seq->getBottomSegmentIterator();
        hal_size_t numBottomSegments = seq->getNumBottomSegments();
        for (hal_size_t j = 0; j < numBottomSegments; ++j) {
            BottomSegmentIteratorPtr gbsIt = ancGenome->getBottomSegmentIterator((i - 1) * 100 + j);
            const BottomSegment *gsBottomSegment = gbsIt->getBottomSegment();
            const BottomSegment *sqBottomSegment = bsIt->getBottomSegment();

            CuAssertTrue(_testCase, gsBottomSegment->getArrayIndex() == sqBottomSegment->getArrayIndex());
            bsIt->toRight();
        }
    }
}
Exemple #7
0
void Genome::fixParseInfo()
{
  if (getParent() == NULL || getNumChildren() == 0)
  {
    return;
  }
  
  // copied from CactusHalConverter::updateRootParseInfo() in
  // cactus2hal/src/cactusHalConverter.cpp 
  BottomSegmentIteratorPtr bottomIterator = 
    getBottomSegmentIterator();
  TopSegmentIteratorPtr topIterator = getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bend = getBottomSegmentEndIterator();
  TopSegmentIteratorConstPtr tend = getTopSegmentEndIterator();
  int top = 0, bot = 0;
  while (bottomIterator != bend && topIterator != tend)
  {
    bool bright = false;
    bool tright = false;
    BottomSegment* bseg = bottomIterator->getBottomSegment();
    TopSegment* tseg = topIterator->getTopSegment();
    hal_index_t bstart = bseg->getStartPosition();
    hal_index_t bendidx = bstart + (hal_index_t)bseg->getLength();
    hal_index_t tstart = tseg->getStartPosition();
    hal_index_t tendidx = tstart + (hal_index_t)tseg->getLength();

    if (bstart >= tstart && bstart < tendidx)
    {
      bseg->setTopParseIndex(tseg->getArrayIndex());
    }
    if (bendidx <= tendidx || bstart == bendidx)
    {
      bright = true;
    }
        
    if (tstart >= bstart && tstart < bendidx)
    {
      tseg->setBottomParseIndex(bseg->getArrayIndex());
    }
    if (tendidx <= bendidx || tstart == tendidx)
    {
      tright = true;
    }

    assert(bright || tright);
    if (bright == true)
    {
      bot += 1;
      bottomIterator->toRight();
    }
    if (tright == true)
    {
      top += 1;
      topIterator->toRight();
    }
  }
}
/**
 * Advance topSeqIt with toRight() until it is no longer on a gap.
 *
 * A gap is a top segment that has no parent and whose length does not
 * exceed _gapThreshold.  The walk also stops when the iterator is on the
 * last segment (forward orientation) or the first segment (reversed
 * orientation).
 */
void GappedBottomSegmentIterator::toRightNextUngapped(TopSegmentIteratorPtr topSeqIt) const {
    for (;;) {
        // An aligned segment is never a gap.
        if (topSeqIt->tseg()->hasParent()) {
            return;
        }
        // Unaligned but too long to be skipped as a gap.
        if (!(topSeqIt->getLength() <= _gapThreshold)) {
            return;
        }
        // Nothing further to the right in the current orientation.
        const bool atBoundary = topSeqIt->getReversed() ? topSeqIt->getTopSegment()->isFirst()
                                                        : topSeqIt->getTopSegment()->isLast();
        if (atBoundary) {
            return;
        }
        topSeqIt->toRight();
    }
}
Exemple #9
0
// Map a segment one step up the tree, from the genome of its target to that
// genome's parent, appending every resulting mapping to results.
//
// - Target is a top segment: if it has a parent, is at least minLength long
//   and (unless doDupes is set) is the canonical paralog, the target of
//   mappedSeg is replaced by the parent's bottom segment and mappedSeg itself
//   is pushed onto results.
// - Target is a bottom segment: it is parsed up (toParseUp) to the top
//   segments of the same genome, walking right until rightCutoff; a new
//   MappedSegment is built for each one and mapped up recursively.
//
// Returns the number of segments appended to results.  The genome of
// mappedSeg must have a parent (asserted).
// note: takes smart pointer as it may be added to the results
static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) {
    const Genome *parent = mappedSeg->getGenome()->getParent();
    assert(parent != NULL);
    hal_size_t added = 0;
    if (mappedSeg->isTop() == true) {
        BottomSegmentIteratorPtr botSegIt = parent->getBottomSegmentIterator();
        TopSegmentIteratorPtr topSegIt = mappedSeg->targetAsTop();
        if (topSegIt->tseg()->hasParent() == true && topSegIt->getLength() >= minLength &&
            (doDupes == true || topSegIt->tseg()->isCanonicalParalog() == true)) {
            botSegIt->toParent(topSegIt);
            mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(botSegIt));
            results.push_back(mappedSeg);
            ++added;
        }
    } else {
        // Remember where the original target ends so the walk over top
        // segments below knows when to stop.
        hal_index_t rightCutoff = mappedSeg->getEndPosition();
        BottomSegmentIteratorPtr botSegIt = mappedSeg->targetAsBottom();
        hal_index_t startOffset = (hal_index_t)botSegIt->getStartOffset();
        hal_index_t endOffset = (hal_index_t)botSegIt->getEndOffset();
        TopSegmentIteratorPtr topSegIt = mappedSeg->getGenome()->getTopSegmentIterator();
        topSegIt->toParseUp(botSegIt);
        do {
            // Work on a copy so topSegIt can keep walking right afterwards.
            TopSegmentIteratorPtr newTopSegIt = topSegIt->clone();

            // we map the new target back to see how the offsets have
            // changed.  these changes are then applied to the source segment
            // as deltas
            BottomSegmentIteratorPtr backBotSegIt = botSegIt->clone();
            backBotSegIt->toParseDown(newTopSegIt);
            hal_index_t startBack = (hal_index_t)backBotSegIt->getStartOffset();
            hal_index_t endBack = (hal_index_t)backBotSegIt->getEndOffset();
            assert(startBack >= startOffset);
            assert(endBack >= endOffset);
            SegmentIteratorPtr newSourceSegIt = mappedSeg->sourceClone();
            hal_index_t startDelta = startBack - startOffset;
            hal_index_t endDelta = endBack - endOffset;
            assert((hal_index_t)newSourceSegIt->getLength() > startDelta + endDelta);
            // Trim the cloned source by the same amounts the target lost.
            newSourceSegIt->slice(newSourceSegIt->getStartOffset() + startDelta, newSourceSegIt->getEndOffset() + endDelta);

            MappedSegmentPtr newMappedSeg(new MappedSegment(newSourceSegIt, newTopSegIt));

            assert(newMappedSeg->isTop() == true);
            assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

            // The new segment has a top target, so this recursion takes the
            // first branch above.
            added += mapUp(newMappedSeg, results, doDupes, minLength);
            // stupid that we have to make this check but don't want to
            // make fundamental api change now
            if (topSegIt->getEndPosition() != rightCutoff) {
                topSegIt->toRight(rightCutoff);
            } else {
                break;
            }
        } while (true);
    }
    return added;
}
Exemple #10
0
// Lay out the top segments of genome as contiguous blocks of equal width.
// Segment i is linked, unreversed, to parent segment i; its bottom parse
// index and next paralogy index are both cleared to NULL_INDEX.
void setTopSegments(Genome *genome, hal_size_t width) {
    TopSegmentIteratorPtr segIt = genome->getTopSegmentIterator();
    const hal_size_t segCount = genome->getNumTopSegments();

    hal_index_t nextStart = 0;
    while (segIt->getArrayIndex() < segCount) {
        segIt->setCoordinates(nextStart, width);

        // Parent index mirrors this segment's own array index.
        segIt->tseg()->setParentIndex(segIt->getArrayIndex());
        segIt->tseg()->setParentReversed(false);
        segIt->tseg()->setBottomParseIndex(NULL_INDEX);
        segIt->tseg()->setNextParalogyIndex(NULL_INDEX);

        segIt->toRight();
        nextStart += width;
    }
}
Exemple #11
0
// Copy every top segment of this genome into dest.
//
// dest must have either no top segments (nothing is copied) or exactly as
// many as this genome.  Coordinates, parent orientation, bottom parse index
// and next paralogy index are copied verbatim.  The parent index is
// re-based: it is translated from an offset within the source parent's
// sequence to the same offset within the identically named sequence of
// dest's parent.
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *srcParent = getParent();
  const Genome *dstParent = dest->getParent();

  TopSegmentIteratorConstPtr srcTop = getTopSegmentIterator();
  TopSegmentIteratorPtr dstTop = dest->getTopSegmentIterator();
  const hal_size_t segCount = dest->getNumTopSegments();
  assert(segCount == 0 || segCount == getNumTopSegments());

  // No top segments means there is nothing to copy.
  if (segCount == 0) {
    return;
  }

  BottomSegmentIteratorConstPtr srcParentBottom = srcParent->getBottomSegmentIterator();
  BottomSegmentIteratorConstPtr dstParentBottom = dstParent->getBottomSegmentIterator();

  while ((hal_size_t)srcTop->getArrayIndex() < segCount)
  {
    const hal_index_t sitePos = srcTop->getStartPosition();
    assert(sitePos != NULL_INDEX);
    // Sequence names on both sides at this site; only the (disabled)
    // consistency check below would use them.
    string srcSeqName = getSequenceBySite(sitePos)->getName();
    string dstSeqName = dest->getSequenceBySite(sitePos)->getName();
    // if (inSeqName != outSeqName) {
    //   stringstream ss;
    //   ss << "When copying top segments from " << getName() << " to " << dest->getName() << ": sequence " << inSeqName << " != " << outSeqName << " at site " << genomePos;
    //   throw hal_exception(ss.str());
    // }

    // Straight copy of every field first ...
    dstTop->setCoordinates(srcTop->getStartPosition(), srcTop->getLength());
    dstTop->setParentIndex(srcTop->getParentIndex());
    dstTop->setParentReversed(srcTop->getParentReversed());
    dstTop->setBottomParseIndex(srcTop->getBottomParseIndex());
    dstTop->setNextParalogyIndex(srcTop->getNextParalogyIndex());

    // ... then correct the parent index so that it points into the same
    // sequence (matched by name) in the destination's parent.
    const bool hasParentSegment = (srcTop->getParentIndex() != NULL_INDEX);
    if (hasParentSegment) {
      srcParentBottom->toParent(srcTop);

      const Sequence *srcParentSeq = srcParentBottom->getSequence();
      const Sequence *dstParentSeq = dstParent->getSequence(srcParentSeq->getName());

      // Offset of the parent segment inside its sequence, re-applied to the
      // destination sequence's first bottom segment.
      hal_index_t offsetInSeq = srcTop->getParentIndex() - srcParentSeq->getBottomSegmentArrayIndex();
      hal_index_t rebasedIndex = offsetInSeq + dstParentSeq->getBottomSegmentArrayIndex();

      dstTop->setParentIndex(rebasedIndex);
    }

    srcTop->toRight();
    dstTop->toRight();
  }
}
Exemple #12
0
int MappedSegment::boundComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) {
    int res = 0;
    bool flip = s2->getReversed();
    if (flip) {
        s2->toReverse();
    }

    if (s1->isTop() && !s2->isTop()) {
        BottomSegmentIteratorPtr bot = std::dynamic_pointer_cast<BottomSegmentIterator>(s2);
        hal_index_t lb = bot->bseg()->getTopParseIndex();
        hal_index_t ub = lb;
        if ((hal_size_t)bot->getArrayIndex() < bot->getGenome()->getNumBottomSegments() - 1) {
            bot = bot->clone();
            bot->slice(0, 0);
            bot->toRight();
            ub = bot->bseg()->getTopParseIndex();
        }
        if (s1->getArrayIndex() < lb) {
            res = -1;
        } else if (s1->getArrayIndex() > ub) {
            res = 1;
        }
    } else if (!s1->isTop() && s2->isTop()) {
        TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(s2);
        hal_index_t lb = top->tseg()->getBottomParseIndex();
        hal_index_t ub = lb;
        if ((hal_size_t)top->getArrayIndex() < top->getGenome()->getNumTopSegments() - 1) {
            top = top->clone();
            top->slice(0, 0);
            top->toRight();
            ub = top->tseg()->getBottomParseIndex();
        }
        if (s1->getArrayIndex() < lb) {
            res = -1;
        } else if (s1->getArrayIndex() > ub) {
            res = 1;
        }
    }

    if (flip) {
        s2->toReverse();
    }

    return res;
}
// Build an identical parent/child pair, then mark every odd-numbered
// segment pair (1, 3, 5, ...) as reversed on both the child's top segment
// and the parent's bottom segment.
void GappedSegmentSimpleIteratorTest::createCallBack(AlignmentPtr alignment)
{
  addIdenticalParentChild(alignment, 2, 100, 5);
  Genome* parent = alignment->openGenome(alignment->getRootName());
  Genome* child = parent->getChild(0);
  TopSegmentIteratorPtr topIt = child->getTopSegmentIterator();
  BottomSegmentIteratorPtr bottomIt = parent->getBottomSegmentIterator();

  for (int idx = 0; topIt != child->getTopSegmentEndIterator(); ++idx)
  {
    const bool oddIndex = (idx % 2) != 0;
    if (oddIndex)
    {
      topIt->getTopSegment()->setParentReversed(true);
      bottomIt->getBottomSegment()->setChildReversed(0, true);
    }
    // Both iterators move together so they stay on matching segments.
    topIt->toRight();
    bottomIt->toRight();
  }
}
// Create the two root genomes used by the to-site test:
//   "case1" - a 10-base sequence split into segments of length 9 and 1;
//   "case2" - 1133 contiguous segments with rand()-chosen lengths in
//             [1, 77].
void TopSegmentIteratorToSiteTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> dims(1);

  TopSegmentIteratorPtr segIt;
  TopSegmentStruct segData;

  // case 1: two segments covering a 10-base sequence
  {
    Genome* case1 = alignment->addRootGenome("case1");
    dims[0] = Sequence::Info("Sequence", 10, 2, 0);
    case1->setDimensions(dims);

    segIt = case1->getTopSegmentIterator();
    segData.set(0, 9);
    segData.applyTo(segIt);

    segIt->toRight();
    segData.set(9, 1);
    segData.applyTo(segIt);
  }

  // case 2: bunch of random segments.  All lengths are drawn up front
  // because the sequence length (their sum) is needed for the dimensions.
  const hal_size_t segCount = 1133;
  vector<hal_size_t> lengths(segCount);
  hal_size_t totalLength = 0;
  size_t idx = 0;
  while (idx < segCount)
  {
    const hal_size_t drawn = rand() % 77 + 1;
    lengths[idx] = drawn;
    totalLength += drawn;
    assert(drawn > 0);
    ++idx;
  }

  Genome* case2 = alignment->addRootGenome("case2");
  dims[0] = Sequence::Info("Sequence", totalLength, segCount, 0);
  case2->setDimensions(dims);

  // Lay the segments end to end, starting at position 0.
  hal_index_t nextStart = 0;
  idx = 0;
  while (idx < segCount)
  {
    segIt = case2->getTopSegmentIterator((hal_index_t)idx);
    segData.set(nextStart, lengths[idx]);
    nextStart += lengths[idx];
    segData.applyTo(segIt);
    ++idx;
  }
}
Exemple #15
0
/**
 * Verify the result of the genome copy.
 *
 * First checks that the source genomes ("AncGenome", "LeafGenome1") in
 * `alignment` hold the expected dimensions, metadata, DNA and segment
 * fields.  Then re-opens the second alignment stored at _path and checks
 * that "copyRootGenome" and its "LeafGenome1" hold the same values.
 * Finally closes the second alignment and deletes its file.
 */
void GenomeCopyTest::checkCallBack(const Alignment *alignment) {
    // FIXME: halAlignment->open() fails miserably but
    // openHalAlignmentReadOnly works? Probably some state isn't cleared
    // on close.
    AlignmentPtr tmp(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS));
    _secondAlignment = tmp;

    // ---- source genomes ----
    const Genome *ancGenome = alignment->openGenome("AncGenome");
    CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
    CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 0);
    CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 700000);
    const MetaData *ancMeta = ancGenome->getMetaData();
    CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy");
    const Genome *leafGenome = alignment->openGenome("LeafGenome1");
    // The DNA is expected to be "CAT" repeated over the whole sequence.
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]);
    }
    // Every top segment has length 1 and starts at its own array index; the
    // remaining fields hold fixed marker values.
    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex());
        CuAssertTrue(_testCase, topIt->getLength() == 1);
        CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3);
        CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true);
        CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5);
        // Segment 6 is the one exception: its next paralogy index is 7.
        if (topIt->getArrayIndex() != 6) {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6);
        } else {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7);
        }
    }
    // Same layout for the bottom segments of the ancestor.
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex());
        CuAssertTrue(_testCase, botIt->getLength() == 1);
        CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3);
        CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true);
        CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5);
    }

    // ---- copies in the second alignment: same expectations ----
    const Genome *copyRootGenome = _secondAlignment->openGenome("copyRootGenome");
    const Genome *copyLeafGenome = _secondAlignment->openGenome("LeafGenome1");
    CuAssertTrue(_testCase, copyRootGenome->getName() == "copyRootGenome");
    CuAssertTrue(_testCase, copyRootGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, copyRootGenome->getNumTopSegments() == 0);
    CuAssertTrue(_testCase, copyRootGenome->getNumBottomSegments() == 700000);
    CuAssertTrue(_testCase, copyLeafGenome->getName() == "LeafGenome1");
    CuAssertTrue(_testCase, copyLeafGenome->getSequenceLength() == 1000000);
    CuAssertTrue(_testCase, copyLeafGenome->getNumTopSegments() == 5000);
    CuAssertTrue(_testCase, copyLeafGenome->getNumBottomSegments() == 0);
    const MetaData *copyMeta = copyRootGenome->getMetaData();
    CuAssertTrue(_testCase, copyMeta->get("Young") == "Jeezy");
    n = copyRootGenome->getSequenceLength();
    dnaIt = copyRootGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]);
    }
    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex());
        CuAssertTrue(_testCase, topIt->getLength() == 1);
        CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3);
        CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true);
        CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5);
        if (topIt->getArrayIndex() != 6) {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6);
        } else {
            CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7);
        }
    }
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex());
        CuAssertTrue(_testCase, botIt->getLength() == 1);
        CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3);
        CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true);
        CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5);
    }

    // Clean up: close the second alignment and delete its temporary file.
    _secondAlignment->close();
    remove(_path.c_str());
}
Exemple #16
0
/**
 * Build the fixture for the genome copy test.
 *
 * Creates "AncGenome" and "LeafGenome1" in `alignment` and fills them with
 * known DNA ("CAT" repeated) and segment values.  Creates "copyRootGenome"
 * and "LeafGenome1" in a second, temporary alignment with different
 * dimensions, DNA ("TAG" repeated) and segment values.  Then copies the
 * first pair over the second pair and closes the second alignment.
 */
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    hal_size_t alignmentSize = alignment->getNumGenomes();
    CuAssertTrue(_testCase, alignmentSize == 0);

    // Hacky: Need a different alignment to test copying the bottom
    // segments correctly.  (the names of a node's children are used
    // when copying bottom segments, and two genomes can't have the same
    // name in the same alignment)
    _path = getTempFile();
    _secondAlignment =
        AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);
    Genome *leafGenome = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);
    // This genome will test copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence, copyMetadata
    Genome *copyRootGenome = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *copyLeafGenome = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    MetaData *ancMeta = ancGenome->getMetaData();
    ancMeta->set("Young", "Jeezy");

    // ---- source genomes: dimensions and DNA ----
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    ancGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    leafGenome->setDimensions(seqVec);
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    n = leafGenome->getSequenceLength();
    dnaIt = leafGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    // ---- source genomes: segments.  These are the values checkCallBack
    // expects to find in both the originals and the copies. ----
    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(topIt->getArrayIndex(), 1);
        topIt->tseg()->setParentIndex(3);
        topIt->tseg()->setParentReversed(true);
        topIt->tseg()->setBottomParseIndex(5);
        if (topIt->getArrayIndex() != 6) {
            topIt->tseg()->setNextParalogyIndex(6);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(botIt->getArrayIndex(), 1);
        botIt->bseg()->setChildIndex(0, 3);
        botIt->bseg()->setChildReversed(0, true);
        botIt->bseg()->setTopParseIndex(5);
    }

    // ---- destination genomes: deliberately different dimensions, DNA and
    // segment values, all of which the copy below should replace ----
    seqVec[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    copyRootGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    copyLeafGenome->setDimensions(seqVec);
    string copySeq = "TAG";
    dnaIt = copyRootGenome->getDnaIterator();
    n = copyRootGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    dnaIt = copyLeafGenome->getDnaIterator();
    n = copyLeafGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(7, 8);
        topIt->tseg()->setParentIndex(9);
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(11);
        if (topIt->getArrayIndex() != 12) {
            topIt->tseg()->setNextParalogyIndex(12);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(6, 7);
        botIt->bseg()->setChildIndex(0, 8);
        botIt->bseg()->setChildReversed(0, false);
        botIt->bseg()->setTopParseIndex(10);
    }

    // Copy source -> destination, then close the second alignment;
    // checkCallBack re-opens it from _path.
    ancGenome->copy(copyRootGenome);
    leafGenome->copy(copyLeafGenome);
    _secondAlignment->close();
}
Exemple #17
0
// Appends to `results` one new MappedSegment for the target of mappedSeg and
// for each further target reached by following next-paralogy links, and
// returns the number of segments appended.
//
// mappedSeg - segment to expand (original note: passed as a smart pointer
//             because it may be added to the results).
// results   - list that receives the newly created mapped segments.
// minLength - the paralogy walk stops once the current top segment is
//             shorter than this.
static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) {
    hal_size_t numAdded = 0;

    if (mappedSeg->isTop()) {
        SegmentIteratorPtr targetIt = mappedSeg->getTargetIteratorPtr();
        SegmentIteratorPtr sourceIt = mappedSeg->getSourceIteratorPtr();
        TopSegmentIteratorPtr firstTop = std::dynamic_pointer_cast<TopSegmentIterator>(targetIt);
        TopSegmentIteratorPtr cursor = firstTop->clone();

        for (;;) {
            // FIXME: clone() is not polymorphic, so the source has to be
            // downcast to its concrete iterator type before it is copied.
            SegmentIteratorPtr sourceCopy;
            if (!sourceIt->isTop()) {
                sourceCopy = std::dynamic_pointer_cast<BottomSegmentIterator>(sourceIt)->clone();
            } else {
                sourceCopy = std::dynamic_pointer_cast<TopSegmentIterator>(sourceIt)->clone();
            }
            TopSegmentIteratorPtr targetCopy = cursor->clone();
            MappedSegmentPtr paralogSeg(new MappedSegment(sourceCopy, targetCopy));
            assert(paralogSeg->getGenome() == mappedSeg->getGenome());
            assert(paralogSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());
            results.push_back(paralogSeg);
            ++numAdded;

            // Step to the next paralog, if there is one.
            if (cursor->tseg()->hasNextParalogy()) {
                cursor->toNextParalogy();
            }

            // Stop when the chain ends, the segment is too short, or the walk
            // has come back to the segment it started from.
            if (!cursor->tseg()->hasNextParalogy()) {
                break;
            }
            if (cursor->getLength() < minLength) {
                break;
            }
            if (cursor->getArrayIndex() == firstTop->getArrayIndex()) {
                break;
            }
        }
        return numAdded;
    }

    // Bottom-segment target: only handled when the genome has a parent.
    if (mappedSeg->getGenome()->getParent() == NULL) {
        return numAdded;
    }

    const hal_index_t rightCutoff = mappedSeg->getEndPosition();
    BottomSegmentIteratorPtr bottomTarget = mappedSeg->targetAsBottom();
    const hal_index_t baseStartOffset = static_cast<hal_index_t>(bottomTarget->getStartOffset());
    const hal_index_t baseEndOffset = static_cast<hal_index_t>(bottomTarget->getEndOffset());
    TopSegmentIteratorPtr parseTop = mappedSeg->getGenome()->getTopSegmentIterator();
    parseTop->toParseUp(bottomTarget);

    for (;;) {
        TopSegmentIteratorPtr topTarget = parseTop->clone();

        // Parse the new top target back down to a bottom segment to see how
        // the offsets moved; the differences are applied to the source
        // segment as deltas.
        BottomSegmentIteratorPtr roundTrip = bottomTarget->clone();
        roundTrip->toParseDown(topTarget);
        const hal_index_t backStart = static_cast<hal_index_t>(roundTrip->getStartOffset());
        const hal_index_t backEnd = static_cast<hal_index_t>(roundTrip->getEndOffset());
        assert(backStart >= baseStartOffset);
        assert(backEnd >= baseEndOffset);

        SegmentIteratorPtr trimmedSource = mappedSeg->sourceClone();
        const hal_index_t deltaStart = backStart - baseStartOffset;
        const hal_index_t deltaEnd = backEnd - baseEndOffset;
        assert((hal_index_t)trimmedSource->getLength() > deltaStart + deltaEnd);
        trimmedSource->slice(trimmedSource->getStartOffset() + deltaStart, trimmedSource->getEndOffset() + deltaEnd);

        MappedSegmentPtr topMappedSeg(new MappedSegment(trimmedSource, topTarget));

        assert(topMappedSeg->isTop());
        assert(topMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

        // The new segment has a top target, so the recursive call takes the
        // paralogy-walking path above.
        numAdded += mapSelf(topMappedSeg, results, minLength);

        // The end position has to be checked explicitly before advancing
        // (original note: avoiding a fundamental API change for now).
        if (parseTop->getEndPosition() == rightCutoff) {
            break;
        }
        parseTop->toRight(rightCutoff);
    }
    return numAdded;
}
// Builds the fixture alignment root -> parent -> {grandChild1, grandChild2}.
//
// Per the original author's note: all the segments of grandChild1 coalesce
// with the first segment of grandChild2, but only when the root is used as
// the coalescence limit; otherwise only the first segments map to each other.
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildLink;

  vector<Sequence::Info> dims(1);
  BottomSegmentStruct botData;
  TopSegmentStruct topData;
  BottomSegmentIteratorPtr botIt;
  TopSegmentIteratorPtr topIt;

  Genome* rootGenome = alignment->addRootGenome("root");
  Genome* parentGenome = alignment->addLeafGenome("parent", "root", 1);
  Genome* firstGrandChild = alignment->addLeafGenome("grandChild1", "parent", 1);
  Genome* secondGrandChild = alignment->addLeafGenome("grandChild2", "parent", 1);

  // One sequence per genome.
  dims[0] = Sequence::Info("Sequence", 3, 0, 1);
  rootGenome->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 3);
  parentGenome->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  firstGrandChild->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  secondGrandChild->setDimensions(dims);

  rootGenome->setString("CCC");
  parentGenome->setString("CCCTACGTG");
  firstGrandChild->setString("CCCTACGTG");
  secondGrandChild->setString("CCCTACGTG");

  // Root: a single bottom segment whose only child is segment 0.
  botIt = rootGenome->getBottomSegmentIterator();
  botData.set(0, 3);
  botData._children.push_back(ChildLink(0, false));
  botData.applyTo(botIt);

  // Parent top segments: every one has parent index 0, and their
  // next-paralogy indices form the cycle 0 -> 1 -> 2 -> 0.
  topIt = parentGenome->getTopSegmentIterator();
  for (int seg = 0; seg < 3; ++seg) {
    if (seg > 0) {
      topIt->toRight();
    }
    topData.set(3 * seg, 3, 0, false, NULL_INDEX, (seg + 1) % 3);
    topData.applyTo(topIt);
  }

  // Parent bottom segments: the first child entry is segment i, reversed;
  // the second child entry is segment 0 for the first bottom segment and
  // NULL_INDEX for the other two.
  const ChildLink secondChild[] = {ChildLink(0, false),
                                   ChildLink(NULL_INDEX, true),
                                   ChildLink(NULL_INDEX, false)};
  botIt = parentGenome->getBottomSegmentIterator();
  for (int seg = 0; seg < 3; ++seg) {
    if (seg > 0) {
      botIt->toRight();
    }
    botData.set(3 * seg, 3);
    botData._children.clear();
    botData._children.push_back(ChildLink(seg, true));
    botData._children.push_back(secondChild[seg]);
    botData.applyTo(botIt);
  }

  // grandChild1: top segment i has parent index i, reversed.
  topIt = firstGrandChild->getTopSegmentIterator();
  for (int seg = 0; seg < 3; ++seg) {
    if (seg > 0) {
      topIt->toRight();
    }
    topData.set(3 * seg, 3, seg, true);
    topData.applyTo(topIt);
  }

  // grandChild2: only the first top segment has a parent index.
  topIt = secondGrandChild->getTopSegmentIterator();
  topData.set(0, 3, 0, false);
  topData.applyTo(topIt);
  topIt->toRight();
  topData.set(3, 3, NULL_INDEX, true);
  topData.applyTo(topIt);
  topIt->toRight();
  topData.set(6, 3, NULL_INDEX, false);
  topData.applyTo(topIt);

  parentGenome->fixParseInfo();
}
// Builds four single-sequence root genomes ("case1" .. "case4"), each with a
// different arrangement of top segments against bottom segments.  The case
// descriptions below are the original author's.
void TopSegmentIteratorParseTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> dims(1);
  TopSegmentStruct topData;
  BottomSegmentStruct botData;
  TopSegmentIteratorPtr topIt;
  BottomSegmentIteratorPtr botIt;

  // Three-piece tiling (3 + 4 + 3) used on the bottom in case 2 and on the
  // top in case 3.
  const int pieceStart[] = {0, 3, 7};
  const int pieceLength[] = {3, 4, 3};
  const int numPieces = 3;

  // case 1: bottom segment aligns perfectly with top segment
  Genome* perfectCase = alignment->addRootGenome("case1");
  dims[0] = Sequence::Info("Sequence", 10, 2, 2);
  perfectCase->setDimensions(dims);

  topIt = perfectCase->getTopSegmentIterator();
  topData.set(0, 10, NULL_INDEX, false, 0, NULL_INDEX);
  topData.applyTo(topIt);

  botIt = perfectCase->getBottomSegmentIterator();
  botData.set(0, 10, 0);
  botData.applyTo(botIt);

  // case 2: bottom segment is completely contained in top segment
  Genome* bottomInTopCase = alignment->addRootGenome("case2");
  dims[0] = Sequence::Info("Sequence", 10, 2, 3);
  bottomInTopCase->setDimensions(dims);

  topIt = bottomInTopCase->getTopSegmentIterator();
  topData.set(0, 9, NULL_INDEX, false, 0, NULL_INDEX);
  topData.applyTo(topIt);

  botIt = bottomInTopCase->getBottomSegmentIterator();
  for (int piece = 0; piece < numPieces; ++piece) {
    if (piece > 0) {
      botIt->toRight();
    }
    botData.set(pieceStart[piece], pieceLength[piece], 0);
    botData.applyTo(botIt);
  }

  // case 3: top segment is completely contained in bottom segment
  Genome* topInBottomCase = alignment->addRootGenome("case3");
  dims[0] = Sequence::Info("Sequence", 10, 3, 2);
  topInBottomCase->setDimensions(dims);

  topIt = topInBottomCase->getTopSegmentIterator();
  for (int piece = 0; piece < numPieces; ++piece) {
    if (piece > 0) {
      topIt->toRight();
    }
    topData.set(pieceStart[piece], pieceLength[piece], NULL_INDEX, false, 0);
    topData.applyTo(topIt);
  }

  botIt = topInBottomCase->getBottomSegmentIterator();
  botData.set(0, 9, 0);
  botData.applyTo(botIt);

  // case 4: top segment overhangs bottom segment on the left
  Genome* overhangCase = alignment->addRootGenome("case4");
  dims[0] = Sequence::Info("Sequence", 10, 2, 2);
  overhangCase->setDimensions(dims);

  topIt = overhangCase->getTopSegmentIterator();
  topData.set(0, 9, NULL_INDEX, false, 0);
  topData.applyTo(topIt);

  // Two bottom segments of length 5, both with parse index 0.
  botIt = overhangCase->getBottomSegmentIterator();
  botData.set(0, 5, 0);
  botData.applyTo(botIt);
  botIt->toRight();
  botData.set(5, 5, 0);
  botData.applyTo(botIt);
}