Exemple #1
0
// Clears the current classification (_id) and re-seats the working
// iterators (_cur, _next, _left, _right) on topSegment.  _genome and
// _parent are refreshed from the segment's genome.  In debug builds the
// gap threshold and atomic flag of every working iterator are checked
// against the values cached in this object.
void DefaultRearrangement::resetStatus(TopSegmentIteratorConstPtr topSegment)
{
    _id = Invalid;
    assert(topSegment.get());

    _genome = topSegment->getTopSegment()->getGenome();
    _parent = _genome->getParent();
    assert(_parent != NULL);

    // _cur is positioned first; the remaining child-side iterators are
    // then copied from it (directly, or via _left for _right).
    _cur->setLeft(topSegment);
    _next->copy(_cur);
    _left->copy(_cur);
    _right->copy(_left);

    // Every iterator must share this object's gap threshold...
    assert(_cur->getGapThreshold() == _gapThreshold &&
           _next->getGapThreshold() == _gapThreshold &&
           _left->getGapThreshold() == _gapThreshold &&
           _right->getGapThreshold() == _gapThreshold &&
           _leftParent->getGapThreshold() == _gapThreshold &&
           _rightParent->getGapThreshold() == _gapThreshold &&
           _curParent->getGapThreshold() == _gapThreshold);

    // ...and its atomic setting.
    assert(_cur->getAtomic() == _atomic &&
           _next->getAtomic() == _atomic &&
           _left->getAtomic() == _atomic &&
           _right->getAtomic() == _atomic &&
           _leftParent->getAtomic() == _atomic &&
           _rightParent->getAtomic() == _atomic &&
           _curParent->getAtomic() == _atomic);
}
// Validates the alignment, then runs testTopSegment() against the genome
// named "parent" for: the first top segment of child2 and child1 (whole,
// sliced by (1,2), and reversed), and every top segment of g1 (whole,
// sliced, and reversed in several combinations).
void MappedSegmentMapUpTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  // child2 is exercised before child1.
  const Genome* const children[2] = {child2, child1};
  for (size_t c = 0; c < 2; ++c)
  {
    TopSegmentIteratorConstPtr it = children[c]->getTopSegmentIterator();
    testTopSegment(alignment, it, "parent");
    it->slice(1, 2);
    testTopSegment(alignment, it, "parent");
    it->toReverse();
    testTopSegment(alignment, it, "parent");
  }

  const Genome* g1 = alignment->openGenome("g1");
  hal_size_t segIndex = 0;
  while (segIndex < g1->getNumTopSegments())
  {
    TopSegmentIteratorConstPtr it = g1->getTopSegmentIterator(segIndex);
    testTopSegment(alignment, it, "parent");
    it->slice(1, 0);
    testTopSegment(alignment, it, "parent");
    it->toReverse();
    testTopSegment(alignment, it, "parent");
    it->slice(0, 1);
    testTopSegment(alignment, it, "parent");
    it->toReverse();
    testTopSegment(alignment, it, "parent");
    ++segIndex;
  }
}
// Updates stats from one (non-reversed) gapped top segment:
//  - if it contains gaps, records the gap base count / gap count in
//    stats._gapInsertionLength;
//  - for every top segment between its left and right ends that has a
//    parent, compares child and parent bases position by position and
//    bumps the transition / transversion / substitution / match counters.
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  string parentBases, childBases;
  TopSegmentIteratorConstPtr leftEnd = gappedTop->getLeft();
  TopSegmentIteratorConstPtr rightEnd = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentIt =
     leftEnd->getTopSegment()->getGenome()->getParent()->
     getBottomSegmentIterator();

  TopSegmentIteratorConstPtr cursor = leftEnd->copy();
  for (; cursor->getTopSegment()->getArrayIndex() <=
            rightEnd->getTopSegment()->getArrayIndex();
       cursor->toRight())
  {
    // Unaligned segments contribute nothing to the base comparison.
    if (!cursor->hasParent())
    {
      continue;
    }

    parentIt->toParent(cursor);
    cursor->getString(childBases);
    parentIt->getString(parentBases);
    assert(childBases.length() == parentBases.length());

    for (size_t k = 0; k < childBases.length(); ++k)
    {
      if (isTransition(childBases[k], parentBases[k]))
      {
        ++stats._transitions;
        ++stats._subs;
      }
      else if (isTransversion(childBases[k], parentBases[k]))
      {
        ++stats._transversions;
        ++stats._subs;
      }
      else if (isSubstitution(childBases[k], parentBases[k]))
      {
        // a substitution that is neither of the two classes above
        ++stats._subs;
      }
      else if (!isMissingData(childBases[k]) &&
               !isMissingData(parentBases[k]))
      {
        ++stats._matches;
      }
    }
  }
}
Exemple #4
0
// Scans for a translocation candidate anchored at topSegment.
//
// Only a segment that has a parent and is first or last (per isFirst() /
// isLast()) is considered; anything else returns false.  The neighbour of
// the segment (to the right if it is first, otherwise to the left) is then
// examined: if it has no parent the result is true, otherwise the result
// is whether the neighbour's parent equals the segment next to _cur's
// parent (stepping right if that parent is first, otherwise left).
//
// If true, _leftParent will store the swapped segment (and _cur will
// store the other half).
// NEED TO REVISE WITH STRONGER CRITERIA -- right now any operation
// next to an endpoint can get confused with a translocation.
bool DefaultRearrangement::scanTranslocationCycle(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment.get());
    resetStatus(topSegment);
    const bool first = _cur->isFirst();
    const bool last = _cur->isLast();
    if (_cur->hasParent() == false || (!first && !last))
    {
        return false;
    }

    _leftParent->toParent(_cur);
    const bool pFirst = _leftParent->isFirst();
    _rightParent->copy(_leftParent);

    // Step to the only neighbour on the child side...
    if (first)
    {
        _right->toRight();
    }
    else
    {
        _right->toLeft();
    }
    // ...and to the corresponding neighbour on the parent side.
    if (pFirst)
    {
        _rightParent->toRight();
    }
    else
    {
        _rightParent->toLeft();
    }

    if (_right->hasParent() == false)
    {
        return true;
    }
    _curParent->toParent(_right);
    return _curParent->equals(_rightParent);
}
void TopSegmentIteratorToSiteTest::checkGenome(const Genome* genome)
{
  TopSegmentIteratorConstPtr ti = genome->getTopSegmentIterator();
  for (hal_index_t pos = 0; 
       pos < (hal_index_t)genome->getSequenceLength(); ++pos)
  {
    ti->toSite(pos);
    CuAssertTrue(_testCase, ti->getStartPosition() == pos);
    CuAssertTrue(_testCase, ti->getLength() == 1);
    ti->toSite(pos, false);
    CuAssertTrue(_testCase, pos >= ti->getStartPosition() && 
                 pos < ti->getStartPosition() + (hal_index_t)ti->getLength());
    CuAssertTrue(_testCase, 
                 ti->getLength() == ti->getTopSegment()->getLength());
  }
}
Exemple #6
0
// Resets the scan state on topSegment and reports whether it is a
// duplication: it must have a next paralogy and must not be the
// canonical paralog.
// leaves duplication on _cur and _right
bool DefaultRearrangement::scanDuplicationCycle(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment.get());
    resetStatus(topSegment);
    if (!_cur->hasNextParalogy())
    {
        return false;
    }
    return !_cur->isCanonicalParalog();
}
// Maps `top` onto the genome named "parent" and checks each result:
// source/target genomes, that no position is covered twice, that target
// coordinates equal source coordinates, and that mapping the result back
// to top's genome yields exactly one segment with the same coordinates.
void MappedSegmentParseTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top)
{
  const Genome* parent = alignment->openGenome("parent");
  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, parent, NULL, false);

  // NOTE(review): sized by top's length but indexed by absolute position
  // below -- presumably only valid when top starts at 0; confirm.
  vector<bool> covered(top->getLength(), false);

  CuAssertTrue(_testCase, mapped.size() >= 1);

  set<MappedSegmentConstPtr>::iterator it = mapped.begin();
  while (it != mapped.end())
  {
    MappedSegmentConstPtr fwd = *it;
    CuAssertTrue(_testCase, fwd->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, fwd->getGenome() == parent);

    // each position may be claimed by at most one mapped segment
    hal_index_t pos = fwd->getStartPosition();
    while (pos <= fwd->getEndPosition())
    {
      CuAssertTrue(_testCase, covered[pos] == false);
      covered[pos] = true;
      ++pos;
    }

    CuAssertTrue(_testCase, fwd->getStartPosition() ==
                 fwd->getSource()->getStartPosition());
    CuAssertTrue(_testCase, fwd->getEndPosition() ==
                 fwd->getSource()->getEndPosition());

    // round trip: map the result back into top's genome
    set<MappedSegmentConstPtr> roundTrip;
    fwd->getMappedSegments(roundTrip, top->getGenome(), NULL, false);
    CuAssertTrue(_testCase, roundTrip.size() == 1);
    MappedSegmentConstPtr back = *roundTrip.begin();
    CuAssertTrue(_testCase, back->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, back->getSource()->getGenome() ==
                 fwd->getGenome());
    CuAssertTrue(_testCase, back->getStartPosition() ==
                 fwd->getStartPosition());
    CuAssertTrue(_testCase, back->getEndPosition() ==
                 fwd->getEndPosition());
    CuAssertTrue(_testCase, back->getSource()->getStartPosition() ==
                 fwd->getStartPosition());
    CuAssertTrue(_testCase, back->getSource()->getEndPosition() ==
                 fwd->getEndPosition());

    ++it;
  }
}
// Maps `top` onto the genome named ancName and expects exactly one
// result.  The result's source must describe `top` (genome, start,
// length, orientation) and its target must match the bottom segment
// reached by walking up from `top` with toParent() (taking one extra
// parse-up / parent step when the first hop does not land in ancName).
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* ancestor = alignment->openGenome(ancName);
  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, ancestor, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // source side must mirror the query segment
  CuAssertTrue(_testCase, top->getGenome() == hit->getSource()->getGenome());
  CuAssertTrue(_testCase,
               top->getStartPosition() == hit->getSource()->getStartPosition());
  CuAssertTrue(_testCase,
               top->getLength() == hit->getSource()->getLength());
  CuAssertTrue(_testCase,
               top->getReversed() == hit->getSource()->getReversed());

  // compute the expected target independently with the plain iterators
  BottomSegmentIteratorConstPtr expected = ancestor->getBottomSegmentIterator();
  expected->toParent(top);
  // extra hop for when top is in grand child
  if (expected->getGenome() != ancestor)
  {
    TopSegmentIteratorConstPtr hop =
       expected->getGenome()->getTopSegmentIterator();
    hop->toParseUp(expected);
    expected->toParent(hop);
  }

  CuAssertTrue(_testCase, expected->getGenome() == hit->getGenome());
  CuAssertTrue(_testCase,
               expected->getStartPosition() == hit->getStartPosition());
  CuAssertTrue(_testCase,
               expected->getLength() == hit->getLength());
  CuAssertTrue(_testCase,
               expected->getReversed() == hit->getReversed());
}
// Validates the alignment, then runs testTopSegment() on the first top
// segment of child2 and of child1: whole, sliced by (1,2), and reversed.
void MappedSegmentMapAcrossTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  // child2 is exercised before child1.
  const Genome* const children[2] = {child2, child1};
  for (size_t c = 0; c < 2; ++c)
  {
    TopSegmentIteratorConstPtr it = children[c]->getTopSegmentIterator();
    testTopSegment(alignment, it);
    it->slice(1, 2);
    testTopSegment(alignment, it);
    it->toReverse();
    testTopSegment(alignment, it);
  }
}
// Asserts (via testCase) that the top segment under `it` has the same
// length, start position, next-paralogy index, parent index and bottom
// parse index as the values stored in this struct.
void TopSegmentStruct::compareTo(TopSegmentIteratorConstPtr it, 
                                 CuTest* testCase) const
{
  const TopSegment* actual = it->getTopSegment();
  CuAssertTrue(testCase, actual->getLength() == _length);
  CuAssertTrue(testCase, actual->getStartPosition() == _startPosition);
  CuAssertTrue(testCase, actual->getNextParalogyIndex() == _nextParalogyIndex);
  CuAssertTrue(testCase, actual->getParentIndex() == _parentIndex);
  CuAssertTrue(testCase, actual->getBottomParseIndex() == _bottomParseIndex);
}
// Maps `bottom` onto child number childIndex of its genome and expects
// exactly one result.  The result's source must describe `bottom`
// (genome, start, length, orientation) and its target must match the top
// segment reached from `bottom` with toChild().
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* target = bottom->getGenome()->getChild(childIndex);
  set<MappedSegmentConstPtr> mapped;
  bottom->getMappedSegments(mapped, target, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // source side must mirror the query segment
  CuAssertTrue(_testCase,
               bottom->getGenome() == hit->getSource()->getGenome());
  CuAssertTrue(_testCase,
               bottom->getStartPosition() ==
               hit->getSource()->getStartPosition());
  CuAssertTrue(_testCase,
               bottom->getLength() == hit->getSource()->getLength());
  CuAssertTrue(_testCase,
               bottom->getReversed() == hit->getSource()->getReversed());

  // compute the expected target independently with the plain iterator
  TopSegmentIteratorConstPtr expected = target->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);
  CuAssertTrue(_testCase, expected->getGenome() == hit->getGenome());
  CuAssertTrue(_testCase,
               expected->getStartPosition() == hit->getStartPosition());
  CuAssertTrue(_testCase,
               expected->getLength() == hit->getLength());
  CuAssertTrue(_testCase,
               expected->getReversed() == hit->getReversed());
}
Exemple #12
0
// Classifies topSegment (which must not be reversed) as an Insertion when
// scanInsertionCycle() succeeds and the resulting _cur has no parent.
// Sets _id to Insertion and returns true in that case; otherwise sets
// _id to Invalid and returns false.
bool DefaultRearrangement::identifyInsertionFromLeftBreakpoint(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment->getReversed() == false);

    // _cur is only consulted when the scan itself succeeded.
    bool isInsertion = false;
    if (scanInsertionCycle(topSegment))
    {
        isInsertion = !_cur->hasParent();
    }

    if (isInsertion)
    {
        _id = Insertion;
    }
    else
    {
        _id = Invalid;
    }
    return isInsertion;
}
// Checks top segment 100 of genome "Anc0": its start position (500),
// length (9), its DNA string, and the string read after toReverse().
void TopSegmentSequenceTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("Anc0");
  TopSegmentIteratorConstPtr tsIt = ancGenome->getTopSegmentIterator(100);
  CuAssertTrue(_testCase, 500 == tsIt->getTopSegment()->getStartPosition());
  CuAssertTrue(_testCase, 9 == tsIt->getTopSegment()->getLength());

  string bases;
  // forward strand
  tsIt->getString(bases);
  CuAssertTrue(_testCase, bases == "CACACATTC");

  // after reversing the iterator
  tsIt->toReverse();
  tsIt->getString(bases);
  CuAssertTrue(_testCase, bases == "GAATGTGTG");
}
// Advances ts with toRight() while it sits on a gap, i.e. a segment with
// no parent whose length does not exceed _gapThreshold.  Stops without
// moving further once ts is on the last segment (or the first segment,
// when ts is reversed).
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  TopSegmentIteratorConstPtr ts) const
{
  for (;;)
  {
    // Not a gap: aligned, or too long to be skipped.
    if (ts->hasParent() || ts->getLength() > _gapThreshold)
    {
      return;
    }

    // Nothing further to the right in the iterator's orientation.
    const bool atEnd = ts->getReversed() ? ts->getTopSegment()->isFirst()
                                         : ts->getTopSegment()->isLast();
    if (atEnd)
    {
      return;
    }
    ts->toRight();
  }
}
Exemple #15
0
// Segment is an inverted descendant of another Segment but
// otherwise no rearrangement.
//
// Returns false if the segment has no parent, or if an existing left or
// right neighbour has no parent or has a parent that is not adjacent to
// the segment's parent.  Otherwise returns whether the segment is
// reversed with respect to its parent.
bool DefaultRearrangement::scanInversionCycle(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment.get());
    resetStatus(topSegment);
    const bool hasLeftNeighbour = !_cur->isFirst();
    const bool hasRightNeighbour = !_cur->isLast();

    if (!_cur->hasParent())
    {
        return false;
    }
    _curParent->toParent(_cur);

    if (hasLeftNeighbour)
    {
        _left->toLeft();
        if (!_left->hasParent())
        {
            return false;
        }
        _leftParent->toParent(_left);
        if (!_leftParent->adjacentTo(_curParent))
        {
            return false;
        }
    }

    if (hasRightNeighbour)
    {
        _right->toRight();
        if (!_right->hasParent())
        {
            return false;
        }
        _rightParent->toParent(_right);
        if (!_rightParent->adjacentTo(_curParent))
        {
            return false;
        }
    }

    return _cur->getParentReversed();
}
Exemple #16
0
// Segment corresponds to no rearrangement.  This will happen when
// there is a rearrangement in the homologous segment in its sibling
// genome.  In general, we can expect about half of segments to correspond
// to such cases.
//
// Returns false if the segment has no parent, or if an existing left or
// right neighbour fails any of the checks below (has a parent, parent is
// adjacent to the segment's parent, orientation and relative order are
// consistent with the segment's).
bool DefaultRearrangement::scanNothingCycle(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment.get());
    resetStatus(topSegment);
    bool first = _cur->isFirst();
    bool last = _cur->isLast();

    if (_cur->hasParent() == false)
    {
        return false;
    }
    _curParent->toParent(_cur);
    // Left neighbour (skipped when the segment is first).
    if (first == false)
    {
        _left->toLeft();
        if (_left->hasParent() == false)
        {
            return false;
        }
        _leftParent->toParent(_left);
        if (_leftParent->adjacentTo(_curParent) == false)
        {
            return false;
        }
        if (_left->getParentReversed() == true)
        {
            // Neighbour is reversed: the segment must be reversed too and
            // the neighbour's parent must pass rightOf() the segment's
            // parent start.
            if (_cur->getParentReversed() == false ||
                    _leftParent->rightOf(_curParent->getStartPosition()) == false)
            {
                return false;
            }
        }
        else
        {
            // Neighbour is not reversed: the segment must not be either and
            // the neighbour's parent must pass leftOf() the segment's
            // parent start.
            if (_cur->getParentReversed() == true ||
                    _leftParent->leftOf(_curParent->getStartPosition()) == false)
            {
                return false;
            }
        }
    }
    // Right neighbour (skipped when the segment is last); mirror image of
    // the left-hand checks.
    if (last == false)
    {
        _right->toRight();
        if (_right->hasParent() == false)
        {
            return false;
        }
        _rightParent->toParent(_right);
        if (_rightParent->adjacentTo(_curParent) == false)
        {
            return false;
        }
        if (_right->getParentReversed() == true)
        {
            if (_cur->getParentReversed() == false ||
                    _rightParent->leftOf(_curParent->getStartPosition()) == false)
            {
                return false;
            }
        }
        else
        {
            if (_cur->getParentReversed() == true ||
                    _rightParent->rightOf(_curParent->getStartPosition()) == false)
            {
                return false;
            }
        }
    }
    // Parsed as (last && first) ? getParentReversed() : true.
    // NOTE(review): for a segment with no neighbours this returns true when
    // the segment IS reversed, the same condition scanInversionCycle
    // returns -- possibly meant to be negated; confirm intent.
    return last && first ? _cur->getParentReversed() : true;
}
// Checks that every top segment of `genome` that shares its parent
// (bottom) segment with another top segment of the same genome is marked
// as a duplication, i.e. has a next-paralogy link.
//
// Does nothing when the genome has no parent.
// Throws hal_exception naming the offending segment and its parent index
// when an unmarked duplication is found.
void hal::validateDuplications(const Genome* genome)
{
  const Genome* parent = genome->getParent();
  if (parent == NULL)
  {
    return;
  }
  TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
  TopSegmentIteratorConstPtr endIt = genome->getTopSegmentEndIterator();

  // Pass 1: count how many top segments reference each parent segment.
  // The counter is saturated at 250 so the unsigned char cannot wrap.
  vector<unsigned char> pcount(parent->getNumBottomSegments(), 0);
  for (; topIt != endIt; topIt->toRight())
  {
    if (topIt->hasParent())
    {
      if (pcount[topIt->getTopSegment()->getParentIndex()] < 250)
      {
        ++pcount[topIt->getTopSegment()->getParentIndex()];
      }
    }
  }

  // Pass 2: any segment whose parent is referenced more than once must
  // carry a paralogy link.
  for (topIt = genome->getTopSegmentIterator(); topIt != endIt; topIt->toRight())
  {
    if (topIt->hasParent())
    {
      size_t count = pcount[topIt->getTopSegment()->getParentIndex()];
      assert(count > 0);
      if (topIt->hasNextParalogy() == false && count > 1)
      {
        stringstream ss;
        // Report the parent index here (the original printed the
        // segment's own array index a second time).
        ss << "Top Segment " << topIt->getTopSegment()->getArrayIndex()
           << " in genome " << genome->getName() << " is not marked as a"
           << " duplication but it shares its parent " 
           << topIt->getTopSegment()->getParentIndex() << " with at least " 
           << count - 1 << " other segments in the same genome";
        throw hal_exception(ss.str());
      }
    }
  }
}
// Validates the internal consistency of a single top segment.
//
// Checks, in order:
//  - the segment's array index lies within its genome's top-segment array;
//  - the segment has non-zero length;
//  - if it has a parent, the parent index is in range and the parent
//    bottom segment has the same length;
//  - its bottom parse index is null only in a genome without children;
//    otherwise the index and parse offset are in range and consistent
//    with the segment's start position;
//  - if it has a next paralog, that index is in range, the paralog shares
//    the same parent index, and the segment does not point at itself.
//
// Throws hal_exception describing the first violated condition.
void hal::validateTopSegment(const TopSegment* topSegment)
{
  const Genome* genome = topSegment->getGenome();
  hal_index_t index = topSegment->getArrayIndex();
  // The array index addresses top segments, so it is bounded by the
  // number of top segments (not by the sequence length in bases).
  if (index < 0 || index >= (hal_index_t)genome->getNumTopSegments())
  {
    stringstream ss;
    ss << "Segment out of range " << index << " in genome "
       << genome->getName();
    throw hal_exception(ss.str());
  }

  if (topSegment->getLength() < 1)
  {
    stringstream ss;
    ss << "Top segment " << index  << " in genome " << genome->getName()
       << " has length 0 which is not currently supported";
    throw hal_exception(ss.str());
  }

  const Genome* parentGenome = genome->getParent();
  const hal_index_t parentIndex = topSegment->getParentIndex();
  if (parentGenome != NULL && parentIndex != NULL_INDEX)
  {
    // A non-null index must be a usable array position; reject negative
    // values as well as values past the end.
    if (parentIndex < 0 ||
        parentIndex >= (hal_index_t)parentGenome->getNumBottomSegments())
    {
      stringstream ss;
      ss << "Parent index " << parentIndex << " of segment "
         << topSegment->getArrayIndex() << " out of range in genome "
         << parentGenome->getName();
      throw hal_exception(ss.str());
    }
    BottomSegmentIteratorConstPtr bottomSegmentIterator = 
       parentGenome->getBottomSegmentIterator(parentIndex);
    const BottomSegment* parentSegment = 
       bottomSegmentIterator->getBottomSegment();
    if (topSegment->getLength() != parentSegment->getLength())
    {
      stringstream ss;
      ss << "Parent length of segment " << topSegment->getArrayIndex() 
         << " in genome " << genome->getName() << " has length "
         << parentSegment->getLength() << " which does not match "
         << topSegment->getLength();
      throw hal_exception(ss.str());
    }
  }

  const hal_index_t parseIndex = topSegment->getBottomParseIndex();
  if (parseIndex == NULL_INDEX)
  {
    // A null parse index is only acceptable in a genome with no children.
    if (genome->getNumChildren() != 0)
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has null parse index";
      throw hal_exception(ss.str());
    }
  }
  else
  {
    if (parseIndex < 0 ||
        parseIndex >= (hal_index_t)genome->getNumBottomSegments())
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse index out of range";
      throw hal_exception(ss.str());
    }
    hal_offset_t parseOffset = topSegment->getBottomParseOffset();
    BottomSegmentIteratorConstPtr bottomSegmentIterator =
       genome->getBottomSegmentIterator(parseIndex);
    const BottomSegment* parseSegment = 
       bottomSegmentIterator->getBottomSegment();
    if (parseOffset >= parseSegment->getLength())
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse offset out of range";
      throw hal_exception(ss.str());
    }
    // The parse segment's start plus the offset must land exactly on
    // this segment's start.
    if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != 
        topSegment->getStartPosition())
    {
      throw hal_exception("parse index broken in top segment in genome " +
                          genome->getName());
    }
  }

  const hal_index_t paralogyIndex = topSegment->getNextParalogyIndex();
  if (paralogyIndex != NULL_INDEX)
  {
    // Check the range before building an iterator from the index.
    if (paralogyIndex < 0 ||
        paralogyIndex >= (hal_index_t)genome->getNumTopSegments())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has paralogy index " << paralogyIndex
         << " out of range in genome " << genome->getName();
      throw hal_exception(ss.str());
    }
    TopSegmentIteratorConstPtr pti = 
       genome->getTopSegmentIterator(paralogyIndex);
    if (pti->getTopSegment()->getParentIndex() != topSegment->getParentIndex())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has parent index "
         << topSegment->getParentIndex() << ", but next paraglog " 
         << topSegment->getNextParalogyIndex() << " has parent Index " 
         << pti->getTopSegment()->getParentIndex() 
         << ". Paralogous top segments must share same parent.";
      throw hal_exception(ss.str());
    }
    if (paralogyIndex == topSegment->getArrayIndex())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has paralogy index " << topSegment->getNextParalogyIndex()
         << " which isn't allowed";
      throw hal_exception(ss.str());
    }
  }
}
// Maps the first top segment of grandChild2 onto grandChild1 twice:
// first with the default coalescence limit (expecting a single homology),
// then with `root` as the coalescence limit (expecting three paralogs at
// start positions 2, 5 and 8).  Each result is checked for preserved
// source information and the expected target coordinates.
void MappedSegmentMapExtraParalogsTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome *grandChild1 = alignment->openGenome("grandChild1");
  const Genome *grandChild2 = alignment->openGenome("grandChild2");
  const Genome *root = alignment->openGenome("root");

  TopSegmentIteratorConstPtr top = grandChild2->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;

  // First, check that by default we will only get the homologies in
  // or before the MRCA. (in this case, just seg 0 of grandChild1).
  top->getMappedSegments(results, grandChild1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 1);
  MappedSegmentConstPtr mseg = *results.begin();
  // Source information should be preserved
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase, mseg->getSource()->getStartPosition() == 
               top->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getReversed() == top->getReversed());

  // Check target information is correct
  CuAssertTrue(_testCase,
               mseg->getGenome() == grandChild1);
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == 2);
  CuAssertTrue(_testCase,
               mseg->getLength() == 3);
  CuAssertTrue(_testCase,
               mseg->getReversed() == true);

  // Check that by using the grandparent as the coalescence limit we
  // will get all the paralogs.
  // NOTE(review): results is not cleared before this second query; the
  // size check below presumably relies on the set de-duplicating the
  // earlier hit -- confirm.
  top->getMappedSegments(results, grandChild1, NULL, true, 0, root);
  CuAssertTrue(_testCase, results.size() == 3);
  set<MappedSegmentConstPtr>::iterator i = results.begin();
  bool found[3] = {false, false, false};
  for (; i != results.end(); ++i)
  {
    // Inspect the segment under the iterator (the original re-checked the
    // stale result of the first query on every pass).
    MappedSegmentConstPtr paralog = *i;

    // Source information should be preserved
    CuAssertTrue(_testCase,
                 paralog->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, paralog->getSource()->getStartPosition() == 
                 top->getStartPosition());
    CuAssertTrue(_testCase, 
                 paralog->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase, 
                 paralog->getSource()->getReversed() == top->getReversed());
    
    // Check target information is correct
    const hal_index_t start = paralog->getStartPosition();
    CuAssertTrue(_testCase,
                 paralog->getGenome() == grandChild1);
    CuAssertTrue(_testCase,
                 start == 2 || start == 5 || start == 8);
    CuAssertTrue(_testCase,
                 paralog->getLength() == 3);
    CuAssertTrue(_testCase,
                 paralog->getReversed() == true);

    // Slot 0/1/2 <-> start 2/5/8; the range was asserted just above, so
    // the index cannot leave the array.
    found[(start - 2) / 3] = true;
  }
  // Every expected paralog must have been seen.
  CuAssertTrue(_testCase, found[0] && found[1] && found[2]);
}
// Walks gapped top/bottom segment iterators (gap threshold 9999999) over
// the "child" and "parent" genomes in both directions and in both
// orientations.  The assertions expect every gapped segment to span 20
// consecutive array indices (i .. i+19), and expect mapping a gapped
// segment to its parent / child and back to be consistent.
void 
GappedSegmentIteratorIndelTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  // Forward-oriented iterators starting at index 0...
  GappedTopSegmentIteratorConstPtr gtsIt = 
     child->getGappedTopSegmentIterator(0, 9999999);

  GappedBottomSegmentIteratorConstPtr gbsIt = 
     parent->getGappedBottomSegmentIterator(0, 0, 9999999);
  // ...and reversed twins of the same iterators.
  GappedTopSegmentIteratorConstPtr gtsItRev = 
     child->getGappedTopSegmentIterator(0, 9999999);
  gtsItRev->toReverse();
  GappedBottomSegmentIteratorConstPtr gbsItRev = 
     parent->getGappedBottomSegmentIterator(0, 0, 9999999);
     gbsItRev->toReverse();

  // Pass 1: left-to-right in steps of 20 array indices.
  for (size_t i = 0; i < child->getNumTopSegments(); i += 20)
  {
    // Forward iterators: left end at i, right end at i + 19.
    TopSegmentIteratorConstPtr tsIt = gtsIt->getLeft();
    CuAssertTrue(_testCase, 
                 (size_t)tsIt->getTopSegment()->getArrayIndex() == i);
    tsIt = gtsIt->getRight();

    CuAssertTrue(_testCase, 
                 (size_t)tsIt->getTopSegment()->getArrayIndex() == i + 19);

    BottomSegmentIteratorConstPtr bsIt = gbsIt->getLeft();
    CuAssertTrue(_testCase, 
                 (size_t)bsIt->getBottomSegment()->getArrayIndex() == i);
    bsIt = gbsIt->getRight();
    CuAssertTrue(_testCase, 
                 (size_t)bsIt->getBottomSegment()->getArrayIndex() == i + 19);

    // Mapping the gapped top segment to its parent must give the gapped
    // bottom segment (after normalising orientation), and vice versa.
    GappedBottomSegmentIteratorConstPtr gappedParent = gbsIt->copy();
    gappedParent->toParent(gtsIt);
    if (gappedParent->getReversed())
    {
      gappedParent->toReverse();
    }
    CuAssertTrue(_testCase,
                 gappedParent->equals(gbsIt));
    GappedTopSegmentIteratorConstPtr gappedChild = gtsIt->copy();
    gappedChild->toChild(gbsIt);
    if (gappedChild->getReversed())
    {
      gappedChild->toReverse();
    }
    CuAssertTrue(_testCase, gappedChild->equals(gtsIt));
    
    gtsIt->toRight();
    gbsIt->toRight();

    // Reversed iterators: left/right ends are swapped (i + 19, then i),
    // and toLeft() is what advances them to the next block.
    TopSegmentIteratorConstPtr tsItRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase, 
                 (size_t)tsItRev->getTopSegment()->getArrayIndex() == i + 19);
    tsItRev = gtsItRev->getRight();
    CuAssertTrue(_testCase, 
                 (size_t)tsItRev->getTopSegment()->getArrayIndex() == i);
    gtsItRev->toLeft();

    BottomSegmentIteratorConstPtr bsItRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase, 
                 (size_t)bsItRev->getBottomSegment()->getArrayIndex() == i+19);
    bsItRev = gbsItRev->getRight();
    CuAssertTrue(_testCase, 
                 (size_t)bsItRev->getBottomSegment()->getArrayIndex() == i);
    gbsItRev->toLeft();

    }

  // Re-seat all four iterators 20 segments before the end of the child's
  // top-segment array (the same index is used for the parent's bottom
  // segments).
  gtsIt = child->getGappedTopSegmentIterator(
    child->getNumTopSegments() - 20, 9999999);
  gbsIt = parent->getGappedBottomSegmentIterator(
    child->getNumTopSegments() - 20, 0, 9999999); 
  gtsItRev = child->getGappedTopSegmentIterator(
    child->getNumTopSegments() - 20, 9999999);
  gtsItRev->toReverse();
  gbsItRev = parent->getGappedBottomSegmentIterator(
    child->getNumTopSegments() - 20, 0, 9999999);
  gbsItRev->toReverse();

  // Pass 2: right-to-left; i is the index of the right end of each block.
  for (hal_index_t i = child->getNumTopSegments() - 1; i >= 0; i -= 20)
  {
    TopSegmentIteratorConstPtr tsIt = gtsIt->getLeft();
    CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i - 19);
    tsIt = gtsIt->getRight();
    CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gtsIt->getReversed() == false);
    gtsIt->toLeft();

    BottomSegmentIteratorConstPtr bsIt = gbsIt->getLeft();
    CuAssertTrue(_testCase, bsIt->getBottomSegment()->getArrayIndex() == i-19);
    bsIt = gbsIt->getRight();
    CuAssertTrue(_testCase, bsIt->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gbsIt->getReversed() == false);
    gbsIt->toLeft();

    // Reversed iterators again see swapped ends and move with toRight().
    TopSegmentIteratorConstPtr tsItRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase, tsItRev->getTopSegment()->getArrayIndex() == i);
    tsItRev = gtsItRev->getRight();
    CuAssertTrue(_testCase, tsItRev->getTopSegment()->getArrayIndex() == i-19);
    CuAssertTrue(_testCase, gtsItRev->getReversed() == true);
    gtsItRev->toRight();

    BottomSegmentIteratorConstPtr bsItRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase, bsItRev->getBottomSegment()->getArrayIndex() == i);
    bsItRev = gbsItRev->getRight();
    CuAssertTrue(_testCase, bsItRev->getBottomSegment()->getArrayIndex()==i-19);
    CuAssertTrue(_testCase, gbsItRev->getReversed() == true);
    gbsItRev->toRight();
    }

}
// Iterates the top segments of genome "Anc0" in six ways and checks the
// iterator state at each step:
//  1. segment by segment, left to right, against _topSegments;
//  2. segment by segment, right to left, against _topSegments;
//  3. base by base, left to right (length-1 slices);
//  4. base by base, right to left;
//  5. base by base with a reversed iterator, ascending positions;
//  6. base by base with a reversed iterator, descending positions.
void TopSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("Anc0");
  CuAssertTrue(_testCase, 
               _topSegments.size() == ancGenome->getNumTopSegments());

  // 1. whole segments, left to right
  TopSegmentIteratorConstPtr tsIt = ancGenome->getTopSegmentIterator(0);
  size_t fwd = 0;
  while (fwd < ancGenome->getNumTopSegments())
  {
    CuAssertTrue(_testCase, 
                 fwd == (size_t)tsIt->getTopSegment()->getArrayIndex());
    _topSegments[fwd].compareTo(tsIt, _testCase);
    tsIt->toRight();
    ++fwd;
  }

  // 2. whole segments, right to left
  tsIt = ancGenome->getTopSegmentIterator(
    ancGenome->getNumTopSegments() - 1);
  hal_index_t bwd = ancGenome->getNumTopSegments() - 1;
  while (bwd >= 0)
  {
    CuAssertTrue(_testCase, bwd == tsIt->getTopSegment()->getArrayIndex());
    _topSegments[bwd].compareTo(tsIt, _testCase);
    tsIt->toLeft();
    --bwd;
  }

  // 3. single bases, left to right: keep only the first base of segment 0
  tsIt = ancGenome->getTopSegmentIterator(0); 
  tsIt->slice(0, tsIt->getLength() - 1);
  hal_index_t pos = 0;
  while (pos < (hal_index_t)ancGenome->getSequenceLength())
  {
    CuAssertTrue(_testCase, 1 == tsIt->getLength());
    CuAssertTrue(_testCase, pos == tsIt->getStartPosition());
    tsIt->toRight(tsIt->getStartPosition() + 1);
    ++pos;
  }

  // 4. single bases, right to left: keep only the last base of the last
  //    segment
  tsIt = ancGenome->getTopSegmentIterator(
    ancGenome->getNumTopSegments() - 1);
  tsIt->slice(tsIt->getLength() - 1, 0);
  pos = ancGenome->getSequenceLength() - 1;
  while (pos >= 0)
  {
    CuAssertTrue(_testCase, 1 == tsIt->getLength());
    CuAssertTrue(_testCase, pos == tsIt->getStartPosition());
    tsIt->toLeft(tsIt->getStartPosition() - 1);
    --pos;
  }

  // 5. reversed iterator, ascending positions (moves with toLeft)
  tsIt = ancGenome->getTopSegmentIterator(0); 
  tsIt->toReverse();
  CuAssertTrue(_testCase, tsIt->getReversed() == true);
  tsIt->slice(tsIt->getLength() - 1, 0);
  pos = 0;
  while (pos < (hal_index_t)ancGenome->getSequenceLength())
  {
    CuAssertTrue(_testCase, 1 == tsIt->getLength());
    CuAssertTrue(_testCase, pos == tsIt->getStartPosition());
    tsIt->toLeft(tsIt->getStartPosition() + 1);
    ++pos;
  }

  // 6. reversed iterator, descending positions (moves with toRight)
  tsIt = ancGenome->getTopSegmentIterator(
    ancGenome->getNumTopSegments() - 1);
  tsIt->toReverse();
  tsIt->slice(0, tsIt->getLength() - 1);
  pos = ancGenome->getSequenceLength() - 1;
  while (pos >= 0)
  {
    CuAssertTrue(_testCase, 1 == tsIt->getLength());
    CuAssertTrue(_testCase, pos == tsIt->getStartPosition());
    tsIt->toRight(tsIt->getStartPosition() - 1);
    --pos;
  }
}
// quickly count subsitutions without loading rearrangement machinery.
// used for benchmarks for basic file scanning... and not much else since
// the interface is still a bit wonky.
void SummarizeMutations::substitutionAnalysis(const Genome* genome, 
                                               MutationsStats& stats)
{
  assert(stats._subs == 0);
  if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 ||
      (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end()))
  {
    return;
  }
  const Genome* parent = genome->getParent();
  string pname = parent != NULL ? parent->getName() : string();
  StrPair branchName(genome->getName(), pname);

  BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator();
  
  string gString, cString;

  hal_size_t n = genome->getNumBottomSegments();
  vector<hal_size_t> children;
  hal_size_t m = genome->getNumChildren();
  for (hal_size_t i = 0; i < m; ++i)
  {
    string cName = genome->getChild(i)->getName();
    if (!_targetSet || 
        (_targetSet && _targetSet->find(cName) != _targetSet->end()))
    {
      children.push_back(i);
    }
  }
  if (children.empty())
  {
    return;
  }

  for (hal_size_t i = 0; i < n; ++i)
  {
    bool readString = false;
    for (size_t j = 0; j < children.size(); ++j)
    {
      if (bottom->hasChild(children[j]))
      {
        if (readString == false)
        {
          bottom->getString(gString);
          readString = true;
        }
        top->toChild(bottom, children[j]);
        top->getString(cString);
        assert(gString.length() == cString.length());
        for (hal_size_t k = 0; k < gString.length(); ++k)
        {
          if (isSubstitution(gString[k], cString[k]))
          {
            ++stats._subs;
          }
        }
      }
    }
    bottom->toRight();
  }
}
Exemple #23
0
// Scan for an insertion "cycle" anchored at topSegment.
//
// If true, _cur will store the insertion 'candidate'
// It must be further verified that this segment has no parent to
// distinguish between destination of transposition and insertion.
//
// The member iterators _cur, _next, _left, _right and the *Parent iterators
// are all overwritten as a side effect, so the order of the statements
// below matters.
bool DefaultRearrangement::scanInsertionCycle(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment.get());
    // Positions _cur, _next, _left and _right on topSegment.
    resetStatus(topSegment);

    // eat up any adjacent insertions so they don't get double counted
    // (_next is pushed right for as long as its right neighbour is also
    // parentless; _right is used as the look-ahead probe)
    while (_next->hasParent() == false && _next->isLast() == false)
    {
        _right->copy(_next);
        _right->toRight();
        if (_right->hasParent() == false)
        {
            _next->copy(_right);
        }
        else
        {
            break;
        }
    }
    // _right now marks the right end of the (possibly extended) run.
    _right->copy(_next);
    assert(_next->equals(_cur) || _next->hasParent() == false);

    // Endpoint flags: left end is taken from _cur, right end from _right.
    bool first = _cur->isFirst();
    bool last = _right->isLast();
    if (first && last)
    {
        // No neighbour on either side: nothing to compare against.
        return false;
    }

    // Case 1a) current segment is left endpoint.  we consider insertion
    // if right neighbour has parent
    if (first)
    {
        _right->toRight();
        if (_cur->hasParent() == false)
        {
            return true;
        }
        else if (_right->hasParent())
        {
            // Both have parents: candidate only if the parents are not
            // adjacent to each other.
            _curParent->toParent(_cur);
            _rightParent->toParent(_right);
            return _rightParent->adjacentTo(_curParent) == false;
        }
    }

    // Case 1b) current segment is right endpoint.  we consider insertion
    // if left neighbour has parent
    else if (last)
    {
        _left->toLeft();
        if (_cur->hasParent() == false)
        {
            return true;
        }
        else if (_left->hasParent())
        {
            // Mirror image of case 1a, using the left neighbour.
            _curParent->toParent(_cur);
            _leftParent->toParent(_left);
            return _leftParent->adjacentTo(_curParent) == false;
        }
    }

    // Case 2) current segment has a left neighbour and a right neighbour
    else
    {
        _left->toLeft();
        _right->toRight();
        // Only decidable when both flanking segments have parents.
        if (_left->hasParent() == true && _right->hasParent() == true)
        {
            _leftParent->toParent(_left);
            _rightParent->toParent(_right);
            // Case 2a) Parents are adjacent
            if (_leftParent->adjacentTo(_rightParent))
            {
                return true;
            }
            // Case 2b) Left parent is endpoint
            // (accepted only if both parents lie on the same sequence)
            else if (_leftParent->isFirst() || _leftParent->isLast())
            {
                return _leftParent->getSequence() == _rightParent->getSequence();
            }

            // Case 2c) Right parent is endpoint
            // (same sequence test as case 2b)
            else if (_rightParent->isFirst() || _rightParent->isLast())
            {
                return _leftParent->getSequence() == _rightParent->getSequence();
            }
        }
    }

    // Every path that did not explicitly return ends up here.
    return false;
}
// Validate a single bottom segment against its genome and its children.
//
// Checks, in order:
//   - the segment's array index lies inside the genome's bottom segment array
//   - the segment has non-zero length
//   - for every child slot holding a non-null index: the index is within the
//     child's top segment array, the child segment has the same length, the
//     child points back at this segment (skipped when the child has a next
//     paralogy index) and the reversal flags agree
//   - the top parse index is either null (only allowed when the genome has no
//     parent) or refers to a top segment of the same genome whose start
//     position plus the parse offset equals this segment's start position
//
// Throws hal_exception describing the first violation found.
void hal::validateBottomSegment(const BottomSegment* bottomSegment)
{
  const Genome* genome = bottomSegment->getGenome();
  hal_index_t index = bottomSegment->getArrayIndex();
  // The array index counts segments, so it is bounded by the number of
  // bottom segments (not by the number of bases in the genome).
  if (index < 0 || index >= (hal_index_t)genome->getNumBottomSegments())
  {
    stringstream ss;
    ss << "Bottom segment out of range " << index << " in genome "
       << genome->getName();
    throw hal_exception(ss.str());
  }
  
  if (bottomSegment->getLength() < 1)
  {
    stringstream ss;
    ss << "Bottom segment " << index  << " in genome " << genome->getName()
       << " has length 0 which is not currently supported";
    throw hal_exception(ss.str());
  }

  hal_size_t numChildren = bottomSegment->getNumChildren();
  for (hal_size_t child = 0; child < numChildren; ++child)
  {
    const Genome* childGenome = genome->getChild(child);
    const hal_index_t childIndex = bottomSegment->getChildIndex(child);
    // Child slots without a genome or without an aligned segment are skipped.
    if (childGenome != NULL && childIndex != NULL_INDEX)
    {
      if (childIndex >= (hal_index_t)childGenome->getNumTopSegments())
      {
        stringstream ss;
        ss << "Child " << child << " index " <<childIndex << " of segment "
           << bottomSegment->getArrayIndex() << " out of range in genome "
           << childGenome->getName();
        throw hal_exception(ss.str());
      }
      TopSegmentIteratorConstPtr topSegmentIteratr = 
         childGenome->getTopSegmentIterator(childIndex);
      const TopSegment* childSegment = topSegmentIteratr->getTopSegment();
      if (childSegment->getLength() != bottomSegment->getLength())
      {
        stringstream ss;
        ss << "Child " << child << " with index " 
           << childSegment->getArrayIndex()
           << " and start position " << childSegment->getStartPosition() 
           << " and sequence " << childSegment->getSequence()->getName()
           << " has length " << childSegment->getLength()
           << " but parent with index " << bottomSegment->getArrayIndex() 
           << " and start position " << bottomSegment->getStartPosition()
           << " in sequence " << bottomSegment->getSequence()->getName() 
           << " has length " << bottomSegment->getLength();
        throw hal_exception(ss.str());
      }
      // The back-pointer test is only applied to children without a next
      // paralogy index.
      if (childSegment->getNextParalogyIndex() == NULL_INDEX &&
          childSegment->getParentIndex() != bottomSegment->getArrayIndex())
      {
        throw hal_exception("parent / child index mismatch (parent=" +
                            genome->getName() + " child=" +
                            childGenome->getName() + ")");
      }
      if (childSegment->getParentReversed() != 
          bottomSegment->getChildReversed(child))
      {
        throw hal_exception("parent / child reversal mismatch (parent=" +
                            genome->getName() + " child=" +
                            childGenome->getName() + ")");
      }
    }
  }

  const hal_index_t parseIndex = bottomSegment->getTopParseIndex();
  if (parseIndex == NULL_INDEX)
  {
    // A null parse index is only tolerated for a genome without a parent.
    if (genome->getParent() != NULL)
    {
      stringstream ss;
      ss << "Bottom segment " << bottomSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has null parse index";
      throw hal_exception(ss.str());
    }
  }
  else
  {
    if (parseIndex >= (hal_index_t)genome->getNumTopSegments())
    {
      stringstream ss;
      ss << "BottomSegment " << bottomSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse index " << parseIndex 
         << " greater than the number of top segments, " 
         << (hal_index_t)genome->getNumTopSegments();
      throw hal_exception(ss.str());
    }
    TopSegmentIteratorConstPtr parseIterator = 
       genome->getTopSegmentIterator(parseIndex);
    const TopSegment* parseSegment = parseIterator->getTopSegment();
    hal_offset_t parseOffset = bottomSegment->getTopParseOffset();
    if (parseOffset >= parseSegment->getLength())
    {
      stringstream ss;
      ss << "BottomSegment " << bottomSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse offset, " << parseOffset 
         << ", greater than the length of the segment, " 
         << parseSegment->getLength();
      throw hal_exception(ss.str());
    }
    // The parse target plus the offset must land exactly on our first base.
    if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != 
        bottomSegment->getStartPosition())
    {
      throw hal_exception("parse index broken in bottom segment in genome " +
                          genome->getName());
                          
    }
  }
}
Exemple #25
0
// Scan for a deletion "cycle" anchored at topSegment.
//
// If true, _leftParent will store the deletion 'candidate'
// It must be further verified that this segment has no child to
// distinguish between source of transposition and deletion.
//
// The member iterators _cur, _left, _leftParent and _rightParent are
// overwritten as a side effect (_leftParent and _rightParent may even be
// swapped), so the order of the statements below matters.
bool DefaultRearrangement::scanDeletionCycle(
    TopSegmentIteratorConstPtr topSegment)
{
    assert(topSegment.get());
    // Positions _cur, _next, _left and _right on topSegment.
    resetStatus(topSegment);
    assert(_atomic != true || _cur->getNumSegments() == 1);

    bool first = _cur->isFirst();
    bool last = _cur->isLast();

    // A parentless segment, or one with no neighbour on either side, cannot
    // anchor a deletion.
    if (_cur->hasParent() == false || (first && last))
    {
        return false;
    }

    // Case 1) current segment is a right endpoint.  we consider deletion
    // if parent has neighbour
    // FIXME: the edge cases are probably very wrong.
    if (last)
    {
        _leftParent->toParent(_cur);
        // Prefer the parent's left neighbour as candidate, otherwise fall
        // back to its right neighbour.
        if (_leftParent->isFirst() == false)
        {
            _leftParent->toLeft();
            return true;
        }
        if (_leftParent->isLast() == false)
        {
            _leftParent->toRight();
            return true;
        }
        // Parent is both first and last: falls through to "return false".
    }

    // Case 2) Try to find deletion cycle by going right-up-left-left-down
    else
    {
        _rightParent->toParent(_cur);
        // FIXME: the edge cases are probably very wrong.
        if (first) {
            return false;
        }
        // Step to the left neighbour of the current segment.
        _left->toLeft();

        assert(_rightParent->getGapThreshold() == _gapThreshold);
        assert(_cur->getGapThreshold() == _gapThreshold);
        assert(_atomic != true || _rightParent->getNumSegments() == 1);
        assert(_atomic != true || _left->getNumSegments() == 1);
        if (_left->hasParent() == false)
        {
            return false;
        }
        _leftParent->toParent(_left);

        // Only parents on the same sequence are compared.
        if (_leftParent->getSequence() == _rightParent->getSequence())
        {
            // don't care about inversions
            // so we make sure left is left of right and they are both positive
            if (_leftParent->getReversed() == true)
            {
                _leftParent->toReverse();
            }
            if (_rightParent->getReversed() == true)
            {
                _rightParent->toReverse();
            }
            if (_rightParent->getLeftArrayIndex() < _leftParent->getLeftArrayIndex())
            {
                swap(_leftParent, _rightParent);
            }

            // Nothing can sit to the right of the last parent segment.
            if (_leftParent->isLast())
            {
                return false;
            }

            // Move onto the segment right of the left parent: it is the
            // candidate if it is directly adjacent to the right parent.
            _leftParent->toRight();
            return _leftParent->adjacentTo(_rightParent);
        }
    }

    return false;
}
// Validate one sequence of a genome.
//
// Checks that
//   - every base of the sequence is a nucleotide character (isNucleotide)
//   - if the genome has a parent: every top segment of the sequence passes
//     validateTopSegment and the top segment lengths sum to the sequence
//     length
//   - if the genome has children: every bottom segment of the sequence passes
//     validateBottomSegment and the bottom segment lengths sum to the
//     sequence length
//
// Throws hal_exception describing the first violation found (the per-segment
// validators throw on their own).
void hal::validateSequence(const Sequence* sequence)
{
  // Verify that the DNA sequence doesn't contain funny characters
  DNAIteratorConstPtr dnaIt = sequence->getDNAIterator();
  hal_size_t length = sequence->getSequenceLength();
  for (hal_size_t i = 0; i < length; ++i)
  {
    char c = dnaIt->getChar();
    if (isNucleotide(c) == false)
    {
      stringstream ss;
      ss << "Non-nucleotide character discoverd at position " 
         << i << " of sequence " << sequence->getName() << ": " << c;
      throw hal_exception(ss.str());
    }
    // Step to the next base so that every position is inspected, but never
    // move the iterator past the last base of the sequence.
    if (i + 1 < length)
    {
      dnaIt->toRight();
    }
  }
  
  // Check the top segments
  if (sequence->getGenome()->getParent() != NULL)
  {
    hal_size_t totalTopLength = 0;
    TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator();
    hal_size_t numTopSegments = sequence->getNumTopSegments();
    for (hal_size_t i = 0; i < numTopSegments; ++i)
    {
      const TopSegment* topSegment = topIt->getTopSegment();
      validateTopSegment(topSegment);
      totalTopLength += topSegment->getLength();
      topIt->toRight();
    }
    // The top segments must tile the sequence exactly.
    if (totalTopLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its top segments add up to " << totalTopLength;
      throw hal_exception(ss.str());
    }
  }

  // Check the bottom segments
  if (sequence->getGenome()->getNumChildren() > 0)
  {
    hal_size_t totalBottomLength = 0;
    BottomSegmentIteratorConstPtr bottomIt = 
       sequence->getBottomSegmentIterator();
    hal_size_t numBottomSegments = sequence->getNumBottomSegments();
    for (hal_size_t i = 0; i < numBottomSegments; ++i)
    {
      const BottomSegment* bottomSegment = bottomIt->getBottomSegment();
      validateBottomSegment(bottomSegment);
      totalBottomLength += bottomSegment->getLength();
      bottomIt->toRight();
    }
    // The bottom segments must tile the sequence exactly.
    if (totalBottomLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its bottom segments add up to " << totalBottomLength;
      throw hal_exception(ss.str());
    }
  }
}
// Walks gapped top/bottom segment iterators over the "child" and "parent"
// genomes, once left-to-right starting at index 0 and once right-to-left
// starting at the last index, each in forward and in reversed orientation.
// At every step the gapped iterator is expected to span exactly one segment
// (left end equals right end) whose array index matches the loop counter.
void 
GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  // Last argument handed to every gapped iterator factory below
  // (presumably the gap threshold -- TODO confirm against the Genome API).
  const hal_size_t hugeGap = 9999999;

  GappedTopSegmentIteratorConstPtr topFwd = 
     child->getGappedTopSegmentIterator(0, hugeGap);
  GappedBottomSegmentIteratorConstPtr botFwd = 
     parent->getGappedBottomSegmentIterator(0, 0, hugeGap);
  GappedTopSegmentIteratorConstPtr topRev = 
     child->getGappedTopSegmentIterator(0, hugeGap);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev = 
     parent->getGappedBottomSegmentIterator(0, 0, hugeGap);
  botRev->toReverse();

  // Pass 1: ascending indices.  Forward iterators step right, reversed
  // iterators step left.
  const hal_size_t numSegments = child->getNumTopSegments();
  for (size_t i = 0; i < numSegments; ++i)
  {
    TopSegmentIteratorConstPtr topEnd = topFwd->getLeft();
    CuAssertTrue(_testCase, topEnd->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topEnd->getTopSegment()->getArrayIndex()) == i);
    topFwd->toRight();

    BottomSegmentIteratorConstPtr botEnd = botFwd->getLeft();
    CuAssertTrue(_testCase, botEnd->equals(botFwd->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botEnd->getBottomSegment()->getArrayIndex()) == i);
    botFwd->toRight();

    TopSegmentIteratorConstPtr topEndRev = topRev->getLeft();
    CuAssertTrue(_testCase, topEndRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topEndRev->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr botEndRev = botRev->getLeft();
    CuAssertTrue(_testCase, botEndRev->equals(botRev->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botEndRev->getBottomSegment()->getArrayIndex()) == i);
    botRev->toLeft();
  }

  // Pass 2: restart every iterator on the last segment.  The parent
  // iterators are positioned with the child's segment count as well.
  const hal_index_t lastIndex = child->getNumTopSegments() - 1;
  topFwd = child->getGappedTopSegmentIterator(lastIndex, hugeGap);
  botFwd = parent->getGappedBottomSegmentIterator(lastIndex, 0, hugeGap);
  topRev = child->getGappedTopSegmentIterator(lastIndex, hugeGap);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(lastIndex, 0, hugeGap);
  botRev->toReverse();

  // Descending indices.  Forward iterators step left, reversed iterators
  // step right; the orientation flag must stay put throughout.
  for (hal_index_t i = lastIndex; i >= 0; --i)
  {
    TopSegmentIteratorConstPtr topEnd = topFwd->getLeft();
    CuAssertTrue(_testCase, topEnd->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, topEnd->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr botEnd = botFwd->getLeft();
    CuAssertTrue(_testCase, botEnd->equals(botFwd->getRight()));
    CuAssertTrue(_testCase, botEnd->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, botFwd->getReversed() == false);
    botFwd->toLeft();

    TopSegmentIteratorConstPtr topEndRev = topRev->getLeft();
    CuAssertTrue(_testCase, topEndRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase, topEndRev->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr botEndRev = botRev->getLeft();
    CuAssertTrue(_testCase, botEndRev->equals(botRev->getRight()));
    CuAssertTrue(_testCase, 
                 botEndRev->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, botRev->getReversed() == true);
    botRev->toRight();
  }
}
// Exercises TopSegmentIterator::toParseUp() on the four fixture genomes
// "case1".."case4".  In every case the top iterator is parsed up from a
// bottom iterator twice -- once on the whole bottom segment and once after
// slicing it -- and must start at the same position as the bottom iterator
// each time.  Case 1 additionally checks that the lengths agree.
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1
  {
    const Genome* case1 = alignment->openGenome("case1");
    TopSegmentIteratorConstPtr topIt = case1->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = case1->getBottomSegmentIterator();
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
    CuAssertTrue(_testCase, topIt->getLength() == botIt->getLength());

    // Trim 3 bases off one end and 1 off the other: 4 bases shorter.
    botIt->slice(3, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, botIt->getLength() == 
                 botIt->getBottomSegment()->getLength() - 4);

    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
    CuAssertTrue(_testCase, topIt->getLength() == botIt->getLength());
  }

  // case 2
  {
    const Genome* case2 = alignment->openGenome("case2");
    TopSegmentIteratorConstPtr topIt = case2->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = case2->getBottomSegmentIterator(1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
    botIt->slice(1, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
  }

  // case 3
  {
    const Genome* case3 = alignment->openGenome("case3");
    TopSegmentIteratorConstPtr topIt = case3->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = case3->getBottomSegmentIterator();
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
    botIt->slice(2, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
  }

  // case 4
  {
    const Genome* case4 = alignment->openGenome("case4");
    TopSegmentIteratorConstPtr topIt = case4->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = case4->getBottomSegmentIterator(1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
    botIt->slice(2, 2);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, 
                 topIt->getStartPosition() == botIt->getStartPosition());
  }
}
Exemple #29
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Rertrieve chain (pairwise alignment) "
                                "information from a hal database.\n"
                                "WARNING: THIS TOOL WAS NEVER FINISHED OR"
                                " TESTED. USE AT OWN RISK. PLEASE "
                                "CONSIDER halLiftover --outPSL INSTEAD.");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addArgument("genome", "(query) genome to process");
  optionsParser->addOption("sequence", "sequence name in query genome ("
                           "all sequences if not specified)", "\"\"");
  optionsParser->addOption("start", "start position in query genome", 0);
  optionsParser->addOption("length", "maximum length of chain to output.", 0);
  optionsParser->addOption("chainFile", "path for output file.  stdout if not"
                           " specified", "\"\"");
  optionsParser->addOption("maxGap", 
                           "maximum indel length to be considered a gap within"
                           " a chain.", 
                           20);
  

  string halPath;
  string chainPath;
  string genomeName;
  string sequenceName;
  hal_size_t start;
  hal_size_t length;
  hal_size_t maxGap;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    genomeName = optionsParser->getArgument<string>("genome");
    sequenceName = optionsParser->getOption<string>("sequence");
    start = optionsParser->getOption<hal_size_t>("start");
    length = optionsParser->getOption<hal_size_t>("length");
    chainPath = optionsParser->getOption<string>("chainFile");
    maxGap = optionsParser->getOption<hal_size_t>("maxGap");
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK."
         << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." <<endl;  

    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath,
                                                           optionsParser);
    
    
    const Genome* genome = alignment->openGenome(genomeName);
    if (genome == NULL)
    {
      throw hal_exception(string("Genome not found: ") + genomeName);
    }
    hal_index_t endPosition = 
       length > 0 ? start + length : genome->getSequenceLength();

    const Sequence* sequence = NULL;
    if (sequenceName != "\"\"")
    {
      sequence = genome->getSequence(sequenceName);
      if (sequence == NULL)
      {
        throw hal_exception(string("Sequence not found: ") + sequenceName);
      }
      start += sequence->getStartPosition();
      endPosition =  
         length > 0 ? start + length : sequence->getSequenceLength();
    }

    ofstream ofile;
    ostream& outStream = chainPath == "\"\"" ? cout : ofile;
    if (chainPath != "\"\"")
    {
      ofile.open(chainPath.c_str());
      if (!ofile)
      {
        throw hal_exception(string("Error opening output file ") + 
                            chainPath);
      }
    }

    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    top->toSite(start, false);
    // do slicing here;
    
    GappedTopSegmentIteratorConstPtr gtop = 
       genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap);

    // need to review!
    Chain chain;
    chain._id = 0;
    while (gtop->getRightArrayIndex() < 
           (hal_index_t)genome->getNumTopSegments() &&
           gtop->getLeft()->getStartPosition() < endPosition)
    {
      if (gtop->hasParent() == true)
      {
        hal_offset_t leftOffset = 0;
        if ((hal_index_t)start > gtop->getStartPosition() 
            && (hal_index_t)start < gtop->getEndPosition())
        {
          leftOffset = start - gtop->getStartPosition() ;
        }
        hal_offset_t rightOffset = 0;
        if (endPosition - 1 > gtop->getStartPosition() 
            && endPosition - 1 < gtop->getEndPosition())
        {
          rightOffset = gtop->getEndPosition() + 1 - endPosition;
        }
        // need to do offsets for edge cases
        gtIteratorToChain(gtop, chain, leftOffset, rightOffset);
        outStream << chain;
        ++chain._id;
      }
      gtop->toRight();
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
Exemple #30
0
// Copy the top segment array of this genome into `dest`.
//
// `dest` must already have been dimensioned: it has either no top segments
// (in which case nothing is copied) or exactly as many as this genome.
// Coordinates, parent reversal flag, bottom parse index and next paralogy
// index are copied verbatim.  The parent index is re-based so that it refers
// to the same segment of the same-named sequence in dest's parent, which
// keeps the link correct when the two parents store their sequences at
// different bottom segment array offsets.
//
// Throws hal_exception if a sequence referenced through a parent index does
// not exist in dest's parent genome.
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0) {
    // Nothing to do if there are no top segments.
    return;
  }

  // Both parents are dereferenced below.
  assert(inParent != NULL && outParent != NULL);

  BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();

  for (; (hal_size_t)inTop->getArrayIndex() < n; inTop->toRight(),
         outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(inTop->getParentIndex());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    // Check that the sequences from the bottom segments we point to are the same. If not, correct the indices so that they are.
    if (inTop->getParentIndex() != NULL_INDEX) {
      inParentBottomSegIt->toParent(inTop);

      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();

      const Sequence *outParentSequence = outParent->getSequence(inParentSequence->getName());
      if (outParentSequence == NULL) {
        throw hal_exception("When copying top segments from " + getName() +
                            " to " + dest->getName() + ": sequence " +
                            inParentSequence->getName() +
                            " not found in parent genome " +
                            outParent->getName());
      }

      // Same offset inside the sequence, relative to where that sequence's
      // bottom segments start in the destination parent.
      hal_index_t inParentSegmentOffset = inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
      hal_index_t outParentSegmentIndex = inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();

      outTop->setParentIndex(outParentSegmentIndex);
    }
  }
}