예제 #1
0
// Maps `top` onto the other child genome ("child1" <-> "child2") and checks
// that exactly one mapped segment comes back, that its source matches `top`,
// and that its target matches the segment reached by walking
// top -> parent bottom segment -> other child by hand.
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment,
                                                TopSegmentIteratorConstPtr top)
{
  const Genome* parent = top->getGenome()->getParent();

  // The target is whichever of the two children `top` does not live in.
  const Genome* other = NULL;
  if (top->getGenome()->getName() == "child1")
  {
    other = alignment->openGenome("child2");
  }
  else
  {
    other = alignment->openGenome("child1");
  }

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, other, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // Source side of the mapping: must describe the query segment itself.
  CuAssertTrue(_testCase, hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // Target side: compare against the manually navigated sibling segment.
  BottomSegmentIteratorConstPtr viaParent = parent->getBottomSegmentIterator();
  viaParent->toParent(top);
  TopSegmentIteratorConstPtr expected = other->getTopSegmentIterator();
  expected->toChildG(viaParent, other);

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
예제 #2
0
// Maps `top` up to the ancestor genome named `ancName` and checks that exactly
// one mapped segment comes back, that its source matches `top`, and that its
// target matches the bottom segment reached by navigating upwards by hand.
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* ancestor = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, ancestor, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // Source side of the mapping: must describe the query segment itself.
  CuAssertTrue(_testCase, hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // Target side: walk up one level from `top` ...
  BottomSegmentIteratorConstPtr expected = ancestor->getBottomSegmentIterator();
  expected->toParent(top);
  // ... and one more level when that did not land in the requested ancestor
  // (i.e. `top` sits in a grandchild of it).
  if (expected->getGenome() != ancestor)
  {
    TopSegmentIteratorConstPtr hop =
       expected->getGenome()->getTopSegmentIterator();
    hop->toParseUp(expected);
    expected->toParent(hop);
  }

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
예제 #3
0
파일: halGenome.cpp 프로젝트: dayin1989/hal
// Copies the top segments of this genome into `dest`.
//
// For each segment the coordinates, parent index, parent-reversed flag,
// bottom parse index and next-paralogy index are copied. When a segment has a
// parent, its parent index is then re-based: the offset of the parent bottom
// segment within its sequence in this genome's parent is applied to the
// same-named sequence in dest's parent, so the copy still points into the
// right sequence even if the two parents order their sequences differently.
//
// `dest` must have either no top segments (nothing is copied) or exactly as
// many as this genome (asserted).
//
// Throws hal_exception if segments need copying but either genome has no
// parent, or if dest's parent lacks a sequence that this genome's parent has.
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0) {
    // Nothing to do if there are no top segments.
    return;
  }

  // Both parents are dereferenced below; fail with a clear message instead of
  // dereferencing a null pointer.
  if (inParent == NULL || outParent == NULL) {
    throw hal_exception("Cannot copy top segments from " + getName() +
                        " to " + dest->getName() +
                        ": both genomes must have a parent");
  }

  BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();

  for (; static_cast<hal_size_t>(inTop->getArrayIndex()) < n;
       inTop->toRight(), outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);
    // NOTE: a check that source and destination name the same sequence at
    // this site used to live here; it is disabled, so the sequence names are
    // no longer looked up (and copied) for every segment.

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(inTop->getParentIndex());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    // Check that the sequences from the bottom segments we point to are the
    // same. If not, correct the indices so that they are.
    if (inTop->getParentIndex() != NULL_INDEX) {
      inParentBottomSegIt->toParent(inTop);

      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
      const Sequence *outParentSequence =
        outParent->getSequence(inParentSequence->getName());
      if (outParentSequence == NULL) {
        throw hal_exception("Cannot copy top segments from " + getName() +
                            " to " + dest->getName() + ": sequence " +
                            inParentSequence->getName() +
                            " not found in genome " + outParent->getName());
      }

      // Keep the segment's offset within its sequence, but measure it from
      // where that sequence starts in the destination parent.
      hal_index_t inParentSegmentOffset =
        inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
      hal_index_t outParentSegmentIndex =
        inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();

      outTop->setParentIndex(outParentSegmentIndex);
    }
  }
}
// Updates `stats` from one gapped top segment:
//  - records its gaps in the gap-insertion length statistic (when it has any);
//  - for every top segment between its left and right ends that has a parent,
//    compares the child bases with the parent bases position by position and
//    counts transitions, transversions, other substitutions and matches.
// The gapped segment must not be reversed (asserted).
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  const hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  // String buffers are declared once and refilled for every segment.
  string parentBases, childBases;
  TopSegmentIteratorConstPtr first = gappedTop->getLeft();
  TopSegmentIteratorConstPtr last = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentIt =
     first->getTopSegment()->getGenome()->getParent()->getBottomSegmentIterator();

  TopSegmentIteratorConstPtr cur = first->copy();
  while (cur->getTopSegment()->getArrayIndex() <=
         last->getTopSegment()->getArrayIndex())
  {
    if (cur->hasParent())
    {
      parentIt->toParent(cur);
      cur->getString(childBases);
      parentIt->getString(parentBases);
      assert(childBases.length() == parentBases.length());

      const size_t numBases = childBases.length();
      for (size_t pos = 0; pos < numBases; ++pos)
      {
        if (isTransition(childBases[pos], parentBases[pos]))
        {
          ++stats._transitions;
          ++stats._subs;
        }
        else if (isTransversion(childBases[pos], parentBases[pos]))
        {
          ++stats._transversions;
          ++stats._subs;
        }
        else if (isSubstitution(childBases[pos], parentBases[pos]))
        {
          ++stats._subs;
        }
        else if (!isMissingData(childBases[pos]) &&
                 !isMissingData(parentBases[pos]))
        {
          // Neither base is missing data and no substitution was detected.
          ++stats._matches;
        }
      }
    }
    cur->toRight();
  }
}
예제 #5
0
// Validates the alignment, then runs testBottomSegment() for child indices 0
// and 1 on the first bottom segment of "parent" in three successive states:
// untouched, after slice(1, 2), and after additionally reversing it.
void MappedSegmentMapDownTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* parent = alignment->openGenome("parent");
  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();

  const hal_size_t childCount = 2;
  for (int stage = 0; stage < 3; ++stage)
  {
    // The transformations accumulate: stage 2 reverses the sliced segment.
    if (stage == 1)
    {
      bottom->slice(1, 2);
    }
    else if (stage == 2)
    {
      bottom->toReverse();
    }
    for (hal_size_t childIndex = 0; childIndex < childCount; ++childIndex)
    {
      testBottomSegment(alignment, bottom, childIndex);
    }
  }
}
// Builds a gapped bottom-segment iterator starting at `left`, defined with
// respect to the child genome at `childIndex`.
//
// Throws hal_exception if `left` has a non-zero start or end offset, or if
// the genome of `left` has no child at `childIndex`.
// In atomic mode the gap threshold is expected to be 0 (asserted).
DefaultGappedBottomSegmentIterator::DefaultGappedBottomSegmentIterator(
  BottomSegmentIteratorConstPtr left,
  hal_size_t childIndex,
  hal_size_t gapThreshold,
  bool atomic) :
  _childIndex(childIndex),
  _gapThreshold(gapThreshold),
  _atomic(atomic)
{
  const bool sliced = left->getStartOffset() != 0 || left->getEndOffset() != 0;
  if (sliced)
  {
    throw hal_exception("offset not currently supported in gapped iterators");
  }

  const Genome* childGenome =
     left->getBottomSegment()->getGenome()->getChild(_childIndex);
  if (childGenome == NULL)
  {
    throw hal_exception("can't init GappedBottomIterator with no child genome");
  }
  assert(!_atomic || _gapThreshold == 0);

  // All parent-side working iterators start out as copies of `left`.
  _left = left->copy();
  _right = left->copy();
  _temp = left->copy();
  _temp2 = left->copy();

  // All child-side working iterators start at the child's first top segment.
  _leftChild = childGenome->getTopSegmentIterator();
  _rightChild = _leftChild->copy();
  _leftDup = _leftChild->copy();
  _rightDup = _leftChild->copy();

  extendRight();
}
예제 #7
0
// Maps `bottom` down to the child genome at `childIndex` and checks that
// exactly one mapped segment comes back, that its source matches `bottom`,
// and that its target matches the top segment reached with toChild().
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* target = bottom->getGenome()->getChild(childIndex);

  set<MappedSegmentConstPtr> mapped;
  bottom->getMappedSegments(mapped, target, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // Source side of the mapping: must describe the query segment itself.
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == bottom->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() ==
               bottom->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == bottom->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == bottom->getReversed());

  // Target side: compare against direct parent-to-child navigation.
  TopSegmentIteratorConstPtr expected = target->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
// Advances `bs` with toRight() for as long as it sits on a segment that has
// no child at _childIndex and whose length is at most _gapThreshold. Stops
// without moving once `bs` is on the last segment (or on the first segment
// when `bs` is reversed), even if that segment still qualifies as a gap.
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  BottomSegmentIteratorConstPtr bs) const
{
  while (!bs->hasChild(_childIndex) && bs->getLength() <= _gapThreshold)
  {
    const bool atEnd = bs->getReversed()
                         ? bs->getBottomSegment()->isFirst()
                         : bs->getBottomSegment()->isLast();
    if (atEnd)
    {
      return;
    }
    bs->toRight();
  }
}
//////////////////////////////////////////////////////////////////////////////
// INTERNAL METHODS
//////////////////////////////////////////////////////////////////////////////
// Decides whether the bottom segments `left` and `right` pass every
// consistency check below with respect to the child at _childIndex; returns
// true only if all of them hold:
//   1. their child top segments have the same parent-reversed flag;
//   2. both or neither child segments have a next paralogy;
//   3. the child segments are ordered as expected for their orientation;
//   4. `left`/`right` share a sequence, and so do their child segments;
//   5. walking from the left child with toRight(), every segment before the
//      right child has no parent and a length <= _gapThreshold;
//   6. if there is a next paralogy, checks 3-5 (child side only) also hold
//      for the next-paralogy segments of both children.
//
// Preconditions (asserted): both inputs have a child at _childIndex and they
// are not equal.
//
// Although const, this repositions the member iterators _leftChild,
// _rightChild, _leftDup and _rightDup as scratch state.
bool DefaultGappedBottomSegmentIterator::compatible(
  BottomSegmentIteratorConstPtr left,
  BottomSegmentIteratorConstPtr right) const
{
  assert(left->hasChild(_childIndex) && right->hasChild(_childIndex));
  assert(left->equals(right) == false);
  
  // Position the scratch iterators on the children of the two inputs.
  _leftChild->toChild(left, _childIndex);
  _rightChild->toChild(right, _childIndex);

  // (1) Both children must have the same orientation relative to the parent.
  if (_leftChild->getTopSegment()->getParentReversed() != 
      _rightChild->getTopSegment()->getParentReversed())
  {
    return false;
  }

  // (2) Either both children are duplicated or neither is.
  if (_leftChild->hasNextParalogy() != _rightChild->hasNextParalogy())
  {
    return false;
  }
  // (3) Ordering: forward => left child must be leftOf the right child's
  // start; reversed => left child must be rightOf it.
  if ((!_leftChild->getReversed() && 
       _leftChild->leftOf(_rightChild->getStartPosition()) == false) ||
      (_leftChild->getReversed() && 
       _leftChild->rightOf(_rightChild->getStartPosition()) == false))
  {    
    return false;
  }
  
  // (4) No crossing of sequence boundaries, in the parent or in the child.
  if (left->getBottomSegment()->getSequence() != 
      right->getBottomSegment()->getSequence() ||
      _leftChild->getTopSegment()->getSequence() != 
      _rightChild->getTopSegment()->getSequence())
  {
    return false;
  }
  
  // (5) Step the left child with toRight(), skipping segments that have no
  // parent and are no longer than _gapThreshold. The first segment that is
  // not skippable must be the right child.
  while (true)
  {
    assert(_leftChild->isLast() == false);
    _leftChild->toRight();
    if (_leftChild->hasParent() == true || 
        _leftChild->getLength() > _gapThreshold)
    {
      if (_leftChild->equals(_rightChild))
      {
        break;
      }
      else
      {
        return false;
      }
    }
  }

  // The walk above moved _leftChild; reset both before the paralogy checks.
  _leftChild->toChild(left, _childIndex);
  _rightChild->toChild(right, _childIndex);
  if (_leftChild->hasNextParalogy() == true)
  {
    // (6) Repeat the ordering / sequence / gap checks on the next paralogs.
    _leftDup->copy(_leftChild);
    _leftDup->toNextParalogy();
    _rightDup->copy(_rightChild);
    _rightDup->toNextParalogy();
  
    // NOTE(review): the reversed case here tests
    // _rightDup->leftOf(_leftDup start), whereas check (3) above tests
    // _leftChild->rightOf(_rightChild start). Confirm the two forms are
    // meant to be equivalent.
    if ((_leftDup->getReversed() == false && 
         _leftDup->leftOf(_rightDup->getStartPosition()) == false) ||
        (_leftDup->getReversed() == true && 
         _rightDup->leftOf(_leftDup->getStartPosition()) == false))
    {
      return false;
    }
    if (_leftDup->getTopSegment()->getSequence() != 
        _rightDup->getTopSegment()->getSequence())
    {
      return false;
    }

    // Same gap walk as (5), on the paralogous segments.
    while (true)
    {
      assert(_leftDup->isLast() == false);
      _leftDup->toRight();
      if (_leftDup->hasParent() == true || 
          _leftDup->getLength() > _gapThreshold)
      {
        if (_leftDup->equals(_rightDup))
        {
          break;
        }
        else
        {
          return false;
        }
      }
    }
  }

  return true;
}
예제 #10
0
// Validates one sequence; throws hal_exception on the first problem found.
//
// Checks performed:
//  - every base of the sequence is a nucleotide character;
//  - if the genome has a parent: each top segment passes
//    validateTopSegment() and the top segment lengths sum to the sequence
//    length;
//  - if the genome has children: each bottom segment passes
//    validateBottomSegment() and the bottom segment lengths sum to the
//    sequence length.
void hal::validateSequence(const Sequence* sequence)
{
  // Verify that the DNA sequence doesn't contain funny characters
  DNAIteratorConstPtr dnaIt = sequence->getDNAIterator();
  hal_size_t length = sequence->getSequenceLength();
  for (hal_size_t i = 0; i < length; ++i)
  {
    char c = dnaIt->getChar();
    if (isNucleotide(c) == false)
    {
      stringstream ss;
      ss << "Non-nucleotide character discoverd at position " 
         << i << " of sequence " << sequence->getName() << ": " << c;
      throw hal_exception(ss.str());
    }
    // Move on to the next base; without this the loop would re-check the
    // first base `length` times. The iterator is only advanced while another
    // base remains, so it is never moved past the end of the sequence.
    if (i + 1 < length)
    {
      dnaIt->toRight();
    }
  }
  
  // Check the top segments
  if (sequence->getGenome()->getParent() != NULL)
  {
    hal_size_t totalTopLength = 0;
    TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator();
    hal_size_t numTopSegments = sequence->getNumTopSegments();
    for (hal_size_t i = 0; i < numTopSegments; ++i)
    {
      const TopSegment* topSegment = topIt->getTopSegment();
      validateTopSegment(topSegment);
      totalTopLength += topSegment->getLength();
      topIt->toRight();
    }
    // The segments must tile the sequence exactly.
    if (totalTopLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its top segments add up to " << totalTopLength;
      throw hal_exception(ss.str());
    }
  }

  // Check the bottom segments
  if (sequence->getGenome()->getNumChildren() > 0)
  {
    hal_size_t totalBottomLength = 0;
    BottomSegmentIteratorConstPtr bottomIt = 
       sequence->getBottomSegmentIterator();
    hal_size_t numBottomSegments = sequence->getNumBottomSegments();
    for (hal_size_t i = 0; i < numBottomSegments; ++i)
    {
      const BottomSegment* bottomSegment = bottomIt->getBottomSegment();
      validateBottomSegment(bottomSegment);
      totalBottomLength += bottomSegment->getLength();
      bottomIt->toRight();
    }
    // The segments must tile the sequence exactly.
    if (totalBottomLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its bottom segments add up to " << totalBottomLength;
      throw hal_exception(ss.str());
    }
  }
}
예제 #11
0
void hal::validateTopSegment(const TopSegment* topSegment)
{
  const Genome* genome = topSegment->getGenome();
  hal_index_t index = topSegment->getArrayIndex();
  if (index < 0 || index >= (hal_index_t)genome->getSequenceLength())
  {
    stringstream ss;
    ss << "Segment out of range " << index << " in genome "
       << genome->getName();
    throw hal_exception(ss.str());
  }

  if (topSegment->getLength() < 1)
  {
    stringstream ss;
    ss << "Top segment " << index  << " in genome " << genome->getName()
       << " has length 0 which is not currently supported";
    throw hal_exception(ss.str());
  }

  const Genome* parentGenome = genome->getParent();
  const hal_index_t parentIndex = topSegment->getParentIndex();
  if (parentGenome != NULL && parentIndex != NULL_INDEX)
  {
    if (parentIndex >= (hal_index_t)parentGenome->getNumBottomSegments())
    {
      stringstream ss;
      ss << "Parent index " << parentIndex << " of segment "
         << topSegment->getArrayIndex() << " out of range in genome "
         << parentGenome->getName();
      throw hal_exception(ss.str());
    }
    BottomSegmentIteratorConstPtr bottomSegmentIterator = 
       parentGenome->getBottomSegmentIterator(parentIndex);
    const BottomSegment* parentSegment = 
       bottomSegmentIterator->getBottomSegment();
    if (topSegment->getLength() != parentSegment->getLength())
    {
      stringstream ss;
      ss << "Parent length of segment " << topSegment->getArrayIndex() 
         << " in genome " << genome->getName() << " has length "
         << parentSegment->getLength() << " which does not match "
         << topSegment->getLength();
      throw hal_exception(ss.str());
    }
  }

  const hal_index_t parseIndex = topSegment->getBottomParseIndex();
  if (parseIndex == NULL_INDEX)
  {
    if (genome->getNumChildren() != 0)
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has null parse index";
      throw hal_exception(ss.str());
    }
  }
  else
  {
    if (parseIndex >= (hal_index_t)genome->getNumBottomSegments())
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse index out of range";
      throw hal_exception(ss.str());
    }
    hal_offset_t parseOffset = topSegment->getBottomParseOffset();
    BottomSegmentIteratorConstPtr bottomSegmentIterator =
       genome->getBottomSegmentIterator(parseIndex);
    const BottomSegment* parseSegment = 
       bottomSegmentIterator->getBottomSegment();
    if (parseOffset >= parseSegment->getLength())
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse offset out of range";
      throw hal_exception(ss.str());
    }
    if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != 
        topSegment->getStartPosition())
    {
      throw hal_exception("parse index broken in top segment in genome " +
                          genome->getName());
                          
    }
  }

  const hal_index_t paralogyIndex = topSegment->getNextParalogyIndex();
  if (paralogyIndex != NULL_INDEX)
  {
    TopSegmentIteratorConstPtr pti = 
       genome->getTopSegmentIterator(paralogyIndex);
    if (pti->getTopSegment()->getParentIndex() != topSegment->getParentIndex())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has parent index "
         << topSegment->getParentIndex() << ", but next paraglog " 
         << topSegment->getNextParalogyIndex() << " has parent Index " 
         << pti->getTopSegment()->getParentIndex() 
         << ". Paralogous top segments must share same parent.";
      throw hal_exception(ss.str());
    }
    if (paralogyIndex == topSegment->getArrayIndex())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has paralogy index " << topSegment->getNextParalogyIndex()
         << " which isn't allowed";
      throw hal_exception(ss.str());
    }
  }
}
// Walks forward and reversed gapped top/bottom iterators across the "child"
// and "parent" genomes, first from index 0 upwards and then from the last
// index downwards. At every step each gapped iterator is expected to span
// exactly one segment (left == right) with the expected array index.
void 
GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t gapThreshold = 9999999;
  // The child's segment count is also used to index the parent below.
  const hal_size_t numSegments = child->getNumTopSegments();

  // --- Pass 1: start at the first segment ---------------------------------
  GappedTopSegmentIteratorConstPtr topFwd =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr bottomFwd =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr topRev =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr bottomRev =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  bottomRev->toReverse();

  for (size_t i = 0; i < numSegments; ++i)
  {
    TopSegmentIteratorConstPtr topSeg = topFwd->getLeft();
    CuAssertTrue(_testCase, topSeg->equals(topFwd->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSeg->getTopSegment()->getArrayIndex()) == i);
    topFwd->toRight();

    BottomSegmentIteratorConstPtr bottomSeg = bottomFwd->getLeft();
    CuAssertTrue(_testCase, bottomSeg->equals(bottomFwd->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   bottomSeg->getBottomSegment()->getArrayIndex()) == i);
    bottomFwd->toRight();

    // Reversed iterators advance through increasing indices with toLeft().
    TopSegmentIteratorConstPtr topSegRev = topRev->getLeft();
    CuAssertTrue(_testCase, topSegRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSegRev->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr bottomSegRev = bottomRev->getLeft();
    CuAssertTrue(_testCase, bottomSegRev->equals(bottomRev->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   bottomSegRev->getBottomSegment()->getArrayIndex()) == i);
    bottomRev->toLeft();
  }

  // --- Pass 2: start at the last segment and walk back --------------------
  topFwd = child->getGappedTopSegmentIterator(numSegments - 1, gapThreshold);
  bottomFwd = parent->getGappedBottomSegmentIterator(numSegments - 1, 0,
                                                     gapThreshold);
  topRev = child->getGappedTopSegmentIterator(numSegments - 1, gapThreshold);
  topRev->toReverse();
  bottomRev = parent->getGappedBottomSegmentIterator(numSegments - 1, 0,
                                                     gapThreshold);
  bottomRev->toReverse();

  for (hal_index_t i = numSegments - 1; i >= 0; --i)
  {
    TopSegmentIteratorConstPtr topSeg = topFwd->getLeft();
    CuAssertTrue(_testCase, topSeg->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, topSeg->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr bottomSeg = bottomFwd->getLeft();
    CuAssertTrue(_testCase, bottomSeg->equals(bottomFwd->getRight()));
    CuAssertTrue(_testCase,
                 bottomSeg->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, bottomFwd->getReversed() == false);
    bottomFwd->toLeft();

    TopSegmentIteratorConstPtr topSegRev = topRev->getLeft();
    CuAssertTrue(_testCase, topSegRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase, topSegRev->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr bottomSegRev = bottomRev->getLeft();
    CuAssertTrue(_testCase, bottomSegRev->equals(bottomRev->getRight()));
    CuAssertTrue(_testCase,
                 bottomSegRev->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, bottomRev->getReversed() == true);
    bottomRev->toRight();
  }
}
// Walks forward and reversed gapped top/bottom iterators across the "child"
// and "parent" genomes, where every gapped segment is expected to span a run
// of 20 plain segments (left index i, right index i + 19 when not reversed).
// Also checks that mapping a gapped segment to the other genome with
// toParent()/toChild() yields the corresponding gapped segment.
void 
GappedSegmentIteratorIndelTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t gapThreshold = 9999999;
  // The child's segment count is also used to index the parent below.
  const hal_size_t numSegments = child->getNumTopSegments();

  // --- Pass 1: start at the first segment ---------------------------------
  GappedTopSegmentIteratorConstPtr topFwd =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr bottomFwd =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr topRev =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr bottomRev =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  bottomRev->toReverse();

  for (size_t i = 0; i < numSegments; i += 20)
  {
    // Forward iterators: left end at i, right end at i + 19.
    TopSegmentIteratorConstPtr topSeg = topFwd->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSeg->getTopSegment()->getArrayIndex()) == i);
    topSeg = topFwd->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSeg->getTopSegment()->getArrayIndex()) == i + 19);

    BottomSegmentIteratorConstPtr bottomSeg = bottomFwd->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   bottomSeg->getBottomSegment()->getArrayIndex()) == i);
    bottomSeg = bottomFwd->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   bottomSeg->getBottomSegment()->getArrayIndex()) == i + 19);

    // Child -> parent mapping must give the parent's gapped segment
    // (orientation is normalised before comparing).
    GappedBottomSegmentIteratorConstPtr mappedUp = bottomFwd->copy();
    mappedUp->toParent(topFwd);
    if (mappedUp->getReversed())
    {
      mappedUp->toReverse();
    }
    CuAssertTrue(_testCase, mappedUp->equals(bottomFwd));

    // Parent -> child mapping must give the child's gapped segment.
    GappedTopSegmentIteratorConstPtr mappedDown = topFwd->copy();
    mappedDown->toChild(bottomFwd);
    if (mappedDown->getReversed())
    {
      mappedDown->toReverse();
    }
    CuAssertTrue(_testCase, mappedDown->equals(topFwd));

    topFwd->toRight();
    bottomFwd->toRight();

    // Reversed iterators: left end at i + 19, right end at i.
    TopSegmentIteratorConstPtr topSegRev = topRev->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSegRev->getTopSegment()->getArrayIndex()) == i + 19);
    topSegRev = topRev->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSegRev->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr bottomSegRev = bottomRev->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   bottomSegRev->getBottomSegment()->getArrayIndex()) == i + 19);
    bottomSegRev = bottomRev->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   bottomSegRev->getBottomSegment()->getArrayIndex()) == i);
    bottomRev->toLeft();
  }

  // --- Pass 2: start at the last run of 20 and walk back ------------------
  topFwd = child->getGappedTopSegmentIterator(numSegments - 20, gapThreshold);
  bottomFwd = parent->getGappedBottomSegmentIterator(numSegments - 20, 0,
                                                     gapThreshold);
  topRev = child->getGappedTopSegmentIterator(numSegments - 20, gapThreshold);
  topRev->toReverse();
  bottomRev = parent->getGappedBottomSegmentIterator(numSegments - 20, 0,
                                                     gapThreshold);
  bottomRev->toReverse();

  // Here i is the index of the LAST plain segment of each run.
  for (hal_index_t i = numSegments - 1; i >= 0; i -= 20)
  {
    TopSegmentIteratorConstPtr topSeg = topFwd->getLeft();
    CuAssertTrue(_testCase,
                 topSeg->getTopSegment()->getArrayIndex() == i - 19);
    topSeg = topFwd->getRight();
    CuAssertTrue(_testCase, topSeg->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr bottomSeg = bottomFwd->getLeft();
    CuAssertTrue(_testCase,
                 bottomSeg->getBottomSegment()->getArrayIndex() == i - 19);
    bottomSeg = bottomFwd->getRight();
    CuAssertTrue(_testCase,
                 bottomSeg->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, bottomFwd->getReversed() == false);
    bottomFwd->toLeft();

    TopSegmentIteratorConstPtr topSegRev = topRev->getLeft();
    CuAssertTrue(_testCase, topSegRev->getTopSegment()->getArrayIndex() == i);
    topSegRev = topRev->getRight();
    CuAssertTrue(_testCase,
                 topSegRev->getTopSegment()->getArrayIndex() == i - 19);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr bottomSegRev = bottomRev->getLeft();
    CuAssertTrue(_testCase,
                 bottomSegRev->getBottomSegment()->getArrayIndex() == i);
    bottomSegRev = bottomRev->getRight();
    CuAssertTrue(_testCase,
                 bottomSegRev->getBottomSegment()->getArrayIndex() == i - 19);
    CuAssertTrue(_testCase, bottomRev->getReversed() == true);
    bottomRev->toRight();
  }
}
// quickly count subsitutions without loading rearrangement machinery.
// used for benchmarks for basic file scanning... and not much else since
// the interface is still a bit wonky.
void SummarizeMutations::substitutionAnalysis(const Genome* genome, 
                                               MutationsStats& stats)
{
  assert(stats._subs == 0);
  if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 ||
      (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end()))
  {
    return;
  }
  const Genome* parent = genome->getParent();
  string pname = parent != NULL ? parent->getName() : string();
  StrPair branchName(genome->getName(), pname);

  BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator();
  
  string gString, cString;

  hal_size_t n = genome->getNumBottomSegments();
  vector<hal_size_t> children;
  hal_size_t m = genome->getNumChildren();
  for (hal_size_t i = 0; i < m; ++i)
  {
    string cName = genome->getChild(i)->getName();
    if (!_targetSet || 
        (_targetSet && _targetSet->find(cName) != _targetSet->end()))
    {
      children.push_back(i);
    }
  }
  if (children.empty())
  {
    return;
  }

  for (hal_size_t i = 0; i < n; ++i)
  {
    bool readString = false;
    for (size_t j = 0; j < children.size(); ++j)
    {
      if (bottom->hasChild(children[j]))
      {
        if (readString == false)
        {
          bottom->getString(gString);
          readString = true;
        }
        top->toChild(bottom, children[j]);
        top->getString(cString);
        assert(gString.length() == cString.length());
        for (hal_size_t k = 0; k < gString.length(); ++k)
        {
          if (isSubstitution(gString[k], cString[k]))
          {
            ++stats._subs;
          }
        }
      }
    }
    bottom->toRight();
  }
}
// Checks coordinates, orientation and sequence strings of top segment
// iterators in "child1": first when reached from a bottom segment of
// "parent1" via toChild() (whole and sliced), then when reached from
// child1's own bottom segments via toParseUp() in both orientations.
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* parent1 = alignment->openGenome("parent1");
  const Genome* child1 = alignment->openGenome("child1");

  TopSegmentIteratorConstPtr topIt = child1->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bottomIt = parent1->getBottomSegmentIterator();

  // --- toChild from the parent's first bottom segment ---------------------
  TopSegmentIteratorConstPtr mappedTop = child1->getTopSegmentIterator();
  mappedTop->toChild(bottomIt, 0);

  // The untouched iterator is forward ...
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 10);
  CuAssertTrue(_testCase, topIt->getReversed() == false);

  // ... while the one reached through the parent is expected to be reversed.
  CuAssertTrue(_testCase, mappedTop->getStartPosition() == 9);
  CuAssertTrue(_testCase, mappedTop->getLength() == 10);
  CuAssertTrue(_testCase, mappedTop->getReversed() == true);

  // Slice the parent segment and map again.
  bottomIt->slice(1, 3);
  mappedTop->toChild(bottomIt, 0);

  CuAssertTrue(_testCase, bottomIt->getStartPosition() == 1);
  CuAssertTrue(_testCase, bottomIt->getLength() == 6);
  CuAssertTrue(_testCase, mappedTop->getStartPosition() == 8);
  CuAssertTrue(_testCase, mappedTop->getLength() == 6);

  string bases;
  bottomIt->getString(bases);
  CuAssertTrue(_testCase, bases == "CCTACG");
  mappedTop->getString(bases);
  CuAssertTrue(_testCase, bases == "CACGTA");

  // --- toParseUp from child1's own bottom segments ------------------------
  bottomIt = child1->getBottomSegmentIterator();
  CuAssertTrue(_testCase, bottomIt->getReversed() == false);

  // First bottom segment, forward.
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // First bottom segment, reversed.
  bottomIt->toReverse();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 4);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // Back to forward, then on to the second bottom segment.
  bottomIt->toReverse();
  CuAssertTrue(_testCase, bottomIt->getReversed() == false);
  bottomIt->toRight();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 5);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // Second bottom segment, reversed.
  bottomIt->toReverse();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, topIt->getLength() == 5);
}
// Exercises TopSegmentIterator::toParseUp() on the genomes "case1" to
// "case4". In every case the top iterator is parsed up from a bottom segment
// twice, once whole and once after slicing it, and must start at the same
// position as the bottom iterator each time.
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1: first bottom segment; lengths are compared as well.
  const Genome* genome1 = alignment->openGenome("case1");
  TopSegmentIteratorConstPtr topIt = genome1->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bottomIt = genome1->getBottomSegmentIterator();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());
  CuAssertTrue(_testCase, bottomIt->getLength() == topIt->getLength());
  bottomIt->slice(3, 1);
  topIt->toParseUp(bottomIt);
  // Slicing 3 + 1 shortens the iterator by 4 relative to the full segment.
  CuAssertTrue(_testCase,
               bottomIt->getLength() ==
               bottomIt->getBottomSegment()->getLength() - 4);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());
  CuAssertTrue(_testCase, bottomIt->getLength() == topIt->getLength());

  // case 2: bottom segment 1, sliced by (1, 1).
  const Genome* genome2 = alignment->openGenome("case2");
  topIt = genome2->getTopSegmentIterator();
  bottomIt = genome2->getBottomSegmentIterator(1);
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());
  bottomIt->slice(1, 1);
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());

  // case 3: first bottom segment, sliced by (2, 1).
  const Genome* genome3 = alignment->openGenome("case3");
  topIt = genome3->getTopSegmentIterator();
  bottomIt = genome3->getBottomSegmentIterator();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());
  bottomIt->slice(2, 1);
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());

  // case 4: bottom segment 1, sliced by (2, 2).
  const Genome* genome4 = alignment->openGenome("case4");
  topIt = genome4->getTopSegmentIterator();
  bottomIt = genome4->getBottomSegmentIterator(1);
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());
  bottomIt->slice(2, 2);
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase,
               bottomIt->getStartPosition() == topIt->getStartPosition());
}
예제 #17
0
// Checks segment mapping between two sibling genomes in the presence of
// duplications:
//  1. child1 -> child2: the first mapped segment must have `top` as its
//     source and the manually navigated sister segment as its target;
//  2. child2 -> child1: exactly three mapped segments are expected, each with
//     `top` as its source, and together covering array indices 0, 1 and 2.
void MappedSegmentMapDupeTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* parent = alignment->openGenome("parent");
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  TopSegmentIteratorConstPtr top = child1->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, child2, NULL, true);
//  CuAssertTrue(_testCase, results.size() == 3);

  // The exact-size check above is disabled, so make sure there is at least
  // one result before dereferencing begin(). The explicit return keeps this
  // safe even if the assertion macro does not abort the test.
  CuAssertTrue(_testCase, !results.empty());
  if (results.empty())
  {
    return;
  }
  
  MappedSegmentConstPtr mseg = *results.begin();
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase, mseg->getSource()->getStartPosition() == 
               top->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getReversed() == top->getReversed());
  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  TopSegmentIteratorConstPtr sister = child2->getTopSegmentIterator();
  sister->toChildG(bottom, child2);
  CuAssertTrue(_testCase, mseg->getGenome() == sister->getGenome());
  CuAssertTrue(_testCase, 
               mseg->getStartPosition() == sister->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getLength() == sister->getLength());
  CuAssertTrue(_testCase, 
               mseg->getReversed() == sister->getReversed());

  // Now map in the other direction: child2 -> child1.
  top = child2->getTopSegmentIterator();
  results.clear();
  sister = child1->getTopSegmentIterator();
  top->getMappedSegments(results, child1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 3);

  const hal_index_t numExpected = 3;
  bool found[numExpected] = {false};
  set<MappedSegmentConstPtr>::iterator i = results.begin();
  for (; i != results.end(); ++i)
  {
    MappedSegmentConstPtr dupSeg = *i;
    CuAssertTrue(_testCase, dupSeg->getSource()->getGenome() == 
                 top->getGenome());
    CuAssertTrue(_testCase, dupSeg->getSource()->getStartPosition() == 
                 top->getStartPosition());
    CuAssertTrue(_testCase, 
                 dupSeg->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase, 
                 dupSeg->getSource()->getReversed() == top->getReversed());
    BottomSegmentIteratorConstPtr dupBottom = parent->getBottomSegmentIterator();
    dupBottom->toParent(top);
    // NOTE(review): this iterator is created on child2 but moved into child1
    // by toChildG(); kept as in the original test. Confirm that creating it
    // from child1 is not what was intended.
    TopSegmentIteratorConstPtr dupSister = child2->getTopSegmentIterator();
    dupSister->toChildG(dupBottom, child1);
    CuAssertTrue(_testCase, dupSeg->getGenome() == dupSister->getGenome());
    CuAssertTrue(_testCase, 
                 dupSeg->getLength() == dupSister->getLength());

    // Only record indices that fit in found[]; anything else is a test
    // failure rather than an out-of-bounds write.
    const hal_index_t arrayIndex = dupSeg->getArrayIndex();
    const bool inRange = arrayIndex >= 0 && arrayIndex < numExpected;
    CuAssertTrue(_testCase, inRange);
    if (inRange)
    {
      found[arrayIndex] = true;
    }
  }
  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}