// Moves bs to the right for as long as it sits on a "gap" segment, i.e. a
// segment that has no child at _childIndex and whose length does not exceed
// _gapThreshold.  The walk also stops, without moving, when a non-reversed
// iterator is on the last bottom segment or a reversed iterator is on the
// first one.
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  BottomSegmentIteratorConstPtr bs) const
{
  for (;;)
  {
    // anything with a child, or longer than the threshold, is not a gap
    const bool onGap = !bs->hasChild(_childIndex) &&
       bs->getLength() <= _gapThreshold;
    if (!onGap)
    {
      return;
    }

    // nothing left to step onto
    const bool atBoundary = bs->getReversed() ?
       bs->getBottomSegment()->isFirst() :
       bs->getBottomSegment()->isLast();
    if (atBoundary)
    {
      return;
    }

    bs->toRight();
  }
}
// Constructs a gapped bottom segment iterator starting at `left`.
//
// left:         bottom segment iterator to start from; it must not be
//               sliced (both offsets zero)
// childIndex:   index of the child genome the iterator is defined against
// gapThreshold: segments with no child and length <= this are gaps
// atomic:       asserted to imply gapThreshold == 0
//
// Throws hal_exception if `left` carries an offset or if the genome of
// `left` has no child at childIndex.  All internal working iterators are
// created as copies here, after which extendRight() is called.
DefaultGappedBottomSegmentIterator::DefaultGappedBottomSegmentIterator(
  BottomSegmentIteratorConstPtr left,
  hal_size_t childIndex,
  hal_size_t gapThreshold,
  bool atomic) :
  _childIndex(childIndex),
  _gapThreshold(gapThreshold),
  _atomic(atomic)
{
  const bool unsliced = left->getStartOffset() == 0 &&
     left->getEndOffset() == 0;
  if (!unsliced)
  {
    throw hal_exception("offset not currently supported in gapped iterators");
  }

  const Genome* parentGenome = left->getBottomSegment()->getGenome();
  const Genome* childGenome = parentGenome->getChild(_childIndex);
  if (!childGenome)
  {
    throw hal_exception("can't init GappedBottomIterator with no child genome");
  }

  assert(!_atomic || _gapThreshold == 0);

  // bottom-side working iterators all start on the input segment
  _left = left->copy();
  _right = left->copy();
  _temp = left->copy();
  _temp2 = left->copy();

  // top-side working iterators all start from the child's top iterator
  _leftChild = childGenome->getTopSegmentIterator();
  _rightChild = _leftChild->copy();
  _leftDup = _leftChild->copy();
  _rightDup = _leftChild->copy();

  extendRight();
}
// Checks TopSegmentIterator::toParseUp() on the genomes "case1".."case4".
// For every genome the top iterator is parsed up from a whole bottom
// segment and then from a sliced one; each time both iterators must report
// the same start position (case 1 additionally compares lengths).
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  BottomSegmentIteratorConstPtr bottom;
  TopSegmentIteratorConstPtr top;

  // case 1: default bottom iterator, then slice(3, 1)
  const Genome* genome1 = alignment->openGenome("case1");
  top = genome1->getTopSegmentIterator();
  bottom = genome1->getBottomSegmentIterator();
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, bottom->getLength() == top->getLength());
  bottom->slice(3, 1);
  top->toParseUp(bottom);
  // the slice removes 3 + 1 bases from the underlying segment
  CuAssertTrue(_testCase,
               bottom->getLength() ==
               bottom->getBottomSegment()->getLength() - 4);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, bottom->getLength() == top->getLength());

  // case 2: bottom iterator at index 1, then slice(1, 1)
  const Genome* genome2 = alignment->openGenome("case2");
  top = genome2->getTopSegmentIterator();
  bottom = genome2->getBottomSegmentIterator(1);
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());
  bottom->slice(1, 1);
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());

  // case 3: default bottom iterator, then slice(2, 1)
  const Genome* genome3 = alignment->openGenome("case3");
  top = genome3->getTopSegmentIterator();
  bottom = genome3->getBottomSegmentIterator();
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());
  bottom->slice(2, 1);
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());

  // case 4: bottom iterator at index 1, then slice(2, 2)
  const Genome* genome4 = alignment->openGenome("case4");
  top = genome4->getTopSegmentIterator();
  bottom = genome4->getBottomSegmentIterator(1);
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());
  bottom->slice(2, 2);
  top->toParseUp(bottom);
  CuAssertTrue(_testCase,
               bottom->getStartPosition() == top->getStartPosition());
}
//////////////////////////////////////////////////////////////////////////////
// INTERNAL METHODS
//////////////////////////////////////////////////////////////////////////////
// Tests whether the bottom segments under `left` and `right` pass every
// pairing rule below, judged through their children in genome _childIndex.
//
// Preconditions (asserted only): both segments have a child at _childIndex
// and the two iterators are not equal.
//
// Returns true only if all of these hold:
//  - both children carry the same parent-reversed flag;
//  - both children agree on whether they have a next paralogy;
//  - the left child is on the required side of the right child's start
//    position (leftOf when it is not reversed, rightOf when it is);
//  - left and right lie in the same bottom sequence, and their children in
//    the same top sequence;
//  - walking right from the left child, the first segment that has a parent
//    or is longer than _gapThreshold is exactly the right child;
//  - when a next paralogy exists, the next paralogs of both children pass
//    the corresponding ordering, sequence and walk tests.
//
// Side effects: _leftChild and _rightChild are repositioned, and so are
// _leftDup and _rightDup when a next paralogy exists, although the method
// is declared const.
//
// NOTE(review): presumably "compatible" means the two segments may belong
// to the same gapped segment -- confirm against the callers.
bool DefaultGappedBottomSegmentIterator::compatible(
  BottomSegmentIteratorConstPtr left,
  BottomSegmentIteratorConstPtr right) const
{
  assert(left->hasChild(_childIndex) && right->hasChild(_childIndex));
  assert(left->equals(right) == false);
  
  // position the working top iterators on the two children
  _leftChild->toChild(left, _childIndex);
  _rightChild->toChild(right, _childIndex);

  // both children must have the same orientation relative to the parent
  if (_leftChild->getTopSegment()->getParentReversed() != 
      _rightChild->getTopSegment()->getParentReversed())
  {
    return false;
  }

  // either both children have a next paralogy or neither does
  if (_leftChild->hasNextParalogy() != _rightChild->hasNextParalogy())
  {
    return false;
  }
  // ordering of the children; which test applies depends on whether the
  // left child iterator is reversed
  if ((!_leftChild->getReversed() && 
       _leftChild->leftOf(_rightChild->getStartPosition()) == false) ||
      (_leftChild->getReversed() && 
       _leftChild->rightOf(_rightChild->getStartPosition()) == false))
  {    
    return false;
  }
  
  // parents must share a sequence and so must the children
  if (left->getBottomSegment()->getSequence() != 
      right->getBottomSegment()->getSequence() ||
      _leftChild->getTopSegment()->getSequence() != 
      _rightChild->getTopSegment()->getSequence())
  {
    return false;
  }
  
  // Step right from the left child, passing over segments that have no
  // parent and are no longer than _gapThreshold.  The first segment that
  // is not passed over has to be the right child.
  // NOTE(review): only the assert keeps this walk from running past the
  // last segment -- confirm it cannot happen when asserts are disabled.
  while (true)
  {
    assert(_leftChild->isLast() == false);
    _leftChild->toRight();
    if (_leftChild->hasParent() == true || 
        _leftChild->getLength() > _gapThreshold)
    {
      if (_leftChild->equals(_rightChild))
      {
        break;
      }
      else
      {
        return false;
      }
    }
  }

  // the walk moved _leftChild, so put both iterators back on the children
  _leftChild->toChild(left, _childIndex);
  _rightChild->toChild(right, _childIndex);
  if (_leftChild->hasNextParalogy() == true)
  {
    // run the same kind of tests on the next paralog of each child
    _leftDup->copy(_leftChild);
    _leftDup->toNextParalogy();
    _rightDup->copy(_rightChild);
    _rightDup->toNextParalogy();
  
    // NOTE(review): the reversed case here calls _rightDup->leftOf(...)
    // whereas the check on the children above calls _leftChild->rightOf(...)
    // -- confirm the two forms are meant to be equivalent.
    if ((_leftDup->getReversed() == false && 
         _leftDup->leftOf(_rightDup->getStartPosition()) == false) ||
        (_leftDup->getReversed() == true && 
         _rightDup->leftOf(_leftDup->getStartPosition()) == false))
    {
      return false;
    }
    if (_leftDup->getTopSegment()->getSequence() != 
        _rightDup->getTopSegment()->getSequence())
    {
      return false;
    }

    // same gap-skipping walk as above, between the two paralogs
    while (true)
    {
      assert(_leftDup->isLast() == false);
      _leftDup->toRight();
      if (_leftDup->hasParent() == true || 
          _leftDup->getLength() > _gapThreshold)
      {
        if (_leftDup->equals(_rightDup))
        {
          break;
        }
        else
        {
          return false;
        }
      }
    }
  }

  return true;
}
// Validates one sequence of a genome.
//
// Checks performed, in order:
//  1. every base of the sequence satisfies isNucleotide();
//  2. if the genome has a parent, each top segment of the sequence passes
//     validateTopSegment() and the top segment lengths sum to the sequence
//     length;
//  3. if the genome has children, each bottom segment of the sequence
//     passes validateBottomSegment() and the bottom segment lengths sum to
//     the sequence length.
//
// Throws hal_exception describing the first problem found (exceptions
// raised by the per-segment validators propagate unchanged).
void hal::validateSequence(const Sequence* sequence)
{
  // Verify that the DNA sequence doesn't contain funny characters
  DNAIteratorConstPtr dnaIt = sequence->getDNAIterator();
  hal_size_t length = sequence->getSequenceLength();
  for (hal_size_t i = 0; i < length; ++i)
  {
    char c = dnaIt->getChar();
    if (isNucleotide(c) == false)
    {
      stringstream ss;
      ss << "Non-nucleotide character discoverd at position " 
         << i << " of sequence " << sequence->getName() << ": " << c;
      throw hal_exception(ss.str());
    }
    // Step to the next base so that every position is inspected, not just
    // the first one.  The iterator is never moved past the final base.
    if (i + 1 < length)
    {
      dnaIt->toRight();
    }
  }
  
  // Check the top segments
  if (sequence->getGenome()->getParent() != NULL)
  {
    hal_size_t totalTopLength = 0;
    TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator();
    hal_size_t numTopSegments = sequence->getNumTopSegments();
    for (hal_size_t i = 0; i < numTopSegments; ++i)
    {
      const TopSegment* topSegment = topIt->getTopSegment();
      validateTopSegment(topSegment);
      totalTopLength += topSegment->getLength();
      topIt->toRight();
    }
    // the top segments must cover the sequence exactly
    if (totalTopLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its top segments add up to " << totalTopLength;
      throw hal_exception(ss.str());
    }
  }

  // Check the bottom segments
  if (sequence->getGenome()->getNumChildren() > 0)
  {
    hal_size_t totalBottomLength = 0;
    BottomSegmentIteratorConstPtr bottomIt = 
       sequence->getBottomSegmentIterator();
    hal_size_t numBottomSegments = sequence->getNumBottomSegments();
    for (hal_size_t i = 0; i < numBottomSegments; ++i)
    {
      const BottomSegment* bottomSegment = bottomIt->getBottomSegment();
      validateBottomSegment(bottomSegment);
      totalBottomLength += bottomSegment->getLength();
      bottomIt->toRight();
    }
    // the bottom segments must cover the sequence exactly
    if (totalBottomLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its bottom segments add up to " << totalBottomLength;
      throw hal_exception(ss.str());
    }
  }
}
// Validates a single top segment.
//
// Checks performed, in order:
//  1. the segment's array index lies inside the genome's top segment array;
//  2. the segment length is at least 1;
//  3. if the genome has a parent and the segment has a parent index, that
//     index is inside the parent's bottom segment array and the parent
//     segment has the same length;
//  4. the bottom parse index is null only when the genome has no children;
//     otherwise the index and offset are in range and
//     parse offset + parse segment start == top segment start;
//  5. if the segment has a next paralogy index, that index is in range,
//     the paralog has the same parent index, and the segment does not
//     name itself as its own paralog.
//
// Throws hal_exception describing the first violated rule.
void hal::validateTopSegment(const TopSegment* topSegment)
{
  const Genome* genome = topSegment->getGenome();
  hal_index_t index = topSegment->getArrayIndex();
  // The array index addresses the genome's top segment array, so it is
  // bounded by the number of top segments rather than the number of bases.
  if (index < 0 || index >= (hal_index_t)genome->getNumTopSegments())
  {
    stringstream ss;
    ss << "Segment out of range " << index << " in genome "
       << genome->getName();
    throw hal_exception(ss.str());
  }

  if (topSegment->getLength() < 1)
  {
    stringstream ss;
    ss << "Top segment " << index  << " in genome " << genome->getName()
       << " has length 0 which is not currently supported";
    throw hal_exception(ss.str());
  }

  const Genome* parentGenome = genome->getParent();
  const hal_index_t parentIndex = topSegment->getParentIndex();
  if (parentGenome != NULL && parentIndex != NULL_INDEX)
  {
    // reject indices on either side of the parent's bottom array before
    // an iterator is built from them
    if (parentIndex < 0 ||
        parentIndex >= (hal_index_t)parentGenome->getNumBottomSegments())
    {
      stringstream ss;
      ss << "Parent index " << parentIndex << " of segment "
         << topSegment->getArrayIndex() << " out of range in genome "
         << parentGenome->getName();
      throw hal_exception(ss.str());
    }
    BottomSegmentIteratorConstPtr bottomSegmentIterator = 
       parentGenome->getBottomSegmentIterator(parentIndex);
    const BottomSegment* parentSegment = 
       bottomSegmentIterator->getBottomSegment();
    if (topSegment->getLength() != parentSegment->getLength())
    {
      stringstream ss;
      ss << "Parent length of segment " << topSegment->getArrayIndex() 
         << " in genome " << genome->getName() << " has length "
         << parentSegment->getLength() << " which does not match "
         << topSegment->getLength();
      throw hal_exception(ss.str());
    }
  }

  const hal_index_t parseIndex = topSegment->getBottomParseIndex();
  if (parseIndex == NULL_INDEX)
  {
    // a null parse index is only acceptable when there is no bottom array
    if (genome->getNumChildren() != 0)
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has null parse index";
      throw hal_exception(ss.str());
    }
  }
  else
  {
    if (parseIndex < 0 ||
        parseIndex >= (hal_index_t)genome->getNumBottomSegments())
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse index out of range";
      throw hal_exception(ss.str());
    }
    hal_offset_t parseOffset = topSegment->getBottomParseOffset();
    BottomSegmentIteratorConstPtr bottomSegmentIterator =
       genome->getBottomSegmentIterator(parseIndex);
    const BottomSegment* parseSegment = 
       bottomSegmentIterator->getBottomSegment();
    if (parseOffset >= parseSegment->getLength())
    {
      stringstream ss;
      ss << "Top Segment " << topSegment->getArrayIndex() << " in genome "
         << genome->getName() << " has parse offset out of range";
      throw hal_exception(ss.str());
    }
    // the parse offset must land exactly on the start of this top segment
    if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != 
        topSegment->getStartPosition())
    {
      throw hal_exception("parse index broken in top segment in genome " +
                          genome->getName());
    }
  }

  const hal_index_t paralogyIndex = topSegment->getNextParalogyIndex();
  if (paralogyIndex != NULL_INDEX)
  {
    // range-check the paralogy link before it is used to build an iterator
    if (paralogyIndex < 0 ||
        paralogyIndex >= (hal_index_t)genome->getNumTopSegments())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has paralogy index " << paralogyIndex
         << " out of range in genome " << genome->getName();
      throw hal_exception(ss.str());
    }
    TopSegmentIteratorConstPtr pti = 
       genome->getTopSegmentIterator(paralogyIndex);
    if (pti->getTopSegment()->getParentIndex() != topSegment->getParentIndex())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has parent index "
         << topSegment->getParentIndex() << ", but next paraglog " 
         << topSegment->getNextParalogyIndex() << " has parent Index " 
         << pti->getTopSegment()->getParentIndex() 
         << ". Paralogous top segments must share same parent.";
      throw hal_exception(ss.str());
    }
    // a segment may not be its own next paralog
    if (paralogyIndex == topSegment->getArrayIndex())
    {
      stringstream ss;
      ss << "Top segment " << topSegment->getArrayIndex() 
         << " has paralogy index " << topSegment->getNextParalogyIndex()
         << " which isn't allowed";
      throw hal_exception(ss.str());
    }
  }
}
// Walks the "child" and "parent" genomes with gapped top and bottom
// iterators, forward and reversed, and expects each gapped iterator to
// cover exactly one segment (its left and right ends are equal) whose
// array index matches the loop counter.  The first pass counts up from
// index 0, the second counts down from the last top segment index.
void 
GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t gapThreshold = 9999999;
  const hal_size_t numTop = child->getNumTopSegments();

  GappedTopSegmentIteratorConstPtr topFwd = 
     child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr botFwd = 
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr topRev = 
     child->getGappedTopSegmentIterator(0, gapThreshold);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev = 
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  botRev->toReverse();

  // ascending pass: forward iterators step right, reversed ones step left
  for (size_t i = 0; i < numTop; ++i)
  {
    TopSegmentIteratorConstPtr topEdge = topFwd->getLeft();
    CuAssertTrue(_testCase, topEdge->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topEdge->getTopSegment()->getArrayIndex()) == i);
    topFwd->toRight();

    BottomSegmentIteratorConstPtr botEdge = botFwd->getLeft();
    CuAssertTrue(_testCase, botEdge->equals(botFwd->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botEdge->getBottomSegment()->getArrayIndex()) == i);
    botFwd->toRight();

    TopSegmentIteratorConstPtr topRevEdge = topRev->getLeft();
    CuAssertTrue(_testCase, topRevEdge->equals(topRev->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topRevEdge->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr botRevEdge = botRev->getLeft();
    CuAssertTrue(_testCase, botRevEdge->equals(botRev->getRight()));
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botRevEdge->getBottomSegment()->getArrayIndex()) == i);
    botRev->toLeft();
  }

  // restart all four iterators on the last top segment index
  topFwd = child->getGappedTopSegmentIterator(numTop - 1, gapThreshold);
  botFwd = parent->getGappedBottomSegmentIterator(numTop - 1, 0, 
                                                  gapThreshold);
  topRev = child->getGappedTopSegmentIterator(numTop - 1, gapThreshold);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(numTop - 1, 0, 
                                                  gapThreshold);
  botRev->toReverse();

  // descending pass: forward iterators step left, reversed ones step right
  for (hal_index_t i = numTop - 1; i >= 0; --i)
  {
    TopSegmentIteratorConstPtr topEdge = topFwd->getLeft();
    CuAssertTrue(_testCase, topEdge->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, topEdge->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !topFwd->getReversed());
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr botEdge = botFwd->getLeft();
    CuAssertTrue(_testCase, botEdge->equals(botFwd->getRight()));
    CuAssertTrue(_testCase, 
                 botEdge->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !botFwd->getReversed());
    botFwd->toLeft();

    TopSegmentIteratorConstPtr topRevEdge = topRev->getLeft();
    CuAssertTrue(_testCase, topRevEdge->equals(topRev->getRight()));
    CuAssertTrue(_testCase, 
                 topRevEdge->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topRev->getReversed());
    topRev->toRight();

    BottomSegmentIteratorConstPtr botRevEdge = botRev->getLeft();
    CuAssertTrue(_testCase, botRevEdge->equals(botRev->getRight()));
    CuAssertTrue(_testCase, 
                 botRevEdge->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, botRev->getReversed());
    botRev->toRight();
  }
}
// Walks the "child" and "parent" genomes with gapped top and bottom
// iterators, forward and reversed, and expects every gapped iterator to
// span a run of 20 consecutive segments, indices i .. i + 19.  In the
// ascending pass the forward iterators are also mapped to the other genome
// with toParent() / toChild() and must land on the matching gapped
// iterator once any reversal is undone.
void 
GappedSegmentIteratorIndelTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t gapThreshold = 9999999;
  const hal_size_t numTop = child->getNumTopSegments();

  GappedTopSegmentIteratorConstPtr topFwd = 
     child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr botFwd = 
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr topRev = 
     child->getGappedTopSegmentIterator(0, gapThreshold);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev = 
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  botRev->toReverse();

  // ascending pass, one run of 20 segments per iteration
  for (size_t i = 0; i < numTop; i += 20)
  {
    const size_t runEnd = i + 19;

    TopSegmentIteratorConstPtr topEdge = topFwd->getLeft();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topEdge->getTopSegment()->getArrayIndex()) == i);
    topEdge = topFwd->getRight();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topEdge->getTopSegment()->getArrayIndex()) == runEnd);

    BottomSegmentIteratorConstPtr botEdge = botFwd->getLeft();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botEdge->getBottomSegment()->getArrayIndex()) == i);
    botEdge = botFwd->getRight();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botEdge->getBottomSegment()->getArrayIndex()) == runEnd);

    // top -> parent must reproduce the forward bottom iterator
    GappedBottomSegmentIteratorConstPtr mappedUp = botFwd->copy();
    mappedUp->toParent(topFwd);
    if (mappedUp->getReversed())
    {
      mappedUp->toReverse();
    }
    CuAssertTrue(_testCase, mappedUp->equals(botFwd));

    // bottom -> child must reproduce the forward top iterator
    GappedTopSegmentIteratorConstPtr mappedDown = topFwd->copy();
    mappedDown->toChild(botFwd);
    if (mappedDown->getReversed())
    {
      mappedDown->toReverse();
    }
    CuAssertTrue(_testCase, mappedDown->equals(topFwd));

    topFwd->toRight();
    botFwd->toRight();

    // reversed iterators report the run with its ends swapped
    TopSegmentIteratorConstPtr topRevEdge = topRev->getLeft();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topRevEdge->getTopSegment()->getArrayIndex()) == runEnd);
    topRevEdge = topRev->getRight();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   topRevEdge->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr botRevEdge = botRev->getLeft();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botRevEdge->getBottomSegment()->getArrayIndex()) == runEnd);
    botRevEdge = botRev->getRight();
    CuAssertTrue(_testCase, 
                 static_cast<size_t>(
                   botRevEdge->getBottomSegment()->getArrayIndex()) == i);
    botRev->toLeft();
  }

  // restart all four iterators at index (number of top segments - 20)
  topFwd = child->getGappedTopSegmentIterator(numTop - 20, gapThreshold);
  botFwd = parent->getGappedBottomSegmentIterator(numTop - 20, 0, 
                                                  gapThreshold);
  topRev = child->getGappedTopSegmentIterator(numTop - 20, gapThreshold);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(numTop - 20, 0, 
                                                  gapThreshold);
  botRev->toReverse();

  // descending pass; i is the highest index of the current run
  for (hal_index_t i = numTop - 1; i >= 0; i -= 20)
  {
    const hal_index_t runStart = i - 19;

    TopSegmentIteratorConstPtr topEdge = topFwd->getLeft();
    CuAssertTrue(_testCase, 
                 topEdge->getTopSegment()->getArrayIndex() == runStart);
    topEdge = topFwd->getRight();
    CuAssertTrue(_testCase, topEdge->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !topFwd->getReversed());
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr botEdge = botFwd->getLeft();
    CuAssertTrue(_testCase, 
                 botEdge->getBottomSegment()->getArrayIndex() == runStart);
    botEdge = botFwd->getRight();
    CuAssertTrue(_testCase, 
                 botEdge->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !botFwd->getReversed());
    botFwd->toLeft();

    TopSegmentIteratorConstPtr topRevEdge = topRev->getLeft();
    CuAssertTrue(_testCase, 
                 topRevEdge->getTopSegment()->getArrayIndex() == i);
    topRevEdge = topRev->getRight();
    CuAssertTrue(_testCase, 
                 topRevEdge->getTopSegment()->getArrayIndex() == runStart);
    CuAssertTrue(_testCase, topRev->getReversed());
    topRev->toRight();

    BottomSegmentIteratorConstPtr botRevEdge = botRev->getLeft();
    CuAssertTrue(_testCase, 
                 botRevEdge->getBottomSegment()->getArrayIndex() == i);
    botRevEdge = botRev->getRight();
    CuAssertTrue(_testCase, 
                 botRevEdge->getBottomSegment()->getArrayIndex() == runStart);
    CuAssertTrue(_testCase, botRev->getReversed());
    botRev->toRight();
  }
}