/**
 * Test callback: exercises top segment iterators on the "parent1" /
 * "child1" genomes of the given alignment, checking the coordinates
 * reported after toChild(), slice() and toParseUp() in both the forward
 * and the reversed orientation.
 *
 * @param alignment alignment holding the genomes "parent1" and "child1"
 */
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* parentGenome = alignment->openGenome("parent1");
  const Genome* childGenome = alignment->openGenome("child1");

  // plain iterators on the first top segment of the child and the first
  // bottom segment of the parent
  TopSegmentIteratorConstPtr topIt = childGenome->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr botIt =
     parentGenome->getBottomSegmentIterator();

  // second child iterator, positioned by mapping down from the parent
  TopSegmentIteratorConstPtr mappedTopIt =
     childGenome->getTopSegmentIterator();
  mappedTopIt->toChild(botIt, 0);

  // the untouched iterator is expected to be a forward, 10 base segment
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 10);
  CuAssertTrue(_testCase, !topIt->getReversed());

  // the mapped iterator is expected to cover the same 10 bases, but
  // reversed (so it starts at position 9)
  CuAssertTrue(_testCase, mappedTopIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, mappedTopIt->getLength() == 10);
  CuAssertTrue(_testCase, mappedTopIt->getReversed());

  // slice the parent segment, then map down again: the child iterator
  // is expected to follow the slice
  botIt->slice(1, 3);
  mappedTopIt->toChild(botIt, 0);

  CuAssertTrue(_testCase, botIt->getStartPosition() == 1);
  CuAssertTrue(_testCase, botIt->getLength() == 6);
  CuAssertTrue(_testCase, mappedTopIt->getStartPosition() == 8);
  CuAssertTrue(_testCase, mappedTopIt->getLength() == 6);

  // DNA read through the sliced parent and mapped child iterators
  string dna;
  botIt->getString(dna);
  CuAssertTrue(_testCase, dna == "CCTACG");
  mappedTopIt->getString(dna);
  CuAssertTrue(_testCase, dna == "CACGTA");

  // from here on, parse up from the bottom segments of the child genome
  botIt = childGenome->getBottomSegmentIterator();
  CuAssertTrue(_testCase, !botIt->getReversed());

  // first bottom segment, forward
  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // first bottom segment, reversed
  botIt->toReverse();
  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 4);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // flip back to forward and step to the next bottom segment
  botIt->toReverse();
  CuAssertTrue(_testCase, !botIt->getReversed());
  botIt->toRight();
  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 5);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // second bottom segment, reversed
  botIt->toReverse();
  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, topIt->getLength() == 5);
}
/**
 * Move bs to the right until it no longer sits on a gap segment.
 *
 * A segment is skipped when it has no child at _childIndex and its
 * length is at most _gapThreshold.  The scan also stops, leaving bs
 * where it is, once bs is on the final segment in its direction of
 * travel (the last segment when forward, the first when reversed).
 *
 * @param bs iterator to advance in place
 */
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  BottomSegmentIteratorConstPtr bs) const
{
  for (;;)
  {
    const bool onGap = !bs->hasChild(_childIndex) &&
       bs->getLength() <= _gapThreshold;
    if (!onGap)
    {
      return;
    }

    // nothing further to the right in the current orientation
    const bool atEnd = bs->getReversed() ?
       bs->getBottomSegment()->isFirst() :
       bs->getBottomSegment()->isLast();
    if (atEnd)
    {
      return;
    }

    bs->toRight();
  }
}
// Example no. 3 (separator text, apparently left over from the page these snippets were collected from)
/**
 * Validate a single sequence.
 *
 * Three checks are made:
 *  - every base of the sequence is a nucleotide character,
 *  - if the sequence's genome has a parent, each top segment is valid
 *    and the top segment lengths add up to the sequence length,
 *  - if the sequence's genome has children, each bottom segment is valid
 *    and the bottom segment lengths add up to the sequence length.
 *
 * @param sequence sequence to check
 * @throws hal_exception describing the first problem encountered
 */
void hal::validateSequence(const Sequence* sequence)
{
  // Verify that the DNA sequence doesn't contain funny characters
  DNAIteratorConstPtr dnaIt = sequence->getDNAIterator();
  hal_size_t length = sequence->getSequenceLength();
  for (hal_size_t i = 0; i < length; ++i)
  {
    char c = dnaIt->getChar();
    if (isNucleotide(c) == false)
    {
      stringstream ss;
      ss << "Non-nucleotide character discoverd at position " 
         << i << " of sequence " << sequence->getName() << ": " << c;
      throw hal_exception(ss.str());
    }
    // step to the next base so that position i is what actually gets
    // read on the next pass (same advance-at-end-of-body pattern as the
    // segment loops below)
    dnaIt->toRight();
  }
  
  // Check the top segments
  if (sequence->getGenome()->getParent() != NULL)
  {
    hal_size_t totalTopLength = 0;
    TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator();
    hal_size_t numTopSegments = sequence->getNumTopSegments();
    for (hal_size_t i = 0; i < numTopSegments; ++i)
    {
      const TopSegment* topSegment = topIt->getTopSegment();
      validateTopSegment(topSegment);
      totalTopLength += topSegment->getLength();
      topIt->toRight();
    }
    // the top segments must tile the sequence exactly
    if (totalTopLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its top segments add up to " << totalTopLength;
      throw hal_exception(ss.str());
    }
  }

  // Check the bottom segments
  if (sequence->getGenome()->getNumChildren() > 0)
  {
    hal_size_t totalBottomLength = 0;
    BottomSegmentIteratorConstPtr bottomIt = 
       sequence->getBottomSegmentIterator();
    hal_size_t numBottomSegments = sequence->getNumBottomSegments();
    for (hal_size_t i = 0; i < numBottomSegments; ++i)
    {
      const BottomSegment* bottomSegment = bottomIt->getBottomSegment();
      validateBottomSegment(bottomSegment);
      totalBottomLength += bottomSegment->getLength();
      bottomIt->toRight();
    }
    // the bottom segments must tile the sequence exactly
    if (totalBottomLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its bottom segments add up to " << totalBottomLength;
      throw hal_exception(ss.str());
    }
  }
}
// quickly count subsitutions without loading rearrangement machinery.
// used for benchmarks for basic file scanning... and not much else since
// the interface is still a bit wonky.
void SummarizeMutations::substitutionAnalysis(const Genome* genome, 
                                               MutationsStats& stats)
{
  assert(stats._subs == 0);
  if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 ||
      (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end()))
  {
    return;
  }
  const Genome* parent = genome->getParent();
  string pname = parent != NULL ? parent->getName() : string();
  StrPair branchName(genome->getName(), pname);

  BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator();
  
  string gString, cString;

  hal_size_t n = genome->getNumBottomSegments();
  vector<hal_size_t> children;
  hal_size_t m = genome->getNumChildren();
  for (hal_size_t i = 0; i < m; ++i)
  {
    string cName = genome->getChild(i)->getName();
    if (!_targetSet || 
        (_targetSet && _targetSet->find(cName) != _targetSet->end()))
    {
      children.push_back(i);
    }
  }
  if (children.empty())
  {
    return;
  }

  for (hal_size_t i = 0; i < n; ++i)
  {
    bool readString = false;
    for (size_t j = 0; j < children.size(); ++j)
    {
      if (bottom->hasChild(children[j]))
      {
        if (readString == false)
        {
          bottom->getString(gString);
          readString = true;
        }
        top->toChild(bottom, children[j]);
        top->getString(cString);
        assert(gString.length() == cString.length());
        for (hal_size_t k = 0; k < gString.length(); ++k)
        {
          if (isSubstitution(gString[k], cString[k]))
          {
            ++stats._subs;
          }
        }
      }
    }
    bottom->toRight();
  }
}