Esempio n. 1
0
// Maps `bottom` down to the child at `childIndex` with getMappedSegments()
// and checks that exactly one mapped segment is produced. The source side of
// that segment must match `bottom`, and the mapped side must match the top
// segment obtained by calling toChild() on an iterator of the child genome.
// `alignment` is not used by this check.
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* childGenome = bottom->getGenome()->getChild(childIndex);

  // Collect everything `bottom` maps to in the child; expect a single hit.
  set<MappedSegmentConstPtr> mappedSet;
  bottom->getMappedSegments(mappedSet, childGenome, NULL, false);
  CuAssertTrue(_testCase, mappedSet.size() == 1);
  set<MappedSegmentConstPtr>::const_iterator firstHit = mappedSet.begin();
  MappedSegmentConstPtr mapped = *firstHit;

  // Source side: must describe the bottom segment we started from.
  CuAssertTrue(_testCase,
               bottom->getGenome() == mapped->getSource()->getGenome());
  CuAssertTrue(_testCase,
               bottom->getStartPosition() ==
               mapped->getSource()->getStartPosition());
  CuAssertTrue(_testCase,
               bottom->getLength() == mapped->getSource()->getLength());
  CuAssertTrue(_testCase,
               bottom->getReversed() == mapped->getSource()->getReversed());

  // Mapped side: must agree with the top segment reached through toChild().
  TopSegmentIteratorConstPtr expected = childGenome->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);
  CuAssertTrue(_testCase, expected->getGenome() == mapped->getGenome());
  CuAssertTrue(_testCase,
               expected->getStartPosition() == mapped->getStartPosition());
  CuAssertTrue(_testCase, expected->getLength() == mapped->getLength());
  CuAssertTrue(_testCase, expected->getReversed() == mapped->getReversed());
}
// quickly count subsitutions without loading rearrangement machinery.
// used for benchmarks for basic file scanning... and not much else since
// the interface is still a bit wonky.
void SummarizeMutations::substitutionAnalysis(const Genome* genome, 
                                               MutationsStats& stats)
{
  assert(stats._subs == 0);
  if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 ||
      (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end()))
  {
    return;
  }
  const Genome* parent = genome->getParent();
  string pname = parent != NULL ? parent->getName() : string();
  StrPair branchName(genome->getName(), pname);

  BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator();
  
  string gString, cString;

  hal_size_t n = genome->getNumBottomSegments();
  vector<hal_size_t> children;
  hal_size_t m = genome->getNumChildren();
  for (hal_size_t i = 0; i < m; ++i)
  {
    string cName = genome->getChild(i)->getName();
    if (!_targetSet || 
        (_targetSet && _targetSet->find(cName) != _targetSet->end()))
    {
      children.push_back(i);
    }
  }
  if (children.empty())
  {
    return;
  }

  for (hal_size_t i = 0; i < n; ++i)
  {
    bool readString = false;
    for (size_t j = 0; j < children.size(); ++j)
    {
      if (bottom->hasChild(children[j]))
      {
        if (readString == false)
        {
          bottom->getString(gString);
          readString = true;
        }
        top->toChild(bottom, children[j]);
        top->getString(cString);
        assert(gString.length() == cString.length());
        for (hal_size_t k = 0; k < gString.length(); ++k)
        {
          if (isSubstitution(gString[k], cString[k]))
          {
            ++stats._subs;
          }
        }
      }
    }
    bottom->toRight();
  }
}