bool DefaultGappedBottomSegmentIterator::isMissingData(double nThreshold) const
{
  if (nThreshold >= 1.0)
  {
    return false;
  }
  hal_index_t start = min(_left->getStartPosition(), _right->getEndPosition());
  DNAIteratorConstPtr di = _left->getGenome()->getDNAIterator(start);
  hal_size_t length = getLength();
  size_t maxNs = nThreshold * (double)length;
  size_t Ns = 0;
  char c;
  for (size_t i = 0; i < length; ++i, di->toRight())
  {
    c = di->getChar();
    if (c == 'N' || c == 'n')
    {
      ++Ns;
    }
    if (Ns > maxNs)
    {
      return true;
    }
    if ((length - i) < (maxNs - Ns))
    {
      break;
    }
  }
  return false;
}
Example #2
0
void  MappedSegmentColCompareTest::createColArray()
{
  hal_size_t N = _ref->getSequenceLength();
  _colArray.clear();
  _colArray.resize(N);
  set<const Genome*> tgtSet;
  tgtSet.insert(_tgt);
  ColumnIteratorConstPtr colIt = _ref->getColumnIterator(&tgtSet, 0, 0, 
                                                         NULL_INDEX, false,
                                                         false, false, true);
  while (true)
  {
    const ColumnIterator::ColumnMap* colMap = colIt->getColumnMap();
    ColumnIterator::ColumnMap::const_iterator colMapIt = colMap->begin();
    vector<pair<hal_index_t, bool> > insertList;
    // Pass 1 find all homologies in target
    for (; colMapIt != colMap->end(); colMapIt++)
    {
      if (colMapIt->first->getGenome() == _tgt)
      {
        ColumnIterator::DNASet* dnaSet = colMapIt->second;
        ColumnIterator::DNASet::const_iterator dnaIt = dnaSet->begin();
        for (; dnaIt != dnaSet->end(); ++dnaIt)
        {
          DNAIteratorConstPtr dna = *dnaIt;
          insertList.push_back(
            pair<hal_index_t, bool>(dna->getArrayIndex(), dna->getReversed()));
        }
      }
      else
      {
        CuAssertTrue(_testCase, colMapIt->first->getGenome() == _ref);
      }
    }

    // Pass 2 update each reference position with all homologies found
    // in Pass 1
    for (colMapIt = colMap->begin(); colMapIt != colMap->end(); colMapIt++)
    {
      if (colMapIt->first->getGenome() == _ref)
      {
        ColumnIterator::DNASet* dnaSet = colMapIt->second;
        ColumnIterator::DNASet::const_iterator dnaIt = dnaSet->begin();
        for (; dnaIt != dnaSet->end(); ++dnaIt)
        {
          DNAIteratorConstPtr dna = *dnaIt;
          for (size_t insIdx = 0; insIdx < insertList.size(); ++insIdx)
          {
            _colArray[dna->getArrayIndex()].insert(insertList[insIdx]);
          }
        }
      }
    }   
    if (colIt->lastColumn())
    {
      break;
    }
    colIt->toRight();
  }
}
Example #3
0
/**
 * Copies this genome's DNA, base by base, into dest.
 *
 * Both genomes are walked in lockstep from their default DNA iterator
 * positions; copying stops once the source iterator's array index reaches
 * this genome's sequence length.
 *
 * @param dest genome that receives the bases.  Its sequence length is
 *             expected to equal this genome's (checked by assert only).
 */
void Genome::copySequence(Genome *dest) const
{
  DNAIteratorConstPtr source = getDNAIterator();
  DNAIteratorPtr target = dest->getDNAIterator();
  hal_size_t numBases = getSequenceLength();
  assert(numBases == dest->getSequenceLength());

  while (static_cast<hal_size_t>(source->getArrayIndex()) < numBases)
  {
    target->setChar(source->getChar());
    // Advance both cursors together so they stay aligned.
    source->toRight();
    target->toRight();
  }
}
/**
 * Validates a single sequence.
 *
 * Three checks are performed, in order:
 *  1. every base of the sequence passes isNucleotide();
 *  2. if the sequence's genome has a parent, each top segment passes
 *     validateTopSegment() and the top segment lengths sum to the
 *     sequence length;
 *  3. if the sequence's genome has children, each bottom segment passes
 *     validateBottomSegment() and the bottom segment lengths sum to the
 *     sequence length.
 *
 * @param sequence the sequence to validate.
 * @throws hal_exception on the first non-nucleotide character, or when the
 *         top or bottom segment lengths do not add up to the sequence
 *         length.
 */
void hal::validateSequence(const Sequence* sequence)
{
  // Verify that the DNA sequence doesn't contain funny characters
  DNAIteratorConstPtr dnaIt = sequence->getDNAIterator();
  hal_size_t length = sequence->getSequenceLength();
  for (hal_size_t i = 0; i < length; ++i)
  {
    char c = dnaIt->getChar();
    if (!isNucleotide(c))
    {
      stringstream ss;
      ss << "Non-nucleotide character discoverd at position " 
         << i << " of sequence " << sequence->getName() << ": " << c;
      throw hal_exception(ss.str());
    }
    // Step to the next base.  Without this the loop would re-test the
    // first base `length` times and never look at the rest of the sequence.
    dnaIt->toRight();
  }
  
  // Check the top segments
  if (sequence->getGenome()->getParent() != NULL)
  {
    hal_size_t totalTopLength = 0;
    TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator();
    hal_size_t numTopSegments = sequence->getNumTopSegments();
    for (hal_size_t i = 0; i < numTopSegments; ++i)
    {
      const TopSegment* topSegment = topIt->getTopSegment();
      validateTopSegment(topSegment);
      totalTopLength += topSegment->getLength();
      topIt->toRight();
    }
    // The top segments must add up to exactly the sequence length.
    if (totalTopLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its top segments add up to " << totalTopLength;
      throw hal_exception(ss.str());
    }
  }

  // Check the bottom segments
  if (sequence->getGenome()->getNumChildren() > 0)
  {
    hal_size_t totalBottomLength = 0;
    BottomSegmentIteratorConstPtr bottomIt = 
       sequence->getBottomSegmentIterator();
    hal_size_t numBottomSegments = sequence->getNumBottomSegments();
    for (hal_size_t i = 0; i < numBottomSegments; ++i)
    {
      const BottomSegment* bottomSegment = bottomIt->getBottomSegment();
      validateBottomSegment(bottomSegment);
      totalBottomLength += bottomSegment->getLength();
      bottomIt->toRight();
    }
    // The bottom segments must add up to exactly the sequence length.
    if (totalBottomLength != length)
    {
      stringstream ss;
      ss << "Sequence " << sequence->getName() << " has length " << length 
         << " but its bottom segments add up to " << totalBottomLength;
      throw hal_exception(ss.str());
    }
  }
}