bool DefaultGappedBottomSegmentIterator::isMissingData(double nThreshold) const { if (nThreshold >= 1.0) { return false; } hal_index_t start = min(_left->getStartPosition(), _right->getEndPosition()); DNAIteratorConstPtr di = _left->getGenome()->getDNAIterator(start); hal_size_t length = getLength(); size_t maxNs = nThreshold * (double)length; size_t Ns = 0; char c; for (size_t i = 0; i < length; ++i, di->toRight()) { c = di->getChar(); if (c == 'N' || c == 'n') { ++Ns; } if (Ns > maxNs) { return true; } if ((length - i) < (maxNs - Ns)) { break; } } return false; }
void MappedSegmentColCompareTest::createColArray() { hal_size_t N = _ref->getSequenceLength(); _colArray.clear(); _colArray.resize(N); set<const Genome*> tgtSet; tgtSet.insert(_tgt); ColumnIteratorConstPtr colIt = _ref->getColumnIterator(&tgtSet, 0, 0, NULL_INDEX, false, false, false, true); while (true) { const ColumnIterator::ColumnMap* colMap = colIt->getColumnMap(); ColumnIterator::ColumnMap::const_iterator colMapIt = colMap->begin(); vector<pair<hal_index_t, bool> > insertList; // Pass 1 find all homologies in target for (; colMapIt != colMap->end(); colMapIt++) { if (colMapIt->first->getGenome() == _tgt) { ColumnIterator::DNASet* dnaSet = colMapIt->second; ColumnIterator::DNASet::const_iterator dnaIt = dnaSet->begin(); for (; dnaIt != dnaSet->end(); ++dnaIt) { DNAIteratorConstPtr dna = *dnaIt; insertList.push_back( pair<hal_index_t, bool>(dna->getArrayIndex(), dna->getReversed())); } } else { CuAssertTrue(_testCase, colMapIt->first->getGenome() == _ref); } } // Pass 2 update each reference position with all homologies found // in Pass 1 for (colMapIt = colMap->begin(); colMapIt != colMap->end(); colMapIt++) { if (colMapIt->first->getGenome() == _ref) { ColumnIterator::DNASet* dnaSet = colMapIt->second; ColumnIterator::DNASet::const_iterator dnaIt = dnaSet->begin(); for (; dnaIt != dnaSet->end(); ++dnaIt) { DNAIteratorConstPtr dna = *dnaIt; for (size_t insIdx = 0; insIdx < insertList.size(); ++insIdx) { _colArray[dna->getArrayIndex()].insert(insertList[insIdx]); } } } } if (colIt->lastColumn()) { break; } colIt->toRight(); } }
/** Copy this genome's DNA, character by character, into dest.
 *
 * Both genomes are walked with their default DNA iterators; copying stops
 * once the source iterator's array index reaches this genome's sequence
 * length.
 *
 * @param dest Genome that receives the characters.  An assert checks that
 *             its sequence length equals this genome's.
 */
void Genome::copySequence(Genome *dest) const
{
  DNAIteratorConstPtr source = getDNAIterator();
  DNAIteratorPtr target = dest->getDNAIterator();
  hal_size_t n = getSequenceLength();
  assert(n == dest->getSequenceLength());

  while (static_cast<hal_size_t>(source->getArrayIndex()) < n)
  {
    target->setChar(source->getChar());
    // Step both iterators together so they stay aligned.
    source->toRight();
    target->toRight();
  }
}
void hal::validateSequence(const Sequence* sequence) { // Verify that the DNA sequence doesn't contain funny characters DNAIteratorConstPtr dnaIt = sequence->getDNAIterator(); hal_size_t length = sequence->getSequenceLength(); for (hal_size_t i = 0; i < length; ++i) { char c = dnaIt->getChar(); if (isNucleotide(c) == false) { stringstream ss; ss << "Non-nucleotide character discoverd at position " << i << " of sequence " << sequence->getName() << ": " << c; throw hal_exception(ss.str()); } } // Check the top segments if (sequence->getGenome()->getParent() != NULL) { hal_size_t totalTopLength = 0; TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator(); hal_size_t numTopSegments = sequence->getNumTopSegments(); for (hal_size_t i = 0; i < numTopSegments; ++i) { const TopSegment* topSegment = topIt->getTopSegment(); validateTopSegment(topSegment); totalTopLength += topSegment->getLength(); topIt->toRight(); } if (totalTopLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its top segments add up to " << totalTopLength; throw hal_exception(ss.str()); } } // Check the bottom segments if (sequence->getGenome()->getNumChildren() > 0) { hal_size_t totalBottomLength = 0; BottomSegmentIteratorConstPtr bottomIt = sequence->getBottomSegmentIterator(); hal_size_t numBottomSegments = sequence->getNumBottomSegments(); for (hal_size_t i = 0; i < numBottomSegments; ++i) { const BottomSegment* bottomSegment = bottomIt->getBottomSegment(); validateBottomSegment(bottomSegment); totalBottomLength += bottomSegment->getLength(); bottomIt->toRight(); } if (totalBottomLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its bottom segments add up to " << totalBottomLength; throw hal_exception(ss.str()); } } }