/**
 * Verifies top segment iterator behaviour on the "parent1" / "child1"
 * genomes of the supplied alignment when segments are reached through a
 * bottom segment (toChild) or through a parse edge (toParseUp), in both
 * orientations.  Every expectation is reported through CuAssertTrue on
 * _testCase.
 */
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* parentGenome = alignment->openGenome("parent1");
  const Genome* childGenome = alignment->openGenome("child1");

  TopSegmentIteratorConstPtr topIt = childGenome->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bottomIt =
    parentGenome->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr mappedTopIt =
    childGenome->getTopSegmentIterator();
  mappedTopIt->toChild(bottomIt, 0);

  // The untouched child iterator is expected to be forward at [0, 10) ...
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 10);
  CuAssertTrue(_testCase, !topIt->getReversed());

  // ... while the one reached through the parent's bottom segment is
  // expected to come back reversed, starting at position 9.
  CuAssertTrue(_testCase, mappedTopIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, mappedTopIt->getLength() == 10);
  CuAssertTrue(_testCase, mappedTopIt->getReversed());

  // Slice the bottom segment by (1, 3) and map down again: both iterators
  // are expected to cover 6 bases afterwards.
  bottomIt->slice(1, 3);
  mappedTopIt->toChild(bottomIt, 0);
  CuAssertTrue(_testCase, bottomIt->getStartPosition() == 1);
  CuAssertTrue(_testCase, bottomIt->getLength() == 6);
  CuAssertTrue(_testCase, mappedTopIt->getStartPosition() == 8);
  CuAssertTrue(_testCase, mappedTopIt->getLength() == 6);

  // Sequence content of the sliced segments.
  string parentBases;
  bottomIt->getString(parentBases);
  CuAssertTrue(_testCase, parentBases == "CCTACG");
  string childBases;
  mappedTopIt->getString(childBases);
  CuAssertTrue(_testCase, childBases == "CACGTA");

  // Parse edges: walk child1's own bottom segments and parse up to its
  // top segments, alternating the orientation of the bottom iterator.
  bottomIt = childGenome->getBottomSegmentIterator();
  CuAssertTrue(_testCase, !bottomIt->getReversed());

  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  bottomIt->toReverse();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 4);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // Flip back to forward, step to the next bottom segment and repeat.
  bottomIt->toReverse();
  CuAssertTrue(_testCase, !bottomIt->getReversed());
  bottomIt->toRight();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 5);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  bottomIt->toReverse();
  topIt->toParseUp(bottomIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, topIt->getLength() == 5);
}
/**
 * Moves bs to the right until it rests on a segment that is not a gap.
 * A segment counts as a gap here when it has no child at _childIndex and
 * its length does not exceed _gapThreshold.  The walk also stops, leaving
 * bs on a gap segment, once bs sits on the last bottom segment (forward
 * orientation) or on the first one (reversed orientation).
 *
 * @param bs bottom segment iterator to advance in place
 */
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  BottomSegmentIteratorConstPtr bs) const
{
  for (;;)
  {
    const bool isGap =
      !bs->hasChild(_childIndex) && bs->getLength() <= _gapThreshold;
    if (!isGap)
    {
      return;
    }

    // "Right" depends on orientation: a reversed iterator runs out of
    // segments at the first one, a forward iterator at the last one.
    const bool atEnd = bs->getReversed() ? bs->getBottomSegment()->isFirst()
                                         : bs->getBottomSegment()->isLast();
    if (atEnd)
    {
      return;
    }

    bs->toRight();
  }
}
void hal::validateSequence(const Sequence* sequence) { // Verify that the DNA sequence doesn't contain funny characters DNAIteratorConstPtr dnaIt = sequence->getDNAIterator(); hal_size_t length = sequence->getSequenceLength(); for (hal_size_t i = 0; i < length; ++i) { char c = dnaIt->getChar(); if (isNucleotide(c) == false) { stringstream ss; ss << "Non-nucleotide character discoverd at position " << i << " of sequence " << sequence->getName() << ": " << c; throw hal_exception(ss.str()); } } // Check the top segments if (sequence->getGenome()->getParent() != NULL) { hal_size_t totalTopLength = 0; TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator(); hal_size_t numTopSegments = sequence->getNumTopSegments(); for (hal_size_t i = 0; i < numTopSegments; ++i) { const TopSegment* topSegment = topIt->getTopSegment(); validateTopSegment(topSegment); totalTopLength += topSegment->getLength(); topIt->toRight(); } if (totalTopLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its top segments add up to " << totalTopLength; throw hal_exception(ss.str()); } } // Check the bottom segments if (sequence->getGenome()->getNumChildren() > 0) { hal_size_t totalBottomLength = 0; BottomSegmentIteratorConstPtr bottomIt = sequence->getBottomSegmentIterator(); hal_size_t numBottomSegments = sequence->getNumBottomSegments(); for (hal_size_t i = 0; i < numBottomSegments; ++i) { const BottomSegment* bottomSegment = bottomIt->getBottomSegment(); validateBottomSegment(bottomSegment); totalBottomLength += bottomSegment->getLength(); bottomIt->toRight(); } if (totalBottomLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its bottom segments add up to " << totalBottomLength; throw hal_exception(ss.str()); } } }
// quickly count subsitutions without loading rearrangement machinery. // used for benchmarks for basic file scanning... and not much else since // the interface is still a bit wonky. void SummarizeMutations::substitutionAnalysis(const Genome* genome, MutationsStats& stats) { assert(stats._subs == 0); if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 || (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end())) { return; } const Genome* parent = genome->getParent(); string pname = parent != NULL ? parent->getName() : string(); StrPair branchName(genome->getName(), pname); BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator(); TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator(); string gString, cString; hal_size_t n = genome->getNumBottomSegments(); vector<hal_size_t> children; hal_size_t m = genome->getNumChildren(); for (hal_size_t i = 0; i < m; ++i) { string cName = genome->getChild(i)->getName(); if (!_targetSet || (_targetSet && _targetSet->find(cName) != _targetSet->end())) { children.push_back(i); } } if (children.empty()) { return; } for (hal_size_t i = 0; i < n; ++i) { bool readString = false; for (size_t j = 0; j < children.size(); ++j) { if (bottom->hasChild(children[j])) { if (readString == false) { bottom->getString(gString); readString = true; } top->toChild(bottom, children[j]); top->getString(cString); assert(gString.length() == cString.length()); for (hal_size_t k = 0; k < gString.length(); ++k) { if (isSubstitution(gString[k], cString[k])) { ++stats._subs; } } } } bottom->toRight(); } }