void LodExtract::writeUnsampledSequence(const Sequence* outSequence, SegmentIteratorPtr outSegment) { outSegment->setCoordinates(outSequence->getStartPosition(), outSequence->getSequenceLength()); if (outSegment->isTop()) { assert(outSequence->getNumTopSegments() == 1); TopSegmentIteratorPtr top = outSegment.downCast<TopSegmentIteratorPtr>(); top->setParentIndex(NULL_INDEX); top->setParentReversed(false); top->setNextParalogyIndex(NULL_INDEX); top->setBottomParseIndex(NULL_INDEX); } else { assert(outSequence->getNumBottomSegments() == 1); BottomSegmentIteratorPtr bottom = outSegment.downCast<BottomSegmentIteratorPtr>(); hal_size_t numChildren = bottom->getNumChildren(); for (hal_size_t childNum = 0; childNum < numChildren; ++childNum) { bottom->setChildIndex(childNum, NULL_INDEX); bottom->setChildReversed(childNum, false); } bottom->setTopParseIndex(NULL_INDEX); } }
// note: takes smart pointer as it maybe added to the results static hal_size_t mapDown(MappedSegmentPtr mappedSeg, hal_size_t childIndex, list<MappedSegmentPtr> &results, hal_size_t minLength) { const Genome *child = mappedSeg->getGenome()->getChild(childIndex); assert(child != NULL); hal_size_t added = 0; if (mappedSeg->isTop() == false) { TopSegmentIteratorPtr topSegIt = child->getTopSegmentIterator(); SegmentIteratorPtr targetSegIt = mappedSeg->getTargetIteratorPtr(); BottomSegmentIteratorPtr botSegIt = std::dynamic_pointer_cast<BottomSegmentIterator>(targetSegIt); if (botSegIt->bseg()->hasChild(childIndex) == true && botSegIt->getLength() >= minLength) { topSegIt->toChild(botSegIt, childIndex); mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(topSegIt)); results.push_back(MappedSegmentPtr(mappedSeg)); ++added; } } else { hal_index_t rightCutoff = mappedSeg->getEndPosition(); TopSegmentIteratorPtr topSegIt = mappedSeg->targetAsTop(); hal_index_t startOffset = (hal_index_t)topSegIt->getStartOffset(); hal_index_t endOffset = (hal_index_t)topSegIt->getEndOffset(); BottomSegmentIteratorPtr botSegIt = mappedSeg->getGenome()->getBottomSegmentIterator(); botSegIt->toParseDown(topSegIt); do { BottomSegmentIteratorPtr newBotSegIt = botSegIt->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas TopSegmentIteratorPtr backTopSegIt = topSegIt->clone(); backTopSegIt->toParseUp(newBotSegIt); hal_index_t startBack = (hal_index_t)backTopSegIt->getStartOffset(); hal_index_t endBack = (hal_index_t)backTopSegIt->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSourceSegIt = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSourceSegIt->getLength() > startDelta + endDelta); newSourceSegIt->slice(newSourceSegIt->getStartOffset() + startDelta, newSourceSegIt->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSourceSegIt, newBotSegIt)); assert(newMappedSeg->isTop() == false); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapDown(newMappedSeg, childIndex, results, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (botSegIt->getEndPosition() != rightCutoff) { botSegIt->toRight(rightCutoff); } else { break; } } while (true); } return added; }
// note: takes smart pointer as it maybe added to the results static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) { const Genome *parent = mappedSeg->getGenome()->getParent(); assert(parent != NULL); hal_size_t added = 0; if (mappedSeg->isTop() == true) { BottomSegmentIteratorPtr botSegIt = parent->getBottomSegmentIterator(); TopSegmentIteratorPtr topSegIt = mappedSeg->targetAsTop(); if (topSegIt->tseg()->hasParent() == true && topSegIt->getLength() >= minLength && (doDupes == true || topSegIt->tseg()->isCanonicalParalog() == true)) { botSegIt->toParent(topSegIt); mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(botSegIt)); results.push_back(mappedSeg); ++added; } } else { hal_index_t rightCutoff = mappedSeg->getEndPosition(); BottomSegmentIteratorPtr botSegIt = mappedSeg->targetAsBottom(); hal_index_t startOffset = (hal_index_t)botSegIt->getStartOffset(); hal_index_t endOffset = (hal_index_t)botSegIt->getEndOffset(); TopSegmentIteratorPtr topSegIt = mappedSeg->getGenome()->getTopSegmentIterator(); topSegIt->toParseUp(botSegIt); do { TopSegmentIteratorPtr newTopSegIt = topSegIt->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas BottomSegmentIteratorPtr backBotSegIt = botSegIt->clone(); backBotSegIt->toParseDown(newTopSegIt); hal_index_t startBack = (hal_index_t)backBotSegIt->getStartOffset(); hal_index_t endBack = (hal_index_t)backBotSegIt->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSourceSegIt = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSourceSegIt->getLength() > startDelta + endDelta); newSourceSegIt->slice(newSourceSegIt->getStartOffset() + startDelta, newSourceSegIt->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSourceSegIt, newTopSegIt)); assert(newMappedSeg->isTop() == true); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapUp(newMappedSeg, results, doDupes, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (topSegIt->getEndPosition() != rightCutoff) { topSegIt->toRight(rightCutoff); } else { break; } } while (true); } return added; }
/* Create a new MappedSegment whose source and target iterators are
 * independent copies of this segment's iterators.
 *
 * Returns a raw pointer to a MappedSegment allocated with `new`. */
MappedSegment *MappedSegment::clone() const {
    // clone() is declared on the concrete iterator types, so the copy has to
    // be made through a downcast to the matching level.
    const auto duplicate = [](const SegmentIteratorPtr &it) -> SegmentIteratorPtr {
        if (it->isTop()) {
            return std::dynamic_pointer_cast<TopSegmentIterator>(it)->clone();
        }
        return std::dynamic_pointer_cast<BottomSegmentIterator>(it)->clone();
    };

    SegmentIteratorPtr sourceCopy = duplicate(_source);
    SegmentIteratorPtr targetCopy = duplicate(_target);

    // The copies must cover exactly the same intervals as the originals.
    assert(sourceCopy->getStartPosition() == _source->getStartPosition() &&
           sourceCopy->getEndPosition() == _source->getEndPosition());
    assert(targetCopy->getStartPosition() == _target->getStartPosition() &&
           targetCopy->getEndPosition() == _target->getEndPosition());
    assert(_source->getLength() == _target->getLength());
    assert(sourceCopy->getLength() == targetCopy->getLength());

    MappedSegment *copy = new MappedSegment(sourceCopy, targetCopy);

    assert(copy->getStartPosition() == getStartPosition() && copy->getEndPosition() == getEndPosition() &&
           copy->_source->getStartPosition() == _source->getStartPosition() &&
           copy->_source->getEndPosition() == _source->getEndPosition());
    // The copy must not share iterator objects with this segment.
    assert(copy->_source.get() != _source.get() && copy->_target.get() != _target.get());
    return copy;
}
int MappedSegment::slowComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) { assert(s1->getGenome() == s2->getGenome()); int res = 0; hal_index_t sp1 = s1->getStartPosition(); hal_index_t ep1 = s1->getEndPosition(); hal_index_t sp2 = s2->getStartPosition(); hal_index_t ep2 = s2->getEndPosition(); if (s1->getReversed()) { swap(sp1, ep1); } if (s2->getReversed()) { swap(sp2, ep2); } if (sp1 < sp2) { res = -1; } else if (sp1 > sp2) { res = 1; } else if (ep1 < ep2) { res = -1; } else if (ep1 > ep2) { res = 1; } return res; }
void LodExtract::writeSegments(const Genome* inParent, const vector<const Genome*>& inChildren) { vector<const Genome*> inGenomes = inChildren; inGenomes.push_back(inParent); const Genome* outParent = _outAlignment->openGenome(inParent->getName()); assert(outParent != NULL && outParent->getNumBottomSegments() > 0); BottomSegmentIteratorPtr bottom; TopSegmentIteratorPtr top; SegmentIteratorPtr outSegment; // FOR EVERY GENOME for (hal_size_t i = 0; i < inGenomes.size(); ++i) { const Genome* inGenome = inGenomes[i]; Genome* outGenome = _outAlignment->openGenome(inGenome->getName()); SequenceIteratorPtr outSeqIt = outGenome->getSequenceIterator(); SequenceIteratorConstPtr outSeqEnd = outGenome->getSequenceEndIterator(); // FOR EVERY SEQUENCE IN GENOME for (; outSeqIt != outSeqEnd; outSeqIt->toNext()) { const Sequence* outSequence = outSeqIt->getSequence(); const Sequence* inSequence = inGenome->getSequence(outSequence->getName()); if (outGenome != outParent && outSequence->getNumTopSegments() > 0) { top = outSequence->getTopSegmentIterator(); outSegment = top; } else if (outSequence->getNumBottomSegments() > 0) { bottom = outSequence->getBottomSegmentIterator(); outSegment = bottom; } const LodGraph::SegmentSet* segSet = _graph.getSegmentSet(inSequence); assert(segSet != NULL); LodGraph::SegmentSet::const_iterator segIt = segSet->begin(); if (segSet->size() > 2) { //skip left telomere ++segIt; // use to skip right telomere: LodGraph::SegmentSet::const_iterator segLast = segSet->end(); --segLast; // FOR EVERY SEGMENT IN SEQUENCE for (; segIt != segLast; ++segIt) { // write the HAL array index back to the segment to make // future passes quicker. 
(*segIt)->setArrayIndex(outSegment->getArrayIndex()); outSegment->setCoordinates((*segIt)->getLeftPos(), (*segIt)->getLength()); assert(outSegment->getSequence()->getName() == inSequence->getName()); outSegment->toRight(); } } else if (outSequence->getSequenceLength() > 0) { assert(segSet->size() == 2); writeUnsampledSequence(outSequence, outSegment); } } } }
int MappedSegment::boundComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) { int res = 0; bool flip = s2->getReversed(); if (flip) { s2->toReverse(); } if (s1->isTop() && !s2->isTop()) { BottomSegmentIteratorPtr bot = std::dynamic_pointer_cast<BottomSegmentIterator>(s2); hal_index_t lb = bot->bseg()->getTopParseIndex(); hal_index_t ub = lb; if ((hal_size_t)bot->getArrayIndex() < bot->getGenome()->getNumBottomSegments() - 1) { bot = bot->clone(); bot->slice(0, 0); bot->toRight(); ub = bot->bseg()->getTopParseIndex(); } if (s1->getArrayIndex() < lb) { res = -1; } else if (s1->getArrayIndex() > ub) { res = 1; } } else if (!s1->isTop() && s2->isTop()) { TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(s2); hal_index_t lb = top->tseg()->getBottomParseIndex(); hal_index_t ub = lb; if ((hal_size_t)top->getArrayIndex() < top->getGenome()->getNumTopSegments() - 1) { top = top->clone(); top->slice(0, 0); top->toRight(); ub = top->tseg()->getBottomParseIndex(); } if (s1->getArrayIndex() < lb) { res = -1; } else if (s1->getArrayIndex() > ub) { res = 1; } } if (flip) { s2->toReverse(); } return res; }
int MappedSegment::fastComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) { // compare without accessing anything from disk (ie using only index // and offset) int res = 0; assert(s1->getGenome() == s2->getGenome()); if (s1->isTop() != s2->isTop()) { res = boundComp(s1, s2); if (res == 0) { res = slowComp(s1, s2); } } else { if (s1->getArrayIndex() < s2->getArrayIndex()) { res = -1; } else if (s1->getArrayIndex() > s2->getArrayIndex()) { res = 1; } else { hal_offset_t so1 = s1->getStartOffset(); hal_offset_t eo1 = s1->getEndOffset(); if (s1->getReversed()) { swap(so1, eo1); } hal_offset_t so2 = s2->getStartOffset(); hal_offset_t eo2 = s2->getEndOffset(); if (s2->getReversed()) { swap(so2, eo2); } if (so1 < so2) { res = -1; } else if (so1 > so2) { res = 1; } else if (eo1 > eo2) { res = -1; } else if (eo1 < eo2) { res = 1; } } } assert(res == slowComp(s1, s2)); return res; }
/* Smart-pointer convenience overload: unwraps `source` and forwards every
 * argument unchanged to halMapSegment(), returning its result. */
hal_size_t hal::halMapSegmentSP(const SegmentIteratorPtr &source, MappedSegmentSet &outSegments,
                                const Genome *tgtGenome, const std::set<const Genome *> *genomesOnPath,
                                bool doDupes, hal_size_t minLength, const Genome *coalescenceLimit,
                                const Genome *mrca) {
    SegmentIterator *const rawSource = source.get();
    return halMapSegment(rawSource, outSegments, tgtGenome, genomesOnPath, doDupes, minLength, coalescenceLimit,
                         mrca);
}
// note: takes smart pointer as it maybe added to the results static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) { hal_size_t added = 0; if (mappedSeg->isTop() == true) { SegmentIteratorPtr target = mappedSeg->getTargetIteratorPtr(); SegmentIteratorPtr source = mappedSeg->getSourceIteratorPtr(); TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(target); TopSegmentIteratorPtr topCopy = top->clone(); do { // FIXME: why isn't clone() polymorphic? SegmentIteratorPtr newSource; if (source->isTop()) { newSource = std::dynamic_pointer_cast<TopSegmentIterator>(source)->clone(); } else { newSource = std::dynamic_pointer_cast<BottomSegmentIterator>(source)->clone(); } TopSegmentIteratorPtr newTop = topCopy->clone(); MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, newTop)); assert(newMappedSeg->getGenome() == mappedSeg->getGenome()); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); results.push_back(newMappedSeg); ++added; if (topCopy->tseg()->hasNextParalogy()) { topCopy->toNextParalogy(); } } while (topCopy->tseg()->hasNextParalogy() == true && topCopy->getLength() >= minLength && topCopy->getArrayIndex() != top->getArrayIndex()); } else if (mappedSeg->getGenome()->getParent() != NULL) { hal_index_t rightCutoff = mappedSeg->getEndPosition(); BottomSegmentIteratorPtr bottom = mappedSeg->targetAsBottom(); hal_index_t startOffset = (hal_index_t)bottom->getStartOffset(); hal_index_t endOffset = (hal_index_t)bottom->getEndOffset(); TopSegmentIteratorPtr top = mappedSeg->getGenome()->getTopSegmentIterator(); top->toParseUp(bottom); do { TopSegmentIteratorPtr topNew = top->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas BottomSegmentIteratorPtr bottomBack = bottom->clone(); bottomBack->toParseDown(topNew); hal_index_t startBack = (hal_index_t)bottomBack->getStartOffset(); hal_index_t endBack = (hal_index_t)bottomBack->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSource = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSource->getLength() > startDelta + endDelta); newSource->slice(newSource->getStartOffset() + startDelta, newSource->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, topNew)); assert(newMappedSeg->isTop() == true); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapSelf(newMappedSeg, results, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (top->getEndPosition() != rightCutoff) { top->toRight(rightCutoff); } else { break; } } while (true); } return added; }