void LodExtract::writeUnsampledSequence(const Sequence* outSequence, SegmentIteratorPtr outSegment) { outSegment->setCoordinates(outSequence->getStartPosition(), outSequence->getSequenceLength()); if (outSegment->isTop()) { assert(outSequence->getNumTopSegments() == 1); TopSegmentIteratorPtr top = outSegment.downCast<TopSegmentIteratorPtr>(); top->setParentIndex(NULL_INDEX); top->setParentReversed(false); top->setNextParalogyIndex(NULL_INDEX); top->setBottomParseIndex(NULL_INDEX); } else { assert(outSequence->getNumBottomSegments() == 1); BottomSegmentIteratorPtr bottom = outSegment.downCast<BottomSegmentIteratorPtr>(); hal_size_t numChildren = bottom->getNumChildren(); for (hal_size_t childNum = 0; childNum < numChildren; ++childNum) { bottom->setChildIndex(childNum, NULL_INDEX); bottom->setChildReversed(childNum, false); } bottom->setTopParseIndex(NULL_INDEX); } }
/**
 * Copy every top segment of this genome onto the corresponding top segment
 * of dest.
 *
 * Coordinates, parent orientation, bottom-parse index and next-paralogy
 * index are copied verbatim.  A non-null parent index is rebased: the parent
 * bottom segment's sequence is looked up by name in dest's parent, and the
 * index is shifted by the difference between the two sequences'
 * getBottomSegmentArrayIndex() values, so the copied segment keeps the same
 * offset within the same-named sequence.
 *
 * @param dest destination genome.  It must have either zero top segments
 *             (in which case nothing is copied) or exactly as many as this
 *             genome; this is asserted.
 */
void Genome::copyTopSegments(Genome *dest) const {
    const Genome *inParent = getParent();
    const Genome *outParent = dest->getParent();

    TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
    TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
    const hal_size_t n = dest->getNumTopSegments();
    assert(n == 0 || n == getNumTopSegments());

    if (n == 0) {
        // Nothing to do if there are no top segments.
        return;
    }

    // Reused for every segment; repositioned with toParent() below.
    BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();

    for (; static_cast<hal_size_t>(inTop->getArrayIndex()) < n; inTop->toRight(), outTop->toRight()) {
        assert(inTop->getStartPosition() != NULL_INDEX);

        // The sequence names found at this site in the two genomes are not
        // compared here; parent links are rebased by sequence name below.
        outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
        outTop->setParentIndex(inTop->getParentIndex());
        outTop->setParentReversed(inTop->getParentReversed());
        outTop->setBottomParseIndex(inTop->getBottomParseIndex());
        outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

        // The parent index copied above is only valid if the parent sequence
        // starts at the same bottom-segment array index in both parent
        // genomes.  Recompute it relative to the same-named sequence in the
        // destination parent.
        if (inTop->getParentIndex() != NULL_INDEX) {
            inParentBottomSegIt->toParent(inTop);

            const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
            const Sequence *outParentSequence = outParent->getSequence(inParentSequence->getName());
            // Guard the dereference below: the destination parent is expected
            // to contain a sequence with this name.
            assert(outParentSequence != NULL);

            const hal_index_t inParentSegmentOffset =
                inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
            const hal_index_t outParentSegmentIndex =
                inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();

            outTop->setParentIndex(outParentSegmentIndex);
        }
    }
}
void LodExtract::updateBlockEdges(const Genome* inParentGenome, SegmentMap& segMap, const LodBlock* block, BottomSegmentIteratorPtr bottom, TopSegmentIteratorPtr top) { Genome* outParentGenome = bottom->getGenome(); const LodSegment* rootSeg = NULL; SegmentSet* segSet; SegmentSet::iterator setIt; // Zap all segments in parent genome SegmentMap::iterator mapIt = segMap.find(inParentGenome); if (mapIt != segMap.end()) { segSet = mapIt->second; assert(segSet != NULL); setIt = segSet->begin(); for (; setIt != segSet->end(); ++setIt) { bottom->setArrayIndex(outParentGenome, (*setIt)->getArrayIndex()); for (hal_size_t i = 0; i < bottom->getNumChildren(); ++i) { bottom->setChildIndex(i, NULL_INDEX); bottom->setTopParseIndex(NULL_INDEX); } } // Choose first segment as parent to all segments in the child genome setIt = segSet->begin(); rootSeg = *(setIt); bottom->setArrayIndex(outParentGenome, (*setIt)->getArrayIndex()); } // Do the child genomes const Genome* inGrandParentGenome = inParentGenome->getParent(); SegmentSet::iterator nextIt; for (mapIt = segMap.begin(); mapIt != segMap.end(); ++mapIt) { if (mapIt->first != inParentGenome and mapIt->first != inGrandParentGenome) { Genome* outChildGenome = _outAlignment->openGenome(mapIt->first->getName()); hal_index_t childIndex = outParentGenome->getChildIndex(outChildGenome); assert(childIndex >= 0); segSet = mapIt->second; assert(segSet != NULL); for (setIt = segSet->begin(); setIt != segSet->end(); ++setIt) { top->setArrayIndex(outChildGenome, (*setIt)->getArrayIndex()); top->setBottomParseIndex(NULL_INDEX); // Connect to parent if (rootSeg != NULL) { top->setParentIndex(bottom->getArrayIndex()); bool reversed = (*setIt)->getFlipped() != rootSeg->getFlipped(); top->setParentReversed(reversed); if (setIt == segSet->begin()) { bottom->setChildIndex(childIndex, top->getArrayIndex()); bottom->setChildReversed(childIndex, reversed); } } else { top->setParentIndex(NULL_INDEX); } // Connect to next paralogy SegmentSet::iterator 
setNext = setIt; ++setNext; if (setNext == segSet->end()) { setNext = segSet->begin(); } if (setNext == setIt) { top->setNextParalogyIndex(NULL_INDEX); } else { top->setNextParalogyIndex((*setNext)->getArrayIndex()); } } } } }