/**
 * Builds the fixture for the sequence-update test.
 *
 * Checks that the alignment starts with no genomes, adds a root genome named
 * "AncGenome" with 1000 sequences ("sequence0" .. "sequence999"), closes and
 * re-opens that genome, then rewrites the top dimensions of the first half of
 * the sequences and the bottom dimensions of the first third.
 *
 * @param alignment alignment to populate; asserted (via CuTest) to contain
 *                  zero genomes on entry.
 */
void SequenceUpdateTest::createCallBack(Alignment *alignment) {
    // The fixture must start from an alignment that holds no genomes.
    const hal_size_t genomeCount = alignment->getNumGenomes();
    CuAssertTrue(_testCase, genomeCount == 0);

    Genome *root = alignment->addRootGenome("AncGenome", 0);

    // Initial dimensions: sequence k has length 6k + 1 and is created with the
    // segment counts k and 2k (third and fourth Sequence::Info arguments).
    const size_t sequenceTotal = 1000;
    vector<Sequence::Info> initialDims;
    for (size_t k = 0; k != sequenceTotal; ++k) {
        const string label = "sequence" + std::to_string(k);
        const hal_size_t length = k * 6 + 1;
        initialDims.push_back(Sequence::Info(label, length, k, k * 2));
    }
    root->setDimensions(initialDims);

    // Close and re-open the genome before applying any updates.
    alignment->closeGenome(root);
    root = alignment->openGenome("AncGenome");

    // Looks up the first `count` sequences by name and pairs each sequence's
    // name with the new segment count k * scale.
    auto collectUpdates = [&root](size_t count, size_t scale) -> vector<Sequence::UpdateInfo> {
        vector<Sequence::UpdateInfo> updates;
        for (size_t k = 0; k != count; ++k) {
            const Sequence *sequence = root->getSequence("sequence" + std::to_string(k));
            updates.push_back(Sequence::UpdateInfo(sequence->getName(), k * scale));
        }
        return updates;
    };

    // First half of the sequences: new top count is 7k.
    vector<Sequence::UpdateInfo> topUpdates = collectUpdates(sequenceTotal / 2, 7);
    root->updateTopDimensions(topUpdates);

    // First third of the sequences: new bottom count is 5k.
    vector<Sequence::UpdateInfo> bottomUpdates = collectUpdates(sequenceTotal / 3, 5);
    root->updateBottomDimensions(bottomUpdates);
}
void LodExtract::writeDimensions( const map<const Sequence*, hal_size_t>& segmentCounts, const string& parentName, const vector<string>& childNames) { // initialize a dimensions list for each (input) genome map<const Genome*, vector<Sequence::Info> > dimMap; map<const Genome*, vector<Sequence::Info> >::iterator dimMapIt; vector<string> newGenomeNames = childNames; newGenomeNames.push_back(parentName); for (size_t i = 0; i < newGenomeNames.size(); ++i) { const Genome* inGenome = _inAlignment->openGenome(newGenomeNames[i]); pair<const Genome*, vector<Sequence::Info> > newEntry; newEntry.first = inGenome; // it's important we keep the sequences in the output genome // in the same order as the sequences in the input genome since // we always use global coordinates! SequenceIteratorConstPtr seqIt = inGenome->getSequenceIterator(); SequenceIteratorConstPtr seqEnd = inGenome->getSequenceEndIterator(); for (; seqIt != seqEnd; seqIt->toNext()) { const Sequence* inSequence = seqIt->getSequence(); map<const Sequence*, hal_size_t>::const_iterator segMapIt; segMapIt = segmentCounts.find(inSequence); // we skip empty sequences for now with below check if (segMapIt != segmentCounts.end()) { vector<Sequence::Info>& segDims = newEntry.second; hal_size_t nTop = inGenome->getName() == parentName ? 0 : segMapIt->second; hal_size_t nBot = inGenome->getName() != parentName ? 
0 : segMapIt->second; segDims.push_back(Sequence::Info(inSequence->getName(), inSequence->getSequenceLength(), nTop, nBot)); } } // note potential bug here for genome with no data dimMap.insert(newEntry); } // now that we have the dimensions for each genome, update them in // the output alignment for (dimMapIt = dimMap.begin(); dimMapIt != dimMap.end(); ++dimMapIt) { Genome* newGenome = _outAlignment->openGenome(dimMapIt->first->getName()); assert(newGenome != NULL); vector<Sequence::Info>& segDims = dimMapIt->second; // ROOT if (newGenome->getName() == _outAlignment->getRootName()) { assert(newGenome->getName() == parentName); newGenome->setDimensions(segDims, _keepSequences); } // LEAF else if (newGenome->getName() != parentName) { newGenome->setDimensions(segDims, _keepSequences); } // INTERNAL NODE else { vector<Sequence::UpdateInfo> updateInfo; for (size_t i = 0; i < segDims.size(); ++i) { updateInfo.push_back( Sequence::UpdateInfo(segDims[i]._name, segDims[i]._numBottomSegments)); } newGenome->updateBottomDimensions(updateInfo); } } }