// Set top segments to be equal width and so that segment 1, 2, 3, // etc. corresponds to parent segment 1, 2, 3, etc. void setTopSegments(Genome *genome, hal_size_t width) { TopSegmentIteratorPtr topIt = genome->getTopSegmentIterator(); hal_size_t n = genome->getNumTopSegments(); hal_index_t startPos = 0; for (; topIt->getArrayIndex() < n; topIt->toRight(), startPos += width) { topIt->setCoordinates(startPos, width); topIt->tseg()->setParentIndex(topIt->getArrayIndex()); topIt->tseg()->setParentReversed(false); topIt->tseg()->setBottomParseIndex(NULL_INDEX); topIt->tseg()->setNextParalogyIndex(NULL_INDEX); } }
// Copy the top segments of this genome into `dest`.
//
// For each top segment the coordinates, parent-reversed flag, bottom-parse
// index and next-paralogy index are copied verbatim.  The parent index is
// remapped: the bottom segment it refers to is located in this genome's
// parent, the sequence containing it is looked up by name in dest's parent,
// and the index is rewritten so that it has the same offset from the start
// of that sequence's bottom-segment array in dest's parent.
//
// dest must either have no top segments (in which case nothing is done) or
// exactly as many as this genome (asserted).
//
// Throws hal_exception if dest's parent has no sequence with the name of
// the parent sequence a segment points to.
void Genome::copyTopSegments(Genome *dest) const {
    const Genome *inParent = getParent();
    const Genome *outParent = dest->getParent();

    TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
    TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
    hal_size_t n = dest->getNumTopSegments();
    assert(n == 0 || n == getNumTopSegments());

    if (n == 0) {
        // Nothing to do if there are no top segments.
        return;
    }

    // Both parents are dereferenced below, so they must exist once there
    // are top segments to copy.
    assert(inParent && outParent);

    BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();

    for (; static_cast<hal_size_t>(inTop->getArrayIndex()) < n; inTop->toRight(), outTop->toRight()) {
        assert(inTop->getStartPosition() != NULL_INDEX);

        // NOTE: a former check that the sequence at this start position has
        // the same name in both genomes is disabled; the per-segment
        // sequence lookups that existed only to feed it have been removed.

        outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
        outTop->setParentReversed(inTop->getParentReversed());
        outTop->setBottomParseIndex(inTop->getBottomParseIndex());
        outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

        // The parent's sequences may sit at different bottom-segment array
        // offsets in the two alignments, so translate the parent index via
        // the name of the sequence it falls in.
        hal_index_t parentIndex = inTop->getParentIndex();
        if (parentIndex != NULL_INDEX) {
            inParentBottomSegIt->toParent(inTop);
            const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
            const Sequence *outParentSequence = outParent->getSequence(inParentSequence->getName());
            if (!outParentSequence) {
                // Fail loudly instead of dereferencing a null sequence.
                throw hal_exception(string("When copying top segments from ") + getName() + " to " +
                                    dest->getName() + ": parent sequence " + inParentSequence->getName() +
                                    " not found in " + outParent->getName());
            }
            hal_index_t inParentSegmentOffset = parentIndex - inParentSequence->getBottomSegmentArrayIndex();
            parentIndex = inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();
        }
        outTop->setParentIndex(parentIndex);
    }
}
// Build the fixture for the genome-copy test.
//
// Two source genomes (a root and a leaf) are created in `alignment` and two
// destination genomes in a second, temporary alignment.  Each is given
// dimensions, DNA and segment data, with the destination values chosen to
// differ from the source values, and then each source genome is copied onto
// its destination counterpart.  The second alignment is closed at the end.
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    // The fixture expects an alignment that does not contain any genome yet.
    hal_size_t initialGenomeCount = alignment->getNumGenomes();
    CuAssertTrue(_testCase, initialGenomeCount == 0);

    // Hacky: a separate alignment is needed to test copying the bottom
    // segments correctly.  (The names of a node's children are used when
    // copying bottom segments, and two genomes can't have the same name in
    // the same alignment.)
    _path = getTempFile();
    _secondAlignment = AlignmentPtr(
        getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    // Source genomes.
    Genome *srcRoot = alignment->addRootGenome("AncGenome", 0);
    Genome *srcLeaf = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);

    // Destination genomes; these exercise copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence and copyMetadata.
    Genome *dstRoot = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *dstLeaf = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    srcRoot->getMetaData()->set("Young", "Jeezy");

    // Source dimensions.  NOTE(review): the Sequence::Info arguments are
    // presumably (name, length, #top segments, #bottom segments) -- confirm.
    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    srcRoot->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    srcLeaf->setDimensions(dims);

    // Fill both source genomes with the repeating motif "CAT".
    string srcMotif = "CAT";
    hal_index_t limit = srcRoot->getSequenceLength();
    DnaIteratorPtr dnaCursor = srcRoot->getDnaIterator();
    while (dnaCursor->getArrayIndex() < limit) {
        size_t motifPos = dnaCursor->getArrayIndex() % srcMotif.size();
        dnaCursor->setBase(srcMotif[motifPos]);
        dnaCursor->toRight();
    }
    dnaCursor->flush();

    limit = srcLeaf->getSequenceLength();
    dnaCursor = srcLeaf->getDnaIterator();
    while (dnaCursor->getArrayIndex() < limit) {
        size_t motifPos = dnaCursor->getArrayIndex() % srcMotif.size();
        dnaCursor->setBase(srcMotif[motifPos]);
        dnaCursor->toRight();
    }
    dnaCursor->flush();

    // Source leaf top segments: length 1 at their own array index, parent
    // index 3, reversed, bottom-parse index 5; next paralogy is 6 for every
    // segment except segment 6, which gets 7.
    TopSegmentIteratorPtr topCursor = srcLeaf->getTopSegmentIterator();
    limit = srcLeaf->getNumTopSegments();
    while (topCursor->getArrayIndex() < limit) {
        topCursor->setCoordinates(topCursor->getArrayIndex(), 1);
        topCursor->tseg()->setParentIndex(3);
        topCursor->tseg()->setParentReversed(true);
        topCursor->tseg()->setBottomParseIndex(5);
        topCursor->tseg()->setNextParalogyIndex(topCursor->getArrayIndex() == 6 ? 7 : 6);
        topCursor->toRight();
    }

    // Source root bottom segments: length 1 at their own array index, with
    // child 0 set to index 3, reversed, and top-parse index 5.
    BottomSegmentIteratorPtr botCursor = srcRoot->getBottomSegmentIterator();
    limit = srcRoot->getNumBottomSegments();
    while (botCursor->getArrayIndex() < limit) {
        botCursor->setCoordinates(botCursor->getArrayIndex(), 1);
        botCursor->bseg()->setChildIndex(0, 3);
        botCursor->bseg()->setChildReversed(0, true);
        botCursor->bseg()->setTopParseIndex(5);
        botCursor->toRight();
    }

    // Destination dimensions, deliberately different from the sources.
    dims[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    dstRoot->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    dstLeaf->setDimensions(dims);

    // Fill both destination genomes with the repeating motif "TAG".
    string dstMotif = "TAG";
    dnaCursor = dstRoot->getDnaIterator();
    limit = dstRoot->getSequenceLength();
    while (dnaCursor->getArrayIndex() < limit) {
        size_t motifPos = dnaCursor->getArrayIndex() % dstMotif.size();
        dnaCursor->setBase(dstMotif[motifPos]);
        dnaCursor->toRight();
    }
    dnaCursor->flush();

    dnaCursor = dstLeaf->getDnaIterator();
    limit = dstLeaf->getSequenceLength();
    while (dnaCursor->getArrayIndex() < limit) {
        size_t motifPos = dnaCursor->getArrayIndex() % dstMotif.size();
        dnaCursor->setBase(dstMotif[motifPos]);
        dnaCursor->toRight();
    }
    dnaCursor->flush();

    // Destination leaf top segments, pre-filled with values that differ
    // from the source ones set above.
    topCursor = dstLeaf->getTopSegmentIterator();
    limit = dstLeaf->getNumTopSegments();
    while (topCursor->getArrayIndex() < limit) {
        topCursor->setCoordinates(7, 8);
        topCursor->tseg()->setParentIndex(9);
        topCursor->tseg()->setParentReversed(false);
        topCursor->tseg()->setBottomParseIndex(11);
        topCursor->tseg()->setNextParalogyIndex(topCursor->getArrayIndex() == 12 ? 7 : 12);
        topCursor->toRight();
    }

    // Destination root bottom segments, likewise pre-filled with values
    // that differ from the source ones.
    botCursor = dstRoot->getBottomSegmentIterator();
    limit = dstRoot->getNumBottomSegments();
    while (botCursor->getArrayIndex() < limit) {
        botCursor->setCoordinates(6, 7);
        botCursor->bseg()->setChildIndex(0, 8);
        botCursor->bseg()->setChildReversed(0, false);
        botCursor->bseg()->setTopParseIndex(10);
        botCursor->toRight();
    }

    // Copy each source genome onto its destination, then close the second
    // alignment.
    srcRoot->copy(dstRoot);
    srcLeaf->copy(dstLeaf);
    _secondAlignment->close();
}