// Validates the alignment, then runs testTopSegment() against the genome
// named "parent" for: the first top segment of child2 and of child1 (whole,
// sliced, and sliced + reversed), and every top segment of g1 under a
// sequence of slice / reverse operations.
void MappedSegmentMapUpTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome* firstChild = alignment->openGenome("child1");
  const Genome* secondChild = alignment->openGenome("child2");

  // child2 is exercised before child1.
  const Genome* const children[] = {secondChild, firstChild};
  for (size_t c = 0; c < 2; ++c)
  {
    TopSegmentIteratorConstPtr seg = children[c]->getTopSegmentIterator();
    testTopSegment(alignment, seg, "parent");
    seg->slice(1, 2);
    testTopSegment(alignment, seg, "parent");
    seg->toReverse();
    testTopSegment(alignment, seg, "parent");
  }

  const Genome* g1 = alignment->openGenome("g1");
  for (hal_size_t idx = 0; idx < g1->getNumTopSegments(); ++idx)
  {
    TopSegmentIteratorConstPtr seg = g1->getTopSegmentIterator(idx);
    testTopSegment(alignment, seg, "parent");
    seg->slice(1, 0);
    testTopSegment(alignment, seg, "parent");
    seg->toReverse();
    testTopSegment(alignment, seg, "parent");
    seg->slice(0, 1);
    testTopSegment(alignment, seg, "parent");
    seg->toReverse();
    testTopSegment(alignment, seg, "parent");
  }
}
// Checks that an unknown genome name opens as NULL and that "AncGenome" and
// the three leaves report the expected metadata, names and dimensions.
void GenomeCreateTest::checkCallBack(AlignmentConstPtr alignment)
{
  // A name that is not expected in the alignment must not resolve.
  const Genome* missing = alignment->openGenome("Zebra");
  CuAssertTrue(_testCase, missing == NULL);

  const Genome* anc = alignment->openGenome("AncGenome");
  const MetaData* meta = anc->getMetaData();
  CuAssertTrue(_testCase, meta->get("Young") == "Jeezy");

  const Genome* leafA = alignment->openGenome("Leaf1");
  const Genome* leafB = alignment->openGenome("Leaf2");
  const Genome* leafC = alignment->openGenome("Leaf3");

  // Names.
  CuAssertTrue(_testCase, anc->getName() == "AncGenome");
  CuAssertTrue(_testCase, leafA->getName() == "Leaf1");
  CuAssertTrue(_testCase, leafB->getName() == "Leaf2");
  CuAssertTrue(_testCase, leafC->getName() == "Leaf3");

  // Ancestor dimensions.
  CuAssertTrue(_testCase, anc->getSequenceLength() == 1000000);
  CuAssertTrue(_testCase, anc->getNumTopSegments() == 5000);
  CuAssertTrue(_testCase, anc->getNumBottomSegments() == 700000);

  // Leaf dimensions: each leaf has top segments only.
  CuAssertTrue(_testCase, leafA->getSequenceLength() == 1000000);
  CuAssertTrue(_testCase, leafA->getNumTopSegments() == 700000);
  CuAssertTrue(_testCase, leafA->getNumBottomSegments() == 0);

  CuAssertTrue(_testCase, leafB->getSequenceLength() == 2000000);
  CuAssertTrue(_testCase, leafB->getNumTopSegments() == 700000);
  CuAssertTrue(_testCase, leafB->getNumBottomSegments() == 0);

  CuAssertTrue(_testCase, leafC->getSequenceLength() == 3000000);
  CuAssertTrue(_testCase, leafC->getNumTopSegments() == 700000);
  CuAssertTrue(_testCase, leafC->getNumBottomSegments() == 0);
}
// Copies from topAlignment into mainAlignment:
//   - the top dimensions and top segments of genomeName,
//   - the bottom dimensions and bottom segments of its parent,
//   - the top dimensions and top segments of every other child of that parent.
// fixParseInfo() is called on each destination genome after its copy.
void copyFromTopAlignment(AlignmentConstPtr topAlignment, AlignmentPtr mainAlignment, const string &genomeName)
{
  Genome *dstGenome = mainAlignment->openGenome(genomeName);
  const Genome *srcGenome = topAlignment->openGenome(genomeName);
  srcGenome->copyTopDimensions(dstGenome);
  srcGenome->copyTopSegments(dstGenome);
  dstGenome->fixParseInfo();

  // The parent's bottom segments come from the same source alignment.
  Genome *dstParent = dstGenome->getParent();
  const Genome *srcParent = srcGenome->getParent();
  srcParent->copyBottomDimensions(dstParent);
  srcParent->copyBottomSegments(dstParent);
  dstParent->fixParseInfo();

  // Siblings of the replaced genome get their top segments refreshed too.
  vector<string> childNames = mainAlignment->getChildNames(dstParent->getName());
  for (vector<string>::const_iterator it = childNames.begin();
       it != childNames.end(); ++it)
  {
    if (!(*it != genomeName))
    {
      continue;
    }
    Genome *dstSibling = mainAlignment->openGenome(*it);
    const Genome *srcSibling = topAlignment->openGenome(*it);
    srcSibling->copyTopDimensions(dstSibling);
    srcSibling->copyTopSegments(dstSibling);
    dstSibling->fixParseInfo();
  }
}
// Maps `top` onto the other child genome ("child2" when top is in "child1",
// otherwise "child1") and checks that exactly one mapped segment comes back,
// that its source matches `top`, and that its target matches the segment
// reached by walking top -> parent bottom segment -> other child.
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment, TopSegmentIteratorConstPtr top)
{
  const Genome* parent = top->getGenome()->getParent();

  const Genome* other = NULL;
  if (top->getGenome()->getName() == "child1")
  {
    other = alignment->openGenome("child2");
  }
  else
  {
    other = alignment->openGenome("child1");
  }

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, other, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // The source side of the mapping must be the query segment itself.
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // Build the expected target by hand through the parent.
  BottomSegmentIteratorConstPtr viaParent = parent->getBottomSegmentIterator();
  viaParent->toParent(top);
  TopSegmentIteratorConstPtr expected = other->getTopSegmentIterator();
  expected->toChildG(viaParent, other);

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
// Checks coordinates, lengths, orientation and sequence strings of top
// segment iterators in "child1" after toChild() from "parent1" (whole and
// sliced) and after toParseUp() from child1's own bottom segments in both
// orientations.
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* parent1 = alignment->openGenome("parent1");
  const Genome* child1 = alignment->openGenome("child1");

  TopSegmentIteratorConstPtr topIt = child1->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr botIt = parent1->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr childIt = child1->getTopSegmentIterator();
  childIt->toChild(botIt, 0);

  // Plain iterator versus the one reached through the parent.
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 10);
  CuAssertTrue(_testCase, topIt->getReversed() == false);

  CuAssertTrue(_testCase, childIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, childIt->getLength() == 10);
  CuAssertTrue(_testCase, childIt->getReversed() == true);

  // Slice the parent segment and follow it down again.
  botIt->slice(1, 3);
  childIt->toChild(botIt, 0);

  CuAssertTrue(_testCase, botIt->getStartPosition() == 1);
  CuAssertTrue(_testCase, botIt->getLength() == 6);
  CuAssertTrue(_testCase, childIt->getStartPosition() == 8);
  CuAssertTrue(_testCase, childIt->getLength() == 6);

  string dna;
  botIt->getString(dna);
  CuAssertTrue(_testCase, dna == "CCTACG");
  childIt->getString(dna);
  CuAssertTrue(_testCase, dna == "CACGTA");

  // Parse-up from child1's own bottom segments.
  botIt = child1->getBottomSegmentIterator();
  CuAssertTrue(_testCase, botIt->getReversed() == false);

  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 0);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  botIt->toReverse();
  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 4);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  // Back to forward orientation, then one segment to the right.
  botIt->toReverse();
  CuAssertTrue(_testCase, botIt->getReversed() == false);
  botIt->toRight();

  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 5);
  CuAssertTrue(_testCase, topIt->getLength() == 5);

  botIt->toReverse();
  topIt->toParseUp(botIt);
  CuAssertTrue(_testCase, topIt->getStartPosition() == 9);
  CuAssertTrue(_testCase, topIt->getLength() == 5);
}
// Runs checkGenome() on the genomes named "case1" and "case2".
// (The original declared a TopSegmentIteratorConstPtr that was never used;
// it has been dropped.)
void TopSegmentIteratorToSiteTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1
  const Genome* case1 = alignment->openGenome("case1");
  checkGenome(case1);

  // case 2
  const Genome* case2 = alignment->openGenome("case2");
  checkGenome(case2);
}
// For genomes "case1".."case4": parses up from a bottom segment (whole, then
// sliced) to a top segment and checks that the two iterators agree on start
// position (and, for case 1, on length as well).
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1: first bottom segment, slice(3, 1)
  {
    const Genome* genome = alignment->openGenome("case1");
    TopSegmentIteratorConstPtr upper = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr lower = genome->getBottomSegmentIterator();
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
    CuAssertTrue(_testCase, lower->getLength() == upper->getLength());

    lower->slice(3, 1);
    upper->toParseUp(lower);
    // The sliced iterator is 3 + 1 bases shorter than the full segment.
    CuAssertTrue(_testCase,
                 lower->getLength() ==
                 lower->getBottomSegment()->getLength() - 4);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
    CuAssertTrue(_testCase, lower->getLength() == upper->getLength());
  }

  // case 2: bottom segment 1, slice(1, 1)
  {
    const Genome* genome = alignment->openGenome("case2");
    TopSegmentIteratorConstPtr upper = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr lower = genome->getBottomSegmentIterator(1);
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
    lower->slice(1, 1);
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
  }

  // case 3: first bottom segment, slice(2, 1)
  {
    const Genome* genome = alignment->openGenome("case3");
    TopSegmentIteratorConstPtr upper = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr lower = genome->getBottomSegmentIterator();
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
    lower->slice(2, 1);
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
  }

  // case 4: bottom segment 1, slice(2, 2)
  {
    const Genome* genome = alignment->openGenome("case4");
    TopSegmentIteratorConstPtr upper = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr lower = genome->getBottomSegmentIterator(1);
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
    lower->slice(2, 2);
    upper->toParseUp(lower);
    CuAssertTrue(_testCase,
                 lower->getStartPosition() == upper->getStartPosition());
  }
}
// Maps `top` onto the genome named ancName and checks that exactly one
// mapped segment is returned, that its source equals `top`, and that its
// target equals the bottom segment found by walking up the tree by hand
// (one extra hop is taken when the first parent is not the ancestor).
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment, TopSegmentIteratorConstPtr top, const string& ancName)
{
  const Genome* ancestor = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, ancestor, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // Source side must mirror the query segment.
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  BottomSegmentIteratorConstPtr expected = ancestor->getBottomSegmentIterator();
  expected->toParent(top);
  if (expected->getGenome() != ancestor)
  {
    // extra hop for when top is in grand child
    TopSegmentIteratorConstPtr hop =
      expected->getGenome()->getTopSegmentIterator();
    hop->toParseUp(expected);
    expected->toParent(hop);
  }

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
// Sanity-checks an alignment referenced by a level-of-detail entry.
//
// minQuery:  the minimum query length associated with this alignment; 0
//            identifies the highest level of detail.
// path:      where the alignment was specified (used in error messages only).
// alignment: the opened alignment to check.
//
// Throws hal_exception when the alignment contains no genomes. In builds
// where NDEBUG is not defined, also throws when minQuery is 0 and the first
// leaf below the root (or the root itself when there are no leaves) does not
// contain a DNA array.
void LodManager::checkAlignment(hal_size_t minQuery, const string& path, AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    stringstream ss;
    ss << "No genomes found in base alignment specified in " << path;
    throw hal_exception(ss.str());
  }

#ifndef NDEBUG
  if (minQuery == 0)
  {
    vector<string> leafNames = alignment->getLeafNamesBelow(
      alignment->getRootName());
    string name = !leafNames.empty() ? leafNames[0] : alignment->getRootName();
    const Genome* genome = alignment->openGenome(name);
    bool seqFound = genome->containsDNAArray();
    // Close the genome before a possible throw.
    alignment->closeGenome(genome);
    if (seqFound == false)
    {
      stringstream ss;
      // A separating space follows the path so the message does not read
      // "...in <path>must contain...".
      ss << "HAL file for highest level of detail (0) in " << path
         << " must contain DNA sequence information.";
      throw hal_exception(ss.str());
    }
  }
#endif
}
// For every ordered pair of genomes in the tree (including a genome paired
// with itself) where both have a non-zero sequence length, sets _ref/_tgt and
// runs createColArray(), createBlockArray() and compareArrays().
// Does nothing for an alignment without genomes.
void MappedSegmentColCompareTest::checkCallBack(AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    return;
  }

  validateAlignment(alignment);

  set<const Genome*> allGenomes;
  hal::getGenomesInSubTree(alignment->openGenome(alignment->getRootName()),
                           allGenomes);

  typedef set<const Genome*>::iterator GenomeIt;
  for (GenomeIt src = allGenomes.begin(); src != allGenomes.end(); ++src)
  {
    const Genome* refGenome = *src;
    for (GenomeIt tgt = allGenomes.begin(); tgt != allGenomes.end(); ++tgt)
    {
      const Genome* queryGenome = *tgt;
      const bool bothNonEmpty = refGenome->getSequenceLength() > 0 &&
                                queryGenome->getSequenceLength() > 0;
      if (!bothNonEmpty)
      {
        continue;
      }
      _ref = refGenome;
      _tgt = queryGenome;
      createColArray();
      createBlockArray();
      compareArrays();
    }
  }
}
// Writes to `os` the names of all genomes in the tree that have a parent,
// separated by single spaces and followed by endl. Each such genome names
// the branch leading to it.
static void printBranches(ostream& os, AlignmentConstPtr alignment)
{
  const Genome* root = alignment->openGenome(alignment->getRootName());
  set<const Genome*> nodes;
  getGenomesInSubTree(root, nodes);
  nodes.insert(root);

  bool wroteOne = false;
  set<const Genome*>::iterator it = nodes.begin();
  while (it != nodes.end())
  {
    const Genome* node = *it;
    ++it;
    if (node->getParent() == NULL)
    {
      // The root has no incoming branch.
      continue;
    }
    if (wroteOne)
    {
      os << " ";
    }
    wroteOne = true;
    os << node->getName();
  }
  os << endl;
}
// Checks that "AncGenome" reports its name and that its full sequence string
// equals the fixture's _string member.
void GenomeStringTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* anc = alignment->openGenome("AncGenome");
  CuAssertTrue(_testCase, anc->getName() == "AncGenome");

  string dna;
  anc->getString(dna);
  CuAssertTrue(_testCase, dna == _string);
}
// Checks the name and the three dimensions (sequence length, number of top
// segments, number of bottom segments) reported by "AncGenome".
void GenomeUpdateTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* anc = alignment->openGenome("AncGenome");

  CuAssertTrue(_testCase, anc->getName() == "AncGenome");

  // Expected dimensions.
  CuAssertTrue(_testCase, anc->getSequenceLength() == 10000005);
  CuAssertTrue(_testCase, anc->getNumTopSegments() == 14000);
  CuAssertTrue(_testCase, anc->getNumBottomSegments() == 2000001);
}
// Checks that "AncGenome" carries the metadata entry Young -> Jeezy, has all
// dimensions equal to zero, and reports its own name.
void GenomeMetaTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* anc = alignment->openGenome("AncGenome");

  const MetaData* meta = anc->getMetaData();
  CuAssertTrue(_testCase, meta->get("Young") == "Jeezy");

  // The genome is expected to be empty apart from its metadata.
  CuAssertTrue(_testCase, anc->getSequenceLength() == 0);
  CuAssertTrue(_testCase, anc->getNumTopSegments() == 0);
  CuAssertTrue(_testCase, anc->getNumBottomSegments() == 0);

  CuAssertTrue(_testCase, anc->getName() == "AncGenome");
}
// Writes "<numTopSegments> <numBottomSegments>" followed by endl for the
// genome named genomeName.
// Throws hal_exception if the genome cannot be opened.
void printNumSegments(ostream& os, AlignmentConstPtr alignment, const string& genomeName)
{
  const Genome* genome = alignment->openGenome(genomeName);
  if (genome == NULL)
  {
    string message("Genome ");
    message += genomeName;
    message += " not found.";
    throw hal_exception(message);
  }

  os << genome->getNumTopSegments();
  os << " ";
  os << genome->getNumBottomSegments();
  os << endl;
}
// Validates the alignment, opens genomes "g1".."g5", and runs
// testTopSegment() on every top segment of each of them in turn.
void MappedSegmentParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const char* const names[] = {"g1", "g2", "g3", "g4", "g5"};
  const size_t numGenomes = sizeof(names) / sizeof(names[0]);

  // All genomes are opened up front, before any segment is tested.
  const Genome* genomes[numGenomes];
  for (size_t g = 0; g < numGenomes; ++g)
  {
    genomes[g] = alignment->openGenome(names[g]);
  }

  for (size_t g = 0; g < numGenomes; ++g)
  {
    const Genome* current = genomes[g];
    for (size_t seg = 0; seg < current->getNumTopSegments(); ++seg)
    {
      TopSegmentIteratorConstPtr segIt = current->getTopSegmentIterator(seg);
      testTopSegment(alignment, segIt);
    }
  }
}
// Checks top segment 100 of "Anc0": its start position, its length, its
// sequence string, and the string reported after reversing the iterator.
void TopSegmentSequenceTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* anc = alignment->openGenome("Anc0");
  TopSegmentIteratorConstPtr segIt = anc->getTopSegmentIterator(100);

  CuAssertTrue(_testCase, segIt->getTopSegment()->getStartPosition() == 500);
  CuAssertTrue(_testCase, segIt->getTopSegment()->getLength() == 9);

  string dna;
  segIt->getString(dna);
  CuAssertTrue(_testCase, dna == "CACACATTC");

  // Same segment read through the reversed iterator.
  segIt->toReverse();
  segIt->getString(dna);
  CuAssertTrue(_testCase, dna == "GAATGTGTG");
}
// Validates the alignment, then runs testBottomSegment() with third argument
// 0 and 1 on the first bottom segment of "parent" in three states: as is,
// after slice(1, 2), and after additionally reversing it.
void MappedSegmentMapDownTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome* parent = alignment->openGenome("parent");
  BottomSegmentIteratorConstPtr seg = parent->getBottomSegmentIterator();

  for (int stage = 0; stage < 3; ++stage)
  {
    if (stage == 1)
    {
      seg->slice(1, 2);
    }
    else if (stage == 2)
    {
      seg->toReverse();
    }
    for (int which = 0; which < 2; ++which)
    {
      testBottomSegment(alignment, seg, which);
    }
  }
}
// Copies from bottomAlignment into mainAlignment:
//   - dimensions, sequence, bottom dimensions and bottom segments of
//     genomeName,
//   - top dimensions and top segments of each of its children (as listed by
//     mainAlignment).
// fixParseInfo() is called on each destination genome after its copy.
void copyFromBottomAlignment(AlignmentConstPtr bottomAlignment, AlignmentPtr mainAlignment, const string &genomeName)
{
  // The genome being replaced: sequence plus bottom segments.
  Genome *dstGenome = mainAlignment->openGenome(genomeName);
  const Genome *srcGenome = bottomAlignment->openGenome(genomeName);
  srcGenome->copyDimensions(dstGenome);
  srcGenome->copySequence(dstGenome);
  srcGenome->copyBottomDimensions(dstGenome);
  srcGenome->copyBottomSegments(dstGenome);
  dstGenome->fixParseInfo();

  // Its children: top segments only.
  vector<string> childNames = mainAlignment->getChildNames(genomeName);
  for (vector<string>::const_iterator it = childNames.begin();
       it != childNames.end(); ++it)
  {
    Genome *dstChild = mainAlignment->openGenome(*it);
    const Genome *srcChild = bottomAlignment->openGenome(*it);
    srcChild->copyTopDimensions(dstChild);
    srcChild->copyTopSegments(dstChild);
    dstChild->fixParseInfo();
  }
}
// Writes the names of all genomes in the tree (root included) to `os` as a
// comma-separated list followed by endl. The order is the iteration order of
// a set of Genome pointers.
void printGenomes(ostream& os, AlignmentConstPtr alignment)
{
  const Genome* root = alignment->openGenome(alignment->getRootName());
  set<const Genome*> nodes;
  getGenomesInSubTree(root, nodes);
  nodes.insert(root);

  bool needComma = false;
  set<const Genome*>::iterator it = nodes.begin();
  while (it != nodes.end())
  {
    if (needComma)
    {
      os << ",";
    }
    needComma = true;
    os << (*it)->getName();
    ++it;
  }
  os << endl;
}
// Maps `top` onto the genome named "parent" and checks, for every mapped
// segment returned:
//   - its source lies in top's genome and its target in "parent",
//   - no position is reported by more than one mapped segment,
//   - source and target share start and end positions,
//   - mapping the result back to top's genome yields exactly one segment
//     with matching coordinates on both sides.
void MappedSegmentParseTest::testTopSegment(AlignmentConstPtr alignment, TopSegmentIteratorConstPtr top)
{
  const Genome* parent = alignment->openGenome("parent");
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, parent, NULL, false);

  // covered[k] refers to the k-th base of `top`. Mapped positions are
  // absolute coordinates, so they are rebased onto top's start before being
  // used as an index; indexing with the absolute position would run past the
  // end of the vector for any segment that does not start at 0.
  // NOTE(review): assumes `top` is in forward orientation here, as it is in
  // the visible caller — confirm before reusing with reversed iterators.
  vector<bool> covered(top->getLength(), false);
  const hal_index_t topStart = top->getStartPosition();

  CuAssertTrue(_testCase, results.size() >= 1);
  set<MappedSegmentConstPtr>::iterator i = results.begin();
  for (; i != results.end(); ++i)
  {
    MappedSegmentConstPtr mseg = *i;
    CuAssertTrue(_testCase, mseg->getSource()->getGenome() ==
                 top->getGenome());
    CuAssertTrue(_testCase, mseg->getGenome() == parent);

    for (hal_index_t j = mseg->getStartPosition();
         j <= mseg->getEndPosition(); ++j)
    {
      const hal_index_t offset = j - topStart;
      const bool inRange = offset >= 0 &&
                           (size_t)offset < covered.size();
      CuAssertTrue(_testCase, inRange);
      if (inRange)
      {
        CuAssertTrue(_testCase, covered[offset] == false);
        covered[offset] = true;
      }
    }

    CuAssertTrue(_testCase, mseg->getStartPosition() ==
                 mseg->getSource()->getStartPosition());
    CuAssertTrue(_testCase, mseg->getEndPosition() ==
                 mseg->getSource()->getEndPosition());

    // Round trip: map the result back to the query genome.
    set<MappedSegmentConstPtr> tResults;
    mseg->getMappedSegments(tResults, top->getGenome(), NULL, false);
    CuAssertTrue(_testCase, tResults.size() == 1);
    MappedSegmentConstPtr tmseg = *tResults.begin();
    CuAssertTrue(_testCase, tmseg->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, tmseg->getSource()->getGenome() ==
                 mseg->getGenome());
    CuAssertTrue(_testCase, tmseg->getStartPosition() ==
                 mseg->getStartPosition());
    CuAssertTrue(_testCase, tmseg->getEndPosition() ==
                 mseg->getEndPosition());
    CuAssertTrue(_testCase, tmseg->getSource()->getStartPosition() ==
                 mseg->getStartPosition());
    CuAssertTrue(_testCase, tmseg->getSource()->getEndPosition() ==
                 mseg->getEndPosition());
  }
}
// Validates the alignment, then runs testTopSegment() on the first top
// segment of child2 and of child1, each time whole, after slice(1, 2), and
// after additionally reversing it.
void MappedSegmentMapAcrossTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome* firstChild = alignment->openGenome("child1");
  const Genome* secondChild = alignment->openGenome("child2");

  // child2 is exercised before child1.
  const Genome* const children[] = {secondChild, firstChild};
  for (size_t c = 0; c < 2; ++c)
  {
    TopSegmentIteratorConstPtr seg = children[c]->getTopSegmentIterator();
    testTopSegment(alignment, seg);
    seg->slice(1, 2);
    testTopSegment(alignment, seg);
    seg->toReverse();
    testTopSegment(alignment, seg);
  }
}
// Walks every top segment of "child1". The gap-insertion assertions for
// segments 8 and 10 (and their negation for all others) are currently
// commented out, so this only exercises iterator creation.
// (An unused BottomSegmentIteratorConstPtr local has been removed.)
void TopSegmentIsGapTest::checkCallBack(AlignmentConstPtr alignment)
{
  TopSegmentIteratorConstPtr ti;
  const Genome* child1 = alignment->openGenome("child1");

  for (hal_size_t i = 0; i < child1->getNumTopSegments(); ++i)
  {
    ti = child1->getTopSegmentIterator(i);
    if (i == 8 || i == 10)
    {
      // CuAssertTrue(_testCase, ti->getTopSegment()->isGapInsertion());
    }
    else
    {
      // CuAssertTrue(_testCase, !ti->getTopSegment()->isGapInsertion());
    }
  }
}
// Writes one tab-separated line "<sequence name> 0 <sequence length>" per
// sequence of the genome named genomeName, then a final endl.
// Throws hal_exception if the genome cannot be opened.
void printBedSequenceStats(ostream& os, AlignmentConstPtr alignment, const string& genomeName)
{
  const Genome* genome = alignment->openGenome(genomeName);
  if (genome == NULL)
  {
    string message("Genome ");
    message += genomeName;
    message += " not found.";
    throw hal_exception(message);
  }

  if (genome->getNumSequences() > 0)
  {
    SequenceIteratorConstPtr cursor = genome->getSequenceIterator();
    SequenceIteratorConstPtr stop = genome->getSequenceEndIterator();
    while (!cursor->equals(stop))
    {
      os << cursor->getSequence()->getName() << "\t"
         << 0 << "\t"
         << cursor->getSequence()->getSequenceLength() << "\n";
      cursor->toNext();
    }
  }
  os << endl;
}
// Visits every genome reachable from the root in breadth-first order and
// runs validateGenome() on it. Empty names are skipped.
// Throws hal_exception if a named genome cannot be opened.
void hal::validateAlignment(AlignmentConstPtr alignment)
{
  // FIFO work list: names are appended at the back and taken from the front.
  deque<string> pending;
  pending.push_back(alignment->getRootName());

  while (!pending.empty())
  {
    string current = pending.front();
    pending.pop_front();
    if (current.empty())
    {
      continue;
    }

    const Genome* genome = alignment->openGenome(current);
    if (genome == NULL)
    {
      throw hal_exception("Failure to open genome " + current);
    }
    validateGenome(genome);

    vector<string> kids = alignment->getChildNames(current);
    for (vector<string>::const_iterator k = kids.begin(); k != kids.end(); ++k)
    {
      pending.push_back(*k);
    }
  }
}
int main(int argc, char *argv[]) { CLParserPtr optParser = initParser(); string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName, childName, leafName; double upperBranchLength, leafBranchLength; bool noMarkAncestors; try { optParser->parseOptions(argc, argv); inPath = optParser->getArgument<string>("inFile"); botAlignmentPath = optParser->getArgument<string>("botAlignmentFile"); topAlignmentPath = optParser->getArgument<string>("topAlignmentFile"); parentName = optParser->getArgument<string>("parentName"); insertName = optParser->getArgument<string>("insertName"); childName = optParser->getArgument<string>("childName"); leafName = optParser->getArgument<string>("leafName"); upperBranchLength = optParser->getArgument<double>("upperBranchLength"); leafBranchLength = optParser->getArgument<double>("leafBranchLength"); noMarkAncestors = optParser->getFlag("noMarkAncestors"); } catch (exception &e) { optParser->printUsage(cerr); return 1; } AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser); AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath, optParser); AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath, optParser); mainAlignment->insertGenome(insertName, parentName, childName, upperBranchLength); mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength); // Insert the new intermediate node. 
Genome *insertGenome = mainAlignment->openGenome(insertName); const Genome *topInsertGenome = topAlignment->openGenome(insertName); const Genome *botInsertGenome = botAlignment->openGenome(insertName); topInsertGenome->copyDimensions(insertGenome); topInsertGenome->copyTopDimensions(insertGenome); botInsertGenome->copyBottomDimensions(insertGenome); topInsertGenome->copySequence(insertGenome); topInsertGenome->copyTopSegments(insertGenome); topInsertGenome->copyMetadata(insertGenome); botInsertGenome->copyBottomSegments(insertGenome); insertGenome->fixParseInfo(); // Copy the bottom segments for the parent genome from the top alignment. Genome *parentGenome = mainAlignment->openGenome(parentName); const Genome *botParentGenome = topAlignment->openGenome(parentName); botParentGenome->copyBottomDimensions(parentGenome); botParentGenome->copyBottomSegments(parentGenome); parentGenome->fixParseInfo(); // Fix the parent's other children as well. vector<string> allChildren = mainAlignment->getChildNames(parentName); for (size_t i = 0; i < allChildren.size(); i++) { if (allChildren[i] != insertName) { Genome *outGenome = mainAlignment->openGenome(allChildren[i]); const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]); topSegmentsGenome->copyTopDimensions(outGenome); topSegmentsGenome->copyTopSegments(outGenome); outGenome->fixParseInfo(); } } // Copy the top segments for the child genome from the bottom alignment. Genome *childGenome = mainAlignment->openGenome(childName); const Genome *topChildGenome = botAlignment->openGenome(childName); topChildGenome->copyTopDimensions(childGenome); topChildGenome->copyTopSegments(childGenome); childGenome->fixParseInfo(); // Copy the entire genome for the leaf from the bottom alignment. 
Genome *outLeafGenome = mainAlignment->openGenome(leafName); const Genome *inLeafGenome = botAlignment->openGenome(leafName); inLeafGenome->copy(outLeafGenome); if (!noMarkAncestors) { markAncestorsForUpdate(mainAlignment, insertName); } mainAlignment->close(); botAlignment->close(); topAlignment->close(); }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve chain (pairwise alignment) " "information from a hal database.\n" "WARNING: THIS TOOL WAS NEVER FINISHED OR" " TESTED. USE AT OWN RISK. PLEASE " "CONSIDER halLiftover --outPSL INSTEAD."); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addArgument("genome", "(query) genome to process"); optionsParser->addOption("sequence", "sequence name in query genome (" "all sequences if not specified)", "\"\""); optionsParser->addOption("start", "start position in query genome", 0); optionsParser->addOption("length", "maximum length of chain to output.", 0); optionsParser->addOption("chainFile", "path for output file. stdout if not" " specified", "\"\""); optionsParser->addOption("maxGap", "maximum indel length to be considered a gap within" " a chain.", 20); string halPath; string chainPath; string genomeName; string sequenceName; hal_size_t start; hal_size_t length; hal_size_t maxGap; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); genomeName = optionsParser->getArgument<string>("genome"); sequenceName = optionsParser->getOption<string>("sequence"); start = optionsParser->getOption<hal_size_t>("start"); length = optionsParser->getOption<hal_size_t>("length"); chainPath = optionsParser->getOption<string>("chainFile"); maxGap = optionsParser->getOption<hal_size_t>("maxGap"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK." << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." 
<<endl; AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); const Genome* genome = alignment->openGenome(genomeName); if (genome == NULL) { throw hal_exception(string("Genome not found: ") + genomeName); } hal_index_t endPosition = length > 0 ? start + length : genome->getSequenceLength(); const Sequence* sequence = NULL; if (sequenceName != "\"\"") { sequence = genome->getSequence(sequenceName); if (sequence == NULL) { throw hal_exception(string("Sequence not found: ") + sequenceName); } start += sequence->getStartPosition(); endPosition = length > 0 ? start + length : sequence->getSequenceLength(); } ofstream ofile; ostream& outStream = chainPath == "\"\"" ? cout : ofile; if (chainPath != "\"\"") { ofile.open(chainPath.c_str()); if (!ofile) { throw hal_exception(string("Error opening output file ") + chainPath); } } TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator(); top->toSite(start, false); // do slicing here; GappedTopSegmentIteratorConstPtr gtop = genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap); // need to review! Chain chain; chain._id = 0; while (gtop->getRightArrayIndex() < (hal_index_t)genome->getNumTopSegments() && gtop->getLeft()->getStartPosition() < endPosition) { if (gtop->hasParent() == true) { hal_offset_t leftOffset = 0; if ((hal_index_t)start > gtop->getStartPosition() && (hal_index_t)start < gtop->getEndPosition()) { leftOffset = start - gtop->getStartPosition() ; } hal_offset_t rightOffset = 0; if (endPosition - 1 > gtop->getStartPosition() && endPosition - 1 < gtop->getEndPosition()) { rightOffset = gtop->getEndPosition() + 1 - endPosition; } // need to do offsets for edge cases gtIteratorToChain(gtop, chain, leftOffset, rightOffset); outStream << chain; ++chain._id; } gtop->toRight(); } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char** argv) { CLParserPtr optionsParser = initParser(); string halPath; string srcGenomeName; string srcBedPath; string tgtGenomeName; string tgtBedPath; bool noDupes; bool append; int inBedVersion; int outBedVersion; bool keepExtra; bool outPSL; bool outPSLWithName; bool tab; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); srcGenomeName = optionsParser->getArgument<string>("srcGenome"); srcBedPath = optionsParser->getArgument<string>("srcBed"); tgtGenomeName = optionsParser->getArgument<string>("tgtGenome"); tgtBedPath = optionsParser->getArgument<string>("tgtBed"); noDupes = optionsParser->getFlag("noDupes"); append = optionsParser->getFlag("append"); inBedVersion = optionsParser->getOption<int>("inBedVersion"); outBedVersion = optionsParser->getOption<int>("outBedVersion"); keepExtra = optionsParser->getFlag("keepExtra"); outPSL = optionsParser->getFlag("outPSL"); outPSLWithName = optionsParser->getFlag("outPSLWithName"); tab = optionsParser->getFlag("tab"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { if (outPSLWithName == true) { outPSL = true; } if (outPSL == true) { outBedVersion = 12; } AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); if (alignment->getNumGenomes() == 0) { throw hal_exception("hal alignmenet is empty"); } const Genome* srcGenome = alignment->openGenome(srcGenomeName); if (srcGenome == NULL) { throw hal_exception(string("srcGenome, ") + srcGenomeName + ", not found in alignment"); } const Genome* tgtGenome = alignment->openGenome(tgtGenomeName); if (tgtGenome == NULL) { throw hal_exception(string("tgtGenome, ") + tgtGenomeName + ", not found in alignment"); } ifstream srcBed; istream* srcBedPtr; if (srcBedPath == "stdin") { srcBedPtr = &cin; } else { srcBed.open(srcBedPath.c_str()); srcBedPtr = &srcBed; if (!srcBed) { throw hal_exception("Error opening srcBed, " + srcBedPath); } } 
ios_base::openmode mode = append ? ios::out | ios::app : ios_base::out; ofstream tgtBed; ostream* tgtBedPtr; if (tgtBedPath == "stdout") { tgtBedPtr = &cout; } else { tgtBed.open(tgtBedPath.c_str(), mode); tgtBedPtr = &tgtBed; if (!tgtBed) { throw hal_exception("Error opening tgtBed, " + tgtBedPath); } } locale* inLocale = NULL; if (tab == true) { inLocale = new locale(cin.getloc(), new TabSepFacet(cin.getloc())); assert(std::isspace('\t', *inLocale) == true); assert(std::isspace(' ', *inLocale) == false); } BlockLiftover liftover; liftover.convert(alignment, srcGenome, srcBedPtr, tgtGenome, tgtBedPtr, inBedVersion, outBedVersion, keepExtra, !noDupes, outPSL, outPSLWithName, inLocale); delete inLocale; } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
static void printBranchPath(ostream& os, AlignmentConstPtr alignment, const vector<string>& genomeNames, bool keepRoot) { set<const Genome*> inputSet; for (size_t i = 0; i < genomeNames.size(); ++i) { const Genome* genome = alignment->openGenome(genomeNames[i]); if (genome == NULL) { throw hal_exception(string("Genome ") + genomeNames[i] + " not found"); } inputSet.insert(genome); } set<const Genome*> outputSet; getGenomesInSpanningTree(inputSet, outputSet); vector<const Genome*> outputVec; // if given two genomes, sort the output to be the actual path frmo the // first to the second. if (genomeNames.size() == 2) { set<const Genome*> visitSet(outputSet); outputVec.push_back(alignment->openGenome(genomeNames[0])); visitSet.erase(alignment->openGenome(genomeNames[0])); while (outputVec.back()->getName() != genomeNames[1]) { const Genome* cur = outputVec.back(); set<const Genome*>::iterator i = visitSet.find(cur->getParent()); if (i == visitSet.end()) { for (size_t childIdx = 0; childIdx < cur->getNumChildren(); ++childIdx) { i = visitSet.find(cur->getChild(childIdx)); if (i != visitSet.end()) { break; } } } if (i != visitSet.end()) { outputVec.push_back(*i); visitSet.erase(i); } else { throw hal_exception(string("error determining path from ") + genomeNames[0] + " to " + genomeNames[1]); } } } else { outputVec.resize(outputSet.size()); copy(outputSet.begin(), outputSet.end(), outputVec.begin()); } for (vector<const Genome*>::const_iterator j = outputVec.begin(); j != outputVec.end(); ++j) { const Genome* genome = *j; if (keepRoot == true || (genome->getParent() != NULL && outputSet.find(genome->getParent()) != outputSet.end())) { os << genome->getName() << " "; } } os << endl; }
// Walks four gapped iterators (top and bottom, forward and reversed) across
// the "child" / "parent" pair, first from index 0 upwards and then from the
// last index downwards. At every step each gapped iterator must consist of a
// single segment (left equals right) whose array index equals the step
// counter; on the way down the orientation flags are checked as well.
void GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");
  const int gapLimit = 9999999;

  GappedTopSegmentIteratorConstPtr topFwd =
    child->getGappedTopSegmentIterator(0, gapLimit);
  GappedBottomSegmentIteratorConstPtr botFwd =
    parent->getGappedBottomSegmentIterator(0, 0, gapLimit);
  GappedTopSegmentIteratorConstPtr topRev =
    child->getGappedTopSegmentIterator(0, gapLimit);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev =
    parent->getGappedBottomSegmentIterator(0, 0, gapLimit);
  botRev->toReverse();

  // Ascending pass: forward iterators step right, reversed ones step left.
  for (size_t step = 0; step < child->getNumTopSegments(); ++step)
  {
    TopSegmentIteratorConstPtr t = topFwd->getLeft();
    CuAssertTrue(_testCase, t->equals(topFwd->getRight()));
    CuAssertTrue(_testCase,
                 (size_t)t->getTopSegment()->getArrayIndex() == step);
    topFwd->toRight();

    BottomSegmentIteratorConstPtr b = botFwd->getLeft();
    CuAssertTrue(_testCase, b->equals(botFwd->getRight()));
    CuAssertTrue(_testCase,
                 (size_t)b->getBottomSegment()->getArrayIndex() == step);
    botFwd->toRight();

    TopSegmentIteratorConstPtr tr = topRev->getLeft();
    CuAssertTrue(_testCase, tr->equals(topRev->getRight()));
    CuAssertTrue(_testCase,
                 (size_t)tr->getTopSegment()->getArrayIndex() == step);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr br = botRev->getLeft();
    CuAssertTrue(_testCase, br->equals(botRev->getRight()));
    CuAssertTrue(_testCase,
                 (size_t)br->getBottomSegment()->getArrayIndex() == step);
    botRev->toLeft();
  }

  // Restart all four iterators at the last top-segment index of the child.
  const hal_index_t lastIdx = child->getNumTopSegments() - 1;
  topFwd = child->getGappedTopSegmentIterator(lastIdx, gapLimit);
  botFwd = parent->getGappedBottomSegmentIterator(lastIdx, 0, gapLimit);
  topRev = child->getGappedTopSegmentIterator(lastIdx, gapLimit);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(lastIdx, 0, gapLimit);
  botRev->toReverse();

  // Descending pass: forward iterators step left, reversed ones step right.
  for (hal_index_t step = lastIdx; step >= 0; --step)
  {
    TopSegmentIteratorConstPtr t = topFwd->getLeft();
    CuAssertTrue(_testCase, t->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, t->getTopSegment()->getArrayIndex() == step);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr b = botFwd->getLeft();
    CuAssertTrue(_testCase, b->equals(botFwd->getRight()));
    CuAssertTrue(_testCase, b->getBottomSegment()->getArrayIndex() == step);
    CuAssertTrue(_testCase, botFwd->getReversed() == false);
    botFwd->toLeft();

    TopSegmentIteratorConstPtr tr = topRev->getLeft();
    CuAssertTrue(_testCase, tr->equals(topRev->getRight()));
    CuAssertTrue(_testCase, tr->getTopSegment()->getArrayIndex() == step);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr br = botRev->getLeft();
    CuAssertTrue(_testCase, br->equals(botRev->getRight()));
    CuAssertTrue(_testCase, br->getBottomSegment()->getArrayIndex() == step);
    CuAssertTrue(_testCase, botRev->getReversed() == true);
    botRev->toRight();
  }
}