// Test callback: for every ordered pair of genomes collected by
// hal::getGenomesInSubTree() starting at the alignment's root genome
// (a genome is also paired with itself), build the column array and the
// block array and compare them.  Pairs in which either genome has a
// sequence length of zero are skipped.  An alignment with no genomes is
// ignored entirely.
void MappedSegmentColCompareTest::checkCallBack(AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    return;
  }

  validateAlignment(alignment);

  typedef set<const Genome*> GenomeSet;
  GenomeSet genomes;
  const Genome* rootGenome = alignment->openGenome(alignment->getRootName());
  hal::getGenomesInSubTree(rootGenome, genomes);

  for (GenomeSet::iterator srcIt = genomes.begin(); srcIt != genomes.end();
       ++srcIt)
  {
    const Genome* source = *srcIt;
    for (GenomeSet::iterator tgtIt = genomes.begin(); tgtIt != genomes.end();
         ++tgtIt)
    {
      const Genome* target = *tgtIt;
      // Both genomes need sequence for the comparison to be performed.
      if (!(source->getSequenceLength() > 0 &&
            target->getSequenceLength() > 0))
      {
        continue;
      }

      // The helper methods below read the pair from these members.
      _ref = source;
      _tgt = target;
      createColArray();
      createBlockArray();
      compareArrays();
    }
  }
}
// Sanity-check an alignment that was loaded for a level of detail.
//
// minQuery:  query-length threshold associated with the alignment; a value
//            of 0 is the case the error message below calls the "highest
//            level of detail".
// path:      location the alignment was specified in; used only to build
//            error messages.
// alignment: the opened alignment to check.
//
// Throws hal_exception if the alignment contains no genomes.  In builds
// where NDEBUG is not defined, also throws hal_exception when minQuery is 0
// and the probed genome (the first leaf below the root, or the root itself
// if there are no leaves) does not contain a DNA array.
void LodManager::checkAlignment(hal_size_t minQuery,
                                const string& path,
                                AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    stringstream ss;
    ss << "No genomes found in base alignment specified in " << path;
    throw hal_exception(ss.str());
  }

#ifndef NDEBUG
  if (minQuery == 0)
  {
    vector<string> leafNames = alignment->getLeafNamesBelow(
      alignment->getRootName());
    // Probe a single genome: the first leaf when there is one, otherwise
    // the root.
    string name = !leafNames.empty() ? leafNames[0] : alignment->getRootName();
    const Genome* genome = alignment->openGenome(name);
    bool seqFound = genome->containsDNAArray();
    alignment->closeGenome(genome);
    if (seqFound == false)
    {
      stringstream ss;
      // Leading space keeps the path from running into the word "must".
      ss << "HAL file for highest level of detail (0) in " << path
         << " must contain DNA sequence information.";
      throw hal_exception(ss.str());
    }
  }
#endif
}
int main(int argc, char** argv) { CLParserPtr optionsParser = initParser(); string halPath; string srcGenomeName; string srcBedPath; string tgtGenomeName; string tgtBedPath; bool noDupes; bool append; int inBedVersion; int outBedVersion; bool keepExtra; bool outPSL; bool outPSLWithName; bool tab; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); srcGenomeName = optionsParser->getArgument<string>("srcGenome"); srcBedPath = optionsParser->getArgument<string>("srcBed"); tgtGenomeName = optionsParser->getArgument<string>("tgtGenome"); tgtBedPath = optionsParser->getArgument<string>("tgtBed"); noDupes = optionsParser->getFlag("noDupes"); append = optionsParser->getFlag("append"); inBedVersion = optionsParser->getOption<int>("inBedVersion"); outBedVersion = optionsParser->getOption<int>("outBedVersion"); keepExtra = optionsParser->getFlag("keepExtra"); outPSL = optionsParser->getFlag("outPSL"); outPSLWithName = optionsParser->getFlag("outPSLWithName"); tab = optionsParser->getFlag("tab"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { if (outPSLWithName == true) { outPSL = true; } if (outPSL == true) { outBedVersion = 12; } AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); if (alignment->getNumGenomes() == 0) { throw hal_exception("hal alignmenet is empty"); } const Genome* srcGenome = alignment->openGenome(srcGenomeName); if (srcGenome == NULL) { throw hal_exception(string("srcGenome, ") + srcGenomeName + ", not found in alignment"); } const Genome* tgtGenome = alignment->openGenome(tgtGenomeName); if (tgtGenome == NULL) { throw hal_exception(string("tgtGenome, ") + tgtGenomeName + ", not found in alignment"); } ifstream srcBed; istream* srcBedPtr; if (srcBedPath == "stdin") { srcBedPtr = &cin; } else { srcBed.open(srcBedPath.c_str()); srcBedPtr = &srcBed; if (!srcBed) { throw hal_exception("Error opening srcBed, " + srcBedPath); } } 
ios_base::openmode mode = append ? ios::out | ios::app : ios_base::out; ofstream tgtBed; ostream* tgtBedPtr; if (tgtBedPath == "stdout") { tgtBedPtr = &cout; } else { tgtBed.open(tgtBedPath.c_str(), mode); tgtBedPtr = &tgtBed; if (!tgtBed) { throw hal_exception("Error opening tgtBed, " + tgtBedPath); } } locale* inLocale = NULL; if (tab == true) { inLocale = new locale(cin.getloc(), new TabSepFacet(cin.getloc())); assert(std::isspace('\t', *inLocale) == true); assert(std::isspace(' ', *inLocale) == false); } BlockLiftover liftover; liftover.convert(alignment, srcGenome, srcBedPtr, tgtGenome, tgtBedPtr, inBedVersion, outBedVersion, keepExtra, !noDupes, outPSL, outPSLWithName, inLocale); delete inLocale; } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Retrieve basic statistics from a hal database"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addOptionFlag("genomes", "print only a list of genomes " "in alignment", false); optionsParser->addOption("sequences", "print list of sequences in given " "genome", "\"\""); optionsParser->addOption("sequenceStats", "print stats for each sequence in " "given genome", "\"\""); optionsParser->addOption("bedSequences", "print sequences of given genome " "in bed format", "\"\""); optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false); optionsParser->addOptionFlag("branches", "print list of branches. " "Each branch is specified by the child genome", false); optionsParser->addOption("span", "print branches on path (or spanning tree) " "between comma " "separated list of genomes", "\"\""); optionsParser->addOption("spanRoot", "print genomes on path" "(or spanning tree) between comma " "separated list of genomes. Different from --span" "only in that the spanning tree root is also " "given", "\"\""); optionsParser->addOption("children", "print names of children of given " "genome", "\"\""); optionsParser->addOptionFlag("root", "print root genome name", false); optionsParser->addOption("parent", "print name of parent of given genome", "\"\""); optionsParser->addOption("branchLength", "print branch length between " "given genome and its parent in the tree", "\"\""); optionsParser->addOption("numSegments", "print numTopSegments " "numBottomSegments for given genome.", "\"\""); optionsParser->addOption("baseComp", "print base composition for given " "genome by sampling every step bases. Parameter " "value is of the form genome,step. Ex: " "--baseComp human,1000. 
The ouptut is of the form " "fraction_of_As fraction_of_Gs fraction_of_Cs " "fraction_of_Ts.", "\"\""); optionsParser->addOption("genomeMetaData", "print metadata for given genome, " "one entry per line, tab-seperated.", "\"\""); optionsParser->addOption("chromSizes", "print the name and length of each" " sequence in a given genome. This is a subset" " of the" " information returned by --sequenceStats but is" " useful because it is in the format used by" " wigToBigWig", "\"\""); optionsParser->addOption("percentID", "print % ID of a genome with all other genomes." "Only non-duplicated and unambiguous sites are" "considered", "\"\""); optionsParser->addOption("coverage", "print histogram of coverage of a genome with" " all genomes", "\"\""); optionsParser->addOption("topSegments", "print coordinates of all top segments of given" " genome in BED format.", "\"\""); optionsParser->addOption("bottomSegments", "print coordinates of all bottom segments of given" " genome in BED format.", "\"\""); optionsParser->addOptionFlag("allCoverage", "print histogram of coverage from all genomes to" " all genomes", false); string path; bool listGenomes; string sequencesFromGenome; string sequenceStatsFromGenome; string bedSequencesFromGenome; string spanGenomes; string spanRootGenomes; bool tree; bool branches; string childrenFromGenome; string parentFromGenome; bool printRoot; string nameForBL; string numSegmentsGenome; string baseCompPair; string genomeMetaData; string chromSizesFromGenome; string percentID; string coverage; string topSegments; string bottomSegments; bool allCoverage; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); listGenomes = optionsParser->getFlag("genomes"); sequencesFromGenome = optionsParser->getOption<string>("sequences"); sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats"); bedSequencesFromGenome = optionsParser->getOption<string>("bedSequences"); tree = 
optionsParser->getFlag("tree"); spanGenomes = optionsParser->getOption<string>("span"); spanRootGenomes = optionsParser->getOption<string>("spanRoot"); branches = optionsParser->getFlag("branches"); childrenFromGenome = optionsParser->getOption<string>("children"); parentFromGenome = optionsParser->getOption<string>("parent"); printRoot = optionsParser->getFlag("root"); nameForBL = optionsParser->getOption<string>("branchLength"); numSegmentsGenome = optionsParser->getOption<string>("numSegments"); baseCompPair = optionsParser->getOption<string>("baseComp"); genomeMetaData = optionsParser->getOption<string>("genomeMetaData"); chromSizesFromGenome = optionsParser->getOption<string>("chromSizes"); percentID = optionsParser->getOption<string>("percentID"); coverage = optionsParser->getOption<string>("coverage"); topSegments = optionsParser->getOption<string>("topSegments"); bottomSegments = optionsParser->getOption<string>("bottomSegments"); allCoverage = optionsParser->getFlag("allCoverage"); size_t optCount = listGenomes == true ? 
1 : 0; if (sequencesFromGenome != "\"\"") ++optCount; if (tree == true) ++optCount; if (sequenceStatsFromGenome != "\"\"") ++optCount; if (bedSequencesFromGenome != "\"\"") ++optCount; if (spanGenomes != "\"\"") ++optCount; if (spanRootGenomes != "\"\"") ++optCount; if (branches) ++ optCount; if (childrenFromGenome != "\"\"") ++optCount; if (parentFromGenome != "\"\"") ++optCount; if (printRoot) ++optCount; if (nameForBL != "\"\"") ++optCount; if (numSegmentsGenome != "\"\"") ++optCount; if (baseCompPair != "\"\"") ++optCount; if (genomeMetaData != "\"\"") ++optCount; if (chromSizesFromGenome != "\"\"") ++optCount; if (percentID != "\"\"") ++optCount; if (coverage != "\"\"") ++optCount; if (topSegments != "\"\"") ++optCount; if (bottomSegments != "\"\"") ++optCount; if (allCoverage) ++optCount; if (optCount > 1) { throw hal_exception("--genomes, --sequences, --tree, --span, --spanRoot, " "--branches, --sequenceStats, --children, --parent, " "--bedSequences, --root, --numSegments, --baseComp, " "--genomeMetaData, --chromSizes, --percentID, " "--coverage, --topSegments, --bottomSegments, " "--allCoverage " "and --branchLength options are exclusive"); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); if (listGenomes == true && alignment->getNumGenomes() > 0) { printGenomes(cout, alignment); } else if (sequencesFromGenome != "\"\"") { printSequences(cout, alignment, sequencesFromGenome); } else if (tree == true) { cout << alignment->getNewickTree() << endl; } else if (sequenceStatsFromGenome != "\"\"") { printSequenceStats(cout, alignment, sequenceStatsFromGenome); } else if (bedSequencesFromGenome != "\"\"") { printBedSequenceStats(cout, alignment, bedSequencesFromGenome); } else if (spanGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanGenomes, ","), false); } else if (spanRootGenomes != "\"\"") { printBranchPath(cout, 
alignment, chopString(spanRootGenomes, ","), true); } else if (branches == true) { printBranches(cout, alignment); } else if (childrenFromGenome != "\"\"") { printChildren(cout, alignment, childrenFromGenome); } else if (parentFromGenome != "\"\"") { printParent(cout, alignment, parentFromGenome); } else if (printRoot == true) { printRootName(cout, alignment); } else if (nameForBL != "\"\"") { printBranchLength(cout, alignment, nameForBL); } else if (numSegmentsGenome != "\"\"") { printNumSegments(cout, alignment, numSegmentsGenome); } else if (baseCompPair != "\"\"") { printBaseComp(cout, alignment, baseCompPair); } else if (genomeMetaData != "\"\"") { printGenomeMetaData(cout, alignment, genomeMetaData); } else if (chromSizesFromGenome != "\"\"") { printChromSizes(cout, alignment, chromSizesFromGenome); } else if (percentID != "\"\"") { printPercentID(cout, alignment, percentID); } else if (coverage != "\"\"") { printCoverage(cout, alignment, coverage); } else if (topSegments != "\"\"") { printSegments(cout, alignment, topSegments, true); } else if (bottomSegments != "\"\"") { printSegments(cout, alignment, bottomSegments, false); } else if (allCoverage) { printAllCoverage(cout, alignment); } else { HalStats halStats(alignment); cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve basic statics from a hal database"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addOptionFlag("genomes", "print only a list of genomes " "in alignment", false); optionsParser->addOption("sequences", "print list of sequences in given " "genome", "\"\""); optionsParser->addOption("sequenceStats", "print stats for each sequence in " "given genome", "\"\""); optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false); optionsParser->addOptionFlag("branches", "print list of branches. " "Each branch is specified by the child genome", false); optionsParser->addOption("span", "print branches on path (or spanning tree) " "between comma " "separated list of genomes", "\"\""); optionsParser->addOption("spanRoot", "print genomes on path" "(or spanning tree) between comma " "separated list of genomes. Different from --path" "only in that the spanning tree root is also " "given", "\"\""); string path; bool listGenomes; string sequencesFromGenome; string sequenceStatsFromGenome; string spanGenomes; string spanRootGenomes; bool tree; bool branches; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); listGenomes = optionsParser->getFlag("genomes"); sequencesFromGenome = optionsParser->getOption<string>("sequences"); sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats"); tree = optionsParser->getFlag("tree"); spanGenomes = optionsParser->getOption<string>("span"); spanRootGenomes = optionsParser->getOption<string>("spanRoot"); branches = optionsParser->getFlag("branches"); size_t optCount = listGenomes == true ? 
1 : 0; if (sequencesFromGenome != "\"\"") ++optCount; if (tree == true) ++optCount; if (sequenceStatsFromGenome != "\"\"") ++optCount; if (spanGenomes != "\"\"") ++optCount; if (spanRootGenomes != "\"\"") ++optCount; if (branches) ++optCount; if (optCount > 1) { throw hal_exception("--genomes, --sequences, --tree, --span, " "--spanRoot, --branches " "and --sequenceStats " "options are mutually exclusive"); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); if (listGenomes == true && alignment->getNumGenomes() > 0) { printGenomes(cout, alignment); } else if (sequencesFromGenome != "\"\"") { printSequences(cout, alignment, sequencesFromGenome); } else if (tree == true) { cout << alignment->getNewickTree() << endl; } else if (sequenceStatsFromGenome != "\"\"") { printSequenceStats(cout, alignment, sequenceStatsFromGenome); } else if (spanGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanGenomes, ","), false); } else if (spanRootGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true); } else if (branches == true) { printBranches(cout, alignment); } else { HalStats halStats(alignment); cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }