/**
 * Populate outAlignment with an interpolated version of inAlignment.
 *
 * The arguments are stored in the matching members, the output tree is
 * built with createTree() (falling back to the input alignment's Newick
 * tree when `tree` is empty) and echoed to stdout.  The output tree is then
 * walked from its root and convertInternalNode() is invoked, with `scale`,
 * on every genome that has at least one child.
 *
 * Traversal order: genomes are taken from, and children appended to, the
 * back of the work list, so the most recently added child is visited next.
 * A genome is always converted before any of its children.
 */
void LodExtract::createInterpolatedAlignment(AlignmentConstPtr inAlignment,
                                             AlignmentPtr outAlignment,
                                             double scale,
                                             const string& tree,
                                             const string& rootName,
                                             bool keepSequences,
                                             bool allSequences,
                                             double probeFrac,
                                             double minSeqFrac)
{
  // Remember the run parameters for the helpers called below.
  _inAlignment = inAlignment;
  _outAlignment = outAlignment;
  _keepSequences = keepSequences;
  _allSequences = allSequences;
  _probeFrac = probeFrac;
  _minSeqFrac = minSeqFrac;

  // An empty tree argument means "reuse the tree of the input alignment".
  string treeToBuild = tree;
  if (treeToBuild.empty())
  {
    treeToBuild = inAlignment->getNewickTree();
  }
  createTree(treeToBuild, rootName);
  cout << "tree = " << _outAlignment->getNewickTree() << endl;

  // Work list of genomes still to visit, seeded with the root.
  deque<string> pending;
  pending.push_back(_outAlignment->getRootName());
  while (!pending.empty())
  {
    const string current = pending.back();
    pending.pop_back();

    // Children are looked up before the node is converted.
    const vector<string> children = _outAlignment->getChildNames(current);
    if (children.empty())
    {
      // Leaves have nothing to convert and nothing to enqueue.
      continue;
    }

    convertInternalNode(current, scale);
    pending.insert(pending.end(), children.begin(), children.end());
  }
}
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Retrieve basic statistics from a hal database"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addOptionFlag("genomes", "print only a list of genomes " "in alignment", false); optionsParser->addOption("sequences", "print list of sequences in given " "genome", "\"\""); optionsParser->addOption("sequenceStats", "print stats for each sequence in " "given genome", "\"\""); optionsParser->addOption("bedSequences", "print sequences of given genome " "in bed format", "\"\""); optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false); optionsParser->addOptionFlag("branches", "print list of branches. " "Each branch is specified by the child genome", false); optionsParser->addOption("span", "print branches on path (or spanning tree) " "between comma " "separated list of genomes", "\"\""); optionsParser->addOption("spanRoot", "print genomes on path" "(or spanning tree) between comma " "separated list of genomes. Different from --span" "only in that the spanning tree root is also " "given", "\"\""); optionsParser->addOption("children", "print names of children of given " "genome", "\"\""); optionsParser->addOptionFlag("root", "print root genome name", false); optionsParser->addOption("parent", "print name of parent of given genome", "\"\""); optionsParser->addOption("branchLength", "print branch length between " "given genome and its parent in the tree", "\"\""); optionsParser->addOption("numSegments", "print numTopSegments " "numBottomSegments for given genome.", "\"\""); optionsParser->addOption("baseComp", "print base composition for given " "genome by sampling every step bases. Parameter " "value is of the form genome,step. Ex: " "--baseComp human,1000. 
The ouptut is of the form " "fraction_of_As fraction_of_Gs fraction_of_Cs " "fraction_of_Ts.", "\"\""); optionsParser->addOption("genomeMetaData", "print metadata for given genome, " "one entry per line, tab-seperated.", "\"\""); optionsParser->addOption("chromSizes", "print the name and length of each" " sequence in a given genome. This is a subset" " of the" " information returned by --sequenceStats but is" " useful because it is in the format used by" " wigToBigWig", "\"\""); optionsParser->addOption("percentID", "print % ID of a genome with all other genomes." "Only non-duplicated and unambiguous sites are" "considered", "\"\""); optionsParser->addOption("coverage", "print histogram of coverage of a genome with" " all genomes", "\"\""); optionsParser->addOption("topSegments", "print coordinates of all top segments of given" " genome in BED format.", "\"\""); optionsParser->addOption("bottomSegments", "print coordinates of all bottom segments of given" " genome in BED format.", "\"\""); optionsParser->addOptionFlag("allCoverage", "print histogram of coverage from all genomes to" " all genomes", false); string path; bool listGenomes; string sequencesFromGenome; string sequenceStatsFromGenome; string bedSequencesFromGenome; string spanGenomes; string spanRootGenomes; bool tree; bool branches; string childrenFromGenome; string parentFromGenome; bool printRoot; string nameForBL; string numSegmentsGenome; string baseCompPair; string genomeMetaData; string chromSizesFromGenome; string percentID; string coverage; string topSegments; string bottomSegments; bool allCoverage; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); listGenomes = optionsParser->getFlag("genomes"); sequencesFromGenome = optionsParser->getOption<string>("sequences"); sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats"); bedSequencesFromGenome = optionsParser->getOption<string>("bedSequences"); tree = 
optionsParser->getFlag("tree"); spanGenomes = optionsParser->getOption<string>("span"); spanRootGenomes = optionsParser->getOption<string>("spanRoot"); branches = optionsParser->getFlag("branches"); childrenFromGenome = optionsParser->getOption<string>("children"); parentFromGenome = optionsParser->getOption<string>("parent"); printRoot = optionsParser->getFlag("root"); nameForBL = optionsParser->getOption<string>("branchLength"); numSegmentsGenome = optionsParser->getOption<string>("numSegments"); baseCompPair = optionsParser->getOption<string>("baseComp"); genomeMetaData = optionsParser->getOption<string>("genomeMetaData"); chromSizesFromGenome = optionsParser->getOption<string>("chromSizes"); percentID = optionsParser->getOption<string>("percentID"); coverage = optionsParser->getOption<string>("coverage"); topSegments = optionsParser->getOption<string>("topSegments"); bottomSegments = optionsParser->getOption<string>("bottomSegments"); allCoverage = optionsParser->getFlag("allCoverage"); size_t optCount = listGenomes == true ? 
1 : 0; if (sequencesFromGenome != "\"\"") ++optCount; if (tree == true) ++optCount; if (sequenceStatsFromGenome != "\"\"") ++optCount; if (bedSequencesFromGenome != "\"\"") ++optCount; if (spanGenomes != "\"\"") ++optCount; if (spanRootGenomes != "\"\"") ++optCount; if (branches) ++ optCount; if (childrenFromGenome != "\"\"") ++optCount; if (parentFromGenome != "\"\"") ++optCount; if (printRoot) ++optCount; if (nameForBL != "\"\"") ++optCount; if (numSegmentsGenome != "\"\"") ++optCount; if (baseCompPair != "\"\"") ++optCount; if (genomeMetaData != "\"\"") ++optCount; if (chromSizesFromGenome != "\"\"") ++optCount; if (percentID != "\"\"") ++optCount; if (coverage != "\"\"") ++optCount; if (topSegments != "\"\"") ++optCount; if (bottomSegments != "\"\"") ++optCount; if (allCoverage) ++optCount; if (optCount > 1) { throw hal_exception("--genomes, --sequences, --tree, --span, --spanRoot, " "--branches, --sequenceStats, --children, --parent, " "--bedSequences, --root, --numSegments, --baseComp, " "--genomeMetaData, --chromSizes, --percentID, " "--coverage, --topSegments, --bottomSegments, " "--allCoverage " "and --branchLength options are exclusive"); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); if (listGenomes == true && alignment->getNumGenomes() > 0) { printGenomes(cout, alignment); } else if (sequencesFromGenome != "\"\"") { printSequences(cout, alignment, sequencesFromGenome); } else if (tree == true) { cout << alignment->getNewickTree() << endl; } else if (sequenceStatsFromGenome != "\"\"") { printSequenceStats(cout, alignment, sequenceStatsFromGenome); } else if (bedSequencesFromGenome != "\"\"") { printBedSequenceStats(cout, alignment, bedSequencesFromGenome); } else if (spanGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanGenomes, ","), false); } else if (spanRootGenomes != "\"\"") { printBranchPath(cout, 
alignment, chopString(spanRootGenomes, ","), true); } else if (branches == true) { printBranches(cout, alignment); } else if (childrenFromGenome != "\"\"") { printChildren(cout, alignment, childrenFromGenome); } else if (parentFromGenome != "\"\"") { printParent(cout, alignment, parentFromGenome); } else if (printRoot == true) { printRootName(cout, alignment); } else if (nameForBL != "\"\"") { printBranchLength(cout, alignment, nameForBL); } else if (numSegmentsGenome != "\"\"") { printNumSegments(cout, alignment, numSegmentsGenome); } else if (baseCompPair != "\"\"") { printBaseComp(cout, alignment, baseCompPair); } else if (genomeMetaData != "\"\"") { printGenomeMetaData(cout, alignment, genomeMetaData); } else if (chromSizesFromGenome != "\"\"") { printChromSizes(cout, alignment, chromSizesFromGenome); } else if (percentID != "\"\"") { printPercentID(cout, alignment, percentID); } else if (coverage != "\"\"") { printCoverage(cout, alignment, coverage); } else if (topSegments != "\"\"") { printSegments(cout, alignment, topSegments, true); } else if (bottomSegments != "\"\"") { printSegments(cout, alignment, bottomSegments, false); } else if (allCoverage) { printAllCoverage(cout, alignment); } else { HalStats halStats(alignment); cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve basic statics from a hal database"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addOptionFlag("genomes", "print only a list of genomes " "in alignment", false); optionsParser->addOption("sequences", "print list of sequences in given " "genome", "\"\""); optionsParser->addOption("sequenceStats", "print stats for each sequence in " "given genome", "\"\""); optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false); optionsParser->addOptionFlag("branches", "print list of branches. " "Each branch is specified by the child genome", false); optionsParser->addOption("span", "print branches on path (or spanning tree) " "between comma " "separated list of genomes", "\"\""); optionsParser->addOption("spanRoot", "print genomes on path" "(or spanning tree) between comma " "separated list of genomes. Different from --path" "only in that the spanning tree root is also " "given", "\"\""); string path; bool listGenomes; string sequencesFromGenome; string sequenceStatsFromGenome; string spanGenomes; string spanRootGenomes; bool tree; bool branches; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); listGenomes = optionsParser->getFlag("genomes"); sequencesFromGenome = optionsParser->getOption<string>("sequences"); sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats"); tree = optionsParser->getFlag("tree"); spanGenomes = optionsParser->getOption<string>("span"); spanRootGenomes = optionsParser->getOption<string>("spanRoot"); branches = optionsParser->getFlag("branches"); size_t optCount = listGenomes == true ? 
1 : 0; if (sequencesFromGenome != "\"\"") ++optCount; if (tree == true) ++optCount; if (sequenceStatsFromGenome != "\"\"") ++optCount; if (spanGenomes != "\"\"") ++optCount; if (spanRootGenomes != "\"\"") ++optCount; if (branches) ++optCount; if (optCount > 1) { throw hal_exception("--genomes, --sequences, --tree, --span, " "--spanRoot, --branches " "and --sequenceStats " "options are mutually exclusive"); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); if (listGenomes == true && alignment->getNumGenomes() > 0) { printGenomes(cout, alignment); } else if (sequencesFromGenome != "\"\"") { printSequences(cout, alignment, sequencesFromGenome); } else if (tree == true) { cout << alignment->getNewickTree() << endl; } else if (sequenceStatsFromGenome != "\"\"") { printSequenceStats(cout, alignment, sequenceStatsFromGenome); } else if (spanGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanGenomes, ","), false); } else if (spanRootGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true); } else if (branches == true) { printBranches(cout, alignment); } else { HalStats halStats(alignment); cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }