/**
 * Build the command-line parser used by the genome-removal tool.
 *
 * Registers the arguments "inFile" and "deleteNode" plus the
 * "noMarkAncestors" flag, whose default is false.
 *
 * NOTE(review): the `true` passed to hdf5CLParserInstance presumably
 * selects a parser suitable for modifying a file -- confirm against
 * its declaration.
 *
 * @return the configured parser.
 */
static CLParserPtr initParser()
{
  CLParserPtr parser = hdf5CLParserInstance(true);

  // Arguments, in registration order.
  parser->addArgument("inFile", "existing tree");
  parser->addArgument("deleteNode", "(leaf) genome to delete");

  // Switch that is off unless given on the command line.
  parser->addOptionFlag("noMarkAncestors",
                        "don't mark ancestors for update",
                        false);

  return parser;
}
/**
 * Entry point of the genome-removal tool.
 *
 * Parses the command line, opens the alignment named by "inFile", calls
 * markAncestorsForUpdate() for the genome unless --noMarkAncestors was
 * given, and then removes the genome from the alignment.
 *
 * @return 0 on success, 1 if the command line could not be parsed or if
 *         an exception was raised while modifying the alignment.
 */
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, deleteNode;
  bool noMarkAncestors = false;
  try
  {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    deleteNode = optParser->getArgument<string>("deleteNode");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  }
  catch (exception &e)
  {
    // Tell the user *why* parsing failed before dumping the usage text.
    cerr << e.what() << endl;
    optParser->printUsage(cerr);
    return 1;
  }

  try
  {
    AlignmentPtr alignment = openHalAlignment(inPath, optParser);
    if (!noMarkAncestors)
    {
      markAncestorsForUpdate(alignment, deleteNode);
    }
    alignment->removeGenome(deleteNode);
  }
  catch (exception &e)
  {
    // Report failures as a message and an exit code instead of letting
    // the exception escape main() and abort the process.
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  return 0;
}
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->addArgument("halFile", "path to hal file to validate"); optionsParser->setDescription("Check if hal database is valid"); string path; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); validateAlignment(alignment); } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } cout << "\nFile valid" << endl; return 0; }
/**
 * Build the command-line parser used by the genome-replacement tool.
 *
 * Registration order is: argument "inFile", options
 * "bottomAlignmentFile" and "topAlignmentFile" (both defaulting to the
 * literal two-character string `""`, which callers treat as "not
 * given"), argument "genomeName", and the "noMarkAncestors" flag
 * (default false).
 *
 * @return the configured parser.
 */
static CLParserPtr initParser()
{
  CLParserPtr parser = hdf5CLParserInstance(true);

  parser->addArgument("inFile", "existing tree");

  parser->addOption("bottomAlignmentFile",
                    "hal file containing an alignment of the genome and "
                    "its children. Required for non-leaf genomes.",
                    "\"\"");

  parser->addOption("topAlignmentFile",
                    "hal file containing an alignment of the genome, its "
                    "parent, and its siblings. Required if the genome to "
                    "be replaced is not the root.",
                    "\"\"");

  parser->addArgument("genomeName", "name of genome to be replaced");

  parser->addOptionFlag("noMarkAncestors",
                        "don't mark ancestors for update",
                        false);

  return parser;
}
/**
 * Build the command-line parser used by the add-to-branch tool.
 *
 * Registers nine arguments (three file paths, four genome names and two
 * branch lengths, in the order below) followed by the "noMarkAncestors"
 * flag, which defaults to false.
 *
 * @return the configured parser.
 */
static CLParserPtr initParser()
{
  CLParserPtr parser = hdf5CLParserInstance(true);

  // Input files.
  parser->addArgument("inFile", "existing tree");
  parser->addArgument("botAlignmentFile",
                      "tree containing insert, its proper bottom segments, "
                      "and the new leaf genome");
  parser->addArgument("topAlignmentFile",
                      "tree containing insert, its parent, and its proper "
                      "top segments");

  // Genome names.
  parser->addArgument("parentName", "insert's future parent");
  parser->addArgument("insertName", "insert name");
  parser->addArgument("childName", "insert's future child");
  parser->addArgument("leafName", "name of new leaf genome");

  // Branch lengths.
  parser->addArgument("upperBranchLength",
                      "length of branch from parent to insert");
  parser->addArgument("leafBranchLength", "leaf branch length");

  parser->addOptionFlag("noMarkAncestors",
                        "don't mark ancestors for update",
                        false);

  return parser;
}
int main(int argc, char *argv[]) { CLParserPtr optParser = initParser(); string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName, childName, leafName; double upperBranchLength, leafBranchLength; bool noMarkAncestors; try { optParser->parseOptions(argc, argv); inPath = optParser->getArgument<string>("inFile"); botAlignmentPath = optParser->getArgument<string>("botAlignmentFile"); topAlignmentPath = optParser->getArgument<string>("topAlignmentFile"); parentName = optParser->getArgument<string>("parentName"); insertName = optParser->getArgument<string>("insertName"); childName = optParser->getArgument<string>("childName"); leafName = optParser->getArgument<string>("leafName"); upperBranchLength = optParser->getArgument<double>("upperBranchLength"); leafBranchLength = optParser->getArgument<double>("leafBranchLength"); noMarkAncestors = optParser->getFlag("noMarkAncestors"); } catch (exception &e) { optParser->printUsage(cerr); return 1; } AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser); AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath, optParser); AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath, optParser); mainAlignment->insertGenome(insertName, parentName, childName, upperBranchLength); mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength); // Insert the new intermediate node. 
Genome *insertGenome = mainAlignment->openGenome(insertName); const Genome *topInsertGenome = topAlignment->openGenome(insertName); const Genome *botInsertGenome = botAlignment->openGenome(insertName); topInsertGenome->copyDimensions(insertGenome); topInsertGenome->copyTopDimensions(insertGenome); botInsertGenome->copyBottomDimensions(insertGenome); topInsertGenome->copySequence(insertGenome); topInsertGenome->copyTopSegments(insertGenome); topInsertGenome->copyMetadata(insertGenome); botInsertGenome->copyBottomSegments(insertGenome); insertGenome->fixParseInfo(); // Copy the bottom segments for the parent genome from the top alignment. Genome *parentGenome = mainAlignment->openGenome(parentName); const Genome *botParentGenome = topAlignment->openGenome(parentName); botParentGenome->copyBottomDimensions(parentGenome); botParentGenome->copyBottomSegments(parentGenome); parentGenome->fixParseInfo(); // Fix the parent's other children as well. vector<string> allChildren = mainAlignment->getChildNames(parentName); for (size_t i = 0; i < allChildren.size(); i++) { if (allChildren[i] != insertName) { Genome *outGenome = mainAlignment->openGenome(allChildren[i]); const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]); topSegmentsGenome->copyTopDimensions(outGenome); topSegmentsGenome->copyTopSegments(outGenome); outGenome->fixParseInfo(); } } // Copy the top segments for the child genome from the bottom alignment. Genome *childGenome = mainAlignment->openGenome(childName); const Genome *topChildGenome = botAlignment->openGenome(childName); topChildGenome->copyTopDimensions(childGenome); topChildGenome->copyTopSegments(childGenome); childGenome->fixParseInfo(); // Copy the entire genome for the leaf from the bottom alignment. 
Genome *outLeafGenome = mainAlignment->openGenome(leafName); const Genome *inLeafGenome = botAlignment->openGenome(leafName); inLeafGenome->copy(outLeafGenome); if (!noMarkAncestors) { markAncestorsForUpdate(mainAlignment, insertName); } mainAlignment->close(); botAlignment->close(); topAlignment->close(); }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve chain (pairwise alignment) " "information from a hal database.\n" "WARNING: THIS TOOL WAS NEVER FINISHED OR" " TESTED. USE AT OWN RISK. PLEASE " "CONSIDER halLiftover --outPSL INSTEAD."); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addArgument("genome", "(query) genome to process"); optionsParser->addOption("sequence", "sequence name in query genome (" "all sequences if not specified)", "\"\""); optionsParser->addOption("start", "start position in query genome", 0); optionsParser->addOption("length", "maximum length of chain to output.", 0); optionsParser->addOption("chainFile", "path for output file. stdout if not" " specified", "\"\""); optionsParser->addOption("maxGap", "maximum indel length to be considered a gap within" " a chain.", 20); string halPath; string chainPath; string genomeName; string sequenceName; hal_size_t start; hal_size_t length; hal_size_t maxGap; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); genomeName = optionsParser->getArgument<string>("genome"); sequenceName = optionsParser->getOption<string>("sequence"); start = optionsParser->getOption<hal_size_t>("start"); length = optionsParser->getOption<hal_size_t>("length"); chainPath = optionsParser->getOption<string>("chainFile"); maxGap = optionsParser->getOption<hal_size_t>("maxGap"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK." << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." 
<<endl; AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); const Genome* genome = alignment->openGenome(genomeName); if (genome == NULL) { throw hal_exception(string("Genome not found: ") + genomeName); } hal_index_t endPosition = length > 0 ? start + length : genome->getSequenceLength(); const Sequence* sequence = NULL; if (sequenceName != "\"\"") { sequence = genome->getSequence(sequenceName); if (sequence == NULL) { throw hal_exception(string("Sequence not found: ") + sequenceName); } start += sequence->getStartPosition(); endPosition = length > 0 ? start + length : sequence->getSequenceLength(); } ofstream ofile; ostream& outStream = chainPath == "\"\"" ? cout : ofile; if (chainPath != "\"\"") { ofile.open(chainPath.c_str()); if (!ofile) { throw hal_exception(string("Error opening output file ") + chainPath); } } TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator(); top->toSite(start, false); // do slicing here; GappedTopSegmentIteratorConstPtr gtop = genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap); // need to review! Chain chain; chain._id = 0; while (gtop->getRightArrayIndex() < (hal_index_t)genome->getNumTopSegments() && gtop->getLeft()->getStartPosition() < endPosition) { if (gtop->hasParent() == true) { hal_offset_t leftOffset = 0; if ((hal_index_t)start > gtop->getStartPosition() && (hal_index_t)start < gtop->getEndPosition()) { leftOffset = start - gtop->getStartPosition() ; } hal_offset_t rightOffset = 0; if (endPosition - 1 > gtop->getStartPosition() && endPosition - 1 < gtop->getEndPosition()) { rightOffset = gtop->getEndPosition() + 1 - endPosition; } // need to do offsets for edge cases gtIteratorToChain(gtop, chain, leftOffset, rightOffset); outStream << chain; ++chain._id; } gtop->toRight(); } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char** argv) { CLParserPtr optionsParser = initParser(); string halPath; string srcGenomeName; string srcBedPath; string tgtGenomeName; string tgtBedPath; bool noDupes; bool append; int inBedVersion; int outBedVersion; bool keepExtra; bool outPSL; bool outPSLWithName; bool tab; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); srcGenomeName = optionsParser->getArgument<string>("srcGenome"); srcBedPath = optionsParser->getArgument<string>("srcBed"); tgtGenomeName = optionsParser->getArgument<string>("tgtGenome"); tgtBedPath = optionsParser->getArgument<string>("tgtBed"); noDupes = optionsParser->getFlag("noDupes"); append = optionsParser->getFlag("append"); inBedVersion = optionsParser->getOption<int>("inBedVersion"); outBedVersion = optionsParser->getOption<int>("outBedVersion"); keepExtra = optionsParser->getFlag("keepExtra"); outPSL = optionsParser->getFlag("outPSL"); outPSLWithName = optionsParser->getFlag("outPSLWithName"); tab = optionsParser->getFlag("tab"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { if (outPSLWithName == true) { outPSL = true; } if (outPSL == true) { outBedVersion = 12; } AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); if (alignment->getNumGenomes() == 0) { throw hal_exception("hal alignmenet is empty"); } const Genome* srcGenome = alignment->openGenome(srcGenomeName); if (srcGenome == NULL) { throw hal_exception(string("srcGenome, ") + srcGenomeName + ", not found in alignment"); } const Genome* tgtGenome = alignment->openGenome(tgtGenomeName); if (tgtGenome == NULL) { throw hal_exception(string("tgtGenome, ") + tgtGenomeName + ", not found in alignment"); } ifstream srcBed; istream* srcBedPtr; if (srcBedPath == "stdin") { srcBedPtr = &cin; } else { srcBed.open(srcBedPath.c_str()); srcBedPtr = &srcBed; if (!srcBed) { throw hal_exception("Error opening srcBed, " + srcBedPath); } } 
ios_base::openmode mode = append ? ios::out | ios::app : ios_base::out; ofstream tgtBed; ostream* tgtBedPtr; if (tgtBedPath == "stdout") { tgtBedPtr = &cout; } else { tgtBed.open(tgtBedPath.c_str(), mode); tgtBedPtr = &tgtBed; if (!tgtBed) { throw hal_exception("Error opening tgtBed, " + tgtBedPath); } } locale* inLocale = NULL; if (tab == true) { inLocale = new locale(cin.getloc(), new TabSepFacet(cin.getloc())); assert(std::isspace('\t', *inLocale) == true); assert(std::isspace(' ', *inLocale) == false); } BlockLiftover liftover; liftover.convert(alignment, srcGenome, srcBedPtr, tgtGenome, tgtBedPtr, inBedVersion, outBedVersion, keepExtra, !noDupes, outPSL, outPSLWithName, inLocale); delete inLocale; } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
/**
 * Build the command-line parser used by the BED liftover tool.
 *
 * Registers five arguments (hal file, source genome, source bed, target
 * genome, target bed), the integer options "inBedVersion" and
 * "outBedVersion" (both default 0), and the flags "noDupes", "append",
 * "outPSL", "outPSLWithName", "keepExtra" and "tab" (all default
 * false), then sets the tool description.
 *
 * @return the configured parser.
 */
static CLParserPtr initParser()
{
  CLParserPtr parser = hdf5CLParserInstance();

  // Arguments.
  parser->addArgument("halFile", "input hal file");
  parser->addArgument("srcGenome", "source genome name");
  parser->addArgument("srcBed",
                      "path of input bed file. set as stdin to stream from "
                      "standard input");
  parser->addArgument("tgtGenome", "target genome name");
  parser->addArgument("tgtBed",
                      "path of output bed file. set as stdout to stream to "
                      "standard output.");

  // Mapping behaviour.
  parser->addOptionFlag("noDupes",
                        "do not map between duplications in graph.",
                        false);
  parser->addOptionFlag("append", "append results to tgtBed", false);

  // Bed versions of input and output.
  parser->addOption("inBedVersion",
                    "bed version of input file as integer between 3 and 9 "
                    "or 12 reflecting the number of columns (see bed format "
                    "specification for more details). Will be autodetected "
                    "by default.",
                    0);
  parser->addOption("outBedVersion",
                    "bed version of output file as integer between 3 and 9 "
                    "or 12 reflecting the number of columns (see bed format "
                    "specification for more details). Will be same as input "
                    "by default.",
                    0);

  // Output format switches.
  parser->addOptionFlag("outPSL",
                        "write output in PSL instead of bed format. "
                        "overrides --outBedVersion when specified.",
                        false);
  parser->addOptionFlag("outPSLWithName",
                        "write output as input BED name followed by PSL "
                        "line instead of bed format. overrides "
                        "--outBedVersion when specified.",
                        false);

  // Input parsing switches.
  parser->addOptionFlag("keepExtra",
                        "keep extra columns. these are columns in the input "
                        "beyond the specified or detected bed version, and "
                        "which are cut by default.",
                        false);
  parser->addOptionFlag("tab",
                        "input is tab-separated. this allows column entries "
                        "to contain spaces. if this flag is not set, both "
                        "spaces and tabs are used to separate input columns.",
                        false);

  parser->setDescription("Map BED genome interval coordinates between two "
                         "genomes.");
  return parser;
}
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Retrieve basic statistics from a hal database"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addOptionFlag("genomes", "print only a list of genomes " "in alignment", false); optionsParser->addOption("sequences", "print list of sequences in given " "genome", "\"\""); optionsParser->addOption("sequenceStats", "print stats for each sequence in " "given genome", "\"\""); optionsParser->addOption("bedSequences", "print sequences of given genome " "in bed format", "\"\""); optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false); optionsParser->addOptionFlag("branches", "print list of branches. " "Each branch is specified by the child genome", false); optionsParser->addOption("span", "print branches on path (or spanning tree) " "between comma " "separated list of genomes", "\"\""); optionsParser->addOption("spanRoot", "print genomes on path" "(or spanning tree) between comma " "separated list of genomes. Different from --span" "only in that the spanning tree root is also " "given", "\"\""); optionsParser->addOption("children", "print names of children of given " "genome", "\"\""); optionsParser->addOptionFlag("root", "print root genome name", false); optionsParser->addOption("parent", "print name of parent of given genome", "\"\""); optionsParser->addOption("branchLength", "print branch length between " "given genome and its parent in the tree", "\"\""); optionsParser->addOption("numSegments", "print numTopSegments " "numBottomSegments for given genome.", "\"\""); optionsParser->addOption("baseComp", "print base composition for given " "genome by sampling every step bases. Parameter " "value is of the form genome,step. Ex: " "--baseComp human,1000. 
The ouptut is of the form " "fraction_of_As fraction_of_Gs fraction_of_Cs " "fraction_of_Ts.", "\"\""); optionsParser->addOption("genomeMetaData", "print metadata for given genome, " "one entry per line, tab-seperated.", "\"\""); optionsParser->addOption("chromSizes", "print the name and length of each" " sequence in a given genome. This is a subset" " of the" " information returned by --sequenceStats but is" " useful because it is in the format used by" " wigToBigWig", "\"\""); optionsParser->addOption("percentID", "print % ID of a genome with all other genomes." "Only non-duplicated and unambiguous sites are" "considered", "\"\""); optionsParser->addOption("coverage", "print histogram of coverage of a genome with" " all genomes", "\"\""); optionsParser->addOption("topSegments", "print coordinates of all top segments of given" " genome in BED format.", "\"\""); optionsParser->addOption("bottomSegments", "print coordinates of all bottom segments of given" " genome in BED format.", "\"\""); optionsParser->addOptionFlag("allCoverage", "print histogram of coverage from all genomes to" " all genomes", false); string path; bool listGenomes; string sequencesFromGenome; string sequenceStatsFromGenome; string bedSequencesFromGenome; string spanGenomes; string spanRootGenomes; bool tree; bool branches; string childrenFromGenome; string parentFromGenome; bool printRoot; string nameForBL; string numSegmentsGenome; string baseCompPair; string genomeMetaData; string chromSizesFromGenome; string percentID; string coverage; string topSegments; string bottomSegments; bool allCoverage; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); listGenomes = optionsParser->getFlag("genomes"); sequencesFromGenome = optionsParser->getOption<string>("sequences"); sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats"); bedSequencesFromGenome = optionsParser->getOption<string>("bedSequences"); tree = 
optionsParser->getFlag("tree"); spanGenomes = optionsParser->getOption<string>("span"); spanRootGenomes = optionsParser->getOption<string>("spanRoot"); branches = optionsParser->getFlag("branches"); childrenFromGenome = optionsParser->getOption<string>("children"); parentFromGenome = optionsParser->getOption<string>("parent"); printRoot = optionsParser->getFlag("root"); nameForBL = optionsParser->getOption<string>("branchLength"); numSegmentsGenome = optionsParser->getOption<string>("numSegments"); baseCompPair = optionsParser->getOption<string>("baseComp"); genomeMetaData = optionsParser->getOption<string>("genomeMetaData"); chromSizesFromGenome = optionsParser->getOption<string>("chromSizes"); percentID = optionsParser->getOption<string>("percentID"); coverage = optionsParser->getOption<string>("coverage"); topSegments = optionsParser->getOption<string>("topSegments"); bottomSegments = optionsParser->getOption<string>("bottomSegments"); allCoverage = optionsParser->getFlag("allCoverage"); size_t optCount = listGenomes == true ? 
1 : 0; if (sequencesFromGenome != "\"\"") ++optCount; if (tree == true) ++optCount; if (sequenceStatsFromGenome != "\"\"") ++optCount; if (bedSequencesFromGenome != "\"\"") ++optCount; if (spanGenomes != "\"\"") ++optCount; if (spanRootGenomes != "\"\"") ++optCount; if (branches) ++ optCount; if (childrenFromGenome != "\"\"") ++optCount; if (parentFromGenome != "\"\"") ++optCount; if (printRoot) ++optCount; if (nameForBL != "\"\"") ++optCount; if (numSegmentsGenome != "\"\"") ++optCount; if (baseCompPair != "\"\"") ++optCount; if (genomeMetaData != "\"\"") ++optCount; if (chromSizesFromGenome != "\"\"") ++optCount; if (percentID != "\"\"") ++optCount; if (coverage != "\"\"") ++optCount; if (topSegments != "\"\"") ++optCount; if (bottomSegments != "\"\"") ++optCount; if (allCoverage) ++optCount; if (optCount > 1) { throw hal_exception("--genomes, --sequences, --tree, --span, --spanRoot, " "--branches, --sequenceStats, --children, --parent, " "--bedSequences, --root, --numSegments, --baseComp, " "--genomeMetaData, --chromSizes, --percentID, " "--coverage, --topSegments, --bottomSegments, " "--allCoverage " "and --branchLength options are exclusive"); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); if (listGenomes == true && alignment->getNumGenomes() > 0) { printGenomes(cout, alignment); } else if (sequencesFromGenome != "\"\"") { printSequences(cout, alignment, sequencesFromGenome); } else if (tree == true) { cout << alignment->getNewickTree() << endl; } else if (sequenceStatsFromGenome != "\"\"") { printSequenceStats(cout, alignment, sequenceStatsFromGenome); } else if (bedSequencesFromGenome != "\"\"") { printBedSequenceStats(cout, alignment, bedSequencesFromGenome); } else if (spanGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanGenomes, ","), false); } else if (spanRootGenomes != "\"\"") { printBranchPath(cout, 
alignment, chopString(spanRootGenomes, ","), true); } else if (branches == true) { printBranches(cout, alignment); } else if (childrenFromGenome != "\"\"") { printChildren(cout, alignment, childrenFromGenome); } else if (parentFromGenome != "\"\"") { printParent(cout, alignment, parentFromGenome); } else if (printRoot == true) { printRootName(cout, alignment); } else if (nameForBL != "\"\"") { printBranchLength(cout, alignment, nameForBL); } else if (numSegmentsGenome != "\"\"") { printNumSegments(cout, alignment, numSegmentsGenome); } else if (baseCompPair != "\"\"") { printBaseComp(cout, alignment, baseCompPair); } else if (genomeMetaData != "\"\"") { printGenomeMetaData(cout, alignment, genomeMetaData); } else if (chromSizesFromGenome != "\"\"") { printChromSizes(cout, alignment, chromSizesFromGenome); } else if (percentID != "\"\"") { printPercentID(cout, alignment, percentID); } else if (coverage != "\"\"") { printCoverage(cout, alignment, coverage); } else if (topSegments != "\"\"") { printSegments(cout, alignment, topSegments, true); } else if (bottomSegments != "\"\"") { printSegments(cout, alignment, bottomSegments, false); } else if (allCoverage) { printAllCoverage(cout, alignment); } else { HalStats halStats(alignment); cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Write masked intervals of genome into bed " "file"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addArgument("genome", "name of genome to process"); optionsParser->addOption("maskFile", "path to bed file to write to", "stdout"); optionsParser->addOption("extend", "extend masked regions by given num. " "of bases.", 0); optionsParser->addOption("extendPct", "extend masked regions by percentage" " of their lengths", 0); string halPath; string genomeName; string bedPath; hal_size_t extend; double extendPct; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); genomeName = optionsParser->getArgument<string>("genome"); bedPath = optionsParser->getOption<string>("maskFile"); extend = optionsParser->getOption<hal_size_t>("extend"); extendPct = optionsParser->getOption<double>("extendPct"); if (extend != 0 && extendPct != 0.) { throw hal_exception("--extend and --extendPct options are exclusive."); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); const Genome* genome = alignment->openGenome(genomeName); if (genome == NULL) { throw hal_exception(string("Genome ") + genomeName + " not found."); } ostream* bedStream = &cout; bool newBed = false; if (bedPath != "stdout") { bedStream = new ofstream(bedPath.c_str()); newBed = true; } if (!bedStream) { throw hal_exception(string("Error opening ") + bedPath + " for writing"); } MaskExtractor mask; mask.extract(alignment, genome, bedStream, extend, extendPct); if (newBed) { delete bedStream; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
int main(int argc, char *argv[]) { CLParserPtr optParser = initParser(); string inPath, bottomAlignmentFile, topAlignmentFile, genomeName; bool noMarkAncestors; try { optParser->parseOptions(argc, argv); inPath = optParser->getArgument<string>("inFile"); bottomAlignmentFile = optParser->getOption<string>("bottomAlignmentFile"); topAlignmentFile = optParser->getOption<string>("topAlignmentFile"); genomeName = optParser->getArgument<string>("genomeName"); noMarkAncestors = optParser->getFlag("noMarkAncestors"); } catch (exception &e) { optParser->printUsage(cerr); return 1; } AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser); AlignmentConstPtr bottomAlignment; AlignmentConstPtr topAlignment; bool useTopAlignment = mainAlignment->getRootName() != genomeName; bool useBottomAlignment = mainAlignment->getChildNames(genomeName).size() != 0; Genome *mainReplacedGenome = mainAlignment->openGenome(genomeName); if (useTopAlignment) { // Not a root genome. Can update using a top alignment. if (topAlignmentFile == "\"\"") { throw hal_exception("Cannot replace non-root genome without a top " "alignment file."); } topAlignment = openHalAlignment(topAlignmentFile, optParser); const Genome *topReplacedGenome = topAlignment->openGenome(genomeName); topReplacedGenome->copyDimensions(mainReplacedGenome); topReplacedGenome->copySequence(mainReplacedGenome); } if (useBottomAlignment) { // Not a leaf genome. Can update using a bottom alignment. 
if (bottomAlignmentFile == "\"\"") { throw hal_exception("Cannot replace non-leaf genome without a bottom " "alignment file."); } bottomAlignment = openHalAlignment(bottomAlignmentFile, optParser); const Genome *botReplacedGenome = bottomAlignment->openGenome(genomeName); botReplacedGenome->copyDimensions(mainReplacedGenome); botReplacedGenome->copySequence(mainReplacedGenome); } if (!useTopAlignment && !useBottomAlignment) { throw hal_exception("Root genome is also a leaf genome."); } if (useBottomAlignment) { copyFromBottomAlignment(bottomAlignment, mainAlignment, genomeName); } if (useTopAlignment) { copyFromTopAlignment(topAlignment, mainAlignment, genomeName); } // Clear update flag if present, since the genome has just been updated. MetaData *metaData = mainReplacedGenome->getMetaData(); if (metaData->has("needsUpdate")) { metaData->set("needsUpdate", "false"); } if (!noMarkAncestors) { markAncestorsForUpdate(mainAlignment, genomeName); } if (useTopAlignment) { topAlignment->close(); } if (useBottomAlignment) { bottomAlignment->close(); } mainAlignment->close(); }
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve basic statics from a hal database"); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addOptionFlag("genomes", "print only a list of genomes " "in alignment", false); optionsParser->addOption("sequences", "print list of sequences in given " "genome", "\"\""); optionsParser->addOption("sequenceStats", "print stats for each sequence in " "given genome", "\"\""); optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false); optionsParser->addOptionFlag("branches", "print list of branches. " "Each branch is specified by the child genome", false); optionsParser->addOption("span", "print branches on path (or spanning tree) " "between comma " "separated list of genomes", "\"\""); optionsParser->addOption("spanRoot", "print genomes on path" "(or spanning tree) between comma " "separated list of genomes. Different from --path" "only in that the spanning tree root is also " "given", "\"\""); string path; bool listGenomes; string sequencesFromGenome; string sequenceStatsFromGenome; string spanGenomes; string spanRootGenomes; bool tree; bool branches; try { optionsParser->parseOptions(argc, argv); path = optionsParser->getArgument<string>("halFile"); listGenomes = optionsParser->getFlag("genomes"); sequencesFromGenome = optionsParser->getOption<string>("sequences"); sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats"); tree = optionsParser->getFlag("tree"); spanGenomes = optionsParser->getOption<string>("span"); spanRootGenomes = optionsParser->getOption<string>("spanRoot"); branches = optionsParser->getFlag("branches"); size_t optCount = listGenomes == true ? 
1 : 0; if (sequencesFromGenome != "\"\"") ++optCount; if (tree == true) ++optCount; if (sequenceStatsFromGenome != "\"\"") ++optCount; if (spanGenomes != "\"\"") ++optCount; if (spanRootGenomes != "\"\"") ++optCount; if (branches) ++optCount; if (optCount > 1) { throw hal_exception("--genomes, --sequences, --tree, --span, " "--spanRoot, --branches " "and --sequenceStats " "options are mutually exclusive"); } } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser); if (listGenomes == true && alignment->getNumGenomes() > 0) { printGenomes(cout, alignment); } else if (sequencesFromGenome != "\"\"") { printSequences(cout, alignment, sequencesFromGenome); } else if (tree == true) { cout << alignment->getNewickTree() << endl; } else if (sequenceStatsFromGenome != "\"\"") { printSequenceStats(cout, alignment, sequenceStatsFromGenome); } else if (spanGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanGenomes, ","), false); } else if (spanRootGenomes != "\"\"") { printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true); } else if (branches == true) { printBranches(cout, alignment); } else { HalStats halStats(alignment); cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats; } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }