Beispiel #1
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance(true);
  optionsParser->addArgument("inFile", "existing tree");
  optionsParser->addArgument("deleteNode", "(leaf) genome to delete");
  optionsParser->addOptionFlag("noMarkAncestors", "don't mark ancestors for"
                               " update", false);
  return optionsParser;
}
Beispiel #2
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance(true);
  optionsParser->addArgument("inFile", "existing tree");
  optionsParser->addOption("bottomAlignmentFile", "hal file containing an "
                           "alignment of the genome and its children. "
                           "Required for non-leaf genomes.", "\"\"");
  optionsParser->addOption("topAlignmentFile", "hal file containing an "
                           "alignment of the genome, its parent, and "
                           "its siblings. Required if the genome to be "
                           "replaced is not the root.", "\"\"");
  optionsParser->addArgument("genomeName", "name of genome to be replaced");
  optionsParser->addOptionFlag("noMarkAncestors", "don't mark ancestors for"
                               " update", false);
  return optionsParser;
}
Beispiel #3
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance(true);
  optionsParser->addArgument("inFile", "existing tree");
  optionsParser->addArgument("botAlignmentFile", "tree containing insert, its "
                             "proper bottom segments, and the new leaf genome");
  optionsParser->addArgument("topAlignmentFile", "tree containing insert, its "
                             "parent, and its proper top segments");
  optionsParser->addArgument("parentName", "insert's future parent");
  optionsParser->addArgument("insertName", "insert name");
  optionsParser->addArgument("childName", "insert's future child");
  optionsParser->addArgument("leafName", "name of new leaf genome");
  optionsParser->addArgument("upperBranchLength", "length of branch from parent"
                             " to insert");
  optionsParser->addArgument("leafBranchLength", "leaf branch length");
  optionsParser->addOptionFlag("noMarkAncestors", "don't mark ancestors for"
                               " update", false);
  return optionsParser;
}
Beispiel #4
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->addArgument("halFile", "input hal file");
  optionsParser->addArgument("srcGenome", "source genome name");
  optionsParser->addArgument("srcBed", "path of input bed file.  set as stdin "
                             "to stream from standard input");
  optionsParser->addArgument("tgtGenome", "target genome name");
  optionsParser->addArgument("tgtBed", "path of output bed file.  set as stdout"
                             " to stream to standard output.");
  optionsParser->addOptionFlag("noDupes", "do not map between duplications in"
                               " graph.", false);
  optionsParser->addOptionFlag("append", "append results to tgtBed", false);
  optionsParser->addOption("inBedVersion", "bed version of input file "
                           "as integer between 3 and 9 or 12 reflecting "
                           "the number of columns (see bed "
                           "format specification for more details). Will "
                           "be autodetected by default.", 0);
  optionsParser->addOption("outBedVersion", "bed version of output file "
                           "as integer between 3 and 9 or 12 reflecting "
                           "the number of columns (see bed "
                           "format specification for more details). Will "
                           "be same as input by default.", 0);
  optionsParser->addOption("coalescenceLimit", "coalescence limit genome:"
                           " the genome at or above the MRCA of source"
                           " and target at which we stop looking for"
                           " homologies (default: MRCA)",
                           "");
  optionsParser->addOptionFlag("outPSL", "write output in PSL instead of "
                               "bed format. overrides --outBedVersion when "
                               "specified.", false);
  optionsParser->addOptionFlag("outPSLWithName", "write output as input BED name followed by PSL line instead of "
                               "bed format. overrides --outBedVersion when "
                               "specified.", false);
  optionsParser->addOptionFlag("keepExtra", "keep extra columns. these are "
                               "columns in the input beyond the specified or "
                               "detected bed version, and which are cut by "
                               "default.", false);
  optionsParser->addOptionFlag("tab", "input is tab-separated. this allows"
                               " column entries to contain spaces.  if this"
                               " flag is not set, both spaces and tabs are"
                               " used to separate input columns.", false);
  optionsParser->setDescription("Map BED genome interval coordinates between "
                                "two genomes.");
  return optionsParser;
}
Beispiel #5
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Retrieve basic statistics from a hal database");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addOptionFlag("genomes", "print only a list of genomes "
                               "in alignment", false);
  optionsParser->addOption("sequences", "print list of sequences in given "
                           "genome", "\"\"");
  optionsParser->addOption("sequenceStats", "print stats for each sequence in "
                           "given genome", "\"\"");
  optionsParser->addOption("bedSequences", "print sequences of given genome "
                           "in bed format",
                           "\"\"");
  optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false);
  optionsParser->addOptionFlag("branches", "print list of branches. "
                               "Each branch is specified by the child genome", 
                               false);
  optionsParser->addOption("span", "print branches on path (or spanning tree) "
                           "between comma "
                           "separated list of genomes", "\"\"");
  optionsParser->addOption("spanRoot", "print genomes on path" 
                           "(or spanning tree) between comma "
                           "separated list of genomes.  Different from --span"
                           "only in that the spanning tree root is also "
                           "given", "\"\"");
  optionsParser->addOption("children", "print names of children of given "
                           "genome", "\"\"");
  optionsParser->addOptionFlag("root", "print root genome name", false);
  optionsParser->addOption("parent", "print name of parent of given genome",
                           "\"\"");
  optionsParser->addOption("branchLength", "print branch length between "
                           "given genome and its parent in the tree",
                           "\"\"");
  optionsParser->addOption("numSegments", "print numTopSegments "
                           "numBottomSegments for given genome.",
                           "\"\"");
  optionsParser->addOption("baseComp", "print base composition for given "
                           "genome by sampling every step bases. Parameter "
                           "value is of the form genome,step.  Ex: "
                           "--baseComp human,1000.  The ouptut is of the form "
                           "fraction_of_As fraction_of_Gs fraction_of_Cs "
                           "fraction_of_Ts.", 
                           "\"\"");
  optionsParser->addOption("genomeMetaData", "print metadata for given genome, "
                           "one entry per line, tab-seperated.", "\"\"");
  optionsParser->addOption("chromSizes", "print the name and length of each"
                           " sequence in a given genome.  This is a subset"
                           " of the"
                           " information returned by --sequenceStats but is"
                           " useful because it is in the format used by"
                           " wigToBigWig", 
                           "\"\"");
  optionsParser->addOption("percentID",
                           "print % ID of a genome with all other genomes."
                           "Only non-duplicated and unambiguous sites are"
                           "considered",
                           "\"\"");
  optionsParser->addOption("coverage",
                           "print histogram of coverage of a genome with"
                           " all genomes", "\"\"");
  optionsParser->addOption("topSegments",
                           "print coordinates of all top segments of given"
                           " genome in BED format.", "\"\"");
  optionsParser->addOption("bottomSegments",
                           "print coordinates of all bottom segments of given"
                           " genome in BED format.", "\"\"");
  optionsParser->addOptionFlag("allCoverage",
                               "print histogram of coverage from all genomes to"
                               " all genomes", false);


  string path;
  bool listGenomes;
  string sequencesFromGenome;
  string sequenceStatsFromGenome;
  string bedSequencesFromGenome;
  string spanGenomes;
  string spanRootGenomes;
  bool tree;
  bool branches;
  string childrenFromGenome;
  string parentFromGenome;
  bool printRoot;
  string nameForBL;
  string numSegmentsGenome;
  string baseCompPair;
  string genomeMetaData;
  string chromSizesFromGenome;
  string percentID;
  string coverage;
  string topSegments;
  string bottomSegments;
  bool allCoverage;
  try
  {
    optionsParser->parseOptions(argc, argv);
    path = optionsParser->getArgument<string>("halFile");
    listGenomes = optionsParser->getFlag("genomes");
    sequencesFromGenome = optionsParser->getOption<string>("sequences");
    sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats");
    bedSequencesFromGenome = optionsParser->getOption<string>("bedSequences");
    tree = optionsParser->getFlag("tree");
    spanGenomes = optionsParser->getOption<string>("span");
    spanRootGenomes = optionsParser->getOption<string>("spanRoot");
    branches = optionsParser->getFlag("branches");
    childrenFromGenome = optionsParser->getOption<string>("children");
    parentFromGenome = optionsParser->getOption<string>("parent");
    printRoot = optionsParser->getFlag("root");
    nameForBL = optionsParser->getOption<string>("branchLength");
    numSegmentsGenome = optionsParser->getOption<string>("numSegments");
    baseCompPair = optionsParser->getOption<string>("baseComp");
    genomeMetaData = optionsParser->getOption<string>("genomeMetaData");
    chromSizesFromGenome = optionsParser->getOption<string>("chromSizes");
    percentID = optionsParser->getOption<string>("percentID");
    coverage = optionsParser->getOption<string>("coverage");
    topSegments = optionsParser->getOption<string>("topSegments");
    bottomSegments = optionsParser->getOption<string>("bottomSegments");
    allCoverage = optionsParser->getFlag("allCoverage");

    size_t optCount = listGenomes == true ? 1 : 0;
    if (sequencesFromGenome != "\"\"") ++optCount;
    if (tree == true) ++optCount;
    if (sequenceStatsFromGenome != "\"\"") ++optCount;
    if (bedSequencesFromGenome != "\"\"") ++optCount;
    if (spanGenomes != "\"\"") ++optCount;
    if (spanRootGenomes != "\"\"") ++optCount;
    if (branches) ++ optCount;
    if (childrenFromGenome != "\"\"") ++optCount;
    if (parentFromGenome != "\"\"") ++optCount;
    if (printRoot) ++optCount;
    if (nameForBL != "\"\"") ++optCount;
    if (numSegmentsGenome != "\"\"") ++optCount;
    if (baseCompPair != "\"\"") ++optCount;
    if (genomeMetaData != "\"\"") ++optCount;
    if (chromSizesFromGenome != "\"\"") ++optCount;
    if (percentID != "\"\"") ++optCount;
    if (coverage != "\"\"") ++optCount;
    if (topSegments != "\"\"") ++optCount;
    if (bottomSegments != "\"\"") ++optCount;
    if (allCoverage) ++optCount;
    if (optCount > 1)
    {
      throw hal_exception("--genomes, --sequences, --tree, --span, --spanRoot, "
                          "--branches, --sequenceStats, --children, --parent, "
                          "--bedSequences, --root, --numSegments, --baseComp, "
                          "--genomeMetaData, --chromSizes, --percentID, "
                          "--coverage,  --topSegments, --bottomSegments, "
                          "--allCoverage "
                          "and --branchLength options are exclusive");
    }
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser);

    if (listGenomes == true && alignment->getNumGenomes() > 0)
    {
      printGenomes(cout, alignment);
    }
    else if (sequencesFromGenome != "\"\"")
    {
      printSequences(cout, alignment, sequencesFromGenome);
    }
    else if (tree == true)
    {
      cout << alignment->getNewickTree() << endl;
    }
    else if (sequenceStatsFromGenome != "\"\"")
    {
      printSequenceStats(cout, alignment, sequenceStatsFromGenome);
    }
    else if (bedSequencesFromGenome != "\"\"")
    {
      printBedSequenceStats(cout, alignment, bedSequencesFromGenome);
    }
    else if (spanGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanGenomes, ","), false);
    }
    else if (spanRootGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true);
    }
    else if (branches == true)
    {
      printBranches(cout, alignment);
    }
    else if (childrenFromGenome != "\"\"")
    {
      printChildren(cout, alignment, childrenFromGenome);
    }
    else if (parentFromGenome != "\"\"")
    {
      printParent(cout, alignment, parentFromGenome);
    }
    else if (printRoot == true)
    {
      printRootName(cout, alignment);
    }
    else if (nameForBL != "\"\"")
    {
      printBranchLength(cout, alignment, nameForBL);
    }
    else if (numSegmentsGenome != "\"\"")
    {
      printNumSegments(cout, alignment, numSegmentsGenome);
    }
    else if (baseCompPair != "\"\"")
    {
      printBaseComp(cout, alignment, baseCompPair);
    }
    else if (genomeMetaData != "\"\"")
    {
      printGenomeMetaData(cout, alignment, genomeMetaData);
    }
    else if (chromSizesFromGenome != "\"\"")
    {
      printChromSizes(cout, alignment, chromSizesFromGenome);
    }
    else if (percentID != "\"\"")
    {
      printPercentID(cout, alignment, percentID);
    }
    else if (coverage != "\"\"") {
      printCoverage(cout, alignment, coverage);
    }
    else if (topSegments != "\"\"") {
      printSegments(cout, alignment, topSegments, true);
    }
    else if (bottomSegments != "\"\"") {
      printSegments(cout, alignment, bottomSegments, false);
    } else if (allCoverage) {
      printAllCoverage(cout, alignment);
    }
    else
    {
      HalStats halStats(alignment);
      cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Rertrieve basic statics from a hal database");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addOptionFlag("genomes", "print only a list of genomes "
                               "in alignment", false);
  optionsParser->addOption("sequences", "print list of sequences in given "
                           "genome", "\"\"");
  optionsParser->addOption("sequenceStats", "print stats for each sequence in "
                           "given genome", "\"\"");
  optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false);
  optionsParser->addOptionFlag("branches", "print list of branches. "
                               "Each branch is specified by the child genome", 
                               false);
  optionsParser->addOption("span", "print branches on path (or spanning tree) "
                           "between comma "
                           "separated list of genomes", "\"\"");
  optionsParser->addOption("spanRoot", "print genomes on path" 
                           "(or spanning tree) between comma "
                           "separated list of genomes.  Different from --path"
                           "only in that the spanning tree root is also "
                           "given", "\"\"");

  string path;
  bool listGenomes;
  string sequencesFromGenome;
  string sequenceStatsFromGenome;
  string spanGenomes;
  string spanRootGenomes;
  bool tree;
  bool branches;
  try
  {
    optionsParser->parseOptions(argc, argv);
    path = optionsParser->getArgument<string>("halFile");
    listGenomes = optionsParser->getFlag("genomes");
    sequencesFromGenome = optionsParser->getOption<string>("sequences");
    sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats");
    tree = optionsParser->getFlag("tree");
    spanGenomes = optionsParser->getOption<string>("span");
    spanRootGenomes = optionsParser->getOption<string>("spanRoot");
    branches = optionsParser->getFlag("branches");

    size_t optCount = listGenomes == true ? 1 : 0;
    if (sequencesFromGenome != "\"\"") ++optCount;
    if (tree == true) ++optCount;
    if (sequenceStatsFromGenome != "\"\"") ++optCount;
    if (spanGenomes != "\"\"") ++optCount;
    if (spanRootGenomes != "\"\"") ++optCount;
    if (branches) ++optCount;
    if (optCount > 1)
    {
      throw hal_exception("--genomes, --sequences, --tree, --span, "
                          "--spanRoot, --branches "
                          "and --sequenceStats " 
                          "options are mutually exclusive");
    }        
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser);

    if (listGenomes == true && alignment->getNumGenomes() > 0)
    {
      printGenomes(cout, alignment);
    }
    else if (sequencesFromGenome != "\"\"")
    {
      printSequences(cout, alignment, sequencesFromGenome);
    }
    else if (tree == true)
    {
      cout << alignment->getNewickTree() << endl;
    }
    else if (sequenceStatsFromGenome != "\"\"")
    {
      printSequenceStats(cout, alignment, sequenceStatsFromGenome);
    }
    else if (spanGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanGenomes, ","), false);
    }
    else if (spanRootGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true);
    }
    else if (branches == true)
    {
      printBranches(cout, alignment);
    }
    else
    {
      HalStats halStats(alignment);
      cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}