Esempio n. 1
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, bottomAlignmentFile, topAlignmentFile, genomeName;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    bottomAlignmentFile = optParser->getOption<string>("bottomAlignmentFile");
    topAlignmentFile = optParser->getOption<string>("topAlignmentFile");
    genomeName = optParser->getArgument<string>("genomeName");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser);
  AlignmentConstPtr bottomAlignment;
  AlignmentConstPtr topAlignment;
  bool useTopAlignment = mainAlignment->getRootName() != genomeName;
  bool useBottomAlignment = mainAlignment->getChildNames(genomeName).size() != 0;
  Genome *mainReplacedGenome = mainAlignment->openGenome(genomeName);
  if (useTopAlignment) {
    // Not a root genome. Can update using a top alignment.
    if (topAlignmentFile == "\"\"") {
      throw hal_exception("Cannot replace non-root genome without a top "
                          "alignment file.");
    }
    topAlignment = openHalAlignment(topAlignmentFile,
                                    optParser);
    const Genome *topReplacedGenome = topAlignment->openGenome(genomeName);
    topReplacedGenome->copyDimensions(mainReplacedGenome);
    topReplacedGenome->copySequence(mainReplacedGenome);
    
  }
  if (useBottomAlignment) {
    // Not a leaf genome. Can update using a bottom alignment.
    if (bottomAlignmentFile == "\"\"") {
      throw hal_exception("Cannot replace non-leaf genome without a bottom "
                          "alignment file.");
    }
    bottomAlignment = openHalAlignment(bottomAlignmentFile, optParser);
    const Genome *botReplacedGenome = bottomAlignment->openGenome(genomeName);
    botReplacedGenome->copyDimensions(mainReplacedGenome);
    botReplacedGenome->copySequence(mainReplacedGenome);
  }
  if (!useTopAlignment && !useBottomAlignment) {
    throw hal_exception("Root genome is also a leaf genome.");
  }
  if (useBottomAlignment) {
    copyFromBottomAlignment(bottomAlignment, mainAlignment, genomeName);
  }
  if (useTopAlignment) {
    copyFromTopAlignment(topAlignment, mainAlignment, genomeName);
  }

  // Clear update flag if present, since the genome has just been updated.
  MetaData *metaData = mainReplacedGenome->getMetaData();
  if (metaData->has("needsUpdate")) {
    metaData->set("needsUpdate", "false");
  }

  if (!noMarkAncestors) {
    markAncestorsForUpdate(mainAlignment, genomeName);
  }
  if (useTopAlignment) {
    topAlignment->close();
  }
  if (useBottomAlignment) {
    bottomAlignment->close();
  }
  mainAlignment->close();
}
Esempio n. 2
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Write masked intervals of genome into bed "
                                "file");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addArgument("genome", "name of genome to process");
  optionsParser->addOption("maskFile", "path to bed file to write to", 
                           "stdout");
  optionsParser->addOption("extend", "extend masked regions by given num. "
                           "of bases.", 0);
  optionsParser->addOption("extendPct", "extend masked regions by percentage"
                           " of their lengths", 0);

  string halPath;
  string genomeName;
  string bedPath;
  hal_size_t extend;
  double extendPct;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    genomeName = optionsParser->getArgument<string>("genome");
    bedPath = optionsParser->getOption<string>("maskFile");
    extend = optionsParser->getOption<hal_size_t>("extend");
    extendPct = optionsParser->getOption<double>("extendPct");

    if (extend != 0 && extendPct != 0.)
    {
      throw hal_exception("--extend and --extendPct options are exclusive.");
    }
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, 
                                                           optionsParser);

    const Genome* genome = alignment->openGenome(genomeName);
    if (genome == NULL)
    {
      throw hal_exception(string("Genome ") + genomeName + " not found.");
    }

    ostream* bedStream = &cout;
    bool newBed = false;
    if (bedPath != "stdout")
    {
      bedStream = new ofstream(bedPath.c_str());
      newBed = true;
    }
    if (!bedStream)
    {
      throw hal_exception(string("Error opening ") + bedPath + " for writing");
    }

    MaskExtractor mask;
    mask.extract(alignment, genome, bedStream, extend, extendPct);

    if (newBed)
    {
      delete bedStream;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
Esempio n. 3
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Rertrieve basic statics from a hal database");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addOptionFlag("genomes", "print only a list of genomes "
                               "in alignment", false);
  optionsParser->addOption("sequences", "print list of sequences in given "
                           "genome", "\"\"");
  optionsParser->addOption("sequenceStats", "print stats for each sequence in "
                           "given genome", "\"\"");
  optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false);
  optionsParser->addOptionFlag("branches", "print list of branches. "
                               "Each branch is specified by the child genome", 
                               false);
  optionsParser->addOption("span", "print branches on path (or spanning tree) "
                           "between comma "
                           "separated list of genomes", "\"\"");
  optionsParser->addOption("spanRoot", "print genomes on path" 
                           "(or spanning tree) between comma "
                           "separated list of genomes.  Different from --path"
                           "only in that the spanning tree root is also "
                           "given", "\"\"");

  string path;
  bool listGenomes;
  string sequencesFromGenome;
  string sequenceStatsFromGenome;
  string spanGenomes;
  string spanRootGenomes;
  bool tree;
  bool branches;
  try
  {
    optionsParser->parseOptions(argc, argv);
    path = optionsParser->getArgument<string>("halFile");
    listGenomes = optionsParser->getFlag("genomes");
    sequencesFromGenome = optionsParser->getOption<string>("sequences");
    sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats");
    tree = optionsParser->getFlag("tree");
    spanGenomes = optionsParser->getOption<string>("span");
    spanRootGenomes = optionsParser->getOption<string>("spanRoot");
    branches = optionsParser->getFlag("branches");

    size_t optCount = listGenomes == true ? 1 : 0;
    if (sequencesFromGenome != "\"\"") ++optCount;
    if (tree == true) ++optCount;
    if (sequenceStatsFromGenome != "\"\"") ++optCount;
    if (spanGenomes != "\"\"") ++optCount;
    if (spanRootGenomes != "\"\"") ++optCount;
    if (branches) ++optCount;
    if (optCount > 1)
    {
      throw hal_exception("--genomes, --sequences, --tree, --span, "
                          "--spanRoot, --branches "
                          "and --sequenceStats " 
                          "options are mutually exclusive");
    }        
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser);

    if (listGenomes == true && alignment->getNumGenomes() > 0)
    {
      printGenomes(cout, alignment);
    }
    else if (sequencesFromGenome != "\"\"")
    {
      printSequences(cout, alignment, sequencesFromGenome);
    }
    else if (tree == true)
    {
      cout << alignment->getNewickTree() << endl;
    }
    else if (sequenceStatsFromGenome != "\"\"")
    {
      printSequenceStats(cout, alignment, sequenceStatsFromGenome);
    }
    else if (spanGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanGenomes, ","), false);
    }
    else if (spanRootGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true);
    }
    else if (branches == true)
    {
      printBranches(cout, alignment);
    }
    else
    {
      HalStats halStats(alignment);
      cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}