Пример #1
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance(true);
  optionsParser->addArgument("inFile", "existing tree");
  optionsParser->addArgument("deleteNode", "(leaf) genome to delete");
  optionsParser->addOptionFlag("noMarkAncestors", "don't mark ancestors for"
                               " update", false);
  return optionsParser;
}
Пример #2
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, deleteNode;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    deleteNode = optParser->getArgument<string>("deleteNode");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr alignment = openHalAlignment(inPath, optParser);
  if (!noMarkAncestors) {
    markAncestorsForUpdate(alignment, deleteNode);
  }
  alignment->removeGenome(deleteNode);
  return 0;
}
Пример #3
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->addArgument("halFile", "path to hal file to validate");
  optionsParser->setDescription("Check if hal database is valid");
  string path;
  try
  {
    optionsParser->parseOptions(argc, argv);
    path = optionsParser->getArgument<string>("halFile");
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser);
    validateAlignment(alignment);
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  cout << "\nFile valid" << endl;
  
  return 0;
}
Пример #4
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance(true);
  optionsParser->addArgument("inFile", "existing tree");
  optionsParser->addOption("bottomAlignmentFile", "hal file containing an "
                           "alignment of the genome and its children. "
                           "Required for non-leaf genomes.", "\"\"");
  optionsParser->addOption("topAlignmentFile", "hal file containing an "
                           "alignment of the genome, its parent, and "
                           "its siblings. Required if the genome to be "
                           "replaced is not the root.", "\"\"");
  optionsParser->addArgument("genomeName", "name of genome to be replaced");
  optionsParser->addOptionFlag("noMarkAncestors", "don't mark ancestors for"
                               " update", false);
  return optionsParser;
}
Пример #5
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance(true);
  optionsParser->addArgument("inFile", "existing tree");
  optionsParser->addArgument("botAlignmentFile", "tree containing insert, its "
                             "proper bottom segments, and the new leaf genome");
  optionsParser->addArgument("topAlignmentFile", "tree containing insert, its "
                             "parent, and its proper top segments");
  optionsParser->addArgument("parentName", "insert's future parent");
  optionsParser->addArgument("insertName", "insert name");
  optionsParser->addArgument("childName", "insert's future child");
  optionsParser->addArgument("leafName", "name of new leaf genome");
  optionsParser->addArgument("upperBranchLength", "length of branch from parent"
                             " to insert");
  optionsParser->addArgument("leafBranchLength", "leaf branch length");
  optionsParser->addOptionFlag("noMarkAncestors", "don't mark ancestors for"
                               " update", false);
  return optionsParser;
}
Пример #6
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName,
    childName, leafName;
  double upperBranchLength, leafBranchLength;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    botAlignmentPath = optParser->getArgument<string>("botAlignmentFile");
    topAlignmentPath = optParser->getArgument<string>("topAlignmentFile");
    parentName = optParser->getArgument<string>("parentName");
    insertName = optParser->getArgument<string>("insertName");
    childName = optParser->getArgument<string>("childName");
    leafName = optParser->getArgument<string>("leafName");
    upperBranchLength = optParser->getArgument<double>("upperBranchLength");
    leafBranchLength = optParser->getArgument<double>("leafBranchLength");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser);
  AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath,
                                                    optParser);
  AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath,
                                                    optParser);
  mainAlignment->insertGenome(insertName, parentName, childName,
                              upperBranchLength);
  mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength);
  // Insert the new intermediate node.
  Genome *insertGenome = mainAlignment->openGenome(insertName);
  const Genome *topInsertGenome = topAlignment->openGenome(insertName);
  const Genome *botInsertGenome = botAlignment->openGenome(insertName);
  topInsertGenome->copyDimensions(insertGenome);
  topInsertGenome->copyTopDimensions(insertGenome);
  botInsertGenome->copyBottomDimensions(insertGenome);
  topInsertGenome->copySequence(insertGenome);
  topInsertGenome->copyTopSegments(insertGenome);
  topInsertGenome->copyMetadata(insertGenome);
  botInsertGenome->copyBottomSegments(insertGenome);
  insertGenome->fixParseInfo();

  // Copy the bottom segments for the parent genome from the top alignment.
  Genome *parentGenome = mainAlignment->openGenome(parentName);
  const Genome *botParentGenome = topAlignment->openGenome(parentName);
  botParentGenome->copyBottomDimensions(parentGenome);
  botParentGenome->copyBottomSegments(parentGenome);
  parentGenome->fixParseInfo();

  // Fix the parent's other children as well.
  vector<string> allChildren = mainAlignment->getChildNames(parentName);
  for (size_t i = 0; i < allChildren.size(); i++)
  {
    if (allChildren[i] != insertName)
    {
      Genome *outGenome = mainAlignment->openGenome(allChildren[i]);
      const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]);
      topSegmentsGenome->copyTopDimensions(outGenome);
      topSegmentsGenome->copyTopSegments(outGenome);
      outGenome->fixParseInfo();
            
    }
  }

  // Copy the top segments for the child genome from the bottom alignment.
  Genome *childGenome = mainAlignment->openGenome(childName);
  const Genome *topChildGenome = botAlignment->openGenome(childName);
  topChildGenome->copyTopDimensions(childGenome);
  topChildGenome->copyTopSegments(childGenome);
  childGenome->fixParseInfo();

  // Copy the entire genome for the leaf from the bottom alignment.
  Genome *outLeafGenome = mainAlignment->openGenome(leafName);
  const Genome *inLeafGenome = botAlignment->openGenome(leafName);
  inLeafGenome->copy(outLeafGenome);
  if (!noMarkAncestors) {
    markAncestorsForUpdate(mainAlignment, insertName);
  }
  mainAlignment->close();
  botAlignment->close();
  topAlignment->close();
}
Пример #7
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Rertrieve chain (pairwise alignment) "
                                "information from a hal database.\n"
                                "WARNING: THIS TOOL WAS NEVER FINISHED OR"
                                " TESTED. USE AT OWN RISK. PLEASE "
                                "CONSIDER halLiftover --outPSL INSTEAD.");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addArgument("genome", "(query) genome to process");
  optionsParser->addOption("sequence", "sequence name in query genome ("
                           "all sequences if not specified)", "\"\"");
  optionsParser->addOption("start", "start position in query genome", 0);
  optionsParser->addOption("length", "maximum length of chain to output.", 0);
  optionsParser->addOption("chainFile", "path for output file.  stdout if not"
                           " specified", "\"\"");
  optionsParser->addOption("maxGap", 
                           "maximum indel length to be considered a gap within"
                           " a chain.", 
                           20);
  

  string halPath;
  string chainPath;
  string genomeName;
  string sequenceName;
  hal_size_t start;
  hal_size_t length;
  hal_size_t maxGap;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    genomeName = optionsParser->getArgument<string>("genome");
    sequenceName = optionsParser->getOption<string>("sequence");
    start = optionsParser->getOption<hal_size_t>("start");
    length = optionsParser->getOption<hal_size_t>("length");
    chainPath = optionsParser->getOption<string>("chainFile");
    maxGap = optionsParser->getOption<hal_size_t>("maxGap");
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK."
         << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." <<endl;  

    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath,
                                                           optionsParser);
    
    
    const Genome* genome = alignment->openGenome(genomeName);
    if (genome == NULL)
    {
      throw hal_exception(string("Genome not found: ") + genomeName);
    }
    hal_index_t endPosition = 
       length > 0 ? start + length : genome->getSequenceLength();

    const Sequence* sequence = NULL;
    if (sequenceName != "\"\"")
    {
      sequence = genome->getSequence(sequenceName);
      if (sequence == NULL)
      {
        throw hal_exception(string("Sequence not found: ") + sequenceName);
      }
      start += sequence->getStartPosition();
      endPosition =  
         length > 0 ? start + length : sequence->getSequenceLength();
    }

    ofstream ofile;
    ostream& outStream = chainPath == "\"\"" ? cout : ofile;
    if (chainPath != "\"\"")
    {
      ofile.open(chainPath.c_str());
      if (!ofile)
      {
        throw hal_exception(string("Error opening output file ") + 
                            chainPath);
      }
    }

    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    top->toSite(start, false);
    // do slicing here;
    
    GappedTopSegmentIteratorConstPtr gtop = 
       genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap);

    // need to review!
    Chain chain;
    chain._id = 0;
    while (gtop->getRightArrayIndex() < 
           (hal_index_t)genome->getNumTopSegments() &&
           gtop->getLeft()->getStartPosition() < endPosition)
    {
      if (gtop->hasParent() == true)
      {
        hal_offset_t leftOffset = 0;
        if ((hal_index_t)start > gtop->getStartPosition() 
            && (hal_index_t)start < gtop->getEndPosition())
        {
          leftOffset = start - gtop->getStartPosition() ;
        }
        hal_offset_t rightOffset = 0;
        if (endPosition - 1 > gtop->getStartPosition() 
            && endPosition - 1 < gtop->getEndPosition())
        {
          rightOffset = gtop->getEndPosition() + 1 - endPosition;
        }
        // need to do offsets for edge cases
        gtIteratorToChain(gtop, chain, leftOffset, rightOffset);
        outStream << chain;
        ++chain._id;
      }
      gtop->toRight();
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
Пример #8
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = initParser();

  string halPath;
  string srcGenomeName;
  string srcBedPath;
  string tgtGenomeName;
  string tgtBedPath;
  bool noDupes;
  bool append;
  int inBedVersion;
  int outBedVersion;
  bool keepExtra;
  bool outPSL;
  bool outPSLWithName;
  bool tab;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    srcGenomeName = optionsParser->getArgument<string>("srcGenome");
    srcBedPath =  optionsParser->getArgument<string>("srcBed");
    tgtGenomeName = optionsParser->getArgument<string>("tgtGenome");
    tgtBedPath =  optionsParser->getArgument<string>("tgtBed");
    noDupes = optionsParser->getFlag("noDupes");
    append = optionsParser->getFlag("append");
    inBedVersion = optionsParser->getOption<int>("inBedVersion");
    outBedVersion = optionsParser->getOption<int>("outBedVersion");
    keepExtra = optionsParser->getFlag("keepExtra");
    outPSL = optionsParser->getFlag("outPSL");
    outPSLWithName = optionsParser->getFlag("outPSLWithName");
    tab = optionsParser->getFlag("tab");
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }

  try
  {
    if (outPSLWithName == true)
    {
      outPSL = true;
    }
    if (outPSL == true)
    {
      outBedVersion = 12;
    }

    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, 
                                                           optionsParser);
    if (alignment->getNumGenomes() == 0)
    {
      throw hal_exception("hal alignmenet is empty");
    }

    const Genome* srcGenome = alignment->openGenome(srcGenomeName);
    if (srcGenome == NULL)
    {
      throw hal_exception(string("srcGenome, ") + srcGenomeName + 
                          ", not found in alignment");
    }
    const Genome* tgtGenome = alignment->openGenome(tgtGenomeName);
    if (tgtGenome == NULL)
    {
      throw hal_exception(string("tgtGenome, ") + tgtGenomeName + 
                          ", not found in alignment");
    }
    
    ifstream srcBed;
    istream* srcBedPtr;
    if (srcBedPath == "stdin")
    {
      srcBedPtr = &cin;
    }
    else
    {
      srcBed.open(srcBedPath.c_str());
      srcBedPtr = &srcBed;
      if (!srcBed)
      {
        throw hal_exception("Error opening srcBed, " + srcBedPath);
      }
    }
    
    ios_base::openmode mode = append ? ios::out | ios::app : ios_base::out;
    ofstream tgtBed;
    ostream* tgtBedPtr;
    if (tgtBedPath == "stdout")
    {
      tgtBedPtr = &cout;
    }
    else
    {      
      tgtBed.open(tgtBedPath.c_str(), mode);
      tgtBedPtr = &tgtBed;
      if (!tgtBed)
      {
        throw hal_exception("Error opening tgtBed, " + tgtBedPath);
      }
    }

    locale* inLocale = NULL;
    if (tab == true)
    {
      inLocale = new locale(cin.getloc(), new TabSepFacet(cin.getloc()));
      assert(std::isspace('\t', *inLocale) == true);
      assert(std::isspace(' ', *inLocale) == false);
    }
    
    BlockLiftover liftover;
    liftover.convert(alignment, srcGenome, srcBedPtr, tgtGenome, tgtBedPtr,
                     inBedVersion, outBedVersion, keepExtra, !noDupes,
                     outPSL, outPSLWithName, inLocale);
    
    delete inLocale;

  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }

  return 0;
}
Пример #9
0
static CLParserPtr initParser()
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->addArgument("halFile", "input hal file");
  optionsParser->addArgument("srcGenome", "source genome name");
  optionsParser->addArgument("srcBed", "path of input bed file.  set as stdin "
                             "to stream from standard input");
  optionsParser->addArgument("tgtGenome", "target genome name");
  optionsParser->addArgument("tgtBed", "path of output bed file.  set as stdout"
                             " to stream to standard output.");
  optionsParser->addOptionFlag("noDupes", "do not map between duplications in"
                               " graph.", false);
  optionsParser->addOptionFlag("append", "append results to tgtBed", false);
  optionsParser->addOption("inBedVersion", "bed version of input file "
                           "as integer between 3 and 9 or 12 reflecting "
                           "the number of columns (see bed "
                           "format specification for more details). Will "
                           "be autodetected by default.", 0);
  optionsParser->addOption("outBedVersion", "bed version of output file "
                           "as integer between 3 and 9 or 12 reflecting "
                           "the number of columns (see bed "
                           "format specification for more details). Will "
                           "be same as input by default.", 0);
  optionsParser->addOptionFlag("outPSL", "write output in PSL instead of "
                               "bed format. overrides --outBedVersion when "
                               "specified.", false);
  optionsParser->addOptionFlag("outPSLWithName", "write output as input BED name followed by PSL line instead of "
                               "bed format. overrides --outBedVersion when "
                               "specified.", false);
  optionsParser->addOptionFlag("keepExtra", "keep extra columns. these are "
                               "columns in the input beyond the specified or "
                               "detected bed version, and which are cut by "
                               "default.", false);
  optionsParser->addOptionFlag("tab", "input is tab-separated. this allows"
                               " column entries to contain spaces.  if this"
                               " flag is not set, both spaces and tabs are"
                               " used to separate input columns.", false);
  optionsParser->setDescription("Map BED genome interval coordinates between "
                                "two genomes.");
  return optionsParser;
}
Пример #10
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Retrieve basic statistics from a hal database");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addOptionFlag("genomes", "print only a list of genomes "
                               "in alignment", false);
  optionsParser->addOption("sequences", "print list of sequences in given "
                           "genome", "\"\"");
  optionsParser->addOption("sequenceStats", "print stats for each sequence in "
                           "given genome", "\"\"");
  optionsParser->addOption("bedSequences", "print sequences of given genome "
                           "in bed format",
                           "\"\"");
  optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false);
  optionsParser->addOptionFlag("branches", "print list of branches. "
                               "Each branch is specified by the child genome", 
                               false);
  optionsParser->addOption("span", "print branches on path (or spanning tree) "
                           "between comma "
                           "separated list of genomes", "\"\"");
  optionsParser->addOption("spanRoot", "print genomes on path" 
                           "(or spanning tree) between comma "
                           "separated list of genomes.  Different from --span"
                           "only in that the spanning tree root is also "
                           "given", "\"\"");
  optionsParser->addOption("children", "print names of children of given "
                           "genome", "\"\"");
  optionsParser->addOptionFlag("root", "print root genome name", false);
  optionsParser->addOption("parent", "print name of parent of given genome",
                           "\"\"");
  optionsParser->addOption("branchLength", "print branch length between "
                           "given genome and its parent in the tree",
                           "\"\"");
  optionsParser->addOption("numSegments", "print numTopSegments "
                           "numBottomSegments for given genome.",
                           "\"\"");
  optionsParser->addOption("baseComp", "print base composition for given "
                           "genome by sampling every step bases. Parameter "
                           "value is of the form genome,step.  Ex: "
                           "--baseComp human,1000.  The ouptut is of the form "
                           "fraction_of_As fraction_of_Gs fraction_of_Cs "
                           "fraction_of_Ts.", 
                           "\"\"");
  optionsParser->addOption("genomeMetaData", "print metadata for given genome, "
                           "one entry per line, tab-seperated.", "\"\"");
  optionsParser->addOption("chromSizes", "print the name and length of each"
                           " sequence in a given genome.  This is a subset"
                           " of the"
                           " information returned by --sequenceStats but is"
                           " useful because it is in the format used by"
                           " wigToBigWig", 
                           "\"\"");
  optionsParser->addOption("percentID",
                           "print % ID of a genome with all other genomes."
                           "Only non-duplicated and unambiguous sites are"
                           "considered",
                           "\"\"");
  optionsParser->addOption("coverage",
                           "print histogram of coverage of a genome with"
                           " all genomes", "\"\"");
  optionsParser->addOption("topSegments",
                           "print coordinates of all top segments of given"
                           " genome in BED format.", "\"\"");
  optionsParser->addOption("bottomSegments",
                           "print coordinates of all bottom segments of given"
                           " genome in BED format.", "\"\"");
  optionsParser->addOptionFlag("allCoverage",
                               "print histogram of coverage from all genomes to"
                               " all genomes", false);


  string path;
  bool listGenomes;
  string sequencesFromGenome;
  string sequenceStatsFromGenome;
  string bedSequencesFromGenome;
  string spanGenomes;
  string spanRootGenomes;
  bool tree;
  bool branches;
  string childrenFromGenome;
  string parentFromGenome;
  bool printRoot;
  string nameForBL;
  string numSegmentsGenome;
  string baseCompPair;
  string genomeMetaData;
  string chromSizesFromGenome;
  string percentID;
  string coverage;
  string topSegments;
  string bottomSegments;
  bool allCoverage;
  try
  {
    optionsParser->parseOptions(argc, argv);
    path = optionsParser->getArgument<string>("halFile");
    listGenomes = optionsParser->getFlag("genomes");
    sequencesFromGenome = optionsParser->getOption<string>("sequences");
    sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats");
    bedSequencesFromGenome = optionsParser->getOption<string>("bedSequences");
    tree = optionsParser->getFlag("tree");
    spanGenomes = optionsParser->getOption<string>("span");
    spanRootGenomes = optionsParser->getOption<string>("spanRoot");
    branches = optionsParser->getFlag("branches");
    childrenFromGenome = optionsParser->getOption<string>("children");
    parentFromGenome = optionsParser->getOption<string>("parent");
    printRoot = optionsParser->getFlag("root");
    nameForBL = optionsParser->getOption<string>("branchLength");
    numSegmentsGenome = optionsParser->getOption<string>("numSegments");
    baseCompPair = optionsParser->getOption<string>("baseComp");
    genomeMetaData = optionsParser->getOption<string>("genomeMetaData");
    chromSizesFromGenome = optionsParser->getOption<string>("chromSizes");
    percentID = optionsParser->getOption<string>("percentID");
    coverage = optionsParser->getOption<string>("coverage");
    topSegments = optionsParser->getOption<string>("topSegments");
    bottomSegments = optionsParser->getOption<string>("bottomSegments");
    allCoverage = optionsParser->getFlag("allCoverage");

    size_t optCount = listGenomes == true ? 1 : 0;
    if (sequencesFromGenome != "\"\"") ++optCount;
    if (tree == true) ++optCount;
    if (sequenceStatsFromGenome != "\"\"") ++optCount;
    if (bedSequencesFromGenome != "\"\"") ++optCount;
    if (spanGenomes != "\"\"") ++optCount;
    if (spanRootGenomes != "\"\"") ++optCount;
    if (branches) ++ optCount;
    if (childrenFromGenome != "\"\"") ++optCount;
    if (parentFromGenome != "\"\"") ++optCount;
    if (printRoot) ++optCount;
    if (nameForBL != "\"\"") ++optCount;
    if (numSegmentsGenome != "\"\"") ++optCount;
    if (baseCompPair != "\"\"") ++optCount;
    if (genomeMetaData != "\"\"") ++optCount;
    if (chromSizesFromGenome != "\"\"") ++optCount;
    if (percentID != "\"\"") ++optCount;
    if (coverage != "\"\"") ++optCount;
    if (topSegments != "\"\"") ++optCount;
    if (bottomSegments != "\"\"") ++optCount;
    if (allCoverage) ++optCount;
    if (optCount > 1)
    {
      throw hal_exception("--genomes, --sequences, --tree, --span, --spanRoot, "
                          "--branches, --sequenceStats, --children, --parent, "
                          "--bedSequences, --root, --numSegments, --baseComp, "
                          "--genomeMetaData, --chromSizes, --percentID, "
                          "--coverage,  --topSegments, --bottomSegments, "
                          "--allCoverage "
                          "and --branchLength options are exclusive");
    }
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser);

    if (listGenomes == true && alignment->getNumGenomes() > 0)
    {
      printGenomes(cout, alignment);
    }
    else if (sequencesFromGenome != "\"\"")
    {
      printSequences(cout, alignment, sequencesFromGenome);
    }
    else if (tree == true)
    {
      cout << alignment->getNewickTree() << endl;
    }
    else if (sequenceStatsFromGenome != "\"\"")
    {
      printSequenceStats(cout, alignment, sequenceStatsFromGenome);
    }
    else if (bedSequencesFromGenome != "\"\"")
    {
      printBedSequenceStats(cout, alignment, bedSequencesFromGenome);
    }
    else if (spanGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanGenomes, ","), false);
    }
    else if (spanRootGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true);
    }
    else if (branches == true)
    {
      printBranches(cout, alignment);
    }
    else if (childrenFromGenome != "\"\"")
    {
      printChildren(cout, alignment, childrenFromGenome);
    }
    else if (parentFromGenome != "\"\"")
    {
      printParent(cout, alignment, parentFromGenome);
    }
    else if (printRoot == true)
    {
      printRootName(cout, alignment);
    }
    else if (nameForBL != "\"\"")
    {
      printBranchLength(cout, alignment, nameForBL);
    }
    else if (numSegmentsGenome != "\"\"")
    {
      printNumSegments(cout, alignment, numSegmentsGenome);
    }
    else if (baseCompPair != "\"\"")
    {
      printBaseComp(cout, alignment, baseCompPair);
    }
    else if (genomeMetaData != "\"\"")
    {
      printGenomeMetaData(cout, alignment, genomeMetaData);
    }
    else if (chromSizesFromGenome != "\"\"")
    {
      printChromSizes(cout, alignment, chromSizesFromGenome);
    }
    else if (percentID != "\"\"")
    {
      printPercentID(cout, alignment, percentID);
    }
    else if (coverage != "\"\"") {
      printCoverage(cout, alignment, coverage);
    }
    else if (topSegments != "\"\"") {
      printSegments(cout, alignment, topSegments, true);
    }
    else if (bottomSegments != "\"\"") {
      printSegments(cout, alignment, bottomSegments, false);
    } else if (allCoverage) {
      printAllCoverage(cout, alignment);
    }
    else
    {
      HalStats halStats(alignment);
      cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
Пример #11
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Write masked intervals of genome into bed "
                                "file");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addArgument("genome", "name of genome to process");
  optionsParser->addOption("maskFile", "path to bed file to write to", 
                           "stdout");
  optionsParser->addOption("extend", "extend masked regions by given num. "
                           "of bases.", 0);
  optionsParser->addOption("extendPct", "extend masked regions by percentage"
                           " of their lengths", 0);

  string halPath;
  string genomeName;
  string bedPath;
  hal_size_t extend;
  double extendPct;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    genomeName = optionsParser->getArgument<string>("genome");
    bedPath = optionsParser->getOption<string>("maskFile");
    extend = optionsParser->getOption<hal_size_t>("extend");
    extendPct = optionsParser->getOption<double>("extendPct");

    if (extend != 0 && extendPct != 0.)
    {
      throw hal_exception("--extend and --extendPct options are exclusive.");
    }
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, 
                                                           optionsParser);

    const Genome* genome = alignment->openGenome(genomeName);
    if (genome == NULL)
    {
      throw hal_exception(string("Genome ") + genomeName + " not found.");
    }

    ostream* bedStream = &cout;
    bool newBed = false;
    if (bedPath != "stdout")
    {
      bedStream = new ofstream(bedPath.c_str());
      newBed = true;
    }
    if (!bedStream)
    {
      throw hal_exception(string("Error opening ") + bedPath + " for writing");
    }

    MaskExtractor mask;
    mask.extract(alignment, genome, bedStream, extend, extendPct);

    if (newBed)
    {
      delete bedStream;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
Пример #12
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, bottomAlignmentFile, topAlignmentFile, genomeName;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    bottomAlignmentFile = optParser->getOption<string>("bottomAlignmentFile");
    topAlignmentFile = optParser->getOption<string>("topAlignmentFile");
    genomeName = optParser->getArgument<string>("genomeName");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser);
  AlignmentConstPtr bottomAlignment;
  AlignmentConstPtr topAlignment;
  bool useTopAlignment = mainAlignment->getRootName() != genomeName;
  bool useBottomAlignment = mainAlignment->getChildNames(genomeName).size() != 0;
  Genome *mainReplacedGenome = mainAlignment->openGenome(genomeName);
  if (useTopAlignment) {
    // Not a root genome. Can update using a top alignment.
    if (topAlignmentFile == "\"\"") {
      throw hal_exception("Cannot replace non-root genome without a top "
                          "alignment file.");
    }
    topAlignment = openHalAlignment(topAlignmentFile,
                                    optParser);
    const Genome *topReplacedGenome = topAlignment->openGenome(genomeName);
    topReplacedGenome->copyDimensions(mainReplacedGenome);
    topReplacedGenome->copySequence(mainReplacedGenome);
    
  }
  if (useBottomAlignment) {
    // Not a leaf genome. Can update using a bottom alignment.
    if (bottomAlignmentFile == "\"\"") {
      throw hal_exception("Cannot replace non-leaf genome without a bottom "
                          "alignment file.");
    }
    bottomAlignment = openHalAlignment(bottomAlignmentFile, optParser);
    const Genome *botReplacedGenome = bottomAlignment->openGenome(genomeName);
    botReplacedGenome->copyDimensions(mainReplacedGenome);
    botReplacedGenome->copySequence(mainReplacedGenome);
  }
  if (!useTopAlignment && !useBottomAlignment) {
    throw hal_exception("Root genome is also a leaf genome.");
  }
  if (useBottomAlignment) {
    copyFromBottomAlignment(bottomAlignment, mainAlignment, genomeName);
  }
  if (useTopAlignment) {
    copyFromTopAlignment(topAlignment, mainAlignment, genomeName);
  }

  // Clear update flag if present, since the genome has just been updated.
  MetaData *metaData = mainReplacedGenome->getMetaData();
  if (metaData->has("needsUpdate")) {
    metaData->set("needsUpdate", "false");
  }

  if (!noMarkAncestors) {
    markAncestorsForUpdate(mainAlignment, genomeName);
  }
  if (useTopAlignment) {
    topAlignment->close();
  }
  if (useBottomAlignment) {
    bottomAlignment->close();
  }
  mainAlignment->close();
}
Пример #13
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Rertrieve basic statics from a hal database");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addOptionFlag("genomes", "print only a list of genomes "
                               "in alignment", false);
  optionsParser->addOption("sequences", "print list of sequences in given "
                           "genome", "\"\"");
  optionsParser->addOption("sequenceStats", "print stats for each sequence in "
                           "given genome", "\"\"");
  optionsParser->addOptionFlag("tree", "print only the NEWICK tree", false);
  optionsParser->addOptionFlag("branches", "print list of branches. "
                               "Each branch is specified by the child genome", 
                               false);
  optionsParser->addOption("span", "print branches on path (or spanning tree) "
                           "between comma "
                           "separated list of genomes", "\"\"");
  optionsParser->addOption("spanRoot", "print genomes on path" 
                           "(or spanning tree) between comma "
                           "separated list of genomes.  Different from --path"
                           "only in that the spanning tree root is also "
                           "given", "\"\"");

  string path;
  bool listGenomes;
  string sequencesFromGenome;
  string sequenceStatsFromGenome;
  string spanGenomes;
  string spanRootGenomes;
  bool tree;
  bool branches;
  try
  {
    optionsParser->parseOptions(argc, argv);
    path = optionsParser->getArgument<string>("halFile");
    listGenomes = optionsParser->getFlag("genomes");
    sequencesFromGenome = optionsParser->getOption<string>("sequences");
    sequenceStatsFromGenome = optionsParser->getOption<string>("sequenceStats");
    tree = optionsParser->getFlag("tree");
    spanGenomes = optionsParser->getOption<string>("span");
    spanRootGenomes = optionsParser->getOption<string>("spanRoot");
    branches = optionsParser->getFlag("branches");

    size_t optCount = listGenomes == true ? 1 : 0;
    if (sequencesFromGenome != "\"\"") ++optCount;
    if (tree == true) ++optCount;
    if (sequenceStatsFromGenome != "\"\"") ++optCount;
    if (spanGenomes != "\"\"") ++optCount;
    if (spanRootGenomes != "\"\"") ++optCount;
    if (branches) ++optCount;
    if (optCount > 1)
    {
      throw hal_exception("--genomes, --sequences, --tree, --span, "
                          "--spanRoot, --branches "
                          "and --sequenceStats " 
                          "options are mutually exclusive");
    }        
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    AlignmentConstPtr alignment = openHalAlignmentReadOnly(path, optionsParser);

    if (listGenomes == true && alignment->getNumGenomes() > 0)
    {
      printGenomes(cout, alignment);
    }
    else if (sequencesFromGenome != "\"\"")
    {
      printSequences(cout, alignment, sequencesFromGenome);
    }
    else if (tree == true)
    {
      cout << alignment->getNewickTree() << endl;
    }
    else if (sequenceStatsFromGenome != "\"\"")
    {
      printSequenceStats(cout, alignment, sequenceStatsFromGenome);
    }
    else if (spanGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanGenomes, ","), false);
    }
    else if (spanRootGenomes !=  "\"\"")
    {
      printBranchPath(cout, alignment, chopString(spanRootGenomes, ","), true);
    }
    else if (branches == true)
    {
      printBranches(cout, alignment);
    }
    else
    {
      HalStats halStats(alignment);
      cout << endl << "hal v" << alignment->getVersion() << "\n" << halStats;
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}