Beispiel #1
0
void MappedSegmentMapUpTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");
  TopSegmentIteratorConstPtr top = child2->getTopSegmentIterator();
  testTopSegment(alignment, top, "parent");
  top->slice(1,2);
  testTopSegment(alignment, top, "parent");
  top->toReverse();
  testTopSegment(alignment, top, "parent");
  top = child1->getTopSegmentIterator();
  testTopSegment(alignment, top, "parent");
  top->slice(1,2);
  testTopSegment(alignment, top, "parent");
  top->toReverse();
  testTopSegment(alignment, top, "parent");
  
  const Genome* g1 = alignment->openGenome("g1");
  for (hal_size_t i = 0; i < g1->getNumTopSegments(); ++i)
  {
    top = g1->getTopSegmentIterator(i);
    testTopSegment(alignment, top, "parent");
    top->slice(1,0);
    testTopSegment(alignment, top, "parent");
    top->toReverse();
    testTopSegment(alignment, top, "parent");
    top->slice(0,1);
    testTopSegment(alignment, top, "parent");
    top->toReverse();
    testTopSegment(alignment, top, "parent");
  }
}
void GenomeCreateTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* dudGenome = alignment->openGenome("Zebra");
  CuAssertTrue(_testCase, dudGenome == NULL);
  const Genome* ancGenome = alignment->openGenome("AncGenome");
  const MetaData* ancMeta = ancGenome->getMetaData();
  CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy");
  const Genome* leaf1Genome = alignment->openGenome("Leaf1");
  const Genome* leaf2Genome = alignment->openGenome("Leaf2");
  const Genome* leaf3Genome = alignment->openGenome("Leaf3");
  CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
  CuAssertTrue(_testCase, leaf1Genome->getName() == "Leaf1");
  CuAssertTrue(_testCase, leaf2Genome->getName() == "Leaf2");  
  CuAssertTrue(_testCase, leaf3Genome->getName() == "Leaf3");
  CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 1000000);
  CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 5000);
  CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 700000);
  CuAssertTrue(_testCase, leaf1Genome->getSequenceLength() == 1000000);
  CuAssertTrue(_testCase, leaf1Genome->getNumTopSegments() == 700000);
  CuAssertTrue(_testCase, leaf1Genome->getNumBottomSegments() == 0);
  CuAssertTrue(_testCase, leaf2Genome->getSequenceLength() == 2000000);
  CuAssertTrue(_testCase, leaf2Genome->getNumTopSegments() == 700000);
  CuAssertTrue(_testCase, leaf2Genome->getNumBottomSegments() == 0);
  CuAssertTrue(_testCase, leaf3Genome->getSequenceLength() == 3000000);
  CuAssertTrue(_testCase, leaf3Genome->getNumTopSegments() == 700000);
  CuAssertTrue(_testCase, leaf3Genome->getNumBottomSegments() == 0);
}
Beispiel #3
0
void copyFromTopAlignment(AlignmentConstPtr topAlignment,
                          AlignmentPtr mainAlignment, const string &genomeName)
{
  Genome *mainReplacedGenome = mainAlignment->openGenome(genomeName);
  const Genome *topReplacedGenome = topAlignment->openGenome(genomeName);
  topReplacedGenome->copyTopDimensions(mainReplacedGenome);
  topReplacedGenome->copyTopSegments(mainReplacedGenome);
  mainReplacedGenome->fixParseInfo();
  // Copy bot segments for the parent and top segments for the
  // siblings of the genome that's being replaced
  Genome *mainParent = mainReplacedGenome->getParent();
  const Genome *topParent = topReplacedGenome->getParent();
  topParent->copyBottomDimensions(mainParent);
  topParent->copyBottomSegments(mainParent);
  mainParent->fixParseInfo();
  vector<string> siblings = mainAlignment->getChildNames(mainParent->getName());
  for (size_t i = 0; i < siblings.size(); i++)
  {
    if (siblings[i] != genomeName)
    {
      Genome *mainChild = mainAlignment->openGenome(siblings[i]);
      const Genome *topChild  = topAlignment->openGenome(siblings[i]);
      topChild->copyTopDimensions(mainChild);
      topChild->copyTopSegments(mainChild);
      mainChild->fixParseInfo();
    }
  }
}
Beispiel #4
0
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment,
                                                TopSegmentIteratorConstPtr top)
{
  const Genome* parent = top->getGenome()->getParent();
  const Genome* other = top->getGenome()->getName() == "child1" ? 
     alignment->openGenome("child2") : alignment->openGenome("child1");
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, other, NULL, false);
  CuAssertTrue(_testCase, results.size() == 1);
  MappedSegmentConstPtr mseg = *results.begin();
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase, mseg->getSource()->getStartPosition() == 
               top->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getReversed() == top->getReversed());
  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  TopSegmentIteratorConstPtr sister = other->getTopSegmentIterator();
  sister->toChildG(bottom, other);
  CuAssertTrue(_testCase, mseg->getGenome() == sister->getGenome());
  CuAssertTrue(_testCase, 
               mseg->getStartPosition() == sister->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getLength() == sister->getLength());
  CuAssertTrue(_testCase, 
               mseg->getReversed() == sister->getReversed());
}
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  BottomSegmentIteratorConstPtr bi;
  TopSegmentIteratorConstPtr ti, ti2;

  const Genome* parent1 = alignment->openGenome("parent1");
  const Genome* child1 = alignment->openGenome("child1");

  ti = child1->getTopSegmentIterator();
  bi = parent1->getBottomSegmentIterator();

  ti2 = child1->getTopSegmentIterator();
  ti2->toChild(bi, 0);
  
  CuAssertTrue(_testCase, ti->getStartPosition() == 0);
  CuAssertTrue(_testCase, ti->getLength() == 10);
  CuAssertTrue(_testCase, ti->getReversed() == false);

  CuAssertTrue(_testCase, ti2->getStartPosition() == 9);
  CuAssertTrue(_testCase, ti2->getLength() == 10);
  CuAssertTrue(_testCase, ti2->getReversed() == true);

  bi->slice(1, 3);
  ti2->toChild(bi, 0);
  
  CuAssertTrue(_testCase, bi->getStartPosition() == 1);
  CuAssertTrue(_testCase, bi->getLength() == 6);
  CuAssertTrue(_testCase, ti2->getStartPosition() == 8);
  CuAssertTrue(_testCase, ti2->getLength() == 6);

  string buffer;
  bi->getString(buffer);
  CuAssertTrue(_testCase, buffer == "CCTACG");
  ti2->getString(buffer);
  CuAssertTrue(_testCase, buffer == "CACGTA");

  bi = child1->getBottomSegmentIterator();
  CuAssertTrue(_testCase, bi->getReversed() == false);

  ti->toParseUp(bi);  
  CuAssertTrue(_testCase, ti->getStartPosition() == 0);
  CuAssertTrue(_testCase, ti->getLength() == 5);

  bi->toReverse();
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, ti->getStartPosition() == 4);
  CuAssertTrue(_testCase, ti->getLength() == 5);

  bi->toReverse();  
  CuAssertTrue(_testCase, bi->getReversed() == false);
  bi->toRight();
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, ti->getStartPosition() == 5);
  CuAssertTrue(_testCase, ti->getLength() == 5);

  bi->toReverse();  
  ti->toParseUp(bi);  
  CuAssertTrue(_testCase, ti->getStartPosition() == 9);
  CuAssertTrue(_testCase, ti->getLength() == 5);
}
void TopSegmentIteratorToSiteTest::checkCallBack(AlignmentConstPtr alignment)
{
  TopSegmentIteratorConstPtr bi;

  // case 1
  const Genome* case1 = alignment->openGenome("case1");
  checkGenome(case1);

  // case 2
  const Genome* case2 = alignment->openGenome("case2");
  checkGenome(case2);
}
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  BottomSegmentIteratorConstPtr bi;
  TopSegmentIteratorConstPtr ti;

  // case 1
  const Genome* case1 = alignment->openGenome("case1");
  ti = case1->getTopSegmentIterator();
  bi = case1->getBottomSegmentIterator();
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
  CuAssertTrue(_testCase, bi->getLength() == ti->getLength());
  bi->slice(3, 1);
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getLength() == 
               bi->getBottomSegment()->getLength() - 4);

  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
  CuAssertTrue(_testCase, bi->getLength() == ti->getLength());

  // case 2
  const Genome* case2 = alignment->openGenome("case2");
  ti = case2->getTopSegmentIterator();
  bi = case2->getBottomSegmentIterator(1);
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
  bi->slice(1, 1);
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());

  // case 3
  const Genome* case3 = alignment->openGenome("case3");
  ti = case3->getTopSegmentIterator();
  bi = case3->getBottomSegmentIterator();
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
  bi->slice(2, 1);
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());

  // case 4
  const Genome* case4 = alignment->openGenome("case4");
  ti = case4->getTopSegmentIterator();
  bi = case4->getBottomSegmentIterator(1);
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
  bi->slice(2, 2);
  ti->toParseUp(bi);
  CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition());
}
Beispiel #8
0
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* parent = alignment->openGenome(ancName);
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, parent, NULL, false);
  CuAssertTrue(_testCase, results.size() == 1);
  MappedSegmentConstPtr mseg = *results.begin();
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase, mseg->getSource()->getStartPosition() == 
               top->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getReversed() == top->getReversed());
  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  // extra hop for when top is in grand child
  if (bottom->getGenome() != parent)
  {
    TopSegmentIteratorConstPtr temp = 
       bottom->getGenome()->getTopSegmentIterator();
    temp->toParseUp(bottom);
    bottom->toParent(temp);
  }
  CuAssertTrue(_testCase, mseg->getGenome() == bottom->getGenome());
  CuAssertTrue(_testCase, 
               mseg->getStartPosition() == bottom->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getLength() == bottom->getLength());
  CuAssertTrue(_testCase, 
               mseg->getReversed() == bottom->getReversed());
}
Beispiel #9
0
void LodManager::checkAlignment(hal_size_t minQuery,
                                const string& path,
                                AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    stringstream ss;
    ss << "No genomes found in base alignment specified in " << path;
    throw hal_exception(ss.str());
  }

#ifndef NDEBUG
  if (minQuery == 0)
  {
    vector<string> leafNames = alignment->getLeafNamesBelow(
      alignment->getRootName());
    string name = !leafNames.empty() ? leafNames[0] : alignment->getRootName();
    const Genome* genome = alignment->openGenome(name);
    
    bool seqFound = genome->containsDNAArray();
    alignment->closeGenome(genome);
    if (seqFound == false)
    {
      stringstream ss;
      ss << "HAL file for highest level of detail (0) in " << path 
         << "must contain DNA sequence information.";
      throw hal_exception(ss.str());
    }
  }
#endif
}
Beispiel #10
0
void  MappedSegmentColCompareTest::checkCallBack(AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    return;
  }

  validateAlignment(alignment);
  set<const Genome*> genomeSet;
  hal::getGenomesInSubTree(alignment->openGenome(alignment->getRootName()), 
                           genomeSet);
  for (set<const Genome*>::iterator i = genomeSet.begin(); i != genomeSet.end();
       ++i)
  {
    const Genome* srcGenome = *i;
    for (set<const Genome*>::iterator j = genomeSet.begin(); 
         j != genomeSet.end(); ++j)
    {
      const Genome* tgtGenome = *j;

      if (srcGenome->getSequenceLength() > 0 && 
          tgtGenome->getSequenceLength() > 0)
      {
        _ref = srcGenome;
        _tgt = tgtGenome;
        createColArray();
        createBlockArray();
        compareArrays();
      }
    }
  }
}
Beispiel #11
0
static void printBranches(ostream& os, AlignmentConstPtr alignment)
{
  const Genome* root = alignment->openGenome(alignment->getRootName());
  set<const Genome*> genomes;
  getGenomesInSubTree(root, genomes);
  genomes.insert(root);
  bool first = true;
  for (set<const Genome*>::iterator i = genomes.begin(); i != genomes.end();
       ++i)
  {
    if ((*i)->getParent() != NULL)
    {
      if (!first)
      {
        os << " ";
      }
      else
      {
        first = false;
      }
      os << (*i)->getName();
    }
  }
  os << endl;      
}
void GenomeStringTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("AncGenome");
  CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
  string genomeString;
  ancGenome->getString(genomeString);
  CuAssertTrue(_testCase, genomeString == _string);
}
void GenomeUpdateTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("AncGenome");
  CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
  CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 10000005);
  CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 14000);
  CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 2000001);
}
void GenomeMetaTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("AncGenome");
  const MetaData* ancMeta = ancGenome->getMetaData();
  CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy");
  CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 0);
  CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 0);
  CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 0);
  CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome");
}
Beispiel #15
0
void printNumSegments(ostream& os, AlignmentConstPtr alignment, 
                      const string& genomeName)
{
  const Genome* genome = alignment->openGenome(genomeName);
  if (genome == NULL)
  {
    throw hal_exception(string("Genome ") + genomeName + " not found.");
  }
  os << genome->getNumTopSegments() << " " << genome->getNumBottomSegments()
     << endl;
}
Beispiel #16
0
void MappedSegmentParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* g1 = alignment->openGenome("g1");
  const Genome* g2 = alignment->openGenome("g2");
  const Genome* g3 = alignment->openGenome("g3");
  const Genome* g4 = alignment->openGenome("g4");
  const Genome* g5 = alignment->openGenome("g5");
  const Genome* gs[] = {g1, g2, g3, g4, g5};
  
  for (size_t i = 0; i < 5; ++i)
  {
    const Genome* g = gs[i];
    for (size_t j = 0; j < g->getNumTopSegments(); ++j)
    {
      TopSegmentIteratorConstPtr top = g->getTopSegmentIterator(j);
      testTopSegment(alignment, top);
    }
  }
}
void TopSegmentSequenceTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("Anc0");
  TopSegmentIteratorConstPtr tsIt = ancGenome->getTopSegmentIterator(100);
  CuAssertTrue(_testCase, tsIt->getTopSegment()->getStartPosition() == 500);
  CuAssertTrue(_testCase, tsIt->getTopSegment()->getLength() == 9);
  string seq;
  tsIt->getString(seq);
  CuAssertTrue(_testCase, seq == "CACACATTC");
  tsIt->toReverse();
  tsIt->getString(seq);
  CuAssertTrue(_testCase, seq == "GAATGTGTG");
}
Beispiel #18
0
void MappedSegmentMapDownTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* parent = alignment->openGenome("parent");

  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  testBottomSegment(alignment, bottom, 0);
  testBottomSegment(alignment, bottom, 1);
  bottom->slice(1,2);
  testBottomSegment(alignment, bottom, 0);
  testBottomSegment(alignment, bottom, 1);
  bottom->toReverse();
  testBottomSegment(alignment, bottom, 0);
  testBottomSegment(alignment, bottom, 1);
}
Beispiel #19
0
void copyFromBottomAlignment(AlignmentConstPtr bottomAlignment,
                             AlignmentPtr mainAlignment,
                             const string &genomeName)
{
  // Copy genome & bottom segments for the genome that's being replaced
  Genome *mainReplacedGenome = mainAlignment->openGenome(genomeName);
  const Genome *botReplacedGenome = bottomAlignment->openGenome(genomeName);
  botReplacedGenome->copyDimensions(mainReplacedGenome);
  botReplacedGenome->copySequence(mainReplacedGenome);
  botReplacedGenome->copyBottomDimensions(mainReplacedGenome);
  botReplacedGenome->copyBottomSegments(mainReplacedGenome);
  mainReplacedGenome->fixParseInfo();

  // Copy top segments for the children
  vector<string> children = mainAlignment->getChildNames(genomeName);  
  for (size_t i = 0; i < children.size(); i++)
  {
    Genome *mainChild = mainAlignment->openGenome(children[i]);
    const Genome *botChild  = bottomAlignment->openGenome(children[i]);
    botChild->copyTopDimensions(mainChild);
    botChild->copyTopSegments(mainChild);
    mainChild->fixParseInfo();
  }
}
void printGenomes(ostream& os, AlignmentConstPtr alignment)
{
  const Genome* root = alignment->openGenome(alignment->getRootName());
  set<const Genome*> genomes;
  getGenomesInSubTree(root, genomes);
  genomes.insert(root);
  for (set<const Genome*>::iterator i = genomes.begin(); i != genomes.end();
       ++i)
  {
    if (i != genomes.begin())
    {
      os << ",";
    }
    os << (*i)->getName();
  }
  os << endl;      
}
Beispiel #21
0
void MappedSegmentParseTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top)
{
  const Genome* parent = alignment->openGenome("parent");
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, parent, NULL, false);

  vector<bool> covered(top->getLength(), false);
  
  CuAssertTrue(_testCase, results.size() >= 1);

  set<MappedSegmentConstPtr>::iterator i = results.begin();
  for (; i != results.end(); ++i)
  {
    MappedSegmentConstPtr mseg = *i;
    CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, mseg->getGenome() == parent);
    for (hal_index_t j = mseg->getStartPosition(); j <= mseg->getEndPosition(); 
         ++j)
    {
      CuAssertTrue(_testCase, covered[j] == false);
      covered[j] = true;
    }
    CuAssertTrue(_testCase, mseg->getStartPosition() == 
                 mseg->getSource()->getStartPosition());
    CuAssertTrue(_testCase, mseg->getEndPosition() == 
                 mseg->getSource()->getEndPosition());

    set<MappedSegmentConstPtr> tResults;
    mseg->getMappedSegments(tResults, top->getGenome(), NULL, false);
    CuAssertTrue(_testCase, tResults.size() == 1);
    MappedSegmentConstPtr tmseg = *tResults.begin();
    CuAssertTrue(_testCase, tmseg->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, tmseg->getSource()->getGenome() == 
                 mseg->getGenome());
    CuAssertTrue(_testCase, tmseg->getStartPosition() == 
                 mseg->getStartPosition());
    CuAssertTrue(_testCase, tmseg->getEndPosition() == 
                 mseg->getEndPosition());
    CuAssertTrue(_testCase, tmseg->getSource()->getStartPosition() == 
                 mseg->getStartPosition());
    CuAssertTrue(_testCase, tmseg->getSource()->getEndPosition() == 
                 mseg->getEndPosition());
  }
}
Beispiel #22
0
void MappedSegmentMapAcrossTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");
  TopSegmentIteratorConstPtr top = child2->getTopSegmentIterator();
  testTopSegment(alignment, top);
  top->slice(1,2);
  testTopSegment(alignment, top);
  top->toReverse();
  testTopSegment(alignment, top);
  top = child1->getTopSegmentIterator();
  testTopSegment(alignment, top);
  top->slice(1,2);
  testTopSegment(alignment, top);
  top->toReverse();
  testTopSegment(alignment, top);
}
void TopSegmentIsGapTest::checkCallBack(AlignmentConstPtr alignment)
{
  BottomSegmentIteratorConstPtr bi;
  TopSegmentIteratorConstPtr ti;

  const Genome* child1 = alignment->openGenome("child1");

  for (hal_size_t i = 0; i < child1->getNumTopSegments(); ++i)
  {
    ti = child1->getTopSegmentIterator(i);
    if (i == 8 || i == 10)
    {
//      CuAssertTrue(_testCase, ti->getTopSegment()->isGapInsertion());
    }
    else
    {
//      CuAssertTrue(_testCase, !ti->getTopSegment()->isGapInsertion());
    }
  }
}
Beispiel #24
0
void printBedSequenceStats(ostream& os, AlignmentConstPtr alignment, 
                           const string& genomeName)
{
  const Genome* genome = alignment->openGenome(genomeName);
  if (genome == NULL)
  {
    throw hal_exception(string("Genome ") + genomeName + " not found.");
  }
  if (genome->getNumSequences() > 0)
  {
    SequenceIteratorConstPtr seqIt = genome->getSequenceIterator();
    SequenceIteratorConstPtr seqEnd = genome->getSequenceEndIterator();

    for (; !seqIt->equals(seqEnd); seqIt->toNext())
    {
      os << seqIt->getSequence()->getName() << "\t"
         << 0 << "\t"
         << seqIt->getSequence()->getSequenceLength() << "\n";
    }
  }
  os << endl;
}
void hal::validateAlignment(AlignmentConstPtr alignment)
{
  deque<string> bfQueue;
  bfQueue.push_back(alignment->getRootName());
  while (bfQueue.empty() == false)
  {
    string name = bfQueue.back();
    bfQueue.pop_back();
    if (name.empty() == false)
    {
      const Genome* genome = alignment->openGenome(name);
      if (genome == NULL)
      {
        throw hal_exception("Failure to open genome " + name);
      }
      validateGenome(genome);
      vector<string> childNames = alignment->getChildNames(name);
      for (size_t i = 0; i < childNames.size(); ++i)
      {
        bfQueue.push_front(childNames[i]);
      }
    }
  }
}
Beispiel #26
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName,
    childName, leafName;
  double upperBranchLength, leafBranchLength;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    botAlignmentPath = optParser->getArgument<string>("botAlignmentFile");
    topAlignmentPath = optParser->getArgument<string>("topAlignmentFile");
    parentName = optParser->getArgument<string>("parentName");
    insertName = optParser->getArgument<string>("insertName");
    childName = optParser->getArgument<string>("childName");
    leafName = optParser->getArgument<string>("leafName");
    upperBranchLength = optParser->getArgument<double>("upperBranchLength");
    leafBranchLength = optParser->getArgument<double>("leafBranchLength");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser);
  AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath,
                                                    optParser);
  AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath,
                                                    optParser);
  mainAlignment->insertGenome(insertName, parentName, childName,
                              upperBranchLength);
  mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength);
  // Insert the new intermediate node.
  Genome *insertGenome = mainAlignment->openGenome(insertName);
  const Genome *topInsertGenome = topAlignment->openGenome(insertName);
  const Genome *botInsertGenome = botAlignment->openGenome(insertName);
  topInsertGenome->copyDimensions(insertGenome);
  topInsertGenome->copyTopDimensions(insertGenome);
  botInsertGenome->copyBottomDimensions(insertGenome);
  topInsertGenome->copySequence(insertGenome);
  topInsertGenome->copyTopSegments(insertGenome);
  topInsertGenome->copyMetadata(insertGenome);
  botInsertGenome->copyBottomSegments(insertGenome);
  insertGenome->fixParseInfo();

  // Copy the bottom segments for the parent genome from the top alignment.
  Genome *parentGenome = mainAlignment->openGenome(parentName);
  const Genome *botParentGenome = topAlignment->openGenome(parentName);
  botParentGenome->copyBottomDimensions(parentGenome);
  botParentGenome->copyBottomSegments(parentGenome);
  parentGenome->fixParseInfo();

  // Fix the parent's other children as well.
  vector<string> allChildren = mainAlignment->getChildNames(parentName);
  for (size_t i = 0; i < allChildren.size(); i++)
  {
    if (allChildren[i] != insertName)
    {
      Genome *outGenome = mainAlignment->openGenome(allChildren[i]);
      const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]);
      topSegmentsGenome->copyTopDimensions(outGenome);
      topSegmentsGenome->copyTopSegments(outGenome);
      outGenome->fixParseInfo();
            
    }
  }

  // Copy the top segments for the child genome from the bottom alignment.
  Genome *childGenome = mainAlignment->openGenome(childName);
  const Genome *topChildGenome = botAlignment->openGenome(childName);
  topChildGenome->copyTopDimensions(childGenome);
  topChildGenome->copyTopSegments(childGenome);
  childGenome->fixParseInfo();

  // Copy the entire genome for the leaf from the bottom alignment.
  Genome *outLeafGenome = mainAlignment->openGenome(leafName);
  const Genome *inLeafGenome = botAlignment->openGenome(leafName);
  inLeafGenome->copy(outLeafGenome);
  if (!noMarkAncestors) {
    markAncestorsForUpdate(mainAlignment, insertName);
  }
  mainAlignment->close();
  botAlignment->close();
  topAlignment->close();
}
Beispiel #27
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = hdf5CLParserInstance();
  optionsParser->setDescription("Rertrieve chain (pairwise alignment) "
                                "information from a hal database.\n"
                                "WARNING: THIS TOOL WAS NEVER FINISHED OR"
                                " TESTED. USE AT OWN RISK. PLEASE "
                                "CONSIDER halLiftover --outPSL INSTEAD.");
  optionsParser->addArgument("halFile", "path to hal file to analyze");
  optionsParser->addArgument("genome", "(query) genome to process");
  optionsParser->addOption("sequence", "sequence name in query genome ("
                           "all sequences if not specified)", "\"\"");
  optionsParser->addOption("start", "start position in query genome", 0);
  optionsParser->addOption("length", "maximum length of chain to output.", 0);
  optionsParser->addOption("chainFile", "path for output file.  stdout if not"
                           " specified", "\"\"");
  optionsParser->addOption("maxGap", 
                           "maximum indel length to be considered a gap within"
                           " a chain.", 
                           20);
  

  string halPath;
  string chainPath;
  string genomeName;
  string sequenceName;
  hal_size_t start;
  hal_size_t length;
  hal_size_t maxGap;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    genomeName = optionsParser->getArgument<string>("genome");
    sequenceName = optionsParser->getOption<string>("sequence");
    start = optionsParser->getOption<hal_size_t>("start");
    length = optionsParser->getOption<hal_size_t>("length");
    chainPath = optionsParser->getOption<string>("chainFile");
    maxGap = optionsParser->getOption<hal_size_t>("maxGap");
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }
  try
  {
    cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK."
         << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." <<endl;  

    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath,
                                                           optionsParser);
    
    
    const Genome* genome = alignment->openGenome(genomeName);
    if (genome == NULL)
    {
      throw hal_exception(string("Genome not found: ") + genomeName);
    }
    hal_index_t endPosition = 
       length > 0 ? start + length : genome->getSequenceLength();

    const Sequence* sequence = NULL;
    if (sequenceName != "\"\"")
    {
      sequence = genome->getSequence(sequenceName);
      if (sequence == NULL)
      {
        throw hal_exception(string("Sequence not found: ") + sequenceName);
      }
      start += sequence->getStartPosition();
      endPosition =  
         length > 0 ? start + length : sequence->getSequenceLength();
    }

    ofstream ofile;
    ostream& outStream = chainPath == "\"\"" ? cout : ofile;
    if (chainPath != "\"\"")
    {
      ofile.open(chainPath.c_str());
      if (!ofile)
      {
        throw hal_exception(string("Error opening output file ") + 
                            chainPath);
      }
    }

    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    top->toSite(start, false);
    // do slicing here;
    
    GappedTopSegmentIteratorConstPtr gtop = 
       genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap);

    // need to review!
    Chain chain;
    chain._id = 0;
    while (gtop->getRightArrayIndex() < 
           (hal_index_t)genome->getNumTopSegments() &&
           gtop->getLeft()->getStartPosition() < endPosition)
    {
      if (gtop->hasParent() == true)
      {
        hal_offset_t leftOffset = 0;
        if ((hal_index_t)start > gtop->getStartPosition() 
            && (hal_index_t)start < gtop->getEndPosition())
        {
          leftOffset = start - gtop->getStartPosition() ;
        }
        hal_offset_t rightOffset = 0;
        if (endPosition - 1 > gtop->getStartPosition() 
            && endPosition - 1 < gtop->getEndPosition())
        {
          rightOffset = gtop->getEndPosition() + 1 - endPosition;
        }
        // need to do offsets for edge cases
        gtIteratorToChain(gtop, chain, leftOffset, rightOffset);
        outStream << chain;
        ++chain._id;
      }
      gtop->toRight();
    }
  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }
  
  return 0;
}
Beispiel #28
0
int main(int argc, char** argv)
{
  CLParserPtr optionsParser = initParser();

  string halPath;
  string srcGenomeName;
  string srcBedPath;
  string tgtGenomeName;
  string tgtBedPath;
  bool noDupes;
  bool append;
  int inBedVersion;
  int outBedVersion;
  bool keepExtra;
  bool outPSL;
  bool outPSLWithName;
  bool tab;
  try
  {
    optionsParser->parseOptions(argc, argv);
    halPath = optionsParser->getArgument<string>("halFile");
    srcGenomeName = optionsParser->getArgument<string>("srcGenome");
    srcBedPath =  optionsParser->getArgument<string>("srcBed");
    tgtGenomeName = optionsParser->getArgument<string>("tgtGenome");
    tgtBedPath =  optionsParser->getArgument<string>("tgtBed");
    noDupes = optionsParser->getFlag("noDupes");
    append = optionsParser->getFlag("append");
    inBedVersion = optionsParser->getOption<int>("inBedVersion");
    outBedVersion = optionsParser->getOption<int>("outBedVersion");
    keepExtra = optionsParser->getFlag("keepExtra");
    outPSL = optionsParser->getFlag("outPSL");
    outPSLWithName = optionsParser->getFlag("outPSLWithName");
    tab = optionsParser->getFlag("tab");
  }
  catch(exception& e)
  {
    cerr << e.what() << endl;
    optionsParser->printUsage(cerr);
    exit(1);
  }

  try
  {
    if (outPSLWithName == true)
    {
      outPSL = true;
    }
    if (outPSL == true)
    {
      outBedVersion = 12;
    }

    AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, 
                                                           optionsParser);
    if (alignment->getNumGenomes() == 0)
    {
      throw hal_exception("hal alignmenet is empty");
    }

    const Genome* srcGenome = alignment->openGenome(srcGenomeName);
    if (srcGenome == NULL)
    {
      throw hal_exception(string("srcGenome, ") + srcGenomeName + 
                          ", not found in alignment");
    }
    const Genome* tgtGenome = alignment->openGenome(tgtGenomeName);
    if (tgtGenome == NULL)
    {
      throw hal_exception(string("tgtGenome, ") + tgtGenomeName + 
                          ", not found in alignment");
    }
    
    ifstream srcBed;
    istream* srcBedPtr;
    if (srcBedPath == "stdin")
    {
      srcBedPtr = &cin;
    }
    else
    {
      srcBed.open(srcBedPath.c_str());
      srcBedPtr = &srcBed;
      if (!srcBed)
      {
        throw hal_exception("Error opening srcBed, " + srcBedPath);
      }
    }
    
    ios_base::openmode mode = append ? ios::out | ios::app : ios_base::out;
    ofstream tgtBed;
    ostream* tgtBedPtr;
    if (tgtBedPath == "stdout")
    {
      tgtBedPtr = &cout;
    }
    else
    {      
      tgtBed.open(tgtBedPath.c_str(), mode);
      tgtBedPtr = &tgtBed;
      if (!tgtBed)
      {
        throw hal_exception("Error opening tgtBed, " + tgtBedPath);
      }
    }

    locale* inLocale = NULL;
    if (tab == true)
    {
      inLocale = new locale(cin.getloc(), new TabSepFacet(cin.getloc()));
      assert(std::isspace('\t', *inLocale) == true);
      assert(std::isspace(' ', *inLocale) == false);
    }
    
    BlockLiftover liftover;
    liftover.convert(alignment, srcGenome, srcBedPtr, tgtGenome, tgtBedPtr,
                     inBedVersion, outBedVersion, keepExtra, !noDupes,
                     outPSL, outPSLWithName, inLocale);
    
    delete inLocale;

  }
  catch(hal_exception& e)
  {
    cerr << "hal exception caught: " << e.what() << endl;
    return 1;
  }
  catch(exception& e)
  {
    cerr << "Exception caught: " << e.what() << endl;
    return 1;
  }

  return 0;
}
Beispiel #29
0
static void printBranchPath(ostream& os, AlignmentConstPtr alignment, 
                            const vector<string>& genomeNames, 
                            bool keepRoot)
{
  set<const Genome*> inputSet;
  for (size_t i = 0; i < genomeNames.size(); ++i)
  {
    const Genome* genome = alignment->openGenome(genomeNames[i]);
    if (genome == NULL)
    {
      throw hal_exception(string("Genome ") + genomeNames[i] + " not found");
    }
    inputSet.insert(genome);
  }
  set<const Genome*> outputSet;
  getGenomesInSpanningTree(inputSet, outputSet);
  
  vector<const Genome*> outputVec;
  // if given two genomes, sort the output to be the actual path frmo the 
  // first to the second. 
  if (genomeNames.size() == 2)
  {
    set<const Genome*> visitSet(outputSet);
    outputVec.push_back(alignment->openGenome(genomeNames[0]));
    visitSet.erase(alignment->openGenome(genomeNames[0]));
    while (outputVec.back()->getName() != genomeNames[1])
    {
      const Genome* cur = outputVec.back();
      set<const Genome*>::iterator i = visitSet.find(cur->getParent());
      if (i == visitSet.end())
      {
        for (size_t childIdx = 0; childIdx < cur->getNumChildren(); ++childIdx)
        {
          i = visitSet.find(cur->getChild(childIdx));
          if (i != visitSet.end())
          {
            break;
          }
        }
      }
      if (i != visitSet.end())
      {
        outputVec.push_back(*i);
        visitSet.erase(i);
      }
      else
      {
        throw hal_exception(string("error determining path from ") +
                            genomeNames[0] + " to " + genomeNames[1]);
      }
    }
  }
  else
  {
    outputVec.resize(outputSet.size());
    copy(outputSet.begin(), outputSet.end(), outputVec.begin());
  }
  
  for (vector<const Genome*>::const_iterator j = outputVec.begin(); 
       j != outputVec.end(); ++j)
  {
    const Genome* genome = *j;
    if (keepRoot == true || 
        (genome->getParent() != NULL &&  
         outputSet.find(genome->getParent()) != outputSet.end()))
    {
      os << genome->getName() << " ";
    }
  }
  os << endl;
}
void 
GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  GappedTopSegmentIteratorConstPtr gtsIt = 
     child->getGappedTopSegmentIterator(0, 9999999);
  GappedBottomSegmentIteratorConstPtr gbsIt = 
     parent->getGappedBottomSegmentIterator(0, 0, 9999999);
  GappedTopSegmentIteratorConstPtr gtsItRev = 
     child->getGappedTopSegmentIterator(0, 9999999);
  gtsItRev->toReverse();
  GappedBottomSegmentIteratorConstPtr gbsItRev = 
     parent->getGappedBottomSegmentIterator(0, 0, 9999999);
  gbsItRev->toReverse();

  for (size_t i = 0; i < child->getNumTopSegments(); ++i)
  {
    TopSegmentIteratorConstPtr tsIt = gtsIt->getLeft();
    CuAssertTrue(_testCase, tsIt->equals(gtsIt->getRight()));
    CuAssertTrue(_testCase, 
                 (size_t)tsIt->getTopSegment()->getArrayIndex() == i);
    gtsIt->toRight();

    BottomSegmentIteratorConstPtr bsIt = gbsIt->getLeft();
    CuAssertTrue(_testCase, bsIt->equals(gbsIt->getRight()));
    CuAssertTrue(_testCase, 
                 (size_t)bsIt->getBottomSegment()->getArrayIndex() == i);
    gbsIt->toRight();

    TopSegmentIteratorConstPtr tsItRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase, tsItRev->equals(gtsItRev->getRight()));
    CuAssertTrue(_testCase, 
                 (size_t)tsItRev->getTopSegment()->getArrayIndex() == i);
    gtsItRev->toLeft();

    BottomSegmentIteratorConstPtr bsItRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase, bsItRev->equals(gbsItRev->getRight()));
    CuAssertTrue(_testCase, 
                 (size_t)bsItRev->getBottomSegment()->getArrayIndex() == i);
    gbsItRev->toLeft();
  }

  gtsIt = child->getGappedTopSegmentIterator(
    child->getNumTopSegments() - 1, 9999999);
  gbsIt = parent->getGappedBottomSegmentIterator(
    child->getNumTopSegments() - 1, 0, 9999999);
  gtsItRev = child->getGappedTopSegmentIterator(
    child->getNumTopSegments() - 1, 9999999);
  gtsItRev->toReverse();
  gbsItRev = parent->getGappedBottomSegmentIterator(
    child->getNumTopSegments() - 1, 0, 9999999);
  gbsItRev->toReverse();

  for (hal_index_t i = child->getNumTopSegments() - 1; i >= 0; --i)
  {
    TopSegmentIteratorConstPtr tsIt = gtsIt->getLeft();
    CuAssertTrue(_testCase, tsIt->equals(gtsIt->getRight()));
    CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gtsIt->getReversed() == false);
    gtsIt->toLeft();

    BottomSegmentIteratorConstPtr bsIt = gbsIt->getLeft();
    CuAssertTrue(_testCase, bsIt->equals(gbsIt->getRight()));
    CuAssertTrue(_testCase, bsIt->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gbsIt->getReversed() == false);
    gbsIt->toLeft();

    TopSegmentIteratorConstPtr tsItRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase, tsItRev->equals(gtsItRev->getRight()));
    CuAssertTrue(_testCase, tsItRev->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gtsItRev->getReversed() == true);
    gtsItRev->toRight();

    BottomSegmentIteratorConstPtr bsItRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase, bsItRev->equals(gbsItRev->getRight()));
    CuAssertTrue(_testCase, bsItRev->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gbsItRev->getReversed() == true);
    gbsItRev->toRight();
  }

}