// Maps a top segment of one child genome onto its sibling genome and checks
// the single resulting mapped segment.
//
// The sibling is "child2" when the source genome is named "child1", and
// "child1" otherwise.  The mapped segment's source must describe `top`
// (genome, start, length, orientation) and its target must agree with the
// sibling top segment reached by walking top -> parent bottom segment ->
// sibling via toParent() and toChildG().
//
// alignment: alignment used to open the sibling genome by name.
// top:       iterator positioned on the top segment to map.
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment,
                                                TopSegmentIteratorConstPtr top)
{
  const Genome* srcGenome = top->getGenome();
  const Genome* parent = srcGenome->getParent();

  // Pick whichever of the two children the source segment is NOT in.
  const Genome* other;
  if (srcGenome->getName() == "child1")
  {
    other = alignment->openGenome("child2");
  }
  else
  {
    other = alignment->openGenome("child1");
  }

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, other, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr mseg = *mapped.begin();

  // The source half of the mapping must mirror the query segment.
  CuAssertTrue(_testCase, srcGenome == mseg->getSource()->getGenome());
  CuAssertTrue(_testCase,
               top->getStartPosition() ==
               mseg->getSource()->getStartPosition());
  CuAssertTrue(_testCase,
               top->getLength() == mseg->getSource()->getLength());
  CuAssertTrue(_testCase,
               top->getReversed() == mseg->getSource()->getReversed());

  // Reach the expected target by hand: up to the parent, down to the sibling.
  BottomSegmentIteratorConstPtr parentSeg = parent->getBottomSegmentIterator();
  parentSeg->toParent(top);
  TopSegmentIteratorConstPtr expected = other->getTopSegmentIterator();
  expected->toChildG(parentSeg, other);

  // The target half of the mapping must match that sibling segment.
  CuAssertTrue(_testCase, expected->getGenome() == mseg->getGenome());
  CuAssertTrue(_testCase,
               expected->getStartPosition() == mseg->getStartPosition());
  CuAssertTrue(_testCase,
               expected->getLength() == mseg->getLength());
  CuAssertTrue(_testCase,
               expected->getReversed() == mseg->getReversed());
}
// Example #2
// 0
// Copies every top segment of this genome into `dest`.
//
// `dest` must already be dimensioned with either zero top segments (in which
// case nothing is copied) or exactly as many as this genome; this is checked
// with an assert.  For each segment the coordinates, parent index, parent
// orientation, bottom parse index and next paralogy index are copied.  When
// a segment has a parent, its parent index is then re-based: the offset of
// the parent bottom segment within its sequence in this genome's parent is
// applied to the bottom-segment array index of the sequence with the same
// name in dest's parent.
//
// Sequence names at a given site are not compared between this genome and
// `dest`.
//
// dest: genome receiving the top segments.
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0) {
    // Nothing to do if there are no top segments.
    return;
  }

  // Both parents are dereferenced below.
  assert(inParent != NULL);
  assert(outParent != NULL);

  BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();

  for (; (hal_size_t)inTop->getArrayIndex() < n; inTop->toRight(),
         outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(inTop->getParentIndex());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    // Check that the sequences from the bottom segments we point to are the
    // same. If not, correct the indices so that they are.
    if (inTop->getParentIndex() != NULL_INDEX) {
      inParentBottomSegIt->toParent(inTop);

      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
      assert(inParentSequence != NULL);

      // Look the parent sequence up by name in the destination's parent.
      const Sequence *outParentSequence = outParent->getSequence(inParentSequence->getName());
      assert(outParentSequence != NULL);

      // Keep the segment's offset within its sequence, but measure it from
      // where that sequence's bottom segments start in the destination.
      hal_index_t inParentSegmentOffset = inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
      hal_index_t outParentSegmentIndex = inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();

      outTop->setParentIndex(outParentSegmentIndex);
    }
  }
}
void TopSegmentIteratorToSiteTest::checkGenome(const Genome* genome)
{
  TopSegmentIteratorConstPtr ti = genome->getTopSegmentIterator();
  for (hal_index_t pos = 0; 
       pos < (hal_index_t)genome->getSequenceLength(); ++pos)
  {
    ti->toSite(pos);
    CuAssertTrue(_testCase, ti->getStartPosition() == pos);
    CuAssertTrue(_testCase, ti->getLength() == 1);
    ti->toSite(pos, false);
    CuAssertTrue(_testCase, pos >= ti->getStartPosition() && 
                 pos < ti->getStartPosition() + (hal_index_t)ti->getLength());
    CuAssertTrue(_testCase, 
                 ti->getLength() == ti->getTopSegment()->getLength());
  }
}
// Maps a bottom segment down to one of its genome's children and checks the
// single resulting mapped segment.
//
// The mapped segment's source must describe `bottom` (genome, start, length,
// orientation) and its target must agree with the child top segment reached
// directly with toChild(bottom, childIndex).
//
// alignment:  alignment under test (not used by the checks themselves).
// bottom:     iterator positioned on the bottom segment to map.
// childIndex: index of the child genome to map into.
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* srcGenome = bottom->getGenome();
  const Genome* child = srcGenome->getChild(childIndex);

  set<MappedSegmentConstPtr> mapped;
  bottom->getMappedSegments(mapped, child, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr mseg = *mapped.begin();

  // The source half of the mapping must mirror the query segment.
  CuAssertTrue(_testCase, srcGenome == mseg->getSource()->getGenome());
  CuAssertTrue(_testCase,
               bottom->getStartPosition() ==
               mseg->getSource()->getStartPosition());
  CuAssertTrue(_testCase,
               bottom->getLength() == mseg->getSource()->getLength());
  CuAssertTrue(_testCase,
               bottom->getReversed() == mseg->getSource()->getReversed());

  // Reach the expected target by following the child link directly.
  TopSegmentIteratorConstPtr expected = child->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);

  // The target half of the mapping must match that child segment.
  CuAssertTrue(_testCase, expected->getGenome() == mseg->getGenome());
  CuAssertTrue(_testCase,
               expected->getStartPosition() == mseg->getStartPosition());
  CuAssertTrue(_testCase,
               expected->getLength() == mseg->getLength());
  CuAssertTrue(_testCase,
               expected->getReversed() == mseg->getReversed());
}
// Maps a top segment up to the ancestor genome named `ancName` and checks
// the single resulting mapped segment.
//
// The mapped segment's source must describe `top` (genome, start, length,
// orientation) and its target must agree with the ancestor bottom segment
// found by walking parent links by hand.  The manual walk takes one
// toParent() step and, if that does not land in the requested ancestor, one
// further parse-up plus toParent() step.
//
// alignment: alignment used to open the ancestor genome.
// top:       iterator positioned on the top segment to map.
// ancName:   name of the ancestor genome to map into.
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* parent = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, parent, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr mseg = *mapped.begin();

  // The source half of the mapping must mirror the query segment.
  CuAssertTrue(_testCase, top->getGenome() == mseg->getSource()->getGenome());
  CuAssertTrue(_testCase,
               top->getStartPosition() ==
               mseg->getSource()->getStartPosition());
  CuAssertTrue(_testCase,
               top->getLength() == mseg->getSource()->getLength());
  CuAssertTrue(_testCase,
               top->getReversed() == mseg->getSource()->getReversed());

  // Walk up by hand to find the expected ancestor segment.
  BottomSegmentIteratorConstPtr ancestorSeg =
     parent->getBottomSegmentIterator();
  ancestorSeg->toParent(top);
  // extra hop for when top is in grand child
  if (ancestorSeg->getGenome() != parent)
  {
    TopSegmentIteratorConstPtr hop =
       ancestorSeg->getGenome()->getTopSegmentIterator();
    hop->toParseUp(ancestorSeg);
    ancestorSeg->toParent(hop);
  }

  // The target half of the mapping must match that ancestor segment.
  CuAssertTrue(_testCase, ancestorSeg->getGenome() == mseg->getGenome());
  CuAssertTrue(_testCase,
               ancestorSeg->getStartPosition() == mseg->getStartPosition());
  CuAssertTrue(_testCase,
               ancestorSeg->getLength() == mseg->getLength());
  CuAssertTrue(_testCase,
               ancestorSeg->getReversed() == mseg->getReversed());
}
// Checks TopSegmentIterator::toParseUp() against four fixture genomes named
// "case1" .. "case4".
//
// In every case the top iterator is parsed up from a bottom iterator, first
// unsliced and then after slicing the bottom iterator, and the two iterators
// must report the same start position.  Case 1 additionally checks that the
// lengths agree and that slice(3, 1) shortens the bottom iterator by four
// bases relative to its underlying bottom segment.
//
// alignment: alignment holding the four fixture genomes.
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1
  {
    const Genome* genome = alignment->openGenome("case1");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator();

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
    CuAssertTrue(_testCase, botIt->getLength() == topIt->getLength());

    botIt->slice(3, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase, botIt->getLength() ==
                 botIt->getBottomSegment()->getLength() - 4);

    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
    CuAssertTrue(_testCase, botIt->getLength() == topIt->getLength());
  }

  // case 2
  {
    const Genome* genome = alignment->openGenome("case2");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator(1);

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(1, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
  }

  // case 3
  {
    const Genome* genome = alignment->openGenome("case3");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator();

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(2, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
  }

  // case 4
  {
    const Genome* genome = alignment->openGenome("case4");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator(1);

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(2, 2);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
  }
}
// Walks the top segments of genome "Anc0" in several ways and checks what
// the iterator reports at each step.
//
//  1. Segment by segment, left to right and then right to left, comparing
//     each segment with the expected entry in _topSegments.
//  2. Base by base on the forward strand, using a one-base slice moved with
//     toRight(limit) and then toLeft(limit).
//  3. Base by base after toReverse(), where toLeft() is used to walk towards
//     higher positions and toRight() towards lower ones.
//
// alignment: alignment holding the "Anc0" fixture genome.
void TopSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("Anc0");
  CuAssertTrue(_testCase,
               ancGenome->getNumTopSegments() == _topSegments.size());

  const hal_size_t numSegments = ancGenome->getNumTopSegments();
  const hal_index_t lastSegment = numSegments - 1;
  const hal_index_t genomeLength =
     (hal_index_t)ancGenome->getSequenceLength();

  // Whole segments, left to right.
  TopSegmentIteratorConstPtr segIt = ancGenome->getTopSegmentIterator(0);
  size_t fwd = 0;
  while (fwd < numSegments)
  {
    CuAssertTrue(_testCase,
                 fwd == (size_t)segIt->getTopSegment()->getArrayIndex());
    _topSegments[fwd].compareTo(segIt, _testCase);
    segIt->toRight();
    ++fwd;
  }

  // Whole segments, right to left.
  segIt = ancGenome->getTopSegmentIterator(lastSegment);
  for (hal_index_t back = lastSegment; back >= 0; --back)
  {
    CuAssertTrue(_testCase,
                 back == segIt->getTopSegment()->getArrayIndex());
    _topSegments[back].compareTo(segIt, _testCase);
    segIt->toLeft();
  }

  // Single bases, forward strand, ascending positions.
  segIt = ancGenome->getTopSegmentIterator(0);
  segIt->slice(0, segIt->getLength() - 1);
  for (hal_index_t pos = 0; pos < genomeLength; ++pos)
  {
    CuAssertTrue(_testCase, 1 == segIt->getLength());
    CuAssertTrue(_testCase, pos == segIt->getStartPosition());
    segIt->toRight(segIt->getStartPosition() + 1);
  }

  // Single bases, forward strand, descending positions.
  segIt = ancGenome->getTopSegmentIterator(lastSegment);
  segIt->slice(segIt->getLength() - 1, 0);
  for (hal_index_t pos = genomeLength - 1; pos >= 0; --pos)
  {
    CuAssertTrue(_testCase, 1 == segIt->getLength());
    CuAssertTrue(_testCase, pos == segIt->getStartPosition());
    segIt->toLeft(segIt->getStartPosition() - 1);
  }

  // Single bases, reversed iterator, ascending positions (via toLeft).
  segIt = ancGenome->getTopSegmentIterator(0);
  segIt->toReverse();
  CuAssertTrue(_testCase, segIt->getReversed() == true);
  segIt->slice(segIt->getLength() - 1, 0);
  for (hal_index_t pos = 0; pos < genomeLength; ++pos)
  {
    CuAssertTrue(_testCase, 1 == segIt->getLength());
    CuAssertTrue(_testCase, pos == segIt->getStartPosition());
    segIt->toLeft(segIt->getStartPosition() + 1);
  }

  // Single bases, reversed iterator, descending positions (via toRight).
  segIt = ancGenome->getTopSegmentIterator(lastSegment);
  segIt->toReverse();
  segIt->slice(0, segIt->getLength() - 1);
  for (hal_index_t pos = genomeLength - 1; pos >= 0; --pos)
  {
    CuAssertTrue(_testCase, 1 == segIt->getLength());
    CuAssertTrue(_testCase, pos == segIt->getStartPosition());
    segIt->toRight(segIt->getStartPosition() - 1);
  }
}
// Checks how the coalescence limit passed to getMappedSegments() affects the
// paralogs returned when mapping the first top segment of "grandChild2" onto
// "grandChild1".
//
// With the default limit exactly one mapped segment is expected (start 2,
// length 3, reversed).  With "root" passed as the limit three are expected,
// at starts 2, 5 and 8, and together they must cover array indices 0, 1
// and 2 of the target genome.
//
// alignment: alignment holding the "root", "grandChild1" and "grandChild2"
//            fixture genomes.
void MappedSegmentMapExtraParalogsTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome *grandChild1 = alignment->openGenome("grandChild1");
  const Genome *grandChild2 = alignment->openGenome("grandChild2");
  const Genome *root = alignment->openGenome("root");

  TopSegmentIteratorConstPtr top = grandChild2->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;

  // First, check that by default we will only get the homologies in
  // or before the MRCA. (in this case, just seg 0 of grandChild1).
  top->getMappedSegments(results, grandChild1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 1);
  MappedSegmentConstPtr mseg = *results.begin();
  // Source information should be preserved
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase, mseg->getSource()->getStartPosition() == 
               top->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getReversed() == top->getReversed());

  // Check target information is correct
  CuAssertTrue(_testCase,
               mseg->getGenome() == grandChild1);
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == 2);
  CuAssertTrue(_testCase,
               mseg->getLength() == 3);
  CuAssertTrue(_testCase,
               mseg->getReversed() == true);

  // Check that by using the grandparent as the coalescence limit we
  // will get all the paralogs.
  top->getMappedSegments(results, grandChild1, NULL, true, 0, root);
  CuAssertTrue(_testCase, results.size() == 3);
  bool found[3] = {false, false, false};
  for (set<MappedSegmentConstPtr>::iterator i = results.begin();
       i != results.end(); ++i)
  {
    // Examine the segment this iteration refers to, not the one left over
    // from the first query above.
    MappedSegmentConstPtr paralog = *i;

    // Source information should be preserved
    CuAssertTrue(_testCase,
                 paralog->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, paralog->getSource()->getStartPosition() == 
                 top->getStartPosition());
    CuAssertTrue(_testCase, 
                 paralog->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase, 
                 paralog->getSource()->getReversed() == top->getReversed());
    
    // Check target information is correct
    CuAssertTrue(_testCase,
                 paralog->getGenome() == grandChild1);
    CuAssertTrue(_testCase,
                 paralog->getStartPosition() == 2
                 || paralog->getStartPosition() == 5
                 || paralog->getStartPosition() == 8);
    CuAssertTrue(_testCase,
                 paralog->getLength() == 3);
    CuAssertTrue(_testCase,
                 paralog->getReversed() == true);

    // Validate the index before using it to address found[].
    const hal_index_t arrayIndex = paralog->getArrayIndex();
    CuAssertTrue(_testCase, arrayIndex >= 0 && arrayIndex < 3);
    found[arrayIndex] = true;
  }

  // Every one of the three target segments must have been hit.
  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}
// Checks mapping between two sibling genomes, "child1" and "child2", in
// both directions.
//
// child1 -> child2: the first mapped segment returned must have `top` as its
// source and must match the child2 segment reached via the parent.  The
// number of results is not asserted here (a size() == 3 check is disabled),
// only that there is at least one.
//
// child2 -> child1: exactly three mapped segments are expected.  Each must
// have `top` as its source and share genome and length with the child1
// segment reached via the parent, and together they must cover array
// indices 0, 1 and 2.
//
// alignment: alignment holding the "parent", "child1" and "child2" fixture
//            genomes.
void MappedSegmentMapDupeTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* parent = alignment->openGenome("parent");
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  TopSegmentIteratorConstPtr top = child1->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, child2, NULL, true);
//  CuAssertTrue(_testCase, results.size() == 3);
  // begin() is dereferenced next, so there must be at least one result.
  CuAssertTrue(_testCase, !results.empty());
  
  MappedSegmentConstPtr mseg = *results.begin();
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase, mseg->getSource()->getStartPosition() == 
               top->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase, 
               mseg->getSource()->getReversed() == top->getReversed());
  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  TopSegmentIteratorConstPtr sister = child2->getTopSegmentIterator();
  sister->toChildG(bottom, child2);
  CuAssertTrue(_testCase, mseg->getGenome() == sister->getGenome());
  CuAssertTrue(_testCase, 
               mseg->getStartPosition() == sister->getStartPosition());
  CuAssertTrue(_testCase, 
               mseg->getLength() == sister->getLength());
  CuAssertTrue(_testCase, 
               mseg->getReversed() == sister->getReversed());

  // Now map in the opposite direction, from child2 back to child1.
  top = child2->getTopSegmentIterator();
  results.clear();
  top->getMappedSegments(results, child1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 3);

  // The reference segment in child1 depends only on `top`, so find it once
  // rather than on every pass through the loop.
  bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  sister = child1->getTopSegmentIterator();
  sister->toChildG(bottom, child1);

  bool found[3] = {false, false, false};
  for (set<MappedSegmentConstPtr>::iterator i = results.begin();
       i != results.end(); ++i)
  {
    MappedSegmentConstPtr dupe = *i;
    CuAssertTrue(_testCase, dupe->getSource()->getGenome() == 
                 top->getGenome());
    CuAssertTrue(_testCase, dupe->getSource()->getStartPosition() == 
                 top->getStartPosition());
    CuAssertTrue(_testCase, 
                 dupe->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase, 
                 dupe->getSource()->getReversed() == top->getReversed());
    CuAssertTrue(_testCase, dupe->getGenome() == sister->getGenome());
    CuAssertTrue(_testCase, 
                 dupe->getLength() == sister->getLength());

    // Validate the index before using it to address found[].
    const hal_index_t arrayIndex = dupe->getArrayIndex();
    CuAssertTrue(_testCase, arrayIndex >= 0 && arrayIndex < 3);
    found[arrayIndex] = true;
  }
  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}