Beispiel #1
0
// Builds the fixture alignment for TopSegmentIsGapTest.
//
// A root genome "parent1" and a leaf genome "child1" are created with the
// same dimensions: three sequences, each with 5 segments of length two.
// Segment i of the parent starts out aligned with segment i of the child;
// a few links are then rewired by hand (see the comments below).
void TopSegmentIsGapTest::createCallBack(Alignment *alignment) {
    typedef pair<hal_size_t, bool> ChildLink;
    const size_t sequenceCount = 3;

    BottomSegmentIteratorPtr bottomIt;
    BottomSegmentStruct bottomFields;
    TopSegmentIteratorPtr topIt;
    TopSegmentStruct topFields;

    Genome *parent1 = alignment->addRootGenome("parent1");
    Genome *child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Both genomes share one dimension table: "Sequence0".."Sequence2".
    vector<Sequence::Info> dimensions;
    dimensions.reserve(sequenceCount);
    for (size_t seq = 0; seq != sequenceCount; ++seq) {
        dimensions.push_back(Sequence::Info("Sequence" + std::to_string(seq), 10, 5, 5));
    }
    parent1->setDimensions(dimensions);
    child1->setDimensions(dimensions);

    // Every bottom segment of the parent: start = 2 * index, length 2,
    // with a single child entry pointing at the same index.
    bottomIt = parent1->getBottomSegmentIterator();
    while (!bottomIt->atEnd()) {
        const auto index = bottomIt->getBottomSegment()->getArrayIndex();
        bottomFields.set(index * 2, 2);
        bottomFields._children.clear();
        bottomFields._children.push_back(ChildLink(index, false));
        bottomFields.applyTo(bottomIt);
        bottomIt->toRight();
    }

    // Every top segment of the child: start = 2 * index, length 2,
    // parent index equal to its own index.
    topIt = child1->getTopSegmentIterator();
    while (!topIt->atEnd()) {
        const auto index = topIt->getTopSegment()->getArrayIndex();
        topFields.set(index * 2, 2, index);
        topFields.applyTo(topIt);
        topIt->toRight();
    }

    // Two insertions, built the same way: the top segment at `site` loses
    // its parent, and bottom segment `site` is re-linked with top segment
    // `site + 1` instead.
    //   site 8  - insertion in the middle
    //   site 10 - insertion at the beginning
    const int insertionSites[] = {8, 10};
    for (size_t s = 0; s != 2; ++s) {
        const int site = insertionSites[s];
        bottomIt = parent1->getBottomSegmentIterator(site);
        topIt = child1->getTopSegmentIterator(site);
        assert(bottomIt->getBottomSegment()->getChildIndex(0) == site &&
               topIt->getTopSegment()->getParentIndex() == site);
        bottomIt->getBottomSegment()->setChildIndex(0, site + 1);
        topIt->getTopSegment()->setParentIndex(NULL_INDEX);
        topIt->toRight();
        topIt->getTopSegment()->setParentIndex(site);
    }

    // just having a null parent is not enough for an insertion: here only
    // the top segment's parent index is cleared, the bottom segment keeps
    // its child link.
    bottomIt = parent1->getBottomSegmentIterator(2);
    topIt = child1->getTopSegmentIterator(2);
    assert(bottomIt->getBottomSegment()->getChildIndex(0) == 2 &&
           topIt->getTopSegment()->getParentIndex() == 2);
    topIt->getTopSegment()->setParentIndex(NULL_INDEX);
}
Beispiel #2
0
// Builds the fixture alignment for MappedSegmentMapDupeTest.
//
// One root genome ("parent", sequence "CCC") has two leaves ("child1" and
// "child2", both "CCCTACGTG").  The parent has a single bottom segment;
// each leaf has three top segments of length 3.
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildLink;

  BottomSegmentIteratorPtr bottomIt;
  BottomSegmentStruct bottomFields;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topFields;

  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);

  // Both leaves use identical dimensions, so one table serves both.
  vector<Sequence::Info> rootDims(1, Sequence::Info("Sequence", 3, 0, 1));
  vector<Sequence::Info> leafDims(1, Sequence::Info("Sequence", 9, 3, 0));
  parent->setDimensions(rootDims);
  child1->setDimensions(leafDims);
  child2->setDimensions(leafDims);

  parent->setString("CCC");
  child1->setString("CCCTACGTG");
  child2->setString("CCCTACGTG");

  // The parent's only bottom segment spans [0, 3) and carries two child
  // entries, both index 0: flag true for the first, false for the second.
  bottomIt = parent->getBottomSegmentIterator();
  bottomFields.set(0, 3);
  bottomFields._children.push_back(ChildLink(0, true));
  bottomFields._children.push_back(ChildLink(0, false));
  bottomFields.applyTo(bottomIt);

  // child1: all three top segments name parent index 0 with the flag set;
  // the last argument cycles 1, 2, 0.
  // NOTE(review): the last argument is presumably the next-paralogy index,
  // making the three segments a paralogy ring - confirm against
  // TopSegmentStruct::set.
  topIt = child1->getTopSegmentIterator();
  for (int seg = 0; seg < 3; ++seg)
  {
    if (seg != 0)
    {
      topIt->toRight();
    }
    topFields.set(seg * 3, 3, 0, true, NULL_INDEX, (seg + 1) % 3);
    topFields.applyTo(topIt);
  }

  // child2: only the first top segment names parent index 0; the other
  // two are given NULL_INDEX.
  topIt = child2->getTopSegmentIterator();
  topFields.set(0, 3, 0, false);
  topFields.applyTo(topIt);

  topIt->toRight();
  topFields.set(3, 3, NULL_INDEX, true);
  topFields.applyTo(topIt);

  topIt->toRight();
  topFields.set(6, 3, NULL_INDEX, false);
  topFields.applyTo(topIt);
}
// Builds the fixture alignment for TopSegmentIteratorReverseTest.
//
// Sets up a simple case where there is an edge from a parent to a child
// and it is reversed.  Both genomes hold the sequence "CCCTACGTGC".
void TopSegmentIteratorReverseTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildLink;

  BottomSegmentIteratorPtr bottomIt;
  BottomSegmentStruct bottomFields;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topFields;

  Genome* parent1 = alignment->addRootGenome("parent1");
  Genome* child1 = alignment->addLeafGenome("child1", "parent1", 1);

  // Identical dimensions for both genomes.
  vector<Sequence::Info> dims(1, Sequence::Info("Sequence", 10, 2, 2));
  parent1->setDimensions(dims);
  child1->setDimensions(dims);

  parent1->setString("CCCTACGTGC");
  child1->setString("CCCTACGTGC");

  // Parent: first bottom segment spans the whole sequence and has one
  // child entry (index 0) with the flag set to true.
  bottomIt = parent1->getBottomSegmentIterator();
  bottomFields.set(0, 10, 0);
  bottomFields._children.push_back(ChildLink(0, true));
  bottomFields.applyTo(bottomIt);

  // Child: first top segment spans the whole sequence, names parent
  // index 0, and has its flag set to true as well.
  topIt = child1->getTopSegmentIterator();
  topFields.set(0, 10, 0, true, 0);
  topFields.applyTo(topIt);

  // Child: two bottom segments, [0, 5) and [5, 10), with no child entries
  // (the list left over from the parent is cleared before the first apply).
  bottomIt = child1->getBottomSegmentIterator();
  bottomFields.set(0, 5, 0);
  bottomFields._children.clear();
  bottomFields.applyTo(bottomIt);

  bottomIt->toRight();
  bottomFields.set(5, 5, 0);
  bottomFields.applyTo(bottomIt);
}
// Builds the fixture alignment for TopSegmentIteratorParseTest.
//
// Four independent root genomes ("case1".."case4") are created, each with
// one sequence of length 10 and a different arrangement of top and bottom
// segments over it.
void TopSegmentIteratorParseTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> dims(1);

  BottomSegmentIteratorPtr bottomIt;
  BottomSegmentStruct bottomFields;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topFields;

  // Cases 2 and 3 both cut the sequence into the same three pieces:
  // [0, 3), [3, 7) and [7, 10).
  const int pieceStart[] = {0, 3, 7};
  const int pieceLength[] = {3, 4, 3};

  // case 1: bottom segment aligns perfectly with top segment
  {
    Genome* case1 = alignment->addRootGenome("case1");
    dims[0] = Sequence::Info("Sequence", 10, 2, 2);
    case1->setDimensions(dims);

    topIt = case1->getTopSegmentIterator();
    topFields.set(0, 10, NULL_INDEX, false, 0, NULL_INDEX);
    topFields.applyTo(topIt);

    bottomIt = case1->getBottomSegmentIterator();
    bottomFields.set(0, 10, 0);
    bottomFields.applyTo(bottomIt);
  }

  // case 2: bottom segment is completely contained in top segment
  {
    Genome* case2 = alignment->addRootGenome("case2");
    dims[0] = Sequence::Info("Sequence", 10, 2, 3);
    case2->setDimensions(dims);

    topIt = case2->getTopSegmentIterator();
    topFields.set(0, 9, NULL_INDEX, false, 0, NULL_INDEX);
    topFields.applyTo(topIt);

    bottomIt = case2->getBottomSegmentIterator();
    for (int piece = 0; piece < 3; ++piece)
    {
      if (piece != 0)
      {
        bottomIt->toRight();
      }
      bottomFields.set(pieceStart[piece], pieceLength[piece], 0);
      bottomFields.applyTo(bottomIt);
    }
  }

  // case 3: top segment is completely contained in bottom segment
  {
    Genome* case3 = alignment->addRootGenome("case3");
    dims[0] = Sequence::Info("Sequence", 10, 3, 2);
    case3->setDimensions(dims);

    topIt = case3->getTopSegmentIterator();
    for (int piece = 0; piece < 3; ++piece)
    {
      if (piece != 0)
      {
        topIt->toRight();
      }
      topFields.set(pieceStart[piece], pieceLength[piece], NULL_INDEX, false, 0);
      topFields.applyTo(topIt);
    }

    bottomIt = case3->getBottomSegmentIterator();
    bottomFields.set(0, 9, 0);
    bottomFields.applyTo(bottomIt);
  }

  // case 4: top segment overhangs bottom segment on the left
  {
    Genome* case4 = alignment->addRootGenome("case4");
    dims[0] = Sequence::Info("Sequence", 10, 2, 2);
    case4->setDimensions(dims);

    topIt = case4->getTopSegmentIterator();
    topFields.set(0, 9, NULL_INDEX, false, 0);
    topFields.applyTo(topIt);

    bottomIt = case4->getBottomSegmentIterator();
    bottomFields.set(0, 5, 0);
    bottomFields.applyTo(bottomIt);
    bottomIt->toRight();
    bottomFields.set(5, 5, 0);
    bottomFields.applyTo(bottomIt);
  }
}
Beispiel #5
0
// Builds the fixture alignment for MappedSegmentMapExtraParalogsTest.
//
// Set up a case where all the segments of grandChild1 coalesce with
// the first segment of grandChild2, but only if using the root as
// the coalescence limit. Otherwise only the first segments map to
// each other.
//
// Tree: root -> parent -> {grandChild1, grandChild2}.
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildLink;

  BottomSegmentIteratorPtr bottomIt;
  BottomSegmentStruct bottomFields;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topFields;

  Genome* root = alignment->addRootGenome("root");
  Genome* parent = alignment->addLeafGenome("parent", "root", 1);
  Genome* grandChild1 = alignment->addLeafGenome("grandChild1", "parent", 1);
  Genome* grandChild2 = alignment->addLeafGenome("grandChild2", "parent", 1);

  // The two grandchildren use identical dimensions.
  vector<Sequence::Info> rootDims(1, Sequence::Info("Sequence", 3, 0, 1));
  vector<Sequence::Info> parentDims(1, Sequence::Info("Sequence", 9, 3, 3));
  vector<Sequence::Info> leafDims(1, Sequence::Info("Sequence", 9, 3, 0));
  root->setDimensions(rootDims);
  parent->setDimensions(parentDims);
  grandChild1->setDimensions(leafDims);
  grandChild2->setDimensions(leafDims);

  root->setString("CCC");
  parent->setString("CCCTACGTG");
  grandChild1->setString("CCCTACGTG");
  grandChild2->setString("CCCTACGTG");

  // root: a single bottom segment [0, 3) with one child entry (0, false).
  bottomIt = root->getBottomSegmentIterator();
  bottomFields.set(0, 3);
  bottomFields._children.push_back(ChildLink(0, false));
  bottomFields.applyTo(bottomIt);

  // parent, top side: three segments of length 3 that all name root
  // segment 0; the last argument cycles 1, 2, 0.
  // NOTE(review): the last argument is presumably the next-paralogy index
  // - confirm against TopSegmentStruct::set.
  topIt = parent->getTopSegmentIterator();
  for (int seg = 0; seg < 3; ++seg)
  {
    if (seg != 0)
    {
      topIt->toRight();
    }
    topFields.set(seg * 3, 3, 0, false, NULL_INDEX, (seg + 1) % 3);
    topFields.applyTo(topIt);
  }

  // parent, bottom side: segment k gets the entry (k, true) first; the
  // second entry is index 0 for the first segment and NULL_INDEX for the
  // other two.
  bottomIt = parent->getBottomSegmentIterator();
  bottomFields.set(0, 3);
  bottomFields._children.clear();
  bottomFields._children.push_back(ChildLink(0, true));
  bottomFields._children.push_back(ChildLink(0, false));
  bottomFields.applyTo(bottomIt);

  bottomIt->toRight();
  bottomFields.set(3, 3);
  bottomFields._children.clear();
  bottomFields._children.push_back(ChildLink(1, true));
  bottomFields._children.push_back(ChildLink(NULL_INDEX, true));
  bottomFields.applyTo(bottomIt);

  bottomIt->toRight();
  bottomFields.set(6, 3);
  bottomFields._children.clear();
  bottomFields._children.push_back(ChildLink(2, true));
  bottomFields._children.push_back(ChildLink(NULL_INDEX, false));
  bottomFields.applyTo(bottomIt);

  // grandChild1: top segment k names parent segment k, flag set to true.
  topIt = grandChild1->getTopSegmentIterator();
  for (int seg = 0; seg < 3; ++seg)
  {
    if (seg != 0)
    {
      topIt->toRight();
    }
    topFields.set(seg * 3, 3, seg, true);
    topFields.applyTo(topIt);
  }

  // grandChild2: only the first top segment names a parent segment (0);
  // the remaining two are given NULL_INDEX.
  topIt = grandChild2->getTopSegmentIterator();
  topFields.set(0, 3, 0, false);
  topFields.applyTo(topIt);

  topIt->toRight();
  topFields.set(3, 3, NULL_INDEX, true);
  topFields.applyTo(topIt);

  topIt->toRight();
  topFields.set(6, 3, NULL_INDEX, false);
  topFields.applyTo(topIt);

  // Called last, once every segment of "parent" has been written.
  parent->fixParseInfo();
}
Beispiel #6
0
// Builds the fixture alignment for MappedSegmentMapUpTest.
//
// Sets up a simple case where there is an edge from a parent to child1
// that is reversed, and a non-reversed edge to child2.  Below each child
// hangs a linear chain of five genomes:
//   child2 -> g1  -> g2  -> g3  -> g4  -> g5    (no rearrangements)
//   child1 -> gi1 -> gi2 -> gi3 -> gi4 -> gi5   (random inversions)
// rand() is not seeded inside this function.
void MappedSegmentMapUpTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildLink;
  const size_t chainLength = 5;

  vector<Sequence::Info> dims(1);

  BottomSegmentIteratorPtr bottomIt;
  BottomSegmentStruct bottomFields;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topFields;

  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);
  // a chain of grandchildren with no rearrangements, to test simple parsing
  Genome* g1 = alignment->addLeafGenome("g1", "child2", 1);
  Genome* g2 = alignment->addLeafGenome("g2", "g1", 1);
  Genome* g3 = alignment->addLeafGenome("g3", "g2", 1);
  Genome* g4 = alignment->addLeafGenome("g4", "g3", 1);
  Genome* g5 = alignment->addLeafGenome("g5", "g4", 1);
  // a second chain that will receive random inversions
  Genome* gi1 = alignment->addLeafGenome("gi1", "child1", 1);
  Genome* gi2 = alignment->addLeafGenome("gi2", "gi1", 1);
  Genome* gi3 = alignment->addLeafGenome("gi3", "gi2", 1);
  Genome* gi4 = alignment->addLeafGenome("gi4", "gi3", 1);
  Genome* gi5 = alignment->addLeafGenome("gi5", "gi4", 1);
  Genome* plainChain[] = {g1, g2, g3, g4, g5};
  Genome* invertedChain[] = {gi1, gi2, gi3, gi4, gi5};

  // Dimensions.  Every sequence has length 12.
  dims[0] = Sequence::Info("Sequence", 12, 0, 1);
  parent->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 12, 1, 6);
  child1->setDimensions(dims);
  child2->setDimensions(dims);

  // Both chains use the same per-level counts; the second count of each
  // level equals the first count of the level below it.
  const int firstCount[] = {6, 4, 3, 2, 12};
  const int secondCount[] = {4, 3, 2, 12, 0};
  for (size_t level = 0; level < chainLength; ++level)
  {
    dims[0] = Sequence::Info("Sequence", 12, firstCount[level],
                             secondCount[level]);
    plainChain[level]->setDimensions(dims);
  }
  for (size_t level = 0; level < chainLength; ++level)
  {
    dims[0] = Sequence::Info("Sequence", 12, firstCount[level],
                             secondCount[level]);
    invertedChain[level]->setDimensions(dims);
  }

  // DNA strings.
  parent->setString("CCCTACTTGTGC");
  child1->setString("CCCTACTTGTGC");
  child2->setString("CCCTACTTGTGC");
  for (size_t level = 0; level < chainLength; ++level)
  {
    plainChain[level]->setString("TCCTACTTGTGC");
    invertedChain[level]->setString("TCCTACTTGTGC");
  }

  // Root: one bottom segment over the whole sequence with two child
  // entries, both index 0: flag true for child1, false for child2.
  bottomIt = parent->getBottomSegmentIterator();
  bottomFields.set(0, 12);
  bottomFields._children.push_back(ChildLink(0, true));
  bottomFields._children.push_back(ChildLink(0, false));
  bottomFields.applyTo(bottomIt);

  topIt = child1->getTopSegmentIterator();
  topFields.set(0, 12, 0, true, 0);
  topFields.applyTo(topIt);

  topIt = child2->getTopSegmentIterator();
  topFields.set(0, 12, 0, false, 0);
  topFields.applyTo(topIt);

  // Link each child to the head of its chain with six length-2 segments,
  // segment i to segment i, no inversion.  child2/g1 is written first,
  // then child1/gi1.
  Genome* chainParents[] = {child2, child1};
  Genome* chainHeads[] = {g1, gi1};
  for (size_t side = 0; side < 2; ++side)
  {
    for (size_t i = 0; i < 6; ++i)
    {
      bottomIt = chainParents[side]->getBottomSegmentIterator(i);
      bottomFields.set(i * 2, 2, 0);
      bottomFields._children.clear();
      bottomFields._children.push_back(ChildLink(i, false));
      bottomFields.applyTo(bottomIt);

      topIt = chainHeads[side]->getTopSegmentIterator(i);
      topFields.set(i * 2, 2, i, false);
      topFields.applyTo(topIt);
    }
  }

  // Walk both chains, plain chain first.  For every genome, write its
  // top segments and the matching bottom segments of its parent.  Only
  // the second chain draws from rand(), one draw per segment.
  for (size_t chain = 0; chain < 2; ++chain)
  {
    Genome** members = chain == 0 ? plainChain : invertedChain;
    const bool randomInversions = chain == 1;

    for (size_t i = 0; i < chainLength; ++i)
    {
      const Genome* g = members[i];
      const Genome* above = g->getParent();
      // the last genome of a chain has no child
      const Genome* below = NULL;
      if (i != 4)
      {
        below = g->getChild(0);
      }

      // Uniform segment lengths: sequence length / number of top segments.
      hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
      hal_size_t aboveSegLen = above->getSequenceLength() /
         above->getNumTopSegments();
      hal_size_t belowSegLen = 0;
      if (below)
      {
        belowSegLen = below->getSequenceLength() / below->getNumTopSegments();
      }

      for (size_t j = 0; j < g->getNumTopSegments(); ++j)
      {
        // && short-circuits, so rand() is never called for the plain chain
        const bool inv = randomInversions && (rand() % 4 == 0);

        bottomIt = above->getBottomSegmentIterator(j);
        bottomFields.set(j * segLen, segLen, (j * segLen) / aboveSegLen);
        bottomFields._children.clear();
        bottomFields._children.push_back(ChildLink(j, inv));
        bottomFields.applyTo(bottomIt);

        // index of the child's segment covering this start position,
        // or NULL_INDEX when there is no child
        hal_index_t bparse = NULL_INDEX;
        if (below != NULL)
        {
          bparse = (j * segLen) / belowSegLen;
        }
        topIt = g->getTopSegmentIterator(j);
        topFields.set(j * segLen, segLen, j, inv, bparse);
        topFields.applyTo(topIt);
      }
    }
  }
}