// Fixture builder: creates the alignment via
// addIdenticalParentChild(alignment, 2, 100, 5), then walks the child's top
// segments.  At every fifth index the `reversed` flag flips, and whenever it
// flips to true (and the index is not at the tail of the parent's bottom
// segments) makeInversion(ti, 5) is invoked on the current iterator.
void GappedSegmentSimpleIteratorTest2::createCallBack(AlignmentPtr alignment)
{
  addIdenticalParentChild(alignment, 2, 100, 5);
  Genome* parent = alignment->openGenome(alignment->getRootName());
  Genome* child = parent->getChild(0);
  TopSegmentIteratorPtr ti = child->getTopSegmentIterator();
  BottomSegmentIteratorPtr bi = parent->getBottomSegmentIterator();

  // Starts out true so that the first flip (at index 0) turns it off.
  bool reversed = true;
  for (hal_index_t i = 0; ti != child->getTopSegmentEndIterator(); ++i)
  {
    const bool atRunStart = (i % 5 == 0);
    if (atRunStart)
    {
      reversed = !reversed;
    }
    if (atRunStart && reversed &&
        i < (hal_index_t)(parent->getNumBottomSegments() - 1))
    {
      makeInversion(ti, 5);
    }

    // Both iterators advance in lock step; bi is only moved, never read.
    ti->toRight();
    bi->toRight();
  }
}
// Fixture builder: creates the alignment via
// addIdenticalParentChild(alignment, 1, 20, 5) and then calls makeDelGap on
// the parent's bottom segments at indices 0 and 3.
void GappedSegmentIteratorIndelTest::createCallBack(AlignmentPtr alignment)
{
  addIdenticalParentChild(alignment, 1, 20, 5);
  Genome* parent = alignment->openGenome(alignment->getRootName());
  Genome* child = parent->getChild(0);
  // ti is only needed by the disabled code described at the end of this
  // function; it is still obtained here exactly as before.
  TopSegmentIteratorPtr ti = child->getTopSegmentIterator();
  BottomSegmentIteratorPtr bi = parent->getBottomSegmentIterator();

  // Bottom-segment indices that receive a deletion gap, in order.
  const int delGapSites[] = {0, 3};
  for (int k = 0; k < 2; ++k)
  {
    bi = parent->getBottomSegmentIterator(delGapSites[k]);
    makeDelGap(bi);
  }

  // Disabled in the original and still disabled:
  //   - makeInsGap(ti) on child top segments 1, 21 and 28
  //   - a debug loop over i in [0, 20) printing, for each i,
  //     bi->getBottomSegment()->getChildIndex(0) and
  //     ti->getTopSegment()->getParentIndex()
}
Пример #3
0
// Refreshes mainAlignment from topAlignment around the genome `genomeName`:
//   1. top dimensions + top segments of the genome itself,
//   2. bottom dimensions + bottom segments of its parent,
//   3. top dimensions + top segments of every sibling (other children of
//      the same parent, as listed by mainAlignment).
// fixParseInfo() is called on each destination genome after it is copied.
void copyFromTopAlignment(AlignmentConstPtr topAlignment,
                          AlignmentPtr mainAlignment, const string &genomeName)
{
  // 1. the genome being replaced
  Genome *dstGenome = mainAlignment->openGenome(genomeName);
  const Genome *srcGenome = topAlignment->openGenome(genomeName);
  srcGenome->copyTopDimensions(dstGenome);
  srcGenome->copyTopSegments(dstGenome);
  dstGenome->fixParseInfo();

  // 2. its parent
  Genome *dstParent = dstGenome->getParent();
  const Genome *srcParent = srcGenome->getParent();
  srcParent->copyBottomDimensions(dstParent);
  srcParent->copyBottomSegments(dstParent);
  dstParent->fixParseInfo();

  // 3. its siblings
  const vector<string> siblingNames =
    mainAlignment->getChildNames(dstParent->getName());
  for (vector<string>::const_iterator nameIt = siblingNames.begin();
       nameIt != siblingNames.end(); ++nameIt)
  {
    if (*nameIt == genomeName)
    {
      // already handled in step 1
      continue;
    }
    Genome *dstSibling = mainAlignment->openGenome(*nameIt);
    const Genome *srcSibling = topAlignment->openGenome(*nameIt);
    srcSibling->copyTopDimensions(dstSibling);
    srcSibling->copyTopSegments(dstSibling);
    dstSibling->fixParseInfo();
  }
}
// Takes an alignment handle, closes it, re-opens it on `path` and keeps the
// handle in _alignment.  The path is stored in _path first (member
// initialiser) and that stored copy is what is passed to open().
// NOTE(review): the `true` passed to open() presumably selects read-only
// mode, going by the class name -- confirm against Alignment::open.
TempReadAlignment::TempReadAlignment(AlignmentPtr alignment, 
                                     char* path)
  : _path(path)
{
  // Close before re-opening so the same handle can be reused.
  alignment->close();
  alignment->open(_path, true);
  _alignment = alignment;
}
Пример #5
0
// Fixture builder: checks that the alignment starts out empty, adds a root
// genome named "AncGenome" and stores one metadata key/value pair on it.
void GenomeMetaTest::createCallBack(AlignmentPtr alignment)
{
  CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

  Genome* root = alignment->addRootGenome("AncGenome", 0);
  root->getMetaData()->set("Young", "Jeezy");
}
Пример #6
0
// Fixture builder: checks that the alignment starts out empty, adds a root
// genome with a single sequence of 28889943 bases, and fills it with a
// string produced by randomString().  The string is kept in _string.
void GenomeStringTest::createCallBack(AlignmentPtr alignment)
{
  CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

  const hal_size_t numBases = 28889943;
  Genome* root = alignment->addRootGenome("AncGenome", 0);

  vector<Sequence::Info> dims;
  dims.push_back(Sequence::Info("Sequence", numBases, 5000, 700000));
  root->setDimensions(dims);

  _string = randomString(numBases);
  root->setString(_string);
}
Пример #7
0
// Fixture builder: a root genome "parent" (string "CCC") with two leaves,
// "child1" and "child2" (string "CCCTACGTG" each).  The parent gets one
// bottom segment; each child gets three top segments of length 3.
// NOTE(review): the Sequence::Info arguments appear to be
// (name, length, #top segments, #bottom segments), and the TopSegmentStruct::set
// arguments appear to be (start, length, parent index, parent reversed,
// bottom parse index, next paralogy index) -- confirm against their
// declarations, which are not visible here.
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> seqVec(1);
  
  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;
  
  // set up a simple case where there is an edge from a parent to
  // child and it is reversed
  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);
  seqVec[0] = Sequence::Info("Sequence", 3, 0, 1);
  parent->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
  child1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
  child2->setDimensions(seqVec);

  parent->setString("CCC");
  child1->setString("CCCTACGTG");
  child2->setString("CCCTACGTG");

  // Single parent bottom segment covering [0, 3).  One entry is pushed per
  // child: index 0 with flag true for child1, index 0 with flag false for
  // child2.
  bi = parent->getBottomSegmentIterator();
  bs.set(0, 3);
  bs._children.push_back(pair<hal_size_t, bool>(0, true));
  bs._children.push_back(pair<hal_size_t, bool>(0, false));
  bs.applyTo(bi);
     
  // child1: all three top segments use 0 as their third argument; the last
  // argument runs 1, 2, 0 across the segments (presumably a paralogy cycle
  // 0 -> 1 -> 2 -> 0 -- TODO confirm).
  ti = child1->getTopSegmentIterator();
  ts.set(0, 3, 0, true, NULL_INDEX, 1);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(3, 3, 0, true, NULL_INDEX, 2);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(6, 3, 0, true, NULL_INDEX, 0);
  ts.applyTo(ti);

  // child2: only the first top segment is given index 0; the other two are
  // given NULL_INDEX.
  ti = child2->getTopSegmentIterator();
  ts.set(0, 3, 0, false);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(3, 3, NULL_INDEX, true);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(6, 3, NULL_INDEX, false);
  ts.applyTo(ti);
}
Пример #8
0
// Fixture builder: creates a root genome with one set of dimensions, closes
// the alignment, re-opens it from _createPath and overwrites the dimensions
// of the same genome with a second, different set.
void GenomeUpdateTest::createCallBack(AlignmentPtr alignment)
{
  CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

  // First pass: create the genome and give it its initial dimensions.
  Genome* root = alignment->addRootGenome("AncGenome", 0);
  vector<Sequence::Info> dims(
    1, Sequence::Info("Sequence", 1000000, 5000, 700000));
  root->setDimensions(dims);
  alignment->close();

  // Second pass: re-open the file and re-dimension the existing genome.
  alignment->open(_createPath, false);
  root = alignment->openGenome("AncGenome");
  dims[0] = Sequence::Info("Sequence", 10000005, 14000, 2000001);
  root->setDimensions(dims);
}
// Fixture builder: creates the alignment via
// addIdenticalParentChild(alignment, 2, 100, 5), then walks the child's top
// segments and the parent's bottom segments in lock step, marking every
// odd-numbered pair (1, 3, 5, ...) as reversed on both sides.
void GappedSegmentSimpleIteratorTest::createCallBack(AlignmentPtr alignment)
{
  addIdenticalParentChild(alignment, 2, 100, 5);
  Genome* parent = alignment->openGenome(alignment->getRootName());
  Genome* child = parent->getChild(0);
  TopSegmentIteratorPtr ti = child->getTopSegmentIterator();
  BottomSegmentIteratorPtr bi = parent->getBottomSegmentIterator();

  for (int pos = 0; ti != child->getTopSegmentEndIterator(); ++pos)
  {
    const bool oddPosition = (pos % 2) != 0;
    if (oddPosition)
    {
      ti->getTopSegment()->setParentReversed(true);
      bi->getBottomSegment()->setChildReversed(0, true);
    }
    ti->toRight();
    bi->toRight();
  }
}
// Fixture builder with two root genomes:
//   "case1" - 10 bases split into two top segments of length 9 and 1;
//   "case2" - 1133 top segments whose lengths are drawn from rand() in
//             [1, 77], laid out back to back.
void TopSegmentIteratorToSiteTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> seqVec(1);
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;

  // ---- case 1 ----
  Genome* case1 = alignment->addRootGenome("case1");
  seqVec[0] = Sequence::Info("Sequence", 10, 2, 0);
  case1->setDimensions(seqVec);
  ti = case1->getTopSegmentIterator();
  ts.set(0, 9);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(9, 1);
  ts.applyTo(ti);
  case1 = NULL;

  // ---- case 2 ----
  // Draw all lengths up front: the genome's total length has to be known
  // before its dimensions can be set.
  const hal_size_t numSegs = 1133;
  vector<hal_size_t> segLens(numSegs);
  hal_size_t total = 0;
  for (size_t s = 0; s < numSegs; ++s)
  {
    const hal_size_t drawn = rand() % 77 + 1;
    assert(drawn > 0);
    segLens[s] = drawn;
    total += drawn;
  }

  Genome* case2 = alignment->addRootGenome("case2");
  seqVec[0] = Sequence::Info("Sequence", total, numSegs, 0);
  case2->setDimensions(seqVec);

  // Each segment starts where the previous one ended.
  hal_index_t nextStart = 0;
  for (size_t s = 0; s < numSegs; ++s)
  {
    ti = case2->getTopSegmentIterator((hal_index_t)s);
    ts.set(nextStart, segLens[s]);
    ts.applyTo(ti);
    nextStart += segLens[s];
  }
}
// Fixture builder: one root genome "Anc0" with a single sequence.  Writes
// the 9-base string "CACACATTC" at offset 500 and points top segment 100 at
// that same (500, 9) range.
void TopSegmentSequenceTest::createCallBack(AlignmentPtr alignment)
{
  Genome* root = alignment->addRootGenome("Anc0", 0);
  vector<Sequence::Info> dims(
    1, Sequence::Info("Sequence", 1000000, 5000, 700000));
  root->setDimensions(dims);

  root->setSubString("CACACATTC", 500, 9);
  root->getTopSegmentIterator(100)->getTopSegment()->setCoordinates(500, 9);
}
Пример #12
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, deleteNode;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    deleteNode = optParser->getArgument<string>("deleteNode");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr alignment = openHalAlignment(inPath, optParser);
  if (!noMarkAncestors) {
    markAncestorsForUpdate(alignment, deleteNode);
  }
  alignment->removeGenome(deleteNode);
  return 0;
}
// Fixture builder: a root "parent1" and a leaf "child1", both holding the
// 10-base string "CCCTACGTGC".  One parent bottom segment and one child top
// segment, each covering all 10 bases, are linked with the flag set to
// true; the child additionally gets two bottom segments of 5 bases each.
// NOTE(review): the third argument of BottomSegmentStruct::set and the fifth
// of TopSegmentStruct::set are presumably parse indices -- confirm against
// their declarations, which are not visible here.
void TopSegmentIteratorReverseTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> seqVec(1);
  
  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;
  
  // set up a simple case where there is an edge from a parent to
  // child and it is reversed
  Genome* parent1 = alignment->addRootGenome("parent1");
  Genome* child1 = alignment->addLeafGenome("child1", "parent1", 1);
  seqVec[0] = Sequence::Info("Sequence", 10, 2, 2);
  parent1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 10, 2, 2);
  child1->setDimensions(seqVec);

  parent1->setString("CCCTACGTGC");
  child1->setString("CCCTACGTGC");

  // parent bottom segment [0, 10) -> child index 0, flag true
  bi = parent1->getBottomSegmentIterator();
  bs.set(0, 10, 0);
  bs._children.push_back(pair<hal_size_t, bool>(0, true));
  bs.applyTo(bi);
     
  // matching child top segment [0, 10)
  ti = child1->getTopSegmentIterator();
  ts.set(0, 10, 0, true, 0);
  ts.applyTo(ti);

  // child bottom segments [0, 5) and [5, 10); the child list left over from
  // the parent's segment is cleared first because bs is being reused.
  bi = child1->getBottomSegmentIterator();
  bs.set(0, 5, 0);
  bs._children.clear();
  bs.applyTo(bi);
  bi->toRight();
  bs.set(5, 5, 0);
  bs.applyTo(bi);
}
Пример #14
0
// Fixture builder: checks that the alignment starts out empty, then creates
// a root "AncGenome" with three leaves ("Leaf1".."Leaf3", branch lengths
// 0.1/0.2/0.3), stores one metadata pair on the root and gives every genome
// a single sequence.  Leaf k has a sequence length of k million.
void GenomeCreateTest::createCallBack(AlignmentPtr alignment)
{
  CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

  Genome* root = alignment->addRootGenome("AncGenome", 0);
  Genome* leaves[3];
  leaves[0] = alignment->addLeafGenome("Leaf1", "AncGenome", 0.1);
  leaves[1] = alignment->addLeafGenome("Leaf2", "AncGenome", 0.2);
  leaves[2] = alignment->addLeafGenome("Leaf3", "AncGenome", 0.3);

  root->getMetaData()->set("Young", "Jeezy");

  vector<Sequence::Info> dims(1);
  dims[0] = Sequence::Info("Sequence", 1000000, 5000, 700000);
  root->setDimensions(dims);

  // Leaves differ only in their sequence length.
  for (size_t k = 0; k < 3; ++k)
  {
    dims[0] = Sequence::Info("Sequence", (k + 1) * 1000000, 700000, 0);
    leaves[k]->setDimensions(dims);
  }
}
// Fixture builder: a root "Anc0" with nine leaves "Leaf0".."Leaf8".  Fills
// _topSegments with one randomised TopSegmentStruct per top segment of the
// root (equal lengths, laid out back to back) and writes each one into the
// genome, asserting that the iterator's array index matches the loop index.
void TopSegmentSimpleIteratorTest::createCallBack(AlignmentPtr alignment)
{
  Genome* ancGenome = alignment->addRootGenome("Anc0", 0);
  const size_t numChildren = 9;
  for (size_t c = 0; c < numChildren; ++c)
  {
    std::stringstream leafName;
    leafName << "Leaf" << c;
    alignment->addLeafGenome(leafName.str(), "Anc0", 0.1);
  }

  vector<Sequence::Info> dims(1);
  dims[0] = Sequence::Info("Sequence", 1000000, 5000, 10000);
  ancGenome->setDimensions(dims);

  CuAssertTrue(_testCase, ancGenome->getNumChildren() == numChildren);

  // Build the expected segments in memory first.
  _topSegments.clear();
  size_t segIdx = 0;
  while (segIdx < ancGenome->getNumTopSegments())
  {
    TopSegmentStruct seg;
    seg.setRandom();
    // setRandom() is followed by explicit coordinates so that the segments
    // are equal in length and contiguous.
    seg._length =
       ancGenome->getSequenceLength() / ancGenome->getNumTopSegments();
    seg._startPosition = segIdx * seg._length;
    _topSegments.push_back(seg);
    ++segIdx;
  }

  // Then write them into the genome in iterator order.
  TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0);
  TopSegmentIteratorConstPtr tsEnd =
     ancGenome->getTopSegmentEndIterator();
  size_t expectedIndex = 0;
  while (tsIt != tsEnd)
  {
    CuAssertTrue(_testCase,
                 (size_t)tsIt->getTopSegment()->getArrayIndex() ==
                 expectedIndex);
    _topSegments[expectedIndex].applyTo(tsIt);
    tsIt->toRight();
    ++expectedIndex;
  }
}
Пример #16
0
// Refreshes mainAlignment from bottomAlignment around the genome
// `genomeName`:
//   1. dimensions, sequence, bottom dimensions and bottom segments of the
//      genome itself,
//   2. top dimensions + top segments of each of its children (as listed by
//      mainAlignment).
// fixParseInfo() is called on each destination genome after it is copied.
void copyFromBottomAlignment(AlignmentConstPtr bottomAlignment,
                             AlignmentPtr mainAlignment,
                             const string &genomeName)
{
  // 1. the genome being replaced
  Genome *dstGenome = mainAlignment->openGenome(genomeName);
  const Genome *srcGenome = bottomAlignment->openGenome(genomeName);
  srcGenome->copyDimensions(dstGenome);
  srcGenome->copySequence(dstGenome);
  srcGenome->copyBottomDimensions(dstGenome);
  srcGenome->copyBottomSegments(dstGenome);
  dstGenome->fixParseInfo();

  // 2. its children
  const vector<string> childNames = mainAlignment->getChildNames(genomeName);
  for (vector<string>::const_iterator nameIt = childNames.begin();
       nameIt != childNames.end(); ++nameIt)
  {
    Genome *dstChild = mainAlignment->openGenome(*nameIt);
    const Genome *srcChild = bottomAlignment->openGenome(*nameIt);
    srcChild->copyTopDimensions(dstChild);
    srcChild->copyTopSegments(dstChild);
    dstChild->fixParseInfo();
  }
}
Пример #17
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName,
    childName, leafName;
  double upperBranchLength, leafBranchLength;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    botAlignmentPath = optParser->getArgument<string>("botAlignmentFile");
    topAlignmentPath = optParser->getArgument<string>("topAlignmentFile");
    parentName = optParser->getArgument<string>("parentName");
    insertName = optParser->getArgument<string>("insertName");
    childName = optParser->getArgument<string>("childName");
    leafName = optParser->getArgument<string>("leafName");
    upperBranchLength = optParser->getArgument<double>("upperBranchLength");
    leafBranchLength = optParser->getArgument<double>("leafBranchLength");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser);
  AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath,
                                                    optParser);
  AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath,
                                                    optParser);
  mainAlignment->insertGenome(insertName, parentName, childName,
                              upperBranchLength);
  mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength);
  // Insert the new intermediate node.
  Genome *insertGenome = mainAlignment->openGenome(insertName);
  const Genome *topInsertGenome = topAlignment->openGenome(insertName);
  const Genome *botInsertGenome = botAlignment->openGenome(insertName);
  topInsertGenome->copyDimensions(insertGenome);
  topInsertGenome->copyTopDimensions(insertGenome);
  botInsertGenome->copyBottomDimensions(insertGenome);
  topInsertGenome->copySequence(insertGenome);
  topInsertGenome->copyTopSegments(insertGenome);
  topInsertGenome->copyMetadata(insertGenome);
  botInsertGenome->copyBottomSegments(insertGenome);
  insertGenome->fixParseInfo();

  // Copy the bottom segments for the parent genome from the top alignment.
  Genome *parentGenome = mainAlignment->openGenome(parentName);
  const Genome *botParentGenome = topAlignment->openGenome(parentName);
  botParentGenome->copyBottomDimensions(parentGenome);
  botParentGenome->copyBottomSegments(parentGenome);
  parentGenome->fixParseInfo();

  // Fix the parent's other children as well.
  vector<string> allChildren = mainAlignment->getChildNames(parentName);
  for (size_t i = 0; i < allChildren.size(); i++)
  {
    if (allChildren[i] != insertName)
    {
      Genome *outGenome = mainAlignment->openGenome(allChildren[i]);
      const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]);
      topSegmentsGenome->copyTopDimensions(outGenome);
      topSegmentsGenome->copyTopSegments(outGenome);
      outGenome->fixParseInfo();
            
    }
  }

  // Copy the top segments for the child genome from the bottom alignment.
  Genome *childGenome = mainAlignment->openGenome(childName);
  const Genome *topChildGenome = botAlignment->openGenome(childName);
  topChildGenome->copyTopDimensions(childGenome);
  topChildGenome->copyTopSegments(childGenome);
  childGenome->fixParseInfo();

  // Copy the entire genome for the leaf from the bottom alignment.
  Genome *outLeafGenome = mainAlignment->openGenome(leafName);
  const Genome *inLeafGenome = botAlignment->openGenome(leafName);
  inLeafGenome->copy(outLeafGenome);
  if (!noMarkAncestors) {
    markAncestorsForUpdate(mainAlignment, insertName);
  }
  mainAlignment->close();
  botAlignment->close();
  topAlignment->close();
}
Пример #18
0
int main(int argc, char *argv[])
{
  CLParserPtr optParser = initParser();
  string inPath, bottomAlignmentFile, topAlignmentFile, genomeName;
  bool noMarkAncestors;
  try {
    optParser->parseOptions(argc, argv);
    inPath = optParser->getArgument<string>("inFile");
    bottomAlignmentFile = optParser->getOption<string>("bottomAlignmentFile");
    topAlignmentFile = optParser->getOption<string>("topAlignmentFile");
    genomeName = optParser->getArgument<string>("genomeName");
    noMarkAncestors = optParser->getFlag("noMarkAncestors");
  } catch (exception &e) {
    optParser->printUsage(cerr);
    return 1;
  }
  AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser);
  AlignmentConstPtr bottomAlignment;
  AlignmentConstPtr topAlignment;
  bool useTopAlignment = mainAlignment->getRootName() != genomeName;
  bool useBottomAlignment = mainAlignment->getChildNames(genomeName).size() != 0;
  Genome *mainReplacedGenome = mainAlignment->openGenome(genomeName);
  if (useTopAlignment) {
    // Not a root genome. Can update using a top alignment.
    if (topAlignmentFile == "\"\"") {
      throw hal_exception("Cannot replace non-root genome without a top "
                          "alignment file.");
    }
    topAlignment = openHalAlignment(topAlignmentFile,
                                    optParser);
    const Genome *topReplacedGenome = topAlignment->openGenome(genomeName);
    topReplacedGenome->copyDimensions(mainReplacedGenome);
    topReplacedGenome->copySequence(mainReplacedGenome);
    
  }
  if (useBottomAlignment) {
    // Not a leaf genome. Can update using a bottom alignment.
    if (bottomAlignmentFile == "\"\"") {
      throw hal_exception("Cannot replace non-leaf genome without a bottom "
                          "alignment file.");
    }
    bottomAlignment = openHalAlignment(bottomAlignmentFile, optParser);
    const Genome *botReplacedGenome = bottomAlignment->openGenome(genomeName);
    botReplacedGenome->copyDimensions(mainReplacedGenome);
    botReplacedGenome->copySequence(mainReplacedGenome);
  }
  if (!useTopAlignment && !useBottomAlignment) {
    throw hal_exception("Root genome is also a leaf genome.");
  }
  if (useBottomAlignment) {
    copyFromBottomAlignment(bottomAlignment, mainAlignment, genomeName);
  }
  if (useTopAlignment) {
    copyFromTopAlignment(topAlignment, mainAlignment, genomeName);
  }

  // Clear update flag if present, since the genome has just been updated.
  MetaData *metaData = mainReplacedGenome->getMetaData();
  if (metaData->has("needsUpdate")) {
    metaData->set("needsUpdate", "false");
  }

  if (!noMarkAncestors) {
    markAncestorsForUpdate(mainAlignment, genomeName);
  }
  if (useTopAlignment) {
    topAlignment->close();
  }
  if (useBottomAlignment) {
    bottomAlignment->close();
  }
  mainAlignment->close();
}
Пример #19
0
// Fixture builder: a 12-base root "parent" with two children, each of which
// heads a five-genome chain (child2 -> g1 -> ... -> g5 and
// child1 -> gi1 -> ... -> gi5).  The g-chain is linked without inversions;
// in the gi-chain each segment link is flagged inverted when
// rand() % 4 == 0.
// NOTE(review): the Sequence::Info arguments appear to be
// (name, length, #top segments, #bottom segments); the third argument of
// BottomSegmentStruct::set and the fifth of TopSegmentStruct::set are
// presumably parse indices -- confirm against their declarations, which
// are not visible here.
void MappedSegmentMapUpTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> seqVec(1);
  
  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;
  
  // set up a simple case where there is an edge from a parent to
  // child1 and it is reversed and nonreversed to child2
  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);
  // add a bunch of grandchildren with no rearrangements to test
  // simple parsing
  Genome* g1 = alignment->addLeafGenome("g1", "child2", 1);
  Genome* g2 = alignment->addLeafGenome("g2", "g1", 1);
  Genome* g3 = alignment->addLeafGenome("g3", "g2", 1);
  Genome* g4 = alignment->addLeafGenome("g4", "g3", 1);
  Genome* g5 = alignment->addLeafGenome("g5", "g4", 1);
  // add some with random inversions
  Genome* gi1 = alignment->addLeafGenome("gi1", "child1", 1);
  Genome* gi2 = alignment->addLeafGenome("gi2", "gi1", 1);
  Genome* gi3 = alignment->addLeafGenome("gi3", "gi2", 1);
  Genome* gi4 = alignment->addLeafGenome("gi4", "gi3", 1);
  Genome* gi5 = alignment->addLeafGenome("gi5", "gi4", 1);
  Genome* gs[] = {g1, g2, g3, g4, g5};
  Genome* gis[] = {gi1, gi2, gi3, gi4, gi5};
  // Dimensions: every genome is 12 bases long.  Along each chain the fourth
  // Info argument of a genome equals the third Info argument of the next
  // genome down the chain.
  seqVec[0] = Sequence::Info("Sequence", 12, 0, 1);
  parent->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
  child1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
  child2->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 6, 4);
  g1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 4, 3);
  g2->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 3, 2);
  g3->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 2, 12);
  g4->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 12, 0);
  g5->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 6, 4);
  gi1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 4, 3);
  gi2->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 3, 2);
  gi3->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 2, 12);
  gi4->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 12, 0);
  gi5->setDimensions(seqVec);


  // The root and its two children share one string; all chain genomes get
  // a string that differs in the first base only.
  parent->setString("CCCTACTTGTGC");
  child1->setString("CCCTACTTGTGC");
  child2->setString("CCCTACTTGTGC");
  for (size_t i = 0; i < 5; ++i)
  {
    gs[i]->setString("TCCTACTTGTGC");
    gis[i]->setString("TCCTACTTGTGC");
  }

  // Root bottom segment [0, 12): child index 0 for both children, flag true
  // for child1 and false for child2.
  bi = parent->getBottomSegmentIterator();
  bs.set(0, 12);
  bs._children.push_back(pair<hal_size_t, bool>(0, true));
  bs._children.push_back(pair<hal_size_t, bool>(0, false));
  bs.applyTo(bi);
     
  ti = child1->getTopSegmentIterator();
  ts.set(0, 12, 0, true, 0);
  ts.applyTo(ti);

  ti = child2->getTopSegmentIterator();
  ts.set(0, 12, 0, false, 0);
  ts.applyTo(ti);
  
  // child2 bottom segment i <-> g1 top segment i, six segments of length 2.
  // The g-chain loop further down writes these same segments again for
  // i == 0.
  for (size_t i = 0; i < 6; ++i)
  {
    bi = child2->getBottomSegmentIterator(i);
    bs.set(i * 2, 2, 0);
    bs._children.clear();
    bs._children.push_back(pair<hal_size_t, bool>(i, false));
    bs.applyTo(bi);

    ti = g1->getTopSegmentIterator(i);
    ts.set(i * 2, 2, i, false);
    ts.applyTo(ti);
  }

  // Same for child1 / gi1.  The gi-chain loop further down writes these
  // same segments again for i == 0.
  for (size_t i = 0; i < 6; ++i)
  {
    bi = child1->getBottomSegmentIterator(i);
    bs.set(i * 2, 2, 0);
    bs._children.clear();
    bs._children.push_back(pair<hal_size_t, bool>(i, false));
    bs.applyTo(bi);

    ti = gi1->getTopSegmentIterator(i);
    ts.set(i * 2, 2, i, false);
    ts.applyTo(ti);
  }

  // g-chain: for each genome g, link its top segment j to bottom segment j
  // of its parent, never inverted.  Note that the local `parent` declared
  // inside the loop shadows the root pointer declared above.
  for (size_t i = 0; i < 5; ++i)
  {
    const Genome* g = gs[i];
    const Genome* parent = g->getParent();
    // The last genome in the chain (i == 4) has no child.
    const Genome* child = i == 4 ? NULL : g->getChild(0);
    // Segment lengths are uniform: sequence length / number of top segments.
    hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
    hal_size_t psegLen = parent->getSequenceLength() / 
       parent->getNumTopSegments();
    hal_size_t csegLen = 0;
    if (child)
    {
      csegLen =  child->getSequenceLength() / child->getNumTopSegments();
    }
    
    for (size_t j = 0; j < g->getNumTopSegments(); ++j)
    {
      bool inv = false;
      bi = parent->getBottomSegmentIterator(j);
      // Third argument: index of the parent top segment containing this
      // segment's start position.
      bs.set(j * segLen, segLen, (j * segLen) / psegLen);
      bs._children.clear();
      bs._children.push_back(pair<hal_size_t, bool>(j, inv));
      bs.applyTo(bi);

      // Index of the child top segment containing this segment's start
      // position, or NULL_INDEX when there is no child.
      hal_index_t bparse = NULL_INDEX;
      if (child != NULL)
      {
        bparse = (j * segLen) / csegLen;
      }
      ti = g->getTopSegmentIterator(j);
      ts.set(j * segLen, segLen, j, inv, bparse);
      ts.applyTo(ti);      
    }
  }
  
  // gi-chain: identical to the loop above except that each link is flagged
  // inverted when rand() % 4 == 0.
  for (size_t i = 0; i < 5; ++i)
  {
    const Genome* g = gis[i];
    const Genome* parent = g->getParent();
    const Genome* child = i == 4 ? NULL : g->getChild(0);
    hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
    hal_size_t psegLen = parent->getSequenceLength() / 
       parent->getNumTopSegments();
    hal_size_t csegLen = 0;
    if (child)
    {
      csegLen =  child->getSequenceLength() / child->getNumTopSegments();
    }
    
    for (size_t j = 0; j < g->getNumTopSegments(); ++j)
    {
      bool inv = rand() % 4 == 0;
      bi = parent->getBottomSegmentIterator(j);
      bs.set(j * segLen, segLen, (j * segLen) / psegLen);
      bs._children.clear();
      bs._children.push_back(pair<hal_size_t, bool>(j, inv));
      bs.applyTo(bi);

      hal_index_t bparse = NULL_INDEX;
      if (child != NULL)
      {
        bparse = (j * segLen) / csegLen;
      }
      ti = g->getTopSegmentIterator(j);
      ts.set(j * segLen, segLen, j, inv, bparse);
      ts.applyTo(ti);      
    }
  }

}
Пример #20
0
// Fixture builder: a 3-base "root", a 9-base "parent" below it, and two
// 9-base leaves "grandChild1" and "grandChild2" below the parent.
// NOTE(review): the Sequence::Info arguments appear to be
// (name, length, #top segments, #bottom segments), and the TopSegmentStruct::set
// arguments appear to be (start, length, parent index, parent reversed,
// bottom parse index, next paralogy index) -- confirm against their
// declarations, which are not visible here.
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> seqVec(1);

  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;

  // Set up a case where all the segments of grandChild1 coalesce with
  // the first segment of grandChild2, but only if using the root as
  // the coalescence limit. Otherwise only the first segments map to
  // each other.
  Genome* root = alignment->addRootGenome("root");
  Genome* parent = alignment->addLeafGenome("parent", "root", 1);
  Genome* grandChild1 = alignment->addLeafGenome("grandChild1", "parent", 1);
  Genome* grandChild2 = alignment->addLeafGenome("grandChild2", "parent", 1);
  seqVec[0] = Sequence::Info("Sequence", 3, 0, 1);
  root->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 9, 3, 3);
  parent->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
  grandChild1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
  grandChild2->setDimensions(seqVec);

  root->setString("CCC");
  parent->setString("CCCTACGTG");
  grandChild1->setString("CCCTACGTG");
  grandChild2->setString("CCCTACGTG");

  // root: one bottom segment [0, 3) with a single child entry (0, false)
  bi = root->getBottomSegmentIterator();
  bs.set(0, 3);
  bs._children.push_back(pair<hal_size_t, bool>(0, false));
  bs.applyTo(bi);

  // parent top segments: all three use 0 as their third argument; the last
  // argument runs 1, 2, 0 across the segments (presumably a paralogy cycle
  // 0 -> 1 -> 2 -> 0 -- TODO confirm).
  ti = parent->getTopSegmentIterator();
  ts.set(0, 3, 0, false, NULL_INDEX, 1);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(3, 3, 0, false, NULL_INDEX, 2);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(6, 3, 0, false, NULL_INDEX, 0);
  ts.applyTo(ti);

  // parent bottom segments: two child entries each, first for grandChild1
  // and second for grandChild2.  Segment k lists index k (flag true) for
  // grandChild1; only segment 0 lists an index for grandChild2, the other
  // two use NULL_INDEX.  The child list is cleared before each segment
  // because bs is being reused.
  bi = parent->getBottomSegmentIterator();
  bs.set(0, 3);
  bs._children.clear();
  bs._children.push_back(pair<hal_size_t, bool>(0, true));
  bs._children.push_back(pair<hal_size_t, bool>(0, false));
  bs.applyTo(bi);
  bi->toRight();
  bs.set(3, 3);
  bs._children.clear();
  bs._children.push_back(pair<hal_size_t, bool>(1, true));
  bs._children.push_back(pair<hal_size_t, bool>(NULL_INDEX, true));
  bs.applyTo(bi);
  bi->toRight();
  bs.set(6, 3);
  bs._children.clear();
  bs._children.push_back(pair<hal_size_t, bool>(2, true));
  bs._children.push_back(pair<hal_size_t, bool>(NULL_INDEX, false));
  bs.applyTo(bi);

  // grandChild1: top segment k gets index k, flag true
  ti = grandChild1->getTopSegmentIterator();
  ts.set(0, 3, 0, true);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(3, 3, 1, true);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(6, 3, 2, true);
  ts.applyTo(ti);

  // grandChild2: only the first top segment gets an index (0); the other
  // two get NULL_INDEX
  ti = grandChild2->getTopSegmentIterator();
  ts.set(0, 3, 0, false);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(3, 3, NULL_INDEX, true);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(6, 3, NULL_INDEX, false);
  ts.applyTo(ti);

  // Called last, once both the top and bottom segments of parent are set.
  parent->fixParseInfo();
}
// Fixture builder: adds four genomes ("case1".."case4") via addRootGenome,
// each with a different arrangement of top segments relative to bottom
// segments within the same genome.
// NOTE(review): the fifth argument of TopSegmentStruct::set and the third of
// BottomSegmentStruct::set are presumably the bottom / top parse indices --
// confirm against their declarations, which are not visible here.
void TopSegmentIteratorParseTest::createCallBack(AlignmentPtr alignment)
{
 vector<Sequence::Info> seqVec(1);
  
  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;
  
  // case 1: bottom segment aligns perfectly with top segment
  // (both cover [0, 10))
  Genome* case1 = alignment->addRootGenome("case1");
  seqVec[0] = Sequence::Info("Sequence", 10, 2, 2);
  case1->setDimensions(seqVec);
  
  ti = case1->getTopSegmentIterator();
  ts.set(0, 10, NULL_INDEX, false, 0, NULL_INDEX);
  ts.applyTo(ti);
  
  bi = case1->getBottomSegmentIterator();
  bs.set(0, 10, 0);
  bs.applyTo(bi);

  // case 2: bottom segment is completely contained in top segment
  // (top [0, 9); bottom [0, 3), [3, 7), [7, 10))
  Genome* case2 = alignment->addRootGenome("case2");
  seqVec[0] = Sequence::Info("Sequence", 10, 2, 3);
  case2->setDimensions(seqVec);
  
  ti = case2->getTopSegmentIterator();
  ts.set(0, 9, NULL_INDEX, false, 0, NULL_INDEX);
  ts.applyTo(ti);

  bi = case2->getBottomSegmentIterator();
  bs.set(0, 3, 0);
  bs.applyTo(bi);
  bi->toRight();
  bs.set(3, 4, 0);
  bs.applyTo(bi);
  bi->toRight();
  bs.set(7, 3, 0);
  bs.applyTo(bi);

  // case 3: top segment is completely contained in bottom segment
  // (top [0, 3), [3, 7), [7, 10); bottom [0, 9))
  Genome* case3 = alignment->addRootGenome("case3");
  seqVec[0] = Sequence::Info("Sequence", 10, 3, 2);
  case3->setDimensions(seqVec);

  ti = case3->getTopSegmentIterator();
  ts.set(0, 3, NULL_INDEX, false, 0);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(3, 4, NULL_INDEX, false, 0);
  ts.applyTo(ti);
  ti->toRight();
  ts.set(7, 3, NULL_INDEX, false, 0);
  ts.applyTo(ti);

  bi = case3->getBottomSegmentIterator();
  bs.set(0, 9, 0);
  bs.applyTo(bi);
 
  // case 4: top segment overhangs bottom segment on the left
  // (top [0, 9); bottom [0, 5), [5, 10))
  Genome* case4 = alignment->addRootGenome("case4");
  seqVec[0] = Sequence::Info("Sequence", 10, 2, 2);
  case4->setDimensions(seqVec);

  ti = case4->getTopSegmentIterator();
  ts.set(0, 9, NULL_INDEX, false, 0);
  ts.applyTo(ti);

  bi = case4->getBottomSegmentIterator();
  bs.set(0, 5, 0);
  bs.applyTo(bi);
  bi->toRight();
  bs.set(5, 5, 0);
  bs.applyTo(bi);
}
// Fixture builder: a root "parent1" and leaf "child1", each with three
// sequences ("Sequence0".."Sequence2").  Every segment has length 2 and
// parent bottom segment i is first linked to child top segment i.  Three
// of those links (at indices 8, 10 and 2) are then edited by hand.
void TopSegmentIsGapTest::createCallBack(AlignmentPtr alignment)
{
  const size_t numSequences = 3;
  vector<Sequence::Info> seqVec(numSequences);

  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;

  Genome* parent1 = alignment->addRootGenome("parent1");
  Genome* child1 = alignment->addLeafGenome("child1", "parent1", 1);

  // Both genomes share the same dimensions: three sequences built from
  // Sequence::Info(name, 10, 5, 5).
  for (size_t s = 0; s < numSequences; ++s)
  {
    stringstream nameBuilder;
    nameBuilder << "Sequence" << s;
    seqVec[s] = Sequence::Info(nameBuilder.str(), 10, 5, 5);
  }
  parent1->setDimensions(seqVec);
  child1->setDimensions(seqVec);

  // Identity mapping, parent side: bottom segment i -> child index i.
  bi = parent1->getBottomSegmentIterator();
  while (bi != parent1->getBottomSegmentEndIterator())
  {
    bs.set(bi->getBottomSegment()->getArrayIndex() * 2, 2);
    bs._children.clear();
    bs._children.push_back(pair<hal_size_t, bool>(
                            bi->getBottomSegment()->getArrayIndex(),
                            false));
    bs.applyTo(bi);
    bi->toRight();
  }

  // Identity mapping, child side: top segment i -> parent index i.
  ti = child1->getTopSegmentIterator();
  while (ti != child1->getTopSegmentEndIterator())
  {
    ts.set(ti->getTopSegment()->getArrayIndex() * 2, 2,
           ti->getTopSegment()->getArrayIndex());
    ts.applyTo(ti);
    ti->toRight();
  }

  // Insertion in the middle (top segment 8): parent segment 8 is re-pointed
  // at child segment 9, child segment 8 loses its parent, and child segment
  // 9 is pointed back at parent segment 8.
  bi = parent1->getBottomSegmentIterator(8);
  ti = child1->getTopSegmentIterator(8);
  assert(bi->getBottomSegment()->getChildIndex(0) == 8);
  assert(ti->getTopSegment()->getParentIndex() == 8);
  bi->getBottomSegment()->setChildIndex(0, 9);
  ti->getTopSegment()->setParentIndex(NULL_INDEX);
  ti->toRight();
  ti->getTopSegment()->setParentIndex(8);

  // Insertion at the beginning (top segment 10): same edit, shifted to
  // indices 10 and 11.
  bi = parent1->getBottomSegmentIterator(10);
  ti = child1->getTopSegmentIterator(10);
  assert(bi->getBottomSegment()->getChildIndex(0) == 10);
  assert(ti->getTopSegment()->getParentIndex() == 10);
  bi->getBottomSegment()->setChildIndex(0, 11);
  ti->getTopSegment()->setParentIndex(NULL_INDEX);
  ti->toRight();
  ti->getTopSegment()->setParentIndex(10);

  // Just having a null parent is not enough for an insertion: here only
  // the child side is changed, the parent still points at child segment 2.
  bi = parent1->getBottomSegmentIterator(2);
  ti = child1->getTopSegmentIterator(2);
  assert(bi->getBottomSegment()->getChildIndex(0) == 2);
  assert(ti->getTopSegment()->getParentIndex() == 2);
  ti->getTopSegment()->setParentIndex(NULL_INDEX);
}