コード例 #1
0
ファイル: halGenomeTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the genome-update test.
 *
 * Adds a root genome to the (initially empty) alignment, gives it one set of
 * dimensions, and then immediately replaces them with a second, different
 * set so that the dimensions are set twice on the same genome.
 */
void GenomeUpdateTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    Genome *root = alignment->addRootGenome("AncGenome", 0);

    // First set of dimensions.
    vector<Sequence::Info> dims;
    dims.push_back(Sequence::Info("Sequence", 1000000, 5000, 700000));
    root->setDimensions(dims);

    // Second set of dimensions, applied on top of the first.
    dims[0] = Sequence::Info("Sequence", 10000005, 14000, 2000001);
    root->setDimensions(dims);
}
コード例 #2
0
ファイル: halTopSegmentTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the simple top-segment iterator test.
 *
 * Builds a root genome "Anc0" with nine leaf children, gives the root a
 * single sequence, generates one randomized TopSegmentStruct per top segment
 * (with evenly sized, consecutive coordinates) and writes each of them into
 * the genome by walking a top-segment iterator from index 0 to the end.
 */
void TopSegmentSimpleIteratorTest::createCallBack(Alignment *alignment) {
    Genome *root = alignment->addRootGenome("Anc0", 0);

    const size_t numChildren = 9;
    size_t childIdx = 0;
    while (childIdx < numChildren) {
        alignment->addLeafGenome("Leaf" + std::to_string(childIdx), "Anc0", 0.1);
        ++childIdx;
    }

    vector<Sequence::Info> dims;
    dims.push_back(Sequence::Info("Sequence", 1000000, 5000, 10000));
    root->setDimensions(dims);

    CuAssertTrue(_testCase, root->getNumChildren() == numChildren);

    // Build the reference segments kept in the fixture: random contents,
    // but with length and start position overridden so the segments are
    // equally sized and laid out back to back.
    _topSegments.clear();
    for (size_t segIdx = 0; segIdx < root->getNumTopSegments(); ++segIdx) {
        TopSegmentStruct segment;
        segment.setRandom();
        segment._length = root->getSequenceLength() / root->getNumTopSegments();
        segment._startPosition = segIdx * segment._length;
        _topSegments.push_back(segment);
    }

    // Write every reference segment into the genome, checking that the
    // iterator's array index advances in lock-step with our own counter.
    TopSegmentIteratorPtr cursor = root->getTopSegmentIterator(0);
    size_t expectedIdx = 0;
    while (!cursor->atEnd()) {
        CuAssertTrue(_testCase, static_cast<size_t>(cursor->getTopSegment()->getArrayIndex()) == expectedIdx);
        _topSegments[expectedIdx].applyTo(cursor);
        cursor->toRight();
        ++expectedIdx;
    }
}
コード例 #3
0
ファイル: halSequenceTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the sequence-update test.
 *
 * Creates a root genome with 1000 sequences named "sequence<i>", closes and
 * re-opens the genome, then updates the top dimensions of the first half of
 * the sequences and the bottom dimensions of the first third.
 */
void SequenceUpdateTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    Genome *genome = alignment->addRootGenome("AncGenome", 0);

    const size_t numSequences = 1000;
    vector<Sequence::Info> dims;
    for (size_t idx = 0; idx < numSequences; ++idx) {
        const hal_size_t length = 1 + idx * 5 + idx;
        dims.push_back(Sequence::Info("sequence" + std::to_string(idx), length, idx, idx * 2));
    }
    genome->setDimensions(dims);

    // Round-trip the genome through close/open before applying updates.
    alignment->closeGenome(genome);
    genome = alignment->openGenome("AncGenome");

    // New top-segment counts (idx * 7) for the first half of the sequences.
    vector<Sequence::UpdateInfo> updates;
    const size_t topUpdateCount = numSequences / 2;
    for (size_t idx = 0; idx < topUpdateCount; ++idx) {
        const Sequence *seq = genome->getSequence("sequence" + std::to_string(idx));
        updates.push_back(Sequence::UpdateInfo(seq->getName(), idx * 7));
    }
    genome->updateTopDimensions(updates);

    // New bottom-segment counts (idx * 5) for the first third of the sequences.
    updates.clear();
    const size_t bottomUpdateCount = numSequences / 3;
    for (size_t idx = 0; idx < bottomUpdateCount; ++idx) {
        const Sequence *seq = genome->getSequence("sequence" + std::to_string(idx));
        updates.push_back(Sequence::UpdateInfo(seq->getName(), idx * 5));
    }
    genome->updateBottomDimensions(updates);
}
コード例 #4
0
ファイル: halTopSegmentTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the top-segment sequence test.
 *
 * Creates a root genome with one sequence, writes the 9-base string
 * "CACACATTC" at position 500, and sets the coordinates of top segment 100
 * to that same region (start 500, length 9).
 */
void TopSegmentSequenceTest::createCallBack(Alignment *alignment) {
    Genome *root = alignment->addRootGenome("Anc0", 0);

    vector<Sequence::Info> dims;
    dims.push_back(Sequence::Info("Sequence", 1000000, 5000, 700000));
    root->setDimensions(dims);

    root->setSubString("CACACATTC", 500, 9);

    TopSegmentIteratorPtr segIt = root->getTopSegmentIterator(100);
    segIt->getTopSegment()->setCoordinates(500, 9);
}
コード例 #5
0
ファイル: halGenomeTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the genome-string test.
 *
 * Creates a root genome with a single sequence of 28,889,943 bases, generates
 * a random string of that length, remembers it in the fixture (_string) and
 * stores it as the genome's DNA.
 */
void GenomeStringTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    const hal_size_t numBases = 28889943;
    Genome *root = alignment->addRootGenome("AncGenome", 0);

    vector<Sequence::Info> dims;
    dims.push_back(Sequence::Info("Sequence", numBases, 5000, 700000));
    root->setDimensions(dims);

    _string = randomString(numBases);
    root->setString(_string);
}
コード例 #6
0
/**
 * Creation step of the mapped-segment duplication test.
 *
 * Builds a parent genome with a single 3-base bottom segment and two
 * 9-base children with three top segments each.  All three top segments of
 * child1 point (reversed) at the parent's only bottom segment, while only the
 * first top segment of child2 points at it (not reversed); the remaining
 * child2 segments have no parent.
 */
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildEdge;

  vector<Sequence::Info> dims(1);

  BottomSegmentIteratorPtr botIt;
  BottomSegmentStruct botSeg;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topSeg;

  // Simple case where there is an edge from the parent to a child
  // and that edge is reversed.
  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);

  dims[0] = Sequence::Info("Sequence", 3, 0, 1);
  parent->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  child1->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  child2->setDimensions(dims);

  parent->setString("CCC");
  child1->setString("CCCTACGTG");
  child2->setString("CCCTACGTG");

  // Parent: one bottom segment covering [0, 3) with an edge to segment 0
  // of each child (reversed for child1, forward for child2).
  botIt = parent->getBottomSegmentIterator();
  botSeg.set(0, 3);
  botSeg._children.push_back(ChildEdge(0, true));
  botSeg._children.push_back(ChildEdge(0, false));
  botSeg.applyTo(botIt);

  // child1: three reversed top segments, all with parent index 0.  The last
  // argument (presumably the next-paralogy index) links them 0 -> 1 -> 2 -> 0.
  topIt = child1->getTopSegmentIterator();
  topSeg.set(0, 3, 0, true, NULL_INDEX, 1);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(3, 3, 0, true, NULL_INDEX, 2);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(6, 3, 0, true, NULL_INDEX, 0);
  topSeg.applyTo(topIt);

  // child2: only the first top segment has a parent (index 0, forward).
  topIt = child2->getTopSegmentIterator();
  topSeg.set(0, 3, 0, false);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(3, 3, NULL_INDEX, true);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(6, 3, NULL_INDEX, false);
  topSeg.applyTo(topIt);
}
コード例 #7
0
ファイル: halSequenceTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the sequence-creation test.
 *
 * Creates a root genome containing 1000 sequences named "sequence<i>", where
 * sequence i has length 1 + 5*i + i, i top segments and 2*i bottom segments.
 */
void SequenceCreateTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    Genome *root = alignment->addRootGenome("AncGenome", 0);

    const size_t numSequences = 1000;
    vector<Sequence::Info> dims;
    size_t idx = 0;
    while (idx < numSequences) {
        const hal_size_t length = 1 + idx * 5 + idx;
        dims.push_back(Sequence::Info("sequence" + std::to_string(idx), length, idx, idx * 2));
        ++idx;
    }
    root->setDimensions(dims);
}
コード例 #8
0
ファイル: halSequenceTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the sequence-rename test.
 *
 * Creates a root genome with three sequences, closes and re-opens it, and
 * then renames the sequences: to a name of similar length, to a much longer
 * name, to a shorter name, and finally renames an already-renamed sequence
 * a second time.
 */
void SequenceRenameTest::createCallBack(Alignment *alignment) {
    Genome *genome = alignment->addRootGenome("AncGenome", 0);

    vector<Sequence::Info> dims;
    dims.push_back(Sequence::Info("seq1", 40, 20, 20));
    dims.push_back(Sequence::Info("seq2", 50, 20, 20));
    dims.push_back(Sequence::Info("anotherSeq", 60, 20, 20));
    genome->setDimensions(dims);

    // Round-trip the genome through close/open before renaming.
    alignment->closeGenome(genome);
    genome = alignment->openGenome("AncGenome");

    genome->getSequence("seq1")->setName("foobar");
    genome->getSequence("seq2")->setName("a_really_really_super_long_name");
    // "anotherSeq" is renamed twice; the second lookup uses its new name.
    genome->getSequence("anotherSeq")->setName("short");
    genome->getSequence("short")->setName("again");
}
コード例 #9
0
ファイル: halGenomeTest.cpp プロジェクト: glennhickey/hal
/**
 * Creation step of the genome-creation test.
 *
 * Creates a root genome with three leaf children (branch lengths 0.1, 0.2
 * and 0.3), attaches one metadata key/value pair to the root, and assigns
 * each genome a single sequence with its own dimensions.
 */
void GenomeCreateTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    Genome *root = alignment->addRootGenome("AncGenome", 0);
    Genome *leafA = alignment->addLeafGenome("Leaf1", "AncGenome", 0.1);
    Genome *leafB = alignment->addLeafGenome("Leaf2", "AncGenome", 0.2);
    Genome *leafC = alignment->addLeafGenome("Leaf3", "AncGenome", 0.3);

    MetaData *rootMeta = root->getMetaData();
    rootMeta->set("Young", "Jeezy");

    vector<Sequence::Info> dims(1);

    dims[0] = Sequence::Info("Sequence", 1000000, 5000, 700000);
    root->setDimensions(dims);

    // The leaves differ only in sequence length (1M, 2M, 3M bases).
    dims[0] = Sequence::Info("Sequence", 1000000, 700000, 0);
    leafA->setDimensions(dims);

    dims[0] = Sequence::Info("Sequence", 2000000, 700000, 0);
    leafB->setDimensions(dims);

    dims[0] = Sequence::Info("Sequence", 3000000, 700000, 0);
    leafC->setDimensions(dims);
}
コード例 #10
0
ファイル: halGenomeTest.cpp プロジェクト: glennhickey/hal
// Test copying when the sequences aren't in the same order.
//
// Create an alignment with "Sequence1" positions aligned to
// "Sequence1" positions, and "Sequence2" to "Sequence2", but try
// copying the segments to an alignment with "Sequence2" before
// "Sequence1" in the ordering.
//
// This callback builds the source tree (root -> {internal, leaf1},
// internal -> leaf2) in `alignment`, builds a tree with the same genome
// names in a second, temporary alignment (stored in _secondAlignment at
// _path), and then copies dimensions and segments genome by genome from
// the first alignment into the second before closing the second one.
void GenomeCopySegmentsWhenSequencesOutOfOrderTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    hal_size_t alignmentSize = alignment->getNumGenomes();
    CuAssertTrue(_testCase, alignmentSize == 0);

    // Hacky: Need a different alignment to test copying the bottom
    // segments correctly.  (the names of a node's children are used
    // when copying bottom segments, and two genomes can't have the same
    // name in the same alignment)
    _path = getTempFile();
    _secondAlignment = AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, CREATE_ACCESS));

    // Same four-genome topology in both alignments.
    Genome *rootGenome = alignment->addRootGenome("root", 0);
    Genome *internalGenome = alignment->addLeafGenome("internal", "root", 0);
    Genome *leaf1Genome = alignment->addLeafGenome("leaf1", "root", 0);
    Genome *leaf2Genome = alignment->addLeafGenome("leaf2", "internal", 0);
    Genome *copyRootGenome = _secondAlignment->addRootGenome("root", 0);
    Genome *copyInternalGenome = _secondAlignment->addLeafGenome("internal", "root", 0);
    Genome *copyLeaf1Genome = _secondAlignment->addLeafGenome("leaf1", "root", 0);
    Genome *copyLeaf2Genome = _secondAlignment->addLeafGenome("leaf2", "internal", 0);

    // Source genomes: two sequences each (130 and 170 bases), filled with
    // random DNA.  The root has only bottom segments, the internal node has
    // both top and bottom segments, and the leaves have only top segments.
    vector<Sequence::Info> seqVec(2);
    seqVec[0] = Sequence::Info("Sequence1", 130, 0, 13);
    seqVec[1] = Sequence::Info("Sequence2", 170, 0, 17);
    rootGenome->setDimensions(seqVec);
    rootGenome->setString(randomString(rootGenome->getSequenceLength()));
    seqVec[0] = Sequence::Info("Sequence1", 130, 13, 13);
    seqVec[1] = Sequence::Info("Sequence2", 170, 17, 17);
    internalGenome->setDimensions(seqVec);
    internalGenome->setString(randomString(internalGenome->getSequenceLength()));
    seqVec[0] = Sequence::Info("Sequence1", 130, 13, 0);
    seqVec[1] = Sequence::Info("Sequence2", 170, 17, 0);
    leaf1Genome->setDimensions(seqVec);
    leaf1Genome->setString(randomString(leaf1Genome->getSequenceLength()));
    // leaf2 reuses the same dimensions as leaf1.
    leaf2Genome->setDimensions(seqVec);
    leaf2Genome->setString(randomString(leaf2Genome->getSequenceLength()));

    // Populate the source segments via the setTopSegments/setBottomSegments
    // helpers (defined elsewhere; the second argument is 10 for every call).
    setTopSegments(internalGenome, 10);
    setTopSegments(leaf1Genome, 10);
    setTopSegments(leaf2Genome, 10);

    setBottomSegments(rootGenome, 10);
    setBottomSegments(internalGenome, 10);

    rootGenome->fixParseInfo();
    internalGenome->fixParseInfo();
    leaf1Genome->fixParseInfo();
    leaf2Genome->fixParseInfo();

    // Destination genomes.  Root and leaves use the same sequence order as
    // the source genomes...
    seqVec[0] = Sequence::Info("Sequence1", 130, 0, 13);
    seqVec[1] = Sequence::Info("Sequence2", 170, 0, 17);
    copyRootGenome->setDimensions(seqVec);
    copyRootGenome->setString(randomString(copyRootGenome->getSequenceLength()));
    seqVec[0] = Sequence::Info("Sequence1", 130, 13, 0);
    seqVec[1] = Sequence::Info("Sequence2", 170, 17, 0);
    copyLeaf1Genome->setDimensions(seqVec);
    copyLeaf2Genome->setDimensions(seqVec);
    copyLeaf1Genome->setString(randomString(copyLeaf1Genome->getSequenceLength()));
    copyLeaf2Genome->setString(randomString(copyLeaf2Genome->getSequenceLength()));
    // ...but the destination internal genome lists "Sequence2" BEFORE
    // "Sequence1", which is the out-of-order case under test.
    seqVec[0] = Sequence::Info("Sequence2", 170, 17, 17);
    seqVec[1] = Sequence::Info("Sequence1", 130, 13, 13);
    copyInternalGenome->setDimensions(seqVec);
    copyInternalGenome->setString(randomString(copyInternalGenome->getSequenceLength()));

    // Copy from each source genome into its counterpart: bottom data for
    // the root, bottom and top data for the internal node, top data for the
    // leaves.  fixParseInfo() is called on each destination afterwards.
    rootGenome->copyBottomDimensions(copyRootGenome);
    rootGenome->copyBottomSegments(copyRootGenome);
    copyRootGenome->fixParseInfo();

    internalGenome->copyBottomDimensions(copyInternalGenome);
    internalGenome->copyBottomSegments(copyInternalGenome);
    internalGenome->copyTopDimensions(copyInternalGenome);
    internalGenome->copyTopSegments(copyInternalGenome);
    copyInternalGenome->fixParseInfo();

    leaf1Genome->copyTopDimensions(copyLeaf1Genome);
    leaf1Genome->copyTopSegments(copyLeaf1Genome);
    copyLeaf1Genome->fixParseInfo();

    leaf2Genome->copyTopDimensions(copyLeaf2Genome);
    leaf2Genome->copyTopSegments(copyLeaf2Genome);
    copyLeaf2Genome->fixParseInfo();

    // The second alignment is closed here; _path is kept in the fixture.
    _secondAlignment->close();
}
コード例 #11
0
ファイル: halGenomeTest.cpp プロジェクト: glennhickey/hal
// Creation step of the genome-copy test.
//
// Builds a two-genome tree (AncGenome -> LeafGenome1) in `alignment` and a
// second two-genome tree (copyRootGenome -> LeafGenome1) in a temporary
// alignment stored in _secondAlignment at _path.  Both trees are filled with
// DNA and segment data that differ between the two alignments, then
// Genome::copy() is called from each source genome with the corresponding
// genome of the second alignment as argument, and the second alignment is
// closed.
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    // The alignment handed to the callback is expected to contain no genomes.
    hal_size_t alignmentSize = alignment->getNumGenomes();
    CuAssertTrue(_testCase, alignmentSize == 0);

    // Hacky: Need a different alignment to test copying the bottom
    // segments correctly.  (the names of a node's children are used
    // when copying bottom segments, and two genomes can't have the same
    // name in the same alignment)
    _path = getTempFile();
    _secondAlignment =
        AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);
    Genome *leafGenome = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);
    // This genome will test copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence, copyMetadata
    Genome *copyRootGenome = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *copyLeafGenome = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    // Metadata is set on the source root only.
    MetaData *ancMeta = ancGenome->getMetaData();
    ancMeta->set("Young", "Jeezy");

    // Source dimensions: the root has only bottom segments, the leaf only
    // top segments.
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    ancGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    leafGenome->setDimensions(seqVec);
    // Fill the source root with the repeating pattern "CAT", one base at a
    // time through a DNA iterator, then flush the iterator.
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    // Same "CAT" pattern for the source leaf.
    n = leafGenome->getSequenceLength();
    dnaIt = leafGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    // Source leaf top segments: segment k starts at k with length 1, parent
    // index 3 (reversed), bottom parse index 5.  The next-paralogy index is 6
    // for every segment except segment 6 itself, which gets 7.
    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(topIt->getArrayIndex(), 1);
        topIt->tseg()->setParentIndex(3);
        topIt->tseg()->setParentReversed(true);
        topIt->tseg()->setBottomParseIndex(5);
        if (topIt->getArrayIndex() != 6) {
            topIt->tseg()->setNextParalogyIndex(6);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    // Source root bottom segments: segment k starts at k with length 1,
    // child 0 index 3 (reversed), top parse index 5.
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(botIt->getArrayIndex(), 1);
        botIt->bseg()->setChildIndex(0, 3);
        botIt->bseg()->setChildReversed(0, true);
        botIt->bseg()->setTopParseIndex(5);
    }

    // Genomes of the second alignment get smaller dimensions and different
    // contents ("TAG" pattern, different segment values) than the sources --
    // presumably so that the effect of copy() is observable afterwards.
    seqVec[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    copyRootGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    copyLeafGenome->setDimensions(seqVec);
    string copySeq = "TAG";
    dnaIt = copyRootGenome->getDnaIterator();
    n = copyRootGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    dnaIt = copyLeafGenome->getDnaIterator();
    n = copyLeafGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    // Second-alignment leaf top segments: every segment gets coordinates
    // (7, 8), parent index 9 (not reversed), bottom parse index 11, and
    // next-paralogy index 12 (7 for segment 12 itself).
    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(7, 8);
        topIt->tseg()->setParentIndex(9);
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(11);
        if (topIt->getArrayIndex() != 12) {
            topIt->tseg()->setNextParalogyIndex(12);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    // Second-alignment root bottom segments: coordinates (6, 7), child 0
    // index 8 (not reversed), top parse index 10.
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(6, 7);
        botIt->bseg()->setChildIndex(0, 8);
        botIt->bseg()->setChildReversed(0, false);
        botIt->bseg()->setTopParseIndex(10);
    }

    // Run the operation under test on both genome pairs, then close the
    // second alignment.
    // NOTE(review): dnaIt, topIt and botIt still refer to genomes of
    // _secondAlignment when it is closed -- confirm this is safe.
    ancGenome->copy(copyRootGenome);
    leafGenome->copy(copyLeafGenome);
    _secondAlignment->close();
}
コード例 #12
0
ファイル: halLodExtract.cpp プロジェクト: dayin1989/hal
void LodExtract::writeDimensions(
  const map<const Sequence*, hal_size_t>& segmentCounts, 
  const string& parentName,
  const vector<string>& childNames)
{
  // initialize a dimensions list for each (input) genome
  map<const Genome*, vector<Sequence::Info> > dimMap;
  map<const Genome*, vector<Sequence::Info> >::iterator dimMapIt;
  vector<string> newGenomeNames = childNames;
  newGenomeNames.push_back(parentName);
 
  for (size_t i = 0; i < newGenomeNames.size(); ++i)
  {
    const Genome* inGenome = _inAlignment->openGenome(newGenomeNames[i]);
    pair<const Genome*, vector<Sequence::Info> > newEntry;
    newEntry.first = inGenome;
    
    // it's important we keep the sequences in the output genome
    // in the same order as the sequences in the input genome since
    // we always use global coordinates!
    SequenceIteratorConstPtr seqIt = inGenome->getSequenceIterator();
    SequenceIteratorConstPtr seqEnd = inGenome->getSequenceEndIterator();
    for (; seqIt != seqEnd; seqIt->toNext())
    {
      const Sequence* inSequence = seqIt->getSequence();
      map<const Sequence*, hal_size_t>::const_iterator segMapIt;
      segMapIt = segmentCounts.find(inSequence);
      // we skip empty sequences for now with below check
      if (segMapIt != segmentCounts.end())
      {
        vector<Sequence::Info>& segDims = newEntry.second;
        hal_size_t nTop = 
           inGenome->getName() == parentName ? 0 : segMapIt->second;
        hal_size_t nBot = 
           inGenome->getName() != parentName ? 0 : segMapIt->second;
        segDims.push_back(Sequence::Info(inSequence->getName(),
                                         inSequence->getSequenceLength(),
                                         nTop,
                                         nBot));
      }
    }

    // note potential bug here for genome with no data
    dimMap.insert(newEntry);
  }
  
  // now that we have the dimensions for each genome, update them in
  // the output alignment
  for (dimMapIt = dimMap.begin(); dimMapIt != dimMap.end(); ++dimMapIt)
  {
    Genome* newGenome = _outAlignment->openGenome(dimMapIt->first->getName());
    assert(newGenome != NULL);
    vector<Sequence::Info>& segDims = dimMapIt->second;
    // ROOT 
    if (newGenome->getName() == _outAlignment->getRootName())
    {
      assert(newGenome->getName() == parentName);
      newGenome->setDimensions(segDims, _keepSequences);
    }
    // LEAF
    else if (newGenome->getName() != parentName)
    {
      newGenome->setDimensions(segDims, _keepSequences);
    }
    // INTERNAL NODE
    else
    {
      vector<Sequence::UpdateInfo> updateInfo;
      for (size_t i = 0; i < segDims.size(); ++i)
      {
        updateInfo.push_back(
          Sequence::UpdateInfo(segDims[i]._name,
                               segDims[i]._numBottomSegments));
      }
      newGenome->updateBottomDimensions(updateInfo);
    }
  }
}
コード例 #13
0
/**
 * Creation step of the extra-paralogs mapped-segment test.
 *
 * Builds root -> parent -> {grandChild1, grandChild2}.  The root has a single
 * 3-base bottom segment; the other genomes have three 3-base segments each.
 */
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment)
{
  typedef pair<hal_size_t, bool> ChildEdge;

  vector<Sequence::Info> dims(1);

  BottomSegmentIteratorPtr botIt;
  BottomSegmentStruct botSeg;
  TopSegmentIteratorPtr topIt;
  TopSegmentStruct topSeg;

  // Every segment of grandChild1 coalesces with the first segment of
  // grandChild2, but only when the root is used as the coalescence limit.
  // With any lower limit, only the first segments map to each other.
  Genome* root = alignment->addRootGenome("root");
  Genome* parent = alignment->addLeafGenome("parent", "root", 1);
  Genome* grandChild1 = alignment->addLeafGenome("grandChild1", "parent", 1);
  Genome* grandChild2 = alignment->addLeafGenome("grandChild2", "parent", 1);

  dims[0] = Sequence::Info("Sequence", 3, 0, 1);
  root->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 3);
  parent->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  grandChild1->setDimensions(dims);
  dims[0] = Sequence::Info("Sequence", 9, 3, 0);
  grandChild2->setDimensions(dims);

  root->setString("CCC");
  parent->setString("CCCTACGTG");
  grandChild1->setString("CCCTACGTG");
  grandChild2->setString("CCCTACGTG");

  // root: one bottom segment with a forward edge to parent segment 0.
  botIt = root->getBottomSegmentIterator();
  botSeg.set(0, 3);
  botSeg._children.push_back(ChildEdge(0, false));
  botSeg.applyTo(botIt);

  // parent, top side: all three segments have parent index 0 in the root.
  // The last argument (presumably the next-paralogy index) links them
  // 0 -> 1 -> 2 -> 0.
  topIt = parent->getTopSegmentIterator();
  topSeg.set(0, 3, 0, false, NULL_INDEX, 1);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(3, 3, 0, false, NULL_INDEX, 2);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(6, 3, 0, false, NULL_INDEX, 0);
  topSeg.applyTo(topIt);

  // parent, bottom side: segment k has a reversed edge to segment k of
  // grandChild1; only segment 0 has an edge to grandChild2.
  botIt = parent->getBottomSegmentIterator();
  botSeg.set(0, 3);
  botSeg._children.clear();
  botSeg._children.push_back(ChildEdge(0, true));
  botSeg._children.push_back(ChildEdge(0, false));
  botSeg.applyTo(botIt);
  botIt->toRight();
  botSeg.set(3, 3);
  botSeg._children.clear();
  botSeg._children.push_back(ChildEdge(1, true));
  botSeg._children.push_back(ChildEdge(NULL_INDEX, true));
  botSeg.applyTo(botIt);
  botIt->toRight();
  botSeg.set(6, 3);
  botSeg._children.clear();
  botSeg._children.push_back(ChildEdge(2, true));
  botSeg._children.push_back(ChildEdge(NULL_INDEX, false));
  botSeg.applyTo(botIt);

  // grandChild1: segment k points (reversed) at parent segment k.
  topIt = grandChild1->getTopSegmentIterator();
  topSeg.set(0, 3, 0, true);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(3, 3, 1, true);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(6, 3, 2, true);
  topSeg.applyTo(topIt);

  // grandChild2: only the first segment has a parent (index 0, forward).
  topIt = grandChild2->getTopSegmentIterator();
  topSeg.set(0, 3, 0, false);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(3, 3, NULL_INDEX, true);
  topSeg.applyTo(topIt);
  topIt->toRight();
  topSeg.set(6, 3, NULL_INDEX, false);
  topSeg.applyTo(topIt);

  parent->fixParseInfo();
}
コード例 #14
0
// Creation step of the map-up mapped-segment test.
//
// Builds a parent genome with two children (child1, child2) and hangs a
// five-genome chain below each child: g1..g5 below child2 (no inversions)
// and gi1..gi5 below child1 (randomly inverted edges).  Every genome has a
// single 12-base sequence; only the number of segments differs per level.
void MappedSegmentMapUpTest::createCallBack(AlignmentPtr alignment)
{
  vector<Sequence::Info> seqVec(1);
  
  BottomSegmentIteratorPtr bi;
  BottomSegmentStruct bs;
  TopSegmentIteratorPtr ti;
  TopSegmentStruct ts;
  
  // set up a simple case where there is an edge from the parent to
  // child1 that is reversed, and a non-reversed edge to child2
  Genome* parent = alignment->addRootGenome("parent");
  Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
  Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);
  // add a bunch of grandchildren with no rearrangements to test
  // simple parsing
  Genome* g1 = alignment->addLeafGenome("g1", "child2", 1);
  Genome* g2 = alignment->addLeafGenome("g2", "g1", 1);
  Genome* g3 = alignment->addLeafGenome("g3", "g2", 1);
  Genome* g4 = alignment->addLeafGenome("g4", "g3", 1);
  Genome* g5 = alignment->addLeafGenome("g5", "g4", 1);
  // add some with random inversions
  Genome* gi1 = alignment->addLeafGenome("gi1", "child1", 1);
  Genome* gi2 = alignment->addLeafGenome("gi2", "gi1", 1);
  Genome* gi3 = alignment->addLeafGenome("gi3", "gi2", 1);
  Genome* gi4 = alignment->addLeafGenome("gi4", "gi3", 1);
  Genome* gi5 = alignment->addLeafGenome("gi5", "gi4", 1);
  Genome* gs[] = {g1, g2, g3, g4, g5};
  Genome* gis[] = {gi1, gi2, gi3, gi4, gi5};
  // Dimensions: each genome's bottom-segment count equals the top-segment
  // count of the genome directly below it in the chain.
  seqVec[0] = Sequence::Info("Sequence", 12, 0, 1);
  parent->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
  child1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
  child2->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 6, 4);
  g1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 4, 3);
  g2->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 3, 2);
  g3->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 2, 12);
  g4->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 12, 0);
  g5->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 6, 4);
  gi1->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 4, 3);
  gi2->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 3, 2);
  gi3->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 2, 12);
  gi4->setDimensions(seqVec);
  seqVec[0] = Sequence::Info("Sequence", 12, 12, 0);
  gi5->setDimensions(seqVec);


  // DNA: parent and children share one string; all chain genomes share a
  // second string that differs from it in the first base.
  parent->setString("CCCTACTTGTGC");
  child1->setString("CCCTACTTGTGC");
  child2->setString("CCCTACTTGTGC");
  for (size_t i = 0; i < 5; ++i)
  {
    gs[i]->setString("TCCTACTTGTGC");
    gis[i]->setString("TCCTACTTGTGC");
  }

  // parent: one bottom segment spanning all 12 bases with an edge to
  // segment 0 of child1 (reversed) and segment 0 of child2 (not reversed).
  bi = parent->getBottomSegmentIterator();
  bs.set(0, 12);
  bs._children.push_back(pair<hal_size_t, bool>(0, true));
  bs._children.push_back(pair<hal_size_t, bool>(0, false));
  bs.applyTo(bi);
     
  // child1 / child2: one top segment each, spanning all 12 bases, with
  // parent index 0 and bottom parse index 0.
  ti = child1->getTopSegmentIterator();
  ts.set(0, 12, 0, true, 0);
  ts.applyTo(ti);

  ti = child2->getTopSegmentIterator();
  ts.set(0, 12, 0, false, 0);
  ts.applyTo(ti);
  
  // child2 -> g1: six 2-base segments, segment i linked to segment i,
  // not reversed.
  for (size_t i = 0; i < 6; ++i)
  {
    bi = child2->getBottomSegmentIterator(i);
    bs.set(i * 2, 2, 0);
    bs._children.clear();
    bs._children.push_back(pair<hal_size_t, bool>(i, false));
    bs.applyTo(bi);

    ti = g1->getTopSegmentIterator(i);
    ts.set(i * 2, 2, i, false);
    ts.applyTo(ti);
  }

  // child1 -> gi1: same layout as child2 -> g1 above.
  for (size_t i = 0; i < 6; ++i)
  {
    bi = child1->getBottomSegmentIterator(i);
    bs.set(i * 2, 2, 0);
    bs._children.clear();
    bs._children.push_back(pair<hal_size_t, bool>(i, false));
    bs.applyTo(bi);

    ti = gi1->getTopSegmentIterator(i);
    ts.set(i * 2, 2, i, false);
    ts.applyTo(ti);
  }

  // Chain g1..g5: for each genome, (re)write the bottom segments of its
  // parent and its own top segments so that segment j maps to segment j
  // with no inversion.  Note this also rewrites the child2 -> g1 edges set
  // above when i == 0.
  for (size_t i = 0; i < 5; ++i)
  {
    const Genome* g = gs[i];
    // NOTE: `parent` here shadows the outer `parent` genome pointer.
    const Genome* parent = g->getParent();
    // g5 (i == 4) is the last genome in the chain and has no child.
    const Genome* child = i == 4 ? NULL : g->getChild(0);
    // Segment lengths assume equally sized segments within each genome.
    hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
    hal_size_t psegLen = parent->getSequenceLength() / 
       parent->getNumTopSegments();
    hal_size_t csegLen = 0;
    if (child)
    {
      csegLen =  child->getSequenceLength() / child->getNumTopSegments();
    }
    
    for (size_t j = 0; j < g->getNumTopSegments(); ++j)
    {
      bool inv = false;
      bi = parent->getBottomSegmentIterator(j);
      // Third argument: index of the parent's top segment containing the
      // start of this bottom segment (start / parent's top-segment length).
      bs.set(j * segLen, segLen, (j * segLen) / psegLen);
      bs._children.clear();
      bs._children.push_back(pair<hal_size_t, bool>(j, inv));
      bs.applyTo(bi);

      // Bottom parse index: index of the child's top segment containing the
      // start of this segment; left as NULL_INDEX when there is no child.
      hal_index_t bparse = NULL_INDEX;
      if (child != NULL)
      {
        bparse = (j * segLen) / csegLen;
      }
      ti = g->getTopSegmentIterator(j);
      ts.set(j * segLen, segLen, j, inv, bparse);
      ts.applyTo(ti);      
    }
  }
  
  // Chain gi1..gi5: identical to the loop above except that each edge is
  // reversed when rand() % 4 == 0, so the result depends on the
  // random-number state at the time of the call.
  for (size_t i = 0; i < 5; ++i)
  {
    const Genome* g = gis[i];
    // NOTE: `parent` here shadows the outer `parent` genome pointer.
    const Genome* parent = g->getParent();
    // gi5 (i == 4) is the last genome in the chain and has no child.
    const Genome* child = i == 4 ? NULL : g->getChild(0);
    hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
    hal_size_t psegLen = parent->getSequenceLength() / 
       parent->getNumTopSegments();
    hal_size_t csegLen = 0;
    if (child)
    {
      csegLen =  child->getSequenceLength() / child->getNumTopSegments();
    }
    
    for (size_t j = 0; j < g->getNumTopSegments(); ++j)
    {
      // The same orientation flag is written to both ends of the edge.
      bool inv = rand() % 4 == 0;
      bi = parent->getBottomSegmentIterator(j);
      bs.set(j * segLen, segLen, (j * segLen) / psegLen);
      bs._children.clear();
      bs._children.push_back(pair<hal_size_t, bool>(j, inv));
      bs.applyTo(bi);

      hal_index_t bparse = NULL_INDEX;
      if (child != NULL)
      {
        bparse = (j * segLen) / csegLen;
      }
      ti = g->getTopSegmentIterator(j);
      ts.set(j * segLen, segLen, j, inv, bparse);
      ts.applyTo(ti);      
    }
  }

}