Example #1
0
void SequenceUpdateTest::createCallBack(Alignment *alignment) {
    hal_size_t alignmentSize = alignment->getNumGenomes();
    CuAssertTrue(_testCase, alignmentSize == 0);

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);

    size_t numSequences = 1000;
    vector<Sequence::Info> seqVec;
    for (size_t i = 0; i < numSequences; ++i) {
        hal_size_t len = 1 + i * 5 + i;
        string name = "sequence" + std::to_string(i);
        seqVec.push_back(Sequence::Info(name, len, i, i * 2));
    }
    ancGenome->setDimensions(seqVec);
    alignment->closeGenome(ancGenome);
    ancGenome = alignment->openGenome("AncGenome");

    vector<Sequence::UpdateInfo> updateVec;
    for (size_t i = 0; i < numSequences / 2; ++i) {
        const Sequence *sequence = ancGenome->getSequence("sequence" + std::to_string(i));
        updateVec.push_back(Sequence::UpdateInfo(sequence->getName(), i * 7));
    }
    ancGenome->updateTopDimensions(updateVec);

    updateVec.clear();
    for (size_t i = 0; i < numSequences / 3; ++i) {
        const Sequence *sequence = ancGenome->getSequence("sequence" + std::to_string(i));
        updateVec.push_back(Sequence::UpdateInfo(sequence->getName(), i * 5));
    }
    ancGenome->updateBottomDimensions(updateVec);
}
Example #2
0
void LodExtract::writeDimensions(
  const map<const Sequence*, hal_size_t>& segmentCounts, 
  const string& parentName,
  const vector<string>& childNames)
{
  // initialize a dimensions list for each (input) genome
  map<const Genome*, vector<Sequence::Info> > dimMap;
  map<const Genome*, vector<Sequence::Info> >::iterator dimMapIt;
  vector<string> newGenomeNames = childNames;
  newGenomeNames.push_back(parentName);
 
  for (size_t i = 0; i < newGenomeNames.size(); ++i)
  {
    const Genome* inGenome = _inAlignment->openGenome(newGenomeNames[i]);
    pair<const Genome*, vector<Sequence::Info> > newEntry;
    newEntry.first = inGenome;
    
    // it's important we keep the sequences in the output genome
    // in the same order as the sequences in the input genome since
    // we always use global coordinates!
    SequenceIteratorConstPtr seqIt = inGenome->getSequenceIterator();
    SequenceIteratorConstPtr seqEnd = inGenome->getSequenceEndIterator();
    for (; seqIt != seqEnd; seqIt->toNext())
    {
      const Sequence* inSequence = seqIt->getSequence();
      map<const Sequence*, hal_size_t>::const_iterator segMapIt;
      segMapIt = segmentCounts.find(inSequence);
      // we skip empty sequences for now with below check
      if (segMapIt != segmentCounts.end())
      {
        vector<Sequence::Info>& segDims = newEntry.second;
        hal_size_t nTop = 
           inGenome->getName() == parentName ? 0 : segMapIt->second;
        hal_size_t nBot = 
           inGenome->getName() != parentName ? 0 : segMapIt->second;
        segDims.push_back(Sequence::Info(inSequence->getName(),
                                         inSequence->getSequenceLength(),
                                         nTop,
                                         nBot));
      }
    }

    // note potential bug here for genome with no data
    dimMap.insert(newEntry);
  }
  
  // now that we have the dimensions for each genome, update them in
  // the output alignment
  for (dimMapIt = dimMap.begin(); dimMapIt != dimMap.end(); ++dimMapIt)
  {
    Genome* newGenome = _outAlignment->openGenome(dimMapIt->first->getName());
    assert(newGenome != NULL);
    vector<Sequence::Info>& segDims = dimMapIt->second;
    // ROOT 
    if (newGenome->getName() == _outAlignment->getRootName())
    {
      assert(newGenome->getName() == parentName);
      newGenome->setDimensions(segDims, _keepSequences);
    }
    // LEAF
    else if (newGenome->getName() != parentName)
    {
      newGenome->setDimensions(segDims, _keepSequences);
    }
    // INTERNAL NODE
    else
    {
      vector<Sequence::UpdateInfo> updateInfo;
      for (size_t i = 0; i < segDims.size(); ++i)
      {
        updateInfo.push_back(
          Sequence::UpdateInfo(segDims[i]._name,
                               segDims[i]._numBottomSegments));
      }
      newGenome->updateBottomDimensions(updateInfo);
    }
  }
}