Beispiel #1
0
/**
 * Sanity-check an alignment that was loaded for one level of detail.
 *
 * @param minQuery   Query-length threshold the alignment is registered under;
 *                   the value 0 is treated as the highest level of detail.
 * @param path       Where the alignment came from; only used to build error
 *                   messages.
 * @param alignment  The alignment to inspect.
 *
 * @throws hal_exception if the alignment contains no genomes.
 * @throws hal_exception (debug builds only, i.e. when NDEBUG is not defined)
 *         if minQuery is 0 and the probed genome cannot be opened or holds
 *         no DNA array.
 */
void LodManager::checkAlignment(hal_size_t minQuery,
                                const string& path,
                                AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    stringstream ss;
    ss << "No genomes found in base alignment specified in " << path;
    throw hal_exception(ss.str());
  }

#ifndef NDEBUG
  if (minQuery == 0)
  {
    // Probe a single genome: the first leaf below the root, or the root
    // itself when there are no leaves below it.
    vector<string> leafNames = alignment->getLeafNamesBelow(
      alignment->getRootName());
    string name = !leafNames.empty() ? leafNames[0] : alignment->getRootName();
    const Genome* genome = alignment->openGenome(name);
    if (genome == NULL)
    {
      // Fail with a diagnosable error instead of dereferencing NULL below.
      throw hal_exception("Failure to open genome " + name);
    }

    bool seqFound = genome->containsDNAArray();
    // Close before a possible throw so the genome is not left open.
    alignment->closeGenome(genome);
    if (seqFound == false)
    {
      stringstream ss;
      // The leading space keeps the path from running into "must".
      ss << "HAL file for highest level of detail (0) in " << path
         << " must contain DNA sequence information.";
      throw hal_exception(ss.str());
    }
  }
#endif
}
Beispiel #2
0
/**
 * Write the name of every genome that has a parent to os on a single line.
 *
 * The genomes considered are those collected by getGenomesInSubTree() from
 * the alignment's root, plus the root itself.  Names are separated by one
 * space, appear in the iteration order of the pointer set, and the line is
 * ended with endl.  Genomes without a parent are skipped.
 */
static void printBranches(ostream& os, AlignmentConstPtr alignment)
{
  const Genome* rootGenome = alignment->openGenome(alignment->getRootName());
  set<const Genome*> treeGenomes;
  getGenomesInSubTree(rootGenome, treeGenomes);
  treeGenomes.insert(rootGenome);

  bool wroteName = false;
  set<const Genome*>::iterator cursor = treeGenomes.begin();
  while (cursor != treeGenomes.end())
  {
    const Genome* current = *cursor;
    ++cursor;
    if (current->getParent() == NULL)
    {
      continue;
    }
    // A separator is only needed in front of the second and later names.
    if (wroteName)
    {
      os << " ";
    }
    wroteName = true;
    os << current->getName();
  }
  os << endl;
}
Beispiel #3
0
/**
 * Validate the alignment, then run the column/block comparison for every
 * ordered pair of genomes (a genome paired with itself included) collected
 * from the root's subtree, skipping pairs in which either genome reports a
 * sequence length that is not greater than zero.
 *
 * Does nothing when the alignment has no genomes.
 */
void MappedSegmentColCompareTest::checkCallBack(AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    return;
  }

  validateAlignment(alignment);

  const Genome* rootGenome = alignment->openGenome(alignment->getRootName());
  set<const Genome*> candidates;
  hal::getGenomesInSubTree(rootGenome, candidates);

  typedef set<const Genome*>::iterator GenomeIt;
  for (GenomeIt srcIt = candidates.begin(); srcIt != candidates.end(); ++srcIt)
  {
    const Genome* source = *srcIt;
    for (GenomeIt tgtIt = candidates.begin(); tgtIt != candidates.end();
         ++tgtIt)
    {
      const Genome* target = *tgtIt;

      // Short-circuits: the target length is only queried when the source
      // genome has sequence.
      const bool bothHaveSequence = source->getSequenceLength() > 0 &&
                                    target->getSequenceLength() > 0;
      if (!bothHaveSequence)
      {
        continue;
      }

      // The helpers below read the pair from these members.
      _ref = source;
      _tgt = target;
      createColArray();
      createBlockArray();
      compareArrays();
    }
  }
}
Beispiel #4
0
/**
 * Write the name of genomeName's parent, followed by endl, to os.
 * Nothing is written when genomeName is the alignment's root name.
 */
void printParent(ostream& os, AlignmentConstPtr alignment,
                 const string& genomeName)
{
  const bool isRoot = (genomeName == alignment->getRootName());
  if (isRoot)
  {
    return;
  }
  os << alignment->getParentName(genomeName) << endl;
}
Beispiel #5
0
/**
 * Write the length of the branch between genomeName and its parent,
 * followed by endl, to os.  Nothing is written when genomeName is the
 * alignment's root name.
 */
void printBranchLength(ostream& os, AlignmentConstPtr alignment,
                       const string& genomeName)
{
  if (genomeName == alignment->getRootName())
  {
    return;
  }
  const string parent = alignment->getParentName(genomeName);
  os << alignment->getBranchLength(parent, genomeName) << endl;
}
/**
 * Write the names of all genomes as one comma-separated line to os.
 *
 * The genomes are those collected by getGenomesInSubTree() from the
 * alignment's root, plus the root itself, in the iteration order of the
 * pointer set.  The line is ended with endl.
 */
void printGenomes(ostream& os, AlignmentConstPtr alignment)
{
  const Genome* rootGenome = alignment->openGenome(alignment->getRootName());
  set<const Genome*> allGenomes;
  getGenomesInSubTree(rootGenome, allGenomes);
  allGenomes.insert(rootGenome);

  bool needComma = false;
  set<const Genome*>::iterator cursor = allGenomes.begin();
  while (cursor != allGenomes.end())
  {
    // Every name except the first is preceded by a comma.
    if (needComma)
    {
      os << ",";
    }
    needComma = true;
    os << (*cursor)->getName();
    ++cursor;
  }
  os << endl;
}
/**
 * Validate every genome reachable from the alignment's root.
 *
 * Genome names are processed first-in, first-out starting from the root
 * name: each genome is opened, handed to validateGenome(), and its child
 * names are then queued.  Empty names are skipped.
 *
 * @throws hal_exception if a queued genome cannot be opened.
 */
void hal::validateAlignment(AlignmentConstPtr alignment)
{
  deque<string> pending;
  pending.push_back(alignment->getRootName());

  while (!pending.empty())
  {
    // Oldest entry sits at the front; new entries are appended at the back.
    const string genomeName = pending.front();
    pending.pop_front();
    if (genomeName.empty())
    {
      continue;
    }

    const Genome* genome = alignment->openGenome(genomeName);
    if (genome == NULL)
    {
      throw hal_exception("Failure to open genome " + genomeName);
    }
    validateGenome(genome);

    const vector<string> children = alignment->getChildNames(genomeName);
    pending.insert(pending.end(), children.begin(), children.end());
  }
}
Beispiel #8
0
/** Write the alignment's root genome name, followed by endl, to os. */
void printRootName(ostream& os, AlignmentConstPtr alignment)
{
  const string rootName = alignment->getRootName();
  os << rootName << endl;
}
Beispiel #9
0
/**
 * Export the whole alignment to mafStream as MAF.
 *
 * Stores the stream and alignment in members, writes the header, then, for
 * every leaf genome below the root, walks a column iterator from left to
 * right and appends each column to _mafBlock.  Whenever the current column
 * cannot be appended, the block built so far is written out and a new block
 * is started from that column.  The last block is written after all leaves
 * have been processed.
 *
 * @param mafStream  Destination stream; its address is kept in _mafStream.
 * @param alignment  Alignment to export; kept in _alignment.
 */
void MafExport::convertEntireAlignment(ostream& mafStream,
                                       AlignmentConstPtr alignment)
{
    // Total number of columns appended so far, across all leaf genomes.
    // It is never reset, so "appendCount == 0" is only true before the
    // very first column has been appended.
    hal_size_t appendCount = 0;
    // Number of times a column could not be appended to the current block;
    // only used to throttle the defragment() calls below.
    size_t numBlocks = 0;

    _mafStream = &mafStream;
    _alignment = alignment;

    writeHeader();

    // Load in all leaves from alignment
    vector<string> leafNames = alignment->getLeafNamesBelow(alignment->getRootName());
    vector<const Genome *> leafGenomes;
    for (hal_size_t i = 0; i < leafNames.size(); i++) {
        const Genome *genome = alignment->openGenome(leafNames[i]);
        // Debug-only check: with NDEBUG defined a NULL genome is stored
        // unchecked and dereferenced in the loop below.
        assert(genome != NULL);
        leafGenomes.push_back(genome);
    }
    // Shared between the per-genome iterators so that columns already
    // visited through an earlier genome are not emitted again.
    ColumnIterator::VisitCache visitCache;
    // Go through all the genomes one by one, and spit out any columns
    // they participate in that we haven't seen.
    for (hal_size_t i = 0; i < leafGenomes.size(); i++) {
        const Genome *genome = leafGenomes[i];
        // NOTE(review): the meaning of the NULL / 0 / 0 / NULL_INDEX
        // arguments is not visible here -- confirm against the
        // Genome::getColumnIterator declaration.
        ColumnIteratorConstPtr colIt = genome->getColumnIterator(NULL,
                                                                 0,
                                                                 0,
                                                                 NULL_INDEX,
                                                                 _noDupes,
                                                                 _noAncestors);
        colIt->setVisitCache(&visitCache);
        for (;;) {
            // First column of the whole export: start the first block.
            if (appendCount == 0) {
              _mafBlock.initBlock(colIt, _ucscNames, _printTree);
                assert(_mafBlock.canAppendColumn(colIt) == true);
            }
            // The current column does not fit the block being built:
            // flush that block and start a new one from this column.
            if (_mafBlock.canAppendColumn(colIt) == false)
            {
                // Erase empty entries from the column.  Helps when there are
                // millions of sequences (i.e. from fastas with lots of
                // scaffolds).  Done on every 1000th block break only.
                if (numBlocks++ % 1000 == 0)
                {
                    colIt->defragment();
                }
                // Only write the block if at least one column was ever
                // appended to it.
                if (appendCount > 0)
                {
                    mafStream << _mafBlock << '\n';
                }
                _mafBlock.initBlock(colIt, _ucscNames, _printTree);
                assert(_mafBlock.canAppendColumn(colIt) == true);
            }
            _mafBlock.appendColumn(colIt);
            appendCount++;

            if (colIt->lastColumn()) {
                // Have to break here because otherwise
                // colIt->toRight() will crash.
                break;
            }
            colIt->toRight();
        }
        // Copy over the updated visit cache information. This is a
        // deep copy, so it's slow, but necessary to preserve the
        // column iterator ownership of the visit cache
        // NOTE(review): clear() drops the stored pointers without deleting
        // them, and the PositionCache objects allocated below are not freed
        // in this function.  Presumably setVisitCache() hands ownership to
        // the iterator; if so, the copies made after the last genome are
        // never handed over -- confirm whether they leak.
        visitCache.clear();
        ColumnIterator::VisitCache *newVisitCache = colIt->getVisitCache();
        for(ColumnIterator::VisitCache::iterator it = newVisitCache->begin();
            it != newVisitCache->end(); it++) {
            visitCache[it->first] = new PositionCache(*it->second);
        }
    }

    // if nothing was ever added (seems to happen in corner case where
    // all columns violate unique), mafBlock ostream operator will crash
    // so we do following check
    if (appendCount > 0)
    {
        // endl (rather than '\n') also flushes the stream after the final
        // block.
        mafStream << _mafBlock << endl;
    }
}