Beispiel #1
0
void LodManager::checkAlignment(hal_size_t minQuery,
                                const string& path,
                                AlignmentConstPtr alignment)
{
  if (alignment->getNumGenomes() == 0)
  {
    stringstream ss;
    ss << "No genomes found in base alignment specified in " << path;
    throw hal_exception(ss.str());
  }

#ifndef NDEBUG
  if (minQuery == 0)
  {
    vector<string> leafNames = alignment->getLeafNamesBelow(
      alignment->getRootName());
    string name = !leafNames.empty() ? leafNames[0] : alignment->getRootName();
    const Genome* genome = alignment->openGenome(name);
    
    bool seqFound = genome->containsDNAArray();
    alignment->closeGenome(genome);
    if (seqFound == false)
    {
      stringstream ss;
      ss << "HAL file for highest level of detail (0) in " << path 
         << "must contain DNA sequence information.";
      throw hal_exception(ss.str());
    }
  }
#endif
}
Beispiel #2
0
void MafExport::convertEntireAlignment(ostream& mafStream,
                                       AlignmentConstPtr alignment)
{
    hal_size_t appendCount = 0;
    size_t numBlocks = 0;

    _mafStream = &mafStream;
    _alignment = alignment;

    writeHeader();

    // Load in all leaves from alignment
    vector<string> leafNames = alignment->getLeafNamesBelow(alignment->getRootName());
    vector<const Genome *> leafGenomes;
    for (hal_size_t i = 0; i < leafNames.size(); i++) {
        const Genome *genome = alignment->openGenome(leafNames[i]);
        assert(genome != NULL);
        leafGenomes.push_back(genome);
    }
    ColumnIterator::VisitCache visitCache;
    // Go through all the genomes one by one, and spit out any columns
    // they participate in that we haven't seen.
    for (hal_size_t i = 0; i < leafGenomes.size(); i++) {
        const Genome *genome = leafGenomes[i];
        ColumnIteratorConstPtr colIt = genome->getColumnIterator(NULL,
                                                                 0,
                                                                 0,
                                                                 NULL_INDEX,
                                                                 _noDupes,
                                                                 _noAncestors);
        colIt->setVisitCache(&visitCache);
        for (;;) {
            if (appendCount == 0) {
              _mafBlock.initBlock(colIt, _ucscNames, _printTree);
                assert(_mafBlock.canAppendColumn(colIt) == true);
            }
            if (_mafBlock.canAppendColumn(colIt) == false)
            {
                // erase empty entries from the column.  helps when there are 
                // millions of sequences (ie from fastas with lots of scaffolds)
                if (numBlocks++ % 1000 == 0)
                {
                    colIt->defragment();
                }
                if (appendCount > 0)
                {
                    mafStream << _mafBlock << '\n';
                }
                _mafBlock.initBlock(colIt, _ucscNames, _printTree);
                assert(_mafBlock.canAppendColumn(colIt) == true);
            }
            _mafBlock.appendColumn(colIt);
            appendCount++;

            if (colIt->lastColumn()) {
                // Have to break here because otherwise
                // colIt->toRight() will crash.
                break;
            }
            colIt->toRight();
        }
        // Copy over the updated visit cache information. This is a
        // deep copy, so it's slow, but necessary to preserve the
        // column iterator ownership of the visit cache
        visitCache.clear();
        ColumnIterator::VisitCache *newVisitCache = colIt->getVisitCache();
        for(ColumnIterator::VisitCache::iterator it = newVisitCache->begin();
            it != newVisitCache->end(); it++) {
            visitCache[it->first] = new PositionCache(*it->second);
        }
    }

    // if nothing was ever added (seems to happen in corner case where
    // all columns violate unique), mafBlock ostream operator will crash
    // so we do following check
    if (appendCount > 0)
    {
        mafStream << _mafBlock << endl;
    }
}