/**
 * Sanity-check an alignment that was loaded from `path`.
 *
 * @param minQuery  Query-length threshold this alignment is registered under.
 *                  A value of 0 triggers the additional DNA check (debug
 *                  builds only).
 * @param path      Only used to build error messages.
 * @param alignment Alignment to inspect.
 *
 * @throws hal_exception if the alignment contains no genomes, or (when
 *         NDEBUG is not defined and minQuery == 0) if the probed genome has
 *         no DNA array.
 */
void LodManager::checkAlignment(hal_size_t minQuery, const string& path, AlignmentConstPtr alignment) {
    if (alignment->getNumGenomes() == 0) {
        stringstream ss;
        ss << "No genomes found in base alignment specified in " << path;
        throw hal_exception(ss.str());
    }

#ifndef NDEBUG
    if (minQuery == 0) {
        // Probe a single genome: the first leaf below the root, or the root
        // itself when there are no leaves below it.
        vector<string> leafNames = alignment->getLeafNamesBelow(alignment->getRootName());
        string name = !leafNames.empty() ? leafNames[0] : alignment->getRootName();

        const Genome* genome = alignment->openGenome(name);
        bool seqFound = genome->containsDNAArray();
        alignment->closeGenome(genome);

        if (seqFound == false) {
            stringstream ss;
            // Note the leading space: without it the path and the rest of
            // the sentence run together in the message.
            ss << "HAL file for highest level of detail (0) in " << path
               << " must contain DNA sequence information.";
            throw hal_exception(ss.str());
        }
    }
#endif
}
/**
 * Write the names of all non-root genomes (each one identifies the branch
 * to its parent) on a single line, separated by single spaces, followed by
 * a newline.
 *
 * Genomes are visited in the iteration order of the std::set that holds
 * their pointers.
 */
static void printBranches(ostream& os, AlignmentConstPtr alignment) {
    typedef set<const Genome*> GenomeSet;

    const Genome* rootGenome = alignment->openGenome(alignment->getRootName());
    GenomeSet subtree;
    getGenomesInSubTree(rootGenome, subtree);
    subtree.insert(rootGenome);

    bool needSeparator = false;
    for (GenomeSet::iterator it = subtree.begin(); it != subtree.end(); ++it) {
        const Genome* genome = *it;
        // A genome without a parent has no incoming branch to report.
        if (genome->getParent() == NULL) {
            continue;
        }
        if (needSeparator) {
            os << " ";
        }
        needSeparator = true;
        os << genome->getName();
    }
    os << endl;
}
/**
 * Validate the alignment, then for every ordered (source, target) pair of
 * genomes collected from the root's subtree -- including a genome paired
 * with itself -- where both have non-zero sequence length: store the pair
 * in _ref / _tgt, rebuild the column and block arrays, and compare them.
 *
 * Does nothing when the alignment has no genomes.
 */
void MappedSegmentColCompareTest::checkCallBack(AlignmentConstPtr alignment) {
    typedef set<const Genome*> GenomeSet;

    if (alignment->getNumGenomes() == 0) {
        return;
    }
    validateAlignment(alignment);

    GenomeSet allGenomes;
    hal::getGenomesInSubTree(alignment->openGenome(alignment->getRootName()), allGenomes);

    for (GenomeSet::iterator src = allGenomes.begin(); src != allGenomes.end(); ++src) {
        for (GenomeSet::iterator tgt = allGenomes.begin(); tgt != allGenomes.end(); ++tgt) {
            const Genome* source = *src;
            const Genome* target = *tgt;
            // Skip pairs where either side has no sequence to compare.
            if (!(source->getSequenceLength() > 0 && target->getSequenceLength() > 0)) {
                continue;
            }
            _ref = source;
            _tgt = target;
            createColArray();
            createBlockArray();
            compareArrays();
        }
    }
}
/**
 * Print the name of the parent of `genomeName`, followed by a newline.
 * Nothing is written when `genomeName` is the alignment's root.
 */
void printParent(ostream& os, AlignmentConstPtr alignment, const string& genomeName) {
    // The root has no parent to report.
    if (genomeName == alignment->getRootName()) {
        return;
    }
    os << alignment->getParentName(genomeName) << endl;
}
/**
 * Print the length of the branch between `genomeName` and its parent,
 * followed by a newline. Nothing is written when `genomeName` is the
 * alignment's root.
 */
void printBranchLength(ostream& os, AlignmentConstPtr alignment, const string& genomeName) {
    // The root has no branch above it.
    if (genomeName == alignment->getRootName()) {
        return;
    }
    string parent = alignment->getParentName(genomeName);
    os << alignment->getBranchLength(parent, genomeName) << endl;
}
/**
 * Print the names of the genomes in the alignment as one comma-separated
 * line (no spaces), followed by a newline.
 *
 * The root is inserted explicitly in addition to whatever
 * getGenomesInSubTree collects. Output order is the iteration order of the
 * std::set holding the genome pointers.
 */
void printGenomes(ostream& os, AlignmentConstPtr alignment) {
    typedef set<const Genome*> GenomeSet;

    const Genome* rootGenome = alignment->openGenome(alignment->getRootName());
    GenomeSet allGenomes;
    getGenomesInSubTree(rootGenome, allGenomes);
    allGenomes.insert(rootGenome);

    bool needComma = false;
    for (GenomeSet::iterator it = allGenomes.begin(); it != allGenomes.end(); ++it) {
        if (needComma) {
            os << ",";
        }
        needComma = true;
        os << (*it)->getName();
    }
    os << endl;
}
/**
 * Walk the genome tree breadth-first from the root and run validateGenome
 * on every genome that can be opened.
 *
 * Empty genome names (e.g. the root name of an empty alignment) are
 * skipped.
 *
 * @throws hal_exception if a named genome cannot be opened.
 */
void hal::validateAlignment(AlignmentConstPtr alignment) {
    // FIFO work list: names are taken from the back and children are added
    // at the front, so genomes are visited level by level.
    deque<string> pending;
    pending.push_back(alignment->getRootName());

    while (!pending.empty()) {
        const string current = pending.back();
        pending.pop_back();
        if (current.empty()) {
            continue;
        }

        const Genome* genome = alignment->openGenome(current);
        if (genome == NULL) {
            throw hal_exception("Failure to open genome " + current);
        }
        validateGenome(genome);

        const vector<string> children = alignment->getChildNames(current);
        for (vector<string>::const_iterator child = children.begin(); child != children.end(); ++child) {
            pending.push_front(*child);
        }
    }
}
/**
 * Print the name of the alignment's root genome, followed by a newline.
 */
void printRootName(ostream& os, AlignmentConstPtr alignment) {
    const string rootName = alignment->getRootName();
    os << rootName << endl;
}
/**
 * Export the whole alignment as MAF to `mafStream`.
 *
 * Writes the header, then walks the columns of every leaf genome in turn,
 * grouping consecutive columns into MAF blocks. A visit cache is carried
 * from one genome's column iterator to the next so that columns already
 * emitted are not emitted again.
 *
 * @param mafStream Output stream; its address is also stored in _mafStream.
 * @param alignment Alignment to export; also stored in _alignment.
 */
void MafExport::convertEntireAlignment(ostream& mafStream, AlignmentConstPtr alignment) {
    hal_size_t appendCount = 0;
    size_t numBlocks = 0;

    _mafStream = &mafStream;
    _alignment = alignment;
    writeHeader();

    // Load in all leaves from alignment
    vector<string> leafNames = alignment->getLeafNamesBelow(alignment->getRootName());
    vector<const Genome *> leafGenomes;
    leafGenomes.reserve(leafNames.size());
    for (hal_size_t i = 0; i < leafNames.size(); i++) {
        const Genome *genome = alignment->openGenome(leafNames[i]);
        assert(genome != NULL);
        leafGenomes.push_back(genome);
    }

    ColumnIterator::VisitCache visitCache;
    // Go through all the genomes one by one, and spit out any columns
    // they participate in that we haven't seen.
    for (hal_size_t i = 0; i < leafGenomes.size(); i++) {
        const Genome *genome = leafGenomes[i];
        ColumnIteratorConstPtr colIt =
            genome->getColumnIterator(NULL, 0, 0, NULL_INDEX, _noDupes, _noAncestors);
        colIt->setVisitCache(&visitCache);

        for (;;) {
            if (appendCount == 0) {
                _mafBlock.initBlock(colIt, _ucscNames, _printTree);
                assert(_mafBlock.canAppendColumn(colIt) == true);
            }
            if (_mafBlock.canAppendColumn(colIt) == false) {
                // erase empty entries from the column.  helps when there are
                // millions of sequences (ie from fastas with lots of scaffolds)
                if (numBlocks++ % 1000 == 0) {
                    colIt->defragment();
                }
                if (appendCount > 0) {
                    mafStream << _mafBlock << '\n';
                }
                _mafBlock.initBlock(colIt, _ucscNames, _printTree);
                assert(_mafBlock.canAppendColumn(colIt) == true);
            }
            _mafBlock.appendColumn(colIt);
            appendCount++;

            if (colIt->lastColumn()) {
                // Have to break here because otherwise
                // colIt->toRight() will crash.
                break;
            }
            colIt->toRight();
        }

        // Copy over the updated visit cache information. This is a
        // deep copy, so it's slow, but necessary to preserve the
        // column iterator ownership of the visit cache
        visitCache.clear();
        // The copy is only ever consumed by setVisitCache() on the next
        // genome's iterator. After the final genome nothing reads it, so the
        // copy is skipped there: it would be wasted work and, as far as this
        // function shows, the freshly allocated entries would never be freed.
        if (i + 1 < leafGenomes.size()) {
            ColumnIterator::VisitCache *newVisitCache = colIt->getVisitCache();
            for (ColumnIterator::VisitCache::iterator it = newVisitCache->begin();
                 it != newVisitCache->end(); ++it) {
                visitCache[it->first] = new PositionCache(*it->second);
            }
        }
    }

    // if nothing was ever added (seems to happen in corner case where
    // all columns violate unique), mafBlock ostream operator will crash
    // so we do following check
    if (appendCount > 0) {
        mafStream << _mafBlock << endl;
    }
}