/**
 * Copy the top segments of this genome into the top-segment array of dest.
 *
 * dest must either have no top segments (in which case nothing is copied) or
 * exactly as many as this genome (asserted). For every segment the
 * coordinates, parent orientation, bottom parse index and next paralogy index
 * are copied verbatim. The parent index is translated: it is re-expressed
 * relative to the sequence in dest's parent that has the same name as the
 * sequence holding the parent bottom segment in this genome's parent, so that
 * the copy stays valid when the two parents store their sequences in a
 * different order.
 *
 * Both this genome and dest are expected to have a parent whenever there are
 * top segments to copy; that is not checked here.
 *
 * @param dest genome whose top segments are overwritten
 * @throws hal_exception if a parent sequence referenced by a top segment has
 *         no sequence of the same name in dest's parent
 */
void Genome::copyTopSegments(Genome *dest) const {
    const Genome *inParent = getParent();
    const Genome *outParent = dest->getParent();

    TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
    TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
    hal_size_t n = dest->getNumTopSegments();
    assert(n == 0 || n == getNumTopSegments());

    if (n == 0) {
        // Nothing to do if there are no top segments.
        return;
    }

    BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();

    for (; static_cast<hal_size_t>(inTop->getArrayIndex()) < n; inTop->toRight(), outTop->toRight()) {
        // No check is made that the source and destination sequences at this
        // site share a name; only the start position is sanity-checked.
        assert(inTop->getStartPosition() != NULL_INDEX);

        outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
        outTop->setParentReversed(inTop->getParentReversed());
        outTop->setBottomParseIndex(inTop->getBottomParseIndex());
        outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

        // The parent index is an index into the parent's bottom-segment
        // array. Make sure it points into the same-named sequence in dest's
        // parent: keep the offset within the sequence and rebase it onto the
        // destination sequence's first bottom segment.
        hal_index_t parentIndex = inTop->getParentIndex();
        if (parentIndex != NULL_INDEX) {
            inParentBottomSegIt->toParent(inTop);
            const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
            const Sequence *outParentSequence = outParent->getSequence(inParentSequence->getName());
            if (outParentSequence == NULL) {
                throw hal_exception(string("When copying top segments from ") + getName() + " to " +
                                    dest->getName() + ": parent sequence " + inParentSequence->getName() +
                                    " not found in " + outParent->getName());
            }
            hal_index_t inParentSegmentOffset = parentIndex - inParentSequence->getBottomSegmentArrayIndex();
            parentIndex = inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();
        }
        outTop->setParentIndex(parentIndex);
    }
}
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve chain (pairwise alignment) " "information from a hal database.\n" "WARNING: THIS TOOL WAS NEVER FINISHED OR" " TESTED. USE AT OWN RISK. PLEASE " "CONSIDER halLiftover --outPSL INSTEAD."); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addArgument("genome", "(query) genome to process"); optionsParser->addOption("sequence", "sequence name in query genome (" "all sequences if not specified)", "\"\""); optionsParser->addOption("start", "start position in query genome", 0); optionsParser->addOption("length", "maximum length of chain to output.", 0); optionsParser->addOption("chainFile", "path for output file. stdout if not" " specified", "\"\""); optionsParser->addOption("maxGap", "maximum indel length to be considered a gap within" " a chain.", 20); string halPath; string chainPath; string genomeName; string sequenceName; hal_size_t start; hal_size_t length; hal_size_t maxGap; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); genomeName = optionsParser->getArgument<string>("genome"); sequenceName = optionsParser->getOption<string>("sequence"); start = optionsParser->getOption<hal_size_t>("start"); length = optionsParser->getOption<hal_size_t>("length"); chainPath = optionsParser->getOption<string>("chainFile"); maxGap = optionsParser->getOption<hal_size_t>("maxGap"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK." << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." 
<<endl; AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); const Genome* genome = alignment->openGenome(genomeName); if (genome == NULL) { throw hal_exception(string("Genome not found: ") + genomeName); } hal_index_t endPosition = length > 0 ? start + length : genome->getSequenceLength(); const Sequence* sequence = NULL; if (sequenceName != "\"\"") { sequence = genome->getSequence(sequenceName); if (sequence == NULL) { throw hal_exception(string("Sequence not found: ") + sequenceName); } start += sequence->getStartPosition(); endPosition = length > 0 ? start + length : sequence->getSequenceLength(); } ofstream ofile; ostream& outStream = chainPath == "\"\"" ? cout : ofile; if (chainPath != "\"\"") { ofile.open(chainPath.c_str()); if (!ofile) { throw hal_exception(string("Error opening output file ") + chainPath); } } TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator(); top->toSite(start, false); // do slicing here; GappedTopSegmentIteratorConstPtr gtop = genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap); // need to review! Chain chain; chain._id = 0; while (gtop->getRightArrayIndex() < (hal_index_t)genome->getNumTopSegments() && gtop->getLeft()->getStartPosition() < endPosition) { if (gtop->hasParent() == true) { hal_offset_t leftOffset = 0; if ((hal_index_t)start > gtop->getStartPosition() && (hal_index_t)start < gtop->getEndPosition()) { leftOffset = start - gtop->getStartPosition() ; } hal_offset_t rightOffset = 0; if (endPosition - 1 > gtop->getStartPosition() && endPosition - 1 < gtop->getEndPosition()) { rightOffset = gtop->getEndPosition() + 1 - endPosition; } // need to do offsets for edge cases gtIteratorToChain(gtop, chain, leftOffset, rightOffset); outStream << chain; ++chain._id; } gtop->toRight(); } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }