// Map a single position of an input sequence to the side it corresponds to.
//
// The per-sequence map is keyed by interval start coordinates; the entry
// whose key is the greatest one <= inPos identifies the containing interval.
//  - If that entry's base is SideGraph::NullPos, the entry's side is returned
//    unchanged.
//  - If the entry is forward, the result base is entryBase + offset.
//  - If the entry is not forward, the interval is read right-to-left, so the
//    result base is entryBase + intervalLength - 1 - offset, where the
//    interval length is the distance to the next key in the map.
// The returned side keeps the forward flag stored in the entry.
SGSide SGLookup::mapPosition(const SGPosition& inPos) const
{
  const PosMap& markers = _mapVec.at(inPos.getSeqID());

  // first marker at or after the query; step back one if it is strictly after
  PosMap::const_iterator cur = markers.lower_bound(inPos.getPos());
  assert(cur != markers.end());
  if (cur->first > inPos.getPos())
  {
    assert(cur != markers.begin());
    --cur;
  }

  SGSide mapped(cur->second);
  if (mapped.getBase() == SideGraph::NullPos)
  {
    return mapped;
  }

  assert(cur->first <= inPos.getPos());
  const sg_int_t offset = inPos.getPos() - cur->first;

  SGPosition target(mapped.getBase());
  if (mapped.getForward() == false)
  {
    // mirror the offset inside the interval [cur->first, next->first)
    PosMap::const_iterator next = cur;
    ++next;
    const sg_int_t intervalLen = next->first - cur->first;
    target.setPos(target.getPos() + intervalLen - 1 - offset);
  }
  else
  {
    target.setPos(target.getPos() + offset);
  }

  mapped.setBase(target);
  return mapped;
}
void SGLookup::addInterval(const SGPosition& inPos, const SGPosition& outPos, sg_int_t length, bool reversed) { PosMap& pm = _mapVec.at(inPos.getSeqID()); sg_int_t left = inPos.getPos(); sg_int_t right = inPos.getPos() + length; // find the left point SGSide leftSide(SideGraph::NullPos, true); PosMap::iterator li = pm.insert(pair<sg_int_t, SGSide>( left, leftSide)).first; // find the right point PosMap::iterator ri = li; ++ri; if (ri == pm.end() || ri->second.getBase().getPos() != right) { SGSide rightSide(SideGraph::NullPos, true); ri = pm.insert(li, pair<sg_int_t, SGSide>(right, rightSide)); --ri; assert(ri == li); } // update the left point assert(li->second.getBase() == SideGraph::NullPos); li->second = SGSide(outPos, !reversed); }
void Side2Seq::convertSequence(const SGSequence* seq) { // we exclude the very first and last sides as they don't induce breaks // (very first start woult have forward == true, for example..) SGSide start = SGSide(SGPosition(seq->getID(), 0), false); SGSide end = SGSide(SGPosition(seq->getID(), seq->getLength() - 1), true); set<SGSide> cutSides; if (seq->getLength() > 1) { getIncidentJoins(start, end, cutSides); } if (_chop > 0) { getChopSides(seq, cutSides); } cleanCutSides(cutSides); SGPosition first(seq->getID(), 0); int firstIdx = _outGraph->getNumSequences(); for (set<SGSide>::iterator i = cutSides.begin(); i != cutSides.end(); ++i) { SGPosition last = i->getBase(); assert(last.getSeqID() == first.getSeqID()); if (i->getForward() == true) { // left side of base: don't include this position last.setPos(last.getPos() - 1); } // add it addOutSequence(seq, first, last); // add one because segments inclusive first.setPos(last.getPos() + 1); } // need to do one segment at end SGPosition last(seq->getID(), seq->getLength() - 1); addOutSequence(seq, first, last); // chain all the added seqeunces with new joins for (int j = firstIdx + 1; j < _outGraph->getNumSequences(); ++j) { const SGSequence* fs = _outGraph->getSequence(j-1); const SGSequence* ts = _outGraph->getSequence(j); SGSide side1(SGPosition(fs->getID(), fs->getLength() - 1), false); SGSide side2(SGPosition(ts->getID(), 0), true); const SGJoin* newJoin = _outGraph->addJoin(new SGJoin(side1, side2)); verifyOutJoin(newJoin); } }
void Side2Seq::addOutSequence(const SGSequence* inSeq, const SGPosition& first, const SGPosition& last) { int length = last.getPos() - first.getPos() + 1; assert(length > 0); const SGSequence* outSeq = _outGraph->addSequence( new SGSequence(inSeq->getID(), length, getOutSeqName(inSeq, first, length))); _luTo.addInterval(first, SGPosition(outSeq->getID(), 0), length, false); // add the bases assert(_outBases.size() == outSeq->getID()); _outBases.resize(outSeq->getID() + 1); getInDNA(SGSegment(SGSide(first, true), length), _outBases.back()); }
// Name for an output sequence cut from inSeq starting at `first`.
//
// Returns the empty string when _stripSeqNames is set; otherwise the input
// sequence's name, an underscore, and the start position.  `length` is part
// of the interface but does not influence the name.
string Side2Seq::getOutSeqName(const SGSequence* inSeq,
                               const SGPosition& first,
                               int length) const
{
  if (_stripSeqNames != true)
  {
    stringstream nameBuilder;
    nameBuilder << inSeq->getName() << "_" << first.getPos();
    return nameBuilder.str();
  }
  return "";
}
void SGLookup::getPath(const SGPosition& startPos, const SGPosition& endPos, vector<SGSegment>& outPath) const { SGPosition halStart = startPos; SGPosition halEnd = endPos; bool backward = endPos < startPos; if (backward == true) { // always make a forward path. if query is reversed, we will // remember here and flip at very end. swap(halStart, halEnd); } assert(halStart.getSeqID() == halEnd.getSeqID()); const PosMap& pm = _mapVec.at(halStart.getSeqID()); // find marker to the right of halStart PosMap::const_iterator i = pm.lower_bound(halStart.getPos()); assert(i != pm.end()); if (i->first == halStart.getPos()) { ++i; } // find marker >= halEnd (ie one-past what we iterate) PosMap::const_iterator j = pm.lower_bound(halEnd.getPos()); assert(j != pm.begin()); if (j->first == halEnd.getPos()) { ++j; } outPath.clear(); sg_int_t pathLength = 0; sg_int_t prevHalPos = halStart.getPos(); SGSide prevSgSide = mapPosition(halStart); for (PosMap::const_iterator k = i; k != j; ++k) { assert(k != pm.end()); assert(k != pm.begin()); sg_int_t halPos = k->first; sg_int_t segLen = halPos - prevHalPos; assert(segLen > 0); SGSide sgSide = k->second; // note we are taking advantage of the fact that // the side returned by mapPosition has a forward flag // consistent with its use in sgsegment. if (prevSgSide.getForward() == false && k != i) { // interval lookup always stores intervals left->right. 
// for reverse mapping, we have to manually adjust segment // coordinate (unless first iteration, which is already adjusted // by call to mapPosition) prevSgSide.setBase(SGPosition(prevSgSide.getBase().getSeqID(), prevSgSide.getBase().getPos() + segLen -1)); } outPath.push_back(SGSegment(prevSgSide, segLen)); pathLength += segLen; prevHalPos = halPos; prevSgSide = sgSide; } sg_int_t segLen = halEnd.getPos() - prevHalPos + 1; assert(segLen > 0); if (prevSgSide.getForward() == false) { prevSgSide.setBase(SGPosition(prevSgSide.getBase().getSeqID(), prevSgSide.getBase().getPos() + segLen -1)); } outPath.push_back(SGSegment(prevSgSide, segLen)); pathLength += segLen; (void)pathLength; if (halStart < halEnd) { assert(pathLength == halEnd.getPos() - halStart.getPos() + 1); } // we really wanted our path in the other direction. flip the // order of the vector, and the orientation of every segment. if (backward == true) { reverse(outPath.begin(), outPath.end()); for (size_t i = 0; i < outPath.size(); ++i) { outPath[i].flip(); } } }