// Four-argument deletion score for a DNASequence reference and a
// FASTQSequence query.  The query sequence and query position are ignored;
// the result is whatever the two-argument Deletion(seq, refPos) overload
// returns.
int QualityValueScoreFunction<DNASequence, FASTQSequence>::Deletion(DNASequence &seq,
                                                                    DNALength refPos,
                                                                    FASTQSequence &querySeq,
                                                                    DNALength queryPos)
{
    PB_UNUSED(querySeq);
    PB_UNUSED(queryPos);

    return Deletion(seq, refPos);
}
// Generic (unspecialized) insertion score.  None of the arguments are used;
// the call is routed to QualityScoreTypeNotSpecified("Insertion") and its
// result is returned unchanged.
int QualityValueScoreFunction<T_RefSequence, T_QuerySequence>::Insertion(T_RefSequence &ref,
                                                                         DNALength refPos,
                                                                         T_QuerySequence &seq,
                                                                         DNALength pos)
{
    PB_UNUSED(ref);
    PB_UNUSED(refPos);
    PB_UNUSED(seq);
    PB_UNUSED(pos);

    return QualityScoreTypeNotSpecified("Insertion");
}
// Generic (unspecialized) match score.  None of the arguments are used; the
// call is routed to QualityScoreTypeNotSpecified("Match") and its result is
// returned unchanged.
int QualityValueScoreFunction<T_RefSequence, T_QuerySequence>::Match(T_RefSequence &ref,
                                                                     DNALength refPos,
                                                                     T_QuerySequence &query,
                                                                     DNALength queryPos)
{
    PB_UNUSED(ref);
    PB_UNUSED(refPos);
    PB_UNUSED(query);
    PB_UNUSED(queryPos);

    return QualityScoreTypeNotSpecified("Match");
}
// Generic (unspecialized) deletion score.  None of the arguments are used;
// the call is routed to QualityScoreTypeNotSpecified("Deletion") and its
// result is returned unchanged.
int QualityValueScoreFunction<T_RefSequence, T_QuerySequence>::Deletion(T_RefSequence &seq,
                                                                        DNALength refPos,
                                                                        T_QuerySequence &querySeq,
                                                                        DNALength queryPos)
{
    PB_UNUSED(seq);
    PB_UNUSED(refPos);
    PB_UNUSED(querySeq);
    PB_UNUSED(queryPos);

    return QualityScoreTypeNotSpecified("Deletion");
}
// Insertion score for a DNASequence reference and a FASTQSequence query.
// All four arguments are ignored and the flat `ins` value is returned.
int QualityValueScoreFunction<DNASequence, FASTQSequence>::Insertion(DNASequence &ref,
                                                                     DNALength refPos,
                                                                     FASTQSequence &query,
                                                                     DNALength pos)
{
    PB_UNUSED(ref);
    PB_UNUSED(refPos);
    PB_UNUSED(query);
    PB_UNUSED(pos);

    // Positive value for quality value penalizes the alignment.
    // Per-base alternatives that are currently disabled:
    //    return query.qual[pos];
    //    return Insertion(query, pos);
    return ins;
}
// Locate `seq` in two steps: Count(seq, sp, ep) fills in the [sp, ep] pair,
// which is then handed to Locate(sp, ep, positions) to populate `positions`.
// The value returned by that second call is passed straight through.
// `maxCount` is accepted but not used.
DNALength Locate(T_DNASequence &seq, std::vector<DNALength> &positions, DNALength maxCount = 0)
{
    PB_UNUSED(maxCount);

    DNALength ep, sp;
    Count(seq, sp, ep);

    return Locate(sp, ep, positions);
}
// Build `index`, which records the position in `seq` at which every
// index.binSize-th run begins.  A run is a maximal stretch of consecutive
// positions whose ThreeBit codes are equal, capped at `maxRun` positions.
//
// The sequence is scanned twice: once to count the runs so the index can be
// sized, and once to fill it in.  Returns index.size().
//
// NOTE(review): `binSize` is not used; the bin width comes from index.binSize.
// NOTE(review): the run length starts at 1 and only grows, so the
// `runLength == 0` test below can never be true -- possibly `maxRun == 0`
// (no cap) was intended.  Behaviour is kept as-is; confirm with the author.
int CompressedSequence<T_Sequence>::BuildReverseIndex(int maxRun, int binSize)
{
    PB_UNUSED(binSize);
    hasIndex = 1;

    //
    // Pass 1: count the runs.
    //
    DNALength nRuns = 0;
    for (DNALength pos = 0; pos < length; pos++) {
        int runLength = 1;
        // Advance to the last position of the current run.
        while (pos < length - 1 && ThreeBit[seq[pos]] == ThreeBit[seq[pos + 1]] &&
               (runLength == 0 || runLength < maxRun)) {
            pos++;
            runLength++;
        }
        nRuns++;
    }

    //
    // Pass 2: allocate the index and store the start of every
    // index.binSize-th run.
    //
    index.Free();
    index.indexLength = nRuns / index.binSize + 1;
    index.index = ProtectedNew<int>(index.indexLength);

    nRuns = 0;
    int slot = 0;
    for (DNALength pos = 0; pos < length; pos++) {
        if (nRuns % index.binSize == 0) {
            assert(slot < index.indexLength);
            index.index[slot] = pos;
            ++slot;
        }
        int runLength = 1;
        while (pos < length - 1 && ThreeBit[seq[pos]] == ThreeBit[seq[pos + 1]] &&
               (runLength == 0 || runLength < maxRun)) {
            pos++;
            runLength++;
        }
        nRuns++;
    }

    return index.size();
}
// Sum the count-table entries of the tuples read from `seq`.
//
// A tuple is read at every offset in [0, end - tm.tupleSize], converted to its
// long index, and the matching entry of ct.countTable is added to the total.
// Passing end == -1 means "use seq.length".
//
// NOTE(review): `start` is ignored; the scan always begins at offset 0.
int ComputeTotalTupleCount(TupleMetrics &tm, TupleCountTable<TSequence, T_Tuple> &ct,
                           TSequence &seq, int start, int end)
{
    PB_UNUSED(start);

    if (end == -1) {
        end = seq.length;
    }

    const int nTuples = end - tm.tupleSize + 1;
    if (nTuples == 0) {
        return 0;
    }

    int totalCount = 0;
    T_Tuple tuple;
    for (int offset = 0; offset < nTuples; ++offset) {
        tuple.FromStringLR(&seq.seq[offset], tm);
        totalCount += ct.countTable[tuple.ToLongIndex()];
    }
    return totalCount;
}
/* Walk through all the registered extensions and give them a chance
 * to encode themselves.
 *
 * pData points at the head pointer of a linked list of pb_extension_t.
 * Each node is encoded with its own type->encode callback when one is set,
 * otherwise with default_extension_encoder().  The walk stops and false is
 * returned as soon as one encoder fails; true is returned when every node
 * was encoded (including when the list is empty).  `field` is unused. */
static bool checkreturn encode_extension_field(pb_ostream_t *stream,
    const pb_field_t *field, const void *pData)
{
    const pb_extension_t *ext;
    PB_UNUSED(field);

    for (ext = *(const pb_extension_t* const *)pData; ext; ext = ext->next)
    {
        bool ok;

        if (ext->type->encode)
            ok = ext->type->encode(stream, ext);
        else
            ok = default_extension_encoder(stream, ext);

        if (!ok)
            return false;
    }

    return true;
}
/* Field encoder for fixed32 values: hands `src` to pb_encode_fixed32() and
 * returns its result.  The field descriptor is not needed. */
static bool checkreturn pb_enc_fixed32(pb_ostream_t *stream,
    const pb_field_t *field, const void *src)
{
    PB_UNUSED(field);

    return pb_encode_fixed32(stream, src);
}
int ExtendAlignment(T_QuerySeq &querySeq, int queryPos, T_RefSeq &refSeq, int refPos, int k, std::vector<int> &scoreMat, std::vector<Arrow> &pathMat, T_Alignment &alignment, T_ScoreFn &scoreFn, T_Index &index, int minExtendNBases=1, // Require that there // are more than one // base to align. int maxNDrops=2 // A drop is a row where // the alignment is // extended without // increasing the alignment // score. maxnDrops is the // maximum number of times // that one may have before // terminating the alignment // ) { PB_UNUSED(queryPos); PB_UNUSED(refPos); // // Try extending an alignment in the forward direction as long the // maximum score that is extended is above a threshold above the // initial score. This dynamically grows the alignment matrices as // the alignment is extended (or the limits of the alignment // matrices since reusable buffers are used). // DNALength nCols = 2 * k + 1 + 1; // 2*k is for search space, +1 is for the // middle band, and the last +1 is for the // boundary conditions at the beginning of // the array. RCToIndex rcToIndex; rcToIndex.band = k; rcToIndex.nCols = nCols; rcToIndex.middleCol = k+2-1; if (index.queryAlignLength < minExtendNBases or index.refAlignLength < minExtendNBases) { // // One of the sequences isn't long enough to even try to extend, // just bail with an empty alignment. // return 0; } // // Preallocate arrays to be at least k long. The full matrix may // not be loaded. // int matSize = nCols * (k+1); if (scoreMat.size() < nCols * (k+1)) { scoreMat.resize(nCols * (k+1)); pathMat.resize(nCols * (k+1)); } // // Initialize boundary conditions. // int q; int t; // Initialize first column for insertions. int firstIndex; fill(scoreMat.begin(), scoreMat.begin() + matSize, 0); fill(pathMat.begin(), pathMat.begin() + matSize, NoArrow); rcToIndex(0, 0, firstIndex); scoreMat[firstIndex] = 0; pathMat[firstIndex] = NoArrow; // Initialize insertion penalties. 
t = 0; int i; int pi; for (q = 1; q <= k and index.QNotAtSeqBoundary(q-1); q++) { bool res = rcToIndex(q, t, i); assert(res); res = rcToIndex(q-1, t, pi); int qSeqPos = index.QuerySeqPos(q-1); scoreMat[i] = scoreMat[pi] + scoreFn.Insertion(querySeq, qSeqPos); pathMat[i] = Up; // cout << "initializing insertion gap penalty for " << q << " " << refPos-1 << " " << i << " " << scoreMat[i] << endl; } // Initialize the first row for deletions. q = 0; for (t = 1; t <= k and index.TNotAtSeqBoundary(t-1); t++) { bool res = rcToIndex(q, t, i); assert(res); int previ; res = rcToIndex(q,t-1,previ); int qSeqPos = index.QuerySeqPos(0); scoreMat[i] = scoreMat[previ] + scoreFn.Deletion(querySeq, qSeqPos); pathMat[i] = Left; // cout << "initializing deletion gap penalty for " << ((int)queryPos)-1 << " " << t << " " << i << " " << scoreMat[i] << endl; } /* PrintFlatMatrix(&scoreMat[0], k , nCols, cout); cout << endl; PrintFlatMatrix(&pathMat[0], k, nCols, cout); cout << endl; */ int nDrops = 0; int prevRowMinScore = INF_INT; int globalMinScore = INF_INT; int globalMinScoreQPos = 0; int globalMinScoreTPos = 0; int curIndex = -1; int maxAlignLength = std::min(index.QAlignLength(), index.TAlignLength()) + maxNDrops; for (q = 1; (index.QNotAtSeqBoundary(q-1) and nDrops < maxNDrops and q < maxAlignLength); q++ ) { // // Grow the path and score matrices by another row if this has // extended beyond their current capacity. // if ((q+1) * nCols > scoreMat.size()) { scoreMat.resize((q+1)*nCols); pathMat.resize((q+1)*nCols); } // // Now score the latest row. 
// int curRowMinScore = INF_INT; int diagLength = q; int tStart = std::max((int) 1, ((int)diagLength) - k); int tEnd = std::min((int) (diagLength + k +1), index.TAlignLength() + 1 ); int qSeqPos, tSeqPos; for (t = tStart; t < std::min(tEnd, maxAlignLength); t++) { int insIndex, delIndex, matchIndex; bool hasInsIndex = false, hasDelIndex = false, hasMatchIndex = false, hasCurIndex = false; hasCurIndex = rcToIndex(q, t, curIndex); assert(hasCurIndex); hasDelIndex = rcToIndex(q, t - 1, delIndex); hasInsIndex = rcToIndex(q - 1, t, insIndex); hasMatchIndex = rcToIndex(q-1, t-1, matchIndex); int insScore, delScore, matchScore; delScore = INF_INT; insScore = INF_INT; matchScore = INF_INT; // cout << "ins index: " << insIndex << " del: " << delIndex << " match index " << matchIndex << endl; qSeqPos = index.QuerySeqPos(q-1); // The offset is to allow for the boundary buffer. tSeqPos = index.RefSeqPos(t-1); // ditto. /* if (scoreMat[insIndex] == -1) { cout << "bleh" << endl; } if (scoreMat[matchIndex] == -1) { cout << "bleh" << endl; } if (scoreMat[delIndex] == -1) { cout << "bleh" << endl; } if (scoreFn.Insertion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos) == -1) { cout << "bleh" << endl; } if (scoreFn.Deletion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos) == -1) { cout << "ugh" << endl; } if ( scoreFn.Match(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos) == -1) { cout <<" gah" << endl; }*/ if (hasInsIndex) { insScore = scoreMat[insIndex] + scoreFn.Insertion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos); } if (hasDelIndex) { delScore = scoreMat[delIndex] + scoreFn.Deletion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos); } if (hasMatchIndex) { matchScore = scoreMat[matchIndex] + scoreFn.Match(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos); } /* cout << "ins score: " << insScore << "[" << scoreMat[insIndex] << "] del score " << delScore << " [" << scoreMat[delIndex] << "] match score " << 
matchScore << " [" << scoreMat[matchIndex] << "] qchar " << (int) querySeq.seq[qSeqPos] << " tchar " << (int) refSeq.seq[tSeqPos] << endl;*/ int minScore = std::min(matchScore, delScore); minScore = std::min(minScore, insScore); scoreMat[curIndex] = minScore; // cout << "extend: " << qSeqPos << " " << tSeqPos << " " << minScore << endl; if (minScore != INF_INT) { if (minScore == insScore) { pathMat[curIndex] = Up; } if (minScore == delScore) { pathMat[curIndex] = Left; } if (minScore == matchScore) { pathMat[curIndex] = Diagonal; } } else { pathMat[curIndex] = NoArrow; } assert(pathMat[curIndex] != NoArrow); if (minScore < curRowMinScore) { curRowMinScore = minScore; } if (minScore < globalMinScore) { globalMinScore = minScore; globalMinScoreQPos = q; globalMinScoreTPos = t; } } if (curRowMinScore > prevRowMinScore) { nDrops++; } prevRowMinScore = curRowMinScore; } q = globalMinScoreQPos; t = globalMinScoreTPos; std::vector<Arrow> optAlignment; rcToIndex(q,t,i); // // When the optimal score is on a cell with NoArrow, there is no // good alignment. Only try and trace an alignment out if the path // starts on a good alignment. // if (pathMat[i] != NoArrow) { while(q > 0 or t > 0) { int res; res = rcToIndex(q, t, i); assert(res != 0); Arrow arrow = pathMat[i]; optAlignment.push_back(pathMat[i]); if (pathMat[i] == NoArrow) { assert(pathMat[i] != NoArrow); } if (arrow == Diagonal) { q--; t--; } else if (arrow == Left) { t--; } else if (arrow == Up) { q--; } } } index.OrderArrowVector(optAlignment); alignment.ArrowPathToAlignment(optAlignment); alignment.qPos = index.GetQueryStartPos(q, globalMinScoreQPos); alignment.tPos = index.GetRefStartPos(t, globalMinScoreTPos); return globalMinScore; }
// Two-argument deletion score for a DNASequence reference.  The reference
// and position are ignored and the flat `del` value is returned.
int QualityValueScoreFunction<DNASequence, FASTQSequence>::Deletion(DNASequence &ref,
                                                                    DNALength pos)
{
    PB_UNUSED(ref);
    PB_UNUSED(pos);

    // For now there is no global deletion quality value.
    return del;
}