/*
 * Align_Recursion
 *
 * Looks for a local alignment inside trapezoid `b`, records it in SegSols
 * if it is long enough and its error estimate is small enough, and then
 * recurses on the parts of the trapezoid below and above the segment found.
 *
 *   A, Alen, B, Blen  the two sequences and their lengths; they are handed
 *                     to TraceForwardPath/TraceReversePath with B first.
 *   b                 trapezoid to search: bot/top bound the B coordinate,
 *                     lft/rgt bound the diagonal band.
 *   current           index of this trapezoid in Tarray; only trapezoids
 *                     after it are tested for coverage.
 *   comp              when non-zero the reported B coordinates and diagonals
 *                     are mapped through Blen (presumably a complemented B
 *                     strand -- confirm with the caller).
 *   MinLen            minimum extent required in both A and B.
 *   MaxDiff           largest value of the error estimate `pcnt` accepted.
 *   Traplen           number of entries in Tarray.
 *
 * Side effects: may set entries of Covered[], may grow SegSols (updating
 * SegMax) and appends accepted hits at SegSols[NumSegs++].
 */
void Align_Recursion(char *A, int Alen, char *B, int Blen,
                     Trapezoid *b, int current, int comp,
                     int MinLen, double MaxDiff, int Traplen)
{
  int j, mid, indel;
  float pcnt;
  DPHit *hend, *lend;
  Trapezoid ltrp, htrp;

  /* Start the search from the middle B row of the trapezoid. */
  mid = (b->bot + b->top) / 2;

#ifdef REPORT_DPREACH
  printf(" [%d,%d]x[%d,%d] = %d (Depth = %d)\n",
         b->bot,b->top,b->lft,b->rgt,b->top - b->bot + 1,Al_depth);
#endif

  lend = TraceForwardPath(B,Blen,A,Alen,mid,mid-b->rgt,mid-b->lft);

  /* Trace back from the forward end point.  The last argument grows with
     each retry; retry while the reverse trace still starts beyond
     mid + x*MAXIGAP and scores less than the forward trace. */
  {
    int x;

    x = 0;
    do {
      x += 1;
      hend = TraceReversePath(B,Blen,A,Alen,
                              lend->bepos,lend->aepos,lend->aepos,
                              mid+MAXIGAP,BLOCKCOST+2*x*DIFFCOST);
    } while (hend->bbpos > mid + x*MAXIGAP && hend->score < lend->score);
  }

  /* The segment ends where the forward trace ended. */
  hend->aepos = lend->aepos;
  hend->bepos = lend->bepos;

#ifdef REPORT_DPREACH
  printf(" Got [%d,%d]x[%d,%d] ([%d,%d]) at score = %d\n",
         hend->bbpos,hend->bepos,hend->ldiag,hend->hdiag,
         hend->abpos,hend->aepos,hend->score);
#endif

  /* Sub-trapezoids strictly below and above the segment, with a MAXIGAP
     margin.  They are computed before hend is rewritten further down. */
  ltrp = htrp = *b;
  ltrp.top = hend->bbpos - MAXIGAP;
  htrp.bot = hend->bepos + MAXIGAP;

  if (hend->bepos - hend->bbpos >= MinLen &&
      hend->aepos - hend->abpos >= MinLen) {
    /* Net diagonal drift between the start and the end of the segment. */
    indel = abs( (hend->abpos - hend->bbpos)
               - (hend->aepos - hend->bepos) );

    pcnt = (float)((1/RMATCHCOST)
                   - (hend->score - indel)
                     / (RMATCHCOST*(hend->bepos - hend->bbpos)));

    if (pcnt <= MaxDiff) {
      hend->error = pcnt;

      /* Mark later trapezoids that this hit covers almost entirely. */
      for (j = current+1; j < Traplen; j++) {
        Trapezoid *t;
        int ta, tb, ua, ub;

        t = Tarray[j];
        if (t->bot >= hend->bepos)
          break;

        tb = t->top - t->bot + 1;       /* B extent of t        */
        ta = t->rgt - t->lft + 1;       /* diagonal extent of t */

        /* Intersect the diagonal ranges of t and the hit. */
        if (t->lft < hend->ldiag)
          ua = hend->ldiag;
        else
          ua = t->lft;
        if (t->rgt > hend->hdiag)
          ub = hend->hdiag;
        else
          ub = t->rgt;
        if (ua > ub)
          continue;
        ua = ub - ua + 1;               /* shared diagonals */

        /* Part of t's B range lying at or before the end of the hit. */
        if (t->top > hend->bepos)
          ub = hend->bepos - t->bot + 1;
        else
          ub = tb;

        if (((1.*ua)/ta)*((1.*ub)/tb) > .99)
          Covered[j] = 1;
      }

      /* Grow the result array before appending. */
      if (NumSegs >= SegMax) {
        SegMax = (int)(1.2*NumSegs + 500);
        SegSols = (DPHit *) ckrealloc(SegSols,
                                      sizeof(DPHit)*SegMax,
                                      "Segment Alignment array");
      }

      {
        int d;

        d = hend->ldiag;  /* Oops, diags to this point are b-a, not a-b. */
        hend->ldiag = - (hend->hdiag);
        hend->hdiag = - d;
        if (comp) {
          hend->bbpos = Blen - hend->bbpos;
          hend->bepos = Blen - hend->bepos;
          hend->ldiag = Blen + hend->ldiag;
          hend->hdiag = Blen + hend->hdiag;
        }
      }

      SegSols[NumSegs++] = *hend;

#ifdef REPORT_DPREACH
      printf(" Hit from (%d,%d) to (%d,%d) within [%d,%d] score %d\n",
             hend->abpos,hend->bbpos,hend->aepos,hend->bepos,
             hend->ldiag,hend->hdiag,hend->score);
#endif
    }
  }

#ifdef REPORT_DPREACH
  Al_depth += 1;
#endif

  /* Recurse below the hit only when the remaining piece is tall enough and
     its top really moved down from the parent's top. */
  if (ltrp.top - ltrp.bot > MinLen && ltrp.top < b->top - MAXIGAP)
    Align_Recursion(A,Alen,B,Blen,&ltrp,current,comp,MinLen,MaxDiff,Traplen);
  if (htrp.top - htrp.bot > MinLen)
    Align_Recursion(A,Alen,B,Blen,&htrp,current,comp,MinLen,MaxDiff,Traplen);

#ifdef REPORT_DPREACH
  Al_depth -= 1;
#endif
}
/*
 * Align_Recursion
 *
 * Looks for a local alignment inside trapezoid `b`, records it in SegSols
 * if it is long enough and its error estimate is small enough, and then
 * recurses on the parts of the trapezoid below and above the segment found.
 * If the first forward/reverse trace looks like it missed a segment, the
 * search is repeated on the reversed sequences ArevC/BrevC.
 *
 *   A, Alen, B, Blen  the two sequences and their lengths; they are handed
 *                     to TraceForwardPath/TraceReversePath with B first.
 *   b                 trapezoid to search: bot/top bound the B coordinate,
 *                     lft/rgt bound the diagonal band.  It is temporarily
 *                     rewritten in place during the reversed search and
 *                     restored afterwards.
 *   current           index of this trapezoid in Tarray; only trapezoids
 *                     after it are tested for coverage.
 *   comp              when non-zero the reported B coordinates and diagonals
 *                     are mapped through Blen (presumably a complemented B
 *                     strand -- confirm with the caller).
 *   MinLen            minimum extent required in both A and B.
 *   MaxDiff           largest value of the error estimate `pcnt` accepted.
 *   Traplen           number of entries in Tarray.
 *
 * Side effects: may set entries of Covered[], may grow SegSols (updating
 * SegMax) and appends accepted hits at SegSols[NumSegs++].
 */
static void Align_Recursion(char *A, int Alen, char *B, int Blen,
                            Trapezoid *b, int current, int comp,
                            int MinLen, float MaxDiff, int Traplen)
{
  int j, mid, indel;
  float pcnt;
  Local_Segment *hend, *lend;
  Trapezoid ltrp, htrp;

  /* Starting row for the search: the middle of the trapezoid unless
     START_AT_BEGINNING_OF_TRAP is defined (it is force-undefined here). */
#undef START_AT_BEGINNING_OF_TRAP
#ifdef START_AT_BEGINNING_OF_TRAP
  mid = b->bot;
#else
  mid = (b->bot + b->top) / 2;
#endif

#ifdef REPORT_DPREACH
  fprintf(stderr, " [%d,%d]x[%d,%d] = %d (Depth = %d)\n",
          b->bot,b->top,b->lft,b->rgt,b->top - b->bot + 1,Al_depth);
#endif

  lend = TraceForwardPath(B,Blen,A,Alen,mid,mid-b->rgt,mid-b->lft);

  {
    int x;

    /* Trace back from the forward end point.  The last argument grows with
       each retry; retry while the reverse trace still starts beyond
       mid + x*MAXIGAP and scores less than the forward trace. */
    x = 0;
    do {
      x += 1;
      hend = TraceReversePath(B,Blen,A,Alen,
                              lend->bepos,lend->aepos,lend->aepos,
                              mid+MAXIGAP,BLOCKCOST+2*x*diffcost);
    } while (hend->bbpos > mid + x*MAXIGAP && hend->score < lend->score);

    /* The segment ends where the forward trace ended. */
    hend->aepos = lend->aepos;
    hend->bepos = lend->bepos;

    /* We can miss a small segment here!

       The segment is at the beginning of a trapezoid; it is followed by a
       run of bad luck which is
         - not bad enough to terminate an extension, but
         - long enough to have a negative score with absolute value greater
           than the positive value of the segment that will be missed.
       After the run of bad luck is a larger good run which *does* exceed
       the bad run, so that the best value for the forward extension goes
       past the bad run.

       What happens is that when we trace backwards, the best value occurs
       after the bad segment.  Thus, even if we start the search before the
       bad run, the returned segment starts after the bad run.

       Basically this means that we can miss a segment if it has a positive
       value smaller than BLOCKCOST.

       So, if we want small minimum segments, could we lower BLOCKCOST?
       This seems not to work; for instance, with a scoring scheme of 1:10,
       but a desire to find a segment consisting of, e.g., 10 matches with
       one mismatch in the middle (i.e. score = 1), we'd have to have
       BLOCKCOST = 0 -- not a good idea!

       So, how about testing for the possible case and running the search
       backwards when it occurs?

       New case on which this same logic is attempted: if we are trying a
       recursive alignment (based on ltrp or htrp) but the TraceForwardPath
       step got nowhere, then try in reverse orientation. */
    if (hend->bbpos > mid+x*MAXIGAP || hend->bepos==mid) {

#ifdef WARN_MISSED_SEGMENT
      fprintf(stderr, "WARNING: might have missed a small local segment (possible segment with score < blockcost)!\n");
#endif

#define CHECK_FOR_MISSING_SEGMENT
#ifdef CHECK_FOR_MISSING_SEGMENT

#ifdef WARN_MISSED_SEGMENT
      fprintf(stderr, "WARNING: will try to reverse direction of search!\n");
#endif

      /* Need to:
           reverse both sequences
           reverse mid, top, bottom, left and right
           run forward
           run backward until converged
           reverse resulting segment
           reverse both sequences */

      /* Map mid and the trapezoid into reversed coordinates. */
      mid = Blen-mid-1;
      {
        int tmp;

        tmp = b->top;
        b->top = Blen-b->bot-1;
        b->bot = Blen-tmp-1;

        tmp = b->rgt;
        b->rgt = Blen-Alen-b->lft;
        b->lft = Blen-Alen-tmp;
      }

      lend = TraceForwardPath(BrevC,Blen,ArevC,Alen,mid,mid-b->rgt,mid-b->lft);

      {
        int xr;   /* retry counter for the reversed search */

        xr = 0;
        do {
          xr += 1;
          hend = TraceReversePath(BrevC,Blen,ArevC,Alen,
                                  lend->bepos,lend->aepos,lend->aepos,
                                  mid+MAXIGAP,BLOCKCOST+2*xr*diffcost);
        } while (hend->bbpos > mid + xr*MAXIGAP && hend->score < lend->score);

        hend->aepos = lend->aepos;
        hend->bepos = lend->bepos;
      }

      /* Map mid, the trapezoid and the segment back to forward coordinates;
         applying the same transform a second time restores mid and *b. */
      mid = Blen-mid-1;
      {
        int tmp;

        tmp = b->top;
        b->top = Blen-b->bot-1;
        b->bot = Blen-tmp-1;

        tmp = b->rgt;
        b->rgt = Blen-Alen-b->lft;
        b->lft = Blen-Alen-tmp;

        tmp = hend->ldiag;
        hend->ldiag = Blen-Alen-hend->hdiag;
        hend->hdiag = Blen-Alen-tmp;

        /* indices: start, end = positions in between bases,
           so reversing is newpos = len - oldpos */
        tmp = hend->abpos;
        hend->abpos = Alen-hend->aepos;
        hend->aepos = Alen-tmp;

        tmp = hend->bbpos;
        hend->bbpos = Blen-hend->bepos;
        hend->bepos = Blen-tmp;
      }

#endif /* CHECK_FOR_MISSING_SEGMENT */
    }
  }

  /* Sub-trapezoids below and above the segment, with a MAXIGAP margin and
     clamped to the parent's own bounds.  They are computed before hend is
     rewritten further down. */
  ltrp = htrp = *b;
  ltrp.top = MIN(b->top,hend->bbpos) - MAXIGAP;
  htrp.bot = MAX(b->bot,hend->bepos) + MAXIGAP;

  if (hend->bepos - hend->bbpos >= MinLen &&
      hend->aepos - hend->abpos >= MinLen) {
    /* Net diagonal drift between the start and the end of the segment. */
    indel = abs( (hend->abpos - hend->bbpos)
               - (hend->aepos - hend->bepos) );

    /* original formula for pcnt doesn't seem to be robust to scoring scheme
       variation; use ALTERNATE_PCNT until Gene gets a fix in */
#ifndef ALTERNATE_PCNT
    pcnt = (1/RMATCHCOST)
         - (hend->score - indel) / (RMATCHCOST*(hend->bepos - hend->bbpos));
#else
    pcnt = (-hend->score+samecost*(hend->bepos-hend->bbpos))*1./
           (1.*(MATCHCOST)*(hend->bepos-hend->bbpos));
#endif

    if (pcnt <= MaxDiff) {
      hend->error = pcnt;

      /* Mark later trapezoids that this hit covers almost entirely. */
      for (j = current+1; j < Traplen; j++) {
        Trapezoid *t;
        int ta, tb, ua, ub;

        t = Tarray[j];
        if (t->bot >= hend->bepos)
          break;

        tb = t->top - t->bot + 1;       /* B extent of t        */
        ta = t->rgt - t->lft + 1;       /* diagonal extent of t */

        /* Intersect the diagonal ranges of t and the hit. */
        if (t->lft < hend->ldiag)
          ua = hend->ldiag;
        else
          ua = t->lft;
        if (t->rgt > hend->hdiag)
          ub = hend->hdiag;
        else
          ub = t->rgt;
        if (ua > ub)
          continue;
        ua = ub - ua + 1;               /* shared diagonals */

        /* Part of t's B range lying at or before the end of the hit. */
        if (t->top > hend->bepos)
          ub = hend->bepos - t->bot + 1;
        else
          ub = tb;

        if (((1.*ua)/ta)*((1.*ub)/tb) > .99)
          Covered[j] = 1;
      }

      /* Grow the result array before appending. */
      if (NumSegs >= SegMax) {
        SegMax = (int)(1.2*NumSegs) + 500;
        SegSols = (Local_Segment *) safe_realloc(SegSols,
                                                 sizeof(Local_Segment)*SegMax);
      }

      {
        int d;

        d = hend->hdiag;  /* Oops, diags to this point are b-a, not a-b. */
        hend->hdiag = - (hend->ldiag);
        hend->ldiag = - d;
        if (comp) {
          hend->bbpos = Blen - hend->bbpos;
          hend->bepos = Blen - hend->bepos;
          hend->ldiag = Blen + hend->ldiag;
          hend->hdiag = Blen + hend->hdiag;
        }
      }

      SegSols[NumSegs++] = *hend;

#ifdef REPORT_DPREACH
      fprintf(stderr, " Hit from (%d,%d) to (%d,%d) within [%d,%d] score %d\n",
              hend->abpos,hend->bbpos,hend->aepos,hend->bepos,
              hend->ldiag,hend->hdiag,hend->score);
#endif
    } else {  /* SAK: segment rejected because pcnt exceeds MaxDiff */
#ifdef REPORT_DPREACH
      fprintf(stderr, " FAILED (%g > %g) Hit from (%d,%d) to (%d,%d) within [%d,%d] score %d\n",
              pcnt, MaxDiff,
              hend->abpos,hend->bbpos,hend->aepos,hend->bepos,
              hend->ldiag,hend->hdiag,hend->score);
#endif
    }
  }

#ifdef REPORT_DPREACH
  Al_depth += 1;
#endif

  /* Recurse below the hit only when the remaining piece is tall enough and
     its top really moved down from the parent's top. */
  if (ltrp.top - ltrp.bot > MinLen && ltrp.top < b->top - MAXIGAP) {
    Align_Recursion(A,Alen,B,Blen,&ltrp,current,comp,MinLen,MaxDiff,Traplen);
  }
  if (htrp.top - htrp.bot > MinLen) {
    Align_Recursion(A,Alen,B,Blen,&htrp,current,comp,MinLen,MaxDiff,Traplen);
  }

#ifdef REPORT_DPREACH
  Al_depth -= 1;
#endif
}