Beispiel #1
0
/*
 *  Align_Recursion
 *
 *  Look for one alignment inside trapezoid *b, record it in SegSols when it
 *  passes the length and difference filters, and then recurse on the parts
 *  of the trapezoid lying below and above the alignment that was found.
 *
 *    A, Alen, B, Blen   sequences and lengths, handed on to TraceForwardPath
 *                       and TraceReversePath (B is passed first)
 *    b                  trapezoid to search; read only, never modified here
 *    current            index into Tarray; only entries after it are tested
 *                       for coverage by a recorded hit
 *    comp               when nonzero, the b-coordinates and diagonals of a
 *                       recorded hit are reflected using Blen
 *    MinLen             minimum extent a hit needs in both a and b; also the
 *                       height a sub-trapezoid must exceed to be recursed on
 *    MaxDiff            largest difference fraction accepted for a hit
 *    Traplen            number of Tarray entries considered
 *
 *  Side effects: may set Covered[k], grow SegSols / SegMax, append to
 *  SegSols and advance NumSegs.  The DPHit returned by TraceReversePath is
 *  modified in place.
 */
void Align_Recursion(char *A, int Alen, char *B, int Blen,
                     Trapezoid *b, int current, int comp,
                     int MinLen, double MaxDiff, int Traplen)
{
  Trapezoid below, above;
  DPHit    *fwd, *hit;
  int       center, pass;

  /*  Start the search from the middle row of the trapezoid.  */
  center = (b->bot + b->top) / 2;

#ifdef REPORT_DPREACH
  printf(" [%d,%d]x[%d,%d] = %d (Depth = %d)\n",
         b->bot,b->top,b->lft,b->rgt,b->top - b->bot + 1,Al_depth);
#endif

  fwd = TraceForwardPath(B,Blen,A,Alen,center,center-b->rgt,center-b->lft);

  /*  Trace back from the forward end point.  Each pass raises the last
      argument of TraceReversePath; passes stop as soon as the reverse hit
      begins at or before center + pass*MAXIGAP, or its score is no longer
      below the forward score.                                            */
  pass = 0;
  for (;;)
    { pass += 1;
      hit = TraceReversePath(B,Blen,A,Alen,
                             fwd->bepos,fwd->aepos,fwd->aepos,
                             center+MAXIGAP,BLOCKCOST+2*pass*DIFFCOST);
      if (!(hit->bbpos > center + pass*MAXIGAP && hit->score < fwd->score))
        break;
    }

  /*  The hit ends where the forward trace ended.  */
  hit->aepos = fwd->aepos;
  hit->bepos = fwd->bepos;

#ifdef REPORT_DPREACH
  printf("  Got [%d,%d]x[%d,%d] ([%d,%d]) at score = %d\n",
         hit->bbpos,hit->bepos,hit->ldiag,hit->hdiag,hit->abpos,hit->aepos,hit->score);
#endif

  /*  Sub-trapezoids for the recursion.  They are computed now, before the
      hit's coordinates are possibly rewritten further down.              */
  below = *b;
  above = *b;
  below.top = hit->bbpos - MAXIGAP;
  above.bot = hit->bepos + MAXIGAP;

  if (hit->bepos - hit->bbpos >= MinLen &&
      hit->aepos - hit->abpos >= MinLen)
    { int   shift;
      float diff;

      /*  |start diagonal - end diagonal| of the hit.  */
      shift = abs( (hit->abpos - hit->bbpos)
                 - (hit->aepos - hit->bepos) );
      diff = (float)((1/RMATCHCOST)
           - (hit->score - shift)
           / (RMATCHCOST*(hit->bepos - hit->bbpos)));

      if (diff <= MaxDiff)
        { int k;

          hit->error = diff;

          /*  Flag later trapezoids that this hit covers almost entirely.
              The scan stops at the first one starting at or past the
              hit's b end.                                                */
          for (k = current+1; k < Traplen; k++)
            { Trapezoid *t;
              int rows, diags, lo, hi, shared_diags, shared_rows;

              t = Tarray[k];
              if (t->bot >= hit->bepos) break;

              rows  = t->top - t->bot + 1;
              diags = t->rgt - t->lft + 1;

              /*  Intersect the diagonal band of t with that of the hit.  */
              lo = (t->lft < hit->ldiag) ? hit->ldiag : t->lft;
              hi = (t->rgt > hit->hdiag) ? hit->hdiag : t->rgt;
              if (lo > hi) continue;

              shared_diags = hi - lo + 1;
              shared_rows  = (t->top > hit->bepos) ? hit->bepos - t->bot + 1
                                                   : rows;

              if (((1.*shared_diags)/diags)*((1.*shared_rows)/rows) > .99)
                Covered[k] = 1;
            }

          /*  Make room for one more segment if the array is full.  */
          if (NumSegs >= SegMax)
            { SegMax = (int)(1.2*NumSegs + 500);
              SegSols = (DPHit *) ckrealloc(SegSols,
                                            sizeof(DPHit)*SegMax,
                                            "Segment Alignment array");
            }

          /*  Diagonals up to here are b-a; report them as a-b, which
              negates both bounds and swaps their roles.                  */
          { int old_lo;

            old_lo = hit->ldiag;
            hit->ldiag = - (hit->hdiag);
            hit->hdiag = - old_lo;
          }
          if (comp)
            { hit->bbpos = Blen - hit->bbpos;
              hit->bepos = Blen - hit->bepos;
              hit->ldiag = Blen + hit->ldiag;
              hit->hdiag = Blen + hit->hdiag;
            }

          SegSols[NumSegs] = *hit;
          NumSegs += 1;

#ifdef REPORT_DPREACH
          printf("  Hit from (%d,%d) to (%d,%d) within [%d,%d] score %d\n",
                 hit->abpos,hit->bbpos,hit->aepos,hit->bepos,
                 hit->ldiag,hit->hdiag,hit->score);
#endif
        }
    }

#ifdef REPORT_DPREACH
  Al_depth += 1;
#endif
  /*  The lower part is searched only when it is tall enough and its top
      lies strictly below b->top - MAXIGAP.                               */
  if (below.top - below.bot > MinLen && below.top < b->top - MAXIGAP)
    Align_Recursion(A,Alen,B,Blen,&below,current,comp,MinLen,MaxDiff,Traplen);
  if (above.top - above.bot > MinLen)
    Align_Recursion(A,Alen,B,Blen,&above,current,comp,MinLen,MaxDiff,Traplen);
#ifdef REPORT_DPREACH
  Al_depth -= 1;
#endif
}
Beispiel #2
0
/*
 * Align_Recursion
 *
 * Search trapezoid *b for one local segment, record it in SegSols when it
 * passes the length and error filters, then recurse on the sub-trapezoids
 * below and above the segment found.
 *
 *   A, Alen, B, Blen  sequences and lengths, handed on to TraceForwardPath
 *                     and TraceReversePath (B is passed first)
 *   b                 trapezoid to search; its fields are rewritten during
 *                     the reversed retry and restored before it finishes
 *   current           index into Tarray; only entries after it are tested
 *                     for coverage by a recorded segment
 *   comp              when nonzero, the b-coordinates and diagonals of a
 *                     recorded segment are reflected using Blen
 *   MinLen            minimum extent a segment needs in both a and b; also
 *                     the height a sub-trapezoid must exceed to be recursed on
 *   MaxDiff           largest pcnt accepted for a segment
 *   Traplen           number of Tarray entries considered
 *
 * Side effects: may set Covered[j], grow SegSols / SegMax, append to SegSols
 * and advance NumSegs.  The retry path reads the file-level ArevC / BrevC
 * (presumably reversed forms of A and B -- confirm at their definition).
 *
 * NOTE: the #undef and #define directives inside the body are not scoped to
 * the function; they stay in effect for the rest of the translation unit.
 */
static void Align_Recursion(char *A, int Alen, char *B, int Blen,
                            Trapezoid *b, int current, int comp,
                            int MinLen, float MaxDiff, int Traplen)
{ int j, mid, indel;
  float pcnt;
  Local_Segment *hend, *lend;
  Trapezoid ltrp, htrp;

  /* The #undef forces the #else branch: the search starts at the middle row. */
#undef START_AT_BEGINNING_OF_TRAP
#ifdef START_AT_BEGINNING_OF_TRAP
  mid = b->bot;
#else
  mid = (b->bot + b->top) / 2;
#endif

#ifdef REPORT_DPREACH
  fprintf(stderr, " [%d,%d]x[%d,%d] = %d (Depth = %d)\n",
         b->bot,b->top,b->lft,b->rgt,b->top - b->bot + 1,Al_depth);
#endif


  lend = TraceForwardPath(B,Blen,A,Alen,mid,mid-b->rgt,mid-b->lft);

  /* This brace block stays open until after the retry code below, so that
     x (the number of reverse passes made) is still visible to the retry test. */
  { int x;

    x = 0;
    /* Each pass raises the last argument of TraceReversePath; passes repeat
       while the reverse segment starts beyond mid + x*MAXIGAP and its score
       is still below the forward score. */
    do
      { x += 1;

      //fprintf(stderr, "Trying reverse pass\n");
        hend = TraceReversePath(B,Blen,A,Alen,
                                lend->bepos,lend->aepos,lend->aepos,
                                mid+MAXIGAP,BLOCKCOST+2*x*diffcost);
	//fprintf(stderr, "End reverse pass\n");
      }
    while (hend->bbpos > mid + x*MAXIGAP && hend->score < lend->score);

  /* The segment ends where the forward trace ended. */
  hend->aepos = lend->aepos;
  hend->bepos = lend->bepos;


    /* We can miss a small segment here!

	the segment is at the beginning of a trapezoid;
	it is followed by a run of bad luck which is
		- not bad enough to terminate an extension but
		- long enough to have a negative score with abs. value
		  greater than the positive value of the segment that
	          will be missed
	after the run of bad luck is a larger good run which *does*
	   exceed the bad run, so that the best value for the
	  forward extension goes past the bad run

	  What happens is that when we trace backwards,
	  the best value occurs after the bad segment.

	  Thus, even if we start the search before the bad run,
	  the returned segment starts after the bad run.

	  I guess basically this means that we can miss a segment
	  if it has a positive value smaller than BLOCKCOST.

	  So, if we want small minimum segments,
	  could we lower BLOCKCOST?
	  This seems not to work; for instance,
	  with a scoring scheme of 1:10,
	  but a desire to find a segment consisting of, e.g.,
	  10 matches with one mismatch in the middle (i.e. score = 1),
	  we'd have to have BLOCKCOST = 0---not a good idea!

	  So, how about testing for the possible case and running the
	  search backwards when it occurs?


	  New case on which this same logic is attempted: if we are trying
	  a recursive alignment (based on ltrp or htrp) but the
	  TraceForwardPath step got nowhere, then try in reverse orientation.

    */

    /* Retry in reverse orientation when the reverse trace still starts beyond
       mid + x*MAXIGAP, or when the segment's b end equals mid. */
    if(hend->bbpos > mid+x*MAXIGAP || hend->bepos==mid )
      {
#ifdef WARN_MISSED_SEGMENT
	fprintf(stderr, "WARNING: might have missed a small local segment (possible segment with score < blockcost)!\n");
#endif

	/* Defined immediately before it is tested, so the retry code below is
	   always compiled in. */
#define CHECK_FOR_MISSING_SEGMENT
#ifdef  CHECK_FOR_MISSING_SEGMENT

#ifdef WARN_MISSED_SEGMENT
	fprintf(stderr, "WARNING: will try to reverse direction of search!\n");
#endif

	/* Need to:
	   reverse both sequences
	   reverse mid, top bottom left and right
	   run forward
	   run backward until converged
	   reverse resulting segment
	   reverse both sequences
	*/

	/* Reflect mid and the trapezoid: rows map to Blen-row-1 (bot and top
	   swap roles), diagonals map to Blen-Alen-diag (lft and rgt swap roles).
	   *b itself is overwritten here and restored after the search. */
	mid=Blen-mid-1;
	{ int tmp;
  	  tmp=b->top;
	  b->top=Blen-b->bot-1;
	  b->bot=Blen-tmp-1;
	  tmp=b->rgt;
	  b->rgt=Blen-Alen-b->lft;
	  b->lft=Blen-Alen-tmp;
	}

	lend = TraceForwardPath(BrevC,Blen,ArevC,Alen,mid,mid-b->rgt,mid-b->lft);

	/* Same reverse-pass loop as above; this x shadows the outer x. */
	{ int x;

	  x = 0;
  	  do
	    { x += 1;
	      hend = TraceReversePath(BrevC,Blen,ArevC,Alen,
				      lend->bepos,lend->aepos,lend->aepos,
				      mid+MAXIGAP,BLOCKCOST+2*x*diffcost);
	    }
	  while (hend->bbpos > mid + x*MAXIGAP && hend->score < lend->score);

	  hend->aepos = lend->aepos;
	  hend->bepos = lend->bepos;

	}

	/* Applying the same reflection a second time restores mid and *b to
	   their values from before the retry. */
	mid=Blen-mid-1;
	{ int tmp;

	  tmp=b->top;
	  b->top=Blen-b->bot-1;
	  b->bot=Blen-tmp-1;
	  tmp=b->rgt;
	  b->rgt=Blen-Alen-b->lft;
	  b->lft=Blen-Alen-tmp;

	  /* Map the segment found in the reflected frame back to the original
	     frame: diagonal bounds are reflected and swapped, ... */
	  tmp=hend->ldiag;
	  hend->ldiag=Blen-Alen-hend->hdiag;
	  hend->hdiag=Blen-Alen-tmp;

	  /* ... and begin/end positions are reflected and swapped. */
	  // indices: start, end = positions in between bases,
	  //   so reversing is newpos=len-oldpos
	  tmp=hend->abpos;
	  hend->abpos=Alen-hend->aepos;
	  hend->aepos=Alen-tmp;
	  tmp=hend->bbpos;
	  hend->bbpos=Blen-hend->bepos;
	  hend->bepos=Blen-tmp;

	}


#endif  /* CHECK_FOR_MISSING_SEGMENT */

      }


  }





  /* Sub-trapezoids for the recursion, computed before hend is rewritten
     below.  The MIN/MAX keep ltrp.top at most b->top - MAXIGAP and htrp.bot
     at least b->bot + MAXIGAP. */
  ltrp = htrp = *b;
  ltrp.top = MIN(b->top,hend->bbpos) - MAXIGAP;


  htrp.bot = MAX(b->bot,hend->bepos) + MAXIGAP;

  /* Keep the segment only if it spans at least MinLen in both b and a. */
  if (hend->bepos - hend->bbpos >= MinLen &&
      hend->aepos - hend->abpos >= MinLen   )

    { /* |start diagonal - end diagonal| of the segment. */
      indel = abs( (hend->abpos - hend->bbpos)
                 - (hend->aepos - hend->bepos) );

    /* original formula for pcnt doesn't seem to be robust to scoring scheme variation; use ALTERNATE_PCNT until Gene gets a fix in */

#ifndef ALTERNATE_PCNT
      pcnt = (1/RMATCHCOST)
           - (hend->score - indel)
           / (RMATCHCOST*(hend->bepos - hend->bbpos));
#else
      pcnt = (-hend->score+samecost*(hend->bepos-hend->bbpos))*1./
	(1.*(MATCHCOST)*(hend->bepos-hend->bbpos));
#endif

      if (pcnt <= MaxDiff) {
	hend->error = pcnt;

          /* Flag later trapezoids that this segment covers almost entirely;
             the scan stops at the first one starting at or past the
             segment's b end. */
          for (j = current+1; j < Traplen; j++)
            { Trapezoid *t;
              int   ta, tb, ua, ub;

              t = Tarray[j];
              if (t->bot >= hend->bepos) break;

              /* tb = rows in t, ta = diagonals in t. */
              tb = t->top - t->bot + 1;
              ta = t->rgt - t->lft + 1;
              /* [ua,ub] = diagonal range shared by t and the segment. */
              if (t->lft < hend->ldiag)
                ua = hend->ldiag;
              else
                ua = t->lft;
              if (t->rgt > hend->hdiag)
                ub = hend->hdiag;
              else
                ub = t->rgt;

              if (ua > ub) continue;

              /* From here ua = number of shared diagonals and ub = number of
                 rows of t at or below the segment's b end. */
              ua = ub - ua + 1;
              if (t->top > hend->bepos)
                ub = hend->bepos - t->bot + 1;
              else
                ub = tb;

              /* Covered when the product of the two fractions exceeds 0.99. */
              if (((1.*ua)/ta)*((1.*ub)/tb) > .99)
                Covered[j] = 1;
            }

          /* Grow the result array when it is full. */
          if (NumSegs >= SegMax)
            { SegMax = (int)(1.2*NumSegs) + 500;
              SegSols = (Local_Segment *) safe_realloc(SegSols, sizeof(Local_Segment)*SegMax);
            }

          /* Negate both diagonal bounds and swap their roles, then apply the
             Blen reflection when comp is set. */
          { int d;

            d = hend->hdiag;  /*  Oops, diags to this point are b-a, not a-b. */
            hend->hdiag = - (hend->ldiag);
            hend->ldiag = - d;
            if (comp)
              { hend->bbpos = Blen - hend->bbpos;
                hend->bepos = Blen - hend->bepos;
                hend->ldiag = Blen + hend->ldiag;
                hend->hdiag = Blen + hend->hdiag;
              }
          }

          /* Store a copy of the segment. */
          SegSols[NumSegs++] = *hend;

#ifdef REPORT_DPREACH
          fprintf(stderr, "  Hit from (%d,%d) to (%d,%d) within [%d,%d] score %d\n",
                 hend->abpos,hend->bbpos,hend->aepos,hend->bepos,
                 hend->ldiag,hend->hdiag,hend->score);
#endif
      }else{ // SAK
#ifdef REPORT_DPREACH
          fprintf(stderr, "  FAILED (%g > %g) Hit from (%d,%d) to (%d,%d) within [%d,%d] score %d\n",
		 pcnt, MaxDiff,
                 hend->abpos,hend->bbpos,hend->aepos,hend->bepos,
                 hend->ldiag,hend->hdiag,hend->score);
#endif

      }
    }

#ifdef REPORT_DPREACH
  Al_depth += 1;
#endif
  /* The lower part is searched only when it is tall enough and its top lies
     strictly below b->top - MAXIGAP; the upper part only needs to be tall
     enough. */
  if (ltrp.top - ltrp.bot > MinLen && ltrp.top < b->top - MAXIGAP){
    Align_Recursion(A,Alen,B,Blen,&ltrp,current,comp,MinLen,MaxDiff,Traplen);
  }
  if (htrp.top - htrp.bot > MinLen){
    Align_Recursion(A,Alen,B,Blen,&htrp,current,comp,MinLen,MaxDiff,Traplen);
  }
#ifdef REPORT_DPREACH
  Al_depth -= 1;
#endif
}