/* Example #1 */
/* 0 */
/*
 * Aligns two groups of rows of A with an affine-gap dynamic programme that is
 * only evaluated on the diagonals listed in diag, then rewrites the aligned
 * rows into A->seq_al.
 *
 * A    : alignment. Rows l_s[0][0..ns[0]-1] form group 0 (vertical, length l1)
 *        and rows l_s[1][0..ns[1]-1] form group 1 (horizontal, length l2).
 *        A->seq_al, A->len_aln and A->nseq are updated on return.
 * ns   : ns[c] is the number of rows in group c.
 * l_s  : l_s[c][a] is the row index in A of the a-th member of group c.
 * CL   : supplies gop, gep, TG_MODE, maximise and the get_dp_cost callback.
 * diag : diag[0] is the number of flagged diagonals, diag[1..diag[0]] the
 *        flagged diagonals in order (see the layout comment in the body).
 *
 * Returns the score stored for the last cell evaluated by the forward pass
 * (0 when no cell was evaluated).
 *
 * TREATMENT OF THE TERMINAL GAP PENALTIES
 *   TG_MODE=0---> gop and gep
 *   TG_MODE=1---> ---     gep
 *   TG_MODE=2---> ---     ---
 */
int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
    {
	int TG_MODE, gop, l_gop, gep,l_gep, maximise;

	/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
	int a, b,c,k, t;
	int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2;
	int su, in, de, tr;

	int **C, **D, **I, **trace, **pos0, **LD;
	int lenal[2], len;
	char *char_buf;
	char **aln, **al;

	/********Prepare Penalties******/
	gop=CL->gop*SCORE_K;
	gep=CL->gep*SCORE_K;
	TG_MODE=CL->TG_MODE;
	maximise=CL->maximise;

	n_diag=diag[0];

	l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
	l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);

	/* Debug-only report of the fraction of diagonals kept; skipped when both
	   groups are empty because the ratio would divide by zero. */
	if ( getenv ("DEBUG_TCOFFEE") && (l1+l2)>0)fprintf ( stderr, "\n\tNdiag=%d%%  ", (diag[0]*100)/(l1+l2));

	/*diag:
	  diag[1..n_diag]--> flagged diagonals in order;
	  diag[0]=0--> first diagonal;
	  diag[n_diag+1]=l1+l2-1;
	*/

	/*numbering of the diagonals starts from the bottom right [1...l1+l2-1]*/
	/*sequence s1 is vertical and seq s2 is horizontal*/
	/*D contains the best Deletion  in S2==>comes from diagonal N+1*/
	/*I contains the best insertion in S2=> comes from diagonal N-1*/

	/* All matrices are indexed [row i of group 0][slot j in diag]; slots 0 and
	   n_diag+1 are border columns. */
	C=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
	D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
	LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
	I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
	trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);

	/* al[c][x] is 1 when column x holds a residue of group c, 0 for a gap. */
	al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1);

	len= MAX(lenal[0],lenal[1])+1;
	char_buf=(char*) vcalloc (2*len, sizeof (char));

	pos0=aln2pos_simple ( A,-1, ns, l_s);
	C[0][0]=0;

	/* Row 0: only diagonals that cross it (diag[j]>=lenal[0]) are initialised. */
	for ( j=1; j<= n_diag; j++)
	    {
		l_gop=(TG_MODE==0)?gop:0;
		l_gep=(TG_MODE==2)?0:gep;

		if ( (diag[j]-lenal[0])<0 )
		    {
			trace[0][j]=UNDEFINED;
			continue;
		    }
		C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
		D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
	    }
	/* j==n_diag+1 here: seed the right border column of row 0. */
	D[0][j]=D[0][j-1]+gep;

	t=(TG_MODE==0)?gop:0;
	for ( i=1; i<=lenal[0]; i++)
	    {
		l_gop=(TG_MODE==0)?gop:0;
		l_gep=(TG_MODE==2)?0:gep;

		/* Border columns accumulate the terminal gap cost down the rows. */
		C[i][0]=C[i][n_diag+1]=t=t+l_gep;
		I[i][0]=D[i][n_diag+1]=t+    gop;

		for ( j=1; j<=n_diag; j++)
		    {
			C[i][j]=C[i][0];
			D[i][j]=I[i][j]=I[i][0];
		    }

		for (eg=0, j=1; j<=n_diag; j++)
		    {
			/* Column of group 1 reached by diagonal diag[j] on row i. */
			pos_j=diag[j]-lenal[0]+i;
			if (pos_j<=0 || pos_j>l2 )
			    {
				trace[i][j]=UNDEFINED;
				continue;
			    }
			sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );

			/*1 identify the best insertion in S2:*/
			/* eg counts how many diagonal slots the current insertion spans. */
			l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
			l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
			len=(j==1)?0:(diag[j]-diag[j-1]);
			if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
			else eg=1;
			I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;

			/*2 Identify the best deletion in S2*/
			/* LD[i][j] counts how many diagonal slots the current deletion spans. */
			l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
			l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;

			len=(j==n_diag)?0:(diag[j+1]-diag[j]);
			delta_i=((i-len)>0)?(i-len):0;

			if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;}
			else {LD[i][j]=1;}
			D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;

			/*Identify the best way*/
			/* trace encoding: 0 = substitution, -eg = insertion spanning eg
			   slots, +LD[i][j] = deletion spanning LD[i][j] slots.
			   NOTE(review): this comparison always picks the largest value and
			   does not consult `maximise`, unlike the gap updates above. */
			su=C[i-1][j]+sub;
			in=I[i][j];
			de=D[i][j];

			if (su>=in && su>=de)
			  {
			    score=su;
			    tr=0;
			  }
			else if (in>=de)
			  {
			    score=in;
			    tr=-eg;
			  }
			else
			  {
			    score=de;
			    tr=LD[i][j];
			  }
			trace[i][j]=tr;
			C[i][j]=score;

			last_i=i;
			last_j=j;
		    }
	    }

	/*
	            [0][Positive]
	             ^     ^
	             |    /
	             |   /
	             |  /
	             | /
	             |/
	[Neg]<-------[*]
	*/

	/* Traceback from the last evaluated cell; columns are emitted in reverse
	   order and flipped afterwards. */
	i=last_i;
	j=last_j;

	ala=alb=0;
	match1=match2=0;
	while (!(match1==l1 && match2==l2))
	    {
		if ( match1==l1)
		    {
			/* Group 0 exhausted: pad with the remaining residues of group 1. */
			len=l2-match2;
			for ( a=0; a< len; a++)
			    {
				al[0][ala++]=0;
				al[1][alb++]=1;
				match2++;
			    }
			break;
		    }
		else if ( match2==l2)
		    {
			/* Group 1 exhausted: pad with the remaining residues of group 0. */
			len=l1-match1;
			for ( a=0; a< len; a++)
			    {
				al[0][ala++]=1;
				al[1][alb++]=0;
				match1++;
			    }
			break;
		    }

		k=trace[i][j];

		if ( k==0)
		    {
			/* Substitution: one residue from each group, same diagonal. */
			al[0][ala++]=1;
			al[1][alb++]=1;
			i--;
			match1++;
			match2++;
		    }
		else if ( k>0)
		    {
			/* Deletion: jump k slots up the diagonal list, consuming group 0. */
			len=diag[j+k]-diag[j];
			for ( a=0; a<len; a++)
			    {
				if ( match1==l1)break;
				al[0][ala++]=1;
				al[1][alb++]=0;
				match1++;
			    }
			i-=len;
			j+=k;
		    }
		else
		    {
			/* Insertion: jump k slots down the diagonal list, consuming group 1. */
			k*=-1;
			len=diag[j]-diag[j-k];
			for ( a=0; a<len; a++)
			    {
				if ( match2==l2)break;
				al[0][ala++]=0;
				al[1][alb++]=1;
				match2++;
			    }
			j-=k;
		    }
	    }

	LEN=ala;
	invert_list_char ( al[0], LEN);
	invert_list_char ( al[1], LEN);
	if ( A->declared_len<=LEN)A=realloc_aln2  ( A,A->max_n_seq, 2*LEN);
	aln=A->seq_al;

	/* Expand every row of both groups according to its group's 0/1 mask. */
	for ( c=0; c< 2; c++)
	    {
		for ( a=0; a< ns[c]; a++)
		    {
			ch=0;
			for ( b=0; b< LEN; b++)
			    {
				if (al[c][b]==1)
					char_buf[b]=aln[l_s[c][a]][ch++];
				else
					char_buf[b]='-';
			    }
			char_buf[b]='\0';
			aln[l_s[c][a]]=csprintf (aln[l_s[c][a]],"%s", char_buf);
		    }
	    }

	A->len_aln=LEN;
	A->nseq=ns[0]+ns[1];

	free_int (pos0, -1);
	free_int (C, -1);
	free_int (D, -1);
	free_int (I, -1);
	free_int (trace, -1);
	free_int (LD, -1);
	free_char ( al, -1);
	vfree(char_buf);

	return score;
    }
/*
 * Generic state-machine dynamic programme over the diagonals listed in
 * M->diag, aligning group 0 (rows l_s[0][..], length l0) against group 1
 * (rows l_s[1][..], length l1) of A.
 *
 * A   : alignment to read; passing NULL is a cleaning call that frees the
 *       cached static matrices and returns NULL.
 * ns  : ns[c] is the number of rows in group c.
 * l_s : l_s[c][a] is the row index in A of the a-th member of group c.
 * M   : model: nstate, START/END state ids, transition table model[][],
 *       bounded_model[][] (allowed predecessors), model_properties[][],
 *       emission callbacks and the diagonal list diag[].
 *
 * Returns a newly allocated Dp_Result whose traceback[] holds the state path
 * (START first, END stored at traceback[len]) and whose score is the best
 * terminal score. The caller owns the result.
 *
 * Side effects: M->diag[0] and M->diag[ndiag+1] are overwritten; the static
 * matrices Mat/LMat/trace are kept between calls and only reallocated when a
 * larger problem arrives.
 */
Dp_Result * make_fast_generic_dp_pair_wise (Alignment *A, int*ns, int **l_s,Dp_Model *M)
	{
	  /*SIZE VARIABLES*/
	  int ndiag;
	  int l0, l1, len_al,len_diag;
	  static int max_len_al, max_len_diag;
	  static int mI, mJ;
	  /*Evaluation*/
	  int **pos0;

	  /*DP VARIABLES*/
	  /* Flat 3D arrays addressed as [state*mI + i*mJ + j]. */
	  static int *Mat, *LMat, *trace;
	  int a, i, j,l;
	  int state, cur_state, prev_state;
	  int pos_i=0,  pos_j=0;
	  int last_i=0, last_j=0;
	  int prev_i, prev_j;
	  int len_i, len_j, len;
	  int t, e, em;

	  int prev_score;
	  int pc, best_pc;

	  int *prev;
	  int model_index;
	  /*TRACEBACK*/
	  Dp_Result *DPR;
	  int k=0, next_k;
	  int new_i, new_j;

	  /*Cleaning CALL*/
	  if ( A==NULL)
	    {
	      max_len_al=0; max_len_diag=0;mI=0;mJ=0;
	      vfree (Mat); vfree(LMat);vfree(trace);
	      Mat=trace=LMat=NULL;
	      return NULL;
	    }

	  ndiag=M->diag[0];

	  l0=strlen (A->seq_al[l_s[0][0]]);
	  l1=strlen (A->seq_al[l_s[1][0]]);
	  len_al =l0+l1+1;
	  len_diag=ndiag+4;

	  /* Drop the cached matrices when either dimension is too small. */
	  if ( (len_al>max_len_al || len_diag>max_len_diag))
	    {
	      vfree (Mat);
	      vfree (LMat);
	      vfree(trace);
	      max_len_diag=max_len_al=0;
	    }

	  /* NOTE(review): the buffers are sized with the M->nstate seen at
	     allocation time; a later call with more states but smaller dimensions
	     would not trigger a reallocation -- confirm callers never do that. */
	  if (max_len_al==0)
	    {
	      max_len_al=len_al;
	      max_len_diag=len_diag;
	      mI=max_len_al*max_len_diag;
	      mJ=max_len_diag;

	      Mat  =(int*)vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
	      LMat =(int*)vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
	      trace=(int*)vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
	    }

	  prev=(int*)vcalloc ( M->nstate, sizeof (int));
	  DPR=( Dp_Result*)vcalloc ( 1, sizeof ( Dp_Result));
	  /* The path can hold up to l0+l1 state entries (no matched column) plus
	     the START and END markers, i.e. l0+l1+2 slots; max_len_al alone is
	     l0+l1+1 and would be overrun by one in that case. */
	  DPR->traceback=(int*)vcalloc (max_len_al+2, sizeof (int));

	  /*PREPARE THE EVALUATION*/
	  pos0=aln2pos_simple ( A,-1, ns, l_s);

	  /*INITIALIZATION OF THE DP MATRICES*/
	  for (i=0; i<=l0;i++)
	    {
	      for (j=0; j<=ndiag+1;j++)
		{
		  for ( state=0; state<M->nstate; state++)
		    {
		      Mat   [state*mI+i*mJ+j]=UNDEFINED;
		      LMat  [state*mI+i*mJ+j]=UNDEFINED;
		      trace [state*mI+i*mJ+j]=M->START;
		    }
		}
	    }

	  /* NOTE: this overwrites the caller's diag[0] (the diagonal count read
	     into ndiag above) and the slot just past the last diagonal. */
	  M->diag[0]=1;
	  M->diag[ndiag+1]=M->diag[ndiag];

	  /* Border cells: every (i, j) whose row or column position is 0. */
	  for (i=0; i<=l0; i++)
	    for ( j=0; j<=ndiag+1; j++)
	      {
		pos_j=M->diag[j]-l0+i;
		pos_i=i;
		if (!(pos_j==0 || pos_i==0))continue;
		if ( pos_j<0 || pos_i<0)continue;
		if ( pos_i==0 && pos_j==0)
		  {
		    /* Origin: every state starts at score 0 with length 0. */
		    for ( a=0; a< M->nstate; a++)
		      {
			Mat  [a*mI+i*mJ+j]=0;
			LMat [a*mI+i*mJ+j]=0;
			trace[a*mI+i*mJ+j]=M->START;
		      }
		  }
		else
		  {
		    /* Leading run of length l entered directly from START. */
		    l=MAX(pos_i,pos_j);
		    for ( state=0; state<M->START; state++)
		      {
			/* Skip states whose LEN_J / LEN_I property is set when the
			   corresponding position is still 0. */
			if (pos_j==0 && M->model_properties[state][M->LEN_J])continue;
			if (pos_i==0 && M->model_properties[state][M->LEN_I])continue;

			t=M->model[M->START][state];
			e=((M->model_emission_function)[state][M->START_EMISSION])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);

			Mat   [state*mI+i*mJ+j]=t+e*l;
			LMat  [state*mI+i*mJ+j]=l;
			trace [state*mI+i*mJ+j]=M->START;
		      }
		  }
	      }

	  /*DYNAMIC PROGRAMMING: Forward Pass*/

	  /*Diagonals:
	    M->diag[0]=Number of diagonals being considered
	    M->diag[1]=First diagonal being considered
	               Diagonals are numbered 1...L0+l1-1
	               1 is the bottom-left diag
	  */

	  for (i=1; i<=l0;i++)
	    {
	      for (j=1; j<=ndiag;j++)
		{
		  pos_j=M->diag[j]-l0+i;
		  pos_i=i;

		  if (pos_j<=0 || pos_j>l1 )continue;
		  last_i=i;
		  last_j=j;

		  for (cur_state=0; cur_state<M->START; cur_state++)
		    {
		      /* Locate the predecessor cell: a state with a DELTA_J moves
		         to a neighbouring diagonal slot and shifts i by the gap
		         between the two diagonals; otherwise it stays on slot j. */
		      if (M->model_properties[cur_state][M->DELTA_J])
			{
			  prev_j=j+M->model_properties[cur_state][M->DELTA_J];
			  prev_i=i+M->model_properties[cur_state][M->DELTA_I]*FABS((M->diag[j]-M->diag[prev_j]));
			}
		      else
			{
			  prev_j=j;
			  prev_i=i+M->model_properties[cur_state][M->DELTA_I];
			}

		      /* Number of alignment columns covered by this step. */
		      len_i=FABS((i-prev_i));
		      len_j=FABS((M->diag[prev_j]-M->diag[j]));
		      len=MAX(len_i, len_j);

		      em=((M->model_emission_function[cur_state][M->EMISSION]))(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);

		      /* bounded_model[cur_state][0] is the number of allowed
		         predecessor states, listed from index 1. */
		      for (pc=best_pc=UNDEFINED, model_index=1; model_index<=M->bounded_model[cur_state][0]; model_index++)
			{
			  prev_state=M->bounded_model[cur_state][model_index];

			  if(prev_i<0 || prev_j<0 ||prev_i>l0 || prev_j>ndiag || len==UNDEFINED)prev_score=UNDEFINED;
			  else prev_score=Mat[prev_state*mI+prev_i*mJ+prev_j];
			  t=M->model[prev_state][cur_state];
			  e=em;

			  if   (prev_score==UNDEFINED || len==UNDEFINED)e=UNDEFINED;
			  else if (len==0|| e==UNDEFINED)e=UNDEFINED;
			  else e=e*len;

			  if (is_defined_int(3,prev_score,e, t))
			    {
			      pc=prev_score+t+e;
			    }
			  else  pc=UNDEFINED;

			  /*Identify the best previous score*/
			  if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED))
			    {
			      prev[cur_state]=prev_state;
			      best_pc=pc;
			    }
			}

		      Mat[cur_state*mI+i*mJ+j]=best_pc;

		      if ( Mat[cur_state*mI+i*mJ+j]==UNDEFINED)
			{
			  LMat[cur_state*mI+i*mJ+j]=UNDEFINED;
			  trace[cur_state*mI+i*mJ+j]=UNDEFINED;
			  continue;
			}
		      else if ( prev[cur_state]==cur_state)
			{
			  /* Same state extended: accumulate the run length and keep
			     pointing at the state that opened the run. */
			  LMat [cur_state*mI+i*mJ+j]=	LMat [cur_state*mI+prev_i*mJ+prev_j]+len;
			  trace[cur_state*mI+i*mJ+j]=     trace[cur_state*mI+prev_i*mJ+prev_j];
			}
		      else
			{
			  LMat[cur_state*mI+i*mJ+j]=len;
			  trace[cur_state*mI+i*mJ+j]=prev[cur_state];
			}
		    }
		}
	    }

	  /* Termination: add the transition to END and the terminal emission to
	     every state of the last evaluated cell and keep the best one.
	     NOTE(review): pos_i/pos_j still hold the values left by the loops
	     above, which need not correspond to (last_i, last_j) -- confirm. */
	  i=last_i;
	  j=last_j;
	  for (pc=best_pc=UNDEFINED, state=0; state<M->START; state++)
	    {
	      t=M->model[state][M->END];
	      e=( M->model_emission_function[state][M->TERM_EMISSION])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);

	      l=LMat[state*mI+i*mJ+j];

	      if (!is_defined_int(4,t,e,Mat[state*mI+i*mJ+j],l))Mat[state*mI+i*mJ+j]=UNDEFINED;
	      else Mat[state*mI+i*mJ+j]+=t+e*(l);
	      pc=Mat[state*mI+i*mJ+j];

	      if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED))
		{
		  k=state;
		  best_pc=pc;
		}
	    }
	  DPR->score=best_pc;

	  /*TRACEBACK*/
	  /* Walk back run by run: each step emits LMat entries of state k and
	     then jumps to the state that opened the run. */
	  e=0;
	  len=0;

	  while (k!=M->START)
	    {
	      next_k=trace[k*mI+i*mJ+j];

	      new_i=i;
	      new_j=j;
	      l=LMat[k*mI+i*mJ+j];
	      for (a=0; a< l; a++)
		{
		  DPR->traceback[len++]=k;
		}
	      new_i+=M->model_properties[k][M->DELTA_I]*l;

	      if ( M->model_properties[k][M->DELTA_J])
		{
		  /* Step through diagonal slots until the distance to the
		     starting diagonal equals the run length. */
		  while ( next_k!=M->START && FABS((M->diag[j]-M->diag[new_j]))!=l)new_j+=M->model_properties[k][M->DELTA_J];
		}

	      i=new_i;
	      j=new_j;
	      k=next_k;
	    }
	  DPR->len=len;
	  DPR->traceback[DPR->len++]=M->START;
	  invert_list_int  (DPR->traceback,DPR->len);
	  DPR->traceback[DPR->len]=M->END;

	  vfree (prev);
	  free_int (pos0, -1);
	  return DPR;
	}
/* Example #3 */
/* 0 */
/*
 * Aligns group 0 (rows ls[0][..]) against group 1 (rows ls[1][..]) of A by
 * calling diff(), then replays the resulting edit script to rewrite the rows
 * of A->seq_al with gaps.
 *
 * Edit script encoding, as consumed below (one int per operation):
 *    0  -> one column holding a residue from both groups
 *   <0  -> -n columns holding group 0 residues and '-' in group 1
 *   >0  ->  n columns holding '-' in group 0 and group 1 residues
 *
 * The file-level variables sns, sls, last and sapp (declared outside this
 * block) are assigned here; sapp is also used as the script cursor.
 * Presumably diff() appends the script through sapp -- confirm against diff().
 *
 * Returns the value of the first diff() call. Calls exit(1) if the two groups
 * end up with different row lengths.
 */
int myers_miller_pair_wise (Alignment *A,int *ns, int **ls,Constraint_list *CL )
{
    int **pos;
    int *S;
    char **char_buf;
    int grp, s, run;
    int i, j, len;
    int l1, l2;
    int score;

    /* Publish the group description through the file-level variables. */
    sns=ns;
    sls=ls;

    pos=aln2pos_simple (A, -1, ns, ls);

    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);

    /* Script buffer: at most l1+l2 operations plus one spare slot. */
    S=(int*)vcalloc (l1+l2+1, sizeof (int));
    last=0;
    sapp=S;

    score=diff (A, ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);
    /* Second call with a NULL alignment; presumably a reset/cleanup request
       for diff()'s internal state -- confirm against diff(). */
    diff (NULL, ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);

    /* Pass 1: replay the script only to measure the aligned length. */
    len=0;
    for (i=0, j=0, sapp=S; i!=l1 || j!=l2; sapp++)
    {
        if (*sapp<0)
        {
            i-=*sapp;
            len-=*sapp;
        }
        else if (*sapp>0)
        {
            j+=*sapp;
            len+=*sapp;
        }
        else
        {
            i++;
            j++;
            len++;
        }
    }

    A=realloc_aln2 (A, A->max_n_seq, len+1);
    char_buf=declare_char (A->max_n_seq, len+1);

    /* Pass 2: replay the script again, writing the gapped rows into char_buf. */
    len=0;
    for (i=0, j=0, sapp=S; i!=l1 || j!=l2; sapp++)
    {
        const int op=*sapp;

        if (op==0)
        {
            for (s=0; s<ns[0]; s++)
                char_buf[ls[0][s]][len]=A->seq_al[ls[0][s]][i];
            for (s=0; s<ns[1]; s++)
                char_buf[ls[1][s]][len]=A->seq_al[ls[1][s]][j];
            i++;
            j++;
            len++;
        }
        else if (op>0)
        {
            /* Gap in group 0 while group 1 advances. */
            for (run=op; run>0; run--, j++, len++)
            {
                for (s=0; s<ns[0]; s++)
                    char_buf[ls[0][s]][len]='-';
                for (s=0; s<ns[1]; s++)
                    char_buf[ls[1][s]][len]=A->seq_al[ls[1][s]][j];
            }
        }
        else
        {
            /* Gap in group 1 while group 0 advances. */
            for (run=-op; run>0; run--, i++, len++)
            {
                for (s=0; s<ns[0]; s++)
                    char_buf[ls[0][s]][len]=A->seq_al[ls[0][s]][i];
                for (s=0; s<ns[1]; s++)
                    char_buf[ls[1][s]][len]='-';
            }
        }
    }

    A->len_aln=len;
    A->nseq=ns[0]+ns[1];

    /* Terminate each rebuilt row and copy it back, group 0 first. */
    for (grp=0; grp<2; grp++)
    {
        for (s=0; s<ns[grp]; s++)
        {
            char *row=char_buf[ls[grp][s]];

            row[len]='\0';
            sprintf (A->seq_al[ls[grp][s]], "%s", row);
        }
    }

    vfree (S);
    free_char (char_buf, -1);

    /* Sanity check: both groups must now have the same row length. */
    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);
    if (l1!=l2)
        exit(1);

    free_int (pos, -1);
    return score;
}