int ssec_pwaln_maln (Alignment *A, int *ns, int **ls, Constraint_list *CL)
  {

    static Dp_Model *M=NULL;
    Dp_Result *R=NULL;
    int a, ndiag;
    int Sa,Sb,St, Da, Db, Dt, Ia, Ib, It;
    int ala, alb, s,b;

    a=0;    
    Sa=a++;Da=a++;Ia=a++;
    Sb=a++;Db=a++;Ib=a++;
    St=a++;Dt=a++;It=a++;
    
    if ( strm (CL->matrices_list[0], "analyse"))
      {
      for ( a=0; a< CL->n_matrices; a++)
	{

	  rescale_two_mat(CL->matrices_list[1],CL->matrices_list[2],1000, 100, AA_ALPHABET);	
	  exit (0);
	}
      }

    

    /*2 Prepare the Model*/
    M=initialize_sseq_model(2,2,CL);    
    ndiag=strlen (A->seq_al[0])+strlen (A->seq_al[1])-1;
    M->diag=vcalloc (ndiag+1, sizeof (int));
    M->diag[0]=ndiag-1;
    for ( a=1; a<=M->diag[0]; a++)M->diag[a]=a;
    
    /*3 Prepare Sequence Presentation*/
    R=make_fast_generic_dp_pair_wise(A, ns, ls, M);
    
  
    ala=alb=0;

    A=realloc_aln2(A,A->nseq, R->len+1);
    for (b=1; b<R->len;b++)
      {
	if (R->traceback[b]==Sa ||  R->traceback[b]==Sb ||R->traceback[b]==St )
	  {
	    for (s=0; s<ns[0]; s++)
	      A->seq_al[ls[0][s]][b-1]=(CL->S)->seq[A->order[ls[0][s]][0]][ala];
	    ala++;
	    for (s=0; s<ns[1]; s++)
	      A->seq_al[ls[1][s]][b-1]=(CL->S)->seq[A->order[ls[1][s]][0]][alb];
	    alb++;
	  }
	else if ( R->traceback[b]==Da ||  R->traceback[b]==Db ||R->traceback[b]==Dt )
	  {
	    for (s=0; s<ns[0]; s++)
	      A->seq_al[ls[0][s]][b-1]=(CL->S)->seq[A->order[ls[0][s]][0]][ala];
	    ala++;
	    for (s=0; s<ns[1]; s++)
	      A->seq_al[ls[1][s]][b-1]='-';	   
	  }
	else if ( R->traceback[b]==Ia ||  R->traceback[b]==Ib ||R->traceback[b]==It )
	  {
	    for (s=0; s<ns[0]; s++)
	      A->seq_al[ls[0][s]][b-1]='-';
	    
	    for (s=0; s<ns[1]; s++)
	      A->seq_al[ls[1][s]][b-1]=(CL->S)->seq[A->order[ls[1][s]][0]][alb];
	    alb++;
	  }
      }
    for (s=0; s<ns[0]; s++)
      A->seq_al[ls[0][s]][b-1]='\0';
    for (s=0; s<ns[1]; s++)
      A->seq_al[ls[1][s]][b-1]='\0';
    
    A->len_aln=strlen (A->seq_al[ls[0][0]]);
    R->Dp_model=M;
    A->Dp_result=R;
    return A->score;
  }
Beispiel #2
0
int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
    {
/*TREATMENT OF THE TERMINAL GAP PENALTIES*/
/*TG_MODE=0---> gop and gep*/
/*TG_MODE=1---> ---     gep*/
      /*TG_MODE=2---> ---     ---*/


	int TG_MODE, gop, l_gop, gep,l_gep, maximise;

/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
	int a, b,c,k, t;
	int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2;
	int su, in, de, tr;

	int **C, **D, **I, **trace, **pos0, **LD;
	int lenal[2], len;
	char *buffer, *char_buf;
	char **aln, **al;

        /********Prepare Penalties******/
	gop=CL->gop*SCORE_K;
	gep=CL->gep*SCORE_K;
	TG_MODE=CL->TG_MODE;
	maximise=CL->maximise;


	/********************************/


        n_diag=diag[0];



       l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
       l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);

       if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%%  ", (diag[0]*100)/(l1+l2));

	/*diag:
	  diag[1..n_diag]--> flaged diagonal in order;
	  diag[0]=0--> first diagonal;
	  diag[n_diag+1]=l1+l2-1;
	*/

	/*numeration of the diagonals strats from the bottom right [1...l1+l2-1]*/
	/*sequence s1 is vertical and seq s2 is horizontal*/
	/*D contains the best Deletion  in S2==>comes from diagonal N+1*/
	/*I contains the best insertion in S2=> comes from diagonal N-1*/





       C=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
       trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);


       al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1);

       len= MAX(lenal[0],lenal[1])+1;
       buffer=(char*)vcalloc ( 2*len, sizeof (char));
       char_buf=(char*) vcalloc (2*len, sizeof (char));

       pos0=aln2pos_simple ( A,-1, ns, l_s);
       C[0][0]=0;

       t=(TG_MODE==0)?gop:0;
       for ( j=1; j<= n_diag; j++)
	    {
		l_gop=(TG_MODE==0)?gop:0;
		l_gep=(TG_MODE==2)?0:gep;



		if ( (diag[j]-lenal[0])<0 )
		    {
		    trace[0][j]=UNDEFINED;
		    continue;
		    }
		C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
		D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
	    }
       D[0][j]=D[0][j-1]+gep;


       t=(TG_MODE==0)?gop:0;
       for ( i=1; i<=lenal[0]; i++)
           {
	        l_gop=(TG_MODE==0)?gop:0;
		l_gep=(TG_MODE==2)?0:gep;

		C[i][0]=C[i][n_diag+1]=t=t+l_gep;
		I[i][0]=D[i][n_diag+1]=t+    gop;

		for ( j=1; j<=n_diag; j++)
		    {
			C[i][j]=C[i][0];
			D[i][j]=I[i][j]=I[i][0];
		    }

		for (eg=0, j=1; j<=n_diag; j++)
		    {

			pos_j=diag[j]-lenal[0]+i;
			if (pos_j<=0 || pos_j>l2 )
			    {
			    trace[i][j]=UNDEFINED;
			    continue;
			    }
			sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );

		    /*1 identify the best insertion in S2:*/
			l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
			l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
			len=(j==1)?0:(diag[j]-diag[j-1]);
			if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
			else eg=1;
			I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;

		    /*2 Identify the best deletion in S2*/
			l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
			l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;

			len=(j==n_diag)?0:(diag[j+1]-diag[j]);
			delta_i=((i-len)>0)?(i-len):0;

			if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;}
			else {LD[i][j]=1;}
			D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;


			/*Identify the best way*/
			/*
			score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]);
			fop-=1;
			if ( fop<0)trace[i][j]=fop*eg;
			else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];}
			else if ( fop==0) trace[i][j]=0;
			*/

			su=C[i-1][j]+sub;
			in=I[i][j];
			de=D[i][j];

			/*HERE ("%d %d %d", su, in, de);*/
			if (su>=in && su>=de)
			  {
			    score=su;
			    tr=0;
			  }
			else if (in>=de)
			  {
			    score=in;
			    tr=-eg;
			  }
			else
			  {
			    score=de;
			    tr=LD[i][j];
			  }
			trace[i][j]=tr;
			C[i][j]=score;


			last_i=i;
			last_j=j;
		    }
	    }


       /*
	            [0][Positive]
	             ^     ^
	             |    /
                     |   /
                     |  /
                     | /
                     |/
       [Neg]<-------[*]
	*/


	i=last_i;
	j=last_j;



	ala=alb=0;
	match1=match2=0;
	while (!(match1==l1 && match2==l2))
	      {


		  if ( match1==l1)
		     {
			 len=l2-match2;
			 for ( a=0; a< len; a++)
			     {
			     al[0][ala++]=0;
			     al[1][alb++]=1;
			     match2++;
			     }
			 k=0;
			 break;

			 /*k=-(j-1);*/

		     }
		  else if ( match2==l2)
		     {
			 len=l1-match1;
			 for ( a=0; a< len; a++)
			     {
			     al[0][ala++]=1;
			     al[1][alb++]=0;
			     match1++;
			     }
			 k=0;
			 break;
			 /*k= n_diag-j;*/
		     }
		  else
		      {
			  k=trace[i][j];
		      }


		  if ( k==0)
			     {
				 if ( match2==l2 || match1==l1);
				 else
				    {

				    al[0][ala++]=1;
				    al[1][alb++]=1;
				    i--;
				    match1++;
				    match2++;
				    }
			     }
		  else if ( k>0)
			     {

			     len=diag[j+k]-diag[j];
			     for ( a=0; a<len; a++)
			         {
				     if ( match1==l1)break;
				     al[0][ala++]=1;
				     al[1][alb++]=0;
				     match1++;
				 }
			     i-=len;
			     j+=k;
			     }
		  else if ( k<0)
			     {
			     k*=-1;
			     len=diag[j]-diag[j-k];
			     for ( a=0; a<len; a++)
			         {
				     if ( match2==l2)break;
				     al[0][ala++]=0;
				     al[1][alb++]=1;
				     match2++;
				 }


			     j-=k;
			     }
	      }

	LEN=ala;
	c=LEN-1;
	invert_list_char ( al[0], LEN);
	invert_list_char ( al[1], LEN);
	if ( A->declared_len<=LEN)A=realloc_aln2  ( A,A->max_n_seq, 2*LEN);
	aln=A->seq_al;

	for ( c=0; c< 2; c++)
	    {
	    for ( a=0; a< ns[c]; a++)
		{
		ch=0;
		for ( b=0; b< LEN; b++)
		    {
		    if (al[c][b]==1)
			char_buf[b]=aln[l_s[c][a]][ch++];
		    else
			char_buf[b]='-';
		   }
		char_buf[b]='\0';
		aln[l_s[c][a]]=csprintf (aln[l_s[c][a]],"%s", char_buf);
	        }
	     }


	A->len_aln=LEN;
	A->nseq=ns[0]+ns[1];

	free_int (pos0, -1);
	free_int (C, -1);
	free_int (D, -1);
	free_int (I, -1);
	free_int (trace, -1);
	free_int (LD, -1);
	free_char ( al, -1);
	vfree(buffer);
	vfree(char_buf);


	return score;
    }
int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag)
    {
      int a,c,p,k;
      Dp_Result *DPR;
      static Dp_Model  *M;
      int l0, l1;
      int len_i, len_j;
      int f0=0, f1=0;
      int deltaf0, deltaf1, delta;
      int nr1, nr2;
      int ala, alb, aa0, aa1;
      int type;
      
      char **al;
      int **tl_s;
      int *tns;
      /*DEBUG*/
      int debug_cdna_fasta=0;
      Alignment *DA;
      int score;
      int state,prev_state;
      int t, e;
      int a1, a2;
      
      
      l0=strlen ( B->seq_al[l_s[0][0]]);
      l1=strlen ( B->seq_al[l_s[1][0]]);

      al=declare_char (2, l0+l1+1); 
      B=realloc_aln2 (B,B->nseq,l0+l1+1);


      free_int (B->cdna_cache, -1);
      B->cdna_cache=declare_int(1, l0+l1+1);
      
      if ( !M)M=initialize_dna_dp_model (CL);

     
      M->diag=diag;

      tl_s=declare_int (2, 2);tns=vcalloc(2, sizeof(int));tl_s[0][0]=0;tl_s[1][0]=3;tns[0]=tns[1]=1;
      DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M);
      vfree(tns);free_int(tl_s, -1);


      
      /*new_trace_back*/
      a=p=0;
      aa0=aa1=ala=alb=0;
      while ( (k=DPR->traceback[a++])!=M->START);
      while ( (k=DPR->traceback[a++])!=M->END)
	{
	  
	  f0=M->model_properties[k][M->F0];
	  f1=M->model_properties[k][M->F1];

	  len_i=M->model_properties[k][M->LEN_I];
	  len_j=M->model_properties[k][M->LEN_J];
	  
	  type=M->model_properties[k][M->TYPE];
	  
	  

	  if (type==M->CODING0)
	    {
	      deltaf0=(aa0*3+f0)-ala;
	      deltaf1=(aa1*3+f1)-alb;

	      delta=MAX(deltaf0, deltaf1);
	      
	      for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)		  
		      {
			if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
			else al[0][p]='-';
			
			if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
			else al[1][p]='-'; 
			
			B->cdna_cache[0][p]=M->NON_CODING;	
			if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
			else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
		      } 
	      for ( c=0; c< 3; c++, p++)
		{
		  if ( c==0)B->cdna_cache[0][p]=M->CODING0;
		  else if ( c==1)B->cdna_cache[0][p]=M->CODING1;
		  else if ( c==2)B->cdna_cache[0][p]=M->CODING2;
		  if (ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
		  else al[0][p]='-';

		  if (alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
		  else al[1][p]='-';
			
		  if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
		  else if ( debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k,  al[0][p], al[1][p]);
		}
	    }

	  aa0+=len_i;
	  aa1+=len_j;
	}
      
      deltaf0=(aa0*3+f0)-ala;
      deltaf1=(aa1*3+f1)-alb;
      delta=MAX(deltaf0, deltaf1);
      for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)		  
	{
	  if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
	  else al[0][p]='-';
	  
	  if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
	  else al[1][p]='-'; 
	  
	  B->cdna_cache[0][p]=M->NON_CODING;	
	  if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
	  else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
	}
      

      /*End New traceback*/
      



      al[0][p]='\0';
      al[1][p]='\0';


      sprintf( B->seq_al[l_s[0][0]], "%s", al[0]);
      sprintf( B->seq_al[l_s[1][0]], "%s", al[1]);
      B->len_aln=strlen (al[0]);
      B->nseq=2;
     
      
     
      
      if ( debug_cdna_fasta)
	  {
	    fprintf ( stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K);
	    for ( a=1; a<diag[0]; a++)
	      {
		fprintf ( stderr, "\nchosen diag: %d", diag[a]);
	      }
	    
	    fprintf ( stderr, "\n  GOP=%d   GEP=%d   TG_MODE=%d", M->gop, M->gep, M->TG_MODE);
	    fprintf ( stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE);
	    
	    DA=copy_aln (B, NULL);
	    DA=realloc_aln2 (DA,6,(DA->len_aln+1));
	

	    for ( a=0; a<B->len_aln; a++)
	      {

		fprintf ( stderr, "\n%d", DA->cdna_cache[0][a]);
		if (DA->cdna_cache[0][a]>=M->CODING0)DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';
		else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';

		if (DA->cdna_cache[0][a]==M->CODING0)
		  {
		    DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*');
		    DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*');
		  }
		else
		  {
		    DA->seq_al[DA->nseq+1][a]='-'; 
		    DA->seq_al[DA->nseq+2][a]='-'; 
		  }
		
	      }
	    DA->nseq+=3;
	    print_aln (DA);
	    
	    free_aln(DA);		      
	    score=0;
	    
	    
	    for (prev_state=M->START,a=0; a< DA->len_aln;)
	      {
		state=DA->cdna_cache[0][a];
		t=M->model[prev_state][state];
		if ( DA->cdna_cache[0][a]==M->CODING0)
		  {
		    a1=translate_dna_codon (A->seq_al[0]+a,'x');
		    a2=translate_dna_codon (A->seq_al[1]+a,'x');
		    
		    if ( a1!='x' && a2!='x')
		      {
			e=CL->M[a1-'A'][a2-'A']*SCORE_K;
		      }
		  }
		else if ( DA->cdna_cache[0][a]>M->CODING0);
		else
		  {
		    e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION];
		  }
		if ( e==UNDEFINED || t==UNDEFINED) fprintf ( stderr, "\nPROBLEM %d\n", a);
		
		fprintf ( stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t);
		score+=e+t;
		prev_state=state;
		
		if (B->cdna_cache[0][a]==M->NON_CODING)a++;
		else a+=3;
		
	      }
	    
	  }
      
      for ( a=0; a<B->len_aln; a++)
	{
	  
	  if ( B->cdna_cache[0][a]<M->CODING0)B->cdna_cache[0][a]=0;
	  else B->cdna_cache[0][a]=1;
	}
      
      free_char ( al, -1);
      return DPR->score;
      
    }
Beispiel #4
0
int myers_miller_pair_wise (Alignment *A,int *ns, int **ls,Constraint_list *CL )
{
    int **pos;
    int a,b, i, j, l,l1, l2, len;
    int *S;
    char ** char_buf;
    int score;

    /********Prepare Penalties******/
    //ns2master_ns (ns,ls, &sns,&sls);
    sns=ns;
    sls=ls;

    /********************************/


    pos=aln2pos_simple ( A,-1, ns, ls);


    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);
    S=(int*)vcalloc (l1+l2+1, sizeof (int));
    last=0;
    sapp=S;

    score=diff (A,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);
    diff (NULL,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);



    i=0;
    j=0;
    sapp=S;
    len=0;
    while (!(i==l1 && j==l2))
    {
        if (*sapp==0) {
            i++;
            j++;
            len++;
        }
        else if ( *sapp<0) {
            i-=*sapp;
            len-=*sapp;
        }
        else if ( *sapp>0) {
            j+=*sapp;
            len+=*sapp;
        }
        sapp++;
    }



    A=realloc_aln2  ( A,A->max_n_seq,len+1);
    char_buf=declare_char (A->max_n_seq,len+1);

    i=0;
    j=0;
    sapp=S;
    len=0;
    while (!(i==l1 && j==l2))
    {

        if (*sapp==0)
        {
            for (b=0; b< ns[0]; b++)
                char_buf[ls[0][b]][len]=A->seq_al[ls[0][b]][i];
            for (b=0; b< ns[1]; b++)
                char_buf[ls[1][b]][len]=A->seq_al[ls[1][b]][j];
            i++;
            j++;
            len++;
        }
        else if ( *sapp>0)
        {
            l=*sapp;
            for ( a=0; a<l; a++, j++, len++)
            {
                for (b=0; b< ns[0]; b++)
                    char_buf[ls[0][b]][len]='-';
                for (b=0; b< ns[1]; b++)
                    char_buf[ls[1][b]][len]=A->seq_al[ls[1][b]][j];
            }
        }
        else if ( *sapp<0)
        {
            l=-*sapp;
            for ( a=0; a<l; a++, i++, len++)
            {
                for (b=0; b< ns[0]; b++)
                    char_buf[ls[0][b]][len]=A->seq_al[ls[0][b]][i];;
                for (b=0; b< ns[1]; b++)
                    char_buf[ls[1][b]][len]='-';
            }
        }

        sapp++;
    }


    A->len_aln=len;
    A->nseq=ns[0]+ns[1];

    for ( a=0; a< ns[0]; a++) {
        char_buf[ls[0][a]][len]='\0';
        sprintf ( A->seq_al[ls[0][a]], "%s", char_buf[ls[0][a]]);
    }
    for ( a=0; a< ns[1]; a++) {
        char_buf[ls[1][a]][len]='\0';
        sprintf ( A->seq_al[ls[1][a]], "%s", char_buf[ls[1][a]]);
    }


    vfree (S);
    free_char ( char_buf, -1);
    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);
    if ( l1!=l2) exit(1);

    free_int (pos, -1);
    return score;
}