int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag)
    {
      int a,c,p,k;
      Dp_Result *DPR;
      static Dp_Model  *M;
      int l0, l1;
      int len_i, len_j;
      int f0=0, f1=0;
      int deltaf0, deltaf1, delta;
      int nr1, nr2;
      int ala, alb, aa0, aa1;
      int type;
      
      char **al;
      int **tl_s;
      int *tns;
      /*DEBUG*/
      int debug_cdna_fasta=0;
      Alignment *DA;
      int score;
      int state,prev_state;
      int t, e;
      int a1, a2;
      
      
      l0=strlen ( B->seq_al[l_s[0][0]]);
      l1=strlen ( B->seq_al[l_s[1][0]]);

      al=declare_char (2, l0+l1+1); 
      B=realloc_aln2 (B,B->nseq,l0+l1+1);


      free_int (B->cdna_cache, -1);
      B->cdna_cache=declare_int(1, l0+l1+1);
      
      if ( !M)M=initialize_dna_dp_model (CL);

     
      M->diag=diag;

      tl_s=declare_int (2, 2);tns=vcalloc(2, sizeof(int));tl_s[0][0]=0;tl_s[1][0]=3;tns[0]=tns[1]=1;
      DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M);
      vfree(tns);free_int(tl_s, -1);


      
      /*new_trace_back*/
      a=p=0;
      aa0=aa1=ala=alb=0;
      while ( (k=DPR->traceback[a++])!=M->START);
      while ( (k=DPR->traceback[a++])!=M->END)
	{
	  
	  f0=M->model_properties[k][M->F0];
	  f1=M->model_properties[k][M->F1];

	  len_i=M->model_properties[k][M->LEN_I];
	  len_j=M->model_properties[k][M->LEN_J];
	  
	  type=M->model_properties[k][M->TYPE];
	  
	  

	  if (type==M->CODING0)
	    {
	      deltaf0=(aa0*3+f0)-ala;
	      deltaf1=(aa1*3+f1)-alb;

	      delta=MAX(deltaf0, deltaf1);
	      
	      for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)		  
		      {
			if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
			else al[0][p]='-';
			
			if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
			else al[1][p]='-'; 
			
			B->cdna_cache[0][p]=M->NON_CODING;	
			if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
			else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
		      } 
	      for ( c=0; c< 3; c++, p++)
		{
		  if ( c==0)B->cdna_cache[0][p]=M->CODING0;
		  else if ( c==1)B->cdna_cache[0][p]=M->CODING1;
		  else if ( c==2)B->cdna_cache[0][p]=M->CODING2;
		  if (ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
		  else al[0][p]='-';

		  if (alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
		  else al[1][p]='-';
			
		  if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
		  else if ( debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k,  al[0][p], al[1][p]);
		}
	    }

	  aa0+=len_i;
	  aa1+=len_j;
	}
      
      deltaf0=(aa0*3+f0)-ala;
      deltaf1=(aa1*3+f1)-alb;
      delta=MAX(deltaf0, deltaf1);
      for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)		  
	{
	  if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
	  else al[0][p]='-';
	  
	  if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
	  else al[1][p]='-'; 
	  
	  B->cdna_cache[0][p]=M->NON_CODING;	
	  if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
	  else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
	}
      

      /*End New traceback*/
      



      al[0][p]='\0';
      al[1][p]='\0';


      sprintf( B->seq_al[l_s[0][0]], "%s", al[0]);
      sprintf( B->seq_al[l_s[1][0]], "%s", al[1]);
      B->len_aln=strlen (al[0]);
      B->nseq=2;
     
      
     
      
      if ( debug_cdna_fasta)
	  {
	    fprintf ( stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K);
	    for ( a=1; a<diag[0]; a++)
	      {
		fprintf ( stderr, "\nchosen diag: %d", diag[a]);
	      }
	    
	    fprintf ( stderr, "\n  GOP=%d   GEP=%d   TG_MODE=%d", M->gop, M->gep, M->TG_MODE);
	    fprintf ( stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE);
	    
	    DA=copy_aln (B, NULL);
	    DA=realloc_aln2 (DA,6,(DA->len_aln+1));
	

	    for ( a=0; a<B->len_aln; a++)
	      {

		fprintf ( stderr, "\n%d", DA->cdna_cache[0][a]);
		if (DA->cdna_cache[0][a]>=M->CODING0)DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';
		else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';

		if (DA->cdna_cache[0][a]==M->CODING0)
		  {
		    DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*');
		    DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*');
		  }
		else
		  {
		    DA->seq_al[DA->nseq+1][a]='-'; 
		    DA->seq_al[DA->nseq+2][a]='-'; 
		  }
		
	      }
	    DA->nseq+=3;
	    print_aln (DA);
	    
	    free_aln(DA);		      
	    score=0;
	    
	    
	    for (prev_state=M->START,a=0; a< DA->len_aln;)
	      {
		state=DA->cdna_cache[0][a];
		t=M->model[prev_state][state];
		if ( DA->cdna_cache[0][a]==M->CODING0)
		  {
		    a1=translate_dna_codon (A->seq_al[0]+a,'x');
		    a2=translate_dna_codon (A->seq_al[1]+a,'x');
		    
		    if ( a1!='x' && a2!='x')
		      {
			e=CL->M[a1-'A'][a2-'A']*SCORE_K;
		      }
		  }
		else if ( DA->cdna_cache[0][a]>M->CODING0);
		else
		  {
		    e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION];
		  }
		if ( e==UNDEFINED || t==UNDEFINED) fprintf ( stderr, "\nPROBLEM %d\n", a);
		
		fprintf ( stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t);
		score+=e+t;
		prev_state=state;
		
		if (B->cdna_cache[0][a]==M->NON_CODING)a++;
		else a+=3;
		
	      }
	    
	  }
      
      for ( a=0; a<B->len_aln; a++)
	{
	  
	  if ( B->cdna_cache[0][a]<M->CODING0)B->cdna_cache[0][a]=0;
	  else B->cdna_cache[0][a]=1;
	}
      
      free_char ( al, -1);
      return DPR->score;
      
    }
示例#2
0
Alignment * extract_domain ( Constraint_list *CL)
{
    /*
      function documentation: start
      Alignment * extract_domain ( Constraint_list *CL)

      given a CL, this function extracts the next best scoring local multiple alignment
      It returns a CL where the aligned residues have been indicated in (CL->moca)->forbiden_residues;

      the local alignment is extracted with the dp function indicated by
                          CL->dp_mode: (gotoh_sw_pair_wise)
      Evaluation:
                 CL->get_dp_cost=slow_get_dp_cost;
      CL->evaluate_residue_pair=sw_residue_pair_extended_list;
      Continuation:
                (CL->moca)->evaluate_domain=evaluate_moca_domain;
      Cache of CL:
                (CL->moca)->cache_cl_with_domain=cache_cl_with_moca_domain;
      Domain post processing:
                (CL->moca)->make_nol_aln=make_moca_nol_aln;
      function documentation: end
    */
    int min_start, max_start, start,min_len, max_len, len, score;
    int step;
    Alignment *C=NULL;
    Alignment *RESULT=NULL;
    Alignment *EA=NULL;




    /*CASE 1: Non Automatic Domain Extraction*/
    if ((CL->moca)->moca_interactive)
    {
        return interactive_domain_extraction (CL);
    }
    else if ((CL->moca)->moca_len)
    {
        while ((C=extract_domain_with_coordinates (C,(CL->moca)->moca_start,(CL->moca)->moca_len,CL))->nseq==0)(CL->moca)->moca_scale=(CL->moca)->moca_scale*0.9;
        RESULT=copy_aln ( C, RESULT);
        unpack_seq_aln (RESULT, CL);
        output_format_aln ("mocca_aln",RESULT,EA=fast_coffee_evaluate_output(RESULT, CL),"stdout");
        free_aln(EA);

        return RESULT;
    }
    else if ( !(CL->moca)->moca_len)
    {
        analyse_sequence (CL);
        myexit (EXIT_FAILURE);
    }

    /*CASE 2: Automatic Domain Extraction: Find Coordinates*/


    start=500;

    step=10;
    min_start=0;
    max_start=strlen ((CL->S)->seq[0]);
    min_len=20;
    max_len=strlen ((CL->S)->seq[0]);

    C=extract_domain_with_coordinates (C,13,30,CL);
    C->output_res_num=1;
    print_aln (C);

    (CL->moca)->moca_scale=-180;
    C=add_seq2aln (CL,C, CL->S);
    print_aln (C);

    (CL->moca)->moca_scale=-160;
    C=add_seq2aln (CL,C, CL->S);
    print_aln (C);

    myexit (EXIT_FAILURE);

    while ( step>0)
    {
        C=approximate_domain (min_start,max_start,step,min_len,max_len, step,&start, &len, &score, CL);
        min_start=start-step;
        max_start=start+step;
        min_len=len-step;
        max_len=len+step;
        step=step/2;
    }

    C=extract_domain_with_coordinates (C,start-10, len+20,CL);
    C->output_res_num=1;
    print_aln (C);

    myexit (EXIT_FAILURE);
    return C;


}