int ssec_pwaln_maln (Alignment *A, int *ns, int **ls, Constraint_list *CL) { static Dp_Model *M=NULL; Dp_Result *R=NULL; int a, ndiag; int Sa,Sb,St, Da, Db, Dt, Ia, Ib, It; int ala, alb, s,b; a=0; Sa=a++;Da=a++;Ia=a++; Sb=a++;Db=a++;Ib=a++; St=a++;Dt=a++;It=a++; if ( strm (CL->matrices_list[0], "analyse")) { for ( a=0; a< CL->n_matrices; a++) { rescale_two_mat(CL->matrices_list[1],CL->matrices_list[2],1000, 100, AA_ALPHABET); exit (0); } } /*2 Prepare the Model*/ M=initialize_sseq_model(2,2,CL); ndiag=strlen (A->seq_al[0])+strlen (A->seq_al[1])-1; M->diag=vcalloc (ndiag+1, sizeof (int)); M->diag[0]=ndiag-1; for ( a=1; a<=M->diag[0]; a++)M->diag[a]=a; /*3 Prepare Sequence Presentation*/ R=make_fast_generic_dp_pair_wise(A, ns, ls, M); ala=alb=0; A=realloc_aln2(A,A->nseq, R->len+1); for (b=1; b<R->len;b++) { if (R->traceback[b]==Sa || R->traceback[b]==Sb ||R->traceback[b]==St ) { for (s=0; s<ns[0]; s++) A->seq_al[ls[0][s]][b-1]=(CL->S)->seq[A->order[ls[0][s]][0]][ala]; ala++; for (s=0; s<ns[1]; s++) A->seq_al[ls[1][s]][b-1]=(CL->S)->seq[A->order[ls[1][s]][0]][alb]; alb++; } else if ( R->traceback[b]==Da || R->traceback[b]==Db ||R->traceback[b]==Dt ) { for (s=0; s<ns[0]; s++) A->seq_al[ls[0][s]][b-1]=(CL->S)->seq[A->order[ls[0][s]][0]][ala]; ala++; for (s=0; s<ns[1]; s++) A->seq_al[ls[1][s]][b-1]='-'; } else if ( R->traceback[b]==Ia || R->traceback[b]==Ib ||R->traceback[b]==It ) { for (s=0; s<ns[0]; s++) A->seq_al[ls[0][s]][b-1]='-'; for (s=0; s<ns[1]; s++) A->seq_al[ls[1][s]][b-1]=(CL->S)->seq[A->order[ls[1][s]][0]][alb]; alb++; } } for (s=0; s<ns[0]; s++) A->seq_al[ls[0][s]][b-1]='\0'; for (s=0; s<ns[1]; s++) A->seq_al[ls[1][s]][b-1]='\0'; A->len_aln=strlen (A->seq_al[ls[0][0]]); R->Dp_model=M; A->Dp_result=R; return A->score; }
int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag) { /*TREATMENT OF THE TERMINAL GAP PENALTIES*/ /*TG_MODE=0---> gop and gep*/ /*TG_MODE=1---> --- gep*/ /*TG_MODE=2---> --- ---*/ int TG_MODE, gop, l_gop, gep,l_gep, maximise; /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ int a, b,c,k, t; int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2; int su, in, de, tr; int **C, **D, **I, **trace, **pos0, **LD; int lenal[2], len; char *buffer, *char_buf; char **aln, **al; /********Prepare Penalties******/ gop=CL->gop*SCORE_K; gep=CL->gep*SCORE_K; TG_MODE=CL->TG_MODE; maximise=CL->maximise; /********************************/ n_diag=diag[0]; l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]); l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]); if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%% ", (diag[0]*100)/(l1+l2)); /*diag: diag[1..n_diag]--> flaged diagonal in order; diag[0]=0--> first diagonal; diag[n_diag+1]=l1+l2-1; */ /*numeration of the diagonals strats from the bottom right [1...l1+l2-1]*/ /*sequence s1 is vertical and seq s2 is horizontal*/ /*D contains the best Deletion in S2==>comes from diagonal N+1*/ /*I contains the best insertion in S2=> comes from diagonal N-1*/ C=declare_int (lenal[0]+lenal[1]+1, n_diag+2); D=declare_int (lenal[0]+lenal[1]+1, n_diag+2); LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2); I=declare_int (lenal[0]+lenal[1]+1, n_diag+2); trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2); al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1); len= MAX(lenal[0],lenal[1])+1; buffer=(char*)vcalloc ( 2*len, sizeof (char)); char_buf=(char*) vcalloc (2*len, sizeof (char)); pos0=aln2pos_simple ( A,-1, ns, l_s); C[0][0]=0; t=(TG_MODE==0)?gop:0; for ( j=1; j<= n_diag; j++) { l_gop=(TG_MODE==0)?gop:0; l_gep=(TG_MODE==2)?0:gep; if ( (diag[j]-lenal[0])<0 ) { trace[0][j]=UNDEFINED; continue; } C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop; D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop; } D[0][j]=D[0][j-1]+gep; t=(TG_MODE==0)?gop:0; for ( i=1; i<=lenal[0]; i++) { l_gop=(TG_MODE==0)?gop:0; l_gep=(TG_MODE==2)?0:gep; C[i][0]=C[i][n_diag+1]=t=t+l_gep; I[i][0]=D[i][n_diag+1]=t+ gop; for ( j=1; j<=n_diag; j++) { C[i][j]=C[i][0]; D[i][j]=I[i][j]=I[i][0]; } for (eg=0, j=1; j<=n_diag; j++) { pos_j=diag[j]-lenal[0]+i; if (pos_j<=0 || pos_j>l2 ) { trace[i][j]=UNDEFINED; continue; } sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL ); /*1 identify the best insertion in S2:*/ l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop; l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep; len=(j==1)?0:(diag[j]-diag[j-1]); if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++; else eg=1; I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep; /*2 Identify the best deletion in S2*/ l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop; l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep; len=(j==n_diag)?0:(diag[j+1]-diag[j]); delta_i=((i-len)>0)?(i-len):0; if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;} else {LD[i][j]=1;} D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep; /*Identify the best way*/ /* score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]); fop-=1; if ( fop<0)trace[i][j]=fop*eg; else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];} else if ( fop==0) trace[i][j]=0; */ su=C[i-1][j]+sub; in=I[i][j]; de=D[i][j]; /*HERE ("%d %d %d", su, in, de);*/ if (su>=in && su>=de) { score=su; tr=0; } else if (in>=de) { score=in; tr=-eg; } else { score=de; tr=LD[i][j]; } trace[i][j]=tr; C[i][j]=score; last_i=i; last_j=j; } } /* [0][Positive] ^ ^ | / | / | / | / |/ [Neg]<-------[*] */ i=last_i; j=last_j; ala=alb=0; match1=match2=0; while (!(match1==l1 && match2==l2)) { if ( match1==l1) { len=l2-match2; for ( a=0; a< len; a++) { al[0][ala++]=0; al[1][alb++]=1; match2++; } k=0; break; /*k=-(j-1);*/ } else if ( match2==l2) { len=l1-match1; for ( a=0; a< len; a++) { al[0][ala++]=1; al[1][alb++]=0; match1++; } k=0; break; /*k= n_diag-j;*/ } else { k=trace[i][j]; } if ( k==0) { if ( match2==l2 || match1==l1); else { al[0][ala++]=1; al[1][alb++]=1; i--; match1++; match2++; } } else if ( k>0) { len=diag[j+k]-diag[j]; for ( a=0; a<len; a++) { if ( match1==l1)break; al[0][ala++]=1; al[1][alb++]=0; match1++; } i-=len; j+=k; } else if ( k<0) { k*=-1; len=diag[j]-diag[j-k]; for ( a=0; a<len; a++) { if ( match2==l2)break; al[0][ala++]=0; al[1][alb++]=1; match2++; } j-=k; } } LEN=ala; c=LEN-1; invert_list_char ( al[0], LEN); invert_list_char ( al[1], LEN); if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN); aln=A->seq_al; for ( c=0; c< 2; c++) { for ( a=0; a< ns[c]; a++) { ch=0; for ( b=0; b< LEN; b++) { if (al[c][b]==1) char_buf[b]=aln[l_s[c][a]][ch++]; else char_buf[b]='-'; } char_buf[b]='\0'; aln[l_s[c][a]]=csprintf (aln[l_s[c][a]],"%s", char_buf); } } A->len_aln=LEN; A->nseq=ns[0]+ns[1]; free_int (pos0, -1); free_int (C, -1); free_int (D, -1); free_int (I, -1); free_int (trace, -1); free_int (LD, -1); free_char ( al, -1); vfree(buffer); vfree(char_buf); return score; }
int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag) { int a,c,p,k; Dp_Result *DPR; static Dp_Model *M; int l0, l1; int len_i, len_j; int f0=0, f1=0; int deltaf0, deltaf1, delta; int nr1, nr2; int ala, alb, aa0, aa1; int type; char **al; int **tl_s; int *tns; /*DEBUG*/ int debug_cdna_fasta=0; Alignment *DA; int score; int state,prev_state; int t, e; int a1, a2; l0=strlen ( B->seq_al[l_s[0][0]]); l1=strlen ( B->seq_al[l_s[1][0]]); al=declare_char (2, l0+l1+1); B=realloc_aln2 (B,B->nseq,l0+l1+1); free_int (B->cdna_cache, -1); B->cdna_cache=declare_int(1, l0+l1+1); if ( !M)M=initialize_dna_dp_model (CL); M->diag=diag; tl_s=declare_int (2, 2);tns=vcalloc(2, sizeof(int));tl_s[0][0]=0;tl_s[1][0]=3;tns[0]=tns[1]=1; DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M); vfree(tns);free_int(tl_s, -1); /*new_trace_back*/ a=p=0; aa0=aa1=ala=alb=0; while ( (k=DPR->traceback[a++])!=M->START); while ( (k=DPR->traceback[a++])!=M->END) { f0=M->model_properties[k][M->F0]; f1=M->model_properties[k][M->F1]; len_i=M->model_properties[k][M->LEN_I]; len_j=M->model_properties[k][M->LEN_J]; type=M->model_properties[k][M->TYPE]; if (type==M->CODING0) { deltaf0=(aa0*3+f0)-ala; deltaf1=(aa1*3+f1)-alb; delta=MAX(deltaf0, deltaf1); for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++) { if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++]; else al[0][p]='-'; if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++]; else al[1][p]='-'; B->cdna_cache[0][p]=M->NON_CODING; if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--; else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]); } for ( c=0; c< 3; c++, p++) { if ( c==0)B->cdna_cache[0][p]=M->CODING0; else if ( c==1)B->cdna_cache[0][p]=M->CODING1; else if ( c==2)B->cdna_cache[0][p]=M->CODING2; if (ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++]; else al[0][p]='-'; if (alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++]; else al[1][p]='-'; if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--; else if ( debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k, al[0][p], al[1][p]); } } aa0+=len_i; aa1+=len_j; } deltaf0=(aa0*3+f0)-ala; deltaf1=(aa1*3+f1)-alb; delta=MAX(deltaf0, deltaf1); for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++) { if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++]; else al[0][p]='-'; if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++]; else al[1][p]='-'; B->cdna_cache[0][p]=M->NON_CODING; if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--; else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]); } /*End New traceback*/ al[0][p]='\0'; al[1][p]='\0'; sprintf( B->seq_al[l_s[0][0]], "%s", al[0]); sprintf( B->seq_al[l_s[1][0]], "%s", al[1]); B->len_aln=strlen (al[0]); B->nseq=2; if ( debug_cdna_fasta) { fprintf ( stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K); for ( a=1; a<diag[0]; a++) { fprintf ( stderr, "\nchosen diag: %d", diag[a]); } fprintf ( stderr, "\n GOP=%d GEP=%d TG_MODE=%d", M->gop, M->gep, M->TG_MODE); fprintf ( stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE); DA=copy_aln (B, NULL); DA=realloc_aln2 (DA,6,(DA->len_aln+1)); for ( a=0; a<B->len_aln; a++) { fprintf ( stderr, "\n%d", DA->cdna_cache[0][a]); if (DA->cdna_cache[0][a]>=M->CODING0)DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0'; else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0'; if (DA->cdna_cache[0][a]==M->CODING0) { DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*'); DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*'); } else { DA->seq_al[DA->nseq+1][a]='-'; DA->seq_al[DA->nseq+2][a]='-'; } } DA->nseq+=3; print_aln (DA); free_aln(DA); score=0; for (prev_state=M->START,a=0; a< DA->len_aln;) { state=DA->cdna_cache[0][a]; t=M->model[prev_state][state]; if ( DA->cdna_cache[0][a]==M->CODING0) { a1=translate_dna_codon (A->seq_al[0]+a,'x'); a2=translate_dna_codon (A->seq_al[1]+a,'x'); if ( a1!='x' && a2!='x') { e=CL->M[a1-'A'][a2-'A']*SCORE_K; } } else if ( DA->cdna_cache[0][a]>M->CODING0); else { e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION]; } if ( e==UNDEFINED || t==UNDEFINED) fprintf ( stderr, "\nPROBLEM %d\n", a); fprintf ( stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t); score+=e+t; prev_state=state; if (B->cdna_cache[0][a]==M->NON_CODING)a++; else a+=3; } } for ( a=0; a<B->len_aln; a++) { if ( B->cdna_cache[0][a]<M->CODING0)B->cdna_cache[0][a]=0; else B->cdna_cache[0][a]=1; } free_char ( al, -1); return DPR->score; }
int myers_miller_pair_wise (Alignment *A,int *ns, int **ls,Constraint_list *CL ) { int **pos; int a,b, i, j, l,l1, l2, len; int *S; char ** char_buf; int score; /********Prepare Penalties******/ //ns2master_ns (ns,ls, &sns,&sls); sns=ns; sls=ls; /********************************/ pos=aln2pos_simple ( A,-1, ns, ls); l1=strlen (A->seq_al[ls[0][0]]); l2=strlen (A->seq_al[ls[1][0]]); S=(int*)vcalloc (l1+l2+1, sizeof (int)); last=0; sapp=S; score=diff (A,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos); diff (NULL,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos); i=0; j=0; sapp=S; len=0; while (!(i==l1 && j==l2)) { if (*sapp==0) { i++; j++; len++; } else if ( *sapp<0) { i-=*sapp; len-=*sapp; } else if ( *sapp>0) { j+=*sapp; len+=*sapp; } sapp++; } A=realloc_aln2 ( A,A->max_n_seq,len+1); char_buf=declare_char (A->max_n_seq,len+1); i=0; j=0; sapp=S; len=0; while (!(i==l1 && j==l2)) { if (*sapp==0) { for (b=0; b< ns[0]; b++) char_buf[ls[0][b]][len]=A->seq_al[ls[0][b]][i]; for (b=0; b< ns[1]; b++) char_buf[ls[1][b]][len]=A->seq_al[ls[1][b]][j]; i++; j++; len++; } else if ( *sapp>0) { l=*sapp; for ( a=0; a<l; a++, j++, len++) { for (b=0; b< ns[0]; b++) char_buf[ls[0][b]][len]='-'; for (b=0; b< ns[1]; b++) char_buf[ls[1][b]][len]=A->seq_al[ls[1][b]][j]; } } else if ( *sapp<0) { l=-*sapp; for ( a=0; a<l; a++, i++, len++) { for (b=0; b< ns[0]; b++) char_buf[ls[0][b]][len]=A->seq_al[ls[0][b]][i];; for (b=0; b< ns[1]; b++) char_buf[ls[1][b]][len]='-'; } } sapp++; } A->len_aln=len; A->nseq=ns[0]+ns[1]; for ( a=0; a< ns[0]; a++) { char_buf[ls[0][a]][len]='\0'; sprintf ( A->seq_al[ls[0][a]], "%s", char_buf[ls[0][a]]); } for ( a=0; a< ns[1]; a++) { char_buf[ls[1][a]][len]='\0'; sprintf ( A->seq_al[ls[1][a]], "%s", char_buf[ls[1][a]]); } vfree (S); free_char ( char_buf, -1); l1=strlen (A->seq_al[ls[0][0]]); l2=strlen (A->seq_al[ls[1][0]]); if ( l1!=l2) exit(1); free_int (pos, -1); return score; }