/*
 * make_fasta_gotoh_pair_wise
 *
 * Aligns the two groups of sequences of A (ns[0] rows listed in l_s[0] and
 * ns[1] rows listed in l_s[1]) with a dynamic programme that only visits the
 * diagonals listed in diag, using the gap-opening (gop) and gap-extension
 * (gep) penalties found in CL, then rewrites the rows of A with the gaps
 * inserted.
 *
 * Parameters:
 *   A     alignment whose seq_al rows are read and rewritten.
 *   ns    ns[c] is the number of rows in group c (c = 0, 1).
 *   l_s   l_s[c][a] is the index in A->seq_al of row a of group c.
 *   CL    supplies gop, gep, TG_MODE, maximise and the get_dp_cost callback.
 *   diag  diag[0] is read as the number of diagonals; diag[1..diag[0]] are
 *         the diagonals themselves.
 *
 * Terminal gap treatment (original author's note):
 *   TG_MODE=0 ---> gop and gep
 *   TG_MODE=1 ---> ---     gep
 *   TG_MODE=2 ---> ---     ---
 *
 * Original author's notes on the layout:
 *   - diagonals are numbered from the bottom right [1...l1+l2-1];
 *   - sequence s1 is vertical and sequence s2 is horizontal;
 *   - D holds the best deletion in S2 (comes from diagonal N+1);
 *   - I holds the best insertion in S2 (comes from diagonal N-1).
 *
 * Returns the score written in the last DP cell that was filled (0 when no
 * cell was filled).
 *
 * Side effects: A->seq_al rows of both groups are replaced, A->len_aln and
 * A->nseq are updated.
 * NOTE(review): realloc_aln2 is assigned to the local copy of A only; if it
 * can return a different pointer the caller will not see it -- confirm.
 */
int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
{
    int TG_MODE, gop, l_gop, gep, l_gep, maximise;

    /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
    int a, b, c, k, t;
    int l1, l2, eg, ch, sub, score=0, last_i=0, last_j=0, i, delta_i, j, pos_j;
    int ala, alb, LEN, n_diag, match1, match2;
    int su, in, de, tr;
    int **C, **D, **I, **trace, **pos0, **LD;
    int lenal[2], len;
    char *char_buf;
    char **aln, **al;

    /********Prepare Penalties******/
    gop=CL->gop*SCORE_K;
    gep=CL->gep*SCORE_K;
    TG_MODE=CL->TG_MODE;
    maximise=CL->maximise;
    /********************************/

    n_diag=diag[0];

    l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
    l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);

    /* Guard the percentage: l1+l2 is zero when both sequences are empty. */
    if ( getenv ("DEBUG_TCOFFEE"))
        fprintf ( stderr, "\n\tNdiag=%d%% ", (l1+l2)?((diag[0]*100)/(l1+l2)):0);

    /* One row per position of s1 (over-allocated to l1+l2+1 rows), one
       column per diagonal plus the two border columns 0 and n_diag+1. */
    C=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);

    /* al[c][x]==1: column x consumes a residue of group c; 0: gap. */
    al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1);

    /* 2*(max(l1,l2)+1) chars is enough for an alignment of l1+l2 columns
       plus its terminator. */
    len= MAX(lenal[0],lenal[1])+1;
    char_buf=(char*) vcalloc (2*len, sizeof (char));

    pos0=aln2pos_simple ( A,-1, ns, l_s);

    /* Row 0: cost of starting on each diagonal. */
    C[0][0]=0;
    for ( j=1; j<= n_diag; j++)
    {
        l_gop=(TG_MODE==0)?gop:0;
        l_gep=(TG_MODE==2)?0:gep;

        if ( (diag[j]-lenal[0])<0 )
        {
            trace[0][j]=UNDEFINED;
            continue;
        }
        C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
        D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
    }
    /* j is n_diag+1 here: fill the right border column of row 0. */
    D[0][j]=D[0][j-1]+gep;

    t=(TG_MODE==0)?gop:0;
    for ( i=1; i<=lenal[0]; i++)
    {
        /* Recomputed on every row: the inner loop overwrites both. */
        l_gop=(TG_MODE==0)?gop:0;
        l_gep=(TG_MODE==2)?0:gep;

        /* Border columns and default content of row i. */
        C[i][0]=C[i][n_diag+1]=t=t+l_gep;
        I[i][0]=D[i][n_diag+1]=t+ gop;

        for ( j=1; j<=n_diag; j++)
        {
            C[i][j]=C[i][0];
            D[i][j]=I[i][j]=I[i][0];
        }

        for (eg=0, j=1; j<=n_diag; j++)
        {
            pos_j=diag[j]-lenal[0]+i;
            if (pos_j<=0 || pos_j>l2 )
            {
                trace[i][j]=UNDEFINED;
                continue;
            }
            sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );

            /*1 identify the best insertion in S2:*/
            l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
            l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
            len=(j==1)?0:(diag[j]-diag[j-1]);
            /* eg counts how many diagonals the current insertion spans. */
            if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
            else eg=1;
            I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;

            /*2 Identify the best deletion in S2*/
            l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
            l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;

            len=(j==n_diag)?0:(diag[j+1]-diag[j]);
            delta_i=((i-len)>0)?(i-len):0;

            /* LD counts how many diagonals the current deletion spans. */
            if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise))
            {
                LD[i][j]=LD[delta_i][j+1]+1;
            }
            else
            {
                LD[i][j]=1;
            }
            D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;

            /*Identify the best way*/
            /* NOTE(review): this three-way choice always takes the largest
               value and does not consult maximise, unlike the two
               best_of_a_b calls above -- confirm this is intended. */
            su=C[i-1][j]+sub;
            in=I[i][j];
            de=D[i][j];

            if (su>=in && su>=de)
            {
                score=su;
                tr=0;
            }
            else if (in>=de)
            {
                score=in;
                tr=-eg;
            }
            else
            {
                score=de;
                tr=LD[i][j];
            }
            trace[i][j]=tr;
            C[i][j]=score;
            last_i=i;
            last_j=j;
        }
    }

    /*
     * Traceback, starting from the last cell filled.
     *   trace == 0 : match, move one row up on the same diagonal
     *   trace  > 0 : deletion, jump trace diagonals to the right
     *   trace  < 0 : insertion, jump -trace diagonals to the left
     *
     *          [0][Positive]
     *           ^     ^
     *           |    /
     *           |   /
     *           |  /
     *           | /
     *           |/
     *  [Neg]<---[*]
     */
    i=last_i;
    j=last_j;

    ala=alb=0;
    match1=match2=0;
    while (!(match1==l1 && match2==l2))
    {
        if ( match1==l1)
        {
            /* s1 exhausted: the rest of s2 faces gaps. */
            len=l2-match2;
            for ( a=0; a< len; a++)
            {
                al[0][ala++]=0;
                al[1][alb++]=1;
                match2++;
            }
            break;
        }
        else if ( match2==l2)
        {
            /* s2 exhausted: the rest of s1 faces gaps. */
            len=l1-match1;
            for ( a=0; a< len; a++)
            {
                al[0][ala++]=1;
                al[1][alb++]=0;
                match1++;
            }
            break;
        }

        /* Neither sequence is exhausted from here on. */
        k=trace[i][j];

        if ( k==0)
        {
            al[0][ala++]=1;
            al[1][alb++]=1;
            i--;
            match1++;
            match2++;
        }
        else if ( k>0)
        {
            len=diag[j+k]-diag[j];
            for ( a=0; a<len; a++)
            {
                if ( match1==l1)break;
                al[0][ala++]=1;
                al[1][alb++]=0;
                match1++;
            }
            i-=len;
            j+=k;
        }
        else
        {
            k*=-1;
            len=diag[j]-diag[j-k];
            for ( a=0; a<len; a++)
            {
                if ( match2==l2)break;
                al[0][ala++]=0;
                al[1][alb++]=1;
                match2++;
            }
            j-=k;
        }
    }

    /* The columns were collected from the end: put them back in order. */
    LEN=ala;
    invert_list_char ( al[0], LEN);
    invert_list_char ( al[1], LEN);
    if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
    aln=A->seq_al;

    /* Rewrite every row of both groups following the 0/1 pattern. */
    for ( c=0; c< 2; c++)
    {
        for ( a=0; a< ns[c]; a++)
        {
            ch=0;
            for ( b=0; b< LEN; b++)
            {
                if (al[c][b]==1)
                    char_buf[b]=aln[l_s[c][a]][ch++];
                else
                    char_buf[b]='-';
            }
            char_buf[b]='\0';
            aln[l_s[c][a]]=csprintf (aln[l_s[c][a]],"%s", char_buf);
        }
    }

    A->len_aln=LEN;
    A->nseq=ns[0]+ns[1];

    free_int (pos0, -1);
    free_int (C, -1);
    free_int (D, -1);
    free_int (I, -1);
    free_int (trace, -1);
    free_int (LD, -1);
    free_char ( al, -1);
    vfree(char_buf);

    return score;
}
/*
 * make_fast_generic_dp_pair_wise
 *
 * Runs the state model M over the two groups of sequences of A (ns[0] rows
 * listed in l_s[0], ns[1] rows listed in l_s[1]), visiting only the diagonals
 * listed in M->diag, and returns the best path through the states.
 *
 * Parameters:
 *   A    alignment holding the sequences; passing A==NULL is a cleaning call
 *        that releases the static work matrices and returns NULL (M is not
 *        read in that case).
 *   ns   ns[c] is the number of rows in group c (c = 0, 1).
 *   l_s  l_s[c][a] is the index in A->seq_al of row a of group c.
 *   M    the model: transition costs (model), per-state emission functions,
 *        per-state properties (DELTA_I, DELTA_J, LEN_I, LEN_J), the list of
 *        allowed predecessors (bounded_model), the START/END state indices
 *        and the diagonals (diag[0] = count, diag[1..count] = diagonals).
 *
 * Returns a freshly allocated Dp_Result:
 *   score      best terminal score (UNDEFINED when no state is reachable);
 *   traceback  M->START, the states of the path in order, then M->END;
 *   len        number of entries up to and including the path (START is
 *              counted, the trailing END is not).
 *
 * Side effects:
 *   - Mat, LMat and trace are static and kept between calls; they are only
 *     reallocated when a call needs more room.
 *   - M->diag[0] is overwritten with 1 and M->diag[ndiag+1] with
 *     M->diag[ndiag]; neither is restored.
 *     NOTE(review): a second call with the same M would therefore read a
 *     diagonal count of 1 -- confirm callers rebuild M->diag.
 *
 * Matrices are flat: cell (state, i, j) lives at state*mI + i*mJ + j, with i
 * the position in the first sequence and j the index of the diagonal.
 */
Dp_Result * make_fast_generic_dp_pair_wise (Alignment *A, int*ns, int **l_s,Dp_Model *M)
{
    /*SIZE VARIABLES*/
    int ndiag;
    int l0, l1, len_al, len_diag;
    static int max_len_al, max_len_diag;
    static int max_nstate;
    static int mI, mJ;

    /*EVALUATION*/
    int **pos0;

    /*DP VARIABLES*/
    static int *Mat, *LMat, *trace;
    int a, i, j, l;
    int state, cur_state, prev_state;
    int pos_i=0, pos_j=0;
    int last_i=0, last_j=0;
    int prev_i, prev_j;
    int len_i, len_j, len;
    int t, e, em;

    int prev_score;
    int pc, best_pc;

    int *prev;
    int model_index;

    /*TRACEBACK*/
    Dp_Result *DPR;
    int k=0, next_k;
    int new_i, new_j;

    /*CLEANING CALL*/
    if ( A==NULL)
    {
        max_len_al=0;
        max_len_diag=0;
        max_nstate=0;
        mI=0;
        mJ=0;
        vfree (Mat);
        vfree (LMat);
        vfree (trace);
        Mat=trace=LMat=NULL;
        return NULL;
    }

    ndiag=M->diag[0];

    l0=strlen (A->seq_al[l_s[0][0]]);
    l1=strlen (A->seq_al[l_s[1][0]]);
    len_al =l0+l1+1;
    len_diag=ndiag+4;

    /* Drop the cached matrices when any dimension outgrew them. The number
       of states is part of the allocated size, so it must be checked too:
       otherwise a model with more states than the one used at allocation
       time would index past the end of the buffers. */
    if ( len_al>max_len_al || len_diag>max_len_diag || M->nstate>max_nstate)
    {
        vfree (Mat);
        vfree (LMat);
        vfree (trace);
        max_len_diag=max_len_al=0;
    }

    if (max_len_al==0)
    {
        max_len_al=len_al;
        max_len_diag=len_diag;
        max_nstate=M->nstate;
        mI=max_len_al*max_len_diag;
        mJ=max_len_diag;

        Mat  =(int*)vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
        LMat =(int*)vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
        trace=(int*)vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
    }

    prev=(int*)vcalloc ( M->nstate, sizeof (int));
    DPR=( Dp_Result*)vcalloc ( 1, sizeof ( Dp_Result));
    /* The traceback can hold up to l0+l1 path states, plus START, plus the
       END written one slot past DPR->len: reserve two slots beyond
       max_len_al so the END store always stays inside the buffer. */
    DPR->traceback=(int*)vcalloc (max_len_al+2, sizeof (int));

    /*PREPARE THE EVALUATION*/
    pos0=aln2pos_simple ( A,-1, ns, l_s);

    /*INITIALIZATION OF THE DP MATRICES*/
    for (i=0; i<=l0; i++)
    {
        for (j=0; j<=ndiag+1; j++)
        {
            for ( state=0; state<M->nstate; state++)
            {
                Mat  [state*mI+i*mJ+j]=UNDEFINED;
                LMat [state*mI+i*mJ+j]=UNDEFINED;
                trace[state*mI+i*mJ+j]=M->START;
            }
        }
    }

    /* Border diagonals used by the loops below (see the header note). */
    M->diag[0]=1;
    M->diag[ndiag+1]=M->diag[ndiag];

    /* Border cells: those where one of the two positions is 0. */
    for (i=0; i<=l0; i++)
    {
        for ( j=0; j<=ndiag+1; j++)
        {
            pos_j=M->diag[j]-l0+i;
            pos_i=i;
            if (!(pos_j==0 || pos_i==0))continue;
            if ( pos_j<0 || pos_i<0)continue;

            if ( pos_i==0 && pos_j==0)
            {
                for ( a=0; a< M->nstate; a++)
                {
                    Mat  [a*mI+i*mJ+j]=0;
                    LMat [a*mI+i*mJ+j]=0;
                    trace[a*mI+i*mJ+j]=M->START;
                }
            }
            else
            {
                l=MAX(pos_i,pos_j);
                for ( state=0; state<M->START; state++)
                {
                    if (pos_j==0 && M->model_properties[state][M->LEN_J])continue;
                    if (pos_i==0 && M->model_properties[state][M->LEN_I])continue;

                    t=M->model[M->START][state];
                    e=((M->model_emission_function)[state][M->START_EMISSION])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);

                    Mat  [state*mI+i*mJ+j]=t+e*l;
                    LMat [state*mI+i*mJ+j]=l;
                    trace[state*mI+i*mJ+j]=M->START;
                }
            }
        }
    }

    /*DYNAMIC PROGRAMMING: Forward Pass*/
    /* Original author's note: diagonals are numbered 1...l0+l1-1 and 1 is
       the bottom-left diagonal. */
    for (i=1; i<=l0; i++)
    {
        for (j=1; j<=ndiag; j++)
        {
            pos_j=M->diag[j]-l0+i;
            pos_i=i;

            if (pos_j<=0 || pos_j>l1 )continue;
            last_i=i;
            last_j=j;

            for (cur_state=0; cur_state<M->START; cur_state++)
            {
                /* Locate the cell this state comes from. A state that moves
                   across diagonals jumps to the neighbouring listed diagonal
                   and moves i by the distance between the two. */
                if (M->model_properties[cur_state][M->DELTA_J])
                {
                    prev_j=j+M->model_properties[cur_state][M->DELTA_J];
                    prev_i=i+M->model_properties[cur_state][M->DELTA_I]*FABS((M->diag[j]-M->diag[prev_j]));
                }
                else
                {
                    prev_j=j;
                    prev_i=i+M->model_properties[cur_state][M->DELTA_I];
                }
                len_i=FABS((i-prev_i));
                len_j=FABS((M->diag[prev_j]-M->diag[j]));
                len=MAX(len_i, len_j);

                em=((M->model_emission_function[cur_state][M->EMISSION]))(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);

                /* bounded_model[cur_state][0] is the number of allowed
                   predecessors, listed from index 1. */
                for (pc=best_pc=UNDEFINED, model_index=1; model_index<=M->bounded_model[cur_state][0]; model_index++)
                {
                    prev_state=M->bounded_model[cur_state][model_index];

                    if (prev_i<0 || prev_j<0 || prev_i>l0 || prev_j>ndiag || len==UNDEFINED)
                        prev_score=UNDEFINED;
                    else
                        prev_score=Mat[prev_state*mI+prev_i*mJ+prev_j];

                    t=M->model[prev_state][cur_state];
                    e=em;

                    if (prev_score==UNDEFINED || len==UNDEFINED)e=UNDEFINED;
                    else if (len==0 || e==UNDEFINED)e=UNDEFINED;
                    else e=e*len;

                    if (is_defined_int(3,prev_score,e, t))
                        pc=prev_score+t+e;
                    else
                        pc=UNDEFINED;

                    /*Identify the best previous score*/
                    if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED))
                    {
                        prev[cur_state]=prev_state;
                        best_pc=pc;
                    }
                }

                Mat[cur_state*mI+i*mJ+j]=best_pc;

                if ( Mat[cur_state*mI+i*mJ+j]==UNDEFINED)
                {
                    LMat [cur_state*mI+i*mJ+j]=UNDEFINED;
                    trace[cur_state*mI+i*mJ+j]=UNDEFINED;
                    continue;
                }
                else if ( prev[cur_state]==cur_state)
                {
                    /* Staying in the same state: extend the current run and
                       keep pointing at the state that preceded the run. */
                    LMat [cur_state*mI+i*mJ+j]= LMat [cur_state*mI+prev_i*mJ+prev_j]+len;
                    trace[cur_state*mI+i*mJ+j]= trace[cur_state*mI+prev_i*mJ+prev_j];
                }
                else
                {
                    LMat [cur_state*mI+i*mJ+j]=len;
                    trace[cur_state*mI+i*mJ+j]=prev[cur_state];
                }
            }
        }
    }

    /* Termination: pick the best state in the last cell that was filled. */
    i=last_i;
    j=last_j;
    for (pc=best_pc=UNDEFINED, state=0; state<M->START; state++)
    {
        t=M->model[state][M->END];
        /* NOTE(review): pos_i and pos_j still hold the values of the last
           iteration of the forward loops, which need not be the cell
           (last_i, last_j) -- confirm the terminal emission functions do not
           depend on the position. */
        e=( M->model_emission_function[state][M->TERM_EMISSION])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);
        l=LMat[state*mI+i*mJ+j];

        if (!is_defined_int(4,t,e,Mat[state*mI+i*mJ+j],l))
            Mat[state*mI+i*mJ+j]=UNDEFINED;
        else
            Mat[state*mI+i*mJ+j]+=t+e*(l);
        pc=Mat[state*mI+i*mJ+j];

        if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED))
        {
            k=state;
            best_pc=pc;
        }
    }
    DPR->score=best_pc;

    /*TRACEBACK*/
    /* Each step emits the current state once per unit of its run length,
       then jumps to the cell where the run started. */
    len=0;
    while (k!=M->START)
    {
        next_k=trace[k*mI+i*mJ+j];
        new_i=i;
        new_j=j;
        l=LMat[k*mI+i*mJ+j];
        for (a=0; a< l; a++)
        {
            DPR->traceback[len++]=k;
        }
        new_i+=M->model_properties[k][M->DELTA_I]*l;

        if ( M->model_properties[k][M->DELTA_J])
        {
            /* Walk the diagonal list until the distance covered is l. */
            while ( next_k!=M->START && FABS((M->diag[j]-M->diag[new_j]))!=l)
                new_j+=M->model_properties[k][M->DELTA_J];
        }

        i=new_i;
        j=new_j;
        k=next_k;
    }

    /* The path was collected backwards: append START, reverse, and write
       END just after the counted entries. */
    DPR->len=len;
    DPR->traceback[DPR->len++]=M->START;
    invert_list_int (DPR->traceback,DPR->len);
    DPR->traceback[DPR->len]=M->END;

    vfree (prev);
    free_int (pos0, -1);
    return DPR;
}
/*
 * myers_miller_pair_wise
 *
 * Aligns the two groups of sequences of A (ns[0] rows listed in ls[0], ns[1]
 * rows listed in ls[1]) by calling diff(), which fills an edit script, then
 * rewrites the rows of A according to that script.
 *
 * Edit script (one int per operation, read through sapp):
 *    0  : one column taken from both groups
 *   >0  : that many columns taken from group 1, group 0 gets '-'
 *   <0  : that many (absolute value) columns taken from group 0,
 *         group 1 gets '-'
 *
 * Parameters:
 *   A   alignment whose seq_al rows are read and rewritten.
 *   ns  ns[c] is the number of rows in group c (c = 0, 1).
 *   ls  ls[c][a] is the index in A->seq_al of row a of group c.
 *   CL  passed through to diff().
 *
 * Returns the value returned by the first diff() call.
 *
 * Side effects: rewrites A->seq_al for both groups, sets A->len_aln and
 * A->nseq, and writes sns, sls, last and sapp, which are not declared in
 * this function (presumably file-scope state shared with diff() -- confirm).
 * Terminates the process with status 1 if the first rows of the two groups
 * end up with different lengths.
 * NOTE(review): realloc_aln2 is assigned to the local copy of A only; if it
 * can return a different pointer the caller will not see it -- confirm.
 */
int myers_miller_pair_wise (Alignment *A,int *ns, int **ls,Constraint_list *CL )
{
    int **pos;
    int a, b, i, j, l, l1, l2, len;
    int *S;
    char **char_buf;
    int score;

    /********Prepare Penalties******/
    /* Disabled by the original author: ns2master_ns (ns,ls, &sns,&sls); */
    sns=ns;
    sls=ls;
    /********************************/

    pos=aln2pos_simple ( A,-1, ns, ls);

    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);

    /* S receives the edit script; sapp is the write/read cursor into it. */
    S=(int*)vcalloc (l1+l2+1, sizeof (int));
    last=0;
    sapp=S;

    score=diff (A,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);
    /* Second call with a NULL alignment; its result is ignored
       (presumably it releases diff()'s internal buffers -- confirm). */
    diff (NULL,ns, ls, 0, l1, 0, l2, 0, 0, CL, pos);

    /* First pass over the script: measure the alignment length. */
    i=0;
    j=0;
    sapp=S;
    len=0;
    while (!(i==l1 && j==l2))
    {
        if (*sapp==0)
        {
            i++;
            j++;
            len++;
        }
        else if ( *sapp<0)
        {
            i-=*sapp;
            len-=*sapp;
        }
        else if ( *sapp>0)
        {
            j+=*sapp;
            len+=*sapp;
        }
        sapp++;
    }

    A=realloc_aln2 ( A,A->max_n_seq,len+1);
    char_buf=declare_char (A->max_n_seq,len+1);

    /* Second pass: build the gapped rows in char_buf. */
    i=0;
    j=0;
    sapp=S;
    len=0;
    while (!(i==l1 && j==l2))
    {
        if (*sapp==0)
        {
            for (b=0; b< ns[0]; b++)
                char_buf[ls[0][b]][len]=A->seq_al[ls[0][b]][i];
            for (b=0; b< ns[1]; b++)
                char_buf[ls[1][b]][len]=A->seq_al[ls[1][b]][j];
            i++;
            j++;
            len++;
        }
        else if ( *sapp>0)
        {
            l=*sapp;
            for ( a=0; a<l; a++, j++, len++)
            {
                for (b=0; b< ns[0]; b++)
                    char_buf[ls[0][b]][len]='-';
                for (b=0; b< ns[1]; b++)
                    char_buf[ls[1][b]][len]=A->seq_al[ls[1][b]][j];
            }
        }
        else if ( *sapp<0)
        {
            l=-*sapp;
            for ( a=0; a<l; a++, i++, len++)
            {
                for (b=0; b< ns[0]; b++)
                    char_buf[ls[0][b]][len]=A->seq_al[ls[0][b]][i];
                for (b=0; b< ns[1]; b++)
                    char_buf[ls[1][b]][len]='-';
            }
        }
        sapp++;
    }

    A->len_aln=len;
    A->nseq=ns[0]+ns[1];

    /* Terminate each rebuilt row and copy it back into the alignment. */
    for ( a=0; a< ns[0]; a++)
    {
        char_buf[ls[0][a]][len]='\0';
        sprintf ( A->seq_al[ls[0][a]], "%s", char_buf[ls[0][a]]);
    }
    for ( a=0; a< ns[1]; a++)
    {
        char_buf[ls[1][a]][len]='\0';
        sprintf ( A->seq_al[ls[1][a]], "%s", char_buf[ls[1][a]]);
    }

    vfree (S);
    free_char ( char_buf, -1);

    /* Sanity check: both groups must now have the same length. */
    l1=strlen (A->seq_al[ls[0][0]]);
    l2=strlen (A->seq_al[ls[1][0]]);
    if ( l1!=l2)
    {
        /* Say why before leaving instead of exiting silently. */
        fprintf ( stderr, "\nERROR: myers_miller_pair_wise produced rows of unequal length (%d vs %d) [FATAL]\n", l1, l2);
        exit(1);
    }

    free_int (pos, -1);
    return score;
}