/*
 * make_fasta_cdna_pair_wise
 *
 * Rebuilds the pairwise alignment of B->seq_al[l_s[0][0]] and
 * B->seq_al[l_s[1][0]] from the traceback returned by
 * make_fast_dp_pair_wise(), which is run on sequences 0 and 3 of A with the
 * (lazily created, function-static) DNA model M.
 *
 *   B      alignment holding the two nucleotide sequences; it is reallocated
 *          to l0+l1+1 columns, its two sequences are overwritten with the
 *          gapped result, B->len_aln is updated and B->nseq is set to 2.
 *          B->cdna_cache[0] is replaced: on return each column holds 1 when
 *          its model state is >= M->CODING0 and 0 otherwise.
 *   A      alignment handed to the DP routine
 *          (NOTE(review): presumably the translated frames -- confirm).
 *   in_ns  unused by this function.
 *   l_s    l_s[0][0] and l_s[1][0] are the indices in B of the two sequences.
 *   CL     constraint list used to build the model and forwarded to the DP.
 *   diag   stored in M->diag before the DP is run.
 *
 * Returns DPR->score, the score reported by make_fast_dp_pair_wise().
 */
int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag)
{
  int a, c, p, k;
  Dp_Result *DPR;
  static Dp_Model *M;
  int l0, l1;
  int len_i, len_j;
  int f0=0, f1=0;
  int deltaf0, deltaf1, delta;
  int ala, alb, aa0, aa1;
  int type;
  char **al;
  int **tl_s;
  int *tns;

  /*DEBUG*/
  int debug_cdna_fasta=0;
  Alignment *DA;
  int score;
  int state, prev_state;
  int t, e;
  int a1, a2;

  l0=strlen (B->seq_al[l_s[0][0]]);
  l1=strlen (B->seq_al[l_s[1][0]]);

  /* Every emitted column carries at least one residue, so l0+l1 columns
     (plus the terminator) is enough for the gapped result. */
  al=declare_char (2, l0+l1+1);
  B=realloc_aln2 (B, B->nseq, l0+l1+1);

  free_int (B->cdna_cache, -1);
  B->cdna_cache=declare_int (1, l0+l1+1);

  if (!M)
    {
      M=initialize_dna_dp_model (CL);
    }
  M->diag=diag;

  /* One sequence per group: index 0 against index 3 of A. */
  tl_s=declare_int (2, 2);
  tns=vcalloc (2, sizeof (int));
  tl_s[0][0]=0;
  tl_s[1][0]=3;
  tns[0]=tns[1]=1;

  DPR=make_fast_dp_pair_wise (A, tns, tl_s, CL, M);
  vfree (tns);
  free_int (tl_s, -1);

  /*new_trace_back*/
  a=p=0;
  aa0=aa1=ala=alb=0;

  /* Skip everything up to and including the START state. */
  while ((k=DPR->traceback[a++])!=M->START)
    {
      ;
    }

  while ((k=DPR->traceback[a++])!=M->END)
    {
      f0=M->model_properties[k][M->F0];
      f1=M->model_properties[k][M->F1];
      len_i=M->model_properties[k][M->LEN_I];
      len_j=M->model_properties[k][M->LEN_J];
      type=M->model_properties[k][M->TYPE];

      if (type==M->CODING0)
        {
          /* Nucleotides lying between the last emitted position and the
             start of this codon are emitted as NON_CODING columns. */
          deltaf0=(aa0*3+f0)-ala;
          deltaf1=(aa1*3+f1)-alb;
          delta=MAX(deltaf0, deltaf1);

          for (c=0; c<delta; c++, p++)
            {
              if (c<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
              else al[0][p]='-';

              if (c<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
              else al[1][p]='-';

              B->cdna_cache[0][p]=M->NON_CODING;

              /* A gap-only column is dropped: p-- cancels the loop's p++. */
              if (is_gap(al[1][p]) && is_gap(al[0][p]))p--;
              else if (debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]);
            }

          /* The three columns of the matched codon. */
          for (c=0; c<3; c++, p++)
            {
              if (c==0)B->cdna_cache[0][p]=M->CODING0;
              else if (c==1)B->cdna_cache[0][p]=M->CODING1;
              else if (c==2)B->cdna_cache[0][p]=M->CODING2;

              if (ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
              else al[0][p]='-';

              if (alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
              else al[1][p]='-';

              if (is_gap(al[1][p]) && is_gap(al[0][p]))p--;
              else if (debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k, al[0][p], al[1][p]);
            }
        }

      aa0+=len_i;
      aa1+=len_j;
    }

  /* Trailing region after the last state, using the f0/f1 of that state. */
  deltaf0=(aa0*3+f0)-ala;
  deltaf1=(aa1*3+f1)-alb;
  delta=MAX(deltaf0, deltaf1);
  for (c=0; c<delta; c++, p++)
    {
      if (c<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
      else al[0][p]='-';

      if (c<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
      else al[1][p]='-';

      B->cdna_cache[0][p]=M->NON_CODING;

      if (is_gap(al[1][p]) && is_gap(al[0][p]))p--;
      else if (debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]);
    }
  /*End New traceback*/

  al[0][p]='\0';
  al[1][p]='\0';

  sprintf (B->seq_al[l_s[0][0]], "%s", al[0]);
  sprintf (B->seq_al[l_s[1][0]], "%s", al[1]);
  B->len_aln=strlen (al[0]);
  B->nseq=2;

  if (debug_cdna_fasta)
    {
      fprintf (stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K);
      for (a=1; a<diag[0]; a++)
        {
          fprintf (stderr, "\nchosen diag: %d", diag[a]);
        }

      fprintf (stderr, "\n GOP=%d GEP=%d TG_MODE=%d", M->gop, M->gep, M->TG_MODE);
      /* NOTE(review): the F_GOP/F_GEP labels print M->gop and M->gep --
         confirm whether frameshift-specific fields were intended. */
      fprintf (stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE);

      DA=copy_aln (B, NULL);
      DA=realloc_aln2 (DA,6,(DA->len_aln+1));

      for (a=0; a<B->len_aln; a++)
        {
          fprintf (stderr, "\n%d", DA->cdna_cache[0][a]);

          /* The original code had an if/else with two identical branches. */
          DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';

          if (DA->cdna_cache[0][a]==M->CODING0)
            {
              DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*');
              DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*');
            }
          else
            {
              DA->seq_al[DA->nseq+1][a]='-';
              DA->seq_al[DA->nseq+2][a]='-';
            }
        }
      DA->nseq+=3;
      print_aln (DA);

      /* score is accumulated here but not reported by this function. */
      score=0;
      for (prev_state=M->START, a=0; a<DA->len_aln;)
        {
          state=DA->cdna_cache[0][a];
          t=M->model[prev_state][state];

          /* Reset on every step so that a codon that cannot be translated
             (or a state above CODING0) never reads an indeterminate or
             stale emission value. */
          e=0;

          if (state==M->CODING0)
            {
              a1=translate_dna_codon (A->seq_al[0]+a,'x');
              a2=translate_dna_codon (A->seq_al[1]+a,'x');

              if (a1!='x' && a2!='x')
                {
                  e=CL->M[a1-'A'][a2-'A']*SCORE_K;
                }
            }
          else if (state>M->CODING0)
            {
              ; /* no emission is computed for these states */
            }
          else
            {
              e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION];
            }

          if (e==UNDEFINED || t==UNDEFINED) fprintf (stderr, "\nPROBLEM %d\n", a);

          fprintf (stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t);
          score+=e+t;
          prev_state=state;

          if (B->cdna_cache[0][a]==M->NON_CODING)a++;
          else a+=3;
        }

      /* DA is released only once the loop above has stopped reading it. */
      free_aln (DA);
    }

  /* Collapse the per-column model states into a coding (1) / other (0) flag. */
  for (a=0; a<B->len_aln; a++)
    {
      if (B->cdna_cache[0][a]<M->CODING0)B->cdna_cache[0][a]=0;
      else B->cdna_cache[0][a]=1;
    }

  free_char (al, -1);
  return DPR->score;
}
/*
 * extract_domain
 *
 * From the original in-function documentation: given a CL, this function
 * extracts the next best scoring local multiple alignment; the aligned
 * residues are indicated in (CL->moca)->forbiden_residues, and the local
 * alignment is extracted with the dp function indicated by CL->dp_mode
 * (gotoh_sw_pair_wise).
 *
 *   Evaluation:             CL->get_dp_cost=slow_get_dp_cost;
 *                           CL->evaluate_residue_pair=sw_residue_pair_extended_list;
 *   Continuation:           (CL->moca)->evaluate_domain=evaluate_moca_domain;
 *   Cache of CL:            (CL->moca)->cache_cl_with_domain=cache_cl_with_moca_domain;
 *   Domain post processing: (CL->moca)->make_nol_aln=make_moca_nol_aln;
 *
 * Control flow as written:
 *   - (CL->moca)->moca_interactive set: the result of
 *     interactive_domain_extraction(CL) is returned.
 *   - (CL->moca)->moca_len set: the domain at moca_start/moca_len is
 *     extracted, multiplying moca_scale by 0.9 for as long as the extraction
 *     yields no sequence; a copy of it is unpacked, written to "stdout" in
 *     "mocca_aln" format and returned.
 *   - otherwise: analyse_sequence(CL) is run and myexit(EXIT_FAILURE) called.
 *     NOTE(review): the statements after that call only execute if myexit()
 *     returns -- confirm against its definition.
 */
Alignment * extract_domain ( Constraint_list *CL)
{
  int lo_start, hi_start, best_start;
  int lo_len, hi_len, best_len;
  int best_score;
  int stride;
  int seq0_len;
  Alignment *C=NULL;
  Alignment *RESULT=NULL;
  Alignment *EA=NULL;

  /*CASE 1: Non Automatic Domain Extraction*/
  if ((CL->moca)->moca_interactive)
    {
      return interactive_domain_extraction (CL);
    }

  if ((CL->moca)->moca_len)
    {
      for (;;)
        {
          C=extract_domain_with_coordinates (C, (CL->moca)->moca_start, (CL->moca)->moca_len, CL);
          if (C->nseq!=0)
            {
              break;
            }
          /* Nothing extracted: relax the scale and try again. */
          (CL->moca)->moca_scale*=0.9;
        }

      RESULT=copy_aln (C, RESULT);
      unpack_seq_aln (RESULT, CL);

      EA=fast_coffee_evaluate_output (RESULT, CL);
      output_format_aln ("mocca_aln", RESULT, EA, "stdout");
      free_aln (EA);

      return RESULT;
    }

  /* moca_len is zero at this point. */
  analyse_sequence (CL);
  myexit (EXIT_FAILURE);

  /*CASE 2: Automatic Domain Extraction: Find Coordinates*/
  seq0_len=strlen ((CL->S)->seq[0]);

  best_start=500;
  stride=10;
  lo_start=0;
  hi_start=seq0_len;
  lo_len=20;
  hi_len=seq0_len;

  C=extract_domain_with_coordinates (C, 13, 30, CL);
  C->output_res_num=1;
  print_aln (C);

  (CL->moca)->moca_scale=-180;
  C=add_seq2aln (CL, C, CL->S);
  print_aln (C);

  (CL->moca)->moca_scale=-160;
  C=add_seq2aln (CL, C, CL->S);
  print_aln (C);

  myexit (EXIT_FAILURE);

  /* Narrow the start/length windows around the current best, halving the
     step on every pass until it reaches zero. */
  for (; stride>0; stride/=2)
    {
      C=approximate_domain (lo_start, hi_start, stride, lo_len, hi_len, stride, &best_start, &best_len, &best_score, CL);

      lo_start=best_start-stride;
      hi_start=best_start+stride;
      lo_len=best_len-stride;
      hi_len=best_len+stride;
    }

  C=extract_domain_with_coordinates (C, best_start-10, best_len+20, CL);
  C->output_res_num=1;
  print_aln (C);

  myexit (EXIT_FAILURE);
  return C;
}