Sequence * declare_sequence ( int min, int max, int nseq) { Sequence *LS; LS=vcalloc (1, sizeof ( Sequence)); LS->seq_comment=declare_char ( nseq,COMMENT_SIZE); LS->aln_comment=declare_char ( nseq,COMMENT_SIZE); LS->file=declare_char( nseq,STRING+1); LS->seq=declare_char ( nseq, max+1); LS->name=declare_char( nseq,MAXNAMES+1); LS->len=vcalloc ( nseq, sizeof (int)); LS->max_len=max; LS->min_len=min; LS->nseq=nseq; LS->max_nseq=nseq; LS->type=vcalloc(30, sizeof (char)); LS->T=declare_arrayN(2, sizeof (Template), nseq, 1); LS->dc=declare_int (nseq, 2); return LS; }
Oligo * read_oligo_list ( char *fname) { Oligo *O; FILE *fp; int a, b; O=vcalloc (1, sizeof (Oligo)); O->ALPHABET=vcalloc ( 100, sizeof (char)); O->EALPHABET=vcalloc ( 100, sizeof (char)); O->AMBIGUITIES=vcalloc ( 100, sizeof (char)); fp=vfopen ( fname, "r"); fscanf ( fp, "ALPHABET %s\n", O->ALPHABET); fscanf ( fp, "AMBIG_ALPHABET %s\n", O->AMBIGUITIES); if ( O->AMBIGUITIES[0]=='@')O->AMBIGUITIES[0]='\0'; fscanf ( fp, "WORD_SIZE %d\n", &O->WSIZE); fscanf ( fp, "NSEQ %d\n", &O->NSEQ); fscanf ( fp, "LEN %d\n", &O->LEN); fscanf ( fp, "SCORE %d", &a); sprintf ( O->EALPHABET, "%s%s", O->ALPHABET, O->AMBIGUITIES); O->seq=declare_char ( O->NSEQ, O->LEN+1); for ( a=0; a< O->NSEQ; a++) { fscanf ( fp, "%*s\n%s\n",O->seq[a]); } vfclose (fp); return O; }
/*
 * prepare_cl_for_moca: configure a constraint list for MOCA and append a
 * "packed" sequence made of every input sequence joined by 'X'.
 *
 * Side effects on CL:
 *   - do_self is set to 1 and the MOCA cost/evaluation callbacks are installed;
 *   - CL->packed_seq_lu[p] maps position p (1-based) of the packed sequence
 *     to { source sequence index, 1-based residue index }; each separator
 *     position gets [0] = UNDEFINED;
 *   - CL->S is replaced by the result of add_sequence(), which receives the
 *     concatenated sequence under the name "catseq".
 *
 * Returns CL.
 */
Constraint_list *prepare_cl_for_moca ( Constraint_list *CL)
{
    Sequence *packed = NULL;
    char **cat_seq, **cat_name;
    int s, r, pos, res_len;
    int lu_rows;

    /* Constraint-list level hooks. */
    CL->do_self = 1;
    CL->get_dp_cost = moca_slow_get_dp_cost;
    CL->evaluate_residue_pair = moca_residue_pair_extended_list;

    /* MOCA parameter hooks. */
    (CL->moca)->evaluate_domain = evaluate_moca_domain;
    (CL->moca)->cache_cl_with_domain = cache_cl_with_moca_domain;
    (CL->moca)->make_nol_aln = make_moca_nol_aln;

    /* Size of the packed sequence: 1 + sum over sequences of (length + separator). */
    lu_rows = 1;
    s = 0;
    while (s < (CL->S)->nseq)
    {
        lu_rows += strlen ((CL->S)->seq[s]) + 1;
        s++;
    }

    cat_seq  = declare_char (1, lu_rows+1);
    cat_name = declare_char (1, 30);
    CL->packed_seq_lu = declare_int (lu_rows, 2);

    /* Concatenate and fill the lookup table; packed positions start at 1. */
    pos = 1;
    for (s = 0; s < (CL->S)->nseq; s++)
    {
        char *src = (CL->S)->seq[s];

        strcat (cat_seq[0], src);
        strcat (cat_seq[0], "X");
        res_len = strlen (src);

        for (r = 1; r <= res_len; r++)
        {
            CL->packed_seq_lu[pos][0] = s;
            CL->packed_seq_lu[pos][1] = r;
            pos++;
        }

        /* Separator slot. */
        CL->packed_seq_lu[pos][0] = UNDEFINED;
        pos++;
    }

    sprintf (cat_name[0], "catseq");
    packed = fill_sequence_struc (1, cat_seq, cat_name, NULL);
    CL->S = add_sequence (packed, CL->S, 0);

    free_char (cat_seq, -1);
    free_char (cat_name, -1);
    free_sequence (packed, packed->nseq);

    return CL;
}
int *code_seq (char *seq, char *type) { static int *code; static int *aa, ng; int a, b, l; if (!aa) { char **gl; if ( strm (type, "DNA") || strm (type, "RNA")) { gl=declare_char (4,5); sprintf ( gl[ng++], "Aa"); sprintf ( gl[ng++], "Gg"); sprintf ( gl[ng++], "TtUu"); sprintf ( gl[ng++], "Cc"); } else { gl=make_group_aa ( &ng, "mafft"); } aa=(int*)vcalloc ( 256, sizeof (int)); for ( a=0; a<ng; a++) { for ( b=0; b< strlen (gl[a]); b++) { aa[(int)gl[a][b]]=a; } } free_char (gl, -1); } l=strlen (seq); if ( code) code--; if ( !code || read_array_size (code, sizeof (int))<(l+2)) { vfree (code); code=(int*)vcalloc (l+2, sizeof (int)); } code[0]=ng; code++; for (a=0; a<l; a++) { code[a]=aa[(int)seq[a]]; } code[a]=END_ARRAY; return code; }
/*
 * declare_Alignment: allocate an Alignment, optionally sized from a Sequence.
 *
 *   S : may be NULL. With S == NULL the alignment is MAX_N_SEQ rows by
 *       MAX_LEN_ALN columns; otherwise S->nseq+1 rows by 2*S->max_len+1
 *       columns, and the names in S are copied into the first S->nseq rows.
 *
 * The new alignment is registered through aln_stack(..., DECLARE_ALN).
 *
 * order[x] layout:
 *   [0] sequence index in S (initialised to x)
 *   [1] offset of the sequence
 *   [2] used by sw_gotoh_pair_wise
 *   [3] used by sw_gotoh_pair_wise
 *   [4] weight, -1
 */
Alignment *declare_Alignment ( Sequence *S)
{
    Alignment *aln = vcalloc (1, sizeof (Alignment));
    int rows, k;

    aln_stack (aln, DECLARE_ALN);

    aln->declared_len = (S == NULL) ? MAX_LEN_ALN : 2*S->max_len+1;
    aln->max_n_seq    = (S == NULL) ? MAX_N_SEQ   : S->nseq+1;
    aln->S = S;

    rows = aln->max_n_seq;

    /* Per-row text storage. */
    aln->seq_comment = declare_char (rows, COMMENT_SIZE);
    aln->aln_comment = declare_char (rows, COMMENT_SIZE);
    aln->seq_al      = declare_char (rows, aln->declared_len);
    aln->name        = declare_char (rows, MAXNAMES+1);
    aln->file        = declare_char (rows, STRING);
    aln->tree_order  = declare_char (rows, STRING);

    /* Per-row numeric storage. */
    aln->order     = declare_int (rows, 5);
    aln->score_seq = vcalloc (rows, sizeof (int));

    k = 0;
    while (k < rows)
    {
        aln->order[k][0] = k;
        k++;
    }

    aln->len_aln   = 0;
    aln->score_aln = 0;
    aln->len       = vcalloc (rows, sizeof (int));

    /* Nothing to copy without a named sequence set. */
    if (S == NULL || S->name == NULL) return aln;

    for (k = 0; k < S->nseq; k++)
    {
        sprintf (aln->name[k], "%s", S->name[k]);
    }
    return aln;
}
Weights* declare_weights ( int nseq) { Weights *W; W=vcalloc ( 1, sizeof ( Weights)); W->comments=vcalloc ( 1000, sizeof (char)); W->nseq=nseq; W->mode=vcalloc (FILENAMELEN, sizeof (char)); W->seq_name= declare_char ( W->nseq*2, 200); W->PW_SD=declare_float ( W->nseq, W->nseq); W->PW_ID=declare_float ( W->nseq, W->nseq); W->SEQ_W=vcalloc ( W->nseq, sizeof ( float)); return W; }
/*
 * declare_constraint_list: allocate and initialise a Constraint_list.
 *
 *   S    : sequence set the list refers to (stored as-is; also passed to
 *          declare_residue_index). When non-NULL, seq_for_quadruplet gets
 *          S->nseq ints.
 *   name : optional list name, copied into CL->list_name.
 *   L    : DEPRECATED, must be NULL. A non-NULL value is a fatal error.
 *   ne   : initial number of entries, stored in CL->ne.
 *   fp   : stream stored in CL->fp.
 *   M    : substitution matrix stored in CL->M.
 *
 * Returns the new list.
 *
 * Fix: the deprecated-argument path now terminates with a non-zero status.
 * The original called exit(0), reporting success to the parent process
 * after a fatal misuse.
 *
 * NOTE(review): the name is copied with an unbounded sprintf; the capacity
 * of CL->list_name is not visible from this function.
 */
Constraint_list * declare_constraint_list ( Sequence *S, char *name, int *L, int ne,FILE *fp, int **M)
{
    Constraint_list *CL;

    /* Reject the deprecated argument before allocating anything. */
    if (L)
    {
        HERE ("The USE of L is now Deprecated with Constraint Lists");
        exit (1);
    }

    CL=vcalloc (1, sizeof (Constraint_list));

    CL->S=S;
    CL->M=M;

    if (name!=NULL)
    {
        sprintf (CL->list_name, "%s", name);
    }

    CL->cpu=1;
    CL->fp=fp;

    CL->ne=ne;
    CL->entry_len=LIST_N_FIELDS;
    CL->el_size=sizeof (CLIST_TYPE);
    CL->matrices_list=declare_char (20,20);

    CL->weight_field=WE;
    if (S) CL->seq_for_quadruplet=vcalloc (S->nseq, sizeof (int));

    CL->Prot_Blast=vcalloc (1, sizeof (Blast_param));
    CL->DNA_Blast =vcalloc (1, sizeof (Blast_param));
    CL->Pdb_Blast =vcalloc (1, sizeof (Blast_param));
    CL->TC=vcalloc (1, sizeof (TC_param));

    /* New data structure */
    CL->residue_index=declare_residue_index (S);

    return CL;
}
Dp_Model * initialize_sseq_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL) { Dp_Model *M; int a, b, c,d; int Sa,Sb,St, Da, Db, Dt, Ia, Ib, It; int tgop=CL->gep*3; M=vcalloc ( 1, sizeof (Dp_Model)); M->nstate=9; M->START=M->nstate++; M->END =M->nstate++; M->model_comments=declare_char (M->nstate+1, 100); M->bounded_model=declare_int (M->nstate+1, M->nstate+1); M->model=declare_int (M->nstate+1, M->nstate+1); for ( a=0; a<=M->nstate; a++) for ( b=0; b<= M->nstate; b++) M->model[a][b]=UNDEFINED; M->model_properties=declare_int ( M->nstate, 10); a=0; M->TYPE=a++;M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++;M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++; M->CODING0=a++;M->DELETION=a++; M->model_properties=declare_int ( M->nstate, 10); a=0; M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++; M->model_emission_function=vcalloc(M->nstate, sizeof (int (**)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *))); for ( a=0; a< M->nstate; a++) M->model_emission_function[a]=vcalloc(3, sizeof (int (*)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *))); a=0; Sa=a++;Da=a++;Ia=a++; Sb=a++;Db=a++;Ib=a++; St=a++;Dt=a++;It=a++; sprintf ( M->model_comments[M->START], "START"); sprintf ( M->model_comments[M->END], "END"); /*ALPHA*/ /*Substitution in Alpha*/ if (CL->matrices_list[0][0])sprintf ( M->model_comments[Sa], "Substitution %s", CL->matrices_list[0]); M->model_properties[Sa][M->TYPE]=Sa; M->model_properties[Sa][M->LEN_I]=1; M->model_properties[Sa][M->LEN_J]=1; M->model_properties[Sa][M->DELTA_I]=-1; M->model_properties[Sa][M->DELTA_J]= 0; M->model_emission_function[Sa][M->EMISSION] =get_alpha_sub_cost; M->model_emission_function[Sa][M->START_EMISSION]=get_ssec_no_cost; M->model_emission_function[Sa][M->TERM_EMISSION] =get_ssec_no_cost; /*Deletions*/ if (CL->matrices_list[0][0])sprintf ( M->model_comments[Da], "Deletion %s", CL->matrices_list[0]); 
M->model_properties[Da][M->TYPE]=Da; M->model_properties[Da][M->LEN_I]=1; M->model_properties[Da][M->LEN_J]=0; M->model_properties[Da][M->DELTA_I]=-1; M->model_properties[Da][M->DELTA_J]=+1; M->model_emission_function[Da][M->EMISSION] =get_alpha_gep_cost; M->model_emission_function[Da][M->START_EMISSION]=get_alpha_start_gep_cost; M->model_emission_function[Da][M->TERM_EMISSION] =get_alpha_term_gep_cost; /*Insertion*/ if (CL->matrices_list[0][0])sprintf ( M->model_comments[Ia], "Insertion %s", CL->matrices_list[0]); M->model_properties[Ia][M->TYPE]=Ia; M->model_properties[Ia][M->LEN_I]=0; M->model_properties[Ia][M->LEN_J]=1; M->model_properties[Ia][M->DELTA_I]=0; M->model_properties[Ia][M->DELTA_J]=-1; M->model_emission_function[Ia][M->EMISSION] =get_alpha_gep_cost; M->model_emission_function[Ia][M->START_EMISSION]=get_alpha_start_gep_cost; M->model_emission_function[Ia][M->TERM_EMISSION] =get_alpha_term_gep_cost; /*BETA*/ /*Substitution in Beta*/ if (CL->matrices_list[1][0])sprintf ( M->model_comments[Sb], "Substitution %s", CL->matrices_list[1]); M->model_properties[Sb][M->TYPE]=Sb; M->model_properties[Sb][M->LEN_I]=1; M->model_properties[Sb][M->LEN_J]=1; M->model_properties[Sb][M->DELTA_I]=-1; M->model_properties[Sb][M->DELTA_J]= 0; M->model_emission_function[Sb][M->EMISSION] =get_beta_sub_cost; M->model_emission_function[Sb][M->START_EMISSION]=get_ssec_no_cost; M->model_emission_function[Sb][M->TERM_EMISSION] =get_ssec_no_cost; /*Deletions*/ if (CL->matrices_list[1][0])sprintf ( M->model_comments[Db], "Deletion %s", CL->matrices_list[1]); M->model_properties[Db][M->TYPE]=Db; M->model_properties[Db][M->LEN_I]=1; M->model_properties[Db][M->LEN_J]=0; M->model_properties[Db][M->DELTA_I]=-1; M->model_properties[Db][M->DELTA_J]=+1; M->model_emission_function[Db][M->EMISSION] =get_beta_gep_cost; M->model_emission_function[Db][M->START_EMISSION]=get_beta_start_gep_cost; M->model_emission_function[Db][M->TERM_EMISSION] =get_beta_term_gep_cost; /*Insertion*/ if 
(CL->matrices_list[1][0])sprintf ( M->model_comments[Ib], "Insertion %s", CL->matrices_list[1]); M->model_properties[Ib][M->TYPE]=Ib; M->model_properties[Ib][M->LEN_I]=0; M->model_properties[Ib][M->LEN_J]=1; M->model_properties[Ib][M->DELTA_I]=0; M->model_properties[Ib][M->DELTA_J]=-1; M->model_emission_function[Ib][M->EMISSION] =get_beta_gep_cost; M->model_emission_function[Ib][M->START_EMISSION]=get_beta_start_gep_cost; M->model_emission_function[Ib][M->TERM_EMISSION] =get_beta_term_gep_cost; /*TURNS*/ /*Substitution in Turn*/ if (CL->matrices_list[2][0])sprintf ( M->model_comments[St], "Substitution %s", CL->matrices_list[2]); M->model_properties[St][M->TYPE]=St; M->model_properties[St][M->LEN_I]=1; M->model_properties[St][M->LEN_J]=1; M->model_properties[St][M->DELTA_I]=-1; M->model_properties[St][M->DELTA_J]= 0; M->model_emission_function[St][M->EMISSION] =get_turn_sub_cost; M->model_emission_function[St][M->START_EMISSION]=get_ssec_no_cost; M->model_emission_function[St][M->TERM_EMISSION] =get_ssec_no_cost; /*Deletions*/ if (CL->matrices_list[2][0])sprintf ( M->model_comments[Dt], "Deletion %s", CL->matrices_list[2]); M->model_properties[Dt][M->TYPE]=Dt; M->model_properties[Dt][M->LEN_I]=1; M->model_properties[Dt][M->LEN_J]=0; M->model_properties[Dt][M->DELTA_I]=-1; M->model_properties[Dt][M->DELTA_J]=+1; M->model_emission_function[Dt][M->EMISSION] =get_turn_gep_cost; M->model_emission_function[Dt][M->START_EMISSION]=get_turn_start_gep_cost; M->model_emission_function[Dt][M->TERM_EMISSION] =get_turn_term_gep_cost; /*Insertion*/ if (CL->matrices_list[2][0])sprintf ( M->model_comments[It], "Insertion %s", CL->matrices_list[2]); M->model_properties[It][M->TYPE]=It; M->model_properties[It][M->LEN_I]=0; M->model_properties[It][M->LEN_J]=1; M->model_properties[It][M->DELTA_I]=0; M->model_properties[It][M->DELTA_J]=-1; M->model_emission_function[It][M->EMISSION] =get_turn_gep_cost; M->model_emission_function[It][M->START_EMISSION]=get_turn_start_gep_cost; 
M->model_emission_function[It][M->TERM_EMISSION] =get_turn_term_gep_cost; /*Transitions*/ M->model[M->START][Sa]=ALLOWED; M->model[M->START][Sb]=ALLOWED; M->model[M->START][St]=ALLOWED; M->model[M->START][Db]=M->model[M->START][Ib]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0; M->model[M->START][Da]=M->model[M->START][Ia]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0; M->model[M->START][Dt]=M->model[M->START][It]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0; M->model[Sa][M->END]=ALLOWED; M->model[Sb][M->END]=ALLOWED; M->model[St][M->END]=ALLOWED; M->model[Ia][M->END]=M->model[Da][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1); M->model[Ib][M->END]=M->model[Db][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1); M->model[It][M->END]=M->model[Dt][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1); for ( a=0; a< M->nstate; a++)M->model[a][a]=ALLOWED; M->model[Sa][Ia]=M->model[Sa][Da]=CL->gop*SCORE_K; M->model[Sa][Ib]=M->model[Sa][Db]=CL->gop*SCORE_K+tgop*SCORE_K; M->model[Sa][It]=M->model[Sa][Dt]=CL->gop*SCORE_K+tgop*SCORE_K; M->model[Sa][Sb]=M->model[Sa][St]=tgop*SCORE_K; M->model[Sb][Ib]=M->model[Sb][Db]=CL->gop*SCORE_K; M->model[Sb][Ia]=M->model[Sb][Da]=CL->gop*SCORE_K+tgop*SCORE_K; M->model[Sb][It]=M->model[Sb][Dt]=CL->gop*SCORE_K+tgop*SCORE_K; M->model[Sb][Sa]=M->model[Sb][St]=tgop*SCORE_K; M->model[St][It]=M->model[St][Dt]=CL->gop*SCORE_K; M->model[St][Ia]=M->model[St][Da]=CL->gop*SCORE_K+tgop*SCORE_K; M->model[St][Ib]=M->model[St][Db]=CL->gop*SCORE_K+tgop*SCORE_K; M->model[St][Sa]=M->model[St][Sb]=tgop*SCORE_K; M->model[Ia][Sa]=M->model[Da][Sa]=ALLOWED; M->model[Ia][Sb]=M->model[Da][Sb]=tgop*SCORE_K; M->model[Ia][St]=M->model[Da][St]=tgop*SCORE_K; M->model[Ib][Sa]=M->model[Db][Sa]=tgop*SCORE_K; M->model[Ib][Sb]=M->model[Db][Sb]=ALLOWED; M->model[Ib][St]=M->model[Db][St]=tgop*SCORE_K; M->model[It][Sa]=M->model[Dt][Sa]=tgop*SCORE_K; M->model[It][Sb]=M->model[Dt][Sb]=tgop*SCORE_K; M->model[It][St]=M->model[Dt][St]=ALLOWED; /*Prune the model*/ for (c=0,a=0, d=0; a< M->START; a++) for ( b=0; 
b<M->START; b++, d++) { if (M->model[a][b]!=UNDEFINED) { M->bounded_model[b][1+M->bounded_model[b][0]++]=a; c++; } } M->CL=CL; return M; }
/*
 * make_fasta_cdna_pair_wise: align two cDNA sequences codon-wise and write
 * the gapped nucleotide alignment back into B.
 *
 *   B     : alignment holding the two nucleotide sequences at rows
 *           l_s[0][0] and l_s[1][0]. It is reallocated to l0+l1+1 columns
 *           and its cdna_cache is replaced.
 *   A     : alignment handed to make_fast_dp_pair_wise() with fixed rows
 *           0 and 3 (one sequence per side).
 *   in_ns : unused.
 *   l_s   : row indices of the two sequences inside B.
 *   CL    : constraint list; also used to build the DP model on first call.
 *   diag  : diagonal list, stored in M->diag.
 *
 * The DP model is created once and kept in a function-level static.
 *
 * Traceback handling: every state of type CODING0 first flushes the
 * nucleotides that lie before the codon's frame position (marked
 * NON_CODING), then emits three columns marked CODING0/CODING1/CODING2.
 * Columns in which both sequences would carry a gap are dropped. On exit
 * B->cdna_cache[0][x] is 1 for codon columns and 0 otherwise.
 *
 * Returns DPR->score.
 *
 * Fixes, both inside the debug branch (disabled by debug_cdna_fasta = 0):
 *   - DA was read after free_aln(DA); it is now released after the last use;
 *   - `e` could be read before any assignment; it now starts at 0.
 *
 * NOTE(review): DPR is never released here; its ownership is not visible
 * from this function. B is reassigned locally by realloc_aln2(); confirm
 * callers do not rely on the old pointer. The debug "F_GOP/F_GEP" line
 * prints M->gop / M->gep, which looks like a copy-paste slip.
 */
int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag)
{
    int a,c,p,k;
    Dp_Result *DPR;
    static Dp_Model *M;
    int l0, l1;
    int len_i, len_j;
    int f0=0, f1=0;
    int deltaf0, deltaf1, delta;
    int nr1, nr2;
    int ala, alb, aa0, aa1;
    int type;
    char **al;
    int **tl_s;
    int *tns;

    /*DEBUG*/
    int debug_cdna_fasta=0;
    Alignment *DA;
    int score;
    int state,prev_state;
    int t, e=0;
    int a1, a2;

    l0=strlen (B->seq_al[l_s[0][0]]);
    l1=strlen (B->seq_al[l_s[1][0]]);

    al=declare_char (2, l0+l1+1);
    B=realloc_aln2 (B,B->nseq,l0+l1+1);

    free_int (B->cdna_cache, -1);
    B->cdna_cache=declare_int (1, l0+l1+1);

    if (!M) M=initialize_dna_dp_model (CL);
    M->diag=diag;

    /* One sequence per side: rows 0 and 3 of A. */
    tl_s=declare_int (2, 2);
    tns=vcalloc (2, sizeof (int));
    tl_s[0][0]=0;
    tl_s[1][0]=3;
    tns[0]=tns[1]=1;
    DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M);
    vfree (tns);
    free_int (tl_s, -1);

    /*new_trace_back*/
    a=p=0;
    aa0=aa1=ala=alb=0;

    /* Skip everything up to and including the START marker. */
    while ((k=DPR->traceback[a++])!=M->START);

    while ((k=DPR->traceback[a++])!=M->END)
    {
        f0=M->model_properties[k][M->F0];
        f1=M->model_properties[k][M->F1];

        len_i=M->model_properties[k][M->LEN_I];
        len_j=M->model_properties[k][M->LEN_J];

        type=M->model_properties[k][M->TYPE];

        if (type==M->CODING0)
        {
            /* Nucleotides still pending before this codon on each side. */
            deltaf0=(aa0*3+f0)-ala;
            deltaf1=(aa1*3+f1)-alb;

            delta=MAX(deltaf0, deltaf1);

            for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)
            {
                if (nr1<deltaf0 && ala<l0) al[0][p]=B->seq_al[l_s[0][0]][ala++];
                else al[0][p]='-';

                if (nr2<deltaf1 && alb<l1) al[1][p]=B->seq_al[l_s[1][0]][alb++];
                else al[1][p]='-';

                B->cdna_cache[0][p]=M->NON_CODING;

                /* A gap/gap column is dropped: stepping p back cancels the p++ above. */
                if (is_gap(al[1][p]) && is_gap(al[0][p])) p--;
                else if (debug_cdna_fasta) fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]);
            }

            /* The codon itself: three columns. */
            for (c=0; c< 3; c++, p++)
            {
                if (c==0) B->cdna_cache[0][p]=M->CODING0;
                else if (c==1) B->cdna_cache[0][p]=M->CODING1;
                else if (c==2) B->cdna_cache[0][p]=M->CODING2;

                if (ala<l0) al[0][p]=B->seq_al[l_s[0][0]][ala++];
                else al[0][p]='-';

                if (alb<l1) al[1][p]=B->seq_al[l_s[1][0]][alb++];
                else al[1][p]='-';

                if (is_gap(al[1][p]) && is_gap(al[0][p])) p--;
                else if (debug_cdna_fasta) fprintf (stderr, "\n%d: %c %c",k, al[0][p], al[1][p]);
            }
        }

        aa0+=len_i;
        aa1+=len_j;
    }

    /* Flush whatever remains after the last state. */
    deltaf0=(aa0*3+f0)-ala;
    deltaf1=(aa1*3+f1)-alb;
    delta=MAX(deltaf0, deltaf1);
    for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)
    {
        if (nr1<deltaf0 && ala<l0) al[0][p]=B->seq_al[l_s[0][0]][ala++];
        else al[0][p]='-';

        if (nr2<deltaf1 && alb<l1) al[1][p]=B->seq_al[l_s[1][0]][alb++];
        else al[1][p]='-';

        B->cdna_cache[0][p]=M->NON_CODING;

        if (is_gap(al[1][p]) && is_gap(al[0][p])) p--;
        else if (debug_cdna_fasta) fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]);
    }
    /*End New traceback*/

    al[0][p]='\0';
    al[1][p]='\0';

    sprintf (B->seq_al[l_s[0][0]], "%s", al[0]);
    sprintf (B->seq_al[l_s[1][0]], "%s", al[1]);
    B->len_aln=strlen (al[0]);
    B->nseq=2;

    if (debug_cdna_fasta)
    {
        fprintf (stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K);
        for (a=1; a<diag[0]; a++)
        {
            fprintf (stderr, "\nchosen diag: %d", diag[a]);
        }

        fprintf (stderr, "\n  GOP=%d   GEP=%d   TG_MODE=%d", M->gop, M->gep, M->TG_MODE);
        fprintf (stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE);

        DA=copy_aln (B, NULL);
        DA=realloc_aln2 (DA,6,(DA->len_aln+1));

        for (a=0; a<B->len_aln; a++)
        {
            fprintf (stderr, "\n%d", DA->cdna_cache[0][a]);
            /* Both branches of the original test wrote the same value. */
            if (DA->cdna_cache[0][a]>=M->CODING0) DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';
            else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';

            if (DA->cdna_cache[0][a]==M->CODING0)
            {
                DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*');
                DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*');
            }
            else
            {
                DA->seq_al[DA->nseq+1][a]='-';
                DA->seq_al[DA->nseq+2][a]='-';
            }
        }
        DA->nseq+=3;
        print_aln (DA);

        /* Recompute the score column by column; DA is still needed here. */
        score=0;
        for (prev_state=M->START,a=0; a< DA->len_aln;)
        {
            state=DA->cdna_cache[0][a];
            t=M->model[prev_state][state];
            if (DA->cdna_cache[0][a]==M->CODING0)
            {
                a1=translate_dna_codon (A->seq_al[0]+a,'x');
                a2=translate_dna_codon (A->seq_al[1]+a,'x');

                if (a1!='x' && a2!='x')
                {
                    e=CL->M[a1-'A'][a2-'A']*SCORE_K;
                }
            }
            else if (DA->cdna_cache[0][a]>M->CODING0);
            else
            {
                e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION];
            }
            if (e==UNDEFINED || t==UNDEFINED) fprintf (stderr, "\nPROBLEM %d\n", a);

            fprintf (stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t);
            score+=e+t;
            prev_state=state;

            if (B->cdna_cache[0][a]==M->NON_CODING) a++;
            else a+=3;
        }

        free_aln (DA);
    }

    /* Collapse the cache to a coding (1) / non-coding (0) flag per column. */
    for (a=0; a<B->len_aln; a++)
    {
        if (B->cdna_cache[0][a]<M->CODING0) B->cdna_cache[0][a]=0;
        else B->cdna_cache[0][a]=1;
    }

    free_char (al, -1);
    return DPR->score;
}
/*
 * make_fasta_gotoh_pair_wise: affine-gap pairwise alignment of two groups
 * of sequences, restricted to a list of pre-selected diagonals.
 *
 *   A    : alignment holding both groups; the gapped result is written
 *          back into A->seq_al and A->len_aln / A->nseq are updated.
 *   ns   : ns[0], ns[1] = number of sequences in each group.
 *   l_s  : l_s[c][a] = row in A of the a-th sequence of group c. Group
 *          lengths are taken from the first row of each group.
 *   CL   : provides gop, gep, TG_MODE, maximise and the get_dp_cost
 *          callback used for the substitution score.
 *   diag : diag[0] = number of flagged diagonals; diag[1..diag[0]] = the
 *          diagonals, in order.
 *
 * Working matrices (all (l1+l2+1) x (n_diag+2), indexed [row i][diagonal j]):
 *   C     best score ending at (i, j)
 *   I     best score ending with an insertion (reached from diagonal j-1)
 *   D     best score ending with a deletion  (reached from diagonal j+1)
 *   LD    number of diagonals spanned by the current deletion
 *   trace 0 = match, >0 = jump that many diagonals up the list,
 *         <0 = jump that many diagonals down the list
 *
 * Returns `score`, i.e. the value stored for the last in-range cell that
 * was evaluated (0 when no cell was evaluated).
 *
 * NOTE(review): `buffer` is allocated and freed but never used; the first
 * assignment to `t` and the assignment `c=LEN-1` are dead stores; the
 * DEBUG_TCOFFEE trace divides by (l1+l2), which is zero when both
 * sequences are empty.
 */
int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
    {
    /*TREATMENT OF THE TERMINAL GAP PENALTIES*/
    /*TG_MODE=0---> gop and gep*/
    /*TG_MODE=1---> ---     gep*/
    /*TG_MODE=2---> ---     ---*/

    int TG_MODE, gop, l_gop, gep,l_gep, maximise;

    /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
    int a, b,c,k, t;
    int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2;
    int su, in, de, tr;

    int **C, **D, **I, **trace, **pos0, **LD;
    int lenal[2], len;
    char *buffer, *char_buf;
    char **aln, **al;

    /********Prepare Penalties******/
    gop=CL->gop*SCORE_K;
    gep=CL->gep*SCORE_K;
    TG_MODE=CL->TG_MODE;
    maximise=CL->maximise;

    /********************************/

    n_diag=diag[0];

    l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
    l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);

    if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%%  ", (diag[0]*100)/(l1+l2));

    /*diag: diag[1..n_diag]--> flagged diagonals, in order;
            diag[0]=0--> first diagonal;
            diag[n_diag+1]=l1+l2-1;
    */
    /*numbering of the diagonals starts from the bottom right [1...l1+l2-1]*/
    /*sequence s1 is vertical and seq s2 is horizontal*/
    /*D contains the best Deletion  in S2==>comes from diagonal N+1*/
    /*I contains the best insertion in S2=> comes from diagonal N-1*/

    C=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
    trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);

    al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1);

    len= MAX(lenal[0],lenal[1])+1;
    buffer=(char*)vcalloc ( 2*len, sizeof (char));
    char_buf=(char*) vcalloc (2*len, sizeof (char));

    pos0=aln2pos_simple ( A,-1, ns, l_s);
    C[0][0]=0;

    /* Row 0: cost of starting on each diagonal; diagonals that do not cross row 0 are flagged UNDEFINED. */
    t=(TG_MODE==0)?gop:0;
    for ( j=1; j<= n_diag; j++)
        {
        l_gop=(TG_MODE==0)?gop:0;
        l_gep=(TG_MODE==2)?0:gep;

        if ( (diag[j]-lenal[0])<0 )
            {
            trace[0][j]=UNDEFINED;
            continue;
            }
        C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
        D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
        }
    /* j == n_diag+1 here: sentinel column to the right of the last diagonal. */
    D[0][j]=D[0][j-1]+gep;

    /* t accumulates the terminal gap cost down the rows. */
    t=(TG_MODE==0)?gop:0;
    for ( i=1; i<=lenal[0]; i++)
        {
        l_gop=(TG_MODE==0)?gop:0;
        l_gep=(TG_MODE==2)?0:gep;

        /* Boundary columns 0 and n_diag+1, then default every diagonal cell of this row to them. */
        C[i][0]=C[i][n_diag+1]=t=t+l_gep;
        I[i][0]=D[i][n_diag+1]=t+ gop;

        for ( j=1; j<=n_diag; j++)
            {
            C[i][j]=C[i][0];
            D[i][j]=I[i][j]=I[i][0];
            }

        for (eg=0, j=1; j<=n_diag; j++)
            {
            /* Column of sequence 2 reached on diagonal j at row i. */
            pos_j=diag[j]-lenal[0]+i;

            if (pos_j<=0 || pos_j>l2 )
                {
                trace[i][j]=UNDEFINED;
                continue;
                }
            sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );

            /*1 identify the best insertion in S2:*/
            /* Terminal penalties apply on the last row; eg counts how many diagonals the insertion spans. */
            l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
            l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
            len=(j==1)?0:(diag[j]-diag[j-1]);
            if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
            else eg=1;
            I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;

            /*2 Identify the best deletion in S2*/
            /* Terminal penalties apply in the last column; delta_i is the row the deletion starts from. */
            l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
            l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;

            len=(j==n_diag)?0:(diag[j+1]-diag[j]);
            delta_i=((i-len)>0)?(i-len):0;

            if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;}
            else {LD[i][j]=1;}
            D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;

            /*Identify the best way*/
            /* Earlier implementation, kept for reference:
            score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]);
            fop-=1;
            if ( fop<0)trace[i][j]=fop*eg;
            else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];}
            else if ( fop==0) trace[i][j]=0;
            */

            su=C[i-1][j]+sub;
            in=I[i][j];
            de=D[i][j];

            /*HERE ("%d %d %d", su, in, de);*/
            /* Ties favour the match, then the insertion. */
            if (su>=in && su>=de)
                {
                score=su;
                tr=0;
                }
            else if (in>=de)
                {
                score=in;
                tr=-eg;
                }
            else
                {
                score=de;
                tr=LD[i][j];
                }
            trace[i][j]=tr;
            C[i][j]=score;

            last_i=i;
            last_j=j;
            }
        }

    /*
     * Traceback from the last evaluated cell.
     *   trace == 0 : match, move one row up on the same diagonal
     *   trace  > 0 : deletion, move to diagonal j+k and up by the diagonal gap
     *   trace  < 0 : insertion, move to diagonal j-|k|
     * al[0]/al[1] receive 1 for a residue and 0 for a gap, in reverse order.
     */
    i=last_i;
    j=last_j;

    ala=alb=0;
    match1=match2=0;
    while (!(match1==l1 && match2==l2))
        {
        if ( match1==l1)
            {
            /* Sequence 1 exhausted: the rest of sequence 2 faces gaps. */
            len=l2-match2;
            for ( a=0; a< len; a++)
                {
                al[0][ala++]=0;
                al[1][alb++]=1;
                match2++;
                }
            k=0;
            break;
            /*k=-(j-1);*/
            }
        else if ( match2==l2)
            {
            /* Sequence 2 exhausted: the rest of sequence 1 faces gaps. */
            len=l1-match1;
            for ( a=0; a< len; a++)
                {
                al[0][ala++]=1;
                al[1][alb++]=0;
                match1++;
                }
            k=0;
            break;
            /*k= n_diag-j;*/
            }
        else
            {
            k=trace[i][j];
            }

        if ( k==0)
            {
            if ( match2==l2 || match1==l1);
            else
                {
                al[0][ala++]=1;
                al[1][alb++]=1;
                i--;
                match1++;
                match2++;
                }
            }
        else if ( k>0)
            {
            len=diag[j+k]-diag[j];
            for ( a=0; a<len; a++)
                {
                if ( match1==l1)break;
                al[0][ala++]=1;
                al[1][alb++]=0;
                match1++;
                }
            i-=len;
            j+=k;
            }
        else if ( k<0)
            {
            k*=-1;
            len=diag[j]-diag[j-k];
            for ( a=0; a<len; a++)
                {
                if ( match2==l2)break;
                al[0][ala++]=0;
                al[1][alb++]=1;
                match2++;
                }
            j-=k;
            }
        }

    LEN=ala;
    c=LEN-1;

    /* The flags were collected end-to-start; presumably invert_list_char reverses them in place. */
    invert_list_char ( al[0], LEN);
    invert_list_char ( al[1], LEN);
    if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
    aln=A->seq_al;

    /* Apply the gap pattern of each group to every sequence of that group. */
    for ( c=0; c< 2; c++)
        {
        for ( a=0; a< ns[c]; a++)
            {
            ch=0;
            for ( b=0; b< LEN; b++)
                {
                if (al[c][b]==1)
                    char_buf[b]=aln[l_s[c][a]][ch++];
                else
                    char_buf[b]='-';
                }
            char_buf[b]='\0';
            aln[l_s[c][a]]=csprintf (aln[l_s[c][a]],"%s", char_buf);
            }
        }

    A->len_aln=LEN;
    A->nseq=ns[0]+ns[1];

    free_int (pos0, -1);
    free_int (C, -1);
    free_int (D, -1);
    free_int (I, -1);
    free_int (trace, -1);
    free_int (LD, -1);
    free_char ( al, -1);
    vfree(buffer);
    vfree(char_buf);

    return score;
    }
/*
 * copy_aln: copy alignment A into B and return B.
 *
 *   A : source. When NULL, B is released with free_aln() and NULL is returned.
 *   B : destination. When NULL a new alignment is created with
 *       declare_aln2(); otherwise it is resized with realloc_alignment2().
 *
 * The destination is sized to max(A->nseq, A->max_n_seq) rows by
 * A->len_aln+1 columns.
 *
 * Copied by value: comments, names, files, tree order, residues, order
 * table, the seq/cdna caches (when present), the profile (copy_profile),
 * lengths and per-sequence scores.
 * Shared by pointer with A: S, expanded_order, Dp_result, score_res, CL.
 * A nested alignment A->A is copied recursively.
 */
Alignment* copy_aln ( Alignment *A, Alignment *B)
{
    int row, col;
    int n_rows, n_cols;

    if (A == NULL)
    {
        free_aln (B);
        return NULL;
    }

    /* Destination geometry. */
    n_rows = A->max_n_seq;
    if (A->nseq > n_rows) n_rows = A->nseq;
    n_cols = A->len_aln+1;

    if (B == NULL) B = declare_aln2 (n_rows, n_cols);
    else           B = realloc_alignment2 (B, n_rows, n_cols);

    B->S = A->S;

    /* Sizes. */
    B->max_len      = A->max_len;
    B->min_len      = A->min_len;
    B->declared_len = n_cols;
    B->max_n_seq    = n_rows;
    B->nseq         = A->nseq;
    B->len_aln      = A->len_aln;

    /* Free-text comment attached to the whole alignment. */
    if (A->generic_comment)
    {
        vfree (B->generic_comment);
        B->generic_comment = vcalloc (strlen (A->generic_comment)+1, sizeof (char));
        sprintf (B->generic_comment, "%s", A->generic_comment);
    }

    /* Without a sequence set, the length vector is re-created at A's capacity. */
    if ((A->S) == NULL)
    {
        vfree (B->len);
        B->len = vcalloc (A->max_n_seq, sizeof (int));
    }
    ga_memcpy_int (A->len, B->len, B->nseq);

    /* Per-sequence text tables. */
    B->seq_comment = copy_char (A->seq_comment, B->seq_comment, -1, -1);
    B->aln_comment = copy_char (A->aln_comment, B->aln_comment, -1, -1);
    B->name        = copy_char (A->name,        B->name,        -1, -1);
    B->file        = copy_char (A->file,        B->file,        -1, -1);
    B->tree_order  = copy_char (A->tree_order,  B->tree_order,  -1, -1);
    B->expanded_order = A->expanded_order;

    /* Residues: start from a fresh block and copy len_aln columns per existing row. */
    free_char (B->seq_al, -1);
    B->seq_al = declare_char (B->max_n_seq, B->declared_len);
    for (row = 0; row < n_rows; row++)
    {
        if (!A->seq_al[row]) continue;
        for (col = 0; col < A->len_aln; col++)
            B->seq_al[row][col] = A->seq_al[row][col];
    }

    B->order = copy_int (A->order, B->order, -1, -1);

    /* Optional caches. */
    if (A->seq_cache)  B->seq_cache  = copy_int (A->seq_cache,  B->seq_cache,  -1, -1);
    if (A->cdna_cache) B->cdna_cache = copy_int (A->cdna_cache, B->cdna_cache, -1, -1);

    B->P = copy_profile (A->P);
    B->Dp_result = A->Dp_result;

    /* Scores. */
    if ((A->S) == NULL)
    {
        vfree (B->score_seq);
        B->score_seq = vcalloc (A->max_n_seq, sizeof (int));
    }
    ga_memcpy_int (A->score_seq, B->score_seq, B->nseq);
    B->score_res = A->score_res;
    B->score_aln = A->score_aln;
    B->score     = A->score;
    B->ibit      = A->ibit;
    B->cpu       = A->cpu;
    B->finished  = A->finished;

    /* Output options. */
    B->output_res_num = A->output_res_num;
    B->residue_case   = A->residue_case;
    B->expand         = A->expand;

    B->CL         = A->CL;
    B->random_tag = A->random_tag;

    /* Chained alignment, copied recursively. */
    B->A = (A->A) ? copy_aln (A->A, NULL) : NULL;

    return B;
}
/*
 * myers_miller_pair_wise: align two groups of sequences with diff() and
 * write the gapped rows back into A.
 *
 *   A  : alignment holding both groups; reallocated to the alignment
 *        length, then len_aln and nseq are updated.
 *   ns : ns[0], ns[1] = number of sequences in each group.
 *   ls : ls[g][b] = row in A of the b-th sequence of group g.
 *   CL : constraint list forwarded to diff().
 *
 * diff() leaves an edit script in S, read here through the file-level
 * cursor `sapp`:
 *     0  -> one aligned column
 *    <0  -> |op| columns with residues in group 0 and gaps in group 1
 *    >0  ->  op  columns with gaps in group 0 and residues in group 1
 * The file-level variables sns, sls, last and sapp are (re)set by this call.
 *
 * Returns the score reported by the first diff() call. Terminates the
 * process with status 1 if the two groups end up with different lengths.
 */
int myers_miller_pair_wise (Alignment *A,int *ns, int **ls,Constraint_list *CL )
{
    int **pos;
    int *S;
    char **out;
    int n1, n2;
    int i, j, col, run, step, b, op;
    int score;

    /* The former ns2master_ns (ns,ls, &sns,&sls) call is replaced by direct assignment. */
    sns = ns;
    sls = ls;

    pos = aln2pos_simple (A, -1, ns, ls);

    n1 = strlen (A->seq_al[ls[0][0]]);
    n2 = strlen (A->seq_al[ls[1][0]]);

    S = (int*)vcalloc (n1+n2+1, sizeof (int));
    last = 0;
    sapp = S;

    score = diff (A, ns, ls, 0, n1, 0, n2, 0, 0, CL, pos);
    /* Second call with a NULL alignment; its result is ignored. */
    diff (NULL, ns, ls, 0, n1, 0, n2, 0, 0, CL, pos);

    /* Pass 1: walk the script to learn the alignment length. */
    i = 0; j = 0; col = 0;
    sapp = S;
    while (i != n1 || j != n2)
    {
        op = *sapp;
        if (op == 0)     { i++; j++; col++; }
        else if (op < 0) { i -= op; col -= op; }
        else             { j += op; col += op; }
        sapp++;
    }

    A = realloc_aln2 (A, A->max_n_seq, col+1);
    out = declare_char (A->max_n_seq, col+1);

    /* Pass 2: replay the script, writing the gapped rows into `out`. */
    i = 0; j = 0; col = 0;
    sapp = S;
    while (i != n1 || j != n2)
    {
        op = *sapp;
        if (op == 0)
        {
            for (b = 0; b < ns[0]; b++) out[ls[0][b]][col] = A->seq_al[ls[0][b]][i];
            for (b = 0; b < ns[1]; b++) out[ls[1][b]][col] = A->seq_al[ls[1][b]][j];
            i++; j++; col++;
        }
        else if (op > 0)
        {
            run = op;
            for (step = 0; step < run; step++, j++, col++)
            {
                for (b = 0; b < ns[0]; b++) out[ls[0][b]][col] = '-';
                for (b = 0; b < ns[1]; b++) out[ls[1][b]][col] = A->seq_al[ls[1][b]][j];
            }
        }
        else
        {
            run = -op;
            for (step = 0; step < run; step++, i++, col++)
            {
                for (b = 0; b < ns[0]; b++) out[ls[0][b]][col] = A->seq_al[ls[0][b]][i];
                for (b = 0; b < ns[1]; b++) out[ls[1][b]][col] = '-';
            }
        }
        sapp++;
    }

    A->len_aln = col;
    A->nseq = ns[0]+ns[1];

    /* Terminate each row and copy it back over the original sequence. */
    for (b = 0; b < ns[0]; b++)
    {
        out[ls[0][b]][col] = '\0';
        sprintf (A->seq_al[ls[0][b]], "%s", out[ls[0][b]]);
    }
    for (b = 0; b < ns[1]; b++)
    {
        out[ls[1][b]][col] = '\0';
        sprintf (A->seq_al[ls[1][b]], "%s", out[ls[1][b]]);
    }

    vfree (S);
    free_char (out, -1);

    /* Sanity check: both groups must now have the same length. */
    n1 = strlen (A->seq_al[ls[0][0]]);
    n2 = strlen (A->seq_al[ls[1][0]]);
    if (n1 != n2) exit (1);

    free_int (pos, -1);
    return score;
}