int *code_seq (char *seq, char *type) { static int *code; static int *aa, ng; int a, b, l; if (!aa) { char **gl; if ( strm (type, "DNA") || strm (type, "RNA")) { gl=declare_char (4,5); sprintf ( gl[ng++], "Aa"); sprintf ( gl[ng++], "Gg"); sprintf ( gl[ng++], "TtUu"); sprintf ( gl[ng++], "Cc"); } else { gl=make_group_aa ( &ng, "mafft"); } aa=(int*)vcalloc ( 256, sizeof (int)); for ( a=0; a<ng; a++) { for ( b=0; b< strlen (gl[a]); b++) { aa[(int)gl[a][b]]=a; } } free_char (gl, -1); } l=strlen (seq); if ( code) code--; if ( !code || read_array_size (code, sizeof (int))<(l+2)) { vfree (code); code=(int*)vcalloc (l+2, sizeof (int)); } code[0]=ng; code++; for (a=0; a<l; a++) { code[a]=aa[(int)seq[a]]; } code[a]=END_ARRAY; return code; }
int fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) { /*TREATMENT OF THE TERMINAL GAP PENALTIES*/ /*TG_MODE=0---> gop and gep*/ /*TG_MODE=1---> --- gep*/ /*TG_MODE=2---> --- ---*/ int maximise; /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ int **tot_diag; int *diag; int ktup; float diagonal_threshold; static int n_groups; static char **group_list; int score; /********Prepare Penalties******/ maximise=CL->maximise; ktup=CL->ktup; diagonal_threshold=CL->diagonal_threshold; /********************************/ if ( !group_list) { group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); } tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); if ( !CL->fasta_step) { diag=flag_diagonals (strlen(A->seq_al[l_s[0][0]]),strlen(A->seq_al[l_s[1][0]]), tot_diag,diagonal_threshold,0); } else { diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag,CL->fasta_step,0); } score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); free_int (tot_diag, -1); vfree (diag); return score; }
int fasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) { /*TREATMENT OF THE TERMINAL GAP PENALTIES*/ /*TG_MODE=0---> gop and gep*/ /*TG_MODE=1---> --- gep*/ /*TG_MODE=2---> --- ---*/ int maximise; int l0, l1; /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ int **tot_diag; int *diag; int ktup; static int n_groups; static char **group_list; int score; Alignment *B; /********Prepare Penalties******/ maximise=CL->maximise; ktup=CL->ktup; /********************************/ if ( !group_list) { group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); } B=dna_aln2_3frame_cdna_aln(A, ns, l_s); B->nseq=6; l0=strlen ( B->seq_al[0]); l1=strlen ( B->seq_al[3]); tot_diag=evaluate_diagonals_cdna( B, ns, l_s, CL, maximise,n_groups,group_list, ktup); diag=extract_N_diag (l0, l1, tot_diag,20,1); score=make_fasta_cdna_pair_wise ( A,B, ns, l_s, CL, diag); free_aln(B); free_int (tot_diag, -1); vfree (diag); return score; }
int ktup_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) { static char **gl; static int ng; char *seq1; char *seq2; int min_len=10; if ( !gl) gl=make_group_aa (&ng, "vasiliky"); if ( ns[0]>1)seq1=sub_aln2cons_seq_mat (A, ns[0], l_s[0],"blosum62mt"); else { seq1=(char*)vcalloc ( strlen (A->seq_al[l_s[0][0]])+1, sizeof (char)); sprintf ( seq1, "%s",A->seq_al[l_s[0][0]]); } if ( ns[1]>1)seq2=sub_aln2cons_seq_mat (A, ns[1], l_s[1],"blosum62mt"); else { seq2=(char*)vcalloc ( strlen (A->seq_al[l_s[1][0]])+1, sizeof (char)); sprintf ( seq2, "%s",A->seq_al[l_s[1][0]]); } if ( strlen (seq1)<min_len || strlen (seq2)<min_len) { Alignment *B; ungap(seq1); ungap(seq2); B=align_two_sequences ( seq1, seq2, "blosum62mt",-10, -1, "myers_miller_pair_wise"); A->score=A->score_aln=aln2sim(B, "idmat"); free_aln (B); return A->score; } else { string_convert (seq1, ng, gl); string_convert (seq2, ng, gl); A->score=A->score_aln=ktup_comparison (seq1,seq2, CL->ktup); } vfree (seq1); vfree (seq2); return A->score; }
int very_fast_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) { /*TREATMENT OF THE TERMINAL GAP PENALTIES*/ /*TG_MODE=0---> gop and gep*/ /*TG_MODE=1---> --- gep*/ /*TG_MODE=2---> --- ---*/ int maximise; /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ int **tot_diag; int *diag; int ktup; static int n_groups; static char **group_list; int score; /********Prepare Penalties******/ maximise=CL->maximise; ktup=CL->ktup; /********************************/ if ( !group_list) { group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); } CL->use_fragments=0; tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); /*Note: 20 diagonals. 5 shadows on each side: tunned on Hom39, 2/2/04 */ diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag,20,5); score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); free_int (tot_diag, -1); vfree (diag); return score; }
int cfasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) { /*TREATMENT OF THE TERMINAL GAP PENALTIES*/ /*TG_MODE=0---> gop and gep*/ /*TG_MODE=1---> --- gep*/ /*TG_MODE=2---> --- ---*/ int maximise; /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ int **tot_diag; int *diag; int ktup; static int n_groups; static char **group_list; int score, new_score; int n_chosen_diag=0; int step; int max_n_chosen_diag; int l0, l1; Alignment *B; /********Prepare Penalties******/ maximise=CL->maximise; ktup=CL->ktup; /********************************/ if ( !group_list) { group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); } B=dna_aln2_3frame_cdna_aln(A, ns, l_s); l0=strlen(B->seq_al[0]); l1=strlen(B->seq_al[3]); tot_diag=evaluate_diagonals_cdna ( B, ns, l_s, CL, maximise,n_groups,group_list, ktup); max_n_chosen_diag=100; n_chosen_diag=step=10 ; n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); diag=extract_N_diag (l0,l1, tot_diag, n_chosen_diag,2); score =make_fasta_cdna_pair_wise ( A,B, ns, l_s, CL, diag); new_score=0; vfree ( diag); while (new_score!=score && n_chosen_diag< max_n_chosen_diag ) { score=new_score; ungap_sub_aln ( A, ns[0], l_s[0]); ungap_sub_aln ( A, ns[1], l_s[1]); n_chosen_diag+=step; n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); diag =extract_N_diag (l0,l1, tot_diag, n_chosen_diag,3); new_score=make_fasta_cdna_pair_wise ( A, B,ns, l_s, CL, diag); vfree ( diag); } score=new_score; free_int (tot_diag, -1); free_aln(B); return score; }
int cfasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) { /*TREATMENT OF THE TERMINAL GAP PENALTIES*/ /*TG_MODE=0---> gop and gep*/ /*TG_MODE=1---> --- gep*/ /*TG_MODE=2---> --- ---*/ int maximise; /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ int **tot_diag; int *diag; int ktup; static int n_groups; static char **group_list; int score, new_score; int n_chosen_diag=20; int step; int max_n_chosen_diag; int l1, l2; /********Prepare Penalties******/ maximise=CL->maximise; ktup=CL->ktup; /********************************/ if ( !group_list) { group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); } l1=strlen (A->seq_al[l_s[0][0]]); l2=strlen (A->seq_al[l_s[1][0]]); if ( !CL->fasta_step) { step=MIN(l1,l2); step=(int) log ((double)MAX(step, 1)); step=MAX(step, 20); } else { step=CL->fasta_step; } tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); max_n_chosen_diag=strlen (A->seq_al[l_s[0][0]])+strlen (A->seq_al[l_s[1][0]])-1; /*max_n_chosen_diag=(int)log10((double)(l1+l2))*10;*/ n_chosen_diag+=step; n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0); score =make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); new_score=0; vfree ( diag); while (new_score!=score && n_chosen_diag< max_n_chosen_diag ) { score=new_score; ungap_sub_aln ( A, ns[0], l_s[0]); ungap_sub_aln ( A, ns[1], l_s[1]); n_chosen_diag+=step; n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); diag =extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0); new_score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); vfree ( diag); } score=new_score; free_int (tot_diag, -1); return score; }