/*
 * process_almt: print summary statistics for an alignment to stdout.
 *
 * Reports the raw values returned by count_gaps(), count_conserved_columns()
 * and count_similar_columns(), followed by the same values normalised:
 *   - gaps are divided by length * number_of_seqs,
 *   - conserved/similar columns are divided by length.
 * The normalised values are fractions (no scaling by 100 is applied), even
 * though the output labels start with "pct_".
 *
 * When the corresponding divisor is not positive (empty alignment) the
 * fraction is reported as 0.00 instead of dividing by zero.
 *
 * Always returns 0.
 */
int process_almt (Alignment * alignment){

    int count_gaps (Alignment * alignment);
    int count_conserved_columns (Alignment * alignment);
    int count_similar_columns (Alignment * alignment);

    int abs_gaps, conserved_columns, similar_columns;
    double almt_size, almt_length;
    double pct_gaps = 0.0;
    double pct_conserved_columns = 0.0;
    double pct_similar_columns = 0.0;

    /* multiply in double so that a large alignment cannot overflow an int */
    almt_size = (double)alignment->length * (double)alignment->number_of_seqs;

    abs_gaps = count_gaps (alignment);
    if ( almt_size > 0.0 ) {
        pct_gaps = (double)abs_gaps / almt_size;
    }

    conserved_columns = count_conserved_columns (alignment);
    almt_length = (double)alignment->length;
    if ( almt_length > 0.0 ) {
        pct_conserved_columns = (double)conserved_columns / almt_length;
    }

    similar_columns = count_similar_columns (alignment);
    almt_length = (double)alignment->length;
    if ( almt_length > 0.0 ) {
        pct_similar_columns = (double)similar_columns / almt_length;
    }

    printf (" abs_gaps %8d\n", abs_gaps);
    printf (" abs_cons_columns %8d\n", conserved_columns);
    printf (" abs_similar_columns %8d\n", similar_columns);
    printf (" pct_gaps %8.2lf\n", pct_gaps);
    printf (" pct_cons_columns %8.2lf\n", pct_conserved_columns);
    printf (" pct_similar_columns %8.2lf\n", pct_similar_columns);

    return 0;
}
void aln_score(void) { static short *mat_xref, *matptr; static sint maxres; static sint s1,s2,c1,c2; static sint ngaps; static sint i,l1,l2; static lint score; static sint matrix[NUMRES][NUMRES]; /* calculate an overall score for the alignment by summing the scores for each pairwise alignment */ matptr = blosum45mt; mat_xref = def_aa_xref; maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100); if (maxres == 0) { fprintf(stdout,"Error: matrix blosum30 not found\n"); return; } score=0; for (s1=1;s1<=nseqs;s1++) { for (s2=1;s2<s1;s2++) { l1 = seqlen_array[s1]; l2 = seqlen_array[s2]; for (i=1;i<l1 && i<l2;i++) { c1 = seq_array[s1][i]; c2 = seq_array[s2][i]; if ((c1>=0) && (c1<=max_aa) && (c2>=0) && (c2<=max_aa)) score += matrix[c1][c2]; } ngaps = count_gaps(s1, s2, l1); score -= 100 * gap_open * ngaps; } } score /= 100; info("Alignment Score %d", (pint)score); }
/*
 * process_almt: run the per-alignment preprocessing steps and allocate the
 * pairwise similarity tables.
 *
 * Steps, in order:
 *   1. process_exons()       - per the original author's note: store the
 *                              position of exons and replace them with gaps
 *   2. count_gaps()
 *   3. protected_positions()
 *   4. allocate number_of_seqs x number_of_seqs tables: seq_dist (dmatrix),
 *      aligned_sites, identical_sites and similar_sites (intmatrix)
 *   5. seq_pw_dist()
 *
 * The return values of steps 1-3 are not inspected.
 *
 * Returns 1 as soon as one of the allocations yields a null pointer,
 * otherwise whatever seq_pw_dist() returns (0 meaning success).
 */
int process_almt (Options *options, Alignment *alignment) {

    int process_exons (Options *options, Alignment * alignment);
    int protected_positions (Options *options, Alignment * alignment);

    /* preprocessing passes over the alignment */
    process_exons (options, alignment);
    count_gaps (options, alignment);
    protected_positions (options, alignment);

    /* square tables holding the indicators of sequence similarity */
    alignment->seq_dist =
        dmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if ( !alignment->seq_dist ) {
        return 1;
    }

    alignment->aligned_sites =
        intmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if ( !alignment->aligned_sites ) {
        return 1;
    }

    alignment->identical_sites =
        intmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if ( !alignment->identical_sites ) {
        return 1;
    }

    alignment->similar_sites =
        intmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if ( !alignment->similar_sites ) {
        return 1;
    }

    /* a non-zero status from the pairwise pass is handed back unchanged */
    return seq_pw_dist (alignment);
}
/*
 * read_clustalw: read a multiple sequence alignment from the file `cwname`
 * into `alignment`.
 *
 * Despite its name, the parser keys on an "MSF:" line (alignment length),
 * "Name:" header entries (one per sequence) and the "//" line that ends the
 * header; its own diagnostics ask "Is the format gcg?".
 *
 * Parsing is done in three passes over the file:
 *   1. find the "MSF:" line and read the alignment length from it,
 *   2. count the "Name:" entries up to "//",
 *   3. rewind, store the names, then read the sequence rows that follow "//".
 * Lower-case letters are stored upper-cased and '~' is stored as '.'.
 *
 * On success stores number_of_seqs, length, sequence and name in `alignment`
 * (the last two are matrices obtained from chmatrix), calls
 * count_gaps(alignment) and returns 0.
 *
 * Returns 1 on failure: file cannot be opened, length or names missing,
 * allocation failure, a name that does not fit ALMT_NAME_LENGTH, a row whose
 * name is not in the header, or a sequence whose length differs from the
 * declared alignment length. The file and the scratch array are released on
 * every path.
 *
 * NOTE(review): the matrices returned by chmatrix are not released on the
 * error paths because their deallocator is not visible from this function.
 */
int read_clustalw ( char * cwname, Alignment * alignment){

    int count_gaps (Alignment * alignment);

    FILE * fptr = NULL;
    char line[BUFFLEN];
    char curr_name[BUFFLEN];  /* a token taken from `line` always fits */
    int number_of_seqs = 0;
    int almt_length = 0;
    int ctr, pos;
    int retval = 1;
    int * seq_pos = NULL;     /* number of residues stored so far, per sequence */
    char ** sequence = NULL;
    char ** name = NULL;

    /* open file */
    fptr = efopen ( cwname, "r");
    if ( !fptr ) return 1;

    /* find the alignment length info */
    while (fgets (line, BUFFLEN, fptr) != NULL) {
        if ( strstr (line, "MSF:") ) {
            if ( sscanf (line, "%*s %d", &almt_length) != 1 ) {
                almt_length = 0;
            }
            break;
        }
    }
    /* a negative length is as unusable as a missing one */
    if ( almt_length <= 0 ) {
        fprintf ( stderr, "Alignment length info not found in %s. Is the format gcg?\n", cwname);
        goto cleanup;
    }

    /* determine the number of sequences */
    while (fgets (line, BUFFLEN, fptr) != NULL) {
        if ( !strncmp (line, "//", 2) ) break;
        if ( strstr (line, "Name:") ) number_of_seqs++;
    }
    if ( !number_of_seqs ) {
        fprintf ( stderr, "No sequences found in %s. Is the format gcg?\n", cwname);
        goto cleanup;
    }

    /* allocate */
    sequence = chmatrix (number_of_seqs, almt_length);
    if ( !sequence ) goto cleanup;

    name = chmatrix (number_of_seqs, ALMT_NAME_LENGTH);
    if ( !name ) goto cleanup;

    seq_pos = calloc ((size_t)number_of_seqs, sizeof *seq_pos);
    if ( !seq_pos ) goto cleanup;

    /* read in the names; this pass starts at the top of the file, so it can
       meet "Name:" lines the counting pass (which started after the "MSF:"
       line) did not see -- never write past the rows that were allocated */
    rewind (fptr);
    ctr = 0;
    while (fgets (line, BUFFLEN, fptr) != NULL) {
        size_t name_length;

        if ( !strncmp (line, "//", 2) ) break;
        if ( !strstr (line, "Name:") ) continue;

        if ( ctr >= number_of_seqs ) {
            fprintf ( stderr, "Unexpected extra Name: entry in the header of %s.\n", cwname);
            goto cleanup;
        }
        if ( sscanf (line, "%*s %s", curr_name) != 1 ) {
            fprintf ( stderr, "Malformed Name: entry in the header of %s.\n", cwname);
            goto cleanup;
        }
        name_length = strlen (curr_name);
        if ( name_length >= (size_t)ALMT_NAME_LENGTH ) {
            fprintf ( stderr, "The name %s in %s is too long.\n", curr_name, cwname);
            goto cleanup;
        }
        memcpy (name[ctr], curr_name, name_length + 1);
        ctr++;
    }
    if ( ctr != number_of_seqs ) {
        fprintf ( stderr, "Could not read %d sequence names from the header of %s.\n",
                  number_of_seqs, cwname);
        goto cleanup;
    }

    /* read in the sequence rows: "<name> <residues ...>" */
    while (fgets (line, BUFFLEN, fptr) != NULL) {

        /* rows that start with whitespace carry no sequence data */
        if ( line[0] == '\0' || isspace ((unsigned char)line[0]) ) continue;
        if ( sscanf (line, "%s", curr_name) != 1 ) continue;

        ctr = 0;
        while ( ctr < number_of_seqs && strcmp (name[ctr], curr_name) ) ctr++;
        if ( ctr >= number_of_seqs ) {
            fprintf ( stderr, "The name %s not found in the header of %s.\n", curr_name, cwname);
            goto cleanup;
        }

        /* step over the name token */
        pos = 0;
        while ( line[pos] != '\0' && !isspace ((unsigned char)line[pos]) ) pos++;

        /* fgets always terminates the buffer, so '\0' bounds the scan even
           when the last line of the file has no newline */
        for ( ; line[pos] != '\0' && line[pos] != '\n'; pos++ ) {
            unsigned char residue = (unsigned char)line[pos];

            if ( isspace (residue) ) continue;

            if ( seq_pos[ctr] >= almt_length ) {
                fprintf ( stderr, "Sequence %s is longer than the alignment (%d positions).\n",
                          name[ctr], almt_length);
                goto cleanup;
            }

            if ( residue == '~' ) {
                residue = '.';                           /* tilde is stored as dot */
            } else {
                residue = (unsigned char)toupper (residue);  /* store upper case */
            }
            sequence[ctr][seq_pos[ctr]] = (char)residue;
            seq_pos[ctr]++;
        }
    }

    /* sanity check: over-long rows were rejected above, so a mismatch here
       means the sequence is too short */
    for (ctr = 0; ctr < number_of_seqs; ctr++) {
        if ( seq_pos[ctr] != almt_length ) {
            fprintf (stderr, "Sequence %s is shorter (%d position) than the alignment.\n",
                     name[ctr], seq_pos[ctr]);
            goto cleanup;
        }
    }

    /* return values */
    alignment->number_of_seqs = number_of_seqs;
    alignment->length = almt_length;
    alignment->sequence = sequence;
    alignment->name = name;

    count_gaps (alignment);
    retval = 0;

cleanup:
    free (seq_pos);
    fclose (fptr);
    return retval;
}