Example #1
0
/*
 * Print summary statistics for an alignment to stdout.
 *
 * Reports the absolute counts returned by count_gaps(),
 * count_conserved_columns() and count_similar_columns(), followed by the
 * same counts normalised by the alignment size:
 *   - gaps are divided by length * number_of_seqs (total number of cells),
 *   - conserved and similar columns are divided by length.
 * Note that, despite the "pct_" labels, the normalised values are plain
 * fractions; no scaling by 100 is applied.
 *
 * A ratio whose denominator is not positive is reported as 0 instead of
 * being computed, so an empty alignment never triggers a division by zero.
 *
 * Always returns 0.
 */
int process_almt (Alignment * alignment){

    int abs_gaps;
    int conserved_columns, similar_columns;
    double almt_size;
    double pct_gaps = 0.0;
    double pct_conserved_columns = 0.0;
    double pct_similar_columns = 0.0;
    int count_gaps (Alignment * alignment);
    int count_conserved_columns (Alignment * alignment);
    int count_similar_columns (Alignment * alignment);

    /* total cell count, computed in floating point so that a large
       alignment cannot overflow an integer product */
    almt_size = (double)alignment->length * (double)alignment->number_of_seqs;

    abs_gaps = count_gaps (alignment);
    if ( almt_size > 0 ) {
        pct_gaps = abs_gaps/almt_size;
    }

    conserved_columns = count_conserved_columns (alignment);
    similar_columns   = count_similar_columns (alignment);
    if ( alignment->length > 0 ) {
        pct_conserved_columns = (double)conserved_columns/(alignment->length);
        pct_similar_columns   = (double)similar_columns/(alignment->length);
    }

    printf (" abs_gaps            %8d\n", abs_gaps);
    printf (" abs_cons_columns    %8d\n", conserved_columns);
    printf (" abs_similar_columns %8d\n", similar_columns);
    printf (" pct_gaps            %8.2lf\n", pct_gaps);
    printf (" pct_cons_columns    %8.2lf\n", pct_conserved_columns);
    printf (" pct_similar_columns %8.2lf\n", pct_similar_columns);

    return 0;

}
Example #2
0
void aln_score(void)
{
  static short  *mat_xref, *matptr;
  static sint maxres;
  static sint  s1,s2,c1,c2;
  static sint    ngaps;
  static sint    i,l1,l2;
  static lint    score;
  static sint   matrix[NUMRES][NUMRES];

/* calculate an overall score for the alignment by summing the
scores for each pairwise alignment */

  matptr = blosum45mt;
  mat_xref = def_aa_xref;
  maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100);
  if (maxres == 0)
    {
       fprintf(stdout,"Error: matrix blosum30 not found\n");
       return;
    }

  score=0;
  for (s1=1;s1<=nseqs;s1++)
   {
    for (s2=1;s2<s1;s2++)
      {

        l1 = seqlen_array[s1];
        l2 = seqlen_array[s2];
        for (i=1;i<l1 && i<l2;i++)
          {
            c1 = seq_array[s1][i];
            c2 = seq_array[s2][i];
            if ((c1>=0) && (c1<=max_aa) && (c2>=0) && (c2<=max_aa))
                score += matrix[c1][c2];
          }

        ngaps = count_gaps(s1, s2, l1);

        score -= 100 * gap_open * ngaps;

      }
   }

  score /= 100;

  info("Alignment Score %d", (pint)score);

}
Example #3
0
/*
 * Pre-process an alignment and compute pairwise sequence distances.
 *
 * Steps, in order:
 *   1. process_exons()        -- exon handling
 *   2. count_gaps()           -- gap bookkeeping
 *   3. protected_positions()  -- protected positions
 *   4. allocate the number_of_seqs x number_of_seqs tables seq_dist,
 *      aligned_sites, identical_sites and similar_sites
 *   5. seq_pw_dist()
 *
 * Returns 1 as soon as one of the table allocations yields a null pointer,
 * otherwise whatever seq_pw_dist() returns (0 meaning success).
 * The return values of steps 1-3 are not inspected.
 */
int process_almt (Options *options, Alignment *alignment) {

    int process_exons (Options *options, Alignment * alignment);
    int protected_positions (Options *options, Alignment * alignment);

    /* preparatory passes over the alignment */
    process_exons (options, alignment);
    count_gaps (options, alignment);
    protected_positions (options, alignment);

    /* pairwise distance table */
    alignment->seq_dist =
        dmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if (!alignment->seq_dist) {
        return 1;
    }

    /* pairwise counts of aligned sites */
    alignment->aligned_sites =
        intmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if (!alignment->aligned_sites) {
        return 1;
    }

    /* pairwise counts of identical sites */
    alignment->identical_sites =
        intmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if (!alignment->identical_sites) {
        return 1;
    }

    /* pairwise counts of similar sites */
    alignment->similar_sites =
        intmatrix (alignment->number_of_seqs, alignment->number_of_seqs);
    if (!alignment->similar_sites) {
        return 1;
    }

    /* the status of the distance computation is the status of the call */
    return seq_pw_dist (alignment);
}
Example #4
0
/*
 * Read an alignment from the file cwname into *alignment.
 *
 * The parser looks for the markers of the GCG/MSF layout: a header line
 * containing "MSF:" followed by the alignment length, one "Name:" line per
 * sequence, a "//" separator, and then lines of the form
 * "<name> <residues...>".  Lines in the body that start with whitespace are
 * skipped.  Residues are stored upper-cased, with '~' replaced by '.'.
 *
 * On success fills alignment->number_of_seqs, ->length, ->sequence and
 * ->name (ownership of the two matrices passes to the alignment), calls
 * count_gaps() on it, and returns 0.
 *
 * Returns 1 on any failure: file cannot be opened, header information
 * missing, allocation failure, a name that is too long or not declared in
 * the header, or a sequence whose length differs from the declared
 * alignment length.  The input file is closed on every path.
 *
 * NOTE(review): on the error paths the matrices obtained from chmatrix()
 * are not released, because the matching deallocator is not visible here.
 */
int read_clustalw ( char * cwname, Alignment * alignment){

    FILE * fptr = NULL;
    char line[BUFFLEN];
    char curr_name[BUFFLEN];
    int  number_of_seqs, almt_length, ctr;
    int * seq_pos = NULL, pos;
    char ** sequence = NULL;
    char ** name = NULL;
    int  status = 1;
    int  count_gaps (Alignment * alignment);

    /* open file */
    fptr = efopen ( cwname, "r");
    if ( !fptr ) return 1;

    /* find the alignment length info */
    almt_length = 0;
    while(fgets(line, BUFFLEN, fptr)!=NULL){
        if ( strstr(line, "MSF:" ) ){
            sscanf (line, "%*s %d", &almt_length);
            break;
        }
    }
    /* a missing, unparsable or non-positive length is unusable */
    if ( almt_length <= 0 ) {
        fprintf ( stderr, "Alignment length info not found in %s. Is the format gcg?\n", cwname);
        goto cleanup;
    }

    /* determine the number of sequences */
    number_of_seqs = 0;
    while(fgets(line, BUFFLEN, fptr)!=NULL){
        if ( ! strncmp (line, "//", 2) ) break;
        if ( strstr(line, "Name:" ) ) number_of_seqs++;
    }
    if ( !number_of_seqs ) {
        fprintf ( stderr, "No sequences found in %s. Is the format gcg?\n", cwname);
        goto cleanup;
    }

    /* allocate */
    sequence = chmatrix (number_of_seqs, almt_length);
    if ( !sequence ) goto cleanup;
    name     = chmatrix (number_of_seqs, ALMT_NAME_LENGTH);
    if ( !name ) goto cleanup;
    seq_pos = (int *) calloc ( number_of_seqs, sizeof(int));
    if ( !seq_pos ) goto cleanup;

    /* read in the names; skip up to the MSF line first, so that this pass
       sees exactly the "Name:" lines that were counted above */
    rewind(fptr);
    while(fgets(line, BUFFLEN, fptr)!=NULL){
        if ( strstr(line, "MSF:" ) ) break;
    }
    ctr = 0;
    while(fgets(line, BUFFLEN, fptr)!=NULL){
        if ( ! strncmp (line, "//", 2) ) break;
        if ( ! strstr(line, "Name:" ) ) continue;
        if ( ctr >= number_of_seqs ) {
            fprintf ( stderr, "Inconsistent number of sequences in the header of %s.\n", cwname);
            goto cleanup;
        }
        /* parse into a line-sized scratch buffer, then copy only if it fits;
           assumes each row of name holds ALMT_NAME_LENGTH chars -- confirm
           against chmatrix() */
        if ( sscanf (line, "%*s %s", curr_name) == 1 ) {
            if ( strlen (curr_name) >= (size_t) ALMT_NAME_LENGTH ) {
                fprintf ( stderr, "The name %s in %s is too long.\n", curr_name, cwname);
                goto cleanup;
            }
            strcpy (name[ctr], curr_name);
        }
        ctr ++;
    }

    /* read in the sequences */
    while(fgets(line, BUFFLEN, fptr)!=NULL){
        if ( isspace ((unsigned char) line[0]) ) continue;
        if ( sscanf (line, "%s", curr_name) != 1 ) continue;
        ctr = 0;
        while ( ctr < number_of_seqs && strcmp (name[ctr], curr_name) ) ctr++;
        if ( ctr >= number_of_seqs ) {
            fprintf ( stderr, "The name %s not found in the header of %s.\n", curr_name, cwname);
            goto cleanup;
        }
        /* skip the name; stop at the terminator in case the line has no
           whitespace at all */
        pos = 0;
        while ( line[pos] != '\0' && ! isspace ((unsigned char) line[pos]) ) pos++;
        /* copy the residues up to the end of the line */
        for ( ; line[pos] != '\0' && line[pos] != '\n'; pos++ ) {
            char residue = line[pos];
            if ( isspace ((unsigned char) residue) ) continue;
            if ( residue >= 'a' && residue <= 'z' ) residue = (char)(residue - 'a' + 'A'); /* to uppercase */
            if ( residue == '~' ) residue = '.';                                       /* tilde to dot */
            /* never write past the declared alignment length */
            if ( seq_pos[ctr] >= almt_length ) {
                fprintf (stderr, "Sequence %s is longer than the alignment (%d positions).\n",
                         name[ctr], almt_length);
                goto cleanup;
            }
            sequence [ctr] [ seq_pos[ctr] ] = residue;
            seq_pos[ctr]++;
        }
    }
    fclose(fptr);
    fptr = NULL;

    /* sanity check */
    for (ctr=0; ctr < number_of_seqs; ctr++ ) {
        if ( seq_pos[ctr] != almt_length ) {
            fprintf (stderr, "Sequence %s is shorter (%d position) than the alignment.\n", name[ctr],  seq_pos[ctr]);
            goto cleanup;
        }
    }

    /* return values */
    alignment->number_of_seqs = number_of_seqs;
    alignment->length         = almt_length;
    alignment->sequence       = sequence;
    alignment->name           = name;

    count_gaps (alignment);
    status = 0;

 cleanup:
    if ( fptr ) fclose (fptr);
    free (seq_pos);
    return status;
}