Example #1
0
/*
 * Score a codon pair under the positive-selection model.
 *
 *   ref   reference codon; the first three characters are read
 *   diff  the codon it is compared against; three characters are read
 *   rm    random model; rm->aminoacid[] is indexed by (residue - 'A')
 *   ct    codon table used to translate both codons
 *   dm    substitution matrix; dm->prob[][] is indexed by base_from_char()
 *
 * Returns the sum of the three per-base substitution scores. When the
 * two codons translate to different amino acids, the score of the new
 * amino acid's random-model probability is added on top.
 */
Score logl_positive_selection(char * ref,char * diff,RandomModel * rm,CodonTable *ct,DnaProbMatrix * dm)
{
  Score total = 0;
  char aa_before;
  char aa_after;
  int pos;

  /* each base position pays for the substitution observed there */
  for(pos = 0; pos < 3; pos++) {
    int from = base_from_char(ref[pos]);
    int to   = base_from_char(diff[pos]);

    total += Probability2Score(dm->prob[from][to]);
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* only an amino acid change brings in the emission term */
  if( aa_before != aa_after ) {
    total += Probability2Score(rm->aminoacid[aa_after-'A']);
  }

  return total;
}
/*
 * Translate a DNA sequence codon by codon, carrying the letter case of
 * the DNA over to the protein: a residue is written in upper case only
 * when all three bases of its codon are upper case, otherwise in lower
 * case.
 *
 * The codon table is read from the file "codon.table" on every call.
 *
 * Only complete codons are translated; one or two trailing bases are
 * ignored. (The previous loop translated the partial codon too, which
 * read past the end of the DNA and wrote the terminator one byte past
 * the end of the output buffer.)
 *
 * Returns a newly allocated Sequence holding the name and the
 * translated residues, or NULL when memory or the codon table could
 * not be obtained.
 *
 * NOTE(review): the codon table is not released here - the matching
 * release function is not visible from this function; confirm and add.
 */
Sequence * translate_swapped(Sequence * swapped) 
{
  CodonTable * ct;
  int i,j;
  Sequence * out;
  char * protein;

  /* one residue per complete codon, plus the terminating NUL */
  protein = calloc(1+swapped->len/3,sizeof(char));
  if( protein == NULL ) {
    return NULL;
  }

  ct = read_CodonTable_file("codon.table");
  if( ct == NULL ) {
    free(protein);
    return NULL;
  }

  out = Sequence_alloc();
  if( out == NULL ) {
    free(protein);
    return NULL;
  }

  out->name = stringalloc(swapped->name);
  out->seq  = protein;

  /* i+2 < len guarantees all three bases of the codon exist */
  for(i=0,j=0;i+2<swapped->len;i+=3,j++) {
    out->seq[j] = aminoacid_from_seq(ct,swapped->seq+i);

    /* <ctype.h> functions need an unsigned char value, not a plain char */
    if( isupper((unsigned char)swapped->seq[i]) &&
	isupper((unsigned char)swapped->seq[i+1]) &&
	isupper((unsigned char)swapped->seq[i+2]) ) {
      out->seq[j] = toupper((unsigned char)out->seq[j]);
    } else {
      out->seq[j] = tolower((unsigned char)out->seq[j]);
    }
  }

  out->seq[j] = '\0';

  return out;

}
/*
 * Translate a DNA sequence into a new protein Sequence.
 *
 *   dna  sequence to translate; must satisfy is_dna_Sequence()
 *   ct   codon table used for the translation
 *
 * The result is named "<dna name>.tr" ("NoNameDNASeq.tr" when the DNA
 * has no name) and has type SEQUENCE_PROTEIN. Every complete codon is
 * translated; one or two trailing bases are ignored.
 *
 * Returns NULL, after a warning, when the input is not DNA.
 */
Sequence * translate_Sequence(Sequence * dna,CodonTable * ct)
{
  Sequence * out;
  int i;
  int j;
  int len;
  char * seq;
  char * name;
  char buffer[512];

  if( is_dna_Sequence(dna) == FALSE) {
    warn("Trying to make a translation from a non DNA sequence... type is [%s]",Sequence_type_to_string(dna->type));
    return NULL;
  }

  /* one residue per complete codon, plus the terminating NUL */
  len = dna->len/3 + 1;
  
  seq = ckcalloc(len,sizeof(char));

  /* bounded: a very long sequence name must not overrun buffer */
  snprintf(buffer,sizeof(buffer),"%s.tr",dna->name == NULL ? "NoNameDNASeq" : dna->name);

  name = stringalloc(buffer);

  out  = Sequence_from_dynamic_memory(name,seq);

  /*
   * i+2 < len visits every complete codon. The former bound,
   * i < len-3, skipped the final codon whenever the length was an
   * exact multiple of three.
   */
  for(i=0,j=0;i+2<dna->len;i+=3,j++) {
    out->seq[j] = aminoacid_from_seq(ct,dna->seq+i);
  }
  out->seq[j] = '\0';

  out->type  = SEQUENCE_PROTEIN;
  out->len   = strlen(out->seq);

  return out;
}
Example #4
0
/*
 * Dump every entry of a pair-base codon score table, one line each.
 *
 * For each index below PAIRBASE_CODON_LENGTH the pair-base codon is
 * split into its three pair-bases; the anchor bases and the informant
 * bases are spelled out as two three-letter codons. Each line shows
 * the index, both codons with their translation under ct, and the
 * score stored in sc->codon[] for that index.
 */
void show_PairBaseCodonModelScore(PairBaseCodonModelScore * sc,CodonTable * ct,FILE * ofp)
{
  int codon;
  int pos;
  pairbase_type pb[3];
  char anchor[4];
  char informant[4];

  /* the terminators never change, so set them once */
  anchor[3]    = '\0';
  informant[3] = '\0';

  for(codon=0;codon<PAIRBASE_CODON_LENGTH;codon++) {
    decompose_pairbase_codon(codon,&pb[0],&pb[1],&pb[2]);

    for(pos=0;pos<3;pos++) {
      anchor[pos]    = char_for_base(anchor_base_from_pairbase(pb[pos]));
      informant[pos] = char_for_base(informant_base_from_pairbase(pb[pos]));
    }

    fprintf(ofp,"%9d %s[%c] %s[%c] : %d\n",codon,anchor,aminoacid_from_seq(ct,anchor),informant,aminoacid_from_seq(ct,informant),sc->codon[codon]);
  }
}
Example #5
0
/*
 * Score a codon pair under the negative-selection model.
 *
 *   ref   reference codon; the first three characters are read
 *   diff  the codon it is compared against; three characters are read
 *   unit  model unit; unit->match_emission[] is indexed by (residue - 'A')
 *   ct    codon table used to translate both codons
 *   dm    substitution matrix; dm->prob[][] is indexed by base_from_char()
 *
 * Returns the sum of the three per-base substitution scores. When the
 * codons translate to different amino acids, the match-emission score
 * of the new residue minus that of the reference residue is added.
 */
Score logl_negative_selection(char * ref,char * diff,ThreeStateUnit * unit,CodonTable * ct,DnaProbMatrix * dm)
{
  Score total = 0;
  char aa_before;
  char aa_after;
  int pos;

  /* each base position pays for the substitution observed there */
  for(pos = 0; pos < 3; pos++) {
    int from = base_from_char(ref[pos]);
    int to   = base_from_char(diff[pos]);

    total += Probability2Score(dm->prob[from][to]);
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* an unchanged amino acid contributes nothing beyond the base terms */
  if( aa_before != aa_after ) {
    /* emission score of the new residue relative to the old one */
    total += Probability2Score(unit->match_emission[aa_after-'A'])  - Probability2Score(unit->match_emission[aa_before-'A']);
  }

  return total;
}
Example #6
0
/*
 * Return the protein for a Translation, building and caching it on
 * first use.
 *
 *   ts  translation; ts->protein is returned directly when already set
 *   ct  codon table used to translate the parent transcript's cDNA
 *
 * The protein sequence is named "<cDNA name>.tr". A stop codon that
 * ends the cDNA terminates the translation and is not emitted; a stop
 * codon anywhere else is warned about and emitted as '*'. Only
 * complete codons are translated: when the cDNA length is not a
 * multiple of three a warning is issued and the trailing one or two
 * bases are ignored (the previous loop read past the end of the cDNA
 * for them).
 *
 * Returns NULL, after a warning, when the translation has no parent,
 * no cDNA can be obtained, or the cDNA has no or an empty sequence.
 */
Protein * get_Protein_from_Translation(Translation * ts,CodonTable * ct)
{
  cDNA * cd;
  int i,j;
  Sequence * seq;
  char buffer[64];

  assert(ts);
  assert(ct);

  if( ts->protein != NULL)
    return ts->protein;

  if( ts->parent == NULL ) {
    warn("Cannot get Protein from translation as no parent!");
    return NULL;
  }


  cd = get_cDNA_from_Transcript(ts->parent);

  if( cd == NULL ) {
    warn("Cannot make translation as can't get transcript!");
    return NULL;
  }

  if( cd->baseseq == NULL ) {
    warn("A bad error - a non NULL cDNA with a null sequence object. No translation here!");
    return NULL;
  }
  if( cd->baseseq->len == 0 ) {
    warn("Attempting to translate a zero length cDNA. Yikes!");
    return NULL;
  }

  seq = Sequence_alloc();
  /* bounded: cDNA names longer than the buffer are truncated, not overrun */
  snprintf(buffer,sizeof(buffer),"%s.tr",cDNA_name(cd));
  seq->name = stringalloc(buffer);
  seq->seq = ckcalloc((cd->baseseq->len/3) + 2,sizeof(char));
  seq->type = SEQUENCE_PROTEIN;

  if( cd->baseseq->len%3 != 0 ) {
    warn("Problem in making translation, cDNA is not mod3! - length is %d - transcript id %s",cd->baseseq->len,seq->name);
  }


  /* i+2 < len: never hand a codon with missing bases to the codon table */
  for(i=0,j=0;i+2<cd->baseseq->len;i+=3,j++) {
    if( is_stop_codon(codon_from_seq(cd->baseseq->seq+i),ct) == TRUE ) {
      /* a stop as the very last codon simply ends the protein */
      if( i+3 >= cd->baseseq->len ) 
	break;
      else {
	warn("Got a stop codon in the middle of a translation at postion [%d]. Yuk!",i);
	seq->seq[j] = '*';
      }
    } else {
      seq->seq[j] = aminoacid_from_seq(ct,cd->baseseq->seq+i);

    }
  }
  seq->seq[j]='\0';
  make_len_type_Sequence(seq);

  /* the type is set again here, after make_len_type_Sequence has run */
  seq->type = SEQUENCE_PROTEIN;
  ts->protein = Protein_from_Sequence(seq);

  return ts->protein;

}
Example #7
0
/*
 * Debugging dump of an alignment, one line per column.
 *
 * Every line shows the score of the second alignment unit, the text
 * labels of both units and their coordinates. When the second unit's
 * label starts with "CODON", the three bases at its position in gen
 * and their translation under ct are appended.
 *
 * ges is not used by this function.
 */
void debug_genomewise(AlnBlock * alb,GenomeEvidenceSet * ges,CodonTable * ct,Sequence * gen,FILE * ofp)
{
  AlnColumn * col;
  char * label;
  char * codon;

  col = alb->start;
  while( col != NULL ) {
    label = col->alu[1]->text_label;

    fprintf(ofp,"%4d %12s %12s [%3d][%5d %5d] ",col->alu[1]->score[0],col->alu[0]->text_label,label,col->alu[0]->start,col->alu[1]->start+1,col->alu[1]->end);

    if( strstartcmp(label,"CODON") != 0 ) {
      fprintf(ofp,"\n");
    } else {
      /* the codon begins one past the unit's start coordinate */
      codon = gen->seq + col->alu[1]->start+1;
      fprintf(ofp,"%c%c%c  %c\n",codon[0],codon[1],codon[2],aminoacid_from_seq(ct,codon));
    }

    col = col->next;
  }
    
}
Example #8
0
/*
 * Draw a protein-to-DNA alignment onto a btCanvas.
 *
 *   alb                 alignment to draw; protsequence is indexed with
 *                       alu[0] coordinates, dna->seq with alu[1] coordinates
 *   protsequence        protein residues
 *   protname_in         protein name, truncated to the name block if longer
 *   protoff             passed through to write_name_start_stuff()
 *   dna                 DNA sequence; it is treated as reversed when
 *                       dna->offset > dna->end
 *   ct                  codon table used to translate codons
 *   name                width of the name block; also bounded here by
 *                       the size of the local name buffers
 *   main                not referenced by this function
 *   btc                 canvas receiving the output
 *   match_central_line  callback given (protein residue, column score,
 *                       translated residue); its result is handed to
 *                       write_codon_match() as the match character
 *   multalign           when TRUE, a separator carrying a running
 *                       alignment number and a summed score is written
 *                       before each alignment
 *
 * Returns TRUE when the alignment was drawn to the end, FALSE (after a
 * warning) when an intron is malformed or the canvas is too narrow to
 * hold the start of an intron.
 */
boolean protdna_btc_display(AlnBlock * alb,char * protsequence,char * protname_in,int protoff,Sequence * dna,CodonTable * ct,int name,int main,btCanvas * btc,char (*match_central_line)(char,int,char),boolean multalign)
{
  AlnColumn * alc;
  AlnColumn * alc_temp,*alc_endscore;
  int a_phase, d_phase;
  int intron_number = 1;
  int aln_score;
  int aln_num = 1;
  int name_limit;
  btPasteArea * btp;
  char tempbuf[16];  /* large enough for any int printed with %d */
  char protname[60];
  char dnaname[60];
  char dnatemp[4];
  char protc;
  char transc;
  boolean is_reversed = FALSE;
  boolean issplit;

  /*
   * The local name buffers hold at most 59 characters, so the caller's
   * name width is capped at that (a negative width is treated as "no
   * caller limit"). For widths of 0..59 this behaves exactly as before.
   */
  name_limit = name;
  if( name_limit < 0 || name_limit > (int)sizeof(protname) - 1 ) {
    name_limit = (int)sizeof(protname) - 1;
  }

  if( strlen(protname_in) > (size_t)name_limit ) {
    info("Name %s is longer than allowed name block (%d). Truncating\n",protname_in,name_limit);
    strncpy(protname,protname_in,name_limit);
    protname[name_limit] = '\0';
  } else {
    strcpy(protname,protname_in);
  }

  if( strlen(dna->name) > (size_t)name_limit ) {
    info("Name %s is longer than allowed name block (%d). Truncating\n",dna->name,name_limit);
    strncpy(dnaname,dna->name,name_limit);
    dnaname[name_limit] = '\0';
  } else {
    strcpy(dnaname,dna->name);
  }

  if( dna->offset > dna->end ) {
    is_reversed = TRUE;
  }
  
  for(alc=alb->start;alc != NULL;) {

    if ( strcmp(alc->alu[1]->text_label,"END") == 0 ) 
      break; /* end of alignment */


    /* skip the random columns in front of the next alignment */
    for(;alc != NULL && is_random_AlnColumn_genewise(alc) == TRUE;alc = alc->next)
      ;

    if( alc == NULL)
      break; /* end of alignment */


      
    if( multalign == TRUE ) {
      /* get the end score */
      for(aln_score = 0,alc_endscore=alc;alc_endscore->next != NULL;alc_endscore = alc_endscore->next) {
	if( is_random_AlnColumn_genewise(alc_endscore) == TRUE)
	  break;
	aln_score += alc_endscore->alu[0]->score[0];
      }
      /*aln_score += alc_endscore->alu[0]->score[0];*/
      write_alignment_separator(btc,aln_num++,aln_score);
    }
      
      
    while( alc != NULL ) {


      write_name_start_stuff(btc,protname,protoff,dnaname,dna,name,alc);

      for(; alc != NULL;alc=alc->next ) {
	
	if( is_random_AlnColumn_genewise(alc) == TRUE ) 
	  break;

	if( strcmp(alc->alu[1]->text_label,"INSERT") == 0 ) {
	  if( can_get_paste_area_btCanvas(btc,1) == FALSE) 
	    break;  /* back to upper for, to place names and starts */
	  btp = get_paste_area_btCanvas(btc,1);
	  
	  paste_char_btPasteArea(btp,0,0,protsequence[alc->alu[0]->start+1],0);
	  paste_char_btPasteArea(btp,0,2,'-',0);
	  free_btPasteArea(btp);
	} else if ( strcmp(alc->alu[1]->text_label,"SEQUENCE_INSERTION") == 0 ||
		    strcmp(alc->alu[1]->text_label,"SEQUENCE_DELETION") == 0 ) {
	  if( can_get_paste_area_btCanvas(btc,1) == FALSE) 
	    break;  /* back to upper for, to place names and starts */
	  btp = get_paste_area_btCanvas(btc,1);
	  
	  if( strcmp(alc->alu[0]->text_label,"INSERT_STATE")== 0 ) {
	    paste_char_btPasteArea(btp,0,0,'-',0);
	  }
	  else {
	    paste_char_btPasteArea(btp,0,0,protsequence[alc->alu[0]->end],0);
	  }

	  /*
	   * Only the first character of the printed length is shown.
	   * snprintf keeps a multi-digit or negative length from
	   * running off the end of tempbuf.
	   */
	  snprintf(tempbuf,sizeof(tempbuf),"%d",alc->alu[1]->end - alc->alu[1]->start);
	  paste_char_btPasteArea(btp,0,3,tempbuf[0],0);
	  paste_char_btPasteArea(btp,0,2,'!',0);

	  free_btPasteArea(btp);
	  
	} else if (strcmp(alc->alu[1]->text_label,"END") == 0 && strcmp(alc->alu[0]->text_label,"END") == 0) {
	  break; /* end of alignment */
	} else if ( strcmp(alc->alu[1]->text_label,"RANDOM_SEQUENCE") == 0 ) {
	  break;
	} else if( strcmp(alc->alu[1]->text_label,"CODON") == 0 ) {
	  
	  if( can_get_paste_area_btCanvas(btc,1) == FALSE) 
	    break;  /* back to upper for, to place names and starts */
	  
	  btp = get_paste_area_btCanvas(btc,1);
	  
	  if( strcmp(alc->alu[0]->text_label,"INSERT_STATE")== 0 ) {
	    write_codon_match(btp,'-',' ',alc->alu[1]->start+1,aminoacid_from_seq(ct,dna->seq+alc->alu[1]->start+1),dna->seq+alc->alu[1]->start+1);
	  }
	  else {
	    write_codon_match(btp,protsequence[alc->alu[0]->end],(*match_central_line)(protsequence[alc->alu[0]->end],alc->alu[0]->score[0],aminoacid_from_seq(ct,dna->seq+alc->alu[1]->start+1)),alc->alu[1]->start+1,aminoacid_from_seq(ct,dna->seq+alc->alu[1]->start+1),dna->seq+alc->alu[1]->start+1);
	  }
	  
	  free_btPasteArea(btp);
	  
	  continue;
	} else if ( strstartcmp(alc->alu[1]->text_label,"5SS") == 0 )  {
	  
	  
	  /*
	   * intron stuff. Figure out the start and end, 
	   * then place the 5'SS Central and End.
	   * 
	   * If we can't fit in the intron, loop over 
	   * in this region before returning to higher loop. 
	   *
	   */
	  
	  if( strcmp(alc->alu[1]->text_label,"5SS_PHASE_0") == 0 ) {
	    d_phase = 0;
	  } else if ( strcmp(alc->alu[1]->text_label,"5SS_PHASE_1") == 0 ) {
	    d_phase = 1;
	  } else if ( strcmp(alc->alu[1]->text_label,"5SS_PHASE_2") == 0 ) {
	    d_phase = 2;
	  } else {
	    warn("No no no. You have a non 0,1,2 phase intron (god knows how!). Not displaying it %s",alc->alu[1]->text_label);
	    advance_line_btCanvas(btc);
	    return FALSE;
	  }
	  
	  alc_temp = alc->next;
	  
	  /* a 5SS as the very last column has no successor to inspect */
	  if( alc_temp == NULL || strcmp(alc_temp->alu[1]->text_label,"CENTRAL_INTRON") != 0 ) {
	    warn("Bad news. I have found a 5SS in your alignment, but it is not followed by a central intron node. Don't like it!");
	    advance_line_btCanvas(btc);
	    return FALSE;
	  }
	  
	  for(alc_temp = alc_temp->next ;alc_temp != NULL && strstartcmp(alc_temp->alu[1]->text_label,"3SS") != 0;alc_temp = alc_temp->next) 
	    ;
	  
	  if( alc_temp == NULL ) {
	    warn("Got to the end of the alignment in the middle of an intron from %s. Weird!",alc->alu[1]->text_label);
	    advance_line_btCanvas(btc);
	    return FALSE;
	  }

	  if( strcmp(alc_temp->alu[1]->text_label,"3SS_PHASE_0") == 0 ) {
	    a_phase = 0;
	  } else if ( strcmp(alc_temp->alu[1]->text_label,"3SS_PHASE_1") == 0 ) {
	    a_phase = 1;
	  } else if ( strcmp(alc_temp->alu[1]->text_label,"3SS_PHASE_2") == 0 ) {
	    a_phase = 2;
	  } else {
	    warn("No no no. You have a non 0,1,2 phase intron (god knows how!). Not displaying it %s",alc_temp->alu[1]->text_label);
	    advance_line_btCanvas(btc);
	    return FALSE;
	  }

	  /*
	   * At this point we have alc on 5SS alc_temp on 3SS.
	   *
	   * Check to see if we can place 5SS and Central intron piece
	   * on the line, if not advance.
	   *
	   */

	  if( can_get_paste_area_btCanvas(btc,d_phase+7+13) == FALSE) {
	    advance_line_btCanvas(btc);
	    
	    write_name_start_stuff(btc,protname,protoff,dnaname,dna,name,alc);
	  }
	  
	  /*** still no room on a fresh line: the canvas is too narrow ***/
	  
	  if( can_get_paste_area_btCanvas(btc,d_phase+7+13) == FALSE) {
	    warn("You have specified a length of your main canvas too small. I need at least 23 characters long.");
	    advance_line_btCanvas(btc);
	    return FALSE;
	  }

	  btp = get_paste_area_btCanvas(btc,d_phase+7);
	  
	  /* ? split phase */
	  if( a_phase == 0 || (a_phase != d_phase ) ) {
	    protc = ' ';
	    transc = ' ';
	    dnatemp[0]= '\0';
	    issplit = FALSE; 
	  } else {

	    if( strcmp(alc_temp->alu[0]->text_label,"INSERT_STATE")== 0 ) {
	      protc = '-';
	    } else {
	      protc = protsequence[alc->alu[0]->start+1];
	    }

	    /* rebuild the codon that the intron splits, from both sides */
	    dnatemp[0] = tolower((int)dna->seq[alc->alu[1]->start+1]);
	    if( d_phase == 2) {
	      dnatemp[1] = tolower((int)dna->seq[alc->alu[1]->start+2]);
	    } else {
	      dnatemp[1] = tolower((int)dna->seq[alc_temp->alu[1]->end-1]);
	    }
	    dnatemp[2] = tolower((int)dna->seq[alc_temp->alu[1]->end]);
	    dnatemp[3] = '\0';

	    transc = aminoacid_from_seq(ct,dnatemp);
	    issplit = TRUE; 
	  }

	  write_5intron_match(btp,d_phase,7,dna->seq+alc->alu[1]->start+1);
	  free_btPasteArea(btp);
	  
	  btp = get_paste_area_btCanvas(btc,13);

	  if( is_reversed == FALSE ) 
	    write_intron_desc(btp,alc->alu[1]->start+1+d_phase+dna->offset,alc_temp->alu[1]->start+3+dna->offset,intron_number++,issplit,protc,transc,dnatemp);
	  else
	    write_intron_desc(btp,dna->offset - (alc->alu[1]->start+d_phase+1),dna->offset - (alc_temp->alu[1]->start+3),intron_number++,issplit,protc,transc,dnatemp);

	  free_btPasteArea(btp);


	  /* 
	   * written the start of the intron, now to deal with the
	   * acceptor. We need to loop here, because we might go over the
	   * line length. 
	   */
	  
	  alc = alc->next->next;  /*** move alc forward two columns ***/

	  while( alc != alc_temp ) {
	    for(; alc != alc_temp;alc = alc->next) { /** alc_temp is 3SS **/
	      if( strcmp(alc->alu[1]->text_label,"PYRIMIDINE_TRACT") == 0 ) {
		if( can_get_paste_area_btCanvas(btc,1) == FALSE ) 
		  break;
		btp = get_paste_area_btCanvas(btc,1);
		paste_char_btPasteArea(btp,0,3,dna->seq[alc->alu[1]->start+1],0);
		paste_char_btPasteArea(btp,0,4,'+',0);
		free_btPasteArea(btp);
	      } else if( strcmp(alc->alu[1]->text_label,"SPACER") == 0 ) {
		if( can_get_paste_area_btCanvas(btc,1) == FALSE ) 
		  break;
		btp = get_paste_area_btCanvas(btc,1);
		paste_char_btPasteArea(btp,0,3,dna->seq[alc->alu[1]->start+1],0);
		free_btPasteArea(btp);
	      } else {
		warn("Sorry, don't know how to print %s. Skipping...",alc->alu[1]->text_label);
	      }
	    }
	 
	    /** end for for loop **/

	    if ( alc == alc_temp ) {
	      break;
	    }
	    
	    /*** run out of space ***/
	    
	    advance_line_btCanvas(btc);

	    write_name_start_stuff(btc,protname,protoff,dnaname,dna,name,alc);
	  
	  } /** end of while still in central->3SS **/
	  
	  /*
	   * Now do 3SS 
	   *
	   */
	  
	  if( can_get_paste_area_btCanvas(btc,a_phase == 0 ? 3 : 3- a_phase + 3) == FALSE ) {
	    advance_line_btCanvas(btc);
	    write_name_start_stuff(btc,protname,protoff,dnaname,dna,name,alc);
	  }

	  if( a_phase != 0 ) {
	    btp = get_paste_area_btCanvas(btc,3 - a_phase + 3);
	    
	    write_3intron_match(btp,a_phase,3,dna->seq + alc->alu[1]->start+1);
	    
	    free_btPasteArea(btp);
	  } else {
	    btp = get_paste_area_btCanvas(btc,3);
	    write_3intron_match(btp,a_phase,3,dna->seq + alc->alu[1]->start+1);
	    free_btPasteArea(btp);
	  }
	  
	  /*
	   * Finished with intron !!!
	   */
	} else {
	  warn("Sorry, could not print the alignment %s:%s column",alc->alu[0]->text_label,alc->alu[1]->text_label);
	}

      } /*** in this loop ***/


      advance_line_btCanvas(btc);

      if( alc == NULL)
	break;

      if ( is_random_AlnColumn_genewise(alc) == TRUE) 
	break;

    } /* end of while over alignments */
  } /* end of foreach alignment */


  /*** end of print ! **/

  return TRUE;
} 
Example #9
0
/*
 * Print a one-line evolutionary summary for a reference/variant DNA
 * pair aligned to a three-state model.
 *
 *   alb   alignment; only columns whose two labels are both "SEQUENCE"
 *         are scored. alu[0]->end indexes tsm->unit[], alu[1]->end
 *         indexes the translated sequences (times three, the DNA).
 *   tsm   model providing the per-position match emissions
 *   ref   reference DNA
 *   diff  variant DNA, compared base by base against ref
 *         NOTE(review): diff is read at every index used for ref, so
 *         it is assumed to be at least as long as ref - confirm.
 *   ct    codon table
 *   ofp   output stream
 *
 * Output, tab separated: reference name, name of the pseudo protein
 * built from the model, the summed emission score difference in bits,
 * the number of amino acid changes with a non-negative difference, the
 * number with a negative difference, and that last count split into
 * three bands at -5 and -10 bits.
 *
 * Nothing is printed when either sequence cannot be translated;
 * translate_Sequence() has then already issued a warning.
 *
 * NOTE(review): the two translations and the substitution matrix are
 * not released here - their release functions are not visible from
 * this function; confirm and add.
 */
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;

  Sequence * ref_trans;
  Sequence * diff_trans;

  DnaProbMatrix * pseudo_dm;

  int i;
  int k;
  int count = 0;
  double est_mutation = 0.0;

  int dna_offset;
  int model_pos;
  int prot_pos;

  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;

  int count_ref_positive = 0;
  int count_ref_negative = 0;

  int count_ref_negative_0_5   = 0;
  int count_ref_negative_5_10  = 0;
  int count_ref_negative_10_15 = 0;

  int syn_sites = 0;
  int nonsyn_sites = 0;

  int syn_changes = 0;
  int nonsyn_changes = 0;

  int diff_score;

  char diff_aa;
  char ref_aa;

  int score_ratio = 0;
  Score score_neg_5  = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));

  /*
   * Synonymous / non-synonymous bookkeeping. These totals only feed
   * the disabled long report at the end of the function.
   * i+2 < len keeps a trailing partial codon from being read.
   */
  for(i=0;i+2<ref->len;i+=3) {

    /* if the amino acid has changed, every base change is non syn */
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i)) {
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          nonsyn_changes++;
        }
      }
    } else {
      /* same amino acid: any base change is a syn change */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          syn_changes++;
        }
      }
    }

    /* calculate the sites. There is always 2 non syn sites */

    nonsyn_sites += 2;

    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    }
  }

  /* fraction of bases that differ */
  for(i=0;i<ref->len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }

  /* an empty reference would otherwise give 0/0 */
  if( ref->len > 0 ) {
    est_mutation = (double)count / (double)ref->len;
  }

  ref_trans = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);

  if( ref_trans == NULL || diff_trans == NULL ) {
    /* not DNA: translate_Sequence has warned, nothing sensible to report */
    return;
  }

  /*
   * Only the pseudogene matrix is ever used. A second matrix built
   * with twice the mutation rate used to be allocated here and was
   * never read or released.
   */
  pseudo_dm = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);

  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") != 0 ||
        strcmp(alc->alu[1]->text_label,"SEQUENCE") != 0 ) {
      continue;
    }

    model_pos = alc->alu[0]->end;
    prot_pos  = alc->alu[1]->end;

    /*
     * A position outside either translation has no residue; reading
     * the terminator there would turn into a negative emission index.
     */
    if( prot_pos < 0 || prot_pos >= ref_trans->len || prot_pos >= diff_trans->len ) {
      continue;
    }

    dna_offset = prot_pos*3;

    pseudo = logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
    neg    = logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[model_pos],ct,
                                     pseudo_dm);

    ref_aa  = ref_trans->seq[prot_pos];
    diff_aa = diff_trans->seq[prot_pos];

    if( ref_aa != diff_aa  ) {
      /* emission score of the reference residue relative to the variant */
      diff_score = Probability2Score(tsm->unit[model_pos]->match_emission[ref_aa-'A']) - Probability2Score(tsm->unit[model_pos]->match_emission[diff_aa-'A']);

      score_ratio += diff_score;

      if( diff_score < 0) {
        count_ref_negative++;
        if( diff_score > score_neg_5 ) {
          count_ref_negative_0_5++;
        } else if ( diff_score > score_neg_10 ) {
          count_ref_negative_5_10++;
        } else {
          count_ref_negative_10_15++;
        }
      } else {
        count_ref_positive++;
      }

    }

    total_pseudo += pseudo;
    total_neg += neg;
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
          count_ref_positive,count_ref_negative,
          count_ref_negative_0_5,
          count_ref_negative_5_10,
          count_ref_negative_10_15);


  /*
  fprintf(ofp,"%s,%s Total Pseudo %d vs Negative %d, Ratio %.4f  Positive %d Negative %d Score %.2f Syn %d Changes %d NonSyn %d Changes %d\n",ref->name,hmmp->baseseq->name,total_pseudo,total_neg,Score2Bits(total_neg-total_pseudo),count_ref_positive,count_ref_negative,Score2Bits(score_ratio),syn_sites,syn_changes,nonsyn_sites,nonsyn_changes);
  */

  free_Protein(hmmp);

}
Example #10
0
/*
 * Print the scored columns of an alignment against a pair-base
 * sequence.
 *
 * For a column whose second label is exactly "CODON", the three
 * pair-bases starting one past the column start are shown as an anchor
 * codon and an informant codon with their translations, followed by
 * the column score and the sum of the three per-base model scores,
 * both in bits.
 *
 * For a column whose second label contains "SS", the column label,
 * positions and score are printed, followed by an eleven base window
 * (three before the column start up to seven after it) of the anchor
 * and of the informant sequence.
 *
 * NOTE(review): ct is not a parameter of this function; it is
 * presumably a codon table declared at file scope - confirm.
 */
void show_score_sequence(AlnBlock * alb,PairBaseSeq * pbs,PairBaseModelScore * m,FILE * ofp)
{
    AlnColumn * col;
    char anchor[20];
    char informant[20];
    int match_score = 0;
    int first;
    int k;


    for(col=alb->start; col != NULL; col=col->next) {
        if( strcmp(col->alu[1]->text_label,"CODON") == 0 ) {
            /* the codon occupies start+1 .. start+3 */
            first = col->alu[1]->start+1;
            match_score = 0;

            for(k=0; k<3; k++) {
                anchor[k]    = char_for_base(anchor_base_from_pairbase(pbs->seq[first+k]));
                informant[k] = char_for_base(informant_base_from_pairbase(pbs->seq[first+k]));
                match_score += m->base[pbs->seq[first+k]];
            }
            anchor[3]    = '\0';
            informant[3] = '\0';

            fprintf(ofp,"%s %5d %5d %s [%c] vs %s [%c] %-.2f %-.2f\n",col->alu[1]->text_label,col->alu[0]->start+1,col->alu[1]->start+1,anchor,aminoacid_from_seq(ct,anchor),informant,aminoacid_from_seq(ct,informant),Score2Bits(col->alu[0]->score[0]),Score2Bits(match_score));
        }
        if( strstr(col->alu[1]->text_label,"SS") != NULL ) {
            /* window covers start-3 .. start+7, eleven pair-bases */
            first = col->alu[1]->start-3;

            for(k=0; k<11; k++) {
                anchor[k]    = char_for_base(anchor_base_from_pairbase(pbs->seq[first+k]));
                informant[k] = char_for_base(informant_base_from_pairbase(pbs->seq[first+k]));
            }
            anchor[11]    = '\0';
            informant[11] = '\0';


            fprintf(ofp,"%12s %5d %5d %.2f\n",col->alu[1]->text_label,col->alu[0]->start+1,col->alu[1]->start+1,Score2Bits(col->alu[0]->score[0]));
            fprintf(ofp,"     %s\n     %s\n",anchor,informant);
        }
    }

}