Exemplo n.º 1
0
GenomeEvidenceSet * read_est_evidence(FILE * ifp,CodonTable * ct)
{
  char buffer[MAXLINE];
  GenomeEvidenceUnit * geu;
  EstEvidence * evi;
  EstExon * exon;
  GenomeEvidenceSet * ges;
  EstIndel * indel;

  assert(ct);
  assert(ifp);
  ges = GenomeEvidenceSet_alloc_std();
  evi = EstEvidence_alloc_std();
  evi->ct = hard_link_CodonTable(ct);
  
  while( fgets(buffer,MAXLINE,ifp) != NULL ) {
    if( buffer[0] == '#' ) {
      continue;
    }
    if( strstartcmp(buffer,"//") == 0 ) {
      geu = new_est_GenomeEvidenceUnit(evi);
      add_GenomeEvidenceSet(ges,geu);
      evi = EstEvidence_alloc_std();
      evi->ct = hard_link_CodonTable(ct);
      continue;
    } 
    if( strstartcmp(buffer,"exon") == 0 ) {
      exon = EstExon_alloc();
      exon->intron_3_score = 0;
      if( sscanf(buffer,"exon %d %d %d",&exon->start,&exon->end,&exon->intron_3_score) < 2 ) {
	fatal("Unable to read exon line as evidence [%s]");
      }

      exon->start--;
      exon->end--;
      add_EstEvidence(evi,exon);
    } else if( strstartcmp(buffer,"cds") == 0 ) {
      exon = EstExon_alloc();
      sscanf(buffer,"cds %d %d %d",&exon->start,&exon->end,&exon->phase);
      exon->start--;
      exon->end--;
      if( exon->phase > 2 || exon->phase < 0 ) {
	fprintf(stderr,"Exon has a non clear phase - %d\n",exon->phase);
	return NULL;
      }
      exon->is_coding = TRUE;
      add_EstEvidence(evi,exon);
    } else if ( strstartcmp(buffer,"indel") == 0 ) {
      indel = EstIndel_alloc();
      sscanf(buffer,"indel %d %d",&indel->start,&indel->end);
      indel->start--;
      indel->end--;
      add_indel_EstEvidence(evi,indel);
    } else {
      fprintf(stderr,"Unable to read as est evidence - %s",buffer);
    }

  }
  if( evi->len > 0 ) {
    geu = new_est_GenomeEvidenceUnit(evi);
    add_GenomeEvidenceSet(ges,geu);
  }
  return ges;
}
Exemplo n.º 2
0
CodonMapper * new_CodonMapper(CodonTable * ct,CodonFrequency * cf)
{
  register int i;
  register int j;
  int k;
  base one;
  base two;
  base three;
  int base4;
  int oi,ti,ri;
  double total_freq;
  CodonMapper * out;
  

  out = CodonMapper_alloc();



  out->ct = hard_link_CodonTable(ct);

  for(i=0;i<125;i++) {
    for(j=0;j<26;j++) 
      out->codon_map[i][j] =0.0;


    if( has_random_bases(i) == FALSE ) {
      if( is_stop_codon(i,ct) == TRUE ) {
	for(k=0;k<26;k++)
	out->codon_map[i][k] = (0.0);
      }
      else {
	out->codon_map[i][aminoacid_no_from_codon(ct,i)] = cf->freq[base4_codon_from_codon(i)];
      }
    }
    else { /*** is a random base ***/


      /***
	sneaky stuff. What we want to do is loop over all possible
	codons, adding up their frequencies for the amino acids 
	they represent. This is done by looping over all possible
	bases for each position and then letting through ones 
	which either have an N at this position or is the actual base.

	***/



      all_bases_from_codon(i,&one,&two,&three);

      total_freq = 0.0;

      for(oi=0;oi<4;oi++)
	for(ti=0;ti<4;ti++) 
	  for(ri=0;ri<4;ri++) {
	    if( (one == BASE_N || one == oi) && (two == BASE_N || two == ti) && (three == BASE_N || three == ri) ) {

	      base4 = codon_from_base4_codon(oi*16+ti*4+ri);
	      if( !is_stop_codon(base4,ct) ) {
		out->codon_map[i][aminoacid_no_from_codon(ct,base4)] += cf->freq[base4_codon_from_codon(base4)];
	      }
	    } /* end of if one == BASE_N ||  one == oi */
	  }  /* end of for oi,ti,ri */

      
    } /* end of else */
  }

  return out;
}