/*
 * read_est_evidence
 *
 * Reads EST evidence from ifp line by line and returns it collected in a
 * newly allocated GenomeEvidenceSet. Every EstEvidence built here gets its
 * own hard link to ct.
 *
 * Lines are dispatched on their prefix:
 *
 *   #...                                  skipped
 *   //                                    closes the current evidence, adds it
 *                                         to the set and starts a fresh one
 *   exon  <start> <end> [<intron_3_score>] intron_3_score defaults to 0
 *   cds   <start> <end> <phase>           exon flagged is_coding
 *   indel <start> <end>
 *
 * Anything else is reported on stderr and ignored. All start/end values
 * are decremented by one after being read.
 *
 * Evidence still pending at end of file is added only if it is non-empty
 * (evi->len > 0).
 *
 * Errors:
 *   - an exon, cds or indel line whose start/end cannot be parsed is
 *     reported through fatal()
 *   - a cds phase outside 0..2 is reported on stderr and NULL is returned
 *
 * ct and ifp must be non-NULL (asserted).
 */
GenomeEvidenceSet * read_est_evidence(FILE * ifp,CodonTable * ct)
{
  char buffer[MAXLINE];
  GenomeEvidenceUnit * geu;
  EstEvidence * evi;
  EstExon * exon;
  GenomeEvidenceSet * ges;
  EstIndel * indel;

  assert(ct);
  assert(ifp);

  ges = GenomeEvidenceSet_alloc_std();
  evi = EstEvidence_alloc_std();
  evi->ct = hard_link_CodonTable(ct);

  while( fgets(buffer,MAXLINE,ifp) != NULL ) {
    if( buffer[0] == '#' ) {
      continue;
    }

    if( strstartcmp(buffer,"//") == 0 ) {
      /* record separator: hand over the current evidence, start a new one */
      geu = new_est_GenomeEvidenceUnit(evi);
      add_GenomeEvidenceSet(ges,geu);
      evi = EstEvidence_alloc_std();
      evi->ct = hard_link_CodonTable(ct);
      continue;
    }

    if( strstartcmp(buffer,"exon") == 0 ) {
      exon = EstExon_alloc();
      exon->intron_3_score = 0;
      /* third field is optional, so only start and end are required */
      if( sscanf(buffer,"exon %d %d %d",&exon->start,&exon->end,&exon->intron_3_score) < 2 ) {
        /* the format has a %s conversion, so the line must be passed along */
        fatal("Unable to read exon line as evidence [%s]",buffer);
      }
      exon->start--;
      exon->end--;
      add_EstEvidence(evi,exon);
    } else if( strstartcmp(buffer,"cds") == 0 ) {
      exon = EstExon_alloc();
      /*
       * start and end must parse. A missing phase field is left to the
       * range check below.
       * NOTE(review): in that case phase keeps whatever EstExon_alloc set -
       * confirm that default is outside 0..2.
       */
      if( sscanf(buffer,"cds %d %d %d",&exon->start,&exon->end,&exon->phase) < 2 ) {
        fatal("Unable to read cds line as evidence [%s]",buffer);
      }
      exon->start--;
      exon->end--;
      if( exon->phase > 2 || exon->phase < 0 ) {
        fprintf(stderr,"Exon has a non clear phase - %d\n",exon->phase);
        /*
         * NOTE(review): ges, evi and exon are not released on this path;
         * presumably a leak - confirm and free with the project's
         * destructors.
         */
        return NULL;
      }
      exon->is_coding = TRUE;
      add_EstEvidence(evi,exon);
    } else if ( strstartcmp(buffer,"indel") == 0 ) {
      indel = EstIndel_alloc();
      if( sscanf(buffer,"indel %d %d",&indel->start,&indel->end) < 2 ) {
        fatal("Unable to read indel line as evidence [%s]",buffer);
      }
      indel->start--;
      indel->end--;
      add_indel_EstEvidence(evi,indel);
    } else {
      /* buffer still carries its newline from fgets, hence no \n here */
      fprintf(stderr,"Unable to read as est evidence - %s",buffer);
    }
  }

  /* input that does not end with a // line still contributes its evidence */
  if( evi->len > 0 ) {
    geu = new_est_GenomeEvidenceUnit(evi);
    add_GenomeEvidenceSet(ges,geu);
  }
  /* NOTE(review): an empty trailing evi is not released here - confirm */

  return ges;
}
/*
 * new_CodonMapper
 *
 * Allocates a CodonMapper, gives it a hard link to ct, and fills
 * codon_map[codon][amino acid] from the codon frequencies in cf.
 *
 * For each of the 125 codon numbers the row of 26 entries is first zeroed,
 * then:
 *
 *   - a codon without random bases that is a stop codon keeps its all-zero
 *     row;
 *   - any other codon without random bases gets a single entry, for the
 *     amino acid it translates to, holding that codon's frequency;
 *   - a codon with at least one N position accumulates, per amino acid,
 *     the frequencies of every non-stop concrete codon compatible with it.
 *     A concrete base is compatible with a position when the position is
 *     BASE_N or holds that very base.
 *
 * Returns the newly built mapper.
 */
CodonMapper * new_CodonMapper(CodonTable * ct,CodonFrequency * cf)
{
  CodonMapper * mapper;
  int codon;
  int aa;
  int first;
  int second;
  int third;
  int concrete;
  base pos1;
  base pos2;
  base pos3;

  mapper = CodonMapper_alloc();
  mapper->ct = hard_link_CodonTable(ct);

  for(codon=0;codon<125;codon++) {
    /* every row starts out empty */
    for(aa=0;aa<26;aa++) {
      mapper->codon_map[codon][aa] = 0.0;
    }

    if( has_random_bases(codon) == FALSE ) {
      /* fully specified codon: stop codons simply keep the zeroed row */
      if( is_stop_codon(codon,ct) != TRUE ) {
        mapper->codon_map[codon][aminoacid_no_from_codon(ct,codon)] = cf->freq[base4_codon_from_codon(codon)];
      }
      continue;
    }

    /*
     * Ambiguous codon: walk all 4*4*4 concrete codons and keep those
     * that agree with every non-N position.
     */
    all_bases_from_codon(codon,&pos1,&pos2,&pos3);

    for(first=0;first<4;first++) {
      if( pos1 != BASE_N && pos1 != first ) {
        continue;
      }
      for(second=0;second<4;second++) {
        if( pos2 != BASE_N && pos2 != second ) {
          continue;
        }
        for(third=0;third<4;third++) {
          if( pos3 != BASE_N && pos3 != third ) {
            continue;
          }

          concrete = codon_from_base4_codon(first*16+second*4+third);
          if( is_stop_codon(concrete,ct) ) {
            continue;
          }

          mapper->codon_map[codon][aminoacid_no_from_codon(ct,concrete)] += cf->freq[base4_codon_from_codon(concrete)];
        }
      }
    }
  }

  return mapper;
}