/*
 * Score a codon change under the positive selection model.
 *
 * The three bases of ref and diff are scored position by position using
 * the substitution probabilities in dm.  If both codons translate (via ct)
 * to the same amino acid, that base-level score is the result.  Otherwise
 * the score of rm's frequency for the new amino acid is added on top.
 *
 * ref and diff must each point at three readable bases.
 */
Score logl_positive_selection(char * ref,char * diff,RandomModel * rm,CodonTable *ct,DnaProbMatrix * dm)
{
  Score total = 0;
  char aa_before;
  char aa_after;
  int pos;

  /* cost of the observed base at each codon position */
  for(pos = 0; pos < 3; pos++) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* only an amino acid change contributes the extra term */
  if( aa_before != aa_after ) {
    total += Probability2Score(rm->aminoacid[aa_after-'A']);
  }

  return total;
}
/*
 * Build a score-space unit from a probability-space SyExon: the exit and
 * stay probabilities are each converted with Probability2Score.
 * The allocation result is not checked here.
 */
SyExonScoreUnit * SyExonScoreUnit_from_SyExon(SyExon * sye)
{
  SyExonScoreUnit * unit = SyExonScoreUnit_alloc();

  unit->exit_score = Probability2Score(sye->exit_prob);
  unit->stay_score = Probability2Score(sye->stay_prob);

  return unit;
}
/*
 * Convert the first len probabilities in from into scores, writing them to
 * the matching slots of to.  Returns to, so calls can be chained.
 * Nothing is written when len is zero or negative.
 */
Score * Probability2Score_move(Probability * from,Score * to,int len)
{
  int idx = 0;

  while( idx < len ) {
    to[idx] = Probability2Score(from[idx]);
    idx++;
  }

  return to;
}
/*
 * Score a codon change under the pseudogene model: the sum, over the three
 * codon positions, of the score of dm's substitution probability from the
 * ref base to the diff base.  No amino acid term is involved.
 *
 * ref and diff must each point at three readable bases.
 */
Score logl_pseudogene(char * ref,char * diff,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos;

  for(pos = 0; pos < 3; pos++) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
  }

  return total;
}
/*
 * Make a ComplexConsensusWord for the given pattern string.
 *
 * pattern is set from stringalloc(string), p is stored as given, and score
 * holds p converted by Probability2Score.
 * The allocation result is not checked here.
 */
ComplexConsensusWord * ComplexConsensusWord_from_string_and_prob(char * string,Probability p)
{
  ComplexConsensusWord * word = ComplexConsensusWord_alloc();

  word->pattern = stringalloc(string);
  word->p       = p;
  word->score   = Probability2Score(p);

  return word;
}
/*
 * Build a score model from the ratio of two DNA random models: for each of
 * the 5 base slots, the score of dis->base[i] / rnd->base[i].
 *
 * No guard is applied against a zero entry in rnd, and the allocation
 * result is not checked.
 */
RandomModelDNAScore * folded_RandomModelDNAScore_from_2RMD(RandomModelDNA * dis,RandomModelDNA * rnd)
{
  RandomModelDNAScore * folded = RandomModelDNAScore_alloc();
  int slot = 0;

  while( slot < 5 ) {
    folded->base[slot] = Probability2Score(dis->base[slot]/rnd->base[slot]);
    slot++;
  }

  return folded;
}
/*
 * Produce a new 26x26 comparison matrix from cm.  Each cell is passed
 * through halfbit2Probability and the result through Probability2Score.
 * cm itself is left untouched.
 */
CompMat * CompMat_from_halfbit(CompMat * cm)
{
  CompMat * converted = CompMat_alloc();
  int row;
  int col;

  for(row = 0; row < 26; row++) {
    for(col = 0; col < 26; col++) {
      converted->comp[row][col] = Probability2Score(halfbit2Probability(cm->comp[row][col]));
    }
  }

  return converted;
}
/*
 * Convert a GenePhaseModel into its score-space counterpart.
 *
 * The embedded GeneWise model is converted with GeneWiseScore_from_GeneWise.
 * For every position a new segment is added to the output, and its three
 * intron phase probabilities are converted to scores.
 *
 * Asserts that gpm and gpm->gw are non-NULL and have the same length.
 */
GenePhaseScore * GenePhaseScore_from_GenePhaseModel(GenePhaseModel * gpm)
{
  GenePhaseScore * out;
  int seg;

  assert(gpm != NULL);
  assert(gpm->gw != NULL);
  assert(gpm->gw->len == gpm->len);

  out = GenePhaseScore_alloc_len(gpm->len);
  out->gws = GeneWiseScore_from_GeneWise(gpm->gw);

  seg = 0;
  while( seg < gpm->len ) {
    /* the segment added here is the one addressed as out->phase[seg] below */
    add_GenePhaseScore(out,GenePhaseSegScore_alloc());

    out->phase[seg]->intron_0 = Probability2Score(gpm->phase[seg]->intron_0);
    out->phase[seg]->intron_1 = Probability2Score(gpm->phase[seg]->intron_1);
    out->phase[seg]->intron_2 = Probability2Score(gpm->phase[seg]->intron_2);

    seg++;
  }

  return out;
}
/*
 * Convert a 125x125 codon probability matrix into a score matrix, cell by
 * cell, using Probability2Score.  The allocation result is not checked.
 */
CodonMatrixScore * CodonMatrixScore_from_CodonMatrix(CodonMatrix * cm)
{
  CodonMatrixScore * scored = CodonMatrixScore_alloc();
  int row;
  int col;

  for(row = 0; row < 125; row++) {
    for(col = 0; col < 125; col++) {
      scored->score[row][col] = Probability2Score(cm->prob[row][col]);
    }
  }

  return scored;
}
/*
 * Align two DNA profiles and package the result as a DnaProfileMatchPair.
 *
 * The returned pair holds hard links to query and target, the alignment
 * block converted from the PackAln, and the PackAln score.  Gap parameters
 * come from dpep (open/ext/gap_unmatched, converted to scores), and the
 * dynamic programming implementation is dpep->dpri.  Progress messages are
 * written to stderr.
 *
 * Asserts that query and target are non-NULL.  All intermediate objects
 * (score profiles, match score, PackAln) are freed before returning.
 */
DnaProfileMatchPair * DnaProfileMatchPair_from_DnaProfile(DnaProfile * query,DnaProfile * target,DnaProfileEnginePara * dpep)
{
  DnaProfileMatchPair * out;
  DnaProfileScore * query_s;
  DnaProfileScore * target_s;
  DnaProfileMatchScore * match;
  PackAln * pal;

  assert(query != NULL);
  assert(target != NULL);
  /*
    assert(query->len > 4 );
    assert(target->len > 4);
  */

  out = DnaProfileMatchPair_alloc();
  out->query = hard_link_DnaProfile(query);
  out->target = hard_link_DnaProfile(target);

  query_s = DnaProfileScore_from_DnaProfile(query);
  target_s = DnaProfileScore_from_DnaProfile(target);

  fprintf(stderr,"Matching %d to %d\n",query->len,target->len);

  match= new_ALLR_DnaProfileMatchScore(query,target);

  pal = PackAln_bestmemory_DnaProfileMat(query_s,target_s,match,
                                         Probability2Score(dpep->open_unmatched),
                                         Probability2Score(dpep->ext_unmatched),
                                         Probability2Score(dpep->gap_unmatched),
                                         NULL,dpep->dpri);

  /* a pointer must be printed with %p and passed as void *; %d here was undefined behaviour */
  fprintf(stderr,"...Made pal %p\n",(void *)pal);

  out->alb = convert_PackAln_to_AlnBlock_DnaProfileMat(pal);
  out->score = pal->score;

  fprintf(stderr,"...freeing pal\n");
  free_PackAln(pal);

  fprintf(stderr,"...freeing match\n");
  free_DnaProfileMatchScore(match);

  fprintf(stderr,"...freeing query\n");
  free_DnaProfileScore(query_s);

  fprintf(stderr,"...freeing target\n");
  free_DnaProfileScore(target_s);

  return out;
}
/*
 * Turn motif matrix parameters into the score object used for alignment.
 *
 * Three comparison matrices (in-motif, out-of-motif, spacer) are built the
 * same way: DnaProbMatrix_from_match on the match value with
 * NMaskType_BANNED, then flat_null_DnaProbMatrix, then conversion with
 * DnaMatrix_from_DnaProbMatrix.  The temporary probability matrix is freed
 * each time.  The remaining transition and duration probabilities are
 * converted one by one with Probability2Score.
 */
MotifMatrixScore * MotifMatrixScore_from_MotifMatrixPara(MotifMatrixPara * mmp)
{
  MotifMatrixScore * out;
  DnaProbMatrix * dmp;

  out = MotifMatrixScore_alloc();

  /* comparison matrix used inside a motif */
  dmp = DnaProbMatrix_from_match(mmp->comp_in_match,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  out->comp_in_motif = DnaMatrix_from_DnaProbMatrix(dmp);
  free_DnaProbMatrix(dmp);

  /* comparison matrix used outside a motif */
  dmp = DnaProbMatrix_from_match(mmp->comp_out_match,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  out->comp_out_motif = DnaMatrix_from_DnaProbMatrix(dmp);
  free_DnaProbMatrix(dmp);

  /* comparison matrix used in spacer regions */
  dmp = DnaProbMatrix_from_match(mmp->comp_spacer,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  out->comp_spacer = DnaMatrix_from_DnaProbMatrix(dmp);
  free_DnaProbMatrix(dmp);

  /* scalar parameters, probability -> score */
  out->region_in       = Probability2Score(mmp->region_in);
  out->motif_indel     = Probability2Score(mmp->motif_indel);
  out->cons_indel      = Probability2Score(mmp->cons_indel);
  out->spacer_indel    = Probability2Score(mmp->spacer_indel);
  out->spacer_to_cons  = Probability2Score(mmp->spacer_to_cons);
  out->spacer_to_motif = Probability2Score(mmp->spacer_to_motif);
  out->spacer_duration = Probability2Score(mmp->spacer_duration);
  out->motif_duration  = Probability2Score(mmp->motif_duration);
  out->cons_duration   = Probability2Score(mmp->cons_duration);

  return out;
}
/*
 * Convert the 26 amino acid probabilities of rm into scores.
 * Returns NULL when the output object cannot be allocated.
 */
RandomModelScoreaa * RandomModelScoreaa_from_RandomModel(RandomModel * rm)
{
  RandomModelScoreaa * scored = RandomModelScoreaa_alloc();
  int letter;

  if( scored != NULL ) {
    for(letter = 0; letter < 26; letter++) {
      scored->aminoacid[letter] = Probability2Score(rm->aminoacid[letter]);
    }
  }

  return scored;
}
/*
 * Score a codon change under the negative selection model.
 *
 * The three bases are scored position by position with dm, as in the
 * pseudogene model.  If the translated amino acid is unchanged, that is
 * the result.  Otherwise the difference between the match emission score
 * of the new amino acid and that of the reference amino acid (both taken
 * from unit) is added.
 *
 * ref and diff must each point at three readable bases.
 */
Score logl_negative_selection(char * ref,char * diff,ThreeStateUnit * unit,CodonTable * ct,DnaProbMatrix * dm)
{
  Score total = 0;
  char aa_before;
  char aa_after;
  int pos;

  /* base-level cost of the observed codon */
  for(pos = 0; pos < 3; pos++) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* emission difference applies only when the residue really changed */
  if( aa_before != aa_after ) {
    total += Probability2Score(unit->match_emission[aa_after-'A'])
           - Probability2Score(unit->match_emission[aa_before-'A']);
  }

  return total;
}
/*
 * Convert a 26x26 comparison probability matrix into a score matrix using
 * Probability2Score on every cell.  The allocation result is not checked.
 */
CompMat * CompMat_from_CompProb(CompProb * cp)
{
  CompMat * scored = CompMat_alloc();
  int row;
  int col;

  for(row = 0; row < 26; row++)
    for(col = 0; col < 26; col++)
      scored->comp[row][col] = Probability2Score(cp->comp[row][col]);

  return scored;
}
/*
 * Convert the 5 base probabilities of rmd into scores.
 * Returns NULL when the output object cannot be allocated.
 */
RandomModelDNAScore * RandomModelDNAScore_from_RandomModelDNA(RandomModelDNA * rmd)
{
  RandomModelDNAScore * scored = RandomModelDNAScore_alloc();
  int slot;

  if( scored != NULL ) {
    for(slot = 0; slot < 5; slot++)
      scored->base[slot] = Probability2Score(rmd->base[slot]);
  }

  return scored;
}
SpliceSiteModel * new_SpliceSiteModel(int offset,int pre_length,int post_length,int start,int stop,ComplexConsensi * cc,RandomModelDNAScore * rmds,Probability error) { SpliceSiteModel * out; out = SpliceSiteModel_alloc(); if( out == NULL ) return NULL; out->offset = offset; out->pre_splice_site = pre_length; out->post_splice_site = post_length; out->start_random = start; out->stop_random = stop; out->cc = hard_link_ComplexConsensi(cc); out->rmds = hard_link_RandomModelDNAScore(rmds); out->error_pos = Probability2Score(error); return out; }
int main(int argc,char ** argv) { int i; DPRunImpl * dpri = NULL; GeneModelParam * gmp = NULL; GeneModel * gm = NULL; Sequence * seq; RandomCodon * rc; RandomModelDNA * rmd; RandomCodonScore * rcs; ComplexSequenceEval * splice5; ComplexSequenceEval * splice3; ComplexSequenceEvalSet * cses; ComplexSequence * cseq; SyExonScore * exonscore; PackAln * pal; AlnBlock * alb; Genomic * genomic; GenomicRegion * gr; Protein * trans; dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. Bad arguments"); } gmp = new_GeneModelParam_from_argv(&argc,argv); ct= read_CodonTable_file("codon.table"); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene model"); } seq = read_fasta_file_Sequence(argv[1]); assert(seq); cses = new_ComplexSequenceEvalSet_from_GeneModel(gm); cseq = new_ComplexSequence(seq,cses); rc = flat_RandomCodon(ct); rmd = RandomModelDNA_std(); fold_in_RandomModelDNA_into_RandomCodon(rc,rmd); rcs = RandomCodonScore_from_RandomCodon(rc); exonscore = SyExonScore_flat_model(200,250,0.1,0.1); /* for(i=0;i<cseq->length;i++) { fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]); } exit(0); */ /* show_RandomCodonScore(rcs,stdout); for(i=3;i<seq->len;i++) { fprintf(stdout,"seq %d is %c with score %d\n",i,aminoacid_from_seq(ct,seq->seq+i-2),rcs->codon[CSEQ_GENOMIC_CODON(cseq,i)]); } exit(0); */ pal = PackAln_bestmemory_StatWise10(exonscore,cseq,rcs,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,dpri); alb = convert_PackAln_to_AlnBlock_StatWise10(pal); mapped_ascii_AlnBlock(alb,id,1,stdout); genomic = Genomic_from_Sequence(seq); gr = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_GeneWise(gr,1,seq->len,alb,"bollocks",0,NULL); for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { 
fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,stdout); } } return 0; }
/*
 * Write a one-line, tab-separated evolutionary summary for ref against an
 * HMM, using diff as the diverged copy of ref.
 *
 * Output columns: ref name, HMM name, summed emission score difference in
 * bits (ref residue minus diff residue, over aligned columns where the
 * residue changed), the number of such columns where the difference is
 * >= 0, the number where it is < 0, then the negative ones split into three
 * bins: above -5 bits, between -5 and -10 bits, and the rest.
 *
 * alb  - alignment of the HMM (alu[0]) to the translated ref (alu[1]); only
 *        columns labelled "SEQUENCE" on both sides are used
 * tsm  - the HMM whose match emissions are compared
 * ref, diff - DNA sequences read codon by codon; only the part present in
 *        both sequences is examined
 * ct   - codon table used for translation
 * ofp  - destination stream
 *
 * NOTE(review): negative_dm is built but pseudo_dm is what gets passed to
 * logl_negative_selection; confirm whether that is intended.  The two
 * probability matrices and the translated sequences are not released here.
 * The syn/nonsyn counters and the total_* sums are not part of the output.
 */
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;
  Sequence * ref_trans;
  Sequence * diff_trans;
  DnaProbMatrix * negative_dm;
  DnaProbMatrix * pseudo_dm;
  int i;
  int k;
  int shared_len;
  int count = 0;
  double est_mutation = 0.0;
  int dna_offset;
  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;
  int count_ref_positive = 0;
  int count_ref_negative = 0;
  int count_ref_negative_0_5 = 0;
  int count_ref_negative_5_10 = 0;
  int count_ref_negative_10_15 = 0;
  int syn_sites = 0;
  int nonsyn_sites = 0;
  int syn_changes = 0;
  int nonsyn_changes = 0;
  int diff_score;
  char diff_aa;
  char ref_aa;
  int score_ratio = 0;
  Score score_neg_5 = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));

  /* never index either sequence beyond what both of them contain */
  shared_len = ref->len < diff->len ? ref->len : diff->len;

  /* only complete codons are examined; a trailing partial codon is skipped */
  for(i=0;i+3 <= shared_len;i+=3) {
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i)) {
      /* the residue changed, so every base change in the codon is non-synonymous */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          nonsyn_changes++;
        }
      }
    } else {
      /* same residue: any base change is synonymous */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          syn_changes++;
        }
      }
    }

    /* two sites are always counted non-synonymous; the third depends on four-fold status */
    nonsyn_sites += 2;
    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    }
  }

  /* per-base mismatch rate, relative to the length of ref */
  for(i=0;i<shared_len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }
  if( ref->len > 0 ) {
    est_mutation = (double)count / (double)ref->len;
  }

  pseudo_dm   = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);
  negative_dm = DnaProbMatrix_from_match(1.0 - (est_mutation*2),NMaskType_BANNED);

  ref_trans  = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);

  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") != 0 ||
        strcmp(alc->alu[1]->text_label,"SEQUENCE") != 0 ) {
      continue;
    }

    dna_offset = alc->alu[1]->end*3;

    pseudo = logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
    neg = logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[alc->alu[0]->end],ct, pseudo_dm);

    ref_aa  = ref_trans->seq[alc->alu[1]->end];
    diff_aa = diff_trans->seq[alc->alu[1]->end];

    if( ref_aa != diff_aa ) {
      /* how much better (positive) or worse (negative) ref fits this HMM column than diff */
      diff_score = Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[ref_aa-'A'])
                 - Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[diff_aa-'A']);
      score_ratio += diff_score;

      if( diff_score < 0) {
        count_ref_negative++;
        if( diff_score > score_neg_5 ) {
          count_ref_negative_0_5++;
        } else if ( diff_score > score_neg_10 ) {
          count_ref_negative_5_10++;
        } else {
          count_ref_negative_10_15++;
        }
      } else {
        count_ref_positive++;
      }
    }

    total_pseudo += pseudo;
    total_neg += neg;
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
          count_ref_positive,count_ref_negative,
          count_ref_negative_0_5,
          count_ref_negative_5_10,
          count_ref_negative_10_15);

  free_Protein(hmmp);
}
/*
 * Add two scores in probability space: both are converted back to
 * probabilities, summed, and the sum is converted to a score again.
 */
Score Score_Probability_sum(Score one,Score two)
{
  Score summed;

  summed = Probability2Score(Score2Probability(one) + Score2Probability(two));

  return summed;
}
int main(int argc,char ** argv) { int i; DPRunImpl * dpri = NULL; GeneModelParam * gmp = NULL; GeneModel * gm = NULL; FILE * ifp; SeqAlign * al; PairBaseSeq * pbs; ComplexSequenceEval * splice5; ComplexSequenceEval * splice3; ComplexSequence * cseq; CompMat * score_mat; CompProb * comp_prob; RandomModel * rm; PairBaseCodonModelScore * codon_score; PairBaseModelScore* nonc_score; PairBaseCodonModelScore * start; PairBaseCodonModelScore * stop; SyExonScore * exonscore; PackAln * pal; AlnBlock * alb; Genomic * genomic; GenomicRegion * gr; GenomicRegion * gr2; Protein * trans; StandardOutputOptions * std_opt; ShowGenomicRegionOptions * sgro; char * dump_packaln = NULL; char * read_packaln = NULL; FILE * packifp = NULL; boolean show_trans = 1; boolean show_gene_raw = 0; ct = read_CodonTable_file(codon_table); /* score_mat = read_Blast_file_CompMat("blosum62.bla"); comp_prob = CompProb_from_halfbit(score_mat); */ rm = default_RandomModel(); comp_prob = read_Blast_file_CompProb("wag85"); fold_column_RandomModel_CompProb(comp_prob,rm); dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. 
Bad arguments"); } gmp = new_GeneModelParam_from_argv(&argc,argv); std_opt = new_StandardOutputOptions_from_argv(&argc,argv); sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv); dump_packaln = strip_out_assigned_argument(&argc,argv,"dump"); read_packaln = strip_out_assigned_argument(&argc,argv,"recover"); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene model"); } codon_score = make_PairBaseCodonModelScore(comp_prob); nonc_score = make_PairBaseModelScore(); splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score); splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score); if((ifp = openfile(argv[1],"r")) == NULL ) { fatal("Could not open file %s",argv[1]); } al = read_fasta_SeqAlign(ifp); assert(al); assert(al->len == 2); assert(al->seq[0]->len > 0); assert(al->seq[1]->len > 0); /* write_fasta_SeqAlign(al,stdout);*/ pbs = new_PairBaseSeq_SeqAlign(al); if( read_packaln == NULL ) { cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3); } start = make_start_PairBaseCodonModelScore(ct); stop = make_stop_PairBaseCodonModelScore(ct); /* show_PairBaseCodonModelScore(stop,ct,stdout); */ /* for(i=0;i<pbs->anchor->len;i++) { printf("%3d %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i], CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i), char_complement_base(pbs->anchor->seq[i]), CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i)); } */ /* show_ComplexSequence(cseq,stdout); */ exonscore = SyExonScore_flat_model(100,150,0.1,1.0); /* for(i=0;i<cseq->length;i++) { fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]); } exit(0); */ if( read_packaln != NULL ) { packifp = openfile(read_packaln,"r"); if( packifp == NULL ) { fatal("File %s is unopenable - ignoring dump command",dump_packaln); } else { pal = 
read_simple_PackAln(packifp); } } else { pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri); } alb = convert_PackAln_to_AlnBlock_SyWise20(pal); if( dump_packaln != NULL ) { packifp = openfile(dump_packaln,"w"); if( packifp == NULL ) { warn("File %s is unopenable - ignoring dump command",dump_packaln); } else { show_simple_PackAln(pal,packifp); } } show_score_sequence(alb,pbs,nonc_score,stdout); /* show_StandardOutputOptions(std_opt,alb,pal,"//",stdout); */ genomic = Genomic_from_Sequence(al->seq[0]); gr = new_GenomicRegion(genomic); gr2 = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_new(gr,alb); show_GenomicRegionOptions(sgro,gr,ct,"//",stdout); return 0; }
/*
 * Convert a probability to bits by going through score space:
 * Probability2Score first, then Score2Bits on the result.
 */
Bits Probability2Bits(Probability p)
{
  Bits as_bits;

  as_bits = Score2Bits(Probability2Score(p));

  return as_bits;
}
/*
 * Combine two scores by summing their probabilities: each score is mapped
 * back with Score2Probability, the two values are added, and the total is
 * returned as a score.
 */
Score Probability_logsum(Score one,Score two)
{
  Score combined;

  combined = Probability2Score(Score2Probability(one) + Score2Probability(two));

  return combined;
}
/*
 * Build a GeneModel from gene statistics gs and tuning parameters p.
 *
 *  - the 64 codon entries are copied from gs
 *  - for each splice site (5' and 3') a pwmDNA is built from the alignment
 *    in gs with the matching pseudocount from p, the random model gs->rnd
 *    is folded in, and a SpliceSiteScore is filled with the converted
 *    matrix, the offset from gs and the collar/offset values from p
 *    (p supplies them in bits)
 *  - the intron emission is gs->intron plus a pseudocount, normalised over
 *    the four bases, with slot 4 set to 1.0
 *  - if gs->polyp is present the same normalisation is applied; slot 4 is
 *    not assigned in that case
 *  - rnd is a hard link to gs->rnd
 *
 * Asserts that gs and its splice5, splice3, intron and rnd members are set.
 */
GeneModel * GeneModel_from_GeneStats(GeneStats * gs,GeneModelParam * p)
{
  GeneModel * out;
  int idx;
  double norm;

  out = GeneModel_alloc();

  assert(gs);
  assert(gs->splice5);
  assert(gs->splice3);
  assert(gs->intron);
  assert(gs->rnd);

  for(idx = 0; idx < 64; idx++) {
    out->codon[idx] = gs->codon[idx];
  }

  /* 5' splice site */
  out->splice5 = pwmDNA_from_SeqAlign(gs->splice5,p->splice5_pseudo);
  fold_randommodel_pwmDNA(out->splice5,gs->rnd);

  out->splice5score = SpliceSiteScore_alloc();
  out->splice5score->score = pwmDNAScore_from_pwmDNA(out->splice5);
  out->splice5score->offset = gs->splice5_offset;
  out->splice5score->min_collar = Probability2Score(Bits2Probability(p->min_collar));
  out->splice5score->max_collar = Probability2Score(Bits2Probability(p->max_collar));
  out->splice5score->score_offset = Probability2Score(Bits2Probability(p->score_offset));

  /* 3' splice site */
  out->splice3 = pwmDNA_from_SeqAlign(gs->splice3,p->splice3_pseudo);
  fold_randommodel_pwmDNA(out->splice3,gs->rnd);

  out->splice3score = SpliceSiteScore_alloc();
  out->splice3score->score = pwmDNAScore_from_pwmDNA(out->splice3);
  out->splice3score->offset = gs->splice3_offset;
  out->splice3score->min_collar = Probability2Score(Bits2Probability(p->min_collar));
  out->splice3score->max_collar = Probability2Score(Bits2Probability(p->max_collar));
  out->splice3score->score_offset = Probability2Score(Bits2Probability(p->score_offset));

  out->use_gtag_splice = p->use_gtag_splice;
  out->score_for_gtag = Probability2Score(p->prob_for_gtag);

  /* intron emissions: pseudocounted frequencies normalised to sum to one */
  out->intron = RandomModelDNA_alloc();

  norm = 0.0;
  for(idx = 0; idx < 4; idx++) {
    norm += gs->intron->base[idx] + p->intron_emission_pseudo;
  }
  for(idx = 0; idx < 4; idx++) {
    out->intron->base[idx] = (gs->intron->base[idx] + p->intron_emission_pseudo)/norm;
  }
  out->intron->base[4] = 1.0;

  /* optional polypyrimidine tract emissions, normalised the same way */
  if( gs->polyp != NULL ) {
    out->polyp = RandomModelDNA_alloc();

    norm = 0.0;
    for(idx = 0; idx < 4; idx++) {
      norm += gs->polyp->base[idx] + p->polyp_emission_pseudo;
    }
    for(idx = 0; idx < 4; idx++) {
      out->polyp->base[idx] = (gs->polyp->base[idx] + p->polyp_emission_pseudo)/norm;
    }
  }

  out->rnd = hard_link_RandomModelDNA(gs->rnd);

  return out;
}
int main(int argc,char **argv) { FiveStateFrameSet * frame; FiveStateModel * fsm; FiveStateScore * fss; RandomModel * rm; ProteinDB * proteindb; DBSearchImpl * dbsi; Hscore * hs; double gathering_cutoff = 0.0; double bits; int i; dbsi = new_DBSearchImpl_from_argv(&argc,argv); strip_out_float_argument(&argc,argv,"ga",&gathering_cutoff); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 3 ) { show_help(stdout); exit(12); } rm = default_RandomModel(); frame = read_FiveStateFrameSet_file(argv[1],"block.str"); if( frame == NULL ) fatal("Unable to make FiveStateModel from context %s, block.str file",argv[1]); fsm = FiveStateModel_from_FiveStateFrameSet(frame); /* dump_FiveStateModel(fsm,stdout); */ fsm->name = stringalloc(argv[1]); fold_RandomModel_into_FiveStateModel(fsm,rm); /* converts probabilities to integers for calculation */ fss = FiveStateScore_from_FiveStateModel(fsm); proteindb = single_fasta_ProteinDB(argv[2]); if( proteindb== NULL ) fatal("Unable to make proteindb from %s",argv[2]); hs = std_score_Hscore(Probability2Score(gathering_cutoff)-10,-1); search_FiveStateProtein(dbsi,hs,fss,proteindb); fprintf(stdout,"\n\n#High Score list\n"); fprintf(stdout,"#Protein ID DNA Str ID Bits Evalue\n"); fprintf(stdout,"--------------------------------------------------------------------------\n"); for(i=0;i<hs->len;i++) { bits = Score2Bits(hs->ds[i]->score); if( bits < gathering_cutoff ) { break; } fprintf(stdout,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits); } }
int main(int argc,char ** argv) { Sequence * cdna; Sequence * gen; Sequence * active_gen; Sequence * active_cdna; int i; int dstart = -1; int dend = -1; int cstart = -1; int cend = -1; CodonTable * ct = NULL; CodonMatrixScore * cm = NULL; RandomCodon * rndcodon = NULL; RandomCodonScore * rndcodonscore = NULL; DnaMatrix * dm = NULL; DPRunImpl * dpri = NULL; GeneModel * gm; GeneModelParam * gmp; GeneStats * gs; GeneParser21 * gp21; GeneParser21Score * gp21s; GeneParser4Score * gp; ComplexSequenceEvalSet * cdna_cses; ComplexSequenceEvalSet * gen_cses; ComplexSequence * cs_cdna; ComplexSequence * cs_gen; Genomic * gent; GenomicRegion * gr; CompMat * cmat; CompProb * cprob; char * matfile = "blosum62.bla"; Protein * trans; PackAln * pal; AlnBlock * alb; FILE * ofp = stdout; dpri = new_DPRunImpl_from_argv(&argc,argv); gmp = new_GeneModelParam_from_argv(&argc,argv); strip_out_integer_argument(&argc,argv,"u",&dstart); strip_out_integer_argument(&argc,argv,"v",&dend); strip_out_integer_argument(&argc,argv,"s",&cstart); strip_out_integer_argument(&argc,argv,"t",&cend); strip_out_standard_options(&argc,argv,show_help,show_version); ct = read_CodonTable_file(codon_file); cmat = read_Blast_file_CompMat(matfile); cprob = CompProb_from_halfbit(cmat); cm = naive_CodonMatrixScore_from_prob(ct,cprob); gm = GeneModel_from_GeneModelParam(gmp); cdna = read_fasta_file_Sequence(argv[1]); gen = read_fasta_file_Sequence(argv[2]); if( dstart != -1 || dend != -1 ) { if( dstart == -1 ) { dstart = 1; } if( dend == -1 ) { dend = gen->len; } active_gen = magic_trunc_Sequence(gen,dstart,dend); } else { active_gen = hard_link_Sequence(gen); } if( cstart != -1 || cend != -1 ) { if( cstart == -1 ) { cstart = 1; } if( cend == -1 ) { cend = gen->len; } active_cdna = magic_trunc_Sequence(gen,cstart,cend); } else { active_cdna = hard_link_Sequence(gen); } rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct); fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd); rndcodonscore = 
RandomCodonScore_from_RandomCodon(rndcodon); assert(active_cdna); assert(active_gen); cdna_cses = default_cDNA_ComplexSequenceEvalSet(); gen_cses = new_ComplexSequenceEvalSet_from_GeneModel(gm); cs_cdna = new_ComplexSequence(active_cdna,cdna_cses); cs_gen = new_ComplexSequence(active_gen,gen_cses); gp21 = std_GeneParser21(); GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd); gp21s = GeneParser21Score_from_GeneParser21(gp21); gp = GeneParser4Score_from_GeneParser21Score(gp21s); dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1))); assert(cs_cdna); assert(cs_gen); assert(gp); assert(rndcodonscore); assert(dm); assert(dpri); /* show_CodonMatrixScore(cm,ct,ofp);*/ pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm, Probability2Score(halfbit2Probability(-12)), Probability2Score(halfbit2Probability(-2)), Probability2Score(halfbit2Probability(-5)), Probability2Score(halfbit2Probability(0)), NULL, dpri); alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal); gent = Genomic_from_Sequence(gen); assert(gent); gr = new_GenomicRegion(gent); assert(gr); add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL); mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp); show_pretty_GenomicRegion(gr,0,ofp); for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,ofp); } } }