boolean show_pretty_aln(void) { Protein * ps; fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment\n",program_name,Score2Bits(pal->score)); if( alg == GWWRAP_2193L || alg == GWWRAP_2193) { fprintf(ofp,"Entrie alignment score contains unseen 'random' score segments\nYou should only use the per-alignments score printed below\nfor the bits score of the alignment\n\n"); } if( use_syn == FALSE ) { fprintf(ofp,"Scores as bits over a flat simple random model\n\n"); } else { fprintf(ofp,"Scores as bits over a synchronous coding model\n\n"); } if( use_tsm == FALSE ) { fprintf(ofp,"Warning: The bits scores is not probablistically correct for single seqs\nSee WWW help for more info\n\n"); protgene_ascii_display(alb,pro->baseseq->seq,pro->baseseq->name,pro->baseseq->offset,gen,ct,15,main_block,(alg == GWWRAP_623L || alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE, ofp); } else { ps = pseudo_Protein_from_ThreeStateModel(tsm); protgene_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,gen,ct,15,main_block,(alg == GWWRAP_623L || alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE,ofp); free_Protein(ps); } fprintf(ofp,"%s\n",divide_str); return TRUE; }
int main(int argc,char ** argv) { int i; SequenceSet * in; Sequence * trans; ThreeStateDB * tsd; DPRunImpl * dpri; CodonTable * ct; int return_status; ThreeStateModel * tsm; ThreeStateScore * tss; Protein * hmmp; ComplexSequence * cs; ComplexSequenceEvalSet * cses; PackAln * pal; AlnBlock * alb; int show_align = 0; int show_alb = 0; int show_verbose = 1; int show_trans = 0; ct = read_CodonTable_file("codon.table"); cses = default_aminoacid_ComplexSequenceEvalSet(); dpri = new_DPRunImpl_from_argv(&argc,argv); strip_out_boolean_def_argument(&argc,argv,"pretty",&show_align); strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb); strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans); if( argc != 3 ) { show_help(stdout); exit(63); } in = read_fasta_SequenceSet_file(argv[1]); tsd = HMMer2_ThreeStateDB(argv[2]); assert(in); assert(tsd); assert(in->len == 2); trans = translate_Sequence(in->set[0],ct); if( show_trans ) { write_fasta_Sequence(trans,stdout); } cs = new_ComplexSequence(trans,cses); open_ThreeStateDB(tsd); while( (tsm = read_TSM_ThreeStateDB(tsd,&return_status)) != NULL ) { fold_RandomModel_into_ThreeStateModel(tsm,tsm->rm); set_startend_policy_ThreeStateModel(tsm,TSM_local,10,1.0); tss = ThreeStateScore_from_ThreeStateModel(tsm); hmmp = pseudo_Protein_from_ThreeStateModel(tsm); pal = PackAln_bestmemory_ThreeStateLoop(tss,cs,NULL,dpri); alb = convert_PackAln_to_AlnBlock_ThreeStateLoop(pal); if( show_alb ) { show_flat_AlnBlock(alb,stdout); } if( show_align ) { write_pretty_seq_align(alb,hmmp->baseseq,trans,15,50,stdout); } if( show_verbose ) { show_verbose_evo(alb,tsm,in->set[0],in->set[1],ct,stdout); } } }
boolean show_output(void) { int i,k; ThreeStateModel * temptsm; AlnBlock * alb; PackAln * pal; MatchSummarySet * mss; Protein * ps; cDNA * cdna; double bits; boolean fitted_res = FALSE; AlnBlockList * alist; AlnBlock * anchored; SequenceSet * set; AlnColumn * alt; Protein * trans; /* sort by bit score first */ sort_Hscore_by_score(hs); if( search_mode == PC_SEARCH_S2DB ) { if( hs->his == NULL || hs->his->total < 1000 ) { info("Cannot fit histogram to a db smaller than 1,000"); fprintf(ofp,"[Warning: Can't fit histogram to a db smaller than 1,000]\n\n"); show_histogram = FALSE; } else { fitted_res = TRUE; fit_Hscore_to_EVD(hs,20); } } /* deal with initialising anchored alignment. * Could be done for either single HMMs or single proteins, * but we will only do it for HMMs at the moment */ if( make_anchored_aln == TRUE ) { if( tsm == NULL ) { warn("Attempting to make an achored alignment without a HMM. impossible!"); make_anchored_aln = FALSE; } else { anchored = single_unit_AlnBlock(tsm->len,"MATCH_STATE"); set = SequenceSet_alloc_std(); } } /* dofus catcher */ if( aln_alg != alg ) { fprintf(ofp,"\n#\n#WARNING!\n#\n# Your alignment algorithm is different from your search algorithm.\n# This is probably quite sensible but will lead to differing scores.\n# Use the search score as an indicator of the significance of the match\n# Read the docs for more information\n#\n"); } fprintf(ofp,"\n\n#High Score list\n"); fprintf(ofp,"#Protein ID DNA Str ID Bits Evalue\n"); fprintf(ofp,"--------------------------------------------------------------------------\n"); for(i=0;i<hs->len;i++) { bits = Score2Bits(hs->ds[i]->score); if( bits < search_cutoff ) { break; } if( fitted_res == TRUE && evalue_search_str != NULL ) { if( hs->ds[i]->evalue > evalue_search_cutoff ) break; } if( fitted_res == TRUE) fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f %.2g\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? 
'-' : '+',hs->ds[i]->target->name,bits,hs->ds[i]->evalue); else fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits); } if( search_mode == PC_SEARCH_S2DB && show_histogram == TRUE ) { fprintf(ofp,"\n\n#Histogram\n"); fprintf(ofp,"-----------------------------------------------------------------------\n"); PrintASCIIHistogram(hs->his,ofp); } fprintf(ofp,"\n\n#Alignments\n"); fprintf(ofp,"-----------------------------------------------------------------------\n"); for(i=0;i<hs->len;i++) { bits = Score2Bits(hs->ds[i]->score); if( bits < search_cutoff ) { break; } if( i >= aln_number ) { break; } if( fitted_res == TRUE && evalue_search_str != NULL ) { if( hs->ds[i]->evalue > evalue_search_cutoff ) break; } fprintf(ofp,"\n\n>Results for %s vs %s (%s) [%d]\n",hs->ds[i]->query->name,hs->ds[i]->target->name,hs->ds[i]->target->is_reversed == TRUE ? "reverse" : "forward",i+1 ); cdna = get_cDNA_from_cDNADB(cdb,hs->ds[i]->target); temptsm = indexed_ThreeStateModel_ThreeStateDB(tsmdb,hs->ds[i]->query); alb = AlnBlock_from_TSM_estwise_wrap(temptsm,cdna,cps,cm,ct,rmd,aln_alg,use_syn,allN,flat_insert,dpri,&pal); if( alb == NULL ) { warn("Got a NULL alignment. Exiting now due to presumed problems"); fprintf(ofp,"\n\n*Got a NULL alignment. Exiting now due to presumed problems*\n\n"); return FALSE; } if( use_single_pro == FALSE) mss = MatchSummarySet_from_AlnBlock_genewise(alb,temptsm->name,1,cdna->baseseq); else mss = MatchSummarySet_from_AlnBlock_genewise(alb,pro->baseseq->name,pro->baseseq->offset,cdna->baseseq); if( show_pretty == TRUE ) { fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment.\nThis will be different from per-alignment scores. 
See manual for details\nFor computer parsable output, try %s -help or read the manual\n",program_name,Score2Bits(pal->score),program_name); if( use_syn == FALSE ) { fprintf(ofp,"Scores as bits over a flat simple random model\n\n"); } else { fprintf(ofp,"Scores as bits over a synchronous coding model\n\n"); } ps = pseudo_Protein_from_ThreeStateModel(temptsm); protcdna_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,cdna,ct,15,main_block,TRUE,ofp); free_Protein(ps); fprintf(ofp,"%s\n",divide_str); } if( show_match_sum == TRUE ) { show_MatchSummary_genewise_header(ofp); show_MatchSummarySet_genewise(mss,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_pep == TRUE ) { alt = alb->start; for(;alt != NULL;) { trans = Protein_from_GeneWise_AlnColumn(cdna->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise); if ( trans == NULL ) break; write_fasta_Sequence(trans->baseseq,ofp); free_Protein(trans); } fprintf(ofp,"%s\n",divide_str); } if( show_AlnBlock == TRUE ) { mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_PackAln == TRUE ) { show_simple_PackAln(pal,ofp); fprintf(ofp,"%s\n",divide_str); } /* * This goes at the end because it destroys the alb structure */ if( make_anchored_aln == TRUE ) { /* attach sequence to als in alb, so we have it for later use */ alb->seq[1]->data = (void *) cdna->baseseq; /* add to SequenceSet so we can destroy the memory */ add_SequenceSet(set,hard_link_Sequence(cdna->baseseq)); alist = split_AlnBlock(alb,is_random_AlnColumn_genewise); for(k=0;k<alist->len;k++) { /* actually produce the anchored alignment */ /*mapped_ascii_AlnBlock(alist->alb[k],Score2Bits,stderr);*/ add_to_anchored_AlnBlock(anchored,alist->alb[k]); /* dump_ascii_AlnBlock(anchored,stderr);*/ } } alb = free_AlnBlock(alb); pal = free_PackAln(pal); mss = free_MatchSummarySet(mss); cdna = free_cDNA(cdna); temptsm = free_ThreeStateModel(temptsm); } if( do_complete_analysis == TRUE ) { fprintf(ofp,"\n\n#Complete 
Analysis\n"); fprintf(ofp,"-------------------------------------------------------------\n\n"); /* ok - end of loop over relevant hits. If we have an * anchored alignment, print it out! */ if( make_anchored_aln == TRUE ) { /*dump_ascii_AlnBlock(anchored,stderr);*/ write_mul_estwise_AlnBlock(anchored,ct,ofp); fprintf(ofp,"%s\n",divide_str); } } return TRUE; }
/*
 * show_verbose_evo
 *
 * Prints one tab-separated summary line to ofp describing how the residue
 * differences between two DNA sequences score against an HMM.
 *
 *   alb   alignment whose columns are walked; only columns where both
 *         units carry the text label "SEQUENCE" are scored
 *   tsm   the model supplying per-position match emissions
 *   ref   reference DNA sequence
 *   diff  second DNA sequence, compared base-by-base with ref
 *   ct    codon table used for translation
 *   ofp   output stream
 *
 * Output columns, in order:
 *   ref name, model pseudo-protein name, summed score difference in bits,
 *   count of differing residues where the difference is >= 0,
 *   count where it is < 0, and that negative count split into three bins:
 *   above the -5 bit threshold, above the -10 bit threshold, and the rest.
 *
 * The score difference at a position is
 *   Score(match_emission[ref residue]) - Score(match_emission[diff residue]).
 *
 * Only the common prefix of ref and diff is compared, and only whole codons
 * are examined, so inputs of unequal length or of a length that is not a
 * multiple of three no longer cause reads past the end of a sequence.
 */
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;
  Sequence * ref_trans;
  Sequence * diff_trans;
  DnaProbMatrix * negative_dm;
  DnaProbMatrix * pseudo_dm;

  int i;
  int k;
  int common_len;
  int count = 0;
  double est_mutation = 0.0;

  int dna_offset;
  int hmm_pos;
  int pep_pos;

  /*
   * The totals and site/change tallies below are accumulated but do not
   * appear in the summary line printed by this function.
   */
  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;
  int syn_sites = 0;
  int nonsyn_sites = 0;
  int syn_changes = 0;
  int nonsyn_changes = 0;

  int count_ref_positive = 0;
  int count_ref_negative = 0;
  int count_ref_negative_0_5 = 0;
  int count_ref_negative_5_10 = 0;
  int count_ref_negative_10_15 = 0;

  int diff_score;
  char diff_aa;
  char ref_aa;
  int score_ratio = 0;

  Score score_neg_5  = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));

  /* Never index either sequence beyond the shorter of the two. */
  common_len = ref->len < diff->len ? ref->len : diff->len;

  /* Codon pass: i + 3 <= common_len keeps every seq[i+k] in range. */
  for(i=0;i+3 <= common_len;i+=3) {
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i) ) {
      /* the residue changed, so every base change here is non-synonymous */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          nonsyn_changes++;
        }
      }
    } else {
      /* same residue: any base change is synonymous */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          syn_changes++;
        }
      }
    }

    /* calculate the sites. There is always 2 non syn sites */
    nonsyn_sites += 2;
    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    }
  }

  /* Fraction of compared bases that differ. */
  for(i=0;i<common_len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }

  if( common_len > 0 ) {
    est_mutation = (double)count / (double)common_len;
  }

  pseudo_dm = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);
  /*
   * NOTE(review): negative_dm is built but never read - the negative
   * selection call below is handed pseudo_dm. Confirm which matrix was
   * intended before changing either.
   */
  negative_dm = DnaProbMatrix_from_match(1.0 - (est_mutation*2),NMaskType_BANNED);

  ref_trans  = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);

  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") != 0 ||
        strcmp(alc->alu[1]->text_label,"SEQUENCE") != 0 ) {
      continue;
    }

    hmm_pos    = alc->alu[0]->end;
    pep_pos    = alc->alu[1]->end;
    dna_offset = pep_pos*3;

    pseudo = logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
    neg    = logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[hmm_pos],ct,pseudo_dm);

    ref_aa  = ref_trans->seq[pep_pos];
    diff_aa = diff_trans->seq[pep_pos];

    if( ref_aa != diff_aa ) {
      /*
       * NOTE(review): the emission table is indexed by residue - 'A';
       * assumes translation only yields 'A'..'Z' - confirm.
       */
      diff_score = Probability2Score(tsm->unit[hmm_pos]->match_emission[ref_aa-'A'])
                 - Probability2Score(tsm->unit[hmm_pos]->match_emission[diff_aa-'A']);

      score_ratio += diff_score;

      if( diff_score < 0 ) {
        count_ref_negative++;
        if( diff_score > score_neg_5 ) {
          count_ref_negative_0_5++;
        } else if( diff_score > score_neg_10 ) {
          count_ref_negative_5_10++;
        } else {
          /* everything at or below the -10 bit threshold lands here */
          count_ref_negative_10_15++;
        }
      } else {
        count_ref_positive++;
      }
    }

    total_pseudo += pseudo;
    total_neg    += neg;
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
          count_ref_positive,count_ref_negative,
          count_ref_negative_0_5,
          count_ref_negative_5_10,
          count_ref_negative_10_15);

  /*
   * NOTE(review): ref_trans, diff_trans, pseudo_dm and negative_dm are not
   * released here; no free routine for Sequence or DnaProbMatrix is visible
   * in this file - add the matching calls.
   */
  free_Protein(hmmp);
}