boolean show_pretty_aln(void) { Protein * ps; fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment\n",program_name,Score2Bits(pal->score)); if( alg == GWWRAP_2193L || alg == GWWRAP_2193) { fprintf(ofp,"Entrie alignment score contains unseen 'random' score segments\nYou should only use the per-alignments score printed below\nfor the bits score of the alignment\n\n"); } if( use_syn == FALSE ) { fprintf(ofp,"Scores as bits over a flat simple random model\n\n"); } else { fprintf(ofp,"Scores as bits over a synchronous coding model\n\n"); } if( use_tsm == FALSE ) { fprintf(ofp,"Warning: The bits scores is not probablistically correct for single seqs\nSee WWW help for more info\n\n"); protgene_ascii_display(alb,pro->baseseq->seq,pro->baseseq->name,pro->baseseq->offset,gen,ct,15,main_block,(alg == GWWRAP_623L || alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE, ofp); } else { ps = pseudo_Protein_from_ThreeStateModel(tsm); protgene_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,gen,ct,15,main_block,(alg == GWWRAP_623L || alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE,ofp); free_Protein(ps); } fprintf(ofp,"%s\n",divide_str); return TRUE; }
void free_objects(void) { if( gwdb != NULL ) gwdb = free_GeneWiseDB(gwdb); if( cdb != NULL ) cdb = free_cDNADB(cdb); if( sdb != NULL ) sdb = free_SequenceDB(sdb); if( cdna != NULL ) cdna = free_cDNA(cdna); if( pro != NULL ) pro = free_Protein(pro); if( tsm != NULL ) tsm = free_ThreeStateModel(tsm); if( tsmdb != NULL ) tsmdb = free_ThreeStateDB(tsmdb); if( gws != NULL ) gws = free_GeneWiseScore(gws); if( hs != NULL ) hs = free_Hscore(hs); if( cm != NULL ) cm = free_CodonMapper(cm); if( cps != NULL ) cps = free_cDNAParser(cps); }
/* Function:  free_Translation(obj)
 *
 * Descrip:    Free Function: removes the memory held by obj
 *             Will chain up to owned members and clear all lists
 *
 *             The object is reference counted through
 *             dynamite_hard_link: while more than one link is
 *             outstanding the call only drops one link and leaves
 *             the object alive. The last call releases the cached
 *             protein (if any) and the object itself. obj->parent
 *             is not released here.
 *
 *
 * Arg:        obj [UNKN ] Object that is free'd [Translation *]
 *
 * Return [UNKN ]  Always NULL, so callers can write obj = free_Translation(obj) [Translation *]
 *
 */
Translation * free_Translation(Translation * obj)
{
  int return_early = 0;
#ifdef PTHREAD
  int mutex_status;
#endif

  if( obj == NULL) {
    warn("Attempting to free a NULL pointer to a Translation obj. Should be trappable");
    return NULL;
  }

  /*
   * The lock/unlock calls must not live inside assert(): with NDEBUG
   * defined the assert expression is discarded and the link count
   * would be updated without any locking at all.
   */
#ifdef PTHREAD
  mutex_status = pthread_mutex_lock(&(obj->dynamite_mutex));
  assert(mutex_status == 0);
  (void) mutex_status;
#endif

  if( obj->dynamite_hard_link > 1) {
    return_early = 1;
    obj->dynamite_hard_link--;
  }

#ifdef PTHREAD
  mutex_status = pthread_mutex_unlock(&(obj->dynamite_mutex));
  assert(mutex_status == 0);
  (void) mutex_status;
#endif

  if( return_early == 1) {
    return NULL;
  }

  /* obj->parent is linked in */
  if( obj->protein != NULL) {
    free_Protein(obj->protein);
  }

  ckfree(obj);
  return NULL;
}
boolean free_io_objects(void) { if( use_tsm == TRUE) { free_ThreeStateModel(tsm); } else { free_Protein(pro); } free_CodonTable(ct); if( gf != NULL ) { free_GeneFrequency21(gf); } free_RandomModelDNA(rmd); if( is_embl ) { free_GenomicRegion(embl); } free_Genomic(gen); return TRUE; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; Genomic * gen_temp; FILE * ifp; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( tstart_str != NULL ) { if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) { warn("Could not make %s out as target start",tstart); ret = FALSE; } } if( tend_str != NULL ) { if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) { warn("Could not make %s out as target end",tend); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str); ret = FALSE; } if( is_embl == FALSE ) { if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) { ret = FALSE; warn("Could not read genomic sequence in %s",dna_seq_file); gen = NULL; } } else { embl = read_EMBL_GenomicRegion_file(dna_seq_file); if( embl == NULL ) { warn("Could not read genomic EMBL file in %s",dna_seq_file); gen = NULL; ret = FALSE; } else { gen = hard_link_Genomic(embl->genomic); } } if( gen != NULL ) { if( tstart != -1 || tend != -1 ) { if( tstart == -1 ) tstart = 0; if( tend == -1 ) tend = gen->baseseq->len; gen_temp = truncate_Genomic(gen,tstart-1,tend); if( gen_temp == NULL ){ ret = FALSE; } else { free_Genomic(gen); gen = gen_temp; } } else { /* no truncation required */ } if( reverse == TRUE ) { if( tstart > tend ) { warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend); } gen_temp = reverse_complement_Genomic(gen); free_Genomic(gen); gen = gen_temp; } } /* * Can't truncate on GenomicRegion (for good reasons!). * but we want only a section of the EMBL file to be used * * So... swap genomic now. 
Positions in EMBL are still valid, * however - some genes will loose their sequence, which will be damaging. ;) */ if( is_embl ) { free_Genomic(embl->genomic); embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */ } if( target_abs == TRUE ) { if( is_embl == TRUE ) { warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta."); ret = FALSE; } gen->baseseq->offset = 1; gen->baseseq->end = strlen(gen->baseseq->seq); } if( alg_str != NULL ) { alg = gwrap_alg_type_from_string(alg_str); } else { if( use_tsm == TRUE ) { alg_str = "623L"; } else { alg_str = "623"; } alg = gwrap_alg_type_from_string(alg_str); } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( use_tsm == FALSE ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) { warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } } } else { /** using a HMM **/ /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/ /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/ tsm = HMMer2_read_ThreeStateModel(hmm_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",hmm_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( 
hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } if( tsm == NULL ) { warn("Could not read %s as a hmm",hmm_file); } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,30,0.1); } } /* end of else tsm != NULL */ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(cfreq_string,"model") == 0 ) { model_codon = TRUE; } else if ( strcmp(cfreq_string,"flat") == 0 ) { model_codon = FALSE; } else { warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string); ret = FALSE; } if( strcmp(splice_string,"model") == 0 ) { model_splice = TRUE; } else if ( strcmp(splice_string,"flat") == 0 ) { model_splice = FALSE; gmp->use_gtag_splice = TRUE; } else { warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( strcmp(intron_string,"model") == 0 ) { use_tied_model = FALSE; } else if ( strcmp(intron_string,"tied") == 0 ) { use_tied_model = TRUE; } else { warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string); ret = FALSE; } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( use_new_stats == 0 ) { if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) { ret = 
FALSE; warn("Could not read a GeneFrequency file in %s",gene_file); } } else { if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){ ret=FALSE; warn("Could not read gene statistics in %s",new_gene_file); } } /* end of else using new gene stats */ if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); return ret; }
boolean show_output(void) { int i; cDNA * cdna; Protein * trans; GenomicOverlapResults * gor; AlnColumn * alt; if( show_pretty == TRUE ) { show_pretty_aln(); } if( show_match_sum == TRUE ) { show_MatchSummary_genewise_header(ofp); show_MatchSummarySet_genewise(mss,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_pretty_gene == TRUE ) { show_pretty_GenomicRegion(gr,0,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_supp_gene == TRUE ) { show_pretty_GenomicRegion(gr,1,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_embl == TRUE ) { write_Embl_FT_GenomicRegion(gr,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_diana == TRUE ) { write_Diana_FT_GenomicRegion(gr,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_overlap == TRUE ) { gor = Genomic_overlap(gr,embl); show_GenomicOverlapResults(gor,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_trans == TRUE ) { for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,ofp); } } fprintf(ofp,"%s\n",divide_str); } if( show_pep == TRUE ) { alt = alb->start; for(;alt != NULL;) { trans = Protein_from_GeneWise_AlnColumn(gen->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise); if ( trans == NULL ) break; write_fasta_Sequence(trans->baseseq,ofp); free_Protein(trans); } fprintf(ofp,"%s\n",divide_str); } if( show_cdna == TRUE ) { for(i=0;i<gr->len;i++) { cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]); write_fasta_Sequence(cdna->baseseq,ofp); } fprintf(ofp,"%s\n",divide_str); } if( show_ace == TRUE ) { show_ace_GenomicRegion(gr,gen->baseseq->name,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_gff == TRUE ) { show_GFF_GenomicRegion(gr,gen->baseseq->name,"GeneWise",ofp); fprintf(ofp,"%s\n",divide_str); } if( show_gene_plain == TRUE ) { show_GenomicRegion(gr,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_AlnBlock == 
TRUE ) { mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_cumlative_PackAln == TRUE ) { show_bits_and_cumlative_PackAln(pal,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_PackAln == TRUE ) { show_simple_PackAln(pal,ofp); fprintf(ofp,"%s\n",divide_str); } return TRUE; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; SequenceDB * psdb; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( use_single_dna == TRUE ) { cdna = read_fasta_file_cDNA(dna_seq_file); if( cdna == NULL ) { warn("Could not open single dna sequence in %s",dna_seq_file); ret = FALSE; } } else { sdb = single_fasta_SequenceDB(dna_seq_file); if( sdb == NULL ) { warn("Could not build a sequence database on %s",dna_seq_file); ret = FALSE; } } rm = default_RandomModel(); if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not get gap string number %s",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not get ext string number %s",ext_str); ret = FALSE; } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( aln_number_str != NULL ) { if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) { warn("Weird aln number string %s...\n",aln_number_str); ret = FALSE; } } if( report_str != NULL ) { if( is_integer_string(report_str,&report_stagger) == FALSE ) { warn("Weird report stagger asked for %s",report_str); ret = FALSE; } } if( use_pfam1 == TRUE ) { tsmdb = 
new_PfamHmmer1DB_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_pfam2 == TRUE ) { tsmdb = HMMer2_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_tsm == TRUE) { /** using a HMM **/ tsm = HMMer2_read_ThreeStateModel(protein_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",protein_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } else { if( tsm->name == NULL ) { tsm->name = stringalloc(protein_file); } } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,15,0.2); tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. 
Weird!"); ret = FALSE; } } /* end of else tsm != NULL */ } /* end of else is tsm */ else if( use_single_pro ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) { warn("Proteins can only have local/global startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } if( startend == TSM_global) tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); else tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); if( tsm == NULL ) { warn("Could not build ThreeStateModel from a single protein sequence..."); ret = FALSE; } else { tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. Weird!"); ret = FALSE; } } /* end of could build a TSM */ } /* else is a real protein */ } /* end of else is single protein */ else if (use_db_pro == TRUE ) { psdb = single_fasta_SequenceDB(protein_file); tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext); free_SequenceDB(psdb); } else { warn("No protein input file! 
Yikes!"); } /*** if( use_tsm == FALSE ) { } else { ****/ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) { warn("Could not convert %s to a double",evalue_search_str); ret = FALSE; } if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) { warn("Could not convert %s to a double",search_cutoff_str); ret = FALSE; } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( alg_str != NULL ) { alg = alg_estwrap_from_string(alg_str); } else { alg_str = "312"; alg = alg_estwrap_from_string(alg_str); } if( aln_alg_str != NULL ) { aln_alg = alg_estwrap_from_string(aln_alg_str); } else { /* if it is a protein, don't loop */ if( use_single_pro == TRUE || use_db_pro == TRUE ) aln_alg_str = "333"; else aln_alg_str = "333L"; aln_alg = alg_estwrap_from_string(aln_alg_str); } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); cps = flat_cDNAParser(indel_error); cm = flat_CodonMapper(ct); 
sprinkle_errors_over_CodonMapper(cm,subs_error); return ret; }
boolean show_output(void) { int i,k; ThreeStateModel * temptsm; AlnBlock * alb; PackAln * pal; MatchSummarySet * mss; Protein * ps; cDNA * cdna; double bits; boolean fitted_res = FALSE; AlnBlockList * alist; AlnBlock * anchored; SequenceSet * set; AlnColumn * alt; Protein * trans; /* sort by bit score first */ sort_Hscore_by_score(hs); if( search_mode == PC_SEARCH_S2DB ) { if( hs->his == NULL || hs->his->total < 1000 ) { info("Cannot fit histogram to a db smaller than 1,000"); fprintf(ofp,"[Warning: Can't fit histogram to a db smaller than 1,000]\n\n"); show_histogram = FALSE; } else { fitted_res = TRUE; fit_Hscore_to_EVD(hs,20); } } /* deal with initialising anchored alignment. * Could be done for either single HMMs or single proteins, * but we will only do it for HMMs at the moment */ if( make_anchored_aln == TRUE ) { if( tsm == NULL ) { warn("Attempting to make an achored alignment without a HMM. impossible!"); make_anchored_aln = FALSE; } else { anchored = single_unit_AlnBlock(tsm->len,"MATCH_STATE"); set = SequenceSet_alloc_std(); } } /* dofus catcher */ if( aln_alg != alg ) { fprintf(ofp,"\n#\n#WARNING!\n#\n# Your alignment algorithm is different from your search algorithm.\n# This is probably quite sensible but will lead to differing scores.\n# Use the search score as an indicator of the significance of the match\n# Read the docs for more information\n#\n"); } fprintf(ofp,"\n\n#High Score list\n"); fprintf(ofp,"#Protein ID DNA Str ID Bits Evalue\n"); fprintf(ofp,"--------------------------------------------------------------------------\n"); for(i=0;i<hs->len;i++) { bits = Score2Bits(hs->ds[i]->score); if( bits < search_cutoff ) { break; } if( fitted_res == TRUE && evalue_search_str != NULL ) { if( hs->ds[i]->evalue > evalue_search_cutoff ) break; } if( fitted_res == TRUE) fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f %.2g\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? 
'-' : '+',hs->ds[i]->target->name,bits,hs->ds[i]->evalue); else fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits); } if( search_mode == PC_SEARCH_S2DB && show_histogram == TRUE ) { fprintf(ofp,"\n\n#Histogram\n"); fprintf(ofp,"-----------------------------------------------------------------------\n"); PrintASCIIHistogram(hs->his,ofp); } fprintf(ofp,"\n\n#Alignments\n"); fprintf(ofp,"-----------------------------------------------------------------------\n"); for(i=0;i<hs->len;i++) { bits = Score2Bits(hs->ds[i]->score); if( bits < search_cutoff ) { break; } if( i >= aln_number ) { break; } if( fitted_res == TRUE && evalue_search_str != NULL ) { if( hs->ds[i]->evalue > evalue_search_cutoff ) break; } fprintf(ofp,"\n\n>Results for %s vs %s (%s) [%d]\n",hs->ds[i]->query->name,hs->ds[i]->target->name,hs->ds[i]->target->is_reversed == TRUE ? "reverse" : "forward",i+1 ); cdna = get_cDNA_from_cDNADB(cdb,hs->ds[i]->target); temptsm = indexed_ThreeStateModel_ThreeStateDB(tsmdb,hs->ds[i]->query); alb = AlnBlock_from_TSM_estwise_wrap(temptsm,cdna,cps,cm,ct,rmd,aln_alg,use_syn,allN,flat_insert,dpri,&pal); if( alb == NULL ) { warn("Got a NULL alignment. Exiting now due to presumed problems"); fprintf(ofp,"\n\n*Got a NULL alignment. Exiting now due to presumed problems*\n\n"); return FALSE; } if( use_single_pro == FALSE) mss = MatchSummarySet_from_AlnBlock_genewise(alb,temptsm->name,1,cdna->baseseq); else mss = MatchSummarySet_from_AlnBlock_genewise(alb,pro->baseseq->name,pro->baseseq->offset,cdna->baseseq); if( show_pretty == TRUE ) { fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment.\nThis will be different from per-alignment scores. 
See manual for details\nFor computer parsable output, try %s -help or read the manual\n",program_name,Score2Bits(pal->score),program_name); if( use_syn == FALSE ) { fprintf(ofp,"Scores as bits over a flat simple random model\n\n"); } else { fprintf(ofp,"Scores as bits over a synchronous coding model\n\n"); } ps = pseudo_Protein_from_ThreeStateModel(temptsm); protcdna_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,cdna,ct,15,main_block,TRUE,ofp); free_Protein(ps); fprintf(ofp,"%s\n",divide_str); } if( show_match_sum == TRUE ) { show_MatchSummary_genewise_header(ofp); show_MatchSummarySet_genewise(mss,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_pep == TRUE ) { alt = alb->start; for(;alt != NULL;) { trans = Protein_from_GeneWise_AlnColumn(cdna->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise); if ( trans == NULL ) break; write_fasta_Sequence(trans->baseseq,ofp); free_Protein(trans); } fprintf(ofp,"%s\n",divide_str); } if( show_AlnBlock == TRUE ) { mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp); fprintf(ofp,"%s\n",divide_str); } if( show_PackAln == TRUE ) { show_simple_PackAln(pal,ofp); fprintf(ofp,"%s\n",divide_str); } /* * This goes at the end because it destroys the alb structure */ if( make_anchored_aln == TRUE ) { /* attach sequence to als in alb, so we have it for later use */ alb->seq[1]->data = (void *) cdna->baseseq; /* add to SequenceSet so we can destroy the memory */ add_SequenceSet(set,hard_link_Sequence(cdna->baseseq)); alist = split_AlnBlock(alb,is_random_AlnColumn_genewise); for(k=0;k<alist->len;k++) { /* actually produce the anchored alignment */ /*mapped_ascii_AlnBlock(alist->alb[k],Score2Bits,stderr);*/ add_to_anchored_AlnBlock(anchored,alist->alb[k]); /* dump_ascii_AlnBlock(anchored,stderr);*/ } } alb = free_AlnBlock(alb); pal = free_PackAln(pal); mss = free_MatchSummarySet(mss); cdna = free_cDNA(cdna); temptsm = free_ThreeStateModel(temptsm); } if( do_complete_analysis == TRUE ) { fprintf(ofp,"\n\n#Complete 
Analysis\n"); fprintf(ofp,"-------------------------------------------------------------\n\n"); /* ok - end of loop over relevant hits. If we have an * anchored alignment, print it out! */ if( make_anchored_aln == TRUE ) { /*dump_ascii_AlnBlock(anchored,stderr);*/ write_mul_estwise_AlnBlock(anchored,ct,ofp); fprintf(ofp,"%s\n",divide_str); } } return TRUE; }
/*
 * read_TSM_ThreeStateDB
 *
 * Returns the next model from the database, dispatching on the
 * database type, and advances mdb->current_no.
 *
 * Arg:        mdb            database to read from
 * Arg:        return_status  written with a DB_RETURN_* code
 *
 * Return      a model owned by the caller, or NULL at the end of the
 *             database or on error (see *return_status).
 *
 * When mdb->hmm_model_end is set (not -1) and has been reached, the
 * call reports DB_RETURN_END without reading anything.
 */
ThreeStateModel * read_TSM_ThreeStateDB(ThreeStateDB * mdb,int * return_status)
{
  ThreeStateModel * tsm;
  Protein * pro;
  Sequence * seq;

  if( mdb->hmm_model_end != -1 && mdb->current_no == mdb->hmm_model_end ) {
    *return_status = DB_RETURN_END;
    return NULL;
  }

  mdb->current_no++;

  switch( mdb->dbtype ) {
  case TSMDB_SINGLE :
    /* a single model: hand it out and flag the end in the same call */
    *return_status = DB_RETURN_END;
    if( mdb->single->rm == NULL ) {
      warn("Threestate model without an internal random model!");
      mdb->single->rm = hard_link_RandomModel(mdb->rm);
    }
    return hard_link_ThreeStateModel(mdb->single);

  case TSMDB_HMMER1PFAM :
    tsm = read_next_TSM_PfamHmmer1DB(mdb->phdb,return_status);
    if( tsm == NULL ) {
      /* nothing read: do not hand NULL to the policy setter (same guard as the generic case) */
      return NULL;
    }
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  case TSMDB_PROTEIN :
    if( mdb->seq_cache != NULL ) {
      /* just after an open. Should actually use this sequence, and flush the cache */
      pro = Protein_from_Sequence(hard_link_Sequence(mdb->seq_cache));
      mdb->seq_cache = free_Sequence(mdb->seq_cache);
      *return_status = DB_RETURN_OK;
    } else {
      /* reload a sequence from a database */
      seq = reload_SequenceDB(NULL,mdb->sdb,return_status);

      /* exit now if error */
      if( *return_status == DB_RETURN_ERROR ) {
        return NULL; /* might have leaked memory. Ugh! */
      }

      /* if we get NULL... for the moment, silent flag end */
      if( seq == NULL ) {
        *return_status = DB_RETURN_END;
        return NULL;
      }

      pro = Protein_from_Sequence(seq);
    }

    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    /* convert protein to threestatemodel */
    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      free_Protein(pro);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    /* the protein is only needed to build the model; release it as the indexed lookup does */
    free_Protein(pro);

    /* DB status already set by seqdb */
    return tsm;

  case TSMDB_GENERIC :
    tsm = ((*mdb->reload_generic)(mdb,return_status));
    if( tsm == NULL ) {
      return NULL; /* means end of database */
    }
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  default :
    warn("Got an unrecognisable tsm db type in read-load");
    /* never leave the status unset for the caller to read */
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }
}
/*
 * indexed_ThreeStateModel_ThreeStateDB
 *
 * Retrieves the model described by the data entry en from the
 * database, dispatching on the database type.
 *
 * Arg:        mdb   database to look in
 * Arg:        en    entry identifying the model
 *
 * Return      a model owned by the caller, or NULL when it could not
 *             be retrieved or built.
 */
ThreeStateModel * indexed_ThreeStateModel_ThreeStateDB(ThreeStateDB * mdb,DataEntry * en)
{
  Sequence * seq;
  Protein * pro;
  ThreeStateModel * tsm;

  switch(mdb->dbtype) {
  case TSMDB_SINGLE :
    return hard_link_ThreeStateModel(mdb->single);

  case TSMDB_HMMER1PFAM :
    tsm = ThreeStateModel_from_name_PfamHmmer1DB(mdb->phdb,en->name);
    if( tsm == NULL ) {
      /* lookup failed: do not hand NULL to the policy setter (same guard as the generic case) */
      return NULL;
    }
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  case TSMDB_PROTEIN :
    seq = get_Sequence_from_SequenceDB(mdb->sdb,en);
    if( seq == NULL ) {
      warn("could not retrieve %s as a sequence from database",en->name);
      return NULL;
    }

    pro = Protein_from_Sequence(seq);

    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      return NULL;
    }

    /* convert protein to threestatemodel */
    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      free_Protein(pro);
      return NULL;
    }

    /* the protein was only needed to build the model */
    free_Protein(pro);

    /* DB status already set by seqdb */
    return tsm;

  case TSMDB_GENERIC :
    tsm = ((*mdb->index_generic)(mdb,en));
    if( tsm == NULL ) {
      return NULL;
    }
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  default :
    warn("Unknown threestatedb type");
    return NULL;
  }

  /* every case above returns; kept as a safety net should a case ever fall out of the switch */
  warn("Should never get here - in threestatedb reload!");
  return NULL;
}
/*
 * show_verbose_evo
 *
 * Compares a reference DNA sequence with a second ("diff") sequence
 * in the light of an alignment of the reference to a ThreeStateModel
 * and writes one tab separated line to ofp:
 *
 *   ref name, model (pseudo protein) name, summed emission score
 *   difference in bits, number of amino acid changes where the
 *   reference scores at least as well as diff, number where it scores
 *   worse, and that last number split into three bands by how much
 *   worse (less than 5 bits, less than 10 bits, 10 bits or more).
 *
 * Arg:        alb   alignment of the model (alu[0]) to the sequence (alu[1])
 * Arg:        tsm   model whose match emissions are compared
 * Arg:        ref   reference DNA sequence
 * Arg:        diff  DNA sequence compared against ref; indexed with
 *                   the same positions as ref, so it is assumed to be
 *                   at least as long - TODO confirm with callers
 * Arg:        ct    codon table used for translation
 * Arg:        ofp   output stream
 *
 * NOTE(review): pseudo_dm, negative_dm, ref_trans and diff_trans are
 * built here and not released; negative_dm is built but never passed
 * on (logl_negative_selection is given pseudo_dm). Both look
 * unintended - confirm before changing.
 */
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;
  Sequence * ref_trans;
  Sequence * diff_trans;
  DnaProbMatrix * negative_dm;
  DnaProbMatrix * pseudo_dm;
  int i;
  int count = 0;
  double est_mutation = 0.0;
  int dna_offset;
  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;

  int count_ref_positive = 0;
  int count_ref_negative = 0;
  int count_ref_negative_0_5 = 0;
  int count_ref_negative_5_10 = 0;
  int count_ref_negative_10_15 = 0;

  int syn_sites = 0;
  int nonsyn_sites = 0;
  int syn_changes = 0;
  int nonsyn_changes = 0;

  int diff_score;
  char diff_aa;
  char ref_aa;
  int score_ratio = 0;

  Score score_neg_5 = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));

  int k;

  /*
   * Walk complete codons only: the i+2 bound stops a trailing partial
   * codon from being read past the end of the sequence.
   */
  for(i=0;i+2<ref->len;i+=3) {
    /* if this has changed, then it is definitely non syn */
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i)) {
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          nonsyn_changes++;
        }
      }
    } else {
      /* could still be syn change */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          syn_changes++;
        }
      }
    }

    /* calculate the sites. There is always 2 non syn sites */
    nonsyn_sites += 2;
    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    }
  }

  /* fraction of positions that differ, used as the mutation estimate */
  for(i=0;i<ref->len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }

  if( ref->len > 0 ) {
    est_mutation = (double)count / (double)ref->len;
  } else {
    est_mutation = 0.0; /* empty reference: nothing differs, avoid 0/0 */
  }

  pseudo_dm = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);
  negative_dm = DnaProbMatrix_from_match(1.0 - (est_mutation*2),NMaskType_BANNED);

  ref_trans = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);

  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    /* only columns where both the model and the sequence are in a SEQUENCE state */
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") != 0 || strcmp(alc->alu[1]->text_label,"SEQUENCE") != 0 ) {
      continue;
    }

    dna_offset = alc->alu[1]->end*3;
    pseudo = logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
    neg = logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[alc->alu[0]->end],ct, pseudo_dm);

    ref_aa = ref_trans->seq[alc->alu[1]->end];
    diff_aa = diff_trans->seq[alc->alu[1]->end];

    /*
     * match_emission is indexed by (letter - 'A'): anything that is
     * not an upper case letter (a stop symbol for instance) would
     * index outside the table, so such columns are not scored.
     */
    if( ref_aa != diff_aa
        && ref_aa >= 'A' && ref_aa <= 'Z'
        && diff_aa >= 'A' && diff_aa <= 'Z' ) {
      diff_score = Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[ref_aa-'A'])
                 - Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[diff_aa-'A']);
      score_ratio += diff_score;

      if( diff_score < 0) {
        count_ref_negative++;
        if( diff_score > score_neg_5 ) {
          count_ref_negative_0_5++;
        } else if ( diff_score > score_neg_10 ) {
          count_ref_negative_5_10++;
        } else {
          /* no lower bound is applied: everything at -10 bits or below lands here */
          count_ref_negative_10_15++;
        }
      } else {
        count_ref_positive++;
      }
    }

    total_pseudo += pseudo;
    total_neg += neg;
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
          count_ref_positive,count_ref_negative,
          count_ref_negative_0_5,
          count_ref_negative_5_10,
          count_ref_negative_10_15);

  /* alternative, fuller report - this is what the pseudo/negative totals and the syn/nonsyn counts are kept for:
     fprintf(ofp,"%s,%s Total Pseudo %d vs Negative %d, Ratio %.4f Positive %d Negative %d Score %.2f Syn %d Changes %d NonSyn %d Changes %d\n",ref->name,hmmp->baseseq->name,total_pseudo,total_neg,Score2Bits(total_neg-total_pseudo),count_ref_positive,count_ref_negative,Score2Bits(score_ratio),syn_sites,syn_changes,nonsyn_sites,nonsyn_changes);
  */

  free_Protein(hmmp);
}