/*
 * Translates a DNA sequence, codon by codon, into a newly allocated
 * Sequence that carries the same name as the input.
 *
 * The case of each output residue mirrors the case of its codon: the
 * residue is upper case only when all three bases of the codon are upper
 * case, and lower case otherwise.
 *
 * Only complete codons are translated. Up to two trailing bases that do
 * not make up a full codon are ignored, so the function never reads past
 * swapped->len and never writes past the 1 + len/3 bytes reserved for the
 * output string.
 *
 * The codon table is read from "codon.table" on every call and is not
 * released here.
 *
 * Returns the new Sequence, or NULL when the codon table cannot be read
 * or an allocation fails.
 */
Sequence * translate_swapped(Sequence * swapped)
{
    CodonTable * ct;
    Sequence * out;
    char * residues;
    char aa;
    int i,j;

    ct = read_CodonTable_file("codon.table");
    if( ct == NULL ) {
        return NULL;
    }

    /* one residue per complete codon, plus the terminating NUL */
    residues = calloc(1+swapped->len/3,sizeof(char));
    if( residues == NULL ) {
        return NULL;
    }

    out = Sequence_alloc();
    if( out == NULL ) {
        free(residues);
        return NULL;
    }
    out->name = stringalloc(swapped->name);
    out->seq = residues;

    /* i+2 < len: stop before a partial trailing codon */
    for(i=0,j=0;i+2<swapped->len;i+=3,j++) {
        aa = aminoacid_from_seq(ct,swapped->seq+i);

        /* <ctype.h> functions require a value representable as unsigned char */
        if( isupper((unsigned char)swapped->seq[i])
            && isupper((unsigned char)swapped->seq[i+1])
            && isupper((unsigned char)swapped->seq[i+2]) ) {
            out->seq[j] = (char)toupper((unsigned char)aa);
        } else {
            out->seq[j] = (char)tolower((unsigned char)aa);
        }
    }
    out->seq[j] = '\0';

    return out;
}
/*
 * Minimal driver for the AlignGeneModel probability routines.
 *
 * Builds a two-row alignment ("ATGGGG" over "ATGGGT"), constructs the
 * standard AlignGeneModelParam from the "wag85" comparison probabilities,
 * a DNA probability matrix made with a 0.8 match value, and the
 * "codon.table" codon table, then prints the coding and non-coding
 * probabilities reported for alignment positions 2 and 5.
 *
 * argc and argv are not used.
 */
int main(int argc,char ** argv)
{
    DnaProbMatrix * dna_probs = DnaProbMatrix_from_match(0.8,NMaskType_VARIABLE);
    CodonTable * codons = read_CodonTable_file("codon.table");
    CompProb * protein_probs = read_Blast_file_CompProb("wag85");
    SeqAlign * align = SeqAlign_alloc_std();
    AlignGeneModelParam * params;
    Sequence * row;

    /* first row of the alignment */
    row = new_Sequence_from_strings("new1","ATGGGG");
    add_SeqAlign(align,row);

    /* second row differs from the first only in the final base */
    row = new_Sequence_from_strings("new2","ATGGGT");
    add_SeqAlign(align,row);

    params = std_AlignGeneModelParam(protein_probs,dna_probs,codons);

    printf("Codon 1 %f vs %f Codon 2 %f v %f \n",
           coding_probability_AlignGeneModel(align,2,params),
           non_coding_probability_AlignGeneModel(align,2,params),
           coding_probability_AlignGeneModel(align,5,params),
           non_coding_probability_AlignGeneModel(align,5,params)
           );
}
int main(int argc,char **argv) { int i; AlignGeneModelParam * agmp; GeneStats * gs; GeneModelParam * gmp = NULL; CompProb * comp_prob; DnaProbMatrix * dm; CodonTable * ct; RandomModel * rm; Sequence * test; ct = read_CodonTable_file("codon.table"); rm = default_RandomModel(); comp_prob = read_Blast_file_CompProb("wag85"); gmp = new_GeneModelParam_from_argv(&argc,argv); dm = DnaProbMatrix_from_match(0.8,NMaskType_VARIABLE); if((gs=GeneStats_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene stats"); } agmp = std_AlignGeneModelParam(comp_prob,dm,ct,gs); test = read_fasta_file_Sequence(argv[1]); assert(test); for(i=0;i<test->len;i++) { fprintf(stdout,"%c ss5 %.6f ss3 %.6f\n",test->seq[i],prob_SpliceSiteProb(agmp->ss5,test,i),prob_SpliceSiteProb(agmp->ss3,test,i)); } }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; Genomic * gen_temp; FILE * ifp; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( tstart_str != NULL ) { if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) { warn("Could not make %s out as target start",tstart); ret = FALSE; } } if( tend_str != NULL ) { if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) { warn("Could not make %s out as target end",tend); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str); ret = FALSE; } if( is_embl == FALSE ) { if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) { ret = FALSE; warn("Could not read genomic sequence in %s",dna_seq_file); gen = NULL; } } else { embl = read_EMBL_GenomicRegion_file(dna_seq_file); if( embl == NULL ) { warn("Could not read genomic EMBL file in %s",dna_seq_file); gen = NULL; ret = FALSE; } else { gen = hard_link_Genomic(embl->genomic); } } if( gen != NULL ) { if( tstart != -1 || tend != -1 ) { if( tstart == -1 ) tstart = 0; if( tend == -1 ) tend = gen->baseseq->len; gen_temp = truncate_Genomic(gen,tstart-1,tend); if( gen_temp == NULL ){ ret = FALSE; } else { free_Genomic(gen); gen = gen_temp; } } else { /* no truncation required */ } if( reverse == TRUE ) { if( tstart > tend ) { warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend); } gen_temp = reverse_complement_Genomic(gen); free_Genomic(gen); gen = gen_temp; } } /* * Can't truncate on GenomicRegion (for good reasons!). * but we want only a section of the EMBL file to be used * * So... swap genomic now. 
Positions in EMBL are still valid, * however - some genes will loose their sequence, which will be damaging. ;) */ if( is_embl ) { free_Genomic(embl->genomic); embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */ } if( target_abs == TRUE ) { if( is_embl == TRUE ) { warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta."); ret = FALSE; } gen->baseseq->offset = 1; gen->baseseq->end = strlen(gen->baseseq->seq); } if( alg_str != NULL ) { alg = gwrap_alg_type_from_string(alg_str); } else { if( use_tsm == TRUE ) { alg_str = "623L"; } else { alg_str = "623"; } alg = gwrap_alg_type_from_string(alg_str); } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( use_tsm == FALSE ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) { warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } } } else { /** using a HMM **/ /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/ /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/ tsm = HMMer2_read_ThreeStateModel(hmm_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",hmm_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( 
hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } if( tsm == NULL ) { warn("Could not read %s as a hmm",hmm_file); } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,30,0.1); } } /* end of else tsm != NULL */ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(cfreq_string,"model") == 0 ) { model_codon = TRUE; } else if ( strcmp(cfreq_string,"flat") == 0 ) { model_codon = FALSE; } else { warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string); ret = FALSE; } if( strcmp(splice_string,"model") == 0 ) { model_splice = TRUE; } else if ( strcmp(splice_string,"flat") == 0 ) { model_splice = FALSE; gmp->use_gtag_splice = TRUE; } else { warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( strcmp(intron_string,"model") == 0 ) { use_tied_model = FALSE; } else if ( strcmp(intron_string,"tied") == 0 ) { use_tied_model = TRUE; } else { warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string); ret = FALSE; } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( use_new_stats == 0 ) { if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) { ret = 
FALSE; warn("Could not read a GeneFrequency file in %s",gene_file); } } else { if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){ ret=FALSE; warn("Could not read gene statistics in %s",new_gene_file); } } /* end of else using new gene stats */ if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); return ret; }
int main(int argc,char ** argv) { Sequence * gen; Genomic * genomic; CodonTable * ct = NULL; GenomeEvidenceSet * ges = NULL; RandomCodonScore * rcs; FILE * ifp = NULL; ComplexSequence * cs = NULL; ComplexSequenceEvalSet * cses = NULL; AlnBlock * alb; PackAln * pal; GenomicRegion * gr; int i; Protein * trans; cDNA * cdna; int kbyte = 10000; int stop_codon_pen = 200; int start_codon_pen = 30; int new_gene = 5000; int switch_cost = 100; int smell = 8; DPRunImpl * dpri = NULL; EstEvidence * est; boolean show_trans = TRUE; boolean show_cdna = FALSE; boolean show_genes = TRUE; boolean show_alb = FALSE; boolean show_pal = FALSE; boolean show_gff = TRUE; boolean show_debug = FALSE; boolean show_geneu = TRUE; char * divide_string = "//"; strip_out_boolean_def_argument(&argc,argv,"geneutr",&show_geneu); strip_out_boolean_def_argument(&argc,argv,"genes",&show_genes); strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans); strip_out_boolean_def_argument(&argc,argv,"gff",&show_gff); strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb); strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal); strip_out_boolean_def_argument(&argc,argv,"debug",&show_debug); strip_out_boolean_def_argument(&argc,argv,"cdna",&show_cdna); strip_out_integer_argument(&argc,argv,"stop",&stop_codon_pen); strip_out_integer_argument(&argc,argv,"start",&start_codon_pen); strip_out_integer_argument(&argc,argv,"gene",&new_gene); strip_out_integer_argument(&argc,argv,"switch",&switch_cost); strip_out_integer_argument(&argc,argv,"smell",&smell); dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. 
Bad arguments"); } strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 3 ) { show_help(stdout); exit(12); } ct = read_CodonTable_file("codon.table"); gen = read_fasta_file_Sequence(argv[1]); ifp = openfile(argv[2],"r"); ges = read_est_evidence(ifp,ct); for(i=0;i<ges->len;i++) { est = (EstEvidence *) ges->geu[i]->data; est->in_smell = smell; } rcs= RandomCodonScore_alloc(); for(i=0;i<125;i++) { if( is_stop_codon(i,ct) ) { rcs->codon[i] = -1000000; } else { rcs->codon[i] = 0; } /* fprintf(stderr,"Got %d for %d\n",rcs->codon[i],i); */ } cses = default_genomic_ComplexSequenceEvalSet(); cs = new_ComplexSequence(gen,cses); pal = PackAln_bestmemory_GenomeWise9(ges,cs,-switch_cost,-new_gene,-start_codon_pen,-stop_codon_pen,rcs,NULL,dpri); alb = convert_PackAln_to_AlnBlock_GenomeWise9(pal); genomic = Genomic_from_Sequence(gen); gr = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_GeneWise(gr,1,gen->len,alb,gen->name,0,NULL); if( show_genes ) { show_pretty_GenomicRegion(gr,0,stdout); fprintf(stdout,"%s\n",divide_string); } if( show_gff ) { show_GFF_GenomicRegion(gr,gen->name,"genomwise",stdout); fprintf(stdout,"%s\n",divide_string); } if( show_trans ) { for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,stdout); } } fprintf(stdout,"%s\n",divide_string); } if( show_cdna ) { for(i=0;i<gr->len;i++) { cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]); write_fasta_Sequence(cdna->baseseq,stdout); } fprintf(stdout,"%s\n",divide_string); } if( show_geneu ) { show_utr_exon_genomewise(alb,stdout); fprintf(stdout,"%s\n",divide_string); } if( show_alb ) { mapped_ascii_AlnBlock(alb,id,1,stdout); fprintf(stdout,"%s\n",divide_string); } if( show_debug ) { debug_genomewise(alb,ges,ct,gen,stdout); fprintf(stdout,"%s\n",divide_string); } 
if( show_pal ) { show_simple_PackAln(pal,stdout); fprintf(stdout,"%s\n",divide_string); } return 0; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; SequenceDB * psdb; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( use_single_dna == TRUE ) { cdna = read_fasta_file_cDNA(dna_seq_file); if( cdna == NULL ) { warn("Could not open single dna sequence in %s",dna_seq_file); ret = FALSE; } } else { sdb = single_fasta_SequenceDB(dna_seq_file); if( sdb == NULL ) { warn("Could not build a sequence database on %s",dna_seq_file); ret = FALSE; } } rm = default_RandomModel(); if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not get gap string number %s",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not get ext string number %s",ext_str); ret = FALSE; } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( aln_number_str != NULL ) { if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) { warn("Weird aln number string %s...\n",aln_number_str); ret = FALSE; } } if( report_str != NULL ) { if( is_integer_string(report_str,&report_stagger) == FALSE ) { warn("Weird report stagger asked for %s",report_str); ret = FALSE; } } if( use_pfam1 == TRUE ) { tsmdb = 
new_PfamHmmer1DB_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_pfam2 == TRUE ) { tsmdb = HMMer2_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_tsm == TRUE) { /** using a HMM **/ tsm = HMMer2_read_ThreeStateModel(protein_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",protein_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } else { if( tsm->name == NULL ) { tsm->name = stringalloc(protein_file); } } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,15,0.2); tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. 
Weird!"); ret = FALSE; } } /* end of else tsm != NULL */ } /* end of else is tsm */ else if( use_single_pro ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) { warn("Proteins can only have local/global startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } if( startend == TSM_global) tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); else tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); if( tsm == NULL ) { warn("Could not build ThreeStateModel from a single protein sequence..."); ret = FALSE; } else { tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. Weird!"); ret = FALSE; } } /* end of could build a TSM */ } /* else is a real protein */ } /* end of else is single protein */ else if (use_db_pro == TRUE ) { psdb = single_fasta_SequenceDB(protein_file); tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext); free_SequenceDB(psdb); } else { warn("No protein input file! 
Yikes!"); } /*** if( use_tsm == FALSE ) { } else { ****/ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) { warn("Could not convert %s to a double",evalue_search_str); ret = FALSE; } if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) { warn("Could not convert %s to a double",search_cutoff_str); ret = FALSE; } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( alg_str != NULL ) { alg = alg_estwrap_from_string(alg_str); } else { alg_str = "312"; alg = alg_estwrap_from_string(alg_str); } if( aln_alg_str != NULL ) { aln_alg = alg_estwrap_from_string(aln_alg_str); } else { /* if it is a protein, don't loop */ if( use_single_pro == TRUE || use_db_pro == TRUE ) aln_alg_str = "333"; else aln_alg_str = "333L"; aln_alg = alg_estwrap_from_string(aln_alg_str); } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); cps = flat_cDNAParser(indel_error); cm = flat_CodonMapper(ct); 
sprinkle_errors_over_CodonMapper(cm,subs_error); return ret; }
int main(int argc,char ** argv) { int i; DPRunImpl * dpri = NULL; GeneModelParam * gmp = NULL; GeneModel * gm = NULL; Sequence * seq; RandomCodon * rc; RandomModelDNA * rmd; RandomCodonScore * rcs; ComplexSequenceEval * splice5; ComplexSequenceEval * splice3; ComplexSequenceEvalSet * cses; ComplexSequence * cseq; SyExonScore * exonscore; PackAln * pal; AlnBlock * alb; Genomic * genomic; GenomicRegion * gr; Protein * trans; dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. Bad arguments"); } gmp = new_GeneModelParam_from_argv(&argc,argv); ct= read_CodonTable_file("codon.table"); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene model"); } seq = read_fasta_file_Sequence(argv[1]); assert(seq); cses = new_ComplexSequenceEvalSet_from_GeneModel(gm); cseq = new_ComplexSequence(seq,cses); rc = flat_RandomCodon(ct); rmd = RandomModelDNA_std(); fold_in_RandomModelDNA_into_RandomCodon(rc,rmd); rcs = RandomCodonScore_from_RandomCodon(rc); exonscore = SyExonScore_flat_model(200,250,0.1,0.1); /* for(i=0;i<cseq->length;i++) { fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]); } exit(0); */ /* show_RandomCodonScore(rcs,stdout); for(i=3;i<seq->len;i++) { fprintf(stdout,"seq %d is %c with score %d\n",i,aminoacid_from_seq(ct,seq->seq+i-2),rcs->codon[CSEQ_GENOMIC_CODON(cseq,i)]); } exit(0); */ pal = PackAln_bestmemory_StatWise10(exonscore,cseq,rcs,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,dpri); alb = convert_PackAln_to_AlnBlock_StatWise10(pal); mapped_ascii_AlnBlock(alb,id,1,stdout); genomic = Genomic_from_Sequence(seq); gr = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_GeneWise(gr,1,seq->len,alb,"bollocks",0,NULL); for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { 
fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,stdout); } } return 0; }
int main(int argc,char ** argv) { int i; SequenceSet * in; Sequence * trans; ThreeStateDB * tsd; DPRunImpl * dpri; CodonTable * ct; int return_status; ThreeStateModel * tsm; ThreeStateScore * tss; Protein * hmmp; ComplexSequence * cs; ComplexSequenceEvalSet * cses; PackAln * pal; AlnBlock * alb; int show_align = 0; int show_alb = 0; int show_verbose = 1; int show_trans = 0; ct = read_CodonTable_file("codon.table"); cses = default_aminoacid_ComplexSequenceEvalSet(); dpri = new_DPRunImpl_from_argv(&argc,argv); strip_out_boolean_def_argument(&argc,argv,"pretty",&show_align); strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb); strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans); if( argc != 3 ) { show_help(stdout); exit(63); } in = read_fasta_SequenceSet_file(argv[1]); tsd = HMMer2_ThreeStateDB(argv[2]); assert(in); assert(tsd); assert(in->len == 2); trans = translate_Sequence(in->set[0],ct); if( show_trans ) { write_fasta_Sequence(trans,stdout); } cs = new_ComplexSequence(trans,cses); open_ThreeStateDB(tsd); while( (tsm = read_TSM_ThreeStateDB(tsd,&return_status)) != NULL ) { fold_RandomModel_into_ThreeStateModel(tsm,tsm->rm); set_startend_policy_ThreeStateModel(tsm,TSM_local,10,1.0); tss = ThreeStateScore_from_ThreeStateModel(tsm); hmmp = pseudo_Protein_from_ThreeStateModel(tsm); pal = PackAln_bestmemory_ThreeStateLoop(tss,cs,NULL,dpri); alb = convert_PackAln_to_AlnBlock_ThreeStateLoop(pal); if( show_alb ) { show_flat_AlnBlock(alb,stdout); } if( show_align ) { write_pretty_seq_align(alb,hmmp->baseseq,trans,15,50,stdout); } if( show_verbose ) { show_verbose_evo(alb,tsm,in->set[0],in->set[1],ct,stdout); } } }
int main(int argc,char ** argv) { int i; DPRunImpl * dpri = NULL; GeneModelParam * gmp = NULL; GeneModel * gm = NULL; FILE * ifp; SeqAlign * al; PairBaseSeq * pbs; ComplexSequenceEval * splice5; ComplexSequenceEval * splice3; ComplexSequence * cseq; CompMat * score_mat; CompProb * comp_prob; RandomModel * rm; PairBaseCodonModelScore * codon_score; PairBaseModelScore* nonc_score; PairBaseCodonModelScore * start; PairBaseCodonModelScore * stop; SyExonScore * exonscore; PackAln * pal; AlnBlock * alb; Genomic * genomic; GenomicRegion * gr; GenomicRegion * gr2; Protein * trans; StandardOutputOptions * std_opt; ShowGenomicRegionOptions * sgro; char * dump_packaln = NULL; char * read_packaln = NULL; FILE * packifp = NULL; boolean show_trans = 1; boolean show_gene_raw = 0; ct = read_CodonTable_file(codon_table); /* score_mat = read_Blast_file_CompMat("blosum62.bla"); comp_prob = CompProb_from_halfbit(score_mat); */ rm = default_RandomModel(); comp_prob = read_Blast_file_CompProb("wag85"); fold_column_RandomModel_CompProb(comp_prob,rm); dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. 
Bad arguments"); } gmp = new_GeneModelParam_from_argv(&argc,argv); std_opt = new_StandardOutputOptions_from_argv(&argc,argv); sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv); dump_packaln = strip_out_assigned_argument(&argc,argv,"dump"); read_packaln = strip_out_assigned_argument(&argc,argv,"recover"); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene model"); } codon_score = make_PairBaseCodonModelScore(comp_prob); nonc_score = make_PairBaseModelScore(); splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score); splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score); if((ifp = openfile(argv[1],"r")) == NULL ) { fatal("Could not open file %s",argv[1]); } al = read_fasta_SeqAlign(ifp); assert(al); assert(al->len == 2); assert(al->seq[0]->len > 0); assert(al->seq[1]->len > 0); /* write_fasta_SeqAlign(al,stdout);*/ pbs = new_PairBaseSeq_SeqAlign(al); if( read_packaln == NULL ) { cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3); } start = make_start_PairBaseCodonModelScore(ct); stop = make_stop_PairBaseCodonModelScore(ct); /* show_PairBaseCodonModelScore(stop,ct,stdout); */ /* for(i=0;i<pbs->anchor->len;i++) { printf("%3d %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i], CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i), char_complement_base(pbs->anchor->seq[i]), CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i)); } */ /* show_ComplexSequence(cseq,stdout); */ exonscore = SyExonScore_flat_model(100,150,0.1,1.0); /* for(i=0;i<cseq->length;i++) { fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]); } exit(0); */ if( read_packaln != NULL ) { packifp = openfile(read_packaln,"r"); if( packifp == NULL ) { fatal("File %s is unopenable - ignoring dump command",dump_packaln); } else { pal = 
read_simple_PackAln(packifp); } } else { pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri); } alb = convert_PackAln_to_AlnBlock_SyWise20(pal); if( dump_packaln != NULL ) { packifp = openfile(dump_packaln,"w"); if( packifp == NULL ) { warn("File %s is unopenable - ignoring dump command",dump_packaln); } else { show_simple_PackAln(pal,packifp); } } show_score_sequence(alb,pbs,nonc_score,stdout); /* show_StandardOutputOptions(std_opt,alb,pal,"//",stdout); */ genomic = Genomic_from_Sequence(al->seq[0]); gr = new_GenomicRegion(genomic); gr2 = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_new(gr,alb); show_GenomicRegionOptions(sgro,gr,ct,"//",stdout); return 0; }
int main(int argc,char ** argv) { Sequence * cdna; Sequence * gen; Sequence * active_gen; Sequence * active_cdna; int i; int dstart = -1; int dend = -1; int cstart = -1; int cend = -1; CodonTable * ct = NULL; CodonMatrixScore * cm = NULL; RandomCodon * rndcodon = NULL; RandomCodonScore * rndcodonscore = NULL; DnaMatrix * dm = NULL; DPRunImpl * dpri = NULL; GeneModel * gm; GeneModelParam * gmp; GeneStats * gs; GeneParser21 * gp21; GeneParser21Score * gp21s; GeneParser4Score * gp; ComplexSequenceEvalSet * cdna_cses; ComplexSequenceEvalSet * gen_cses; ComplexSequence * cs_cdna; ComplexSequence * cs_gen; Genomic * gent; GenomicRegion * gr; CompMat * cmat; CompProb * cprob; char * matfile = "blosum62.bla"; Protein * trans; PackAln * pal; AlnBlock * alb; FILE * ofp = stdout; dpri = new_DPRunImpl_from_argv(&argc,argv); gmp = new_GeneModelParam_from_argv(&argc,argv); strip_out_integer_argument(&argc,argv,"u",&dstart); strip_out_integer_argument(&argc,argv,"v",&dend); strip_out_integer_argument(&argc,argv,"s",&cstart); strip_out_integer_argument(&argc,argv,"t",&cend); strip_out_standard_options(&argc,argv,show_help,show_version); ct = read_CodonTable_file(codon_file); cmat = read_Blast_file_CompMat(matfile); cprob = CompProb_from_halfbit(cmat); cm = naive_CodonMatrixScore_from_prob(ct,cprob); gm = GeneModel_from_GeneModelParam(gmp); cdna = read_fasta_file_Sequence(argv[1]); gen = read_fasta_file_Sequence(argv[2]); if( dstart != -1 || dend != -1 ) { if( dstart == -1 ) { dstart = 1; } if( dend == -1 ) { dend = gen->len; } active_gen = magic_trunc_Sequence(gen,dstart,dend); } else { active_gen = hard_link_Sequence(gen); } if( cstart != -1 || cend != -1 ) { if( cstart == -1 ) { cstart = 1; } if( cend == -1 ) { cend = gen->len; } active_cdna = magic_trunc_Sequence(gen,cstart,cend); } else { active_cdna = hard_link_Sequence(gen); } rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct); fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd); rndcodonscore = 
RandomCodonScore_from_RandomCodon(rndcodon); assert(active_cdna); assert(active_gen); cdna_cses = default_cDNA_ComplexSequenceEvalSet(); gen_cses = new_ComplexSequenceEvalSet_from_GeneModel(gm); cs_cdna = new_ComplexSequence(active_cdna,cdna_cses); cs_gen = new_ComplexSequence(active_gen,gen_cses); gp21 = std_GeneParser21(); GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd); gp21s = GeneParser21Score_from_GeneParser21(gp21); gp = GeneParser4Score_from_GeneParser21Score(gp21s); dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1))); assert(cs_cdna); assert(cs_gen); assert(gp); assert(rndcodonscore); assert(dm); assert(dpri); /* show_CodonMatrixScore(cm,ct,ofp);*/ pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm, Probability2Score(halfbit2Probability(-12)), Probability2Score(halfbit2Probability(-2)), Probability2Score(halfbit2Probability(-5)), Probability2Score(halfbit2Probability(0)), NULL, dpri); alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal); gent = Genomic_from_Sequence(gen); assert(gent); gr = new_GenomicRegion(gent); assert(gr); add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL); mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp); show_pretty_GenomicRegion(gr,0,ofp); for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,ofp); } } }