int main(int argc,char **argv) { int i; AlignGeneModelParam * agmp; GeneStats * gs; GeneModelParam * gmp = NULL; CompProb * comp_prob; DnaProbMatrix * dm; CodonTable * ct; RandomModel * rm; Sequence * test; ct = read_CodonTable_file("codon.table"); rm = default_RandomModel(); comp_prob = read_Blast_file_CompProb("wag85"); gmp = new_GeneModelParam_from_argv(&argc,argv); dm = DnaProbMatrix_from_match(0.8,NMaskType_VARIABLE); if((gs=GeneStats_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene stats"); } agmp = std_AlignGeneModelParam(comp_prob,dm,ct,gs); test = read_fasta_file_Sequence(argv[1]); assert(test); for(i=0;i<test->len;i++) { fprintf(stdout,"%c ss5 %.6f ss3 %.6f\n",test->seq[i],prob_SpliceSiteProb(agmp->ss5,test,i),prob_SpliceSiteProb(agmp->ss3,test,i)); } }
int main(int argc,char ** argv) { int i; char * temp; build_defaults(); strip_out_standard_options(&argc,argv,show_help,show_version); potential_file = strip_out_assigned_argument(&argc,argv,"pg"); pal_file = strip_out_assigned_argument(&argc,argv,"pal_file"); if( (temp = strip_out_assigned_argument(&argc,argv,"gap")) != NULL ) gap_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"g")) != NULL ) gap_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"ext")) != NULL ) ext_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"e")) != NULL ) ext_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"matrix")) != NULL ) matrix_file = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"m")) != NULL ) matrix_file = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"s")) != NULL ) qstart_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"t")) != NULL ) qend_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"u")) != NULL ) tstart_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"v")) != NULL ) tend_str = temp; if( (strip_out_boolean_argument(&argc,argv,"trev")) == TRUE ) reverse = TRUE; if( (strip_out_boolean_argument(&argc,argv,"[no]newgene")) == TRUE ) use_new_stats = TRUE; if( (strip_out_boolean_argument(&argc,argv,"tfor")) == TRUE ){ if( reverse == TRUE ) { warn("You have specified both trev and tfor. Treating as both"); do_both = TRUE; reverse = FALSE; } else { reverse = FALSE; } } if( (temp = strip_out_assigned_argument(&argc,argv,"insert")) != NULL ) { if( strcmp(temp,"flat") == 0 ) { flat_insert = TRUE; } else { flat_insert = FALSE; } } if( (strip_out_boolean_argument(&argc,argv,"both")) == TRUE ) do_both = TRUE; if( (strip_out_boolean_argument(&argc,argv,"fembl")) == TRUE ) is_embl = TRUE; if( (strip_out_boolean_argument(&argc,argv,"tabs")) == TRUE ) target_abs = TRUE; pseudo = strip_out_boolean_argument(&argc,argv,"pseudo"); if( (temp = strip_out_assigned_argument(&argc,argv,"codon")) != NULL ) codon_file = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"gene")) != NULL ) gene_file = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"alg")) != NULL ) alg_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"kbyte")) != NULL ) kbyte_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"subs")) != NULL ) subs_string = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"indel")) != NULL ) indel_string = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"cfreq")) != NULL ) cfreq_string = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"splice")) != NULL ) { warn("deprecated command line option -splice. use -splice_gtag now"); splice_string = temp; } if( (temp = strip_out_assigned_argument(&argc,argv,"init")) != NULL ) startend_string = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"null")) != NULL ) null_string = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"intron")) != NULL ) intron_string = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"alln")) != NULL ) allN_string = temp; if( (strip_out_boolean_argument(&argc,argv,"hmmer")) == TRUE ) use_tsm = TRUE; if( (strip_out_boolean_argument(&argc,argv,"intie")) == TRUE ) use_tied_model = TRUE; if( (temp = strip_out_assigned_argument(&argc,argv,"hname")) != NULL ) hmm_name = temp; if( (strip_out_boolean_argument(&argc,argv,"pretty")) != FALSE ) show_pretty = TRUE; if( (strip_out_boolean_argument(&argc,argv,"gff")) != FALSE ) show_gff = TRUE; if( (strip_out_boolean_argument(&argc,argv,"diana")) != FALSE ) show_diana = TRUE; if( (strip_out_boolean_argument(&argc,argv,"embl")) != FALSE ) show_embl = TRUE; if( (strip_out_boolean_argument(&argc,argv,"genes")) != FALSE ) show_pretty_gene = TRUE; if( (strip_out_boolean_argument(&argc,argv,"genesf")) != FALSE ) show_supp_gene = TRUE; if( (strip_out_boolean_argument(&argc,argv,"para")) != FALSE ) show_para = TRUE; if( (strip_out_boolean_argument(&argc,argv,"trans")) != FALSE ) show_trans = TRUE; if( (strip_out_boolean_argument(&argc,argv,"pep")) != FALSE ) show_pep = TRUE; if( (strip_out_boolean_argument(&argc,argv,"cdna")) != FALSE ) show_cdna = TRUE; if( (strip_out_boolean_argument(&argc,argv,"sum")) != FALSE ) show_match_sum = TRUE; if( (strip_out_boolean_argument(&argc,argv,"alb")) != FALSE ) show_AlnBlock = TRUE; if( (strip_out_boolean_argument(&argc,argv,"ace")) != FALSE ) show_ace = TRUE; if( (strip_out_boolean_argument(&argc,argv,"pal")) != FALSE ) show_PackAln = TRUE; if( (strip_out_boolean_argument(&argc,argv,"gener")) != FALSE ) show_gene_plain = TRUE; if( (strip_out_boolean_argument(&argc,argv,"over")) != FALSE ) show_overlap = TRUE; if( (temp = strip_out_assigned_argument(&argc,argv,"divide")) != NULL ) divide_str = temp; if( (temp = strip_out_assigned_argument(&argc,argv,"block")) != NULL ) main_block_str = temp; dpri = new_DPRunImpl_from_argv(&argc,argv); gmp = new_GeneModelParam_from_argv(&argc,argv); ppp = new_PhasedProteinPara_from_argv(&argc,argv); gwrp = new_GeneWiseRunPara_from_argv(&argc,argv); strip_out_remaining_options_with_warning(&argc,argv); if( argc != 3 ) { warn("Wrong number of arguments (expect 2)!\n"); if( argc > 1 ){ warn("Arg line looked like (after option processing)"); for(i=1;i<argc;i++) { fprintf(stderr," %s\n",argv[i]); } } show_short_help(); } if( show_embl == FALSE && show_diana == FALSE && show_gff == FALSE && show_overlap == FALSE && show_pretty_gene == FALSE && show_match_sum == FALSE && show_ace == FALSE && show_gene_plain == FALSE && show_pretty == FALSE && show_AlnBlock == FALSE && show_PackAln == FALSE && show_pep == FALSE ) { show_pretty = TRUE; show_para = TRUE; } dna_seq_file = argv[2]; if( use_tsm == FALSE) protein_file = argv[1]; else hmm_file = argv[1]; if( build_objects() == FALSE) fatal("Could not build objects!"); if( show_para == TRUE) { show_parameters(); } if( build_alignment() == FALSE) fatal("Could not build alignment!"); if( show_output() == FALSE) fatal("Could not show alignment. Sorry!"); if( do_both == TRUE) { reverse_target(); if( build_alignment() == FALSE) fatal("Could not build alignment!"); if( show_output() == FALSE) fatal("Could not show alignment. Sorry!"); } free_temporary_objects(); free_io_objects(); return 0; }
int main(int argc,char ** argv) { int i; DPRunImpl * dpri = NULL; GeneModelParam * gmp = NULL; GeneModel * gm = NULL; Sequence * seq; RandomCodon * rc; RandomModelDNA * rmd; RandomCodonScore * rcs; ComplexSequenceEval * splice5; ComplexSequenceEval * splice3; ComplexSequenceEvalSet * cses; ComplexSequence * cseq; SyExonScore * exonscore; PackAln * pal; AlnBlock * alb; Genomic * genomic; GenomicRegion * gr; Protein * trans; dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. Bad arguments"); } gmp = new_GeneModelParam_from_argv(&argc,argv); ct= read_CodonTable_file("codon.table"); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene model"); } seq = read_fasta_file_Sequence(argv[1]); assert(seq); cses = new_ComplexSequenceEvalSet_from_GeneModel(gm); cseq = new_ComplexSequence(seq,cses); rc = flat_RandomCodon(ct); rmd = RandomModelDNA_std(); fold_in_RandomModelDNA_into_RandomCodon(rc,rmd); rcs = RandomCodonScore_from_RandomCodon(rc); exonscore = SyExonScore_flat_model(200,250,0.1,0.1); /* for(i=0;i<cseq->length;i++) { fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]); } exit(0); */ /* show_RandomCodonScore(rcs,stdout); for(i=3;i<seq->len;i++) { fprintf(stdout,"seq %d is %c with score %d\n",i,aminoacid_from_seq(ct,seq->seq+i-2),rcs->codon[CSEQ_GENOMIC_CODON(cseq,i)]); } exit(0); */ pal = PackAln_bestmemory_StatWise10(exonscore,cseq,rcs,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,dpri); alb = convert_PackAln_to_AlnBlock_StatWise10(pal); mapped_ascii_AlnBlock(alb,id,1,stdout); genomic = Genomic_from_Sequence(seq); gr = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_GeneWise(gr,1,seq->len,alb,"bollocks",0,NULL); for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,stdout); } } return 0; }
int main(int argc,char ** argv) { int i; DPRunImpl * dpri = NULL; GeneModelParam * gmp = NULL; GeneModel * gm = NULL; FILE * ifp; SeqAlign * al; PairBaseSeq * pbs; ComplexSequenceEval * splice5; ComplexSequenceEval * splice3; ComplexSequence * cseq; CompMat * score_mat; CompProb * comp_prob; RandomModel * rm; PairBaseCodonModelScore * codon_score; PairBaseModelScore* nonc_score; PairBaseCodonModelScore * start; PairBaseCodonModelScore * stop; SyExonScore * exonscore; PackAln * pal; AlnBlock * alb; Genomic * genomic; GenomicRegion * gr; GenomicRegion * gr2; Protein * trans; StandardOutputOptions * std_opt; ShowGenomicRegionOptions * sgro; char * dump_packaln = NULL; char * read_packaln = NULL; FILE * packifp = NULL; boolean show_trans = 1; boolean show_gene_raw = 0; ct = read_CodonTable_file(codon_table); /* score_mat = read_Blast_file_CompMat("blosum62.bla"); comp_prob = CompProb_from_halfbit(score_mat); */ rm = default_RandomModel(); comp_prob = read_Blast_file_CompProb("wag85"); fold_column_RandomModel_CompProb(comp_prob,rm); dpri = new_DPRunImpl_from_argv(&argc,argv); if( dpri == NULL ) { fatal("Unable to build DPRun implementation. Bad arguments"); } gmp = new_GeneModelParam_from_argv(&argc,argv); std_opt = new_StandardOutputOptions_from_argv(&argc,argv); sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv); dump_packaln = strip_out_assigned_argument(&argc,argv,"dump"); read_packaln = strip_out_assigned_argument(&argc,argv,"recover"); strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) { fatal("Could not build gene model"); } codon_score = make_PairBaseCodonModelScore(comp_prob); nonc_score = make_PairBaseModelScore(); splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score); splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score); if((ifp = openfile(argv[1],"r")) == NULL ) { fatal("Could not open file %s",argv[1]); } al = read_fasta_SeqAlign(ifp); assert(al); assert(al->len == 2); assert(al->seq[0]->len > 0); assert(al->seq[1]->len > 0); /* write_fasta_SeqAlign(al,stdout);*/ pbs = new_PairBaseSeq_SeqAlign(al); if( read_packaln == NULL ) { cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3); } start = make_start_PairBaseCodonModelScore(ct); stop = make_stop_PairBaseCodonModelScore(ct); /* show_PairBaseCodonModelScore(stop,ct,stdout); */ /* for(i=0;i<pbs->anchor->len;i++) { printf("%3d %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i], CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i), char_complement_base(pbs->anchor->seq[i]), CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i)); } */ /* show_ComplexSequence(cseq,stdout); */ exonscore = SyExonScore_flat_model(100,150,0.1,1.0); /* for(i=0;i<cseq->length;i++) { fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]); } exit(0); */ if( read_packaln != NULL ) { packifp = openfile(read_packaln,"r"); if( packifp == NULL ) { fatal("File %s is unopenable - ignoring dump command",dump_packaln); } else { pal = read_simple_PackAln(packifp); } } else { pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri); } alb = convert_PackAln_to_AlnBlock_SyWise20(pal); if( dump_packaln != NULL ) { packifp = openfile(dump_packaln,"w"); if( packifp == NULL ) { warn("File %s is unopenable - ignoring dump command",dump_packaln); } else { show_simple_PackAln(pal,packifp); } } show_score_sequence(alb,pbs,nonc_score,stdout); /* show_StandardOutputOptions(std_opt,alb,pal,"//",stdout); */ genomic = Genomic_from_Sequence(al->seq[0]); gr = new_GenomicRegion(genomic); gr2 = new_GenomicRegion(genomic); add_Genes_to_GenomicRegion_new(gr,alb); show_GenomicRegionOptions(sgro,gr,ct,"//",stdout); return 0; }
int main(int argc,char ** argv) { Sequence * cdna; Sequence * gen; Sequence * active_gen; Sequence * active_cdna; int i; int dstart = -1; int dend = -1; int cstart = -1; int cend = -1; CodonTable * ct = NULL; CodonMatrixScore * cm = NULL; RandomCodon * rndcodon = NULL; RandomCodonScore * rndcodonscore = NULL; DnaMatrix * dm = NULL; DPRunImpl * dpri = NULL; GeneModel * gm; GeneModelParam * gmp; GeneStats * gs; GeneParser21 * gp21; GeneParser21Score * gp21s; GeneParser4Score * gp; ComplexSequenceEvalSet * cdna_cses; ComplexSequenceEvalSet * gen_cses; ComplexSequence * cs_cdna; ComplexSequence * cs_gen; Genomic * gent; GenomicRegion * gr; CompMat * cmat; CompProb * cprob; char * matfile = "blosum62.bla"; Protein * trans; PackAln * pal; AlnBlock * alb; FILE * ofp = stdout; dpri = new_DPRunImpl_from_argv(&argc,argv); gmp = new_GeneModelParam_from_argv(&argc,argv); strip_out_integer_argument(&argc,argv,"u",&dstart); strip_out_integer_argument(&argc,argv,"v",&dend); strip_out_integer_argument(&argc,argv,"s",&cstart); strip_out_integer_argument(&argc,argv,"t",&cend); strip_out_standard_options(&argc,argv,show_help,show_version); ct = read_CodonTable_file(codon_file); cmat = read_Blast_file_CompMat(matfile); cprob = CompProb_from_halfbit(cmat); cm = naive_CodonMatrixScore_from_prob(ct,cprob); gm = GeneModel_from_GeneModelParam(gmp); cdna = read_fasta_file_Sequence(argv[1]); gen = read_fasta_file_Sequence(argv[2]); if( dstart != -1 || dend != -1 ) { if( dstart == -1 ) { dstart = 1; } if( dend == -1 ) { dend = gen->len; } active_gen = magic_trunc_Sequence(gen,dstart,dend); } else { active_gen = hard_link_Sequence(gen); } if( cstart != -1 || cend != -1 ) { if( cstart == -1 ) { cstart = 1; } if( cend == -1 ) { cend = gen->len; } active_cdna = magic_trunc_Sequence(gen,cstart,cend); } else { active_cdna = hard_link_Sequence(gen); } rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct); fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd); rndcodonscore = RandomCodonScore_from_RandomCodon(rndcodon); assert(active_cdna); assert(active_gen); cdna_cses = default_cDNA_ComplexSequenceEvalSet(); gen_cses = new_ComplexSequenceEvalSet_from_GeneModel(gm); cs_cdna = new_ComplexSequence(active_cdna,cdna_cses); cs_gen = new_ComplexSequence(active_gen,gen_cses); gp21 = std_GeneParser21(); GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd); gp21s = GeneParser21Score_from_GeneParser21(gp21); gp = GeneParser4Score_from_GeneParser21Score(gp21s); dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1))); assert(cs_cdna); assert(cs_gen); assert(gp); assert(rndcodonscore); assert(dm); assert(dpri); /* show_CodonMatrixScore(cm,ct,ofp);*/ pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm, Probability2Score(halfbit2Probability(-12)), Probability2Score(halfbit2Probability(-2)), Probability2Score(halfbit2Probability(-5)), Probability2Score(halfbit2Probability(0)), NULL, dpri); alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal); gent = Genomic_from_Sequence(gen); assert(gent); gr = new_GenomicRegion(gent); assert(gr); add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL); mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp); show_pretty_GenomicRegion(gr,0,ofp); for(i=0;i<gr->len;i++) { if( gr->gene[i]->ispseudo == TRUE ) { fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i); } else { trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct); write_fasta_Sequence(trans->baseseq,ofp); } } }