CompMat * read_Blast_CompMat(FILE * ifp) { char buffer[MAXLINE]; int alphabet[MAXLINE]; char * runner; int len; int linenum; int row; CompMat * out; /*** Skip over # lines... read first line: is alphabet ie A R T G .... * ***/ while( fgets(buffer,MAXLINE,ifp) != NULL) if( buffer[0] != '#') break; /** loop over line, getting letters: warn if longer than one, or not a letter **/ for(len=0,runner=strtok(buffer,spacestr);runner != NULL;runner=strtok(NULL,spacestr)) { if( *runner == '*' ) break; /* end column */ if( !isalpha((int)*runner) || strlen(runner) > 1 ) { warn("In read Blast comp mat, probably an error: trying to interpret [%s] as an amino acid",runner); return NULL; } alphabet[len++] = toupper((int)*runner) -'A'; } out = blank_CompMat(); linenum = 0; /** get len lines, each line, get len numbers and put them away **/ while( fgets(buffer,MAXLINE,ifp) != NULL ) { if( linenum >= len ) break; for(runner=strtok(buffer,spacestr),row = 0;runner != NULL && row < len;runner=strtok(NULL,spacestr),row++) { if( is_integer_string(runner,&out->comp[alphabet[linenum]][alphabet[row]]) == FALSE ) { warn("In read Blast comp mat, probably an error: trying to interpret [%s] as a number ... continuing",runner); } } linenum++; } return out; }
/*
 * Removes the assigned argument named by tag from argv (through
 * strip_out_assigned_argument) and parses its text into *value.
 *
 * Returns TRUE when the argument was present and is_integer_string
 * accepted it.  Returns FALSE when the tag is absent, or when its argument
 * is not an integer; in the latter case a warning is issued that reports
 * the integer currently held in *value.
 */
boolean strip_out_integer_argument(int * argc,char ** argv,char * tag,int * value)
{
  char * arg;

  arg = strip_out_assigned_argument(argc,argv,tag);
  if( arg == NULL ) {
    return FALSE;
  }

  if( is_integer_string(arg,value) == FALSE ) {
    /* %d needs the integer itself, not the pointer to it */
    warn("Argument [%s] to [%s] is not an integer. Not changing the value [%d]",arg,tag,*value);
    return FALSE;
  }

  return TRUE;
}
DPImplementation * new_DPImplementation_from_argstr(int * argc,char ** argv) { DPImplementation * out; char * temp; out = DPImplementation_alloc(); if( (strip_out_boolean_argument(argc,argv,"pthreads")) == TRUE ) { out->do_threads = TRUE; } if( (temp=strip_out_assigned_argument(argc,argv,"dbtrace")) != NULL ) { if( is_integer_string(temp,&out->db_trace_level) == FALSE ) { warn("%s is not an integer argument for dbtrace",temp); } } if( strip_out_boolean_argument(argc,argv,"O") == TRUE ) { out->largemem= TRUE; /* other optimisations */ } strip_out_boolean_def_argument(argc,argv,"largemem",&out->largemem); strip_out_boolean_def_argument(argc,argv,"onemodel",&out->doone); if( strip_out_boolean_argument(argc,argv,"prob") == TRUE ) { out->doprob = TRUE; } if( strip_out_boolean_argument(argc,argv,"g") == TRUE ) { out->dydebug = TRUE; } if( (temp=strip_out_assigned_argument(argc,argv,"logsum")) != NULL ) { out->calcfunc = stringalloc(temp); } else { out->calcfunc = stringalloc("Probability_logsum"); } out->dycw = new_DycWarning_from_argstr(argc,argv); /* fprintf(stderr,"And %d is extern warning",out->dycw->warn_extern);*/ return out; }
DPRunImpl * new_DPRunImpl_from_argv(int * argc,char ** argv) { DPRunImpl * out; char * temp; out = DPRunImpl_alloc(); if( (temp = strip_out_assigned_argument(argc,argv,"dymem")) != NULL ) { if( strcmp(temp,"explicit") == 0) { out->memory = DPIM_Explicit; } else if( strcmp(temp,"linear") == 0 ) { out->memory = DPIM_Linear; } else if( strcmp(temp,"default") == 0 ) { out->memory = DPIM_Default; } else { warn("String [%s] for dynamic memory layout is not recognised",temp); free_DPRunImpl(out); return NULL; } } if( (temp = strip_out_assigned_argument(argc,argv,"kbyte")) != NULL ) { if( is_integer_string(temp,&out->kbyte_size) == FALSE ) { warn("String [%s] for dynamic memory size is not recognised",temp); free_DPRunImpl(out); return NULL; } } if(strip_out_boolean_argument(argc,argv,"dydebug") == TRUE ) { out->debug = 1; out->memory = DPIM_Explicit; } return out; }
Gene * read_EMBL_feature_Gene(char * buffer,int maxlen,FILE * ifp) { Gene * gene; Transcript * tr; Translation * ts; Exon * exon; char * runner; char * base; char * next; int i; int exon_start[MAX_EMBL_EXON_PARSE]; int exon_end[MAX_EMBL_EXON_PARSE]; int number; int exon_no = 0; int isstart = 1; int is_complement = 0; int is_cds = 0; int break_at_end = 0; if( strstartcmp(buffer,"FT") != 0 ) { warn("passed in a bad line [%s] to be used for feature table parsing",buffer); return NULL; } if( (runner=strtok(buffer+2,spacestr)) == NULL ) { warn("Bad embl feature line [%s]",buffer); return NULL; } if( strcmp(runner,"CDS") != 0 && strcmp(runner,"mRNA") != 0 ) { warn("passed in a feature line to read_EMBL_feature_Gene with a %s tag. This only handles CDS and mRNA tags",runner); return NULL; } if( strcmp(runner,"CDS") == 0 ) { is_cds = TRUE; } runner = strtok(NULL,spacestr); if( runner == NULL ) { warn("Bad embl feature line [%s]",buffer); return NULL; } if( strstartcmp(runner,"complement") == 0 ) { runner = strchr(runner,'('); if( runner == NULL) { warn("Could not find bracket on EMBL feature complement line"); return NULL; } is_complement = 1; runner++; } if( strstartcmp(runner,"join") == 0 ) { runner = strchr(runner,'('); runner++; } else if( isdigit((int)*runner) || *runner == '<' ) { /** ok - starts with the numbers. We'll cope!**/ } else { warn("Expecting a join statement, got a [%s]",runner); return NULL; } /*** ok, now the major number loop ***/ for(;;) { base= runner; for(;*runner && *runner != ')' && *runner != '.' && *runner != ',' && *runner != '>' && !isspace((int)*runner);runner++) ; /*fprintf(stderr,"Got a runner of %s\n ",runner); */ if( *runner == '\0' ) next = runner; else next = runner+1; if( *runner == ')' ) { break_at_end = TRUE; /* out of reading exons */ } *runner='\0'; if( strstartcmp(base,"complement(") == 0 ) { is_complement = TRUE; for(;*base != '(';base++) ; base++; break_at_end = FALSE; /* we found an bracket too early! 
*/ } if( is_integer_string(base,&number) == FALSE ) { warn("Got a non integer [%s] in the middle of a join statement in EMBL parsing",runner); return NULL; } /** put this number away **/ if( isstart ) { exon_start[exon_no] = number; isstart = 0; } else { exon_end[exon_no++] = number; isstart = 1; } if( break_at_end == TRUE) break; for(runner=next;*runner && (*runner == '.' || isspace((int)*runner));runner++) ; if( *runner == '\0' ) { if( next_feature_tab_line(buffer,maxlen,ifp) == FALSE) { warn("In the middle of getting a join statement, got a [%s]. Yuk!",buffer); return NULL; } if( !isdigit((int)buffer[0]) && buffer[0] != '.' && buffer[0] != ',') { /*** ok - sometimes people very boring end things in here ***/ /* warn("In the middle of getting a join statement, got a [%s]. Ugh!",buffer); */ break; } runner = buffer; } } if( isstart == 0 ) { warn("I have read an uneven number of start-end points in the exon thing. Yuk!"); return NULL; } /** runner should now be on bracket **/ if( is_complement == 1 ) { /** ok . should be another bracket. Do we care? **/ } gene = Gene_alloc_len(1); tr = Transcript_alloc_len(exon_no); add_Gene(gene,tr); tr->parent = gene; if( is_complement == 1 ) { gene->start = exon_end[exon_no-1]-1; gene->end = exon_start[0] -1; for(i=exon_no -1;i >= 0;i--) { exon = Exon_alloc(); exon->start = (gene->start+1) - exon_end[i]; exon->end = (gene->start+1) - exon_start[i] +1; add_ex_Transcript(tr,exon); } } else { gene->start = exon_start[0] -1; gene->end = exon_end[exon_no-1] -1; for(i=0;i<exon_no;i++) { exon = Exon_alloc(); exon->start = exon_start[i] - (gene->start+1); exon->end = exon_end[i] - (gene->start+1)+1; add_ex_Transcript(tr,exon); } } if( is_cds == TRUE ) { ts = Translation_alloc(); ts->start = 0; ts->end = length_Transcript(tr); ts->parent = tr; add_Transcript(tr,ts); } /*** read the rest of this feature ***/ while( next_feature_tab_line(buffer,maxlen,ifp) == TRUE) ; return gene; }
int main(int argc,char * argv[]) { FailureType fail = 0 ; FailureType should_fail_on = 0; int i; boolean doinfo = FALSE; boolean noaddnumbers = FALSE; MethodTypeSet * mts; MethodTypeSet * cp; boolean no_config_mts = FALSE; int prot_level = 0; int should_hard_link = 0; boolean should_warn_undoc = FALSE; char * prot_str; char * runner; char *config_dir=NULL; char buffer[64]; /** really for removing files **/ char * telegraph; APIpara api; char * pack; /** we no longer read in configs **/ mts = standard_dynamite_MethodTypeSet(); if( strip_out_boolean_argument(&argc,argv,"h") == TRUE || strip_out_boolean_argument(&argc,argv,"u") == TRUE /* arve */ || argc == 1 ) { show_usage(stdout); exit(1); } noaddnumbers = strip_out_boolean_argument(&argc,argv,"m"); doinfo = strip_out_boolean_argument(&argc,argv,"i"); no_config_mts = strip_out_boolean_argument(&argc,argv,"U"); should_hard_link = strip_out_boolean_argument(&argc,argv,"l"); prot_str = strip_out_assigned_argument(&argc,argv,"P"); should_warn_undoc = strip_out_boolean_argument(&argc,argv,"D"); telegraph = strip_out_assigned_argument(&argc,argv,"tele"); pack = strip_out_assigned_argument(&argc,argv,"n"); api.xs_ext = NULL; api.typemap_ext = NULL; api.pod_ext = NULL; api.c_extension_name = strip_out_assigned_argument(&argc,argv,"a"); api.t_extension_name = strip_out_assigned_argument(&argc,argv,"b"); api.pfdoc_ext = strip_out_assigned_argument(&argc,argv,"p"); api.xs_ext = strip_out_assigned_argument(&argc,argv,"x"); api.typemap_ext = strip_out_assigned_argument(&argc,argv,"tym"); api.all_callable = strip_out_boolean_argument(&argc,argv,"c"); api.make_perl = strip_out_boolean_argument(&argc,argv,"perl"); api.latex_ext = strip_out_assigned_argument(&argc,argv,"exttex"); api.make_latex = strip_out_boolean_argument(&argc,argv,"latex"); if( api.make_perl == TRUE) { if( api.xs_ext == NULL ) { api.xs_ext = ".xs"; } if( api.typemap_ext == NULL ) { api.typemap_ext = ".typemap"; } if( api.pod_ext == NULL ) { api.pod_ext = ".pod"; 
} } if( strip_out_boolean_argument(&argc,argv,"F") == TRUE) { should_fail_on = FailureType_dyc_All; } /* do DPImplementation */ dpi = new_DPImplementation_from_argstr(&argc,argv); if( prot_str != NULL ) { if( is_integer_string(prot_str,&prot_level) == FALSE ) { warn("Protection level %s is no integer!"); prot_level = 0; } } /* Override/set WISECONFIGDIR on the cmdline. (arve) */ config_dir = strip_out_assigned_argument(&argc, argv, "I"); if (config_dir != NULL) { set_config_dir(config_dir); } if( read_into_MethodTypeSet_filename(mts,"methods") == FALSE){ warn("You have no config file called 'methods'. This is bad news for dynamite matrices. I will attempt to compile, but you cannot use logical types. 'methods' should be either in the current directory, the $WISECONFIGDIR or your $WISEPERSONALDIR"); } /*** ok,loop over and do it ***/ if( argc < 1 ) { warn("You must have at least one dynamite source file to compile!"); show_usage(stdout); exit(1); } if( telegraph != NULL ) { tele_file= fopen(telegraph,"w"); } for(i=1;i<argc;i++) { if( mts != NULL) cp = copy_MethodTypeSet(mts); /* actually very cheap */ if( do_a_file(argv[i],mts,FALSE,prot_level,should_hard_link,should_warn_undoc,noaddnumbers == TRUE ? 
FALSE : TRUE,pack,&api,&fail) == FALSE ) { fatal("Terminated dyc one %d argument %s",i,argv[i]); } if( (should_fail_on == 01 && fail != 0) || (fail & should_fail_on) ) { /*** remove files which fail ****/ /*** ugh this should be done better ***/ for(runner=argv[i]+strlen(argv[i]) - 1;runner > argv[i] && *runner != '.';runner--) ; if( runner != argv[i] ) { *runner = '\0'; sprintf(buffer,"%s.c",argv[i]); if( remove_file(buffer) == FALSE ) { warn("Could not remove file %s from filesystem",buffer); } sprintf(buffer,"%s.h",argv[i]); if( remove_file(buffer) == FALSE ) { warn("Could not remove file %s from filesystem",buffer); } } /* else - well - something bad has happened */ fatal("Failed on file %s due to user defined fails",argv[i]); } if( mts != NULL ) { free_MethodTypeSet(mts); mts = cp; } } free_MethodTypeSet(mts); return 0; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; Genomic * gen_temp; FILE * ifp; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( tstart_str != NULL ) { if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) { warn("Could not make %s out as target start",tstart); ret = FALSE; } } if( tend_str != NULL ) { if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) { warn("Could not make %s out as target end",tend); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str); ret = FALSE; } if( is_embl == FALSE ) { if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) { ret = FALSE; warn("Could not read genomic sequence in %s",dna_seq_file); gen = NULL; } } else { embl = read_EMBL_GenomicRegion_file(dna_seq_file); if( embl == NULL ) { warn("Could not read genomic EMBL file in %s",dna_seq_file); gen = NULL; ret = FALSE; } else { gen = hard_link_Genomic(embl->genomic); } } if( gen != NULL ) { if( tstart != -1 || tend != -1 ) { if( tstart == -1 ) tstart = 0; if( tend == -1 ) tend = gen->baseseq->len; gen_temp = truncate_Genomic(gen,tstart-1,tend); if( gen_temp == NULL ){ ret = FALSE; } else { free_Genomic(gen); gen = gen_temp; } } else { /* no truncation required */ } if( reverse == TRUE ) { if( tstart > tend ) { warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend); } gen_temp = reverse_complement_Genomic(gen); free_Genomic(gen); gen = gen_temp; } } /* * Can't truncate on GenomicRegion (for good reasons!). * but we want only a section of the EMBL file to be used * * So... swap genomic now. 
Positions in EMBL are still valid, * however - some genes will loose their sequence, which will be damaging. ;) */ if( is_embl ) { free_Genomic(embl->genomic); embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */ } if( target_abs == TRUE ) { if( is_embl == TRUE ) { warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta."); ret = FALSE; } gen->baseseq->offset = 1; gen->baseseq->end = strlen(gen->baseseq->seq); } if( alg_str != NULL ) { alg = gwrap_alg_type_from_string(alg_str); } else { if( use_tsm == TRUE ) { alg_str = "623L"; } else { alg_str = "623"; } alg = gwrap_alg_type_from_string(alg_str); } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( use_tsm == FALSE ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) { warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } } } else { /** using a HMM **/ /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/ /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/ tsm = HMMer2_read_ThreeStateModel(hmm_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",hmm_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( 
hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } if( tsm == NULL ) { warn("Could not read %s as a hmm",hmm_file); } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,30,0.1); } } /* end of else tsm != NULL */ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(cfreq_string,"model") == 0 ) { model_codon = TRUE; } else if ( strcmp(cfreq_string,"flat") == 0 ) { model_codon = FALSE; } else { warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string); ret = FALSE; } if( strcmp(splice_string,"model") == 0 ) { model_splice = TRUE; } else if ( strcmp(splice_string,"flat") == 0 ) { model_splice = FALSE; gmp->use_gtag_splice = TRUE; } else { warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( strcmp(intron_string,"model") == 0 ) { use_tied_model = FALSE; } else if ( strcmp(intron_string,"tied") == 0 ) { use_tied_model = TRUE; } else { warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string); ret = FALSE; } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( use_new_stats == 0 ) { if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) { ret = 
FALSE; warn("Could not read a GeneFrequency file in %s",gene_file); } } else { if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){ ret=FALSE; warn("Could not read gene statistics in %s",new_gene_file); } } /* end of else using new gene stats */ if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); return ret; }
GeneStats * read_GeneStats(FILE * ifp) { char buffer[MAXLINE]; GeneStats * out; SeqAlign * temp; char **base; char **brk; out = GeneStats_alloc(); out->rnd = NULL; while( fgets(buffer,MAXLINE,ifp) != NULL ) { /* fprintf(stderr,"Reading (main loop) %s",buffer); */ if( buffer[0] == '#' ) continue; if( buffer[0] == '%' && buffer[1] == '%' ) break; if( strstartcmp(buffer,"splice5") == 0 ) { base = brk = breakstring(buffer,spacestr); if( *brk == NULL || *(brk+1) == NULL || is_integer_string(*(brk+1),&out->splice5_offset) == 0) { warn("Cannot read splice5 offset - must be splice5 <number>"); return NULL; } ckfree(base); temp = read_selex_SeqAlign(ifp); if( temp == NULL ) { warn("Could not read in selex alignment for splice5"); continue; } out->splice5 = temp; continue; } if( strstartcmp(buffer,"splice3") == 0 ) { base = brk = breakstring(buffer,spacestr); if( *brk == NULL || *(brk+1) == NULL || is_integer_string(*(brk+1),&out->splice3_offset) == 0) { warn("Cannot read splice3 offset - must be splice3 <number>"); return NULL; } ckfree(base); temp = read_selex_SeqAlign(ifp); if( temp == NULL ) { warn("Could not read in selex alignment for splice5"); continue; } out->splice3 = temp; continue; } if( strstartcmp(buffer,"intron_emission") == 0 ) { if( fgets(buffer,MAXLINE,ifp) == NULL ) { warn("Could not read in intron emission line"); break; } out->intron = get_genestat_emission(buffer); if( fgets(buffer,MAXLINE,ifp) != NULL ) { continue; } else { break; } } if( strstartcmp(buffer,"polyp_emission") == 0 ) { if( fgets(buffer,MAXLINE,ifp) == NULL ) { warn("Could not read in polyp emission line"); break; } out->polyp = get_genestat_emission(buffer); if( fgets(buffer,MAXLINE,ifp) != NULL ) { continue; } else { break; } } if( strstartcmp(buffer,"rnd_emission") == 0 ) { if( fgets(buffer,MAXLINE,ifp) == NULL ) { warn("Could not read in rnd emission line"); break; } out->rnd = get_genestat_emission(buffer); if( fgets(buffer,MAXLINE,ifp) != NULL ) { continue; } else { break; } } if( 
strstartcmp(buffer,"rndcodon") == 0 ) { if( read_codon_GeneStats(out->codon,buffer,ifp) == FALSE ) { warn("Problem in reading codon line!"); } continue; } if( isalpha(buffer[0]) ) { warn("Could not read line %s in genestats reading\n",buffer); } } assert(out); assert(out->splice5); assert(out->splice3); return out; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; SequenceDB * psdb; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( use_single_dna == TRUE ) { cdna = read_fasta_file_cDNA(dna_seq_file); if( cdna == NULL ) { warn("Could not open single dna sequence in %s",dna_seq_file); ret = FALSE; } } else { sdb = single_fasta_SequenceDB(dna_seq_file); if( sdb == NULL ) { warn("Could not build a sequence database on %s",dna_seq_file); ret = FALSE; } } rm = default_RandomModel(); if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not get gap string number %s",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not get ext string number %s",ext_str); ret = FALSE; } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( aln_number_str != NULL ) { if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) { warn("Weird aln number string %s...\n",aln_number_str); ret = FALSE; } } if( report_str != NULL ) { if( is_integer_string(report_str,&report_stagger) == FALSE ) { warn("Weird report stagger asked for %s",report_str); ret = FALSE; } } if( use_pfam1 == TRUE ) { tsmdb = 
new_PfamHmmer1DB_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_pfam2 == TRUE ) { tsmdb = HMMer2_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_tsm == TRUE) { /** using a HMM **/ tsm = HMMer2_read_ThreeStateModel(protein_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",protein_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } else { if( tsm->name == NULL ) { tsm->name = stringalloc(protein_file); } } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,15,0.2); tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. 
Weird!"); ret = FALSE; } } /* end of else tsm != NULL */ } /* end of else is tsm */ else if( use_single_pro ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) { warn("Proteins can only have local/global startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } if( startend == TSM_global) tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); else tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); if( tsm == NULL ) { warn("Could not build ThreeStateModel from a single protein sequence..."); ret = FALSE; } else { tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. Weird!"); ret = FALSE; } } /* end of could build a TSM */ } /* else is a real protein */ } /* end of else is single protein */ else if (use_db_pro == TRUE ) { psdb = single_fasta_SequenceDB(protein_file); tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext); free_SequenceDB(psdb); } else { warn("No protein input file! 
Yikes!"); } /*** if( use_tsm == FALSE ) { } else { ****/ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) { warn("Could not convert %s to a double",evalue_search_str); ret = FALSE; } if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) { warn("Could not convert %s to a double",search_cutoff_str); ret = FALSE; } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( alg_str != NULL ) { alg = alg_estwrap_from_string(alg_str); } else { alg_str = "312"; alg = alg_estwrap_from_string(alg_str); } if( aln_alg_str != NULL ) { aln_alg = alg_estwrap_from_string(aln_alg_str); } else { /* if it is a protein, don't loop */ if( use_single_pro == TRUE || use_db_pro == TRUE ) aln_alg_str = "333"; else aln_alg_str = "333L"; aln_alg = alg_estwrap_from_string(aln_alg_str); } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); cps = flat_cDNAParser(indel_error); cm = flat_CodonMapper(ct); 
sprinkle_errors_over_CodonMapper(cm,subs_error); return ret; }
/*
 * Interactive command loop of the dynamite debugger.
 *
 * Prompts on de->out and reads commands, one per line, from de->in:
 *
 *   quit          exits the program
 *   show ...      passes the whole line to show_DebugMatrix
 *   run           returns to the calling function
 *   break <i> <j> adds a cursor break point at i,j
 *   set <i> <j>   moves the current position to i,j and sets de->reset
 *
 * Other input is ignored and the prompt is shown again.
 *
 * Returns FALSE when the user types run, or when the input stream ends.
 */
boolean user_DebugMatrix(DebugMatrix * de)
{
  char buffer[MAXLINE];
  char ** base;
  char ** brk;
  FILE * in;
  FILE * out;
  DebugBreakPoint * bp;

  assert(de);
  assert(de->in);
  assert(de->out);

  in = de->in;
  out = de->out;

  /* set reset to FALSE */
  de->reset = 0;

  fprintf(out,"Entering dynamite debugger. Type help for help\n");

  while(1) {
    fprintf(out,"Dy %5d:%5d max: %d >",de->currenti,de->currentj,de->max_score);

    /* end of input: hand control back rather than spin on a stale buffer */
    if( fgets(buffer,MAXLINE,in) == NULL ) {
      return FALSE;
    }

    if( strstartcmp(buffer,"quit") == 0 ) {
      exit(1);
    }

    if( strstartcmp(buffer,"show") == 0 ) {
      show_DebugMatrix(de,buffer);
      continue;
    }

    if( strstartcmp(buffer,"run") == 0 ) {
      /* return to calling function */
      return FALSE;
    }

    if( strstartcmp(buffer,"break") == 0 ) {
      base = brk = breakstring(buffer,spacestr);
      brk++; /* step over the command word itself */

      if( *brk == NULL || *(brk+1) == NULL ) {
        fprintf(out,">>>> break must have i j positions\n");
      } else {
        bp = DebugBreakPoint_alloc();
        bp->type = MDBP_Cursor;

        if( is_integer_string(*brk,&bp->posi) == FALSE ) {
          fprintf(out,">>>> i position not an integer.\n");
        }
        brk++;
        if( is_integer_string(*brk,&bp->posj) == FALSE ) {
          fprintf(out,">>>> j position not an integer.\n");
        }

        fprintf(out,"Adding cursor break point %d,%d\n",bp->posi,bp->posj);
        add_bp_DebugMatrix(de,bp);
      }

      /* released on both paths: the error path used to skip this */
      ckfree(base);
      continue;
    }

    if( strstartcmp(buffer,"set") == 0 ) {
      base = brk = breakstring(buffer,spacestr);
      brk++; /* step over the command word itself */

      if( *brk == NULL || *(brk+1) == NULL ) {
        fprintf(out,">>>> set must have i j positions\n");
      } else {
        /* reset cursor positions: first token is i, second is j */
        if( is_integer_string(*brk,&de->currenti) == FALSE ) {
          fprintf(out,">>>> i position not an integer.\n");
        }
        if( is_integer_string(*(brk+1),&de->currentj) == FALSE ) {
          fprintf(out,">>>> j position not an integer.\n");
        }
      }

      de->reset = 1;
      ckfree(base);
      continue;
    }
  }
}