ThreeStateModel * read_TSM_from_PfamHmmer1Entry(PfamHmmer1Entry * en,char * dir) { char buffer[512]; ThreeStateModel * tsm; sprintf(buffer,"%s/%s.hmm",dir,en->entryname); /* tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(buffer); */ tsm = HMMer2_read_ThreeStateModel(buffer); if( tsm == NULL ) { warn("Could not open Hmmer1 style hmm from Pfam db on file [%s]",buffer); return NULL; } if( tsm->name != NULL ) { ckfree(tsm->name); } tsm->name = stringalloc(en->entryname); display_char_in_ThreeStateModel(tsm); /* ignore random stuff for the moment */ if( en->is_hmmls == FALSE ) { force_weighted_local_model(tsm,1.0,1.0,1.0); } else { force_weighted_local_model(tsm,1.0,0.5,0.5); } return tsm; }
ThreeStateModel * ThreeStateModel_from_name_PfamHmmer1DB(PfamHmmer1DB * phd,char * name) { char buffer[512]; ThreeStateModel * tsm; sprintf(buffer,"%s/%s.hmm",phd->dirname,name); /* tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(buffer); */ tsm = HMMer2_read_ThreeStateModel(buffer); if( tsm == NULL ) { warn("Could not open Hmmer1 style hmm from Pfam db on file [%s]",buffer); return NULL; } if( tsm->name != NULL ) { ckfree(tsm->name); } tsm->name = stringalloc(name); /* ignore random stuff for the moment */ return tsm; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; Genomic * gen_temp; FILE * ifp; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( tstart_str != NULL ) { if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) { warn("Could not make %s out as target start",tstart); ret = FALSE; } } if( tend_str != NULL ) { if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) { warn("Could not make %s out as target end",tend); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str); ret = FALSE; } if( is_embl == FALSE ) { if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) { ret = FALSE; warn("Could not read genomic sequence in %s",dna_seq_file); gen = NULL; } } else { embl = read_EMBL_GenomicRegion_file(dna_seq_file); if( embl == NULL ) { warn("Could not read genomic EMBL file in %s",dna_seq_file); gen = NULL; ret = FALSE; } else { gen = hard_link_Genomic(embl->genomic); } } if( gen != NULL ) { if( tstart != -1 || tend != -1 ) { if( tstart == -1 ) tstart = 0; if( tend == -1 ) tend = gen->baseseq->len; gen_temp = truncate_Genomic(gen,tstart-1,tend); if( gen_temp == NULL ){ ret = FALSE; } else { free_Genomic(gen); gen = gen_temp; } } else { /* no truncation required */ } if( reverse == TRUE ) { if( tstart > tend ) { warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend); } gen_temp = reverse_complement_Genomic(gen); free_Genomic(gen); gen = gen_temp; } } /* * Can't truncate on GenomicRegion (for good reasons!). * but we want only a section of the EMBL file to be used * * So... swap genomic now. Positions in EMBL are still valid, * however - some genes will loose their sequence, which will be damaging. ;) */ if( is_embl ) { free_Genomic(embl->genomic); embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */ } if( target_abs == TRUE ) { if( is_embl == TRUE ) { warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta."); ret = FALSE; } gen->baseseq->offset = 1; gen->baseseq->end = strlen(gen->baseseq->seq); } if( alg_str != NULL ) { alg = gwrap_alg_type_from_string(alg_str); } else { if( use_tsm == TRUE ) { alg_str = "623L"; } else { alg_str = "623"; } alg = gwrap_alg_type_from_string(alg_str); } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( use_tsm == FALSE ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) { warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } } } else { /** using a HMM **/ /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/ /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/ tsm = HMMer2_read_ThreeStateModel(hmm_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",hmm_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } if( tsm == NULL ) { warn("Could not read %s as a hmm",hmm_file); } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,30,0.1); } } /* end of else tsm != NULL */ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(cfreq_string,"model") == 0 ) { model_codon = TRUE; } else if ( strcmp(cfreq_string,"flat") == 0 ) { model_codon = FALSE; } else { warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string); ret = FALSE; } if( strcmp(splice_string,"model") == 0 ) { model_splice = TRUE; } else if ( strcmp(splice_string,"flat") == 0 ) { model_splice = FALSE; gmp->use_gtag_splice = TRUE; } else { warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( strcmp(intron_string,"model") == 0 ) { use_tied_model = FALSE; } else if ( strcmp(intron_string,"tied") == 0 ) { use_tied_model = TRUE; } else { warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string); ret = FALSE; } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( use_new_stats == 0 ) { if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) { ret = FALSE; warn("Could not read a GeneFrequency file in %s",gene_file); } } else { if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){ ret=FALSE; warn("Could not read gene statistics in %s",new_gene_file); } } /* end of else using new gene stats */ if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); return ret; }
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; SequenceDB * psdb; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( use_single_dna == TRUE ) { cdna = read_fasta_file_cDNA(dna_seq_file); if( cdna == NULL ) { warn("Could not open single dna sequence in %s",dna_seq_file); ret = FALSE; } } else { sdb = single_fasta_SequenceDB(dna_seq_file); if( sdb == NULL ) { warn("Could not build a sequence database on %s",dna_seq_file); ret = FALSE; } } rm = default_RandomModel(); if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not get gap string number %s",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not get ext string number %s",ext_str); ret = FALSE; } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( aln_number_str != NULL ) { if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) { warn("Weird aln number string %s...\n",aln_number_str); ret = FALSE; } } if( report_str != NULL ) { if( is_integer_string(report_str,&report_stagger) == FALSE ) { warn("Weird report stagger asked for %s",report_str); ret = FALSE; } } if( use_pfam1 == TRUE ) { tsmdb = new_PfamHmmer1DB_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_pfam2 == TRUE ) { tsmdb = HMMer2_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_tsm == TRUE) { /** using a HMM **/ tsm = HMMer2_read_ThreeStateModel(protein_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",protein_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } else { if( tsm->name == NULL ) { tsm->name = stringalloc(protein_file); } } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,15,0.2); tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. Weird!"); ret = FALSE; } } /* end of else tsm != NULL */ } /* end of else is tsm */ else if( use_single_pro ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) { warn("Proteins can only have local/global startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } if( startend == TSM_global) tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); else tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); if( tsm == NULL ) { warn("Could not build ThreeStateModel from a single protein sequence..."); ret = FALSE; } else { tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. Weird!"); ret = FALSE; } } /* end of could build a TSM */ } /* else is a real protein */ } /* end of else is single protein */ else if (use_db_pro == TRUE ) { psdb = single_fasta_SequenceDB(protein_file); tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext); free_SequenceDB(psdb); } else { warn("No protein input file! Yikes!"); } /*** if( use_tsm == FALSE ) { } else { ****/ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) { warn("Could not convert %s to a double",evalue_search_str); ret = FALSE; } if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) { warn("Could not convert %s to a double",search_cutoff_str); ret = FALSE; } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( alg_str != NULL ) { alg = alg_estwrap_from_string(alg_str); } else { alg_str = "312"; alg = alg_estwrap_from_string(alg_str); } if( aln_alg_str != NULL ) { aln_alg = alg_estwrap_from_string(aln_alg_str); } else { /* if it is a protein, don't loop */ if( use_single_pro == TRUE || use_db_pro == TRUE ) aln_alg_str = "333"; else aln_alg_str = "333L"; aln_alg = alg_estwrap_from_string(aln_alg_str); } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); cps = flat_cDNAParser(indel_error); cm = flat_CodonMapper(ct); sprinkle_errors_over_CodonMapper(cm,subs_error); return ret; }