int main(int argc,char ** argv) { SequenceDB * db; Sequence * seq; SeqLookupInterface * sli; SeqLookupPos * slp; HSPScanInterface * hsi; LinearHSPmanager * lm; HitList * hl; CompMat * mat; int ret; HSPScanInterfacePara p; p.min_score= 30; p.max_results = 200; db = single_fasta_SequenceDB(argv[1]); mat = read_Blast_file_CompMat("blosum62.bla"); sli = new_ghash_SeqLookupInterface(); for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) { load_aa_flat_Sequence_SeqLookupInterface(sli,hard_link_Sequence(seq)); } seq = read_fasta_file_Sequence(argv[2]); assert(seq); hsi = Wise2_new_one_off_HSPScanInterface(sli,mat,20,10); /* hspm = simple_HSPScan_scan_query((void*)hsi->data,seq); */ lm = (*hsi->scan_query)(hsi->data,seq,&p); hl = Wise2_HitList_from_LinearHSPmanager(lm); Wise2_write_pseudoblast_HitList(hl,stdout); }
/*
 * Builds the HSPScanInterface selected by the flags of a ScanWiseHSPImpl.
 *
 *   i    implementation choice (use_corba / use_mysql / use_wiseserver /
 *        use_compress / use_multiscan flags plus their settings);
 *        when none of the flags is set an in-memory index is built
 *        from i->direct_sequence
 *   pic  index constructor, used only for the in-memory (direct) index
 *   slp  load parameters, used when sequences are loaded locally
 *        (direct and compressed modes)
 *
 * The comparison matrix is read from i->matrix_file and released again
 * before returning.
 *
 * Returns the new interface. Configuration errors are reported through
 * fatal(); the final assert guards against returning a NULL interface.
 */
HSPScanInterface * new_HSPScanInterface_from_ScanWiseHSPImpl(ScanWiseHSPImpl * i,ProteinIndexConstructor * pic,SeqLookupLoadPara * slp)
{
  /* must start as NULL: several paths below never assign it, and the
     closing assert would otherwise read an indeterminate pointer */
  HSPScanInterface * out = NULL;
  SeqLookupInterface * sli;
  SequenceDB * db;
  CompMat * mat;

  mat = read_Blast_file_CompMat(i->matrix_file);

  if( i->use_corba == FALSE && i->use_mysql == FALSE && i->use_wiseserver == FALSE && i->use_compress == FALSE && i->use_multiscan == FALSE) {
    /* no server of any kind: index the direct sequence file in memory */
    if( i->direct_sequence == NULL ) {
      fatal("If no server based sequence, must have direct sequence");
    } else {
      db = single_fasta_SequenceDB(i->direct_sequence);
      if( db == NULL ) {
        fatal("Could not open sequence database %s",i->direct_sequence);
      } else {
        sli = new_SeqLookupInterface_from_ProteinIndexConstructor(pic);
        load_SequenceDB_SeqLookupLoadPara(slp,db,sli);
        free_SequenceDB(db);
        out = new_one_off_HSPScanInterface(sli,mat,15,40);
      }
    }
  } else if( i->use_corba == TRUE ) {
#ifdef SCAN_CORBA
    if( i->ior_file == NULL ) {
      fatal("Corba specified, but no ior file given");
    }
    out = new_corba_HSPScan(sorb,i->ior_file,mat);
#else
    fatal("Asking for CORBA, but scanwisep was not compiled with SCAN_CORBA defined.");
#endif
  } else if ( i->use_mysql == TRUE ) {
#ifdef SCAN_MYSQL
    out = new_HSPScanInterface_MysqlProteinIndex(i->host,i->dbname,i->username,i->password,mat,i->step);
#else
    fatal("Asking for mysql, but scanwisep was not compiled with SCAN_MYSQL defined");
#endif
  } else if ( i->use_wiseserver == TRUE ) {
#ifdef SCAN_WISESERVER
    out = new_wise_transfer_HSPScanInterface(i->host,i->port);
#else
    fatal("Asking for wiseserver, but scanwisep was not compiled with SCAN_WISESERVER defined");
#endif
  } else if ( i->use_compress == TRUE ) {
#ifdef SCAN_COMPRESS
    /* same precondition as the direct branch: a sequence file is required */
    if( i->direct_sequence == NULL ) {
      fatal("Compressed lookup specified, but no direct sequence given");
    } else {
      db = single_fasta_SequenceDB(i->direct_sequence);
      if( db == NULL ) {
        fatal("Could not open sequence database %s",i->direct_sequence);
      } else {
        sli = new_direct_CompressedProteinLookup();
        load_SequenceDB_SeqLookupLoadPara(slp,db,sli);
        free_SequenceDB(db);
        out = new_one_off_HSPScanInterface(sli,mat,15,40);
      }
    }
#else
    fatal("Asking for compressed, but scanwisep was not compiled with SCAN_COMPRESS defined");
#endif
  } else if( i->use_multiscan == TRUE ) {
    if( i->multiscan_file == NULL ) {
      fatal("Must provide a file for a multiple server scan");
    }
    out = new_multiclient_HSPScanInterface(i->multiscan_file);
  }

  assert(out != NULL);

  free_CompMat(mat); /* hard linked internally */

  return out;
}
boolean build_objects(void) { boolean ret = TRUE; Protein * pro_temp; SequenceDB * psdb; startend = threestatemodel_mode_from_string(startend_string); if( startend == TSM_unknown ) { warn("String %s was unable to converted into a start/end policy\n",startend_string); ret = FALSE; } if( use_single_dna == TRUE ) { cdna = read_fasta_file_cDNA(dna_seq_file); if( cdna == NULL ) { warn("Could not open single dna sequence in %s",dna_seq_file); ret = FALSE; } } else { sdb = single_fasta_SequenceDB(dna_seq_file); if( sdb == NULL ) { warn("Could not build a sequence database on %s",dna_seq_file); ret = FALSE; } } rm = default_RandomModel(); if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) { if( use_tsm == TRUE ) { info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file); } else { warn("Could not read Comparison matrix file in %s",matrix_file); ret = FALSE; } } if( is_integer_string(gap_str,&gap) == FALSE ) { warn("Could not get gap string number %s",gap_str); ret = FALSE; } if( is_integer_string(ext_str,&ext) == FALSE ) { warn("Could not get ext string number %s",ext_str); ret = FALSE; } if( qstart_str != NULL ) { if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) { warn("Could not make %s out as query start",qstart); ret = FALSE; } } if( qend_str != NULL ) { if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) { warn("Could not make %s out as query end",qend); ret = FALSE; } } if( aln_number_str != NULL ) { if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) { warn("Weird aln number string %s...\n",aln_number_str); ret = FALSE; } } if( report_str != NULL ) { if( is_integer_string(report_str,&report_stagger) == FALSE ) { warn("Weird report stagger asked for %s",report_str); ret = FALSE; } } if( use_pfam1 == TRUE ) { tsmdb = 
new_PfamHmmer1DB_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_pfam2 == TRUE ) { tsmdb = HMMer2_ThreeStateDB(protein_file); if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) { warn("Unable to set global/local switch on threestatedb"); ret = FALSE; } } else if ( use_tsm == TRUE) { /** using a HMM **/ tsm = HMMer2_read_ThreeStateModel(protein_file); if( tsm == NULL ) { warn("Could not read hmm from %s\n",protein_file); ret = FALSE; } else { display_char_in_ThreeStateModel(tsm); if( hmm_name != NULL ) { if( tsm->name != NULL ) ckfree(tsm->name); tsm->name = stringalloc(hmm_name); } else { if( tsm->name == NULL ) { tsm->name = stringalloc(protein_file); } } /** have to set start/end **/ set_startend_policy_ThreeStateModel(tsm,startend,15,0.2); tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. 
Weird!"); ret = FALSE; } } /* end of else tsm != NULL */ } /* end of else is tsm */ else if( use_single_pro ) { if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) { warn("Proteins can only have local/global startend policies set, not %s",startend_string); ret = FALSE; } if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) { ret = FALSE; warn("Could not read Protein sequence in %s",protein_file); } else { if( qstart != -1 || qend != -1 ) { if( qstart == -1 ) qstart = 0; if( qend == -1 ) qend = pro->baseseq->len; pro_temp = truncate_Protein(pro,qstart-1,qend); if( pro_temp == NULL ){ ret = FALSE; } else { free_Protein(pro); pro = pro_temp; } } if( startend == TSM_global) tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); else tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext); if( tsm == NULL ) { warn("Could not build ThreeStateModel from a single protein sequence..."); ret = FALSE; } else { tsmdb = new_single_ThreeStateDB(tsm,rm); if( tsmdb == NULL ) { warn("Could not build a threestatemodel database from a single tsm. Weird!"); ret = FALSE; } } /* end of could build a TSM */ } /* else is a real protein */ } /* end of else is single protein */ else if (use_db_pro == TRUE ) { psdb = single_fasta_SequenceDB(protein_file); tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext); free_SequenceDB(psdb); } else { warn("No protein input file! 
Yikes!"); } /*** if( use_tsm == FALSE ) { } else { ****/ if( main_block_str != NULL ) { if( is_integer_string(main_block_str,&main_block) == FALSE ) { warn("Could not get maximum main_block number %s",main_block_str); ret = FALSE; } } if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) { warn("Could not convert %s to a double",evalue_search_str); ret = FALSE; } if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) { warn("Could not convert %s to a double",search_cutoff_str); ret = FALSE; } if( is_double_string(subs_string,&subs_error) == FALSE ) { warn("Could not convert %s to a double",subs_error); ret = FALSE; } if( is_double_string(indel_string,&indel_error) == FALSE ) { warn("Could not convert %s to a double",indel_error); ret = FALSE; } if( is_double_string(allN_string,&allN) == FALSE ) { warn("Could not convert %s to a double",allN_string); ret = FALSE; } if( strcmp(null_string,"syn") == 0 ) { use_syn = TRUE; } else if ( strcmp(null_string,"flat") == 0 ) { use_syn = FALSE; } else { warn("Cannot interpret [%s] as a null model string\n",null_string); ret = FALSE; } if( alg_str != NULL ) { alg = alg_estwrap_from_string(alg_str); } else { alg_str = "312"; alg = alg_estwrap_from_string(alg_str); } if( aln_alg_str != NULL ) { aln_alg = alg_estwrap_from_string(aln_alg_str); } else { /* if it is a protein, don't loop */ if( use_single_pro == TRUE || use_db_pro == TRUE ) aln_alg_str = "333"; else aln_alg_str = "333L"; aln_alg = alg_estwrap_from_string(aln_alg_str); } if( (rm = default_RandomModel()) == NULL) { warn("Could not make default random model\n"); ret = FALSE; } if( (ct = read_CodonTable_file(codon_file)) == NULL) { ret = FALSE; warn("Could not read codon table file in %s",codon_file); } if( (ofp = openfile(output_file,"W")) == NULL) { warn("Could not open %s as an output file",output_file); ret = FALSE; } rmd = RandomModelDNA_std(); cps = flat_cDNAParser(indel_error); cm = flat_CodonMapper(ct); 
sprinkle_errors_over_CodonMapper(cm,subs_error); return ret; }
int main(int argc,char ** argv) { DPRunImpl * dpri = NULL; ScanWiseHSPImpl * scani = NULL; HSP2HitListImpl * hsp2hiti = NULL; HitListOutputImpl * hloi = NULL; ProteinIndexConstructor * pic = NULL; HSPScanInterface * hsi; HSPScanInterfacePara * para; SearchStatInterface * ssi; SearchStatInterface * ssl; SeqLookupLoadPara * slp; HSPset2HitPairPara * hsp2hit; CompMat * mat; SequenceDB * db; Sequence * seq; int ret; int i; int effective_db_size = 300000; int kk; int count = 0; LinearHSPmanager * lm; HitList * hl; boolean use_mott = 1; boolean trunc_best_hsp = 0; boolean verbose = 0; static struct rusage use; struct timeval t0, t1; gettimeofday(&t0, NULL); dpri = new_DPRunImpl_from_argv(&argc,argv); dpri->memory = DPIM_Explicit; scani = new_ScanWiseHSPImpl_from_argv(&argc,argv); hsp2hiti = new_HSP2HitListImpl_from_argv(&argc,argv); hloi = new_HitListOutputImpl_from_argv(&argc,argv); slp = new_SeqLookupLoadPara_from_argv(&argc,argv); pic = new_ProteinIndexConstructor_from_argv(&argc,argv); hsp2hit = new_HSPset2HitPairPara_from_argv(&argc,argv); para = new_HSPScanInterfacePara_from_argv(&argc,argv); verbose = strip_out_boolean_argument(&argc,argv,"verbose") ; strip_out_boolean_def_argument(&argc,argv,"mott",&use_mott); strip_out_boolean_def_argument(&argc,argv,"besthsp",&trunc_best_hsp); strip_out_integer_argument(&argc,argv,"dbsize",&effective_db_size); #ifdef SCAN_CORBA sorb = get_Wise2Corba_Singleton(&argc,argv,"orbit-local-orb"); #endif if( dpri == NULL ) { fatal("Unable to build DPRun implementation. Bad arguments"); } strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } /* ugly, but we don't want to bounce matrices around the network... 
*/ mat = read_Blast_file_CompMat("BLOSUM62.bla"); erroroff(REPORT); hsi = new_HSPScanInterface_from_ScanWiseHSPImpl(scani,pic,slp); ssi = new_Mott_SearchStatInterface(); ssl = new_lookup_SearchStatInterface(40,2.3); if( verbose ) { info("contacted database"); } db = single_fasta_SequenceDB(argv[1]); if( db == NULL ) { fatal("Could not open sequence db...\n"); } for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) { count++; for(i=0;i<seq->len;i++) { if( !isalpha(seq->seq[i]) ) { fatal("Sequence position %d [%c] is not valid",i,seq->seq[i]); } seq->seq[i] = toupper(seq->seq[i]); } info("Processing %s",seq->name); getrusage(RUSAGE_SELF,&use); /* info("Before query %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ lm = (*hsi->scan_query)(hsi->data,seq,para); fprintf(stderr,"Got linear manager is %d entries\n",lm->len); if( lm->mat == NULL ) { lm->mat = hard_link_CompMat(mat); } getrusage(RUSAGE_SELF,&use); /* info("After query %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ sort_LinearHSPmanager(lm,compare_HSPset_score); if( trunc_best_hsp == 1 ) { for(kk=1;kk<lm->len;kk++) { free_HSPset(lm->set[kk]); lm->set[kk] = NULL; } lm->len = 1; } getrusage(RUSAGE_SELF,&use); /* info("After sort %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ hl = HitList_from_HSP_HSP2HitListImpl(hsp2hiti,lm,dpri,hsp2hit); getrusage(RUSAGE_SELF,&use); /* info("After conversion %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ free_LinearHSPmanager(lm); if( use_mott == 1 ) { apply_SearchStat_to_HitList(hl,ssi,effective_db_size); } else { for(kk=0;kk<hl->len;kk++) { 
hl->pair[kk]->bit_score = hl->pair[kk]->raw_score / 2.0; } } sort_HitList_by_score(hl); show_HitList_HitListOutputImpl(hloi,hl,stdout); getrusage(RUSAGE_SELF,&use); /* info("After output %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ free_HitList(hl); free_Sequence(seq); } free_DPRunImpl(dpri); free_HSPScanInterface(hsi); gettimeofday(&t1, NULL); fprintf(stderr, "[client stats] queries, time (s): %d %f\n", count, (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) * 1e-6); return 0; }