boolean open_ThreeStateDB(ThreeStateDB * mdb) { int ret; int return_status; ThreeStateModel * temp; int count; mdb->current_no = 0; switch( mdb->dbtype ) { case TSMDB_SINGLE : return TRUE; /* should be fine! */ case TSMDB_HMMER1PFAM : if( mdb->phdb == NULL ) { warn("No hmmer1 db to open for threestatedb!"); return FALSE; } mdb->phdb->cur = 0; break; case TSMDB_PROTEIN : if( mdb->sdb == NULL ) { warn("Attempting to open a protein tsm with no sequence db!"); return FALSE; } mdb->seq_cache = init_SequenceDB(mdb->sdb,&ret); if( ret == DB_RETURN_ERROR ) { return FALSE; } if( ret == DB_RETURN_END ) { warn("Due to some bad coding, can't cope with single protein databases in tsmdbs. oooops!"); } break; case TSMDB_GENERIC : ((*mdb->open_generic)(mdb)); break; default : warn("Got an unrecognisable tsm db type in opening tsm %d",mdb->dbtype); return FALSE; } if( mdb->hmm_model_start != -1 && mdb->hmm_model_end != -1 ) { for(count=0;count<mdb->hmm_model_start;count++) { temp = read_TSM_ThreeStateDB(mdb,&return_status); free_ThreeStateModel(temp); } } return TRUE; }
int main(int argc,char ** argv) { SequenceDB * db; Sequence * seq; SeqLookupInterface * sli; SeqLookupPos * slp; HSPScanInterface * hsi; LinearHSPmanager * lm; HitList * hl; CompMat * mat; int ret; HSPScanInterfacePara p; p.min_score= 30; p.max_results = 200; db = single_fasta_SequenceDB(argv[1]); mat = read_Blast_file_CompMat("blosum62.bla"); sli = new_ghash_SeqLookupInterface(); for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) { load_aa_flat_Sequence_SeqLookupInterface(sli,hard_link_Sequence(seq)); } seq = read_fasta_file_Sequence(argv[2]); assert(seq); hsi = Wise2_new_one_off_HSPScanInterface(sli,mat,20,10); /* hspm = simple_HSPScan_scan_query((void*)hsi->data,seq); */ lm = (*hsi->scan_query)(hsi->data,seq,&p); hl = Wise2_HitList_from_LinearHSPmanager(lm); Wise2_write_pseudoblast_HitList(hl,stdout); }
ComplexSequence * init_cDNADB(cDNADB * cdnadb,int * return_status) { ComplexSequence * cs; Sequence * seq; if( cdnadb->is_single_seq == TRUE) { *return_status = DB_RETURN_OK; cdnadb->done_forward = TRUE; return hard_link_ComplexSequence(cdnadb->forw); } /* is a seq db */ seq = init_SequenceDB(cdnadb->sdb,return_status); if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) { return NULL; /** error already reported **/ } if( force_to_dna_Sequence(seq,cdnadb->error_tol,NULL) == FALSE ) { warn("first sequence below error level, have to fail at the moment. Ooops..."); free_Sequence(seq); *return_status = DB_RETURN_ERROR; return NULL; } cdnadb->current = seq; cdnadb->done_forward = TRUE; cs = new_ComplexSequence(seq,cdnadb->cses); if( cs == NULL ) { warn("Cannot make initial ComplexSequence. Unable to error catch this. Failing!"); *return_status = DB_RETURN_ERROR; return NULL; } return cs; }
ComplexSequence * init_GenomicDB(GenomicDB * gendb,int * return_status) { ComplexSequence * cs; Sequence * seq; if( gendb->is_single_seq == TRUE) { *return_status = DB_RETURN_OK; gendb->done_forward = TRUE; return hard_link_ComplexSequence(gendb->forw); } /* is a seq db */ seq = init_SequenceDB(gendb->sdb,return_status); if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) { return NULL; /** error already reported **/ } /* check dna status. We assumme someone knows what he is doing when he makes a genomic db!*/ if( seq->type != SEQUENCE_DNA) { warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",seq->name); } force_to_dna_Sequence(seq,1.0,NULL); /* map to Genomic on length of N buiness */ gendb->current = Genomic_from_Sequence_Nheuristic(seq,gendb->length_of_N); gendb->done_forward = TRUE; cs = evaluate_ComplexSequence_Genomic(gendb->current,gendb->cses,0,gendb->repeat_in_cds_score); if( cs == NULL ) { warn("Cannot make initial ComplexSequence. Unable to error catch this. Failing!"); *return_status = DB_RETURN_ERROR; return NULL; } return cs; }
int main(int argc,char ** argv) { DPRunImpl * dpri = NULL; ScanWiseHSPImpl * scani = NULL; HSP2HitListImpl * hsp2hiti = NULL; HitListOutputImpl * hloi = NULL; ProteinIndexConstructor * pic = NULL; HSPScanInterface * hsi; HSPScanInterfacePara * para; SearchStatInterface * ssi; SearchStatInterface * ssl; SeqLookupLoadPara * slp; HSPset2HitPairPara * hsp2hit; CompMat * mat; SequenceDB * db; Sequence * seq; int ret; int i; int effective_db_size = 300000; int kk; int count = 0; LinearHSPmanager * lm; HitList * hl; boolean use_mott = 1; boolean trunc_best_hsp = 0; boolean verbose = 0; static struct rusage use; struct timeval t0, t1; gettimeofday(&t0, NULL); dpri = new_DPRunImpl_from_argv(&argc,argv); dpri->memory = DPIM_Explicit; scani = new_ScanWiseHSPImpl_from_argv(&argc,argv); hsp2hiti = new_HSP2HitListImpl_from_argv(&argc,argv); hloi = new_HitListOutputImpl_from_argv(&argc,argv); slp = new_SeqLookupLoadPara_from_argv(&argc,argv); pic = new_ProteinIndexConstructor_from_argv(&argc,argv); hsp2hit = new_HSPset2HitPairPara_from_argv(&argc,argv); para = new_HSPScanInterfacePara_from_argv(&argc,argv); verbose = strip_out_boolean_argument(&argc,argv,"verbose") ; strip_out_boolean_def_argument(&argc,argv,"mott",&use_mott); strip_out_boolean_def_argument(&argc,argv,"besthsp",&trunc_best_hsp); strip_out_integer_argument(&argc,argv,"dbsize",&effective_db_size); #ifdef SCAN_CORBA sorb = get_Wise2Corba_Singleton(&argc,argv,"orbit-local-orb"); #endif if( dpri == NULL ) { fatal("Unable to build DPRun implementation. Bad arguments"); } strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 2 ) { show_help(stdout); exit(12); } /* ugly, but we don't want to bounce matrices around the network... */ mat = read_Blast_file_CompMat("BLOSUM62.bla"); erroroff(REPORT); hsi = new_HSPScanInterface_from_ScanWiseHSPImpl(scani,pic,slp); ssi = new_Mott_SearchStatInterface(); ssl = new_lookup_SearchStatInterface(40,2.3); if( verbose ) { info("contacted database"); } db = single_fasta_SequenceDB(argv[1]); if( db == NULL ) { fatal("Could not open sequence db...\n"); } for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) { count++; for(i=0;i<seq->len;i++) { if( !isalpha(seq->seq[i]) ) { fatal("Sequence position %d [%c] is not valid",i,seq->seq[i]); } seq->seq[i] = toupper(seq->seq[i]); } info("Processing %s",seq->name); getrusage(RUSAGE_SELF,&use); /* info("Before query %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ lm = (*hsi->scan_query)(hsi->data,seq,para); fprintf(stderr,"Got linear manager is %d entries\n",lm->len); if( lm->mat == NULL ) { lm->mat = hard_link_CompMat(mat); } getrusage(RUSAGE_SELF,&use); /* info("After query %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ sort_LinearHSPmanager(lm,compare_HSPset_score); if( trunc_best_hsp == 1 ) { for(kk=1;kk<lm->len;kk++) { free_HSPset(lm->set[kk]); lm->set[kk] = NULL; } lm->len = 1; } getrusage(RUSAGE_SELF,&use); /* info("After sort %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ hl = HitList_from_HSP_HSP2HitListImpl(hsp2hiti,lm,dpri,hsp2hit); getrusage(RUSAGE_SELF,&use); /* info("After conversion %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ free_LinearHSPmanager(lm); if( use_mott == 1 ) { apply_SearchStat_to_HitList(hl,ssi,effective_db_size); } else { for(kk=0;kk<hl->len;kk++) { hl->pair[kk]->bit_score = hl->pair[kk]->raw_score / 2.0; } } sort_HitList_by_score(hl); show_HitList_HitListOutputImpl(hloi,hl,stdout); getrusage(RUSAGE_SELF,&use); /* info("After output %s %.3fu %.3fs\n", seq->name, use.ru_utime.tv_sec + use.ru_utime.tv_usec*MICROSECOND, use.ru_stime.tv_sec + use.ru_stime.tv_usec*MICROSECOND ); */ free_HitList(hl); free_Sequence(seq); } free_DPRunImpl(dpri); free_HSPScanInterface(hsi); gettimeofday(&t1, NULL); fprintf(stderr, "[client stats] queries, time (s): %d %f\n", count, (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) * 1e-6); return 0; }