int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); int N = esl_opt_GetInteger(go, "-N"); P7_TOPHITS *h1 = NULL; P7_TOPHITS *h2 = NULL; P7_TOPHITS *h3 = NULL; char name[] = "not_unique_name"; char acc[] = "not_unique_acc"; char desc[] = "Test description for the purposes of making the test driver allocate space"; double key; int i; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(r)); h1 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); h2 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); h3 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); for (i = 0; i < N; i++) { key = esl_random(r); tophits_Add(h1, name, acc, desc, key); key = 10.0 * esl_random(r); tophits_Add(h2, name, acc, desc, key); key = 0.1 * esl_random(r); tophits_Add(h3, name, acc, desc, key); } tophits_Add(h1, "last", NULL, NULL, -1.0); tophits_Add(h1, "first", NULL, NULL, 20.0); p7_tophits_SortBySortkey(h1); if (strcmp(h1->hit[0]->name, "first") != 0) esl_fatal("sort failed (top is %s = %f)", h1->hit[0]->name, h1->hit[0]->sortkey); if (strcmp(h1->hit[N+1]->name, "last") != 0) esl_fatal("sort failed (last is %s = %f)", h1->hit[N+1]->name, h1->hit[N+1]->sortkey); p7_tophits_Merge(h1, h2); if (strcmp(h1->hit[0]->name, "first") != 0) esl_fatal("after merge 1, sort failed (top is %s = %f)", h1->hit[0]->name, h1->hit[0]->sortkey); if (strcmp(h1->hit[2*N+1]->name, "last") != 0) esl_fatal("after merge 1, sort failed (last is %s = %f)", h1->hit[2*N+1]->name, h1->hit[2*N+1]->sortkey); p7_tophits_Merge(h3, h1); if (strcmp(h3->hit[0]->name, "first") != 0) esl_fatal("after merge 2, sort failed (top is %s = %f)", h3->hit[0]->name, h3->hit[0]->sortkey); if (strcmp(h3->hit[3*N+1]->name, "last") != 0) esl_fatal("after merge 2, sort failed (last is %s = %f)", h3->hit[3*N+1]->name, h3->hit[3*N+1]->sortkey); if 
(p7_tophits_GetMaxNameLength(h3) != strlen(name)) esl_fatal("GetMaxNameLength() failed"); p7_tophits_Destroy(h1); p7_tophits_Destroy(h2); p7_tophits_Destroy(h3); esl_randomness_Destroy(r); esl_getopts_Destroy(go); fprintf(stderr, "# status = ok\n"); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); int N = esl_opt_GetInteger(go, "-N"); int M = esl_opt_GetInteger(go, "-M"); P7_TOPHITS **h = NULL; P7_HIT *hit = NULL; double *sortkeys = NULL; char name[] = "not_unique_name"; char acc[] = "not_unique_acc"; char desc[] = "Test description for the purposes of making the benchmark allocate space"; int i,j; int status; /* prep work: generate our sort keys before starting to time anything */ ESL_ALLOC(h, sizeof(P7_TOPHITS *) * M); /* allocate pointers for M lists */ ESL_ALLOC(sortkeys, sizeof(double) * N * M); for (i = 0; i < N*M; i++) sortkeys[i] = esl_random(r); esl_stopwatch_Start(w); /* generate M "random" lists and sort them */ for (j = 0; j < M; j++) { h[j] = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); for (i = 0; i < N; i++) { p7_tophits_CreateNextHit(h[j], &hit); esl_strdup(name, -1, &(hit->name)); esl_strdup(acc, -1, &(hit->acc)); esl_strdup(desc, -1, &(hit->desc)); hit->sortkey = sortkeys[j*N + i]; hit->score = (float) sortkeys[j*N+i]; hit->pre_score = 0.0; hit->sum_score = 0.0; hit->lnP = sortkeys[j*N+i]; hit->pre_lnP = 0.0; hit->sum_lnP = 0.0; hit->ndom = N; hit->noverlaps = 0; hit->nexpected = 0; hit->flags = 0; hit->nreported = 0; hit->nincluded = 0; hit->best_domain = 0; hit->dcl = NULL; } p7_tophits_SortBySortkey(h[j]); } /* then merge them into one big list in h[0] */ for (j = 1; j < M; j++) { p7_tophits_Merge(h[0], h[j]); p7_tophits_Destroy(h[j]); } esl_stopwatch_Stop(w); p7_tophits_Destroy(h[0]); status = eslOK; ERROR: esl_getopts_Destroy(go); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); if (sortkeys != NULL) free(sortkeys); if (h != NULL) free(h); return status; }
/* serial_master()
 * The serial scan driver: for each query sequence in <cfg->seqfile>,
 * search it against the models of the HMM database <cfg->hmmfile>
 * and report the hits.
 *
 * A master can only return if it's successful. All errors are handled
 * immediately and fatally with p7_Fail() or esl_fatal().
 *
 * Args:    go   - processed command line options
 *          cfg  - application configuration; <hmmfile> and <seqfile> are used here
 *
 * Returns: <eslOK> on success. The ERROR: exit closes whatever output
 *          streams were opened and returns <status>.
 */
static int
serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  FILE            *ofp         = stdout;             /* output file for results (default stdout)             */
  FILE            *tblfp       = NULL;               /* output stream for tabular per-seq (--tblout)         */
  FILE            *dfamtblfp   = NULL;               /* output stream for tabular Dfam format (--dfamtblout) */
  FILE            *aliscoresfp = NULL;               /* output stream for alignment scores (--aliscoresout)  */
  int              seqfmt      = eslSQFILE_UNKNOWN;  /* format of seqfile                                    */
  ESL_SQFILE      *sqfp        = NULL;               /* open seqfile                                         */
  P7_HMMFILE      *hfp         = NULL;               /* open HMM database file                               */
  ESL_ALPHABET    *abc         = NULL;               /* sequence alphabet                                    */
  P7_OPROFILE     *om          = NULL;               /* target profile                                       */
  ESL_STOPWATCH   *w           = NULL;               /* timing                                               */
  ESL_SQ          *qsq         = NULL;               /* query sequence                                       */
  int              nquery      = 0;
  int              textw;
  int              status      = eslOK;
  int              hstatus     = eslOK;
  int              sstatus     = eslOK;
  int              i;
  int              ncpus       = 0;
  int              infocnt     = 0;
  WORKER_INFO     *info        = NULL;
#ifdef HMMER_THREADS
  P7_OM_BLOCK     *block       = NULL;
  ESL_THREADS     *threadObj   = NULL;
  ESL_WORK_QUEUE  *queue       = NULL;
#endif
  char             errbuf[eslERRBUFSIZE];
  double           window_beta   = -1.0;
  int              window_length = -1;

  /* Validate the window options up front, even though the values are not used further in this function. */
  if (esl_opt_IsUsed(go, "--w_beta")) {
    if ( ( window_beta = esl_opt_GetReal(go, "--w_beta") ) < 0 || window_beta > 1 )
      esl_fatal("Invalid window-length beta value\n");
  }
  if (esl_opt_IsUsed(go, "--w_length")) {
    if (( window_length = esl_opt_GetInteger(go, "--w_length")) < 4 )
      esl_fatal("Invalid window length value\n");
  }

  w = esl_stopwatch_Create();

  if (esl_opt_GetBoolean(go, "--notextw")) textw = 0;
  else                                     textw = esl_opt_GetInteger(go, "--textw");

  /* If caller declared an input format, decode it */
  if (esl_opt_IsOn(go, "--qformat")) {
    seqfmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--qformat"));
    if (seqfmt == eslSQFILE_UNKNOWN)
      p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--qformat"));
  }

  /* validate options if running as a daemon (currently disabled) */
  //  if (esl_opt_IsOn(go, "--daemon")) {
  //    /* running as a daemon, the input format must be type daemon */
  //    if (seqfmt != eslSQFILE_UNKNOWN && seqfmt != eslSQFILE_DAEMON)
  //      esl_fatal("Input format %s not supported.  Must be daemon\n", esl_opt_GetString(go, "--qformat"));
  //    seqfmt = eslSQFILE_DAEMON;
  //    if (strcmp(cfg->seqfile, "-") != 0) esl_fatal("Query sequence file must be '-'\n");
  //  }

  /* Open the target profile database to get the sequence alphabet */
  status = p7_hmmfile_OpenE(cfg->hmmfile, p7_HMMDBENV, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", cfg->hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem, trying to open HMM file %s.\n%s\n",                  cfg->hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, cfg->hmmfile, errbuf);
  if (! hfp->is_pressed)           p7_Fail("Failed to open binary auxfiles for %s: use hmmpress first\n",             hfp->fname);

  /* Read one model only to learn <abc>; the model itself is discarded and the file is reopened per query. */
  hstatus = p7_oprofile_ReadMSV(hfp, &abc, &om);
  if      (hstatus == eslEFORMAT)   p7_Fail("bad format, binary auxfiles, %s:\n%s",     cfg->hmmfile, hfp->errbuf);
  else if (hstatus == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", cfg->hmmfile);
  else if (hstatus != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s", cfg->hmmfile);

  p7_oprofile_Destroy(om);
  p7_hmmfile_Close(hfp);

  /* Open the query sequence database */
  status = esl_sqfile_OpenDigital(abc, cfg->seqfile, seqfmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n",      cfg->seqfile);
  else if (status == eslEFORMAT)   p7_Fail("Sequence file %s is empty or misformatted\n",        cfg->seqfile);
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect format of a stdin or .gz seqfile");
  else if (status != eslOK)        p7_Fail("Unexpected error %d opening sequence file %s\n", status, cfg->seqfile);

  if (sqfp->format > 100) // breaking the law!  That range is reserved for msa, for aligned formats
    p7_Fail("%s contains a multiple sequence alignment; expect unaligned sequences, like FASTA\n", cfg->seqfile);

  qsq = esl_sq_CreateDigital(abc);

  /* Open the results output files */
  if (esl_opt_IsOn(go, "-o")) {
    if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
      esl_fatal("Failed to open output file %s for writing\n", esl_opt_GetString(go, "-o"));
  }
  if (esl_opt_IsOn(go, "--tblout")) {
    /* The message reports the --tblout argument (the original looked up "--tblfp", an option name used nowhere else here). */
    if ((tblfp = fopen(esl_opt_GetString(go, "--tblout"), "w")) == NULL)
      esl_fatal("Failed to open tabular per-seq output file %s for writing\n", esl_opt_GetString(go, "--tblout"));
  }
  if (esl_opt_IsOn(go, "--dfamtblout")) {
    if ((dfamtblfp = fopen(esl_opt_GetString(go, "--dfamtblout"),"w")) == NULL)
      esl_fatal("Failed to open tabular dfam output file %s for writing\n", esl_opt_GetString(go, "--dfamtblout"));
  }
  if (esl_opt_IsOn(go, "--aliscoresout")) {
    if ((aliscoresfp = fopen(esl_opt_GetString(go, "--aliscoresout"),"w")) == NULL)
      esl_fatal("Failed to open alignment scores output file %s for writing\n", esl_opt_GetString(go, "--aliscoresout"));
  }

  output_header(ofp, go, cfg->hmmfile, cfg->seqfile);

#ifdef HMMER_THREADS
  /* initialize thread data */
  if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu");
  else                           esl_threads_CPUCount(&ncpus);

  if (ncpus > 0)
    {
      threadObj = esl_threads_Create(&pipeline_thread);
      queue     = esl_workqueue_Create(ncpus * 2);
    }
#endif

  /* One WORKER_INFO per worker thread, or a single one when running unthreaded. */
  infocnt = (ncpus == 0) ? 1 : ncpus;
  ESL_ALLOC(info, sizeof(*info) * infocnt);

  for (i = 0; i < infocnt; ++i)
    {
      info[i].bg    = p7_bg_Create(abc);
#ifdef HMMER_THREADS
      info[i].queue = queue;
#endif
    }

#ifdef HMMER_THREADS
  for (i = 0; i < ncpus * 2; ++i)
    {
      block = p7_oprofile_CreateBlock(BLOCK_SIZE);
      if (block == NULL)    esl_fatal("Failed to allocate sequence block");

      status = esl_workqueue_Init(queue, block);
      if (status != eslOK)  esl_fatal("Failed to add block to work queue");
    }
#endif

  /* Outside loop: over each query sequence in <seqfile>.
   * Read failures end the loop and are reported by the <sstatus> check
   * that follows it; inside the body <sstatus> is always eslOK.
   */
  while ((sstatus = esl_sqio_Read(sqfp, qsq)) == eslOK)
    {
      // if (qsq->L > NHMMER_MAX_RESIDUE_COUNT) p7_Fail("Input sequence %s in file %s exceeds maximum length of %d bases.\n",  qsq->name, cfg->seqfile, NHMMER_MAX_RESIDUE_COUNT);

      nquery++;
      esl_stopwatch_Start(w);

      /* Open the target profile database */
      status = p7_hmmfile_OpenE(cfg->hmmfile, p7_HMMDBENV, &hfp, NULL);
      if (status != eslOK) p7_Fail("Unexpected error %d in opening hmm file %s.\n", status, cfg->hmmfile);

#ifdef HMMER_THREADS
      /* if we are threaded, create a lock to prevent multiple readers */
      if (ncpus > 0)
        {
          status = p7_hmmfile_CreateLock(hfp);
          if (status != eslOK) p7_Fail("Unexpected error %d creating lock\n", status);
        }
#endif

      if (fprintf(ofp, "Query:       %s  [L=%ld]\n", qsq->name, (long) qsq->n) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      if (qsq->acc[0]  != 0 && fprintf(ofp, "Accession:   %s\n", qsq->acc)     < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      if (qsq->desc[0] != 0 && fprintf(ofp, "Description: %s\n", qsq->desc)    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

      for (i = 0; i < infocnt; ++i)
        {
          /* Create processing pipeline and hit list */
          info[i].th  = p7_tophits_Create();
          info[i].pli = p7_pipeline_Create(go, 100, 100, TRUE, p7_SCAN_MODELS); /* M_hint = 100, L_hint = 100 are just dummies for now */
          info[i].pli->hfp = hfp;  /* for two-stage input, pipeline needs <hfp> */

          p7_pli_NewSeq(info[i].pli, qsq);
          info[i].qsq = qsq;

          if      ( esl_opt_IsUsed(go, "--toponly") )    info[i].pli->strand = p7_STRAND_TOPONLY;
          else if ( esl_opt_IsUsed(go, "--bottomonly") ) info[i].pli->strand = p7_STRAND_BOTTOMONLY;
          else                                           info[i].pli->strand = p7_STRAND_BOTH;

#ifdef HMMER_THREADS
          if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]);
#endif
        }

#ifdef HMMER_THREADS
      if (ncpus > 0) hstatus = thread_loop(threadObj, queue, hfp);
      else           hstatus = serial_loop(info, hfp);
#else
      hstatus = serial_loop(info, hfp);
#endif
      switch(hstatus)
        {
        case eslEFORMAT:   p7_Fail("bad file format in HMM file %s",             cfg->hmmfile); break;
        case eslEINCOMPAT: p7_Fail("HMM file %s contains different alphabets",   cfg->hmmfile); break;
        case eslEOF:
        case eslOK:        /* do nothing */                                                     break;
        default:           p7_Fail("Unexpected error in reading HMMs from %s",   cfg->hmmfile);
        }

      /* merge the results of the search results */
      for (i = 1; i < infocnt; ++i)
        {
          p7_tophits_Merge(info[0].th, info[i].th);
          p7_pipeline_Merge(info[0].pli, info[i].pli);

          p7_pipeline_Destroy(info[i].pli);
          p7_tophits_Destroy(info[i].th);
        }

      /* modify e-value to account for number of models */
      for (i = 0; i < info->th->N ; i++)
        {
          info->th->unsrt[i].lnP         += log((float)info->pli->nmodels);
          info->th->unsrt[i].dcl[0].lnP   = info->th->unsrt[i].lnP;
          info->th->unsrt[i].sortkey      = -1.0 * info->th->unsrt[i].lnP;
        }

      /* it's possible to have duplicates based on how viterbi ranges can overlap */
      p7_tophits_SortByModelnameAndAlipos(info->th);
      p7_tophits_RemoveDuplicates(info->th, info->pli->use_bit_cutoffs);

      /* Print results */
      p7_tophits_SortBySortkey(info->th);
      p7_tophits_Threshold(info->th, info->pli);

      /* tally up total number of hits and target coverage */
      info->pli->n_output = info->pli->pos_output = 0;
      for (i = 0; i < info->th->N; i++) {
        if ( (info->th->hit[i]->flags & p7_IS_REPORTED) || info->th->hit[i]->flags & p7_IS_INCLUDED) {
          info->pli->n_output++;
          info->pli->pos_output += abs(info->th->hit[i]->dcl[0].jali - info->th->hit[i]->dcl[0].iali) + 1;
        }
      }

      p7_tophits_Targets(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      p7_tophits_Domains(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

      /* The tabular header flag is on only for the first query. */
      if (tblfp)       p7_tophits_TabularTargets(tblfp, qsq->name, qsq->acc, info->th, info->pli, (nquery == 1));
      if (dfamtblfp)   p7_tophits_TabularXfam(dfamtblfp, qsq->name, NULL, info->th, info->pli);
      if (aliscoresfp) p7_tophits_AliScores(aliscoresfp, qsq->name, info->th );

      esl_stopwatch_Stop(w);
      info->pli->nseqs = 1;
      p7_pli_Statistics(ofp, info->pli, w);
      if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      fflush(ofp);

      /* Per-query teardown; <info[0]> is rebuilt for the next sequence. */
      p7_hmmfile_Close(hfp);
      p7_pipeline_Destroy(info->pli);
      p7_tophits_Destroy(info->th);
      esl_sq_Reuse(qsq);
    }

  if      (sstatus == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
                                            sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
  else if (sstatus != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
                                            sstatus, sqfp->filename);

  /* Terminate outputs - any last words? */
  if (tblfp)    p7_tophits_TabularTail(tblfp,    "hmmscan", p7_SCAN_MODELS, cfg->seqfile, cfg->hmmfile, go);
  if (ofp)      { if (fprintf(ofp, "[ok]\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }

  /* Cleanup - prepare for successful exit */
  for (i = 0; i < infocnt; ++i)
    p7_bg_Destroy(info[i].bg);

#ifdef HMMER_THREADS
  if (ncpus > 0)
    {
      esl_workqueue_Reset(queue);
      while (esl_workqueue_Remove(queue, (void **) &block) == eslOK)
        p7_oprofile_DestroyBlock(block);
      esl_workqueue_Destroy(queue);
      esl_threads_Destroy(threadObj);
    }
#endif

  free(info);

  esl_sq_Destroy(qsq);
  esl_stopwatch_Destroy(w);
  esl_alphabet_Destroy(abc);
  esl_sqfile_Close(sqfp);

  if (ofp != stdout) fclose(ofp);
  if (tblfp)         fclose(tblfp);
  if (dfamtblfp)     fclose(dfamtblfp);
  if (aliscoresfp)   fclose(aliscoresfp);
  return eslOK;

 ERROR:
  if (ofp != stdout) fclose(ofp);
  if (tblfp)         fclose(tblfp);
  if (dfamtblfp)     fclose(dfamtblfp);
  if (aliscoresfp)   fclose(aliscoresfp);
  return status;
}
/* process_SearchCmd()
 * Execute one search or scan command.
 *
 * Builds the query from <cmd>, adds <env->ncpus> worker threads (search
 * or scan, depending on the command type) over the requested slice of
 * the database, waits for them to finish, merges the per-thread hit
 * lists and pipelines into <info[0]>, and passes the result to
 * send_results() with <env->fd>.
 *
 * Args:    cmd - the command to execute
 *          env - worker environment: thread count, databases, and
 *                the descriptor handed to send_results()
 *
 * Allocation failure is reported through LOG_FATAL_MSG() at ERROR:.
 */
static void
process_SearchCmd(HMMD_COMMAND *cmd, WORKER_ENV *env)
{
  int              i;
  int              cnt;
  int              limit;
  int              status;
  int              blk_size;
  WORKER_INFO     *info       = NULL;
  ESL_ALPHABET    *abc;
  ESL_STOPWATCH   *w;
  ESL_THREADS     *threadObj  = NULL;
  pthread_mutex_t  inx_mutex;
  int              current_index;
  QUEUE_DATA      *query      = NULL;
  time_t           date;
  char             timestamp[32];

  w   = esl_stopwatch_Create();
  abc = esl_alphabet_Create(eslAMINO);

  if (pthread_mutex_init(&inx_mutex, NULL) != 0) p7_Fail("mutex init failed");
  ESL_ALLOC(info, sizeof(*info) * env->ncpus);

  /* Log the current time (at search start) */
  date = time(NULL);
  ctime_r(&date, timestamp);
  printf("\n%s", timestamp);    /* note that ctime_r() leaves \n on end of timestamp */

  /* initialize thread data */
  query = process_QueryCmd(cmd, env);
  esl_stopwatch_Start(w);

  /* The optional range list is built once in info[0] and shared by every worker below. */
  info->range_list = NULL;
  if (esl_opt_IsUsed(query->opts, "--seqdb_ranges")) {
    ESL_ALLOC(info->range_list, sizeof(RANGE_LIST));
    hmmpgmd_GetRanges(info->range_list, esl_opt_GetString(query->opts, "--seqdb_ranges"));
  }

  if (query->cmd_type == HMMD_CMD_SEARCH) threadObj = esl_threads_Create(&search_thread);
  else                                    threadObj = esl_threads_Create(&scan_thread);

  if (query->query_type == HMMD_SEQUENCE) {
    fprintf(stdout, "Search seq %s  [L=%ld]", query->seq->name, (long) query->seq->n);
  } else {
    fprintf(stdout, "Search hmm %s  [M=%d]", query->hmm->name, query->hmm->M);
  }
  fprintf(stdout, " vs %s DB %d [%d - %d]",
          (query->cmd_type == HMMD_CMD_SEARCH) ? "SEQ" : "HMM",
          query->dbx, query->inx, query->inx + query->cnt - 1);

  if (info->range_list)
    fprintf(stdout, " in range(s) %s", esl_opt_GetString(query->opts, "--seqdb_ranges"));

  fprintf(stdout, "\n");

  /* Create processing pipeline and hit list */
  for (i = 0; i < env->ncpus; ++i) {
    info[i].abc        = query->abc;
    info[i].hmm        = query->hmm;
    info[i].seq        = query->seq;
    info[i].opts       = query->opts;
    info[i].range_list = info[0].range_list;

    info[i].th         = NULL;
    info[i].pli        = NULL;

    /* Every worker gets pointers to the same mutex, index, block size and
     * limit, so these four locals are shared state across all threads.
     * They are assigned below, before esl_threads_WaitForStart().
     * TODO: come back and clean this up.
     */
    info[i].inx_mutex  = &inx_mutex;
    info[i].inx        = &current_index;
    info[i].blk_size   = &blk_size;
    info[i].limit      = &limit;

    if (query->cmd_type == HMMD_CMD_SEARCH) {
      HMMER_SEQ **list  = env->seq_db->db[query->dbx].list;
      info[i].sq_list   = &list[query->inx];
      info[i].sq_cnt    = query->cnt;
      info[i].db_Z      = env->seq_db->db[query->dbx].K;
      info[i].om_list   = NULL;
      info[i].om_cnt    = 0;
    } else {
      info[i].sq_list   = NULL;
      info[i].sq_cnt    = 0;
      info[i].db_Z      = 0;
      info[i].om_list   = &env->hmm_db->list[query->inx];
      info[i].om_cnt    = query->cnt;
    }

    esl_threads_AddThread(threadObj, &info[i]);
  }

  /* try block size of 5000.  we will need enough sequences for four
   * blocks per thread or better.
   */
  blk_size = 5000;
  cnt      = query->cnt / env->ncpus / blk_size;
  limit    = query->cnt * 2 / 3;
  if (cnt < 4) {
    /* try block size of 1000  */
    blk_size /= 5;
    cnt = query->cnt / env->ncpus / blk_size;
    if (cnt < 4) {
      /* still not enough.  just divide it up into one block per thread */
      blk_size = query->cnt / env->ncpus + 1;
      limit    = query->cnt * 2;
    }
  }
  current_index = 0;

  esl_threads_WaitForStart(threadObj);
  esl_threads_WaitForFinish(threadObj);

  esl_stopwatch_Stop(w);

#if 1
  fprintf (stdout, "   Sequences  Residues                              Elapsed\n");
  for (i = 0; i < env->ncpus; ++i) {
    print_timings(i, info[i].elapsed, info[i].pli);
  }
#endif

  /* merge the results of the search results */
  for (i = 1; i < env->ncpus; ++i) {
    p7_tophits_Merge(info[0].th, info[i].th);
    p7_pipeline_Merge(info[0].pli, info[i].pli);
    p7_pipeline_Destroy(info[i].pli);
    p7_tophits_Destroy(info[i].th);
  }

  print_timings(99, w->elapsed, info[0].pli);
  send_results(env->fd, w, info);

  /* free the last of the pipeline data */
  p7_pipeline_Destroy(info->pli);
  p7_tophits_Destroy(info->th);

  free_QueueData(query);

  esl_threads_Destroy(threadObj);

  pthread_mutex_destroy(&inx_mutex);

  /* The range list is owned by info[0]; the other entries only borrowed the pointer. */
  if (info->range_list) {
    if (info->range_list->starts)  free(info->range_list->starts);
    if (info->range_list->ends)    free(info->range_list->ends);
    free (info->range_list);
  }

  free(info);

  esl_stopwatch_Destroy(w);
  esl_alphabet_Destroy(abc);

  return;

 ERROR:
  LOG_FATAL_MSG("malloc", errno);
}
/* serial_master()
 * The serial version of hmmsearch, cut down to a single query:
 * the first HMM in <cfg->hmmfile> is read and searched against the
 * sequence database <cfg->dbfile>. The loop over further query HMMs
 * and most of the result output are currently commented out.
 *
 * A master can only return if it's successful. All errors are handled
 * immediately and fatally with p7_Fail(). We also use the
 * ESL_EXCEPTION and ERROR: mechanisms, but only because we know we're
 * using a fatal exception handler.
 *
 * Args:    go   - processed command line options
 *          cfg  - application configuration (<hmmfile>, <dbfile>,
 *                 <firstseq_key>, <n_targetseq> are used here)
 *
 * Returns: <eslOK> on success; <eslFAIL> from the ERROR: exit.
 */
static int
serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  FILE            *ofp      = stdout;             /* results output file (-o)                */
  P7_HMMFILE      *hfp      = NULL;               /* open input HMM file                     */
  ESL_SQFILE      *dbfp     = NULL;               /* open input sequence file                */
  P7_HMM          *hmm      = NULL;               /* one HMM query                           */
  ESL_ALPHABET    *abc      = NULL;               /* digital alphabet                        */
  int              dbfmt    = eslSQFILE_UNKNOWN;  /* format code for sequence database file  */
  ESL_STOPWATCH   *w;
  int              textw    = 0;                  /* only consumed by the disabled output calls */
  int              nquery   = 0;
  int              status   = eslOK;
  int              hstatus  = eslOK;
  int              sstatus  = eslOK;
  int              i;
  int              infocnt  = 0;
  WORKER_INFO     *info     = NULL;
  char             errbuf[eslERRBUFSIZE];

  w = esl_stopwatch_Create();

  if (esl_opt_GetBoolean(go, "--notextw")) textw = 0;
  else                                     textw = esl_opt_GetInteger(go, "--textw");

  if (esl_opt_IsOn(go, "--tformat")) {
    dbfmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--tformat"));
    if (dbfmt == eslSQFILE_UNKNOWN)
      p7_Fail("%s is not a recognized sequence database file format\n", esl_opt_GetString(go, "--tformat"));
  }

  /* Open the target sequence database */
  status = esl_sqfile_Open(cfg->dbfile, dbfmt, p7_SEQDBENV, &dbfp);
  if      (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n",          cfg->dbfile);
  else if (status == eslEFORMAT)   p7_Fail("Sequence file %s is empty or misformatted\n",            cfg->dbfile);
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect format of a stdin or .gz seqfile");
  else if (status != eslOK)        p7_Fail("Unexpected error %d opening sequence file %s\n", status, cfg->dbfile);

  if (esl_opt_IsUsed(go, "--restrictdb_stkey") || esl_opt_IsUsed(go, "--restrictdb_n")) {
    if (esl_opt_IsUsed(go, "--ssifile"))
      esl_sqfile_OpenSSI(dbfp, esl_opt_GetString(go, "--ssifile"));
    else
      esl_sqfile_OpenSSI(dbfp, NULL);
  }

  /* Open the query profile HMM file */
  status = p7_hmmfile_OpenE(cfg->hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", cfg->hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                cfg->hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, cfg->hmmfile, errbuf);

  /* Open the results output files */
  if (esl_opt_IsOn(go, "-o")) {
    if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w+")) == NULL)
      p7_Fail("Failed to open output file %s for writing\n", esl_opt_GetString(go, "-o"));
  }

  infocnt = 1;
  ESL_ALLOC(info, sizeof(*info) * infocnt);

  /* <abc> is not known 'til first HMM is read. */
  hstatus = p7_hmmfile_Read(hfp, &abc, &hmm);

  /* One-time initializations after <abc> becomes known are currently disabled: */
  //  output_header(ofp, go, cfg->hmmfile, cfg->dbfile);
  //  dbfp->abc = abc;   //ReadBlock requires knowledge of the alphabet to decide how best to read blocks
  //  for (i = 0; i < infocnt; ++i)
  //    info[i].bg = p7_bg_Create(abc);

  /* The outer "while (hstatus == eslOK)" loop over query HMMs is disabled,
   * so this body runs at most once. It is guarded by the read status
   * because <hmm> and <abc> are only valid after a successful read;
   * without the guard a failed read or an empty file dereferences a
   * NULL <hmm>. Failed reads are reported by the switch on <hstatus>
   * below.
   */
  if (hstatus == eslOK)
    {
      P7_PROFILE  *gm = NULL;
      P7_OPROFILE *om = NULL;       /* optimized query profile */

      nquery++;
      esl_stopwatch_Start(w);

      /* seqfile may need to be rewound (multiquery mode) */
      if (nquery > 1)
        {
          if (! esl_sqfile_IsRewindable(dbfp))
            esl_fatal("Target sequence file %s isn't rewindable; can't search it with multiple queries", cfg->dbfile);

          if (! esl_opt_IsUsed(go, "--restrictdb_stkey") )
            esl_sqfile_Position(dbfp, 0); /* only re-set current position to 0 if we're not planning to set it in a moment */
        }

      if ( cfg->firstseq_key != NULL ) {
        /* it's tempting to want to do this once and capture the offset position for future passes,
         * but ncbi files make this non-trivial, so this keeps it general */
        sstatus = esl_sqfile_PositionByKey(dbfp, cfg->firstseq_key);
        /* <firstseq_key> is a string key, so it is printed with %s (the original used %d). */
        if (sstatus != eslOK)
          p7_Fail("Failure setting restrictdb_stkey to %s\n", cfg->firstseq_key);
      }

      //  if (fprintf(ofp, "Query:       %s  [M=%d]\n", hmm->name, hmm->M)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      //  if (hmm->acc)  { if (fprintf(ofp, "Accession:   %s\n", hmm->acc)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }
      //  if (hmm->desc) { if (fprintf(ofp, "Description: %s\n", hmm->desc) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }

      /* Convert to an optimized model.
       * NOTE(review): the p7_ProfileConfig() call is commented out, so <gm>
       * is converted exactly as p7_profile_Create() left it - confirm this
       * is intended.
       */
      gm = p7_profile_Create (hmm->M, abc);
      om = p7_oprofile_Create(hmm->M, abc);
      //  p7_ProfileConfig(hmm, info->bg, gm, 100, p7_LOCAL); /* 100 is a dummy length for now; and MSVFilter requires local mode */
      p7_oprofile_Convert(gm, om);

      for (i = 0; i < infocnt; ++i)
        {
          P7_PIPELINE *pli;

          /* Create processing pipeline and hit list */
          info[i].th  = p7_tophits_Create();
          info[i].om  = p7_oprofile_Clone(om);
          info[i].pli = p7_pipeline_Create(go, om->M, 100, FALSE, p7_SEARCH_SEQS); /* L_hint = 100 is just a dummy for now */

          /* Per-model pipeline bookkeeping done by hand; the threshold and bias filter setup is disabled. */
          pli = info[i].pli;
          pli->nmodels++;
          pli->nnodes += info[i].om->M;
          //  if (pli->Z_setby == p7_ZSETBY_NTARGETS && pli->mode == p7_SCAN_MODELS) pli->Z = pli->nmodels;
          //  if (pli->do_biasfilter) p7_bg_SetFilter(info[i].bg, info[i].om->M, info[i].om->compo);
          //  if (pli->mode == p7_SEARCH_SEQS)
          //    status = p7_pli_NewModelThresholds(pli, info[i].om);
          pli->W = info[i].om->max_length;
        }

      sstatus = serial_loop(info, dbfp, cfg->n_targetseq, ofp);
      switch(sstatus)
        {
        case eslEFORMAT:
          esl_fatal("Parse failed (sequence file %s):\n%s\n",
                    dbfp->filename, esl_sqfile_GetErrorBuf(dbfp));
          break;
        case eslEOF:
          /* do nothing */
          break;
        default:
          esl_fatal("Unexpected error %d reading sequence file %s", sstatus, dbfp->filename);
        }

      /* merge the results of the search results */
      for (i = 1; i < infocnt; ++i)
        {
          p7_tophits_Merge(info[0].th, info[i].th);
          p7_pipeline_Merge(info[0].pli, info[i].pli);

          p7_pipeline_Destroy(info[i].pli);
          p7_tophits_Destroy(info[i].th);
          p7_oprofile_Destroy(info[i].om);
        }

      /* Sort and threshold the results; the printing calls are disabled. */
      p7_tophits_SortBySortkey(info->th);
      p7_tophits_Threshold(info->th, info->pli);
      //  p7_tophits_Targets(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      //  p7_tophits_Domains(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

      esl_stopwatch_Stop(w);
      //  p7_pli_Statistics(ofp, info->pli, w);
      //  if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

      p7_pipeline_Destroy(info->pli);
      p7_tophits_Destroy(info->th);
      p7_oprofile_Destroy(info->om);
      p7_oprofile_Destroy(om);
      p7_profile_Destroy(gm);
      p7_hmm_Destroy(hmm);

      //  hstatus = p7_hmmfile_Read(hfp, &abc, &hmm);
    } /* end of the (formerly looping) per-query section */

  switch(hstatus)
    {
    case eslEOD:       p7_Fail("read failed, HMM file %s may be truncated?", cfg->hmmfile);      break;
    case eslEFORMAT:   p7_Fail("bad file format in HMM file %s",             cfg->hmmfile);      break;
    case eslEINCOMPAT: p7_Fail("HMM file %s contains different alphabets",   cfg->hmmfile);      break;
    case eslEOF:
    case eslOK:        /* do nothing. EOF is what we want. */                                    break;
    default:           p7_Fail("Unexpected error (%d) in reading HMMs from %s", hstatus, cfg->hmmfile);
    }

  /* Terminate outputs... any last words? */

  /* Cleanup - prepare for exit */
  //  for (i = 0; i < infocnt; ++i)
  //    p7_bg_Destroy(info[i].bg);

  free(info);

  p7_hmmfile_Close(hfp);
  esl_sqfile_Close(dbfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);

  if (ofp != stdout) fclose(ofp);

  /* NOTE(review): looks like leftover debug output; kept because something may depend on it - confirm and remove. */
  printf("44HHHH \n");
  return eslOK;

 ERROR:
  return eslFAIL;
}