/* utest_normalization()
 *
 * Builds a single-sequence model named "aatest" from a 26-residue amino
 * acid test string, using the BLOSUM62 score system loaded into a default
 * builder, then checks the resulting HMM with p7_hmm_Validate() at a
 * tolerance of 0.0001.
 *
 * Every step that does not return <eslOK> aborts through esl_fatal().
 * <go> is accepted but not used.
 */
static void
utest_normalization(ESL_GETOPTS *go)
{
  char          errbuf[eslERRBUFSIZE];
  char         *failmsg  = "seqmodel normalization utest failed";
  char         *residues = "ACDEFGHIKLMNPQRSTVWYBJZOUX";
  float         gap_open = 0.1;
  float         gap_ext  = 0.4;
  ESL_ALPHABET *abc;
  ESL_DSQ      *digital  = NULL;
  P7_BUILDER   *builder  = NULL;
  P7_BG        *null_mod;
  P7_HMM       *model    = NULL;
  int           len;
  int           rc;

  /* Set up the alphabet, sequence length and null model. */
  abc      = esl_alphabet_Create(eslAMINO);
  len      = strlen(residues);
  null_mod = p7_bg_Create(abc);

  /* Digitize the test sequence. */
  rc = esl_abc_CreateDsq(abc, residues, &digital);
  if (rc != eslOK) esl_fatal(failmsg);

  /* Default builder, then attach the score system. */
  builder = p7_builder_Create(NULL, abc);
  if (builder == NULL) esl_fatal(failmsg);

  rc = p7_builder_LoadScoreSystem(builder, "BLOSUM62", gap_open, gap_ext, null_mod);
  if (rc != eslOK) esl_fatal(failmsg);

  /* Build the model from the builder's score system and gap parameters. */
  rc = p7_Seqmodel(abc, digital, len, "aatest", builder->Q, null_mod->f,
                   builder->popen, builder->pextend, &model);
  if (rc != eslOK) esl_fatal(failmsg);

  /* The actual test: the model must validate. */
  rc = p7_hmm_Validate(model, errbuf, 0.0001);
  if (rc != eslOK) esl_fatal("normalization utest failed\n%s\n", errbuf);

  free(digital);
  p7_bg_Destroy(null_mod);
  p7_hmm_Destroy(model);
  p7_builder_Destroy(builder);
  esl_alphabet_Destroy(abc);
}
static void search_thread(void *arg) { int i; int count; int seed; int status; int workeridx; WORKER_INFO *info; ESL_THREADS *obj; ESL_SQ dbsq; ESL_STOPWATCH *w = NULL; /* timing stopwatch */ P7_BUILDER *bld = NULL; /* HMM construction configuration */ P7_BG *bg = NULL; /* null model */ P7_PIPELINE *pli = NULL; /* work pipeline */ P7_TOPHITS *th = NULL; /* top hit results */ P7_PROFILE *gm = NULL; /* generic model */ P7_OPROFILE *om = NULL; /* optimized query profile */ obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); w = esl_stopwatch_Create(); bg = p7_bg_Create(info->abc); esl_stopwatch_Start(w); /* set up the dummy description and accession fields */ dbsq.desc = ""; dbsq.acc = ""; /* process a query sequence or hmm */ if (info->seq != NULL) { bld = p7_builder_Create(NULL, info->abc); if ((seed = esl_opt_GetInteger(info->opts, "--seed")) > 0) { esl_randomness_Init(bld->r, seed); bld->do_reseeding = TRUE; } bld->EmL = esl_opt_GetInteger(info->opts, "--EmL"); bld->EmN = esl_opt_GetInteger(info->opts, "--EmN"); bld->EvL = esl_opt_GetInteger(info->opts, "--EvL"); bld->EvN = esl_opt_GetInteger(info->opts, "--EvN"); bld->EfL = esl_opt_GetInteger(info->opts, "--EfL"); bld->EfN = esl_opt_GetInteger(info->opts, "--EfN"); bld->Eft = esl_opt_GetReal (info->opts, "--Eft"); if (esl_opt_IsOn(info->opts, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(info->opts, "--mxfile"), NULL, esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); else status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(info->opts, "--mx"), esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); if (status != eslOK) { //client_error(info->sock, status, "hmmgpmd: failed to set single query sequence score system: %s", bld->errbuf); fprintf(stderr, "hmmpgmd: failed to set single query sequence score system: %s", bld->errbuf); 
pthread_exit(NULL); return; } p7_SingleBuilder(bld, info->seq, bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */ p7_builder_Destroy(bld); } else { gm = p7_profile_Create (info->hmm->M, info->abc); om = p7_oprofile_Create(info->hmm->M, info->abc); p7_ProfileConfig(info->hmm, bg, gm, 100, p7_LOCAL); p7_oprofile_Convert(gm, om); } /* Create processing pipeline and hit list */ th = p7_tophits_Create(); pli = p7_pipeline_Create(info->opts, om->M, 100, FALSE, p7_SEARCH_SEQS); p7_pli_NewModel(pli, om, bg); if (pli->Z_setby == p7_ZSETBY_NTARGETS) pli->Z = info->db_Z; /* loop until all sequences have been processed */ count = 1; while (count > 0) { int inx; int blksz; HMMER_SEQ **sq; /* grab the next block of sequences */ if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed"); inx = *info->inx; blksz = *info->blk_size; if (inx > *info->limit) { blksz /= 5; if (blksz < 1000) { *info->limit = info->sq_cnt * 2; } else { *info->limit = inx + (info->sq_cnt - inx) * 2 / 3; } } *info->blk_size = blksz; *info->inx += blksz; if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed"); sq = info->sq_list + inx; count = info->sq_cnt - inx; if (count > blksz) count = blksz; /* Main loop: */ for (i = 0; i < count; ++i, ++sq) { if ( !(info->range_list) || hmmpgmd_IsWithinRanges ((*sq)->idx, info->range_list)) { dbsq.name = (*sq)->name; dbsq.dsq = (*sq)->dsq; dbsq.n = (*sq)->n; dbsq.idx = (*sq)->idx; if((*sq)->desc != NULL) dbsq.desc = (*sq)->desc; p7_bg_SetLength(bg, dbsq.n); p7_oprofile_ReconfigLength(om, dbsq.n); p7_Pipeline(pli, om, bg, &dbsq, th); p7_pipeline_Reuse(pli); } } } /* make available the pipeline objects to the main thread */ info->th = th; info->pli = pli; /* clean up */ p7_bg_Destroy(bg); p7_oprofile_Destroy(om); if (gm != NULL) p7_profile_Destroy(gm); esl_stopwatch_Stop(w); info->elapsed = w->elapsed; esl_stopwatch_Destroy(w); esl_threads_Finished(obj, workeridx); pthread_exit(NULL); return; }
static void mpi_worker(const ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; int status; int type; P7_BUILDER *bld = NULL; ESL_MSA *msa = NULL; ESL_MSA *postmsa = NULL; ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; char *wbuf = NULL; /* packed send/recv buffer */ void *tmp; /* for reallocation of wbuf */ int wn = 0; /* allocation size for wbuf */ int sz, n; /* size of a packed message */ int pos; char errmsg[eslERRBUFSIZE]; /* After master initialization: master broadcasts its status. */ MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus != eslOK) return; /* master saw an error code; workers do an immediate normal shutdown. */ ESL_DPRINTF2(("worker %d: sees that master has initialized\n", cfg->my_rank)); /* Master now broadcasts worker initialization information (alphabet type) * Workers returns their status post-initialization. * Initial allocation of wbuf must be large enough to guarantee that * we can pack an error result into it, because after initialization, * errors will be returned as packed (code, errmsg) messages. */ MPI_Bcast(&type, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus == eslOK) { if ((cfg->abc = esl_alphabet_Create(type)) == NULL) xstatus = eslEMEM; } if (xstatus == eslOK) { wn = 4096; if ((wbuf = malloc(wn * sizeof(char))) == NULL) xstatus = eslEMEM; } if (xstatus == eslOK) { if ((bld = p7_builder_Create(go, cfg->abc)) == NULL) xstatus = eslEMEM; } MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); /* everyone sends xstatus back to master */ if (xstatus != eslOK) { if (wbuf != NULL) free(wbuf); if (bld != NULL) p7_builder_Destroy(bld); return; /* shutdown; we passed the error back for the master to deal with. 
*/ } bg = p7_bg_Create(cfg->abc); ESL_DPRINTF2(("worker %d: initialized\n", cfg->my_rank)); /* source = 0 (master); tag = 0 */ while (esl_msa_MPIRecv(0, 0, MPI_COMM_WORLD, cfg->abc, &wbuf, &wn, &msa) == eslOK) { /* Build the HMM */ ESL_DPRINTF2(("worker %d: has received MSA %s (%d columns, %d seqs)\n", cfg->my_rank, msa->name, msa->alen, msa->nseq)); if ((status = p7_Builder(bld, msa, bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) { strcpy(errmsg, bld->errbuf); goto ERROR; } ESL_DPRINTF2(("worker %d: has produced an HMM %s\n", cfg->my_rank, hmm->name)); /* Calculate upper bound on size of sending status, HMM, and optional postmsa; make sure wbuf can hold it. */ n = 0; if (MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &sz) != 0) goto ERROR; n += sz; if (p7_hmm_MPIPackSize( hmm, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR; n += sz; if (esl_msa_MPIPackSize(postmsa, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR; n += sz; if (n > wn) { ESL_RALLOC(wbuf, tmp, sizeof(char) * n); wn = n; } ESL_DPRINTF2(("worker %d: has calculated that HMM will pack into %d bytes\n", cfg->my_rank, n)); /* Send status, HMM, and optional postmsa back to the master */ pos = 0; if (MPI_Pack (&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD) != 0) goto ERROR; if (p7_hmm_MPIPack (hmm, wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR; if (esl_msa_MPIPack(postmsa, wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR; MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); ESL_DPRINTF2(("worker %d: has sent HMM to master in message of %d bytes\n", cfg->my_rank, pos)); esl_msa_Destroy(msa); msa = NULL; esl_msa_Destroy(postmsa); postmsa = NULL; p7_hmm_Destroy(hmm); hmm = NULL; } if (wbuf != NULL) free(wbuf); p7_builder_Destroy(bld); return; ERROR: ESL_DPRINTF2(("worker %d: fails, is sending an error message, as follows:\n%s\n", cfg->my_rank, errmsg)); pos = 0; MPI_Pack(&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Pack(errmsg, eslERRBUFSIZE, MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD); 
MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); if (wbuf != NULL) free(wbuf); if (msa != NULL) esl_msa_Destroy(msa); if (hmm != NULL) p7_hmm_Destroy(hmm); if (bld != NULL) p7_builder_Destroy(bld); return; }
/* serial_master() * The serial version of hmmbuild. * For each MSA, build an HMM and save it. * * A master can only return if it's successful. All errors are handled immediately and fatally with p7_Fail(). */ static int serial_master(const ESL_GETOPTS *go, struct cfg_s *cfg) { int status; int i; int ncpus = 0; int infocnt = 0; WORKER_INFO *info = NULL; #ifdef HMMER_THREADS WORK_ITEM *item = NULL; ESL_THREADS *threadObj= NULL; ESL_WORK_QUEUE *queue = NULL; #endif char errmsg[eslERRBUFSIZE]; if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) p7_Fail(errmsg); #ifdef HMMER_THREADS /* initialize thread data */ if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu"); else esl_threads_CPUCount(&ncpus); if (ncpus > 0) { threadObj = esl_threads_Create(&pipeline_thread); queue = esl_workqueue_Create(ncpus * 2); } #endif infocnt = (ncpus == 0) ? 1 : ncpus; ESL_ALLOC(info, sizeof(*info) * infocnt); for (i = 0; i < infocnt; ++i) { info[i].bg = p7_bg_Create(cfg->abc); info[i].bld = p7_builder_Create(go, cfg->abc); if (info[i].bld == NULL) p7_Fail("p7_builder_Create failed"); #ifdef HMMER_THREADS info[i].queue = queue; if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]); #endif } #ifdef HMMER_THREADS for (i = 0; i < ncpus * 2; ++i) { ESL_ALLOC(item, sizeof(*item)); item->nali = 0; item->processed = FALSE; item->postmsa = NULL; item->msa = NULL; item->hmm = NULL; item->entropy = 0.0; status = esl_workqueue_Init(queue, item); if (status != eslOK) esl_fatal("Failed to add block to work queue"); } #endif #ifdef HMMER_THREADS if (ncpus > 0) status = thread_loop(threadObj, queue, cfg); else status = serial_loop(info, cfg); #else status = serial_loop(info, cfg); #endif if (status == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", cfg->afp->errbuf); else if (status == eslEINVAL) esl_fatal("Alignment file parse error:\n%s\n", cfg->afp->errbuf); else if (status != eslEOF) esl_fatal("Alignment file read failed with error code %d\n", status); for (i = 
0; i < infocnt; ++i) { p7_bg_Destroy(info[i].bg); p7_builder_Destroy(info[i].bld); } #ifdef HMMER_THREADS if (ncpus > 0) { esl_workqueue_Reset(queue); while (esl_workqueue_Remove(queue, (void **) &item) == eslOK) { free(item); } esl_workqueue_Destroy(queue); esl_threads_Destroy(threadObj); } #endif free(info); return eslOK; ERROR: return eslFAIL; }
/* constructHMM()
 *
 * Build an HMM from alignment <msa> (which is renamed to "Query") in
 * alphabet <abc>, and hand it back through <*ret_hmm>.
 *
 * Builder options are assembled as a spoofed command line:
 *   frag == 1      adds "--fragthresh 0"
 *   ali_hmm == 1   observed counts:  "--pnone --wnone --enone --symfrac 0"
 *   ali_hmm == 2   weighted counts:  "--pnone --symfrac 0"
 *   ali_hmm == 3   keep all columns: "--symfrac 0"
 *   anything else  no extra options
 * The non-default <ali_hmm> modes produce a non-standard HMM; they are used
 * by the logo server.
 *
 * On failure an explanation is written to <errbuf>.
 *
 * Returns:  the integer Easel status converted to the declared pointer
 *           return type: a null pointer (<eslOK> == 0) on success, a
 *           non-null value carrying the error code on failure. The HMM
 *           itself is only available through <*ret_hmm>.
 *
 * NOTE(review): the declared return type is <P7_HMM *> but the function has
 * always returned its status. The conversion is made explicit here so the
 * values callers see are unchanged; changing the signature to return <int>
 * must be done together with the prototype and all callers.
 */
P7_HMM *
constructHMM(ESL_MSA *msa, ESL_ALPHABET *abc, int ali_hmm, int frag, P7_HMM **ret_hmm, char *errbuf)
{
  int          status;
  ESL_GETOPTS *go         = esl_getopts_Create(options);
  P7_BUILDER  *bld        = NULL;
  P7_BG       *bg         = NULL;
  char        *args       = NULL;
  char        *mode_flags = NULL;

  if (go == NULL) ESL_XFAIL(eslEMEM, errbuf, "Error creating options\n");

  /* "X" stands in for the program name on the spoofed command line */
  if ((status = esl_strcat(&args, -1, "X ", -1)) != eslOK)
    ESL_XFAIL(status, errbuf, "Error building builder arguments\n");

  /* Now take this alignment and make an HMM from it */
  status = esl_msa_SetName(msa, "Query", -1);
  if (status != eslOK)
    ESL_XFAIL(status, errbuf, "Easel MSA SetNAME returned an error %d\n", status);

  /* status is eslOK at this point, so the failure code must be given
   * explicitly or the caller would be told the call succeeded */
  bg = p7_bg_Create(abc);
  if (bg == NULL) ESL_XFAIL(eslEMEM, errbuf, "Error generating bg\n");

  if (frag == 1)
    {
      if ((status = esl_strcat(&args, -1, "--fragthresh 0 ", -1)) != eslOK)
        ESL_XFAIL(status, errbuf, "Error building builder arguments\n");
    }

  /* if these flags are set, then we want a non standard hmm out */
  switch (ali_hmm) {
  case 1:  mode_flags = "--pnone --wnone --enone --symfrac 0 "; break;  /* observed counts */
  case 2:  mode_flags = "--pnone --symfrac 0 ";                 break;  /* weighted counts */
  case 3:  mode_flags = "--symfrac 0 ";                         break;  /* keep all columns */
  default: mode_flags = NULL;                                   break;  /* no extra arguments */
  }
  if (mode_flags != NULL)
    {
      if ((status = esl_strcat(&args, -1, mode_flags, -1)) != eslOK)
        ESL_XFAIL(status, errbuf, "Error building builder arguments\n");
    }

  /* pass in arguments to hmm builder */
  if ((status = esl_opt_ProcessSpoof(go, args)) != eslOK)
    ESL_XFAIL(status, errbuf, "Error processing builder options\n");
  free(args);
  args = NULL;

  bld = p7_builder_Create(go, abc);
  if (bld == NULL) ESL_XFAIL(eslEMEM, errbuf, "Error creating builder\n");

  status = p7_Builder(bld, msa, bg, ret_hmm, NULL, NULL, NULL, NULL);
  if (status != eslOK)
    {
      if (errbuf != NULL) strcpy(errbuf, bld->errbuf);
      goto ERROR;
    }

  p7_bg_Destroy(bg);
  p7_builder_Destroy(bld);
  esl_getopts_Destroy(go);
  return (P7_HMM *) (size_t) status;

 ERROR:
  free(args);                               /* not yet released if we failed early */
  if (bg  != NULL) p7_bg_Destroy(bg);
  if (bld != NULL) p7_builder_Destroy(bld);
  if (go  != NULL) esl_getopts_Destroy(go);
  return (P7_HMM *) (size_t) status;
}
/* Function: p7_builder_Create() * Synopsis: Create a default HMM construction configuration. * * Purpose: Create a construction configuration for building * HMMs in alphabet <abc>, and return a pointer to it. * * An application configuration <go> may optionally be * provided. If <go> is <NULL>, default parameters are * used. If <go> is non-<NULL>, it must include appropriate * settings for all of the following ``standard build options'': * * Model construction: --fast --hand --symfrac --fragthresh * Relative weighting: --wgsc --wblosum --wpb --wgiven --wid * Effective seq #: --eent --eclust --enone --eset --ere --esigma --eid * Prior scheme: --pnone --plaplace * E-val calibration: --EmL --EmN --EvL --EvN --EfL --EfN --Eft * run-to-run variation: --seed * * See <hmmbuild.c> or other big users of the build * pipeline for an example of appropriate <ESL_GETOPTS> * initializations of these 24 options. */ P7_BUILDER * p7_builder_Create(const ESL_GETOPTS *go, const ESL_ALPHABET *abc) { P7_BUILDER *bld = NULL; int seed; int status; ESL_ALLOC(bld, sizeof(P7_BUILDER)); bld->prior = NULL; bld->r = NULL; bld->S = NULL; bld->Q = NULL; bld->eset = -1.0; /* -1.0 = unset; must be set if effn_strategy is p7_EFFN_SET */ bld->re_target = -1.0; if (go == NULL) { bld->arch_strategy = p7_ARCH_FAST; bld->wgt_strategy = p7_WGT_PB; bld->effn_strategy = p7_EFFN_ENTROPY; seed = 42; } else { if (esl_opt_GetBoolean(go, "--fast")) bld->arch_strategy = p7_ARCH_FAST; else if (esl_opt_GetBoolean(go, "--hand")) bld->arch_strategy = p7_ARCH_HAND; if (esl_opt_GetBoolean(go, "--wpb")) bld->wgt_strategy = p7_WGT_PB; else if (esl_opt_GetBoolean(go, "--wgsc")) bld->wgt_strategy = p7_WGT_GSC; else if (esl_opt_GetBoolean(go, "--wblosum")) bld->wgt_strategy = p7_WGT_BLOSUM; else if (esl_opt_GetBoolean(go, "--wnone")) bld->wgt_strategy = p7_WGT_NONE; else if (esl_opt_GetBoolean(go, "--wgiven")) bld->wgt_strategy = p7_WGT_GIVEN; if (esl_opt_GetBoolean(go, "--eent")) bld->effn_strategy = p7_EFFN_ENTROPY; else 
if (esl_opt_GetBoolean(go, "--eclust")) bld->effn_strategy = p7_EFFN_CLUST; else if (esl_opt_GetBoolean(go, "--enone")) bld->effn_strategy = p7_EFFN_NONE; else if (esl_opt_IsOn (go, "--eset")) { bld->effn_strategy = p7_EFFN_SET; bld->eset = esl_opt_GetReal(go, "--eset"); } seed = esl_opt_GetInteger(go, "--seed"); } bld->max_insert_len = 0; /* The default RE target is alphabet dependent. */ if (go != NULL && esl_opt_IsOn (go, "--ere")) bld->re_target = esl_opt_GetReal(go, "--ere"); else { switch (abc->type) { case eslAMINO: bld->re_target = p7_ETARGET_AMINO; break; case eslDNA: bld->re_target = p7_ETARGET_DNA; break; case eslRNA: bld->re_target = p7_ETARGET_DNA; break; default: bld->re_target = p7_ETARGET_OTHER; break; } } bld->symfrac = (go != NULL) ? esl_opt_GetReal (go, "--symfrac") : 0.5; bld->fragthresh = (go != NULL) ? esl_opt_GetReal (go, "--fragthresh") : 0.5; bld->wid = (go != NULL) ? esl_opt_GetReal (go, "--wid") : 0.62; bld->esigma = (go != NULL) ? esl_opt_GetReal (go, "--esigma") : 45.0; bld->eid = (go != NULL) ? esl_opt_GetReal (go, "--eid") : 0.62; bld->EmL = (go != NULL) ? esl_opt_GetInteger(go, "--EmL") : 200; bld->EmN = (go != NULL) ? esl_opt_GetInteger(go, "--EmN") : 200; bld->EvL = (go != NULL) ? esl_opt_GetInteger(go, "--EvL") : 200; bld->EvN = (go != NULL) ? esl_opt_GetInteger(go, "--EvN") : 200; bld->EfL = (go != NULL) ? esl_opt_GetInteger(go, "--EfL") : 100; bld->EfN = (go != NULL) ? esl_opt_GetInteger(go, "--EfN") : 200; bld->Eft = (go != NULL) ? esl_opt_GetReal (go, "--Eft") : 0.04; /* Normally we reinitialize the RNG to original seed before calibrating each model. * This eliminates run-to-run variation. * As a special case, seed==0 means choose an arbitrary seed and shut off the * reinitialization; this allows run-to-run variation. */ bld->r = esl_randomness_CreateFast(seed); bld->do_reseeding = (seed == 0) ? 
FALSE : TRUE; if (go && esl_opt_GetBoolean(go, "--pnone") ) bld->prior = NULL; else if (go && esl_opt_GetBoolean(go, "--plaplace") ) bld->prior = p7_prior_CreateLaplace(abc); else { switch (abc->type) { case eslAMINO: bld->prior = p7_prior_CreateAmino(); break; case eslDNA: bld->prior = p7_prior_CreateNucleic(); break; case eslRNA: bld->prior = p7_prior_CreateNucleic(); break; default: bld->prior = p7_prior_CreateLaplace(abc); break; } if (bld->prior == NULL) goto ERROR; } bld->abc = abc; bld->errbuf[0] = '\0'; bld->popen = -1; bld->pextend = -1; return bld; ERROR: p7_builder_Destroy(bld); return NULL; }