/* Function: main_loop_threaded()
 * Date:     SRE, Wed Dec  1 12:43:09 1999 [St. Louis]
 *
 * Purpose:  Threaded calibration driver. Seeds the random number
 *           generator, builds the HMM's log-odds scores, allocates the
 *           score histogram, and hands the sampling job to a worker
 *           pool. Once the pool has been stopped, its results are
 *           copied out to the caller.
 *
 * Args:     hmm      - an HMM to calibrate
 *           seed     - random number seed
 *           nsample  - number of seqs to synthesize
 *           lenmean  - mean length of random sequence
 *           lensd    - std dev of random seq length
 *           fixedlen - if nonzero, override lenmean, always this len
 *           nthreads - number of threads to start
 *           ret_hist - RETURN: the score histogram
 *           ret_max  - RETURN: highest score seen in simulation
 *           twatch   - RETURN: accumulation of thread times
 *
 * Returns:  (void)
 *           The histogram is allocated here and must be freed by the
 *           caller.
 */
static void
main_loop_threaded(struct plan7_s *hmm, int seed, int nsample,
                   float lenmean, float lensd, int fixedlen,
                   int nthreads,
                   struct histogram_s **ret_hist, float *ret_max,
                   Stopwatch_t *twatch)
{
  float               null_freqs[MAXABET]; /* residue frequencies for random seqs */
  float               null_p1;             /* filled by P7DefaultNullModel; not used further */
  struct histogram_s *scores;              /* histogram handed to the pool       */
  struct workpool_s  *pool;                /* pool of worker threads             */

  /* Setup. The alphabet is assumed to be set already (reading the
   * HMM sets it).
   */
  sre_srandom(seed);
  P7Logoddsify(hmm, TRUE);
  P7DefaultNullModel(null_freqs, &null_p1);
  scores = AllocHistogram(-200, 200, 100);

  /* Run the pool, then stop it before reading its results. */
  pool = workpool_start(hmm, lenmean, lensd, fixedlen, null_freqs, nsample,
                        scores, nthreads);
  workpool_stop(pool);

  /* Copy results out before the pool structure is released. */
  *ret_hist = scores;
  *ret_max  = pool->max_score;
  StopwatchInclude(twatch, &(pool->watch));

  workpool_free(pool);
}
/* Function: Plan9toPlan7() * * Purpose: Convert an old HMM into Plan7. Configures it in * ls mode. * * Args: hmm - old ugly plan9 style HMM * ret_plan7 - new wonderful Plan7 HMM * * Return: (void) * Plan7 HMM is allocated here. Free w/ FreePlan7(). */ void Plan9toPlan7(struct plan9_s *hmm, struct plan7_s **ret_plan7) { struct plan7_s *plan7; int k, x; plan7 = AllocPlan7(hmm->M); for (k = 1; k < hmm->M; k++) { plan7->t[k][TMM] = hmm->mat[k].t[MATCH]; plan7->t[k][TMD] = hmm->mat[k].t[DELETE]; plan7->t[k][TMI] = hmm->mat[k].t[INSERT]; plan7->t[k][TDM] = hmm->del[k].t[MATCH]; plan7->t[k][TDD] = hmm->del[k].t[DELETE]; plan7->t[k][TIM] = hmm->ins[k].t[MATCH]; plan7->t[k][TII] = hmm->ins[k].t[INSERT]; } for (k = 1; k <= hmm->M; k++) for (x = 0; x < Alphabet_size; x++) plan7->mat[k][x] = hmm->mat[k].p[x]; for (k = 1; k < hmm->M; k++) for (x = 0; x < Alphabet_size; x++) plan7->ins[k][x] = hmm->ins[k].p[x]; plan7->tbd1 = hmm->mat[0].t[DELETE] / (hmm->mat[0].t[DELETE] + hmm->mat[0].t[MATCH]); /* We have to make up the null transition p1; use default */ P7DefaultNullModel(plan7->null, &(plan7->p1)); for (x = 0; x < Alphabet_size; x++) plan7->null[x] = hmm->null[x]; if (hmm->name != NULL) Plan7SetName(plan7, hmm->name); if (hmm->flags & HMM_REF) { strcpy(plan7->rf, hmm->ref); plan7->flags |= PLAN7_RF; } if (hmm->flags & HMM_CS) { strcpy(plan7->cs, hmm->cs); plan7->flags |= PLAN7_CS; } Plan7LSConfig(plan7); /* configure specials for ls-style alignment */ Plan7Renormalize(plan7); /* mainly to correct for missing ID and DI */ plan7->flags |= PLAN7_HASPROB; /* probabilities are valid */ plan7->flags &= ~PLAN7_HASBITS; /* scores are not valid */ *ret_plan7 = plan7; }
/* Function: main_loop_serial() * Date: SRE, Tue Aug 18 16:18:28 1998 [St. Louis] * * Purpose: Given an HMM and parameters for synthesizing random * sequences; return a histogram of scores. * (Serial version) * * Args: hmm - an HMM to calibrate. * seed - random number seed * nsample - number of seqs to synthesize * lenmean - mean length of random sequence * lensd - std dev of random seq length * fixedlen - if nonzero, override lenmean, always this len * ret_hist - RETURN: the score histogram * ret_max - RETURN: highest score seen in simulation * * Returns: (void) * hist is alloc'ed here, and must be free'd by caller. */ static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max) { struct histogram_s *hist; float randomseq[MAXABET]; float p1; float max; char *seq; char *dsq; float score; int sqlen; int idx; /* Initialize. * We assume we've already set the alphabet (safe, because * HMM input sets the alphabet). */ sre_srandom(seed); P7Logoddsify(hmm, TRUE); P7DefaultNullModel(randomseq, &p1); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; for (idx = 0; idx < nsample; idx++) { /* choose length of random sequence */ if (fixedlen) sqlen = fixedlen; else do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1); /* generate it */ seq = RandomSequence(Alphabet, randomseq, Alphabet_size, sqlen); dsq = DigitizeSequence(seq, sqlen); if (P7ViterbiSize(sqlen, hmm->M) <= RAMLIMIT) score = P7Viterbi(dsq, sqlen, hmm, NULL); else score = P7SmallViterbi(dsq, sqlen, hmm, NULL); AddToHistogram(hist, score); if (score > max) max = score; free(dsq); free(seq); } *ret_hist = hist; *ret_max = max; return; }
void HMMCreateWPoolTask::runUnsafe() { const UHMMCalibrateSettings& settings = pt->getSettings(); WorkPool_s* wpool = pt->getWorkPool(); SetAlphabet(wpool->hmm->atype); sre_srandom(settings.seed); wpool->fixedlen = settings.fixedlen; wpool->hist = AllocHistogram(-200, 200, 100); wpool->lenmean = settings.lenmean; wpool->lensd = settings.lensd; wpool->nsample = settings.nsample; wpool->nseq = 0; wpool->randomseq.resize(MAXABET); wpool->max_score = -FLT_MAX; float p1; P7Logoddsify(wpool->hmm, TRUE); P7DefaultNullModel(wpool->randomseq.data(), &p1); }
static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max, int& cancelFlag, int& progress) { struct histogram_s *hist; struct dpmatrix_s *mx; float randomseq[MAXABET]; float p1; float max; char *seq; unsigned char *dsq; float score; int sqlen; int idx; // Initialize. // We assume we've already set the alphabet (safe, because // HMM input sets the alphabet). sre_srandom(seed); //get HMMERTaskLocalData HMMERTaskLocalData *tls = getHMMERTaskLocalData(); alphabet_s &al = tls->al; SetAlphabet(hmm->atype); P7Logoddsify(hmm, TRUE); P7DefaultNullModel(randomseq, &p1); hist = AllocHistogram(-200, 200, 100); mx = CreatePlan7Matrix(1, hmm->M, 25, 0); max = -FLT_MAX; progress = 0; int pStub; for (idx = 0; idx < nsample && !cancelFlag; idx++) { // choose length of random sequence if (fixedlen) { sqlen = fixedlen; } else { do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1); } // generate it seq = RandomSequence(al.Alphabet, randomseq, al.Alphabet_size, sqlen); dsq = DigitizeSequence(seq, sqlen); if (P7ViterbiSpaceOK(sqlen, hmm->M, mx)) { score = P7Viterbi(dsq, sqlen, hmm, mx, NULL); } else { score = P7SmallViterbi(dsq, sqlen, hmm, mx, NULL, pStub); } AddToHistogram(hist, score); max = qMax(score, max); progress = int(100*idx/float(nsample)); free(dsq); free(seq); } FreePlan7Matrix(mx); *ret_hist = hist; *ret_max = max; }
int main(void) { int master_tid; /* PVM TID of our master */ int slaveidx; /* my slave index (0..nslaves-1) */ struct plan7_s *hmm; /* HMM to calibrate, sent from master */ struct histogram_s *hist; /* score histogram */ int hmmidx; /* index of this HMM */ char *seq; /* synthetic random sequence */ char *dsq; /* digitized seq */ int len; /* length of seq */ float sc; /* score of seq aligned to HMM */ float max; /* maximum score seen in sample */ int seed; /* random number seed */ int nsample; /* number of seqs to sample */ int fixedlen; /* if nonzero, fixed length of seq */ float lenmean; /* Gaussian mean length of seq */ float lensd; /* Gaussian length std. dev. for seq */ int fitok; /* TRUE if EVD fit was OK */ float randomseq[MAXABET]; /* iid frequencies of residues */ float p1; int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ int idx; int code; /* Register leave_pvm() cleanup function so any exit() call * first calls pvm_exit(). */ if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } /***************************************************************** * initialization. * Master broadcasts the problem to us: parameters of the * HMM calibration. ******************************************************************/ master_tid = pvm_parent(); /* who's our master? */ pvm_recv(master_tid, HMMPVM_INIT); pvm_upkint(&nsample, 1, 1); pvm_upkint(&fixedlen, 1, 1); pvm_upkfloat(&lenmean, 1, 1); pvm_upkfloat(&lensd, 1, 1); /* tell the master we're OK and ready to go (or not) */ code = HMMPVM_OK; pvm_initsend(PvmDataDefault); pvm_pkint(&code, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); /***************************************************************** * Main loop. * Receive a random number seed, then an HMM to search against. * If we receive a -1 seed, we shut down. 
*****************************************************************/ slaveidx = -1; for (;;) { pvm_recv(master_tid, HMMPVM_WORK); pvm_upkint(&seed, 1, 1); if (seed == -1) break; /* shutdown signal */ pvm_upkint(&hmmidx, 1, 1); pvm_upkint(&alphatype,1, 1); SetAlphabet(alphatype); hmm = PVMUnpackHMM(); if (hmm == NULL) Die("oh no, the HMM never arrived"); if (slaveidx == -1) slaveidx = hmmidx; P7DefaultNullModel(randomseq, &p1); sre_srandom(seed); P7Logoddsify(hmm, TRUE); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; for (idx = 0; idx < nsample; idx++) { /* choose length of random sequence */ if (fixedlen) len = fixedlen; else do len = (int) Gaussrandom(lenmean, lensd); while (len < 1); /* generate it */ seq = RandomSequence(Alphabet, randomseq, Alphabet_size, len); dsq = DigitizeSequence(seq, len); if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) sc = P7Viterbi(dsq, len, hmm, NULL); else sc = P7SmallViterbi(dsq, len, hmm, NULL); AddToHistogram(hist, sc); if (sc > max) max = sc; free(seq); free(dsq); } /* Fit an EVD to the observed histogram. * The TRUE left-censors and fits only the right slope of the histogram. * The 9999. is an arbitrary high number that means we won't trim outliers * on the right. */ fitok = ExtremeValueFitHistogram(hist, TRUE, 9999.); /* Return output to master. * Currently we don't send the histogram back, but we could. */ pvm_initsend(PvmDataDefault); pvm_pkint(&slaveidx, 1, 1); pvm_pkint(&hmmidx, 1, 1); PVMPackString(hmm->name); pvm_pkint(&fitok, 1, 1); pvm_pkfloat(&(hist->param[EVD_MU]), 1, 1); pvm_pkfloat(&(hist->param[EVD_LAMBDA]), 1, 1); pvm_pkfloat(&max, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); /* cleanup */ FreeHistogram(hist); FreePlan7(hmm); } /*********************************************** * Cleanup, return. ***********************************************/ return 0; /* pvm_exit() is called by atexit() registration. */ }