/* Function: main_loop_threaded() * Date: SRE, Wed Dec 1 12:43:09 1999 [St. Louis] * * Purpose: Given an HMM and parameters for synthesizing random * sequences; return a histogram of scores. * (Threaded version.) * * Args: hmm - an HMM to calibrate. * seed - random number seed * nsample - number of seqs to synthesize * lenmean - mean length of random sequence * lensd - std dev of random seq length * fixedlen - if nonzero, override lenmean, always this len * nthreads - number of threads to start * ret_hist - RETURN: the score histogram * ret_max - RETURN: highest score seen in simulation * twatch - RETURN: accumulation of thread times * * Returns: (void) * hist is alloc'ed here, and must be free'd by caller. */ static void main_loop_threaded(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, int nthreads, struct histogram_s **ret_hist, float *ret_max, Stopwatch_t *twatch) { struct histogram_s *hist; float randomseq[MAXABET]; float p1; struct workpool_s *wpool; /* pool of worker threads */ /* Initialize. * We assume we've already set the alphabet (safe, because * HMM input sets the alphabet). */ sre_srandom(seed); P7Logoddsify(hmm, TRUE); P7DefaultNullModel(randomseq, &p1); hist = AllocHistogram(-200, 200, 100); wpool = workpool_start(hmm, lenmean, lensd, fixedlen, randomseq, nsample, hist, nthreads); workpool_stop(wpool); *ret_hist = hist; *ret_max = wpool->max_score; StopwatchInclude(twatch, &(wpool->watch)); workpool_free(wpool); return; }
/* Function: main_loop_serial() * Date: SRE, Tue Aug 18 16:18:28 1998 [St. Louis] * * Purpose: Given an HMM and parameters for synthesizing random * sequences; return a histogram of scores. * (Serial version) * * Args: hmm - an HMM to calibrate. * seed - random number seed * nsample - number of seqs to synthesize * lenmean - mean length of random sequence * lensd - std dev of random seq length * fixedlen - if nonzero, override lenmean, always this len * ret_hist - RETURN: the score histogram * ret_max - RETURN: highest score seen in simulation * * Returns: (void) * hist is alloc'ed here, and must be free'd by caller. */ static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max) { struct histogram_s *hist; float randomseq[MAXABET]; float p1; float max; char *seq; char *dsq; float score; int sqlen; int idx; /* Initialize. * We assume we've already set the alphabet (safe, because * HMM input sets the alphabet). */ sre_srandom(seed); P7Logoddsify(hmm, TRUE); P7DefaultNullModel(randomseq, &p1); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; for (idx = 0; idx < nsample; idx++) { /* choose length of random sequence */ if (fixedlen) sqlen = fixedlen; else do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1); /* generate it */ seq = RandomSequence(Alphabet, randomseq, Alphabet_size, sqlen); dsq = DigitizeSequence(seq, sqlen); if (P7ViterbiSize(sqlen, hmm->M) <= RAMLIMIT) score = P7Viterbi(dsq, sqlen, hmm, NULL); else score = P7SmallViterbi(dsq, sqlen, hmm, NULL); AddToHistogram(hist, score); if (score > max) max = score; free(dsq); free(seq); } *ret_hist = hist; *ret_max = max; return; }
/* * cc -g -o testdriver -DTESTDRIVER -L. shuffle.c -lsquid -lm */ int main(int argc, char **argv) { char s1[100]; char s2[100]; sre_srandom(42); strcpy(s2, "GGGGGGGGGGCCCCCCCCCC"); /* strcpy(s2, "AGACATAAAGTTCCGTACTGCCGGGAT"); */ StrDPShuffle(s1, s2); printf("DPshuffle: %s\n", s1); StrMarkov0(s1,s2); printf("Markov 0 : %s\n", s1); StrMarkov1(s1,s2); printf("Markov 1 : %s\n", s1); return 0; }
void HMMCreateWPoolTask::runUnsafe() { const UHMMCalibrateSettings& settings = pt->getSettings(); WorkPool_s* wpool = pt->getWorkPool(); SetAlphabet(wpool->hmm->atype); sre_srandom(settings.seed); wpool->fixedlen = settings.fixedlen; wpool->hist = AllocHistogram(-200, 200, 100); wpool->lenmean = settings.lenmean; wpool->lensd = settings.lensd; wpool->nsample = settings.nsample; wpool->nseq = 0; wpool->randomseq.resize(MAXABET); wpool->max_score = -FLT_MAX; float p1; P7Logoddsify(wpool->hmm, TRUE); P7DefaultNullModel(wpool->randomseq.data(), &p1); }
int main(int argc, char **argv) { const char *hmmfile; /* file to read HMMs from */ FILE *fp; /* output file handle */ HMMFILE *hmmfp; /* opened hmmfile for reading */ struct plan7_s *hmm; /* HMM to generate from */ int L; /* length of a sequence */ int i; /* counter over sequences */ char *ofile; /* output sequence file */ int nseq; /* number of seqs to sample */ int seed; /* random number generator seed */ int be_quiet; /* TRUE to silence header/footer */ int do_alignment; /* TRUE to output in aligned format */ int do_consensus; /* TRUE to do a single consensus seq */ AjBool ajselex; AjBool ajcons; AjPFile inf=NULL; AjPFile outf=NULL; AjPStr instr=NULL; AjPStr outstr=NULL; #ifdef MEMDEBUG unsigned long histid1, histid2, orig_size, current_size; orig_size = malloc_inuse(&histid1); fprintf(stderr, "[... memory debugging is ON ...]\n"); #endif /*********************************************** * Parse command line ***********************************************/ nseq = 10; be_quiet = FALSE; do_alignment = FALSE; do_consensus = FALSE; ofile = NULL; embInitPV("ohmmemit",argc,argv,"HMMER",VERSION); ajselex = ajAcdGetBoolean("selex"); ajcons = ajAcdGetBoolean("consensus"); nseq = ajAcdGetInt("number"); seed = ajAcdGetInt("seed"); inf = ajAcdGetInfile("infile"); outf = ajAcdGetOutfile("outfile"); if(!seed) seed = time ((time_t *) NULL); if(ajselex) do_alignment=TRUE; else do_alignment=FALSE; if(ajcons) do_consensus=TRUE; else do_consensus=FALSE; instr = ajStrNewC((char *)ajFileGetNameC(inf)); outstr = ajStrNewC((char *)ajFileGetNameC(outf)); hmmfile = ajStrGetPtr(instr); sre_srandom(seed); if (do_alignment && do_consensus) ajFatal("Sorry, -selex and -consensus are incompatible.\n"); if (nseq != 10 && do_consensus) ajWarn("-consensus overrides -number (# of sampled seqs)"); /*********************************************** * Open HMM file (might be in HMMERDB or current directory). * Read a single HMM from it. ***********************************************/ if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) ajFatal("Failed to open HMM file %s\n", hmmfile); if (!HMMFileRead(hmmfp, &hmm)) ajFatal("Failed to read any HMMs from %s\n", hmmfile); HMMFileClose(hmmfp); if (hmm == NULL) ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); /* Configure the HMM to shut off N,J,C emission: so we * do a simple single pass through the model. */ Plan7NakedConfig(hmm); Plan7Renormalize(hmm); /*********************************************** * Open the output file, or stdout ***********************************************/ fp = ajFileGetFileptr(outf); /*********************************************** * Show the options banner ***********************************************/ be_quiet=TRUE; if (! be_quiet) { printf("HMM file: %s\n", hmmfile); if (! do_consensus) { printf("Number of seqs: %d\n", nseq); printf("Random seed: %d\n", seed); } printf("- - - - - - - - - - - - - - - - - - - - - - - - - " "- - - - - - -\n\n"); } /*********************************************** * Do the work. * If we're generating an alignment, we have to collect * all our traces, then output. If we're generating unaligned * sequences, we can emit one at a time. ***********************************************/ if (do_consensus) { char *seq; SQINFO sqinfo; /* info about sequence (name/desc) */ EmitConsensusSequence(hmm, &seq, NULL, &L, NULL); strcpy(sqinfo.name, "consensus"); sqinfo.len = L; sqinfo.flags = SQINFO_NAME | SQINFO_LEN; WriteSeq(fp, kPearson, seq, &sqinfo); free(seq); } else if (do_alignment) { struct p7trace_s **tr; char **dsq; SQINFO *sqinfo; char **aseq; AINFO ainfo; float *wgt; dsq = MallocOrDie(sizeof(char *) * nseq); tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); sqinfo = MallocOrDie(sizeof(SQINFO) * nseq); wgt = MallocOrDie(sizeof(float) * nseq); FSet(wgt, nseq, 1.0); for (i = 0; i < nseq; i++) { EmitSequence(hmm, &(dsq[i]), &L, &(tr[i])); sprintf(sqinfo[i].name, "seq%d", i+1); sqinfo[i].len = L; sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN; } P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE, &aseq, &ainfo); /* Output the alignment */ WriteSELEX(fp, aseq, &ainfo, 50); if (ofile != NULL && !be_quiet) printf("Alignment saved in file %s\n", ofile); /* Free memory */ for (i = 0; i < nseq; i++) { P7FreeTrace(tr[i]); free(dsq[i]); } FreeAlignment(aseq, &ainfo); free(sqinfo); free(dsq); free(wgt); free(tr); } else /* unaligned sequence output */ { struct p7trace_s *tr; char *dsq; char *seq; SQINFO sqinfo; for (i = 0; i < nseq; i++) { EmitSequence(hmm, &dsq, &L, &tr); sprintf(sqinfo.name, "seq%d", i+1); sqinfo.len = L; sqinfo.flags = SQINFO_NAME | SQINFO_LEN; seq = DedigitizeSequence(dsq, L); WriteSeq(fp, kPearson, seq, &sqinfo); P7FreeTrace(tr); free(dsq); free(seq); } } ajFileClose(&outf); FreePlan7(hmm); SqdClean(); #ifdef MEMDEBUG current_size = malloc_inuse(&histid2); if (current_size != orig_size) malloc_list(2, histid1, histid2); else fprintf(stderr, "[No memory leaks.]\n"); #endif ajStrDel(&instr); ajStrDel(&outstr); ajFileClose(&inf); ajFileClose(&outf); embExit(); return 0; }
static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max, int& cancelFlag, int& progress) { struct histogram_s *hist; struct dpmatrix_s *mx; float randomseq[MAXABET]; float p1; float max; char *seq; unsigned char *dsq; float score; int sqlen; int idx; // Initialize. // We assume we've already set the alphabet (safe, because // HMM input sets the alphabet). sre_srandom(seed); //get HMMERTaskLocalData HMMERTaskLocalData *tls = getHMMERTaskLocalData(); alphabet_s &al = tls->al; SetAlphabet(hmm->atype); P7Logoddsify(hmm, TRUE); P7DefaultNullModel(randomseq, &p1); hist = AllocHistogram(-200, 200, 100); mx = CreatePlan7Matrix(1, hmm->M, 25, 0); max = -FLT_MAX; progress = 0; int pStub; for (idx = 0; idx < nsample && !cancelFlag; idx++) { // choose length of random sequence if (fixedlen) { sqlen = fixedlen; } else { do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1); } // generate it seq = RandomSequence(al.Alphabet, randomseq, al.Alphabet_size, sqlen); dsq = DigitizeSequence(seq, sqlen); if (P7ViterbiSpaceOK(sqlen, hmm->M, mx)) { score = P7Viterbi(dsq, sqlen, hmm, mx, NULL); } else { score = P7SmallViterbi(dsq, sqlen, hmm, mx, NULL, pStub); } AddToHistogram(hist, score); max = qMax(score, max); progress = int(100*idx/float(nsample)); free(dsq); free(seq); } FreePlan7Matrix(mx); *ret_hist = hist; *ret_max = max; }
int main(void) { int master_tid; /* PVM TID of our master */ int slaveidx; /* my slave index (0..nslaves-1) */ struct plan7_s *hmm; /* HMM to calibrate, sent from master */ struct histogram_s *hist; /* score histogram */ int hmmidx; /* index of this HMM */ char *seq; /* synthetic random sequence */ char *dsq; /* digitized seq */ int len; /* length of seq */ float sc; /* score of seq aligned to HMM */ float max; /* maximum score seen in sample */ int seed; /* random number seed */ int nsample; /* number of seqs to sample */ int fixedlen; /* if nonzero, fixed length of seq */ float lenmean; /* Gaussian mean length of seq */ float lensd; /* Gaussian length std. dev. for seq */ int fitok; /* TRUE if EVD fit was OK */ float randomseq[MAXABET]; /* iid frequencies of residues */ float p1; int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ int idx; int code; /* Register leave_pvm() cleanup function so any exit() call * first calls pvm_exit(). */ if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } /***************************************************************** * initialization. * Master broadcasts the problem to us: parameters of the * HMM calibration. ******************************************************************/ master_tid = pvm_parent(); /* who's our master? */ pvm_recv(master_tid, HMMPVM_INIT); pvm_upkint(&nsample, 1, 1); pvm_upkint(&fixedlen, 1, 1); pvm_upkfloat(&lenmean, 1, 1); pvm_upkfloat(&lensd, 1, 1); /* tell the master we're OK and ready to go (or not) */ code = HMMPVM_OK; pvm_initsend(PvmDataDefault); pvm_pkint(&code, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); /***************************************************************** * Main loop. * Receive a random number seed, then an HMM to search against. * If we receive a -1 seed, we shut down. *****************************************************************/ slaveidx = -1; for (;;) { pvm_recv(master_tid, HMMPVM_WORK); pvm_upkint(&seed, 1, 1); if (seed == -1) break; /* shutdown signal */ pvm_upkint(&hmmidx, 1, 1); pvm_upkint(&alphatype,1, 1); SetAlphabet(alphatype); hmm = PVMUnpackHMM(); if (hmm == NULL) Die("oh no, the HMM never arrived"); if (slaveidx == -1) slaveidx = hmmidx; P7DefaultNullModel(randomseq, &p1); sre_srandom(seed); P7Logoddsify(hmm, TRUE); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; for (idx = 0; idx < nsample; idx++) { /* choose length of random sequence */ if (fixedlen) len = fixedlen; else do len = (int) Gaussrandom(lenmean, lensd); while (len < 1); /* generate it */ seq = RandomSequence(Alphabet, randomseq, Alphabet_size, len); dsq = DigitizeSequence(seq, len); if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) sc = P7Viterbi(dsq, len, hmm, NULL); else sc = P7SmallViterbi(dsq, len, hmm, NULL); AddToHistogram(hist, sc); if (sc > max) max = sc; free(seq); free(dsq); } /* Fit an EVD to the observed histogram. * The TRUE left-censors and fits only the right slope of the histogram. * The 9999. is an arbitrary high number that means we won't trim outliers * on the right. */ fitok = ExtremeValueFitHistogram(hist, TRUE, 9999.); /* Return output to master. * Currently we don't send the histogram back, but we could. */ pvm_initsend(PvmDataDefault); pvm_pkint(&slaveidx, 1, 1); pvm_pkint(&hmmidx, 1, 1); PVMPackString(hmm->name); pvm_pkint(&fitok, 1, 1); pvm_pkfloat(&(hist->param[EVD_MU]), 1, 1); pvm_pkfloat(&(hist->param[EVD_LAMBDA]), 1, 1); pvm_pkfloat(&max, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); /* cleanup */ FreeHistogram(hist); FreePlan7(hmm); } /*********************************************** * Cleanup, return. ***********************************************/ return 0; /* pvm_exit() is called by atexit() registration. */ }