/* Function: main_loop_serial() * Date: SRE, Tue Aug 18 16:18:28 1998 [St. Louis] * * Purpose: Given an HMM and parameters for synthesizing random * sequences; return a histogram of scores. * (Serial version) * * Args: hmm - an HMM to calibrate. * seed - random number seed * nsample - number of seqs to synthesize * lenmean - mean length of random sequence * lensd - std dev of random seq length * fixedlen - if nonzero, override lenmean, always this len * ret_hist - RETURN: the score histogram * ret_max - RETURN: highest score seen in simulation * * Returns: (void) * hist is alloc'ed here, and must be free'd by caller. */ static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max) { struct histogram_s *hist; float randomseq[MAXABET]; float p1; float max; char *seq; char *dsq; float score; int sqlen; int idx; /* Initialize. * We assume we've already set the alphabet (safe, because * HMM input sets the alphabet). */ sre_srandom(seed); P7Logoddsify(hmm, TRUE); P7DefaultNullModel(randomseq, &p1); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; for (idx = 0; idx < nsample; idx++) { /* choose length of random sequence */ if (fixedlen) sqlen = fixedlen; else do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1); /* generate it */ seq = RandomSequence(Alphabet, randomseq, Alphabet_size, sqlen); dsq = DigitizeSequence(seq, sqlen); if (P7ViterbiSize(sqlen, hmm->M) <= RAMLIMIT) score = P7Viterbi(dsq, sqlen, hmm, NULL); else score = P7SmallViterbi(dsq, sqlen, hmm, NULL); AddToHistogram(hist, score); if (score > max) max = score; free(dsq); free(seq); } *ret_hist = hist; *ret_max = max; return; }
int main(void) { struct p7trace_s *tr; /* traceback of an alignment */ int master_tid; /* PVM TID of our master */ char *hmmfile; /* file to read HMM(s) from */ HMMFILE *hmmfp; /* opened hmmfile for reading */ struct plan7_s *hmm; char *seq; char *dsq; int len; int nhmm; /* number of HMM to work on */ float sc; int my_idx = -1; /* my index, 0..nslaves-1 */ float globT; /* T parameter: keep only hits > globT bits */ double globE; /* E parameter: keep hits < globE E-value */ double pvalue; /* Z*pvalue = Evalue */ int Z; /* nseq to base E value calculation on */ int send_trace; /* TRUE if score is significant */ int do_xnu; /* TRUE to do XNU filter on seq */ int do_forward; /* TRUE to use Forward() scores not Viterbi */ int do_null2; /* TRUE to correct scores w/ ad hoc null2 */ int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ int code; /* return code after initialization */ /* Register leave_pvm() cleanup function so any exit() call * first calls pvm_exit(). */ if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } /***************************************************************** * initialization. * Master broadcasts to us: * 1) len of HMM file name (int) * 2) name of HMM file (string) * 3) length of sequence string (int) * 4) sequence (string) * 5) globT threshold * 6) globE threshold * 7) Z * 8) do_xnu flag * 9) do_forward flag * 10) do_null2 flag * 11) alphabet type * We receive the broadcast and open the files. ******************************************************************/ master_tid = pvm_parent(); /* who's our master? */ pvm_recv(master_tid, HMMPVM_INIT); pvm_upkint(&len, 1, 1); hmmfile = MallocOrDie(sizeof(char *) * (len+1)); pvm_upkstr(hmmfile); pvm_upkint(&len, 1, 1); seq = MallocOrDie(sizeof(char *) * (len+1)); pvm_upkstr(seq); pvm_upkfloat(&globT, 1, 1); pvm_upkdouble(&globE, 1, 1); pvm_upkint(&Z, 1, 1); pvm_upkint(&do_xnu, 1, 1); pvm_upkint(&do_forward, 1, 1); pvm_upkint(&do_null2, 1, 1); pvm_upkint(&alphatype, 1, 1); SetAlphabet(alphatype); /* Open HMM file (maybe in HMMERDB) */ code = HMMPVM_OK; if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) code = HMMPVM_NO_HMMFILE; else if (hmmfp->gsi == NULL) code = HMMPVM_NO_INDEX; /* report our status. */ pvm_initsend(PvmDataDefault); pvm_pkint(&code, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); dsq = DigitizeSequence(seq, len); if (do_xnu) XNU(dsq, len); /***************************************************************** * Main loop. * Receive an integer 0..nhmm-1 for which HMM to search against. * If we receive a -1, we shut down. *****************************************************************/ for (;;) { pvm_recv(master_tid, HMMPVM_WORK); pvm_upkint(&nhmm, 1, 1); if (my_idx < 0) my_idx = nhmm; /* first time thru, remember what index we are. */ if (nhmm == -1) break; /* shutdown signal */ /* move to our assigned HMM in the HMM file, and read it */ HMMFilePositionByIndex(hmmfp, nhmm); if (! HMMFileRead(hmmfp, &hmm)) Die("unexpected end of HMM file"); if (hmm == NULL) Die("unexpected failure to parse HMM file"); P7Logoddsify(hmm, TRUE); /* Score sequence, do alignment (Viterbi), recover trace */ if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) { SQD_DPRINTF1(("P7Viterbi(): Estimated size %d Mb\n", P7ViterbiSize(len, hmm->M))); sc = P7Viterbi(dsq, len, hmm, &tr); } else { SQD_DPRINTF1(("P7SmallViterbi() called; %d Mb > %d\n", P7ViterbiSize(len, hmm->M), RAMLIMIT)); sc = P7SmallViterbi(dsq, len, hmm, &tr); } if (do_forward) sc = P7Forward(dsq, len, hmm, NULL); if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); pvalue = PValue(hmm, sc); send_trace = (sc > globT && pvalue * (float) Z < globE) ? 1 : 0; /* return output */ pvm_initsend(PvmDataDefault); pvm_pkint(&my_idx, 1, 1); /* tell master who we are */ pvm_pkstr(hmm->name); /* double check that we did the right thing */ pvm_pkfloat(&sc, 1, 1); pvm_pkdouble(&pvalue, 1, 1); pvm_pkint(&send_trace, 1, 1); /* flag for whether a trace structure is coming */ if (send_trace) PVMPackTrace(tr); pvm_send(master_tid, HMMPVM_RESULTS); /* cleanup */ FreePlan7(hmm); P7FreeTrace(tr); } /*********************************************** * Cleanup, return. ***********************************************/ HMMFileClose(hmmfp); free(seq); free(dsq); free(hmmfile); return 0; }
int main(void) { int master_tid; /* PVM TID of our master */ int slaveidx; /* my slave index (0..nslaves-1) */ struct plan7_s *hmm; /* HMM to calibrate, sent from master */ struct histogram_s *hist; /* score histogram */ int hmmidx; /* index of this HMM */ char *seq; /* synthetic random sequence */ char *dsq; /* digitized seq */ int len; /* length of seq */ float sc; /* score of seq aligned to HMM */ float max; /* maximum score seen in sample */ int seed; /* random number seed */ int nsample; /* number of seqs to sample */ int fixedlen; /* if nonzero, fixed length of seq */ float lenmean; /* Gaussian mean length of seq */ float lensd; /* Gaussian length std. dev. for seq */ int fitok; /* TRUE if EVD fit was OK */ float randomseq[MAXABET]; /* iid frequencies of residues */ float p1; int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ int idx; int code; /* Register leave_pvm() cleanup function so any exit() call * first calls pvm_exit(). */ if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } /***************************************************************** * initialization. * Master broadcasts the problem to us: parameters of the * HMM calibration. ******************************************************************/ master_tid = pvm_parent(); /* who's our master? */ pvm_recv(master_tid, HMMPVM_INIT); pvm_upkint(&nsample, 1, 1); pvm_upkint(&fixedlen, 1, 1); pvm_upkfloat(&lenmean, 1, 1); pvm_upkfloat(&lensd, 1, 1); /* tell the master we're OK and ready to go (or not) */ code = HMMPVM_OK; pvm_initsend(PvmDataDefault); pvm_pkint(&code, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); /***************************************************************** * Main loop. * Receive a random number seed, then an HMM to search against. * If we receive a -1 seed, we shut down. *****************************************************************/ slaveidx = -1; for (;;) { pvm_recv(master_tid, HMMPVM_WORK); pvm_upkint(&seed, 1, 1); if (seed == -1) break; /* shutdown signal */ pvm_upkint(&hmmidx, 1, 1); pvm_upkint(&alphatype,1, 1); SetAlphabet(alphatype); hmm = PVMUnpackHMM(); if (hmm == NULL) Die("oh no, the HMM never arrived"); if (slaveidx == -1) slaveidx = hmmidx; P7DefaultNullModel(randomseq, &p1); sre_srandom(seed); P7Logoddsify(hmm, TRUE); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; for (idx = 0; idx < nsample; idx++) { /* choose length of random sequence */ if (fixedlen) len = fixedlen; else do len = (int) Gaussrandom(lenmean, lensd); while (len < 1); /* generate it */ seq = RandomSequence(Alphabet, randomseq, Alphabet_size, len); dsq = DigitizeSequence(seq, len); if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) sc = P7Viterbi(dsq, len, hmm, NULL); else sc = P7SmallViterbi(dsq, len, hmm, NULL); AddToHistogram(hist, sc); if (sc > max) max = sc; free(seq); free(dsq); } /* Fit an EVD to the observed histogram. * The TRUE left-censors and fits only the right slope of the histogram. * The 9999. is an arbitrary high number that means we won't trim outliers * on the right. */ fitok = ExtremeValueFitHistogram(hist, TRUE, 9999.); /* Return output to master. * Currently we don't send the histogram back, but we could. */ pvm_initsend(PvmDataDefault); pvm_pkint(&slaveidx, 1, 1); pvm_pkint(&hmmidx, 1, 1); PVMPackString(hmm->name); pvm_pkint(&fitok, 1, 1); pvm_pkfloat(&(hist->param[EVD_MU]), 1, 1); pvm_pkfloat(&(hist->param[EVD_LAMBDA]), 1, 1); pvm_pkfloat(&max, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); /* cleanup */ FreeHistogram(hist); FreePlan7(hmm); } /*********************************************** * Cleanup, return. ***********************************************/ return 0; /* pvm_exit() is called by atexit() registration. */ }
/* Function: worker_thread() * Date: SRE, Thu Jul 16 10:41:02 1998 [St. Louis] * * Purpose: The procedure executed by the worker threads. * * Args: ptr - (void *) that is recast to a pointer to * the workpool. * * Returns: (void *) */ void * worker_thread(void *ptr) { struct plan7_s *hmm; struct workpool_s *wpool; char *seq; char *dsq; int len; float sc; int rtn; Stopwatch_t thread_watch; StopwatchStart(&thread_watch); wpool = (struct workpool_s *) ptr; hmm = wpool->hmm; for (;;) { /* 1. Synthesize a random sequence. * The input sequence number is a shared resource, * and sre_random() isn't thread-safe, so protect * the whole section with mutex. */ /* acquire a lock */ if ((rtn = pthread_mutex_lock(&(wpool->input_lock))) != 0) Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); /* generate a sequence */ wpool->nseq++; if (wpool->nseq > wpool->nsample) { /* we're done; release input lock, break loop */ if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); break; } if (wpool->fixedlen) len = wpool->fixedlen; else do len = (int) Gaussrandom(wpool->lenmean, wpool->lensd); while (len < 1); seq = RandomSequence(Alphabet, wpool->randomseq, Alphabet_size, len); /* release the lock */ if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); /* 2. Score the sequence against the model. */ dsq = DigitizeSequence(seq, len); if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) sc = P7Viterbi(dsq, len, hmm, NULL); else sc = P7SmallViterbi(dsq, len, hmm, NULL); free(dsq); free(seq); /* 3. Save the output; hist and max_score are shared, * so protect this section with the output mutex. */ /* acquire lock on the output queue */ if ((rtn = pthread_mutex_lock(&(wpool->output_lock))) != 0) Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); /* save output */ AddToHistogram(wpool->hist, sc); if (sc > wpool->max_score) wpool->max_score = sc; /* release our lock */ if ((rtn = pthread_mutex_unlock(&(wpool->output_lock))) != 0) Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); } StopwatchStop(&thread_watch); /* acquire lock on the output queue */ if ((rtn = pthread_mutex_lock(&(wpool->output_lock))) != 0) Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); /* accumulate cpu time into main stopwatch */ StopwatchInclude(&(wpool->watch), &thread_watch); /* release our lock */ if ((rtn = pthread_mutex_unlock(&(wpool->output_lock))) != 0) Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); pthread_exit(NULL); return NULL; /* solely to silence compiler warnings */ }
int main(int argc, char **argv) { const char *hmmfile; /* file to read HMMs from */ HMMFILE *hmmfp; /* opened hmmfile for reading */ const char *seqfile; /* file to read target sequence from */ char **rseq; /* raw, unaligned sequences */ SQINFO *sqinfo; /* info associated with sequences */ char **dsq; /* digitized raw sequences */ int nseq; /* number of sequences */ char **aseq; /* aligned sequences */ AINFO ainfo; /* alignment information */ float *wgt; /* per-sequence weights */ int i; struct plan7_s *hmm; /* HMM to align to */ struct p7trace_s **tr; /* traces for aligned sequences */ int be_quiet; /* TRUE to suppress verbose banner */ int matchonly; /* TRUE to show only match state syms */ const char *outfile; /* optional alignment output file */ FILE *ofp; /* handle on alignment output file */ AjPFile ajwithali; /* name of additional alignment file to align */ AjPFile ajmapali; /* name of additional alignment file to map */ AjBool ajmatch=ajFalse; AjPFile outf=NULL; AjPStr outfname=NULL; AjPFile inf=NULL; AjPStr infname=NULL; AjPSeqset seqset=NULL; AjPStr ajseqfile=NULL; char* mapali=NULL; char* withali=NULL; #ifdef MEMDEBUG unsigned long histid1, histid2, orig_size, current_size; orig_size = malloc_inuse(&histid1); fprintf(stderr, "[... memory debugging is ON ...]\n"); #endif /*********************************************** * Parse command line ***********************************************/ matchonly = FALSE; outfile = NULL; be_quiet = FALSE; withali = NULL; mapali = NULL; embInitPV("ohmmalign",argc,argv,"HMMER",VERSION); ajmatch = ajAcdGetBoolean("matchonly"); if(ajmatch) matchonly=TRUE; else matchonly=FALSE; ajmapali = ajAcdGetInfile("mapalifile"); if (ajmapali) mapali = ajCharNewS(ajFileGetNameS(ajmapali)); ajFileClose(&ajmapali); ajwithali = ajAcdGetInfile("withalifile"); if (ajwithali) withali = ajCharNewS(ajFileGetNameS(ajwithali)); ajFileClose(&ajwithali); be_quiet=TRUE; outf = ajAcdGetOutfile("outfile"); outfname = ajStrNewC((char *)ajFileGetNameC(outf)); if(*ajStrGetPtr(outfname)>31) ajFileClose(&outf); outfile = ajStrGetPtr(outfname); inf = ajAcdGetInfile("hmmfile"); infname = ajStrNewC((char *)ajFileGetNameC(inf)); ajFileClose(&inf); hmmfile = ajStrGetPtr(infname); seqset = ajAcdGetSeqset("sequences"); ajseqfile = ajStrNewC(ajStrGetPtr(seqset->Filename)); seqfile = ajStrGetPtr(ajseqfile); /*********************************************** * Open HMM file (might be in HMMERDB or current directory). * Read a single HMM from it. * * Currently hmmalign disallows the J state and * only allows one domain per sequence. To preserve * the S/W entry information, the J state is explicitly * disallowed, rather than calling a Plan7*Config() function. * this is a workaround in 2.1 for the 2.0.x "yo!" bug. ***********************************************/ if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) ajFatal("Failed to open HMM file %s\n", hmmfile); if (!HMMFileRead(hmmfp, &hmm)) ajFatal("Failed to read any HMMs from %s\n", hmmfile); HMMFileClose(hmmfp); if (hmm == NULL) ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; P7Logoddsify(hmm, TRUE); /* do we have the map we might need? */ if (mapali != NULL && ! (hmm->flags & PLAN7_MAP)) ajFatal("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile); /*********************************************** * Open sequence file in current directory. * Read all seqs from it. ***********************************************/ /* if (! SeqfileFormat(seqfile, &format, NULL)) switch (squid_errno) { case SQERR_NOFILE: ajFatal("Sequence file %s could not be opened for reading", seqfile); case SQERR_FORMAT: default: ajFatal("Failed to determine format of sequence file %s", seqfile); } if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq)) ajFatal("Failed to read any sequences from file %s", seqfile); */ emboss_rseqs(seqset,&rseq,&sqinfo,&nseq); /*********************************************** * Show the banner ***********************************************/ be_quiet=TRUE; if (! be_quiet) { /* Banner(stdout, banner); */ printf( "HMM file: %s\n", hmmfile); printf( "Sequence file: %s\n", seqfile); printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); } /*********************************************** * Do the work ***********************************************/ /* Allocations and initializations. */ dsq = MallocOrDie(sizeof(char *) * nseq); tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); /* Align each sequence to the model, collect traces */ for (i = 0; i < nseq; i++) { dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len); if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT) (void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); else (void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); } /* Include an aligned alignment, if desired. */ if (mapali != NULL) include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq); if (withali != NULL) include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq); /* Turn traces into a multiple alignment */ wgt = MallocOrDie(sizeof(float) * nseq); FSet(wgt, nseq, 1.0); P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly, &aseq, &ainfo); /*********************************************** * Output the alignment ***********************************************/ if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL) { WriteSELEX(ofp, aseq, &ainfo, 50); printf("Alignment saved in file %s\n", outfile); fclose(ofp); } else WriteSELEX(stdout, aseq, &ainfo, 50); /*********************************************** * Cleanup and exit ***********************************************/ for (i = 0; i < nseq; i++) { P7FreeTrace(tr[i]); FreeSequence(rseq[i], &(sqinfo[i])); free(dsq[i]); } FreeAlignment(aseq, &ainfo); FreePlan7(hmm); free(sqinfo); free(rseq); free(dsq); free(wgt); free(tr); SqdClean(); ajStrDel(&outfname); ajStrDel(&infname); ajStrDel(&ajseqfile); #ifdef MEMDEBUG current_size = malloc_inuse(&histid2); if (current_size != orig_size) malloc_list(2, histid1, histid2); else fprintf(stderr, "[No memory leaks.]\n"); #endif ajSeqsetDel(&seqset); ajFileClose(&ajwithali); ajFileClose(&ajmapali); embExit(); return 0; }