Example #1
0
/* Function: main_loop_threaded()
 * Date:     SRE, Wed Dec  1 12:43:09 1999 [St. Louis]
 *
 * Purpose:  Threaded driver for the random-sequence score simulation.
 *           Seeds the random number generator, prepares the HMM's
 *           log-odds scores and the default null model, allocates
 *           the score histogram, and hands the sampling work to a
 *           pool of worker threads.
 *
 * Args:     hmm      - the HMM being calibrated
 *           seed     - seed handed to sre_srandom()
 *           nsample  - how many sequences to synthesize
 *           lenmean  - mean length of a random sequence
 *           lensd    - std deviation of random sequence length
 *           fixedlen - if nonzero, overrides lenmean: always this length
 *           nthreads - how many worker threads to start
 *           ret_hist - RETURN: histogram of scores
 *           ret_max  - RETURN: the pool's max_score after it stops
 *           twatch   - RETURN: the pool's stopwatch is folded into this
 *
 * Returns:  (void)
 *           The histogram is allocated here; the caller frees it.
 */
static void
main_loop_threaded(struct plan7_s *hmm, int seed, int nsample, 
		   float lenmean, float lensd, int fixedlen,
		   int nthreads,
		   struct histogram_s **ret_hist, float *ret_max,
		   Stopwatch_t *twatch)
{
  float               null_freqs[MAXABET]; /* residue frequencies of the null model */
  float               null_p1;             /* filled by P7DefaultNullModel(); unused here */
  struct histogram_s *score_hist;          /* handed to the pool, returned to the caller */
  struct workpool_s  *pool;                /* the worker thread pool */

  /* Set up. The alphabet is assumed to be set already
   * (reading the HMM does that).
   */
  sre_srandom(seed);
  P7Logoddsify(hmm, TRUE);
  P7DefaultNullModel(null_freqs, &null_p1);
  score_hist = AllocHistogram(-200, 200, 100);

  /* Start the pool, then stop it before reading anything back from it. */
  pool = workpool_start(hmm, lenmean, lensd, fixedlen, null_freqs, nsample,
			score_hist, nthreads);
  workpool_stop(pool);

  /* Copy results out while the pool structure is still alive. */
  *ret_hist = score_hist;
  *ret_max  = pool->max_score;
  StopwatchInclude(twatch, &(pool->watch));

  workpool_free(pool);
}
Example #2
0
/* Function: main_loop_serial()
 * Date:     SRE, Tue Aug 18 16:18:28 1998 [St. Louis]
 *
 * Purpose:  Serial driver for the random-sequence score simulation.
 *           Synthesizes nsample random sequences from the default
 *           null model, scores each against the HMM with a Viterbi
 *           routine, and collects the scores in a histogram.
 *
 * Args:     hmm      - the HMM being calibrated
 *           seed     - seed handed to sre_srandom()
 *           nsample  - how many sequences to synthesize
 *           lenmean  - mean length of a random sequence
 *           lensd    - std deviation of random sequence length
 *           fixedlen - if nonzero, overrides lenmean: always this length
 *           ret_hist - RETURN: histogram of scores
 *           ret_max  - RETURN: highest score seen (-FLT_MAX if nsample <= 0)
 *
 * Returns:  (void)
 *           The histogram is allocated here; the caller frees it.
 */
static void
main_loop_serial(struct plan7_s *hmm, int seed, int nsample, 
		 float lenmean, float lensd, int fixedlen,
		 struct histogram_s **ret_hist, float *ret_max)
{
  struct histogram_s *score_hist;
  float  null_freqs[MAXABET];   /* residue frequencies of the null model */
  float  null_p1;               /* filled by P7DefaultNullModel(); unused here */
  float  best = -FLT_MAX;       /* running maximum score */
  int    n;

  /* Set up. The alphabet is assumed to be set already
   * (reading the HMM does that).
   */
  sre_srandom(seed);
  P7Logoddsify(hmm, TRUE);
  P7DefaultNullModel(null_freqs, &null_p1);
  score_hist = AllocHistogram(-200, 200, 100);

  for (n = 0; n < nsample; n++)
    {
      char  *text;              /* random sequence, as text */
      char  *digital;           /* same sequence, digitized */
      float  sc;
      int    len;

      /* Pick a length: fixed if requested, otherwise resample the
       * Gaussian until we get something of length >= 1.
       */
      if (!fixedlen)
        {
          do {
            len = (int) Gaussrandom(lenmean, lensd);
          } while (len < 1);
        }
      else
        len = fixedlen;

      text    = RandomSequence(Alphabet, null_freqs, Alphabet_size, len);
      digital = DigitizeSequence(text, len);

      /* Use the full Viterbi when its size estimate is within RAMLIMIT,
       * else P7SmallViterbi().
       */
      sc = (P7ViterbiSize(len, hmm->M) <= RAMLIMIT)
             ? P7Viterbi(digital, len, hmm, NULL)
             : P7SmallViterbi(digital, len, hmm, NULL);

      AddToHistogram(score_hist, sc);
      if (best < sc) best = sc;

      free(digital);
      free(text);
    }

  *ret_hist = score_hist;
  *ret_max  = best;
}
Example #3
0
/*
 * Test driver for the string shuffling/generation routines.
 * Seeds the RNG with a fixed value, then prints the output of
 * StrDPShuffle(), StrMarkov0() and StrMarkov1() for one input string.
 *
 * Build:
 * cc -g -o testdriver -DTESTDRIVER -L. shuffle.c -lsquid -lm
 */
int 
main(int argc, char **argv)
{
  char result[100];             /* output buffer, reused by every call */
  char input[100];              /* the string being shuffled           */

  sre_srandom(42);

  /* An alternative test input: "AGACATAAAGTTCCGTACTGCCGGGAT" */
  strcpy(input, "GGGGGGGGGGCCCCCCCCCC");

  StrDPShuffle(result, input);
  printf("DPshuffle: %s\n", result);

  StrMarkov0(result, input);
  printf("Markov 0 : %s\n", result);

  StrMarkov1(result, input);
  printf("Markov 1 : %s\n", result);

  return 0;
}
void HMMCreateWPoolTask::runUnsafe() {
    const UHMMCalibrateSettings& settings = pt->getSettings();
    WorkPool_s* wpool = pt->getWorkPool();

    SetAlphabet(wpool->hmm->atype);
    sre_srandom(settings.seed);

    wpool->fixedlen = settings.fixedlen;
    wpool->hist = AllocHistogram(-200, 200, 100);
    wpool->lenmean = settings.lenmean;
    wpool->lensd = settings.lensd;
    wpool->nsample = settings.nsample;
    wpool->nseq = 0;
    wpool->randomseq.resize(MAXABET);
    wpool->max_score = -FLT_MAX;

        
    float  p1;
    P7Logoddsify(wpool->hmm, TRUE);
    P7DefaultNullModel(wpool->randomseq.data(), &p1);
}
Example #5
0
/* Function: main()  (ohmmemit, EMBOSS wrapper)
 *
 * Purpose:  Read a single HMM from the ACD "infile" and emit sequences
 *           from it to the ACD "outfile". Three mutually exclusive
 *           output modes are handled below:
 *             - consensus: one consensus sequence, written with WriteSeq()
 *             - selex:     nseq sampled sequences as an alignment,
 *                          written with WriteSELEX()
 *             - default:   nseq sampled sequences, unaligned, written
 *                          one at a time with WriteSeq()
 *
 * Args:     argc, argv - passed straight to embInitPV(); all options
 *                        are then fetched through ajAcdGet*() calls.
 *
 * Returns:  0 (after embExit()).
 */
int main(int argc, char **argv) 
{
    const char      *hmmfile;	/* file to read HMMs from                  */
    FILE            *fp;	/* output file handle                      */
    HMMFILE         *hmmfp;	/* opened hmmfile for reading              */
    struct plan7_s  *hmm;	/* HMM to generate from                    */
    int              L;		/* length of a sequence                    */
    int              i;		/* counter over sequences                  */

    char            *ofile;	/* output sequence file                    */
    int              nseq;	/* number of seqs to sample                */
    int              seed;	/* random number generator seed            */
    int              be_quiet;	/* TRUE to silence header/footer           */
    int              do_alignment; /* TRUE to output in aligned format     */ 
    int              do_consensus; /* TRUE to do a single consensus seq    */

    /* Values fetched from ACD; the booleans are mapped onto
     * do_alignment / do_consensus below.
     */
    AjBool ajselex;
    AjBool ajcons;
    AjPFile inf=NULL;
    AjPFile outf=NULL;
    AjPStr  instr=NULL;		/* name of inf; hmmfile points into it      */
    AjPStr  outstr=NULL;	/* name of outf; only created and deleted   */
  

#ifdef MEMDEBUG
    unsigned long histid1, histid2, orig_size, current_size;
    orig_size = malloc_inuse(&histid1);
    fprintf(stderr, "[... memory debugging is ON ...]\n");
#endif

    /*********************************************** 
     * Parse command line
     ***********************************************/

    /* Defaults. nseq is overwritten by the ACD "number" value below;
     * ofile is never assigned anything else in this function.
     */
    nseq         = 10;

    be_quiet     = FALSE;
    do_alignment = FALSE;  
    do_consensus = FALSE;
    ofile        = NULL;

    embInitPV("ohmmemit",argc,argv,"HMMER",VERSION);

    ajselex = ajAcdGetBoolean("selex");
    ajcons  = ajAcdGetBoolean("consensus");
    nseq    = ajAcdGetInt("number");
    seed    = ajAcdGetInt("seed");
    inf     = ajAcdGetInfile("infile");
    outf    = ajAcdGetOutfile("outfile");
  
    /* A seed of 0 means "pick one from the clock". */
    if(!seed)
	seed = time ((time_t *) NULL);

    if(ajselex)
	do_alignment=TRUE;
    else
	do_alignment=FALSE;
  
    if(ajcons)
	do_consensus=TRUE;
    else
	do_consensus=FALSE;

    instr  = ajStrNewC((char *)ajFileGetNameC(inf));
    outstr = ajStrNewC((char *)ajFileGetNameC(outf));

    /* hmmfile borrows instr's buffer, so instr must outlive every use
     * of hmmfile (it is deleted only at the very end).
     */
    hmmfile = ajStrGetPtr(instr);

    sre_srandom(seed);

    if (do_alignment && do_consensus)
	ajFatal("Sorry, -selex and -consensus are incompatible.\n"); 
    if (nseq != 10 && do_consensus)
	ajWarn("-consensus overrides -number (# of sampled seqs)");

    /*********************************************** 
     * Open HMM file (might be in HMMERDB or current directory).
     * Read a single HMM from it.
     ***********************************************/

    if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
	ajFatal("Failed to open HMM file %s\n", hmmfile);
    if (!HMMFileRead(hmmfp, &hmm)) 
	ajFatal("Failed to read any HMMs from %s\n", hmmfile);
    HMMFileClose(hmmfp);
    if (hmm == NULL) 
	ajFatal("HMM file %s corrupt or in incorrect format? Parse failed",
		hmmfile);

    /* Configure the HMM to shut off N,J,C emission: so we
     * do a simple single pass through the model.
     */
    Plan7NakedConfig(hmm);
    Plan7Renormalize(hmm);

    /*********************************************** 
     * Open the output file, or stdout
     ***********************************************/ 

    /* fp is obtained from outf rather than opened here; it is never
     * closed directly, only via ajFileClose(&outf) below.
     */
    fp = ajFileGetFileptr(outf);
  
 
    /*********************************************** 
     * Show the options banner
     ***********************************************/
    /* be_quiet is forced to TRUE immediately before the test, so the
     * banner block below never executes in this wrapper.
     */
    be_quiet=TRUE;
    if (! be_quiet) 
    {
	printf("HMM file:             %s\n", hmmfile);
	if (! do_consensus)
	{
	    printf("Number of seqs:       %d\n", nseq);
	    printf("Random seed:          %d\n", seed);
	}
	printf("- - - - - - - - - - - - - - - - - - - - - - - - - "
	       "- - - - - - -\n\n");
    }

    /*********************************************** 
     * Do the work.
     * If we're generating an alignment, we have to collect
     * all our traces, then output. If we're generating unaligned
     * sequences, we can emit one at a time.
     ***********************************************/

    if (do_consensus) 
    {
	char    *seq;
	SQINFO   sqinfo;	/* info about sequence (name/desc)        */

	EmitConsensusSequence(hmm, &seq, NULL, &L, NULL);
	strcpy(sqinfo.name, "consensus");
	sqinfo.len = L;
	sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	WriteSeq(fp, kPearson, seq, &sqinfo);
	free(seq);
    }
    else if (do_alignment)
    {
	struct p7trace_s **tr;		/* one trace per sampled sequence   */
	char           **dsq;		/* one digitized seq per sample     */
	SQINFO          *sqinfo;	/* name/len for each sample         */
	char           **aseq;		/* alignment built from the traces  */
	AINFO            ainfo;
	float           *wgt;		/* per-sequence weights, all 1.0    */

	dsq    = MallocOrDie(sizeof(char *)             * nseq);
	tr     = MallocOrDie(sizeof(struct p7trace_s *) * nseq);
	sqinfo = MallocOrDie(sizeof(SQINFO)             * nseq);
	wgt    = MallocOrDie(sizeof(float)              * nseq);
	FSet(wgt, nseq, 1.0);

	/* Sample every sequence first; sequences are named seq1..seqN. */
	for (i = 0; i < nseq; i++)
	{
	    EmitSequence(hmm, &(dsq[i]), &L, &(tr[i]));
	    sprintf(sqinfo[i].name, "seq%d", i+1);
	    sqinfo[i].len   = L;
	    sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN;
	}

	P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE, 
			   &aseq, &ainfo);

	/* Output the alignment */
	WriteSELEX(fp, aseq, &ainfo, 50);
	/* ofile is always NULL and be_quiet is TRUE here, so this
	 * message is never printed.
	 */
	if (ofile != NULL && !be_quiet)
	    printf("Alignment saved in file %s\n", ofile);

	/* Free memory
	 */
	for (i = 0; i < nseq; i++) 
	{
	    P7FreeTrace(tr[i]);
	    free(dsq[i]);
	}
	FreeAlignment(aseq, &ainfo);
	free(sqinfo);
	free(dsq);
	free(wgt);
	free(tr);
    }
    else				/* unaligned sequence output */
    {
	struct p7trace_s *tr;
	char             *dsq;
	char             *seq;
	SQINFO            sqinfo;

	/* Sample, convert back to text, write, and free one sequence
	 * per iteration; sequences are named seq1..seqN.
	 */
	for (i = 0; i < nseq; i++)
	{
	    EmitSequence(hmm, &dsq, &L, &tr);
	    sprintf(sqinfo.name, "seq%d", i+1);
	    sqinfo.len   = L;
	    sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	    seq = DedigitizeSequence(dsq, L);

	    WriteSeq(fp, kPearson, seq, &sqinfo);
	  
	    P7FreeTrace(tr);
	    free(dsq);
	    free(seq);
	}
    }

    /* First close of outf; fp must not be used after this point. */
    ajFileClose(&outf);
  
    FreePlan7(hmm);
    SqdClean();

#ifdef MEMDEBUG
    current_size = malloc_inuse(&histid2);
    if (current_size != orig_size)
	malloc_list(2, histid1, histid2);
    else
	fprintf(stderr, "[No memory leaks.]\n");
#endif


    /* Release the AJAX objects. hmmfile dangles once instr is deleted. */
    ajStrDel(&instr);
    ajStrDel(&outstr);
    ajFileClose(&inf);
    /* NOTE(review): outf was already closed above; this second call is
     * presumably a no-op because ajFileClose() takes the handle by
     * address and clears it -- confirm against the AJAX docs.
     */
    ajFileClose(&outf);

    embExit();
    return 0;
}
Example #6
0
static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, 
                            float lenmean, float lensd, int fixedlen,
                            struct histogram_s **ret_hist, float *ret_max, int& cancelFlag, int& progress)
{
    struct histogram_s *hist;
    struct dpmatrix_s  *mx;
    float  randomseq[MAXABET];
    float  p1;
    float  max;
    char  *seq;
    unsigned char  *dsq;
    float  score;
    int    sqlen;
    int    idx;

    // Initialize.
    // We assume we've already set the alphabet (safe, because
    // HMM input sets the alphabet).
    
    sre_srandom(seed);

	//get HMMERTaskLocalData
	HMMERTaskLocalData *tls = getHMMERTaskLocalData();
    alphabet_s &al = tls->al;
	
    SetAlphabet(hmm->atype);

    P7Logoddsify(hmm, TRUE);
    P7DefaultNullModel(randomseq, &p1);
    hist = AllocHistogram(-200, 200, 100);
    mx = CreatePlan7Matrix(1, hmm->M, 25, 0);
    max = -FLT_MAX;

    progress = 0;
    int pStub;
    
    for (idx = 0; idx < nsample && !cancelFlag; idx++) {
        // choose length of random sequence
        if (fixedlen) {
            sqlen = fixedlen;
        } else {
            do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1);
        }
        // generate it
        seq = RandomSequence(al.Alphabet, randomseq, al.Alphabet_size, sqlen);
        dsq = DigitizeSequence(seq, sqlen);

        if (P7ViterbiSpaceOK(sqlen, hmm->M, mx)) {
            score = P7Viterbi(dsq, sqlen, hmm, mx, NULL);
        } else {
            score = P7SmallViterbi(dsq, sqlen, hmm, mx, NULL, pStub);
        }
    
        AddToHistogram(hist, score);
        max = qMax(score, max);

        progress = int(100*idx/float(nsample));

        free(dsq); 
        free(seq);
    }

    FreePlan7Matrix(mx);
    *ret_hist   = hist;
    *ret_max    = max;
}
Example #7
0
/* Function: main()  (PVM slave for HMM calibration)
 *
 * Purpose:  Runs as a PVM slave. Receives the simulation parameters
 *           once from the master (HMMPVM_INIT), acknowledges with
 *           HMMPVM_OK, then loops: receive a seed and an HMM
 *           (HMMPVM_WORK), score nsample random sequences against it,
 *           fit an EVD to the score histogram, and send the fit back
 *           (HMMPVM_RESULTS). A seed of -1 ends the loop.
 *
 *           The order of the pvm_upk*() and pvm_pk*() calls below is
 *           the message layout; it presumably has to mirror the
 *           master's packing/unpacking order -- confirm against the
 *           master before reordering anything.
 *
 * Returns:  0. pvm_exit() runs through the atexit() registration.
 */
int 
main(void)
{
  int      master_tid;		/* PVM TID of our master */
  int      slaveidx;		/* my slave index (0..nslaves-1) */
  struct plan7_s *hmm;		/* HMM to calibrate, sent from master */
  struct histogram_s *hist;     /* score histogram */
  int      hmmidx;		/* index of this HMM */
  char    *seq;			/* synthetic random sequence */
  char    *dsq;			/* digitized seq */
  int      len;			/* length of seq */
  float    sc;			/* score of seq aligned to HMM */
  float    max;			/* maximum score seen in sample */
  int      seed;		/* random number seed */
  int      nsample;		/* number of seqs to sample */
  int      fixedlen;		/* if nonzero, fixed length of seq */
  float    lenmean;		/* Gaussian mean length of seq */
  float    lensd;		/* Gaussian length std. dev. for seq */
  int      fitok;		/* TRUE if EVD fit was OK */
  float    randomseq[MAXABET];	/* iid frequencies of residues */
  float    p1;			/* filled by P7DefaultNullModel(); unused here */
  int      alphatype;		/* alphabet type, hmmAMINO or hmmNUCLEIC    */
  int      idx;			/* counter over sampled sequences */
  int      code;		/* status code sent back to the master */

  /* Register leave_pvm() cleanup function so any exit() call
   * first calls pvm_exit().
   */
  if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); }

  /*****************************************************************
   * initialization.
   * Master broadcasts the problem to us: parameters of the
   * HMM calibration.  
   ******************************************************************/

  master_tid = pvm_parent();	/* who's our master? */

  /* NOTE(review): none of the pvm_recv()/pvm_upk*() return codes in
   * this function are checked.
   */
  pvm_recv(master_tid, HMMPVM_INIT);
  pvm_upkint(&nsample,  1, 1);
  pvm_upkint(&fixedlen, 1, 1);
  pvm_upkfloat(&lenmean,  1, 1);
  pvm_upkfloat(&lensd,    1, 1);

  /* tell the master we're OK and ready to go (or not)
   */
  /* As written, the code sent is always HMMPVM_OK. */
  code = HMMPVM_OK;
  pvm_initsend(PvmDataDefault);
  pvm_pkint(&code, 1, 1);	
  pvm_send(master_tid, HMMPVM_RESULTS);

  /*****************************************************************
   * Main loop.
   * Receive a random number seed, then an HMM to search against.
   * If we receive a -1 seed, we shut down. 
   *****************************************************************/ 
  
  slaveidx = -1;		/* not yet known; set from the first work unit */
  for (;;) 
    {
      pvm_recv(master_tid, HMMPVM_WORK);
      pvm_upkint(&seed, 1, 1);
      if (seed == -1) break;	/* shutdown signal */
      pvm_upkint(&hmmidx, 1, 1);
      pvm_upkint(&alphatype,1, 1);
      /* The alphabet is set before the HMM is unpacked and before the
       * null model is built.
       */
      SetAlphabet(alphatype);
      hmm = PVMUnpackHMM();
      if (hmm == NULL) Die("oh no, the HMM never arrived");

      /* The first hmmidx we ever receive is kept as our slave index
       * and echoed back with every result.
       */
      if (slaveidx == -1) slaveidx = hmmidx; 
      P7DefaultNullModel(randomseq, &p1);

      sre_srandom(seed);
      P7Logoddsify(hmm, TRUE);
      hist = AllocHistogram(-200, 200, 100);
      max  = -FLT_MAX;

      for (idx = 0; idx < nsample; idx++)
	{
  				/* choose length of random sequence */
	  if (fixedlen) len = fixedlen;
	  else do len = (int) Gaussrandom(lenmean, lensd); while (len < 1);
				/* generate it */
	  seq = RandomSequence(Alphabet, randomseq, Alphabet_size, len);
	  dsq = DigitizeSequence(seq, len);

				/* full Viterbi if its size estimate is within RAMLIMIT */
	  if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT)
	    sc = P7Viterbi(dsq, len, hmm, NULL);
	  else
	    sc = P7SmallViterbi(dsq, len, hmm, NULL);

	  AddToHistogram(hist, sc);
	  if (sc > max) max = sc;
	  
	  free(seq);
	  free(dsq);
	}

      /* Fit an EVD to the observed histogram.
       * The TRUE left-censors and fits only the right slope of the histogram.
       * The 9999. is an arbitrary high number that means we won't trim outliers
       * on the right.
       */
      fitok = ExtremeValueFitHistogram(hist, TRUE, 9999.);

      /* Return output to master.
       * Currently we don't send the histogram back, but we could.
       */
      /* Packed in this order: slaveidx, hmmidx, HMM name, fitok,
       * EVD mu, EVD lambda, max score. The EVD parameters are packed
       * regardless of fitok.
       */
      pvm_initsend(PvmDataDefault);
      pvm_pkint(&slaveidx, 1, 1);
      pvm_pkint(&hmmidx, 1, 1);	
      PVMPackString(hmm->name);
      pvm_pkint(&fitok,  1, 1);
      pvm_pkfloat(&(hist->param[EVD_MU]), 1, 1);
      pvm_pkfloat(&(hist->param[EVD_LAMBDA]), 1, 1);
      pvm_pkfloat(&max, 1, 1);
      pvm_send(master_tid, HMMPVM_RESULTS);

      /* cleanup
       */
      FreeHistogram(hist);
      FreePlan7(hmm);
    }

  /*********************************************** 
   * Cleanup, return.
   ***********************************************/

  return 0;			/* pvm_exit() is called by atexit() registration. */
}