Exemple #1
0
/* serial_master()
 * Serial driver: reads HMMs one at a time from cfg->hfp until end of file and,
 * for each one, optionally recalibrates it (--recal), runs a work unit, and
 * outputs the result.
 *
 * go  - command-line options; "-a", "--bgflat", "-L" and "--recal" are queried here.
 * cfg - run configuration; cfg->bg is created here on the first HMM if still NULL.
 *
 * Every failure is reported through p7_Fail().
 */
static void
serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  char     msg[eslERRBUFSIZE];   /* error text filled in by the helper routines          */
  P7_HMM  *model   = NULL;       /* HMM currently being processed                        */
  double  *scores  = NULL;       /* results: array of cfg->N scores                      */
  int     *alilens = NULL;       /* optional results (-a): cfg->N alignment lengths      */
  int      rc;

  rc = init_master_cfg(go, cfg, msg);
  if (rc != eslOK) p7_Fail(msg);

  scores = malloc(sizeof(double) * cfg->N);
  if (scores == NULL) p7_Fail("allocation failed");

  /* The alignment-length vector is only allocated when -a is on; otherwise it stays NULL. */
  if (esl_opt_GetBoolean(go, "-a"))
    {
      alilens = malloc(sizeof(int) * cfg->N);
      if (alilens == NULL) p7_Fail("allocation failed");
    }

  for (;;)
    {
      rc = p7_hmmfile_Read(cfg->hfp, &(cfg->abc), &model);
      if (rc == eslEOF) break;    /* normal termination: no more HMMs */

      if (rc != eslOK)
        {
          if      (rc == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", cfg->hmmfile);
          else if (rc == eslEFORMAT)   p7_Fail("bad file format in HMM file %s",             cfg->hmmfile);
          else if (rc == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets",   cfg->hmmfile);
          else                         p7_Fail("Unexpected error in reading HMMs from %s",   cfg->hmmfile);
        }

      /* The background model is built lazily, once, after the first read has
       * been handed &cfg->abc.
       */
      if (cfg->bg == NULL)
        {
          cfg->bg = esl_opt_GetBoolean(go, "--bgflat") ? p7_bg_CreateUniform(cfg->abc)
                                                       : p7_bg_Create(cfg->abc);
          p7_bg_SetLength(cfg->bg, esl_opt_GetInteger(go, "-L"));
        }

      if (esl_opt_GetBoolean(go, "--recal"))
        {
          if (recalibrate_model(go, cfg, msg, model) != eslOK) p7_Fail(msg);
        }

      if (process_workunit(go, cfg, msg, model, scores, alilens) != eslOK) p7_Fail(msg);
      if (output_result   (go, cfg, msg, model, scores, alilens) != eslOK) p7_Fail(msg);

      p7_hmm_Destroy(model);
    }

  free(scores);
  free(alilens);   /* free(NULL) is a no-op when -a was off */
}
Exemple #2
0
/* mpi_master()
 * The MPI version of hmmbuild.
 * Follows standard pattern for a master/worker load-balanced MPI program (J1/78-79).
 * 
 * A master can only return if it's successful. 
 * Errors in an MPI master come in two classes: recoverable and nonrecoverable.
 * 
 * Recoverable errors include all worker-side errors, and any
 * master-side error that do not affect MPI communication. Error
 * messages from recoverable messages are delayed until we've cleanly
 * shut down the workers.
 * 
 * Unrecoverable errors are master-side errors that may affect MPI
 * communication, meaning we cannot count on being able to reach the
 * workers and shut them down. Unrecoverable errors result in immediate
 * p7_Fail()'s, which will cause MPI to shut down the worker processes
 * uncleanly.
 */
static void
mpi_master(const ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int         xstatus       = eslOK;	/* changes from OK on recoverable error */
  int         status;
  int         have_work     = TRUE;	/* TRUE while alignments remain  */
  int         nproc_working = 0;	        /* number of worker processes working, up to nproc-1 */
  int         wi;          	        /* rank of next worker to get an alignment to work on */
  char       *buf           = NULL;	/* input/output buffer, for packed MPI messages */
  int         bn            = 0;
  ESL_MSA    *msa           = NULL;
  P7_HMM     *hmm           = NULL;
  P7_BG      *bg            = NULL;
  ESL_MSA   **msalist       = NULL;
  ESL_MSA    *postmsa       = NULL;
  int        *msaidx        = NULL;
  char        errmsg[eslERRBUFSIZE];
  MPI_Status  mpistatus; 
  int         n;
  int         pos;

  double      entropy;
  
  /* Master initialization: including, figure out the alphabet type.
   * If any failure occurs, delay printing error message until we've shut down workers.
   */
  if (xstatus == eslOK) { if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) xstatus = status; }
  if (xstatus == eslOK) { bn = 4096; if ((buf = malloc(sizeof(char) * bn)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } }
  if (xstatus == eslOK) { if ((msalist = malloc(sizeof(ESL_MSA *) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } }
  if (xstatus == eslOK) { if ((msaidx  = malloc(sizeof(int)       * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } }
  MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (xstatus != eslOK) {  MPI_Finalize(); p7_Fail(errmsg); }
  ESL_DPRINTF1(("MPI master is initialized\n"));

  bg = p7_bg_Create(cfg->abc);

  for (wi = 0; wi < cfg->nproc; wi++) { msalist[wi] = NULL; msaidx[wi] = 0; } 

  /* Worker initialization:
   * Because we've already successfully initialized the master before we start
   * initializing the workers, we don't expect worker initialization to fail;
   * so we just receive a quick OK/error code reply from each worker to be sure,
   * and don't worry about an informative message. 
   */
  MPI_Bcast(&(cfg->abc->type), 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
  if (status != eslOK) { MPI_Finalize(); p7_Fail("One or more MPI worker processes failed to initialize."); }
  ESL_DPRINTF1(("%d workers are initialized\n", cfg->nproc-1));


  /* Main loop: combining load workers, send/receive, clear workers loops;
   * also, catch error states and die later, after clean shutdown of workers.
   * 
   * When a recoverable error occurs, have_work = FALSE, xstatus !=
   * eslOK, and errmsg is set to an informative message. No more
   * errmsg's can be received after the first one. We wait for all the
   * workers to clear their work units, then send them shutdown signals,
   * then finally print our errmsg and exit.
   * 
   * Unrecoverable errors just crash us out with p7_Fail().
   */
  wi = 1;
  while (have_work || nproc_working)
    {
      if (have_work) 
	{
	  if ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) 
	    {
	      cfg->nali++;  
	      ESL_DPRINTF1(("MPI master read MSA %s\n", msa->name == NULL? "" : msa->name));
	    }
	  else 
	    {
	      have_work = FALSE;
	      if      (status == eslEFORMAT)  { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); }
	      else if (status == eslEINVAL)   { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); }
	      else if (status != eslEOF)      { xstatus = status;     snprintf(errmsg, eslERRBUFSIZE, "Alignment file read unexpectedly failed with code %d\n", status); }
	      ESL_DPRINTF1(("MPI master has run out of MSAs (having read %d)\n", cfg->nali));
	    } 
	}

      if ((have_work && nproc_working == cfg->nproc-1) || (!have_work && nproc_working > 0))
	{
	  if (MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi probe failed"); }
	  if (MPI_Get_count(&mpistatus, MPI_PACKED, &n)                != 0) { MPI_Finalize(); p7_Fail("mpi get count failed"); }
	  wi = mpistatus.MPI_SOURCE;
	  ESL_DPRINTF1(("MPI master sees a result of %d bytes from worker %d\n", n, wi));

	  if (n > bn) {
	    if ((buf = realloc(buf, sizeof(char) * n)) == NULL) p7_Fail("reallocation failed");
	    bn = n; 
	  }
	  if (MPI_Recv(buf, bn, MPI_PACKED, wi, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi recv failed"); }
	  ESL_DPRINTF1(("MPI master has received the buffer\n"));

	  /* If we're in a recoverable error state, we're only clearing worker results;
           * just receive them, don't unpack them or print them.
           * But if our xstatus is OK, go ahead and process the result buffer.
	   */
	  if (xstatus == eslOK)	
	    {
	      pos = 0;
	      if (MPI_Unpack(buf, bn, &pos, &xstatus, 1, MPI_INT, MPI_COMM_WORLD)     != 0) { MPI_Finalize();  p7_Fail("mpi unpack failed");}
	      if (xstatus == eslOK) /* worker reported success. Get the HMM. */
		{
		  ESL_DPRINTF1(("MPI master sees that the result buffer contains an HMM\n"));
		  if (p7_hmm_MPIUnpack(buf, bn, &pos, MPI_COMM_WORLD, &(cfg->abc), &hmm) != eslOK) {  MPI_Finalize(); p7_Fail("HMM unpack failed"); }
		  ESL_DPRINTF1(("MPI master has unpacked the HMM\n"));

		  if (cfg->postmsafile != NULL) {
		    if (esl_msa_MPIUnpack(cfg->abc, buf, bn, &pos, MPI_COMM_WORLD, &postmsa) != eslOK) { MPI_Finalize(); p7_Fail("postmsa unpack failed");}
		  } 

		  entropy = p7_MeanMatchRelativeEntropy(hmm, bg);
		  if ((status = output_result(cfg, errmsg, msaidx[wi], msalist[wi], hmm, postmsa, entropy)) != eslOK) xstatus = status;

		  esl_msa_Destroy(postmsa); postmsa = NULL;
		  p7_hmm_Destroy(hmm);      hmm     = NULL;
		}
	      else	/* worker reported an error. Get the errmsg. */
		{
		  if (MPI_Unpack(buf, bn, &pos, errmsg, eslERRBUFSIZE, MPI_CHAR, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack of errmsg failed"); }
		  ESL_DPRINTF1(("MPI master sees that the result buffer contains an error message\n"));
		}
	    }
	  esl_msa_Destroy(msalist[wi]);
	  msalist[wi] = NULL;
	  msaidx[wi]  = 0;
	  nproc_working--;
	}

      if (have_work)
	{   
	  ESL_DPRINTF1(("MPI master is sending MSA %s to worker %d\n", msa->name == NULL ? "":msa->name, wi));
	  if (esl_msa_MPISend(msa, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed");
	  msalist[wi] = msa;
	  msaidx[wi]  = cfg->nali; /* 1..N for N alignments in the MSA database */
	  msa = NULL;
	  wi++;
	  nproc_working++;
	}
    }
  
  /* On success or recoverable errors:
   * Shut down workers cleanly. 
   */
  ESL_DPRINTF1(("MPI master is done. Shutting down all the workers cleanly\n"));
  for (wi = 1; wi < cfg->nproc; wi++) 
    if (esl_msa_MPISend(NULL, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed");

  free(buf);
  free(msaidx);
  free(msalist);
  p7_bg_Destroy(bg);

  if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); }
  else                  return;
}
Exemple #3
0
/* serial_master()
 * The serial version of hmmbuild.
 * For each MSA, build an HMM and save it.
 * 
 * A master can only return if it's successful. All errors are handled immediately and fatally with p7_Fail().
 */
static int
serial_master(const ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int              status;

  int              i;
  int              ncpus    = 0;

  int              infocnt  = 0;
  WORKER_INFO     *info     = NULL;
#ifdef HMMER_THREADS
  WORK_ITEM       *item     = NULL;
  ESL_THREADS     *threadObj= NULL;
  ESL_WORK_QUEUE  *queue    = NULL;
#endif

  char             errmsg[eslERRBUFSIZE];

  if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) p7_Fail(errmsg);
  
#ifdef HMMER_THREADS
  /* initialize thread data */
  if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu");
  else                                   esl_threads_CPUCount(&ncpus);

  if (ncpus > 0)
    {
      threadObj = esl_threads_Create(&pipeline_thread);
      queue = esl_workqueue_Create(ncpus * 2);
    }
#endif

  infocnt = (ncpus == 0) ? 1 : ncpus;
  ESL_ALLOC(info, sizeof(*info) * infocnt);

  for (i = 0; i < infocnt; ++i)
    {
      info[i].bg = p7_bg_Create(cfg->abc);
      info[i].bld = p7_builder_Create(go, cfg->abc);
      if (info[i].bld == NULL)  p7_Fail("p7_builder_Create failed");
#ifdef HMMER_THREADS
      info[i].queue = queue;
      if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]);
#endif
    }

#ifdef HMMER_THREADS
  for (i = 0; i < ncpus * 2; ++i)
    {
      ESL_ALLOC(item, sizeof(*item));

      item->nali      = 0;
      item->processed = FALSE;
      item->postmsa   = NULL;
      item->msa       = NULL;
      item->hmm       = NULL;
      item->entropy   = 0.0;

      status = esl_workqueue_Init(queue, item);
      if (status != eslOK) esl_fatal("Failed to add block to work queue");
    }
#endif

#ifdef HMMER_THREADS
  if (ncpus > 0)  status = thread_loop(threadObj, queue, cfg);
  else            status = serial_loop(info, cfg);
#else
  status = serial_loop(info, cfg);
#endif

  if      (status == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", cfg->afp->errbuf);
  else if (status == eslEINVAL)  esl_fatal("Alignment file parse error:\n%s\n", cfg->afp->errbuf);
  else if (status != eslEOF)     esl_fatal("Alignment file read failed with error code %d\n", status);

  for (i = 0; i < infocnt; ++i)
    {
      p7_bg_Destroy(info[i].bg);
      p7_builder_Destroy(info[i].bld);
    }

#ifdef HMMER_THREADS
  if (ncpus > 0)
    {
      esl_workqueue_Reset(queue);
      while (esl_workqueue_Remove(queue, (void **) &item) == eslOK)
	{
	  free(item);
	}
      esl_workqueue_Destroy(queue);
      esl_threads_Destroy(threadObj);
    }
#endif

  free(info);
  return eslOK;

 ERROR:
  return eslFAIL;
}
Exemple #4
0
/* mpi_master()
 * The MPI version of hmmsim.
 * Follows standard pattern for a master/worker load-balanced MPI program (J1/78-79).
 * 
 * go  - command-line options; "-a", "--bgflat" and "-L" are queried here.
 * cfg - run configuration; this routine reads cfg->N, cfg->nproc, cfg->hfp,
 *       cfg->abc and cfg->hmmfile, and creates cfg->bg on the first
 *       successfully read HMM if it is still NULL.
 *
 * A master can only return if it's successful. 
 * Errors in an MPI master come in two classes: recoverable and nonrecoverable.
 * 
 * Recoverable errors include all worker-side errors, and any
 * master-side error that does not affect MPI communication. Error
 * messages from recoverable errors are delayed until we've cleanly
 * shut down the workers.
 * 
 * Unrecoverable errors are master-side errors that may affect MPI
 * communication, meaning we cannot count on being able to reach the
 * workers and shut them down. Unrecoverable errors result in immediate
 * p7_Fail()'s, which will cause MPI to shut down the worker processes
 * uncleanly.
 */
static void
mpi_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int              xstatus       = eslOK; /* changes in the event of a recoverable error */
  P7_HMM          *hmm           = NULL;  /* HMM read but not yet sent to a worker; NULL otherwise */
  P7_HMM         **hmmlist       = NULL;  /* queue of HMMs being worked on, 1..nproc-1 */
  char            *wbuf          = NULL;  /* working buffer for sending packed profiles and receiving packed results. */
  int              wn            = 0;     /* allocated size of wbuf, in bytes */
  double          *xv            = NULL;  /* results: array of N scores */
  int             *av            = NULL;  /* optional results: array of N alignment lengths */
  int              have_work     = TRUE;  /* TRUE while HMMs remain to be handed out */
  int              nproc_working = 0;     /* number of workers currently busy */
  int              wi;                    /* rank of the worker being served / next to be given work */
  int              pos;                   /* unpack position within wbuf */
  char             errbuf[eslERRBUFSIZE];
  int              status;
  MPI_Status       mpistatus;
  

  /* Master initialization. errbuf is passed as data ("%s"), never as a format. */
  if (init_master_cfg(go, cfg, errbuf)            != eslOK) p7_Fail("%s", errbuf);
  if (minimum_mpi_working_buffer(go, cfg->N, &wn) != eslOK) p7_Fail("mpi pack sizes must have failed");
  ESL_ALLOC(wbuf,    sizeof(char)     * wn);
  ESL_ALLOC(xv,      sizeof(double)   * cfg->N);
  if (esl_opt_GetBoolean(go, "-a"))
    ESL_ALLOC(av,    sizeof(int)      * cfg->N);
  ESL_ALLOC(hmmlist, sizeof(P7_HMM *) * cfg->nproc);
  for (wi = 0; wi < cfg->nproc; wi++) hmmlist[wi] = NULL;

  /* Standard design pattern for data parallelization in a master/worker model. (J1/78-79).  */
  wi = 1;
  while (have_work || nproc_working)
    {
      /* Get next work unit: one HMM, <hmm> */
      if (have_work) 
        {
          status = p7_hmmfile_Read(cfg->hfp, &(cfg->abc), &hmm);
          if (status == eslOK)
            {
              /* Background model setup belongs on the success path: it uses
               * cfg->abc, which the read above was handed, and it must not be
               * attempted after a failed read.
               */
              if (cfg->bg == NULL) { /* first time only */
                if (esl_opt_GetBoolean(go, "--bgflat")) cfg->bg = p7_bg_CreateUniform(cfg->abc);
                else                                    cfg->bg = p7_bg_Create(cfg->abc);
              }
              /* Redundant after the first HMM, but kept from the original code. */
              p7_bg_SetLength(cfg->bg, esl_opt_GetInteger(go, "-L"));  /* set the null model background length in both master and workers. */
            }
          else
            {
              have_work = FALSE;
              /* eslEOF is the normal end of input and leaves xstatus untouched. */
              if      (status == eslEOD)       { xstatus = status; snprintf(errbuf, sizeof(errbuf), "read failed, HMM file %s may be truncated?", cfg->hmmfile); }
              else if (status == eslEFORMAT)   { xstatus = status; snprintf(errbuf, sizeof(errbuf), "bad file format in HMM file %s",             cfg->hmmfile); }
              else if (status == eslEINCOMPAT) { xstatus = status; snprintf(errbuf, sizeof(errbuf), "HMM file %s contains different alphabets",   cfg->hmmfile); }
              else if (status != eslEOF)       { xstatus = status; snprintf(errbuf, sizeof(errbuf), "Unexpected error in reading HMMs from %s",   cfg->hmmfile); }
            }
        }

      /* If we have work but no free workers, or we have no work but workers
       * are still working, then wait for a result to return from any worker.
       */
      if ( (have_work && nproc_working == cfg->nproc-1) || (! have_work && nproc_working > 0))
        {
          if (MPI_Recv(wbuf, wn, MPI_PACKED, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &mpistatus) != 0) p7_Fail("mpi recv failed");
          wi = mpistatus.MPI_SOURCE;
          
          /* Check the xstatus before printing results.
           * If we're in a recoverable error state, we're only clearing worker results, prior to a clean failure
           */
          if (xstatus == eslOK) 
            {
              pos = 0;
              if (MPI_Unpack(wbuf, wn, &pos, &xstatus, 1, MPI_INT, MPI_COMM_WORLD)     != 0)     p7_Fail("mpi unpack failed");
              if (xstatus == eslOK) /* worker reported success. Get the results. */
                {
                  if (MPI_Unpack(wbuf, wn, &pos, xv,     cfg->N, MPI_DOUBLE, MPI_COMM_WORLD) != 0)   p7_Fail("score vector unpack failed");
                  if (esl_opt_GetBoolean(go, "-a") &&
                      MPI_Unpack(wbuf, wn, &pos, av,     cfg->N, MPI_INT,    MPI_COMM_WORLD) != 0)   p7_Fail("alilen vector unpack failed");
                  if ((status = output_result(go, cfg, errbuf, hmmlist[wi], xv, av))  != eslOK) xstatus = status;
                }
              else      /* worker reported a user error. Get the errbuf. */
                {
                  if (MPI_Unpack(wbuf, wn, &pos, errbuf, eslERRBUFSIZE, MPI_CHAR, MPI_COMM_WORLD) != 0) p7_Fail("mpi unpack of errbuf failed");
                  /* Only an HMM that was read but never dispatched is ours to free
                   * here; HMMs already handed out are owned by hmmlist[].
                   */
                  if (have_work) { p7_hmm_Destroy(hmm); hmm = NULL; }
                  have_work = FALSE;
                }
            }
          /* Worker wi is free again: release the HMM it was working on. */
          p7_hmm_Destroy(hmmlist[wi]);
          hmmlist[wi] = NULL;
          nproc_working--;
        }
        
      /* If we have work, assign it to a free worker; ownership of <hmm> moves to hmmlist[wi]. */
      if (have_work) 
        {
          if (p7_hmm_mpi_Send(hmm, wi, 0, MPI_COMM_WORLD, &wbuf, &wn) != eslOK) p7_Fail("MPI HMM send failed");
          hmmlist[wi] = hmm;
          hmm = NULL;           /* avoid keeping a second, stale reference */
          wi++;
          nproc_working++;
        }
    }

  /* Tell all the workers (1..nproc-1) to shut down by sending them a NULL workunit. */
  for (wi = 1; wi < cfg->nproc; wi++)
    if (p7_hmm_mpi_Send(NULL, wi, 0, MPI_COMM_WORLD, &wbuf, &wn) != eslOK) p7_Fail("MPI HMM send failed");      


  free(hmmlist);
  free(wbuf);
  free(xv);
  free(av);             /* NULL when -a was off; free(NULL) is a no-op */
  /* Now it is safe to report the delayed (recoverable) error, if any. */
  if (xstatus != eslOK) p7_Fail("%s", errbuf);
  else                  return;

 ERROR:
  /* Target used by ESL_ALLOC above. */
  free(hmmlist);
  free(wbuf);
  free(xv);
  free(av);
  p7_Fail("Fatal error in mpi_master");
}