コード例 #1
0
ファイル: uhmmsearch.cpp プロジェクト: ggrekhov/ugene
static void
main_loop_serial(struct plan7_s *hmm, const char* seq, int seqLen, struct threshold_s *thresh, int do_forward,
                 int do_null2, int do_xnu, struct histogram_s *histogram, struct tophit_s *ghit, struct tophit_s *dhit, 
                 int *ret_nseq, TaskStateInfo& ti)
{
	//get HMMERTaskLocalData
	HMMERTaskLocalData *tld = getHMMERTaskLocalData();
	alphabet_s *al = &tld->al;

    struct dpmatrix_s *mx;      // DP matrix, growable
    struct p7trace_s *tr;       // traceback
    unsigned char   *dsq;       // digitized target sequence
    float  sc;                  // score of an HMM search
    double pvalue;              // pvalue of an HMM score
    double evalue;              // evalue of an HMM score

    // Create a DP matrix; initially only two rows big, but growable;
    // we overalloc by 25 rows (L dimension) when we grow; not growable
    // in model dimension, since we know the hmm size
    mx = CreatePlan7Matrix(1, hmm->M, 25, 0); 

    assert(seqLen > 0);

    dsq = DigitizeSequence(seq, seqLen);

    if (do_xnu && al->Alphabet_type == hmmAMINO) {
        XNU(dsq, seqLen);
    }

    // 1. Recover a trace by Viterbi.
    //    In extreme cases, the alignment may be literally impossible;
    //    in which case, the score comes out ridiculously small (but not
    //    necessarily <= -INFTY, because we're not terribly careful
    //    about underflow issues), and tr will be returned as NULL.
    if (P7ViterbiSpaceOK(seqLen, hmm->M, mx)) {
        sc = P7Viterbi(dsq, seqLen, hmm, mx, &tr);
    } else {
        sc = P7SmallViterbi(dsq, seqLen, hmm, mx, &tr, ti.progress);
    }

    // 2. If we're using Forward scores, calculate the
    //    whole sequence score; this overrides anything
    //    PostprocessSignificantHit() is going to do to the per-seq score.
    if (do_forward) {
        sc  = P7Forward(dsq, seqLen, hmm, NULL);
        if (do_null2)   sc -= TraceScoreCorrection(hmm, tr, dsq); 
    }

    // 2. Store score/pvalue for global alignment; will sort on score,
    //    which in hmmsearch is monotonic with E-value. 
    //    Keep all domains in a significant sequence hit.
    //    We can only make a lower bound estimate of E-value since
    //    we don't know the final value of nseq yet, so the list
    //    of hits we keep in memory is >= the list we actually
    //    output. 
    //
    pvalue = PValue(hmm, sc);
    evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) pvalue;
    if (sc >= thresh->globT && evalue <= thresh->globE)  {
        sc = PostprocessSignificantHit(ghit, dhit, 
            tr, hmm, dsq, seqLen,
			(char *)"sequence", //todo: sqinfo.name, 
            NULL, 
            NULL, 
            do_forward, sc,
            do_null2,
            thresh,
            FALSE); // FALSE-> not hmmpfam mode, hmmsearch mode
    }
    AddToHistogram(histogram, sc);
    P7FreeTrace(tr);
    free(dsq);

    FreePlan7Matrix(mx);
    return;
}
コード例 #2
0
int 
main(void)
{
  struct p7trace_s *tr;         /* traceback of an alignment               */
  int      master_tid;		/* PVM TID of our master */
  char    *hmmfile;	        /* file to read HMM(s) from                */
  HMMFILE *hmmfp;               /* opened hmmfile for reading              */
  struct plan7_s *hmm;
  char    *seq;
  char    *dsq;
  int      len;
  int      nhmm;		/* number of HMM to work on                */
  float    sc;
  int      my_idx = -1;		/* my index, 0..nslaves-1 */
  float    globT;		/* T parameter: keep only hits > globT bits */
  double   globE;		/* E parameter: keep hits < globE E-value   */
  double   pvalue;		/* Z*pvalue = Evalue                        */
  int      Z;			/* nseq to base E value calculation on      */
  int      send_trace;		/* TRUE if score is significant             */
  int      do_xnu;		/* TRUE to do XNU filter on seq             */
  int      do_forward;		/* TRUE to use Forward() scores not Viterbi */
  int      do_null2;		/* TRUE to correct scores w/ ad hoc null2   */
  int      alphatype;		/* alphabet type, hmmAMINO or hmmNUCLEIC    */
  int      code;		/* return code after initialization         */

  
  /* Register leave_pvm() cleanup function so any exit() call
   * first calls pvm_exit().
   */
  if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); }

  /*****************************************************************
   * initialization.
   * Master broadcasts to us: 
   *     1) len of HMM file name        (int)
   *     2) name of HMM file            (string)
   *     3) length of sequence string   (int) 
   *     4) sequence                    (string)
   *     5) globT threshold
   *     6) globE threshold
   *     7) Z 
   *     8) do_xnu flag
   *     9) do_forward flag
   *    10) do_null2 flag
   *    11) alphabet type
   * We receive the broadcast and open the files.    
   ******************************************************************/

  master_tid = pvm_parent();	/* who's our master? */

  pvm_recv(master_tid, HMMPVM_INIT);
  pvm_upkint(&len, 1, 1);
  hmmfile = MallocOrDie(sizeof(char *) * (len+1));
  pvm_upkstr(hmmfile);
  pvm_upkint(&len, 1, 1);
  seq = MallocOrDie(sizeof(char *) * (len+1));
  pvm_upkstr(seq);
  pvm_upkfloat(&globT, 1, 1);
  pvm_upkdouble(&globE, 1, 1);
  pvm_upkint(&Z, 1, 1);
  pvm_upkint(&do_xnu, 1, 1);
  pvm_upkint(&do_forward, 1, 1);
  pvm_upkint(&do_null2, 1, 1);
  pvm_upkint(&alphatype, 1, 1);

  SetAlphabet(alphatype);
				/* Open HMM file (maybe in HMMERDB) */
  code = HMMPVM_OK;
  if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
    code = HMMPVM_NO_HMMFILE;
  else if (hmmfp->gsi == NULL)
    code = HMMPVM_NO_INDEX;
  
  /* report our status.
   */
  pvm_initsend(PvmDataDefault);
  pvm_pkint(&code, 1, 1);	
  pvm_send(master_tid, HMMPVM_RESULTS);

  dsq = DigitizeSequence(seq, len);
  if (do_xnu) XNU(dsq, len);

  /*****************************************************************
   * Main loop.
   * Receive an integer 0..nhmm-1 for which HMM to search against.
   * If we receive a -1, we shut down. 
   *****************************************************************/ 
  
  for (;;) 
    {
      pvm_recv(master_tid, HMMPVM_WORK);
      pvm_upkint(&nhmm, 1, 1);
      if (my_idx < 0) my_idx = nhmm; /* first time thru, remember what index we are. */

      if (nhmm == -1) break;	/* shutdown signal */

      /* move to our assigned HMM in the HMM file, and read it
       */
      HMMFilePositionByIndex(hmmfp, nhmm);
      if (! HMMFileRead(hmmfp, &hmm)) Die("unexpected end of HMM file"); 
      if (hmm == NULL)                Die("unexpected failure to parse HMM file"); 
      P7Logoddsify(hmm, TRUE);
      
      /* Score sequence, do alignment (Viterbi), recover trace
       */
      if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT)
	{
	  SQD_DPRINTF1(("P7Viterbi(): Estimated size %d Mb\n", P7ViterbiSize(len, hmm->M)));
	  sc = P7Viterbi(dsq, len, hmm, &tr);
	}
      else
	{
	  SQD_DPRINTF1(("P7SmallViterbi() called; %d Mb > %d\n", P7ViterbiSize(len, hmm->M), RAMLIMIT));
	  sc = P7SmallViterbi(dsq, len, hmm, &tr);
	}

      if (do_forward) sc  = P7Forward(dsq, len, hmm, NULL);
      if (do_null2)   sc -= TraceScoreCorrection(hmm, tr, dsq);
	
      pvalue = PValue(hmm, sc);
      send_trace = (sc > globT && pvalue * (float) Z < globE) ? 1 : 0;

      /* return output
       */
      pvm_initsend(PvmDataDefault);
      pvm_pkint(&my_idx, 1, 1);	/* tell master who we are */
      pvm_pkstr(hmm->name);	/* double check that we did the right thing */
      pvm_pkfloat(&sc, 1, 1);
      pvm_pkdouble(&pvalue, 1, 1);
      pvm_pkint(&send_trace, 1, 1); /* flag for whether a trace structure is coming */
      if (send_trace) PVMPackTrace(tr);
      pvm_send(master_tid, HMMPVM_RESULTS);

      /* cleanup
       */
      FreePlan7(hmm);
      P7FreeTrace(tr);
    }

  /*********************************************** 
   * Cleanup, return.
   ***********************************************/

  HMMFileClose(hmmfp);
  free(seq);
  free(dsq);
  free(hmmfile);
  return 0;
}