コード例 #1
0
ファイル: prior.c プロジェクト: PlantandFoodResearch/wise2
/* Function: P7PriorifyHMM()
 * 
 * Purpose:  Add pseudocounts to an HMM using Dirichlet priors,
 *           and renormalize the HMM.
 * 
 * Args:     hmm -- the HMM to add counts to (counts form)
 *           pri -- the Dirichlet prior to use
 *           
 * Return:   (void)
 *           HMM returns in probability form.
 */          
void
P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri)
{
  int k;			/* counter for model position   */
  float d;			/* a denominator */

  /* Model-dependent transitions are handled simply; Laplace.
   */
  FSet(hmm->begin+2, hmm->M-1, 0.);     /* wipe internal BM entries */
  FSet(hmm->end+1, hmm->M-1, 0.);	/* wipe internal ME exits   */
  d = hmm->tbd1 + hmm->begin[1] + 2.;
  hmm->tbd1        = (hmm->tbd1 + 1.)/ d;
  hmm->begin[1]    = (hmm->begin[1] + 1.)/ d;
  hmm->end[hmm->M] = 1.0;

  /* Main model transitions and emissions
   */
  for (k = 1; k < hmm->M; k++)
    {
      P7PriorifyTransitionVector(hmm->t[k], pri);
      P7PriorifyEmissionVector(hmm->mat[k], pri, pri->mnum, pri->mq, pri->m, NULL);
      P7PriorifyEmissionVector(hmm->ins[k], pri, pri->inum, pri->iq, pri->i, NULL);
    }
  P7PriorifyEmissionVector(hmm->mat[hmm->M], pri, pri->mnum, pri->mq, pri->m, NULL);

  Plan7Renormalize(hmm);
}
コード例 #2
0
ファイル: plan7.c プロジェクト: Denis84/EPA-WorkBench
/* Function: Plan9toPlan7()
 * 
 * Purpose:  Convert an old HMM into Plan7. Configures it in
 *           ls mode.
 *           
 * Args:     hmm       - old ugly plan9 style HMM
 *           ret_plan7 - new wonderful Plan7 HMM
 *           
 * Return:   (void)    
 *           Plan7 HMM is allocated here. Free w/ FreePlan7().
 */               
void
Plan9toPlan7(struct plan9_s *hmm, struct plan7_s **ret_plan7)
{
  struct plan7_s *plan7;
  int k, x;

  plan7 = AllocPlan7(hmm->M);
  
  for (k = 1; k < hmm->M; k++)
    {
      plan7->t[k][TMM] = hmm->mat[k].t[MATCH];
      plan7->t[k][TMD] = hmm->mat[k].t[DELETE];
      plan7->t[k][TMI] = hmm->mat[k].t[INSERT];
      plan7->t[k][TDM] = hmm->del[k].t[MATCH];
      plan7->t[k][TDD] = hmm->del[k].t[DELETE];
      plan7->t[k][TIM] = hmm->ins[k].t[MATCH];
      plan7->t[k][TII] = hmm->ins[k].t[INSERT];
    }

  for (k = 1; k <= hmm->M; k++)
    for (x = 0; x < Alphabet_size; x++)
      plan7->mat[k][x] = hmm->mat[k].p[x];

  for (k = 1; k < hmm->M; k++)
    for (x = 0; x < Alphabet_size; x++)
      plan7->ins[k][x] = hmm->ins[k].p[x];

  plan7->tbd1 = hmm->mat[0].t[DELETE] / (hmm->mat[0].t[DELETE] + hmm->mat[0].t[MATCH]);
  
		/* We have to make up the null transition p1; use default */
  P7DefaultNullModel(plan7->null, &(plan7->p1));
  for (x = 0; x < Alphabet_size; x++)
    plan7->null[x] = hmm->null[x];
      
  if (hmm->name != NULL) 
    Plan7SetName(plan7, hmm->name);
  if (hmm->flags & HMM_REF) {
    strcpy(plan7->rf, hmm->ref);
    plan7->flags |= PLAN7_RF;
  }
  if (hmm->flags & HMM_CS) {
    strcpy(plan7->cs, hmm->cs);
    plan7->flags |= PLAN7_CS;
  }

  Plan7LSConfig(plan7);		/* configure specials for ls-style alignment */
  Plan7Renormalize(plan7);	/* mainly to correct for missing ID and DI */
  plan7->flags |= PLAN7_HASPROB;	/* probabilities are valid */
  plan7->flags &= ~PLAN7_HASBITS;	/* scores are not valid    */
  *ret_plan7 = plan7;
}
コード例 #3
0
/* Function: P7PriorifyHMM()
 * 
 * Purpose:  Add pseudocounts to an HMM using Dirichlet priors,
 *           and renormalize the HMM.
 * 
 * Args:     hmm -- the HMM to add counts to (counts form)
 *           pri -- the Dirichlet prior to use
 *           
 * Return:   (void)
 *           HMM returns in probability form.
 */          
void
P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri)
{
  int k;			/* counter for model position   */
  float d;			/* a denominator */
  float tq[MAXDCHLET];		/* prior distribution over mixtures */
  float mq[MAXDCHLET];		/* prior distribution over mixtures */
  float iq[MAXDCHLET];		/* prior distribution over mixtures */

  /* Model-dependent transitions are handled simply; Laplace.
   */
  FSet(hmm->begin+2, hmm->M-1, 0.);     /* wipe internal BM entries */
  FSet(hmm->end+1, hmm->M-1, 0.);	/* wipe internal ME exits   */
  d = hmm->tbd1 + hmm->begin[1] + 2.;
  hmm->tbd1        = (hmm->tbd1 + 1.)/ d;
  hmm->begin[1]    = (hmm->begin[1] + 1.)/ d;
  hmm->end[hmm->M] = 1.0;

  /* Main model transitions and emissions
   */
  for (k = 1; k < hmm->M; k++)
    {
      /* The following code chunk is experimental. 
       * Collaboration with Michael Asman, Erik Sonnhammer, CGR Stockholm.
       * Only activated if X-PR* annotation has been used, in which
       * priors are overridden and a single Dirichlet component is
       * specified for each column (using structural annotation).
       * If X-PR* annotation is not used, which is usually the case, 
       * the following code has no effect (observe how the real prior 
       * distributions are copied into tq, mq, iq).
       */
      if (hmm->tpri != NULL && hmm->tpri[k] >= 0)
	{
	  if (hmm->tpri[k] >= pri->tnum) Die("X-PRT annotation out of range");
	  FSet(tq, pri->tnum, 0.0);
	  tq[hmm->tpri[k]] = 1.0;
	}
      else 
	FCopy(tq, pri->tq, pri->tnum);
      if (hmm->mpri != NULL && hmm->mpri[k] >= 0)
	{
	  if (hmm->mpri[k] >= pri->mnum) Die("X-PRM annotation out of range");
	  FSet(mq, pri->mnum, 0.0);
	  mq[hmm->mpri[k]] = 1.0;
	}
      else 
	FCopy(mq, pri->mq, pri->mnum);
      if (hmm->ipri != NULL && hmm->ipri[k] >= 0)
	{
	  if (hmm->ipri[k] >= pri->inum) Die("X-PRI annotation out of range");
	  FSet(iq, pri->inum, 0.0);
	  iq[hmm->ipri[k]] = 1.0;
	}
      else 
	FCopy(iq, pri->iq, pri->inum);

      /* This is the main line of the code:
       */
      P7PriorifyTransitionVector(hmm->t[k], pri, tq);
      P7PriorifyEmissionVector(hmm->mat[k], pri, pri->mnum, mq, pri->m, NULL);
      P7PriorifyEmissionVector(hmm->ins[k], pri, pri->inum, iq, pri->i, NULL);
    }

  /* We repeat the above steps just for the final match state, M.
   */
  if (hmm->mpri != NULL && hmm->mpri[hmm->M] >= 0)
    {
      if (hmm->mpri[hmm->M] >= pri->mnum) Die("X-PRM annotation out of range");
      FSet(mq, pri->mnum, 0.0);
      mq[hmm->mpri[hmm->M]] = 1.0;
    }
  else 
    FCopy(mq, pri->mq, pri->mnum);

  P7PriorifyEmissionVector(hmm->mat[hmm->M], pri, pri->mnum, mq, pri->m, NULL);

  /* Now we're done. Convert the counts-based HMM to probabilities.
   */
  Plan7Renormalize(hmm);
}
コード例 #4
0
int main(int argc, char **argv) 
{
    const char      *hmmfile;	/* file to read HMMs from                  */
    FILE            *fp;	/* output file handle                      */
    HMMFILE         *hmmfp;	/* opened hmmfile for reading              */
    struct plan7_s  *hmm;	/* HMM to generate from                    */
    int              L;		/* length of a sequence                    */
    int              i;		/* counter over sequences                  */

    char            *ofile;	/* output sequence file                    */
    int              nseq;	/* number of seqs to sample                */
    int              seed;	/* random number generator seed            */
    int              be_quiet;	/* TRUE to silence header/footer           */
    int              do_alignment; /* TRUE to output in aligned format     */ 
    int              do_consensus; /* TRUE to do a single consensus seq    */

    AjBool ajselex;
    AjBool ajcons;
    AjPFile inf=NULL;
    AjPFile outf=NULL;
    AjPStr  instr=NULL;
    AjPStr  outstr=NULL;
  

#ifdef MEMDEBUG
    unsigned long histid1, histid2, orig_size, current_size;
    orig_size = malloc_inuse(&histid1);
    fprintf(stderr, "[... memory debugging is ON ...]\n");
#endif

    /*********************************************** 
     * Parse command line
     ***********************************************/

    nseq         = 10;

    be_quiet     = FALSE;
    do_alignment = FALSE;  
    do_consensus = FALSE;
    ofile        = NULL;

    embInitPV("ohmmemit",argc,argv,"HMMER",VERSION);

    ajselex = ajAcdGetBoolean("selex");
    ajcons  = ajAcdGetBoolean("consensus");
    nseq    = ajAcdGetInt("number");
    seed    = ajAcdGetInt("seed");
    inf     = ajAcdGetInfile("infile");
    outf    = ajAcdGetOutfile("outfile");
  
    if(!seed)
	seed = time ((time_t *) NULL);

    if(ajselex)
	do_alignment=TRUE;
    else
	do_alignment=FALSE;
  
    if(ajcons)
	do_consensus=TRUE;
    else
	do_consensus=FALSE;

    instr  = ajStrNewC((char *)ajFileGetNameC(inf));
    outstr = ajStrNewC((char *)ajFileGetNameC(outf));

    hmmfile = ajStrGetPtr(instr);

    sre_srandom(seed);

    if (do_alignment && do_consensus)
	ajFatal("Sorry, -selex and -consensus are incompatible.\n"); 
    if (nseq != 10 && do_consensus)
	ajWarn("-consensus overrides -number (# of sampled seqs)");

    /*********************************************** 
     * Open HMM file (might be in HMMERDB or current directory).
     * Read a single HMM from it.
     ***********************************************/

    if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
	ajFatal("Failed to open HMM file %s\n", hmmfile);
    if (!HMMFileRead(hmmfp, &hmm)) 
	ajFatal("Failed to read any HMMs from %s\n", hmmfile);
    HMMFileClose(hmmfp);
    if (hmm == NULL) 
	ajFatal("HMM file %s corrupt or in incorrect format? Parse failed",
		hmmfile);

    /* Configure the HMM to shut off N,J,C emission: so we
     * do a simple single pass through the model.
     */
    Plan7NakedConfig(hmm);
    Plan7Renormalize(hmm);

    /*********************************************** 
     * Open the output file, or stdout
     ***********************************************/ 

    fp = ajFileGetFileptr(outf);
  
 
    /*********************************************** 
     * Show the options banner
     ***********************************************/
    be_quiet=TRUE;
    if (! be_quiet) 
    {
	printf("HMM file:             %s\n", hmmfile);
	if (! do_consensus)
	{
	    printf("Number of seqs:       %d\n", nseq);
	    printf("Random seed:          %d\n", seed);
	}
	printf("- - - - - - - - - - - - - - - - - - - - - - - - - "
	       "- - - - - - -\n\n");
    }

    /*********************************************** 
     * Do the work.
     * If we're generating an alignment, we have to collect
     * all our traces, then output. If we're generating unaligned
     * sequences, we can emit one at a time.
     ***********************************************/

    if (do_consensus) 
    {
	char    *seq;
	SQINFO   sqinfo;	/* info about sequence (name/desc)        */

	EmitConsensusSequence(hmm, &seq, NULL, &L, NULL);
	strcpy(sqinfo.name, "consensus");
	sqinfo.len = L;
	sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	WriteSeq(fp, kPearson, seq, &sqinfo);
	free(seq);
    }
    else if (do_alignment)
    {
	struct p7trace_s **tr;
	char           **dsq;
	SQINFO          *sqinfo;
	char           **aseq;
	AINFO            ainfo;
	float           *wgt;

	dsq    = MallocOrDie(sizeof(char *)             * nseq);
	tr     = MallocOrDie(sizeof(struct p7trace_s *) * nseq);
	sqinfo = MallocOrDie(sizeof(SQINFO)             * nseq);
	wgt    = MallocOrDie(sizeof(float)              * nseq);
	FSet(wgt, nseq, 1.0);

	for (i = 0; i < nseq; i++)
	{
	    EmitSequence(hmm, &(dsq[i]), &L, &(tr[i]));
	    sprintf(sqinfo[i].name, "seq%d", i+1);
	    sqinfo[i].len   = L;
	    sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN;
	}

	P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE, 
			   &aseq, &ainfo);

	/* Output the alignment */
	WriteSELEX(fp, aseq, &ainfo, 50);
	if (ofile != NULL && !be_quiet)
	    printf("Alignment saved in file %s\n", ofile);

	/* Free memory
	 */
	for (i = 0; i < nseq; i++) 
	{
	    P7FreeTrace(tr[i]);
	    free(dsq[i]);
	}
	FreeAlignment(aseq, &ainfo);
	free(sqinfo);
	free(dsq);
	free(wgt);
	free(tr);
    }
    else				/* unaligned sequence output */
    {
	struct p7trace_s *tr;
	char             *dsq;
	char             *seq;
	SQINFO            sqinfo;

	for (i = 0; i < nseq; i++)
	{
	    EmitSequence(hmm, &dsq, &L, &tr);
	    sprintf(sqinfo.name, "seq%d", i+1);
	    sqinfo.len   = L;
	    sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	    seq = DedigitizeSequence(dsq, L);

	    WriteSeq(fp, kPearson, seq, &sqinfo);
	  
	    P7FreeTrace(tr);
	    free(dsq);
	    free(seq);
	}
    }

    ajFileClose(&outf);
  
    FreePlan7(hmm);
    SqdClean();

#ifdef MEMDEBUG
    current_size = malloc_inuse(&histid2);
    if (current_size != orig_size)
	malloc_list(2, histid1, histid2);
    else
	fprintf(stderr, "[No memory leaks.]\n");
#endif


    ajStrDel(&instr);
    ajStrDel(&outstr);
    ajFileClose(&inf);
    ajFileClose(&outf);

    embExit();
    return 0;
}