/* Function: PAMPrior()
 * 
 * Purpose:  Produces an ad hoc "Dirichlet mixture" prior for
 *           match emissions, using a PAM matrix. 
 *           
 *           Side effect notice: PAMPrior() replaces the match
 *           emission section of an existing Dirichlet prior,
 *           which is /expected/ to be a simple one-component 
 *           kind of prior. The insert emissions /must/ be a
 *           one-component prior (because of details in how 
 *           PriorifyEmissionVector() is done). However, 
 *           the transitions /could/ be a mixture Dirichlet prior 
 *           without causing problems. In other words, the
 *           -p and -P options of hmmb can coexist, but there
 *           may be conflicts. PAMPrior() checks for these,
 *           so there's no serious problem, except that the
 *           error message from PAMPrior() might be confusing to
 *           a user. 
 */
void
PAMPrior(char *pamfile, struct p7prior_s *pri, float wt)
{
  FILE  *fp;
  char  *blastpamfile;            /* BLAST looks in aa/ subdirectory of BLASTMAT */
  int  **pam;
  float  scale;
  int    xi, xj;
  int    idx1, idx2;

  if (Alphabet_type != hmmAMINO)
    Die("PAM prior is only valid for protein sequences");
  if (pri->strategy != PRI_DCHLET)
    Die("PAM prior may only be applied over an existing Dirichlet prior");
  if (pri->inum != 1)
    Die("PAM prior requires that the insert emissions be a single Dirichlet");
  if (MAXDCHLET < 20)
    Die("Whoa, code is misconfigured; MAXDCHLET must be >= 20 for PAM prior");

  blastpamfile = FileConcat("aa", pamfile);

  if ((fp = fopen(pamfile, "r")) == NULL &&
      (fp = EnvFileOpen(pamfile, "BLASTMAT", NULL)) == NULL &&
      (fp = EnvFileOpen(blastpamfile, "BLASTMAT", NULL)) == NULL)
    Die("Failed to open PAM scoring matrix file %s", pamfile);
  if (! ParsePAMFile(fp, &pam, &scale))
    Die("Failed to parse PAM scoring matrix file %s", pamfile);
  fclose(fp);
  free(blastpamfile);

  pri->strategy = PRI_PAM;
  pri->mnum     = 20;
  
  /* Convert PAM entries back to conditional prob's P(xj | xi),
   * which we'll use as "pseudocounts" weighted by wt.
   */
  for (xi = 0; xi < Alphabet_size; xi++)
    for (xj = 0; xj < Alphabet_size; xj++)
      {
        idx1 = Alphabet[xi] - 'A';
        idx2 = Alphabet[xj] - 'A';
        pri->m[xi][xj] = aafq[xj] * exp((float) pam[idx1][idx2] * scale);
      }
  
  /* Normalize so that rows add up to wt.
   * i.e. Sum(xj) mat[xi][xj] = wt for every row xi
   */
  for (xi = 0; xi < Alphabet_size; xi++)
    {
      pri->mq[xi] = 1. / Alphabet_size;
      FNorm(pri->m[xi], Alphabet_size);
      FScale(pri->m[xi], Alphabet_size, wt);
    }

  Free2DArray((void **)pam,27);
}
Ejemplo n.º 2
0
/* Function: Plan7RenormalizeExits()
 * Date:     SRE, Fri Aug 14 11:22:19 1998 [St. Louis]
 *
 * Purpose:  Renormalize just the match state transitions;
 *           for instance, after a Config() function has
 *           modified the exit distribution.
 *
 * Args:     hmm - hmm to renormalize
 *
 * Returns:  void
 */
void
Plan7RenormalizeExits(struct plan7_s *hmm)
{
  int   k;
  float d;

  for (k = 1; k < hmm->M; k++)
    {
      d = FSum(hmm->t[k], 3);
      FScale(hmm->t[k], 3, 1./(d + d*hmm->end[k]));
    }
}
Ejemplo n.º 3
0
/* Function: Plan7Renormalize()
 * 
 * Purpose:  Take an HMM in counts form, and renormalize
 *           all of its probability vectors. Also enforces
 *           Plan7 restrictions on nonexistent transitions.
 *           
 * Args:     hmm - the model to renormalize.
 *                 
 * Return:   (void)
 *           hmm is changed.
 */                          
void
Plan7Renormalize(struct plan7_s *hmm)
{
  int   k;			/* counter for model position */
  int   st;			/* counter for special states */
  float d;			/* denominator */

				/* match emissions */
  for (k = 1; k <= hmm->M; k++) 
    FNorm(hmm->mat[k], Alphabet_size);
				/* insert emissions */
  for (k = 1; k < hmm->M; k++)
    FNorm(hmm->ins[k], Alphabet_size);
				/* begin transitions */
  d = FSum(hmm->begin+1, hmm->M) + hmm->tbd1;
  FScale(hmm->begin+1, hmm->M, 1./d);
  hmm->tbd1 /= d;
				/* main model transitions */
  for (k = 1; k < hmm->M; k++)
    {
      d = FSum(hmm->t[k], 3) + hmm->end[k]; 
      FScale(hmm->t[k], 3, 1./d);
      hmm->end[k] /= d;

      FNorm(hmm->t[k]+3, 2);	/* insert */
      FNorm(hmm->t[k]+5, 2);	/* delete */
    }
				/* null model emissions */
  FNorm(hmm->null, Alphabet_size);
				/* special transitions  */
  for (st = 0; st < 4; st++)
    FNorm(hmm->xt[st], 2);
				/* enforce nonexistent transitions */
				/* (is this necessary?) */
  hmm->t[0][TDM] = hmm->t[0][TDD] = 0.0;

  hmm->flags &= ~PLAN7_HASBITS;	/* clear the log-odds ready flag */
  hmm->flags |= PLAN7_HASPROB;	/* set the probabilities OK flag */
}