/* Function: PAMPrior() * * Purpose: Produces an ad hoc "Dirichlet mixture" prior for * match emissions, using a PAM matrix. * * Side effect notice: PAMPrior() replaces the match * emission section of an existing Dirichlet prior, * which is /expected/ to be a simple one-component * kind of prior. The insert emissions /must/ be a * one-component prior (because of details in how * PriorifyEmissionVector() is done). However, * the transitions /could/ be a mixture Dirichlet prior * without causing problems. In other words, the * -p and -P options of hmmb can coexist, but there * may be conflicts. PAMPrior() checks for these, * so there's no serious problem, except that the * error message from PAMPrior() might be confusing to * a user. */ void PAMPrior(char *pamfile, struct p7prior_s *pri, float wt) { FILE *fp; char *blastpamfile; /* BLAST looks in aa/ subdirectory of BLASTMAT */ int **pam; float scale; int xi, xj; int idx1, idx2; if (Alphabet_type != hmmAMINO) Die("PAM prior is only valid for protein sequences"); if (pri->strategy != PRI_DCHLET) Die("PAM prior may only be applied over an existing Dirichlet prior"); if (pri->inum != 1) Die("PAM prior requires that the insert emissions be a single Dirichlet"); if (MAXDCHLET < 20) Die("Whoa, code is misconfigured; MAXDCHLET must be >= 20 for PAM prior"); blastpamfile = FileConcat("aa", pamfile); if ((fp = fopen(pamfile, "r")) == NULL && (fp = EnvFileOpen(pamfile, "BLASTMAT", NULL)) == NULL && (fp = EnvFileOpen(blastpamfile, "BLASTMAT", NULL)) == NULL) Die("Failed to open PAM scoring matrix file %s", pamfile); if (! ParsePAMFile(fp, &pam, &scale)) Die("Failed to parse PAM scoring matrix file %s", pamfile); fclose(fp); free(blastpamfile); pri->strategy = PRI_PAM; pri->mnum = 20; /* Convert PAM entries back to conditional prob's P(xj | xi), * which we'll use as "pseudocounts" weighted by wt. 
*/ for (xi = 0; xi < Alphabet_size; xi++) for (xj = 0; xj < Alphabet_size; xj++) { idx1 = Alphabet[xi] - 'A'; idx2 = Alphabet[xj] - 'A'; pri->m[xi][xj] = aafq[xj] * exp((float) pam[idx1][idx2] * scale); } /* Normalize so that rows add up to wt. * i.e. Sum(xj) mat[xi][xj] = wt for every row xi */ for (xi = 0; xi < Alphabet_size; xi++) { pri->mq[xi] = 1. / Alphabet_size; FNorm(pri->m[xi], Alphabet_size); FScale(pri->m[xi], Alphabet_size, wt); } Free2DArray((void **)pam,27); }
/* Function: Plan7RenormalizeExits()
 * Date:     SRE, Fri Aug 14 11:22:19 1998 [St. Louis]
 *
 * Purpose:  Rescale only the match state transitions of a model;
 *           for instance, after a Config() function has modified
 *           the exit distribution.
 *
 *           For each node k = 1..M-1, the first three entries of
 *           hmm->t[k] are summed to d and then each is scaled by
 *           1 / (d + d * hmm->end[k]). hmm->end[] itself is read
 *           but not modified.
 *
 * Args:     hmm - hmm to renormalize
 *
 * Returns:  void
 */
void
Plan7RenormalizeExits(struct plan7_s *hmm)
{
  int node;

  for (node = 1; node < hmm->M; node++)
    {
      float *match_trans = hmm->t[node];        /* first 3 entries are rescaled */
      float  mass        = FSum(match_trans, 3);

      FScale(match_trans, 3, 1. / (mass + mass * hmm->end[node]));
    }
}
/* Function: Plan7Renormalize() * * Purpose: Take an HMM in counts form, and renormalize * all of its probability vectors. Also enforces * Plan7 restrictions on nonexistent transitions. * * Args: hmm - the model to renormalize. * * Return: (void) * hmm is changed. */ void Plan7Renormalize(struct plan7_s *hmm) { int k; /* counter for model position */ int st; /* counter for special states */ float d; /* denominator */ /* match emissions */ for (k = 1; k <= hmm->M; k++) FNorm(hmm->mat[k], Alphabet_size); /* insert emissions */ for (k = 1; k < hmm->M; k++) FNorm(hmm->ins[k], Alphabet_size); /* begin transitions */ d = FSum(hmm->begin+1, hmm->M) + hmm->tbd1; FScale(hmm->begin+1, hmm->M, 1./d); hmm->tbd1 /= d; /* main model transitions */ for (k = 1; k < hmm->M; k++) { d = FSum(hmm->t[k], 3) + hmm->end[k]; FScale(hmm->t[k], 3, 1./d); hmm->end[k] /= d; FNorm(hmm->t[k]+3, 2); /* insert */ FNorm(hmm->t[k]+5, 2); /* delete */ } /* null model emissions */ FNorm(hmm->null, Alphabet_size); /* special transitions */ for (st = 0; st < 4; st++) FNorm(hmm->xt[st], 2); /* enforce nonexistent transitions */ /* (is this necessary?) */ hmm->t[0][TDM] = hmm->t[0][TDD] = 0.0; hmm->flags &= ~PLAN7_HASBITS; /* clear the log-odds ready flag */ hmm->flags |= PLAN7_HASPROB; /* set the probabilities OK flag */ }