/* Function: Plan7ESTConfig()
 *
 * Purpose:  Configure a Plan7 model for EST Smith/Waterman
 *           analysis.
 *
 *           OUTDATED; DO NOT USE WITHOUT RECHECKING
 *
 *           The function (1) sets the N/E/C/J special state transition
 *           probabilities, (2) sets begin[] and end[] entry/exit
 *           probabilities, and (3) converts amino acid match/insert
 *           emission probabilities into per-codon scores stored in
 *           hmm->dnam[][] and hmm->dnai[][], plus frameshift scores in
 *           hmm->dna2 and hmm->dna4.
 *
 * Args:     hmm      - hmm to configure.
 *           aacode   - 0..63 vector mapping genetic code to amino acids
 *           estmodel - 20x64 translation matrix, w/ codon bias and substitution error
 *           dna2     - probability of a -1 frameshift in a triplet
 *           dna4     - probability of a +1 frameshift in a triplet
 *
 * Return:   (void)
 *           hmm is modified in place.
 *
 * NOTE(review): as written this function reads through an uninitialized
 *           pointer (tripnull); see the notes in the body before reviving it.
 */
void
Plan7ESTConfig(struct plan7_s *hmm, int *aacode, float **estmodel,
               float dna2, float dna4)
{
  int    k;                     /* model position, 1..M            */
  int    x;                     /* codon index, 0..63              */
  float  p;                     /* codon emission probability      */
  /* NOTE(review): tripnull is declared but never assigned anywhere in this
   * function, yet tripnull[x] is read in the emission loop below. It is
   * presumably meant to be a 64-element codon null model - confirm and
   * supply one before using this code.
   */
  float *tripnull;              /* UNFINISHED!!! */

  /* configure specials: N and C loop with prob 350/351, E and J always move */
  hmm->xt[XTN][MOVE] = 1./351.;
  hmm->xt[XTN][LOOP] = 350./351.;
  hmm->xt[XTE][MOVE] = 1.;
  hmm->xt[XTE][LOOP] = 0.;
  hmm->xt[XTC][MOVE] = 1./351.;
  hmm->xt[XTC][LOOP] = 350./351.;
  hmm->xt[XTJ][MOVE] = 1.;
  hmm->xt[XTJ][LOOP] = 0.;

  /* configure entry/exit: half the entry mass goes to node 1, the other
   * half is spread over M-1 further begin[] entries starting at begin[2].
   */
  hmm->begin[1] = 0.5;
  FSet(hmm->begin+2, hmm->M-1, 0.5 / ((float)hmm->M - 1.));
  hmm->end[hmm->M] = 1.;
  /* NOTE(review): this call starts at hmm->end (index 0), whereas the begin[]
   * call above starts at an explicit offset. Assuming FSet(vec, n, value)
   * writes vec[0..n-1], this fills end[0..M-2] and leaves end[M-1] untouched;
   * hmm->end+1 was presumably intended - confirm.
   */
  FSet(hmm->end, hmm->M-1, 0.5 / ((float)hmm->M - 1.));

  /* configure dna triplet/frameshift emissions */
  for (k = 1; k <= hmm->M; k++)
    {
      /* translate aa to triplet probabilities: the emission probability of
       * the amino acid encoded by codon x, times the translation-model term
       * for that (amino acid, codon) pair, times the probability that no
       * frameshift occurred in the triplet.
       */
      for (x = 0; x < 64; x++) {
        p = hmm->mat[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4);
        hmm->dnam[x][k] = Prob2Score(p, tripnull[x]);

        p = hmm->ins[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4);
        hmm->dnai[x][k] = Prob2Score(p, tripnull[x]);
      }
      hmm->dnam[64][k] = 0;     /* ambiguous codons score 0 (danger?) */
      /* The two frameshift scores do not depend on k; they are simply
       * re-assigned on every pass (and are never set if M < 1).
       */
      hmm->dna2 = Prob2Score(dna2, 1.);
      hmm->dna4 = Prob2Score(dna4, 1.);
    }
}
/* Function: CP9Logoddsify()
 *
 * Purpose:  Given a CM Plan 9 HMM whose probability parameters are
 *           valid, fill in its integer log-odds score arrays
 *           (isc, msc, tsc, bsc, esc, el_selfsc and the reordered
 *           otsc), then raise the CPLAN9_HASBITS flag. If that flag
 *           is already raised the function returns immediately.
 *
 *           Score conventions (simplified from plan7.c):
 *             type of parameter       probability        score
 *             -----------------       -----------        ------
 *             any emission             p_x               log_2 p_x/null_x
 *             any transition           t_x               log_2 t_x
 *
 * Args:     hmm - the hmm to calculate scores in.
 *
 * Return:   (void)
 *           hmm scores are filled in. On allocation failure
 *           cm_Fail() is called.
 */
void
CP9Logoddsify(CP9_t *hmm)
{
  int  status;          /* used by ESL_ALLOC                              */
  int  node;            /* model position                                 */
  int  sym;             /* alphabet symbol index                          */
  int  K;               /* number of canonical symbols                    */
  int  Kp;              /* total number of symbols, incl. degenerate ones */
  int *symsc;           /* scratch score vector, one entry per symbol     */
  int *row;             /* current node's slice of hmm->otsc              */

  /* Nothing to do if scores are already current. */
  if (hmm->flags & CPLAN9_HASBITS) return;

  ESL_ALLOC(symsc, hmm->abc->Kp * sizeof(int));

  K  = hmm->abc->K;
  Kp = hmm->abc->Kp;

  /* These three entries are set once and never rewritten by the loops
   * below; esl_abc_IExpectScVec() is presumed to leave them alone and
   * to fill only the degenerate-symbol entries.
   */
  symsc[K]    = -INFTY; /* gap character */
  symsc[Kp-1] = -INFTY; /* missing data character */
  symsc[Kp-2] = -INFTY; /* non-residue data character */

  /* Insert emission scores, nodes 0..M. */
  for (node = 0; node <= hmm->M; node++)
    {
      for (sym = 0; sym < K; sym++)
        symsc[sym] = Prob2Score(hmm->ins[node][sym], hmm->null[sym]);
      esl_abc_IExpectScVec(hmm->abc, symsc, hmm->null);
      for (sym = 0; sym < Kp; sym++)
        hmm->isc[sym][node] = symsc[sym];
    }

  /* Match emission scores, nodes 1..M (there is no emitting M_0). */
  for (node = 1; node <= hmm->M; node++)
    {
      for (sym = 0; sym < K; sym++)
        symsc[sym] = Prob2Score(hmm->mat[node][sym], hmm->null[sym]);
      esl_abc_IExpectScVec(hmm->abc, symsc, hmm->null);
      for (sym = 0; sym < Kp; sym++)
        hmm->msc[sym][node] = symsc[sym];
    }

  /* Transition, begin and end scores. */
  for (node = 0; node <= hmm->M; node++)
    {
      /* transitions out of the match state */
      hmm->tsc[CTMM][node]  = Prob2Score(hmm->t[node][CTMM],  1.0);
      hmm->tsc[CTMI][node]  = Prob2Score(hmm->t[node][CTMI],  1.0);
      hmm->tsc[CTMD][node]  = Prob2Score(hmm->t[node][CTMD],  1.0);
      hmm->tsc[CTMEL][node] = Prob2Score(hmm->t[node][CTMEL], 1.0);

      /* transitions out of the insert state */
      hmm->tsc[CTIM][node]  = Prob2Score(hmm->t[node][CTIM],  1.0);
      hmm->tsc[CTII][node]  = Prob2Score(hmm->t[node][CTII],  1.0);
      hmm->tsc[CTID][node]  = Prob2Score(hmm->t[node][CTID],  1.0);

      if (node == 0)
        {
          /* D_0 doesn't exist, so nothing can leave it;
           * bsc[0] and esc[0] are deliberately left untouched.
           */
          hmm->tsc[CTDM][node] = -INFTY;
          hmm->tsc[CTDI][node] = -INFTY;
          hmm->tsc[CTDD][node] = -INFTY;
        }
      else
        {
          hmm->tsc[CTDM][node] = Prob2Score(hmm->t[node][CTDM], 1.0);
          hmm->tsc[CTDI][node] = Prob2Score(hmm->t[node][CTDI], 1.0);
          hmm->tsc[CTDD][node] = Prob2Score(hmm->t[node][CTDD], 1.0);

          hmm->bsc[node] = Prob2Score(hmm->begin[node], 1.0);
          hmm->esc[node] = Prob2Score(hmm->end[node],   1.0);
        }
    }
  hmm->el_selfsc = Prob2Score(hmm->el_self, 1.0);

  /* Copy the scores into the reordered array otsc, which stores
   * cp9O_NTRANS consecutive entries per node.
   */
  row = hmm->otsc;
  for (node = 0; node <= hmm->M; node++)
    {
      row[cp9O_MM]  = hmm->tsc[CTMM][node];
      row[cp9O_MI]  = hmm->tsc[CTMI][node];
      row[cp9O_MD]  = hmm->tsc[CTMD][node];
      row[cp9O_IM]  = hmm->tsc[CTIM][node];
      row[cp9O_II]  = hmm->tsc[CTII][node];
      row[cp9O_DM]  = hmm->tsc[CTDM][node];
      row[cp9O_DD]  = hmm->tsc[CTDD][node];
      row[cp9O_ID]  = hmm->tsc[CTID][node];
      row[cp9O_DI]  = hmm->tsc[CTDI][node];
      row[cp9O_BM]  = hmm->bsc[node];
      row[cp9O_MEL] = hmm->tsc[CTMEL][node];
      row[cp9O_ME]  = hmm->esc[node];

      row += cp9O_NTRANS;
    }

  hmm->flags |= CPLAN9_HASBITS; /* raise the log-odds ready flag */

  free(symsc);
  return;

 ERROR:
  cm_Fail("Memory allocation error.\n");
  return; /* never reached */
}
/* Function: cm_tr_penalties_Create()
 * Date:     EPN, Sat Jan 21 12:03:52 2012
 *
 * Purpose:  Allocate and initialize a CM_TR_PENALTIES object.
 *           A CM and its emit map are required to determine
 *           truncation penalty scores. This is annoyingly
 *           complex, see verbose notes within code below.
 *
 *           Some of the code in this function, specifically
 *           that which calculates the probability of a fragment
 *           aligning at a given node, is checkable, but only
 *           if we disallow truncated begins into insert states.
 *           However, we want to allow truncated begins in reality.
 *           The 'ignore_inserts' flag, used when testing this
 *           function, ignores inserts. Set it to TRUE to perform
 *           the test; cm_tr_penalties_Validate() is then run on
 *           the result.
 *
 * Args:     cm             - the model; cm->emap must be non-NULL
 *           ignore_inserts - TRUE to give insert states no share of the
 *                            truncated begin probability (and to validate)
 *           errbuf         - buffer passed to cm_tr_penalties_Validate();
 *                            only used when ignore_inserts is TRUE
 *
 * Returns:  Newly allocated CM_TR_PENALTIES object, which the caller
 *           frees with cm_tr_penalties_Destroy(). NULL on any failure:
 *           cm or cm->emap is NULL, out of memory, an internal sanity
 *           check failed, or validation failed (in which case errbuf
 *           has also been printed to stdout).
 */
CM_TR_PENALTIES *
cm_tr_penalties_Create(CM_t *cm, int ignore_inserts, char *errbuf)
{
  int status;
  int v, nd, m, i1, i2;
  int lpos, rpos;
  int i;

  /* variables used for determining ratio of inserts to match at each consensus position */
  float  *mexpocc = NULL;          /* [0..c..clen] probability match  state is used to emit at cons posn c */
  float  *iexpocc = NULL;          /* [0..c..clen] probability insert state is used to emit after cons posn c */
  double *psi     = NULL;          /* [0..v..M-1]  expected occupancy of state v */
  float   m_psi, i1_psi, i2_psi;   /* temp psi values */
  float   summed_psi;

  CM_TR_PENALTIES *trp = NULL;

  /* variables used for calculating global truncation penalties */
  float g_5and3;                   /* fragment probability if 5' and 3' truncation are allowed */
  float g_5or3;                    /* fragment probability if 5' or  3' truncation are allowed */

  /* variables used for calculating local truncation penalties */
  float *begin = NULL;             /* local begin probabilities 0..v..M-1 */
  int    subtree_clen;             /* consensus length of subtree under this node */
  float  prv53, prv5, prv3;        /* previous node's fragment probability, 5'&3', 5' only, 3' only */
  float  cur53, cur5, cur3;        /* current  node's fragment probability, 5'&3', 5' only, 3' only */
  int    nfrag53, nfrag5, nfrag3;  /* number of fragments, 5'&3', 5' only, 3' only */

  if(cm == NULL || cm->emap == NULL) goto ERROR;

  ESL_ALLOC(trp, sizeof(CM_TR_PENALTIES));
  trp->M               = cm->M;
  trp->ignored_inserts = ignore_inserts;
  /* NULL the array members before any further allocation can fail, so the
   * ERROR path never hands uninitialized pointers to
   * cm_tr_penalties_Destroy() (presumed to skip NULL members).
   */
  trp->g_ptyAA  = NULL;
  trp->l_ptyAA  = NULL;
  trp->ig_ptyAA = NULL;
  trp->il_ptyAA = NULL;

  /* Define truncation penalties for each state v. This will be
   * the score for doing a truncated begin into state v.
   *
   * Important note: For this discussion we assume that sequences can
   * only be truncated at consensus positions, which means we don't
   * have to worry about truncated begins into inserts. This is an
   * approximation (also made by Diana and Sean in the 2009 trCYK
   * paper) that greatly simplifies the explanation of the calculation
   * of the truncation penalties. The examples in my ELN3 notebook
   * also use this simplification. However, I need to be able to do
   * truncated begins into insert states in some cases (some pass/mode
   * combinations see ELN bottom of p.47). I explain first the
   * rationale for calculating truncation penalties ignoring inserts
   * and then I describe how I adapt those penalties to allow
   * for inserts.
   *
   * This is a lengthy comment. I've divided it into 3 sections:
   * Section 1. Global mode truncation penalties, ignoring inserts.
   * Section 2. Local mode truncation penalties, ignoring inserts.
   * Section 3. Adapting truncation penalties to allow for inserts.
   *
   **************************************************************
   * Section 1. Global mode truncation penalties, ignoring inserts.
   *
   * We want the truncation penalty to be the log of the probability
   * that the particular fragment we're aligning was generated from
   * the following generative process. The generative process differs
   * between global and local mode.
   *
   * In global mode:
   * o Sample global parsetree which spans consensus positions 1..clen.
   * o Randomly choose g and h in range 1..clen, where h >= g and
   *   truncate sequence from g..h. The first residue will either be
   *   an insert before position g, or a match at position g of the
   *   model. The final residue will either be an insert after position
   *   h or a match at position h of the model.
   *
   * All g,h fragments are equiprobable, so the probability of any
   * particular fragment is 2 / (clen * (clen+1)). So log_2 of this
   * value is the truncation penalty for all truncated alignments in
   * global mode where both 5' and 3' truncation are allowed.
   *
   * We store this penalty, per-state in the
   * g_ptyAA[TRPENALTY_5P_AND_3P][0..v..M-1].  The penalty is
   * identical for all emitting states. The penalty value for
   * non-emitters is IMPOSSIBLE because truncated begins are
   * not allowed into non-emitters.
   *
   * If only 5' OR 3' truncation is allowed, we only truncate at g or
   * h, which means there are clen possible fragments and log_2
   * (1/clen) is our global truncation penalty.
   *
   * However, if 5' truncation is allowed we can only do a truncated
   * begin into states with a consensus subtree that spans
   * position clen (since we don't allow a truncation at the 3' end).
   * Thus any state whose subtree doesn't span clen gets
   * an IMPOSSIBLE value for its truncation score in:
   * g_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1].
   *
   * Likewise, if 3' truncation is allowed we can only do a truncated
   * begin into states with a consensus subtree that spans
   * position 1 (since we don't allow a truncation at the 5' end).
   *
   * There's an example of computing all three types of penalties for
   * a simple CM in ELN 3 p43.
   *
   ************************************************************
   * Section 2. Local mode truncation penalties, ignoring inserts.
   *
   * Generative process that generates fragments in local mode:
   * o Sample local begin state b with consensus subtree from i..j from
   *   local begin state distribution.
   * o Randomly choose g and h in range i..j, where h >= g and
   *   truncate sequence from g..h. The first residue will either be
   *   an insert before position g, or a match at position g of the
   *   model. The final residue will either be an insert after position
   *   h or a match at position h of the model.
   *
   * Unlike in global mode, in local mode all fragments are not
   * equiprobable since the local begin state distribution can be
   * anything, and each b allows different sets of fragments to be
   * generated (because they can only span from i to j).
   *
   * The truncation penalty should be the log of the probability of
   * aligning the current fragment to the model. So we need to know
   * the probability of generating each possible fragment.
   * We could calculate probability of any fragment g,h with the
   * following inefficient algorithm:
   *
   * For each start fragment point g,
   *   For each end fragment point h,
   *     For each state v,
   *       If lpos[v] <= g && rpos[v] >= h, then
   *       prob[g][h] += begin[v] * 2. / (st_clen[v] * (st_clen[v]+1));
   *
   * Where lpos[v]/rpos[v] are the left/right consensus positions in
   * consensus subtree rooted at state v. And st_clen[v] is rpos[v] -
   * lpos[v] + 1, the consensus length of that subtree.
   *
   * This gives us prob[g][h], the probability of generating fragment
   * g,h. But we want to apply the penalty to a state, not to a
   * fragment, to avoid needing to know the fragment boundaries g,h
   * during the DP recursion when applying the penalty.
   *
   * To facilitate this, we need to find state t, the state with
   * smallest subtree that contains g,h. State t is relevant because
   * it is the state which will root the alignment of the fragment g,h
   * by using a truncated begin transition into t. This gives a new
   * algorithm:
   *
   * For each start fragment point g,
   *   For each end fragment point h,
   *     Identify state t, the max valued state for which
   *       lpos[v] <= g && rpos[v] >= h, then {
   *         prob[t] += prob[g][h]
   *         fcount[t]++;
   *       }
   *
   * prob[t] will be the probability of observing an alignment that
   * uses a truncated begin into t to align any fragment. Then we take
   * average over all fragments: prob[t] / fcount[t] (since we'll only
   * be aligning one of those fragments) and use the log of that
   * probability as the penalty for observing a truncated alignment
   * rooted at state t. Conveniently, it turns out that all fragments
   * that share t are equiprobable (have equal prob[g][h] values), so
   * the average probability is the actual probability for each
   * fragment, and thus the correct penalty to apply.
   *
   * Fortunately, we can compute the correct penalty much more
   * efficiently than the two algorithms shown above. The
   * efficient way is implemented below. A test that the penalties
   * are correctly computed is in cm_tr_penalties_Validate().
   *
   * This discussion assumes we're truncating 5' and 3', but if we're
   * only truncating 5' or 3' the situation is a little different.
   *
   * There's an example of computing all three types of penalties for
   * a simple CM in ELN3 p44-45.
   *
   ************************************************************
   * Section 3. Adapting truncation penalties to allow for inserts.
   *
   * We need to be able to do truncated begins into insert states
   * because we enforce that the first/final residue of a sequence be
   * included in 5'/3' truncated alignments and we want to be able
   * to properly align those residues if they're probably emitted
   * by insert states.
   *
   * The methods/logic explained in sections 1 and 2 above I believe
   * is correct IF we ignore inserts (assume truncated begins into
   * them are impossible). But we need to allow inserts, so I modify
   * the truncation penalties as described above to allow for inserts
   * as follows. We can calculate the appropriate truncated begin
   * penalty for all MATP_MP, MATL_ML, MATR_MR, BIF_B states as with
   * the methods described above by ignoring inserts. This gives us a
   * probability p of using that state as the root of the truncated
   * alignment, i.e. the truncated begin state. (The log_2 of this
   * probability is the penalty.) We then partition p amongst the
   * MATP_MP, MATL_ML, MATR_MR, BIF_B states and any parent insert
   * states, i.e. any insert state that can transition into the
   * match/bif state. For each match/bif state there's 0, 1 or 2
   * parent inserts. We then partition p based on the relative
   * expected occupancy of these inserts versus the match/bif state.
   *
   * This is certainly 'incorrect' in that it doesn't reflect the
   * true probability of a fragment being aligned to each of the
   * states, but it should be a close approximation. I think doing
   * it correctly is basically impossible in the context of a single
   * state-specific penalty (i.e. the penalty would have to be per-fragment
   * which would be hard to deal with in the DP functions).
   */

  /* Allocate the penalty arrays. Each outer array's row pointers are
   * NULLed as soon as it exists, so a failure at any later allocation
   * leaves the object in a state that is safe to destroy.
   */
  ESL_ALLOC(trp->g_ptyAA,  sizeof(float *) * NTRPENALTY);
  for(i = 0; i < NTRPENALTY; i++) trp->g_ptyAA[i]  = NULL;
  ESL_ALLOC(trp->l_ptyAA,  sizeof(float *) * NTRPENALTY);
  for(i = 0; i < NTRPENALTY; i++) trp->l_ptyAA[i]  = NULL;
  ESL_ALLOC(trp->ig_ptyAA, sizeof(int *)   * NTRPENALTY);
  for(i = 0; i < NTRPENALTY; i++) trp->ig_ptyAA[i] = NULL;
  ESL_ALLOC(trp->il_ptyAA, sizeof(int *)   * NTRPENALTY);
  for(i = 0; i < NTRPENALTY; i++) trp->il_ptyAA[i] = NULL;

  /* allocate each row and initialize every penalty to 'impossible' */
  for(i = 0; i < NTRPENALTY; i++) {
    ESL_ALLOC(trp->g_ptyAA[i],  sizeof(float) * cm->M);
    ESL_ALLOC(trp->l_ptyAA[i],  sizeof(float) * cm->M);
    ESL_ALLOC(trp->ig_ptyAA[i], sizeof(int)   * cm->M);
    ESL_ALLOC(trp->il_ptyAA[i], sizeof(int)   * cm->M);
    esl_vec_FSet(trp->g_ptyAA[i],   cm->M, IMPOSSIBLE);
    esl_vec_FSet(trp->l_ptyAA[i],   cm->M, IMPOSSIBLE);
    esl_vec_ISet(trp->ig_ptyAA[i],  cm->M, -INFTY);
    esl_vec_ISet(trp->il_ptyAA[i],  cm->M, -INFTY);
  }

  /* DumpEmitMap(stdout, cm->emap, cm); */

  /* Calculate local begin probabilities and expected occupancy */
  ESL_ALLOC(begin, sizeof(float) * cm->M);
  cm_CalculateLocalBeginProbs(cm, cm->pbegin, cm->t, begin);
  if((status = cm_ExpectedPositionOccupancy(cm, &mexpocc, &iexpocc, &psi, NULL, NULL, NULL)) != eslOK) goto ERROR;

  /* Fill global and local truncation penalties in a single loop. We
   * step through all nodes and set the truncation penalties for the
   * MATP_MP, MATL_ML, MATR_MR, and BIF_B states and any parent
   * inserts (i1, i2) of those states.
   */
  g_5and3 = 2. / (cm->clen * (cm->clen+1)); /* for global mode: probability of all fragments if we're truncating 5' and 3' */
  g_5or3  = 1. / cm->clen;                  /* for global mode: probability of all fragments if we're only truncating 5' or  3' */

  prv5 = prv3 = prv53 = 0.; /* initialize 'previous' probability values used for calc'ing local truncation penalties */
  for(nd = 0; nd < cm->nodes; nd++) {
    /* first/final consensus position of the subtree rooted at this node's
     * match/bif state; the emit map value is shifted by one for node
     * types other than those listed in each test
     */
    lpos = (cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd) ? cm->emap->lpos[nd] : cm->emap->lpos[nd] + 1;
    rpos = (cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd) ? cm->emap->rpos[nd] : cm->emap->rpos[nd] - 1;

    /* now set penalties for match and insert states m, i1 and maybe i2 (if we're a MATP_MP or BIF_B) */
    if(cm->ndtype[nd] == END_nd) {
      prv5 = prv3 = prv53 = 0.;
    }
    else if(cm->ndtype[nd] == BEGL_nd || cm->ndtype[nd] == BEGR_nd) {
      /* start a new branch: inherit from the parent BIF_B's stored values */
      prv5  = (cm->ndtype[nd] == BEGL_nd) ? 0. : trp->l_ptyAA[TRPENALTY_5P_ONLY][cm->plast[cm->nodemap[nd]]];   /* parent BIF_B's probability */
      prv3  = (cm->ndtype[nd] == BEGR_nd) ? 0. : trp->l_ptyAA[TRPENALTY_3P_ONLY][cm->plast[cm->nodemap[nd]]];   /* parent BIF_B's probability */
      prv53 = trp->l_ptyAA[TRPENALTY_5P_AND_3P][cm->plast[cm->nodemap[nd]]];                                    /* parent BIF_B's probability */
    }
    else if(cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd || cm->ndtype[nd] == BIF_nd) {
      /* determine match states and insert states that pertain to this node */
      m = cm->nodemap[nd]; /* MATP_MP, MATL_ML, MATR_MR, or BIF_B */
      InsertsGivenNodeIndex(cm, nd-1, &i1, &i2);

      m_psi = psi[m];
      /* For a MATP node, include MATP_ML and MATP_MR psi (states m+1 and
       * m+2). The test must be on the node type: ndtype[] holds *_nd
       * values, never state ids such as MATP_MP.
       */
      if(cm->ndtype[nd] == MATP_nd) { m_psi += (psi[m+1] + psi[m+2]); }

      i1_psi = (i1 == -1) ? 0. : psi[i1];
      i2_psi = (i2 == -1) ? 0. : psi[i2];
      summed_psi = m_psi + i1_psi + i2_psi;
      if(ignore_inserts) {
        i1_psi = i2_psi = 0.;
        summed_psi = m_psi;
      }

      /* Global penalties */
      /* sanity check, we should only set truncation penalty once per state */
      if(NOT_IMPOSSIBLE(trp->g_ptyAA[TRPENALTY_5P_AND_3P][m]))                  goto ERROR;
      if((i1 != -1) && NOT_IMPOSSIBLE(trp->g_ptyAA[TRPENALTY_5P_AND_3P][i1]))   goto ERROR;
      if((i2 != -1) && NOT_IMPOSSIBLE(trp->g_ptyAA[TRPENALTY_5P_AND_3P][i2]))   goto ERROR;
      /* divide up the probability g_5and3 amongst relevant states m, i1, i2, weighted by psi */
      trp->g_ptyAA[TRPENALTY_5P_AND_3P][m]                = (m_psi  / summed_psi) * g_5and3;
      if(i1 != -1) trp->g_ptyAA[TRPENALTY_5P_AND_3P][i1]  = (i1_psi / summed_psi) * g_5and3;
      if(i2 != -1) trp->g_ptyAA[TRPENALTY_5P_AND_3P][i2]  = (i2_psi / summed_psi) * g_5and3;

      /* same thing, for 5P only and 3P only */
      if(rpos == cm->clen) { /* else it will remain IMPOSSIBLE */
        trp->g_ptyAA[TRPENALTY_5P_ONLY][m]                = (m_psi  / summed_psi) * g_5or3;
        if(i1 != -1) trp->g_ptyAA[TRPENALTY_5P_ONLY][i1]  = (i1_psi / summed_psi) * g_5or3;
        if(i2 != -1) trp->g_ptyAA[TRPENALTY_5P_ONLY][i2]  = (i2_psi / summed_psi) * g_5or3;
      }
      if(lpos == 1) { /* else it will remain IMPOSSIBLE */
        trp->g_ptyAA[TRPENALTY_3P_ONLY][m]                = (m_psi  / summed_psi) * g_5or3;
        if(i1 != -1) trp->g_ptyAA[TRPENALTY_3P_ONLY][i1]  = (i1_psi / summed_psi) * g_5or3;
        if(i2 != -1) trp->g_ptyAA[TRPENALTY_3P_ONLY][i2]  = (i2_psi / summed_psi) * g_5or3;
      }

      /* Local penalties */
      subtree_clen = rpos - lpos + 1;
      nfrag5  = subtree_clen;
      nfrag3  = subtree_clen;
      nfrag53 = (subtree_clen * (subtree_clen+1)) / 2;

      /* determine probability of observing a fragment aligned at
       * state m (here, m is what I call t above and in notes) and
       * partition that probability between m and i1 and/or i2 by
       * relative occupancy of match versus inserts
       */
      cur5  = begin[m] / (float) nfrag5  + prv5;
      cur3  = begin[m] / (float) nfrag3  + prv3;
      cur53 = begin[m] / (float) nfrag53 + prv53;

      /* sanity check, we should only set truncation penalty once per state */
      if(NOT_IMPOSSIBLE(trp->l_ptyAA[TRPENALTY_5P_AND_3P][m]))                  goto ERROR;
      if((i1 != -1) && NOT_IMPOSSIBLE(trp->l_ptyAA[TRPENALTY_5P_AND_3P][i1]))   goto ERROR;
      if((i2 != -1) && NOT_IMPOSSIBLE(trp->l_ptyAA[TRPENALTY_5P_AND_3P][i2]))   goto ERROR;

      trp->l_ptyAA[TRPENALTY_5P_AND_3P][m]                = (m_psi  / summed_psi) * cur53;
      if(i1 != -1) trp->l_ptyAA[TRPENALTY_5P_AND_3P][i1]  = (i1_psi / summed_psi) * cur53;
      if(i2 != -1) trp->l_ptyAA[TRPENALTY_5P_AND_3P][i2]  = (i2_psi / summed_psi) * cur53;

      trp->l_ptyAA[TRPENALTY_5P_ONLY][m]                  = (m_psi  / summed_psi) * cur5;
      if(i1 != -1) trp->l_ptyAA[TRPENALTY_5P_ONLY][i1]    = (i1_psi / summed_psi) * cur5;
      if(i2 != -1) trp->l_ptyAA[TRPENALTY_5P_ONLY][i2]    = (i2_psi / summed_psi) * cur5;

      trp->l_ptyAA[TRPENALTY_3P_ONLY][m]                  = (m_psi  / summed_psi) * cur3;
      if(i1 != -1) trp->l_ptyAA[TRPENALTY_3P_ONLY][i1]    = (i1_psi / summed_psi) * cur3;
      if(i2 != -1) trp->l_ptyAA[TRPENALTY_3P_ONLY][i2]    = (i2_psi / summed_psi) * cur3;

      /* the 5'-only (3'-only) value only carries over to the next node
       * from a MATL (MATR) node; the 5'&3' value always carries over
       */
      prv5  = (cm->ndtype[nd] == MATL_nd) ? cur5 : 0.;
      prv3  = (cm->ndtype[nd] == MATR_nd) ? cur3 : 0.;
      prv53 = cur53;
    }
  }

  /* all penalties are currently probabilities, convert them to log
   * probs and set integer penalties (careful, we have to check if
   * IMPOSSIBLE first)
   */
  for(v = 0; v < cm->M; v++) {
    if((cm->stid[v] == MATP_MP || cm->stid[v] == MATL_ML || cm->stid[v] == MATR_MR || cm->stid[v] == BIF_B) ||
       ((cm->sttype[v] == IL_st || cm->sttype[v] == IR_st) && (! StateIsDetached(cm, v)))) {
      /* Check for rare special case: if we're a MATP_IL and next
       * two states are MATP_IR and END_E, then we won't have set
       * a truncation penalty. This state will keep an impossible
       * truncated begin score, if we did a truncated begin into
       * it we'd just emit from the MATP_IL and then go to the
       * END_E anyway (the MATP_IR will be detached).
       */
      if(cm->stid[v] == MATP_IL && cm->ndtype[cm->ndidx[v]+1] == END_nd) continue;

      /* glocal 5P AND 3P: all of these should have been set to a non-IMPOSSIBLE value */
      if(! NOT_IMPOSSIBLE(trp->g_ptyAA[TRPENALTY_5P_AND_3P][v])) goto ERROR;
      trp->ig_ptyAA[TRPENALTY_5P_AND_3P][v] = Prob2Score(trp->g_ptyAA[TRPENALTY_5P_AND_3P][v], 1.0);
      trp->g_ptyAA[TRPENALTY_5P_AND_3P][v]  = sreLOG2(trp->g_ptyAA[TRPENALTY_5P_AND_3P][v]);

      /* glocal 5P only: some may be IMPOSSIBLE */
      if(NOT_IMPOSSIBLE(trp->g_ptyAA[TRPENALTY_5P_ONLY][v])) {
        trp->ig_ptyAA[TRPENALTY_5P_ONLY][v] = Prob2Score(trp->g_ptyAA[TRPENALTY_5P_ONLY][v], 1.0);
        trp->g_ptyAA[TRPENALTY_5P_ONLY][v]  = sreLOG2(trp->g_ptyAA[TRPENALTY_5P_ONLY][v]);
      }
      /* glocal 3P only: some may be IMPOSSIBLE */
      if(NOT_IMPOSSIBLE(trp->g_ptyAA[TRPENALTY_3P_ONLY][v])) {
        trp->ig_ptyAA[TRPENALTY_3P_ONLY][v] = Prob2Score(trp->g_ptyAA[TRPENALTY_3P_ONLY][v], 1.0);
        trp->g_ptyAA[TRPENALTY_3P_ONLY][v]  = sreLOG2(trp->g_ptyAA[TRPENALTY_3P_ONLY][v]);
      }

      /* local penalties: all of these should have been set to a
       * non-IMPOSSIBLE value. The test is on the float probabilities in
       * l_ptyAA; the integer il_ptyAA entries are only written just below.
       */
      if(! NOT_IMPOSSIBLE(trp->l_ptyAA[TRPENALTY_5P_AND_3P][v])) goto ERROR;
      if(! NOT_IMPOSSIBLE(trp->l_ptyAA[TRPENALTY_5P_ONLY][v]))   goto ERROR;
      if(! NOT_IMPOSSIBLE(trp->l_ptyAA[TRPENALTY_3P_ONLY][v]))   goto ERROR;

      trp->il_ptyAA[TRPENALTY_5P_AND_3P][v] = Prob2Score(trp->l_ptyAA[TRPENALTY_5P_AND_3P][v], 1.0);
      trp->il_ptyAA[TRPENALTY_5P_ONLY][v]   = Prob2Score(trp->l_ptyAA[TRPENALTY_5P_ONLY][v],   1.0);
      trp->il_ptyAA[TRPENALTY_3P_ONLY][v]   = Prob2Score(trp->l_ptyAA[TRPENALTY_3P_ONLY][v],   1.0);

      trp->l_ptyAA[TRPENALTY_5P_AND_3P][v]  = sreLOG2(trp->l_ptyAA[TRPENALTY_5P_AND_3P][v]);
      trp->l_ptyAA[TRPENALTY_5P_ONLY][v]    = sreLOG2(trp->l_ptyAA[TRPENALTY_5P_ONLY][v]);
      trp->l_ptyAA[TRPENALTY_3P_ONLY][v]    = sreLOG2(trp->l_ptyAA[TRPENALTY_3P_ONLY][v]);
    }
  }

  /* the penalties are only checkable when inserts were ignored */
  if(ignore_inserts) {
    if((status = cm_tr_penalties_Validate(trp, cm, 0.0001, errbuf)) != eslOK) { printf("%s", errbuf);  goto ERROR; }
  }

  /* cm_tr_penalties_Dump(stdout, cm, trp); */

  if(mexpocc != NULL) free(mexpocc);
  if(iexpocc != NULL) free(iexpocc);
  if(psi     != NULL) free(psi);
  if(begin   != NULL) free(begin);

  return trp;

 ERROR:
  if(mexpocc != NULL) free(mexpocc);
  if(iexpocc != NULL) free(iexpocc);
  if(psi     != NULL) free(psi);
  if(begin   != NULL) free(begin);
  if(trp     != NULL) cm_tr_penalties_Destroy(trp);

  return NULL;
}
/* Function: P7Logoddsify() * * Purpose: Take an HMM with valid probabilities, and * fill in the integer log-odds score section of the model. * * Notes on log-odds scores: * type of parameter probability score * ----------------- ----------- ------ * any emission p_x log_2 p_x/null_x * N,J,C /assume/ p_x = null_x so /always/ score zero. * transition to emitters t_x log_2 t_x/p1 * (M,I; N,C; J) * NN and CC loops are often equal to p1, so usu. score zero. * C->T transition t_x log_2 t_x/p2 * often zero, usu. C->T = p2. * all other transitions t_x log_2 t_x * (no null model counterpart, so null prob is 1) * * Notes on entry/exit scores, B->M and M->E: * The probability form model includes delete states 1 and M. * these states are removed from a search form model to * prevent B->D...D->E->J->B mute cycles, which would complicate * dynamic programming algorithms. The data-independent * S/W B->M and M->E transitions are folded together with * data-dependent B->D...D->M and M->D...D->E paths. * * This process is referred to in the code as "wing folding" * or "wing retraction"... the analogy is to a swept-wing * fighter in landing vs. high speed flight configuration. * * Note on Viterbi vs. forward flag: * Wing retraction must take forward vs. Viterbi * into account. If forward, sum two paths; if Viterbi, take * max. I tried to slide this by as a sum, without * the flag, but Alex detected it as a bug, because you can * then find cases where the Viterbi score doesn't match * the P7TraceScore(). * * Args: hmm - the hmm to calculate scores in. * viterbi_mode - TRUE to fold wings in Viterbi configuration. * * Return: (void) * hmm scores are filled in. 
*/ void P7Logoddsify(struct plan7_s *hmm, int viterbi_mode) { int k; /* counter for model position */ int x; /* counter for symbols */ float accum; float tbm, tme; if (hmm->flags & PLAN7_HASBITS) return; /* Symbol emission scores */ for (k = 1; k <= hmm->M; k++) { /* match/insert emissions in main model */ for (x = 0; x < Alphabet_size; x++) { hmm->msc[x][k] = Prob2Score(hmm->mat[k][x], hmm->null[x]); if (k < hmm->M) hmm->isc[x][k] = Prob2Score(hmm->ins[k][x], hmm->null[x]); } /* degenerate match/insert emissions */ for (x = Alphabet_size; x < Alphabet_iupac; x++) { hmm->msc[x][k] = DegenerateSymbolScore(hmm->mat[k], hmm->null, x); if (k < hmm->M) hmm->isc[x][k] = DegenerateSymbolScore(hmm->ins[k], hmm->null, x); } } /* State transitions. * * A note on "folding" of D_1 and D_M. * These two delete states are folded out of search form models * in order to prevent null cycles in the dynamic programming * algorithms (see code below). However, we use their log transitions * when we save the model! So the following log transition probs * are used *only* in save files, *never* in search algorithms: * log (tbd1), D1 -> M2, D1 -> D2 * Mm-1 -> Dm, Dm-1 -> Dm * * In a search algorithm, these have to be interpreted as -INFTY * because their contributions are folded into bsc[] and esc[] * entry/exit scores. They can't be set to -INFTY here because * we need them in save files. */ for (k = 1; k < hmm->M; k++) { hmm->tsc[k][TMM] = Prob2Score(hmm->t[k][TMM], hmm->p1); hmm->tsc[k][TMI] = Prob2Score(hmm->t[k][TMI], hmm->p1); hmm->tsc[k][TMD] = Prob2Score(hmm->t[k][TMD], 1.0); hmm->tsc[k][TIM] = Prob2Score(hmm->t[k][TIM], hmm->p1); hmm->tsc[k][TII] = Prob2Score(hmm->t[k][TII], hmm->p1); hmm->tsc[k][TDM] = Prob2Score(hmm->t[k][TDM], hmm->p1); hmm->tsc[k][TDD] = Prob2Score(hmm->t[k][TDD], 1.0); } /* B->M entry transitions. Note how D_1 is folded out. 
* M1 is just B->M1 * M2 is sum (or max) of B->M2 and B->D1->M2 * M_k is sum (or max) of B->M_k and B->D1...D_k-1->M_k * These have to be done in log space, else you'll get * underflow errors; and we also have to watch for log(0). * A little sloppier than it probably has to be; historically, * doing in this in log space was in response to a bug report. */ accum = hmm->tbd1 > 0.0 ? log(hmm->tbd1) : -9999.; for (k = 1; k <= hmm->M; k++) { tbm = hmm->begin[k] > 0. ? log(hmm->begin[k]) : -9999.; /* B->M_k part */ /* B->D1...D_k-1->M_k part we get from accum*/ if (k > 1 && accum > -9999.) { if (hmm->t[k-1][TDM] > 0.0) { if (viterbi_mode) tbm = MAX(tbm, accum + log(hmm->t[k-1][TDM])); else tbm = LogSum(tbm, accum + log(hmm->t[k-1][TDM])); } accum = (hmm->t[k-1][TDD] > 0.0) ? accum + log(hmm->t[k-1][TDD]) : -9999.; } /* Convert from log_e to scaled integer log_2 odds. */ if (tbm > -9999.) hmm->bsc[k] = (int) floor(0.5 + INTSCALE * 1.44269504 * (tbm - log(hmm->p1))); else hmm->bsc[k] = -INFTY; } /* M->E exit transitions. Note how D_M is folded out. * M_M is 1 by definition * M_M-1 is sum of M_M-1->E and M_M-1->D_M->E, where D_M->E is 1 by definition * M_k is sum of M_k->E and M_k->D_k+1...D_M->E * Must be done in log space to avoid underflow errors. * A little sloppier than it probably has to be; historically, * doing in this in log space was in response to a bug report. */ hmm->esc[hmm->M] = 0; accum = 0.; for (k = hmm->M-1; k >= 1; k--) { tme = hmm->end[k] > 0. ? log(hmm->end[k]) : -9999.; if (accum > -9999.) { if (hmm->t[k][TMD] > 0.0) { if (viterbi_mode) tme = MAX(tme, accum + log(hmm->t[k][TMD])); else tme = LogSum(tme, accum + log(hmm->t[k][TMD])); } accum = (hmm->t[k][TDD] > 0.0) ? accum + log(hmm->t[k][TDD]) : -9999.; } /* convert from log_e to scaled integer log odds. */ hmm->esc[k] = (tme > -9999.) ? 
(int) floor(0.5 + INTSCALE * 1.44269504 * tme) : -INFTY; } /* special transitions */ hmm->xsc[XTN][LOOP] = Prob2Score(hmm->xt[XTN][LOOP], hmm->p1); hmm->xsc[XTN][MOVE] = Prob2Score(hmm->xt[XTN][MOVE], 1.0); hmm->xsc[XTE][LOOP] = Prob2Score(hmm->xt[XTE][LOOP], 1.0); hmm->xsc[XTE][MOVE] = Prob2Score(hmm->xt[XTE][MOVE], 1.0); hmm->xsc[XTC][LOOP] = Prob2Score(hmm->xt[XTC][LOOP], hmm->p1); hmm->xsc[XTC][MOVE] = Prob2Score(hmm->xt[XTC][MOVE], 1.-hmm->p1); hmm->xsc[XTJ][LOOP] = Prob2Score(hmm->xt[XTJ][LOOP], hmm->p1); hmm->xsc[XTJ][MOVE] = Prob2Score(hmm->xt[XTJ][MOVE], 1.0); hmm->flags |= PLAN7_HASBITS; /* raise the log-odds ready flag */ }
/* Function: CP9_reconfig2sub()
 * EPN 10.16.06
 *
 * Purpose:  Given a CM Plan 9 HMM and a start position (spos) and end
 *           position (epos) that a sub CM models, reconfigure the HMM
 *           so that it can only start in the node that models spos
 *           (spos_nd) and only end in the node that models epos
 *           (epos_nd).
 *
 *           If we're reconfiguring a CP9 HMM that ONLY models the
 *           consensus columns spos to epos, then spos_nd == 1 and
 *           epos_nd == hmm->M, but this is not necessarily true.
 *           We may be reconfiguring a CP9 HMM that models the full
 *           alignment, including positions before and/or after spos
 *           and epos. In that case spos_nd == spos and epos_nd == epos.
 *
 *           Only the probabilities that change, and their scores, are
 *           rewritten; the rest of the model is left untouched.
 *
 * Args:     hmm      - the CP9 model w/ data-dep prob's valid
 *           spos     - first consensus column modelled by some original
 *                      full length, template CP9 HMM that 'hmm' models.
 *           epos     - final consensus column modelled by some original
 *                      CP9 HMM that 'hmm' models.
 *           spos_nd  - the node of 'hmm' that models spos.
 *                      (1 if 'hmm' only has (epos-spos+1) nodes,
 *                       spos if 'hmm' has a node for each column of
 *                       the original alignment)
 *           epos_nd  - the node of 'hmm' that models epos.
 *                      (hmm->M if 'hmm' only has (epos-spos+1) nodes,
 *                       epos if 'hmm' has a node for each column of
 *                       the original alignment)
 *           orig_phi - the 2D phi array for the original CP9 HMM.
 *
 * Return:   (void)
 *           HMM probabilities and the affected scores are modified,
 *           and the CPLAN9_HASBITS flag is raised.
 */
void
CP9_reconfig2sub(CP9_t *hmm, int spos, int epos, int spos_nd, int epos_nd, double **orig_phi)
{
  /* Make the necessary modifications. Since in cmalign --sub mode this
   * function will be called potentially once for each sequence, we
   * don't want to call CP9Logoddsify(), but rather only logoddsify
   * the parameters that are different.
   *
   * NOTE(review): the right-hand sides below read hmm->t[spos-1] and
   * hmm->t[epos] (column indices) while the left-hand sides use
   * spos_nd / epos_nd (node indices). These coincide when
   * spos_nd == spos and epos_nd == epos; confirm the intent for the
   * case where 'hmm' only has (epos-spos+1) nodes.
   */

  /* Configure entry.
   * Exactly 3 ways to start: B->M (begin[spos_nd]), B->I (t[spos_nd-1][CTMI]),
   * and B->D (t[spos_nd-1][CTMD]).
   */
  if (spos > 1)
    {
      /* probability of starting in the insert state before spos */
      double b2i = orig_phi[spos-1][HMMINSERT] * (1. - hmm->t[spos-1][CTII]);
      /* probability of starting in the delete state of spos */
      double b2d = orig_phi[spos  ][HMMDELETE] - (orig_phi[spos-1][HMMINSERT] * hmm->t[spos-1][CTID]);

      /* whatever is left over is the probability of starting in the match state */
      hmm->begin[spos_nd]      = 1. - (b2i + b2d);
      hmm->t[spos_nd-1][CTMI]  = b2i;
      hmm->t[spos_nd-1][CTMD]  = b2d;
      hmm->t[spos_nd-1][CTMM]  = 0.;  /* B->M probability lives in begin[], not in t[][CTMM] */
      hmm->t[spos_nd-1][CTMEL] = 0.;  /* can't go to EL from B */
      hmm->t[spos_nd-1][CTDM]  = 0.;  /* no delete state precedes the start node */
      hmm->t[spos_nd-1][CTDI]  = 0.;
      hmm->t[spos_nd-1][CTDD]  = 0.;

      /* Score the begin probability that was just set. The original code
       * scored begin[1], which is only the same element when spos_nd == 1.
       */
      hmm->bsc[spos_nd]         = Prob2Score(hmm->begin[spos_nd], 1.0);

      hmm->tsc[CTMM][spos_nd-1]  = -INFTY;
      hmm->tsc[CTMEL][spos_nd-1] = -INFTY;
      hmm->tsc[CTDM][spos_nd-1]  = -INFTY;
      hmm->tsc[CTDI][spos_nd-1]  = -INFTY;
      hmm->tsc[CTDD][spos_nd-1]  = -INFTY;

      hmm->tsc[CTMI][spos_nd-1]  = Prob2Score(hmm->t[spos_nd-1][CTMI], 1.0);
      hmm->tsc[CTMD][spos_nd-1]  = Prob2Score(hmm->t[spos_nd-1][CTMD], 1.0);
    }

  /* Configure exit: everything that used to continue past epos now ends. */
  if (epos < hmm->M)
    {
      hmm->end[epos_nd]      = hmm->t[epos][CTMM] + hmm->t[epos][CTMD];
      hmm->t[epos_nd][CTDM] += hmm->t[epos][CTDD];
      hmm->t[epos_nd][CTIM] += hmm->t[epos][CTID];
      hmm->t[epos_nd][CTMM]  = 0.;  /* M->E is carried by end[] */
      hmm->t[epos_nd][CTMEL] = 0.;
      hmm->t[epos_nd][CTMD]  = 0.;  /* no delete state follows the end node */
      hmm->t[epos_nd][CTDD]  = 0.;
      hmm->t[epos_nd][CTID]  = 0.;

      hmm->esc[epos_nd]        = Prob2Score(hmm->end[epos_nd], 1.0);
      hmm->tsc[CTDM][epos_nd]  = Prob2Score(hmm->t[epos_nd][CTDM], 1.0);
      hmm->tsc[CTIM][epos_nd]  = Prob2Score(hmm->t[epos_nd][CTIM], 1.0);
      hmm->tsc[CTMM][epos_nd]  = -INFTY;  /* M->E is carried by esc[] */
      hmm->tsc[CTMEL][epos_nd] = -INFTY;
      hmm->tsc[CTMD][epos_nd]  = -INFTY;
      hmm->tsc[CTDD][epos_nd]  = -INFTY;
      hmm->tsc[CTID][epos_nd]  = -INFTY;
    }

  hmm->flags |= CPLAN9_HASBITS;  /* raise the log-odds ready flag */
  return;
}
/* Function: CPlan9InitEL()
 * Incept:   EPN, Tue Jun 19 13:10:56 2007
 *
 * Purpose:  Set up the EL (local end) bookkeeping of a CP9 HMM from the
 *           node topology of the CM it was built from: which HMM nodes
 *           own an EL state (has_el), and for each HMM node k, which
 *           EL states can transit into it (el_from_ct, el_from_idx,
 *           el_from_cmnd). The EL self-transition is copied from the CM.
 *
 * Args:     cm  - the CM
 *           cp9 - the CP9 HMM, built from cm
 *
 * Return:   (void)
 *           Calls cm_Fail() if el_from_idx was already filled or if an
 *           allocation fails.
 */
void
CPlan9InitEL(CM_t *cm, CP9_t *cp9)
{
  int          status;      /* written by ESL_ALLOC */
  CMEmitMap_t *emit_map;    /* consensus emit map for the CM */
  int         *n_filled;    /* per HMM node: entries written so far in pass two */
  int          hmm_nd;      /* counter over HMM nodes */
  int          cm_nd;       /* counter over CM nodes */
  int          type;        /* type of the current CM node */
  int          dest;        /* HMM node reached from an EL state */

  /* The EL self transition comes straight from the CM. */
  cp9->el_self   = sreEXP2(cm->el_selfsc);
  cp9->el_selfsc = Prob2Score(cp9->el_self, 1.0);

  /* Two passes over the CM nodes: the first counts incoming EL
   * transitions per HMM node, the second fills arrays that were
   * allocated to exactly those counts.
   */
  emit_map = CreateEmitMap(cm);

  hmm_nd = 0;
  while (hmm_nd <= cp9->M)
    {
      cp9->el_from_ct[hmm_nd] = 0;
      cp9->has_el[hmm_nd]     = FALSE;
      hmm_nd++;
    }
  cp9->el_from_ct[cp9->M + 1] = 0;  /* the E state can also be reached from EL states */

  /* Pass one: count. */
  for (cm_nd = 0; cm_nd < cm->nodes; cm_nd++)
    {
      type = cm->ndtype[cm_nd];
      if (type != MATP_nd && type != MATL_nd && type != MATR_nd &&
          type != BEGL_nd && type != BEGR_nd)
        continue;
      if (cm->ndtype[cm_nd + 1] == END_nd)
        continue;

      cp9->el_from_ct[emit_map->rpos[cm_nd]]++;
      cp9->has_el[emit_map->lpos[cm_nd]] = TRUE;
    }

  /* Allocate el_from_idx[k] and el_from_cmnd[k] where there is something to store. */
  for (hmm_nd = 0; hmm_nd <= cp9->M + 1; hmm_nd++)
    {
      if (cp9->el_from_idx[hmm_nd] != NULL)  /* already filled; shouldn't happen */
        cm_Fail("ERROR in CPlan9InitEL() el_from_idx has already been initialized\n");

      if (cp9->el_from_ct[hmm_nd] > 0)
        {
          ESL_ALLOC(cp9->el_from_idx[hmm_nd],  sizeof(int) * cp9->el_from_ct[hmm_nd]);
          ESL_ALLOC(cp9->el_from_cmnd[hmm_nd], sizeof(int) * cp9->el_from_ct[hmm_nd]);
        }
      /* otherwise the entry stays NULL */
    }

  /* Pass two: fill, using a scratch counter per HMM node. */
  ESL_ALLOC(n_filled, sizeof(int) * (cp9->M + 2));
  for (hmm_nd = 0; hmm_nd <= cp9->M + 1; hmm_nd++)
    n_filled[hmm_nd] = 0;

  for (cm_nd = 0; cm_nd < cm->nodes; cm_nd++)
    {
      type = cm->ndtype[cm_nd];
      if (type != MATP_nd && type != MATL_nd && type != MATR_nd &&
          type != BEGL_nd && type != BEGR_nd)
        continue;
      if (cm->ndtype[cm_nd + 1] == END_nd)
        continue;

      dest = emit_map->rpos[cm_nd];
      cp9->el_from_idx[dest][n_filled[dest]]  = emit_map->lpos[cm_nd];
      cp9->el_from_cmnd[dest][n_filled[dest]] = cm_nd;
      n_filled[dest]++;
    }

  /* Release scratch space. */
  free(n_filled);
  FreeEmitMap(emit_map);
  return;

 ERROR:
  cm_Fail("Memory allocation error.");
}
/* Function: CP9Logoddsify()
 *
 * Purpose:  Take an HMM with valid probabilities, and
 *           fill in the integer log-odds score section of the model.
 *           Does nothing if the CPLAN9_HASBITS flag is already raised.
 *
 *    Notes on log-odds scores (simplified from plan7.c):
 *         type of parameter       probability        score
 *         -----------------       -----------        ------
 *         any emission             p_x           log_2 p_x/null_x
 *         any transition           t_x           log_2 t_x
 *
 * Args:     hmm - the hmm to calculate scores in.
 *
 * Return:   (void)
 *           hmm scores are filled in and CPLAN9_HASBITS is raised.
 *           Calls cm_Fail() if the scratch score vector can't be allocated.
 */
void
CP9Logoddsify(CP9_t *hmm)
{
  int  k;          /* counter for model position */
  int  x;          /* counter for symbols        */
  int *sc = NULL;  /* scratch score vector, one entry per alphabet symbol (Kp) */
  int  status;     /* written by ESL_ALLOC */

  if (hmm->flags & CPLAN9_HASBITS) return;

  /* Size the scratch vector from the alphabet itself; a fixed-size
   * array would be overrun by any alphabet with Kp larger than it.
   */
  ESL_ALLOC(sc, sizeof(int) * hmm->abc->Kp);

  /* Symbol emission scores */
  sc[hmm->abc->K]      = -INFTY;  /* gap character */
  sc[hmm->abc->Kp - 1] = -INFTY;  /* missing data character */

  /* Insert emission scores, relies on sc[K, Kp-1] initialization to -inf above */
  for (k = 0; k <= hmm->M; k++)
    {
      for (x = 0; x < hmm->abc->K; x++)
        sc[x] = Prob2Score(hmm->ins[k][x], hmm->null[x]);
      esl_abc_IExpectScVec(hmm->abc, sc, hmm->null);
      for (x = 0; x < hmm->abc->Kp; x++)
        hmm->isc[x][k] = sc[x];
    }

  /* Match emission scores, relies on sc[K, Kp-1] initialization to -inf above */
  for (k = 1; k <= hmm->M; k++)
    {
      for (x = 0; x < hmm->abc->K; x++)
        sc[x] = Prob2Score(hmm->mat[k][x], hmm->null[x]);
      esl_abc_IExpectScVec(hmm->abc, sc, hmm->null);
      for (x = 0; x < hmm->abc->Kp; x++)
        hmm->msc[x][k] = sc[x];
    }

  /* Transition, begin and end scores */
  for (k = 0; k <= hmm->M; k++)
    {
      hmm->tsc[CTMM][k]  = Prob2Score(hmm->t[k][CTMM],  1.0);
      hmm->tsc[CTMI][k]  = Prob2Score(hmm->t[k][CTMI],  1.0);
      hmm->tsc[CTMD][k]  = Prob2Score(hmm->t[k][CTMD],  1.0);
      hmm->tsc[CTMEL][k] = Prob2Score(hmm->t[k][CTMEL], 1.0);
      hmm->tsc[CTIM][k]  = Prob2Score(hmm->t[k][CTIM],  1.0);
      hmm->tsc[CTII][k]  = Prob2Score(hmm->t[k][CTII],  1.0);
      hmm->tsc[CTID][k]  = Prob2Score(hmm->t[k][CTID],  1.0);

      if (k != 0)
        {
          hmm->tsc[CTDM][k] = Prob2Score(hmm->t[k][CTDM], 1.0);
          hmm->tsc[CTDI][k] = Prob2Score(hmm->t[k][CTDI], 1.0);
          hmm->tsc[CTDD][k] = Prob2Score(hmm->t[k][CTDD], 1.0);

          hmm->bsc[k] = Prob2Score(hmm->begin[k], 1.0);
          hmm->esc[k] = Prob2Score(hmm->end[k],   1.0);
        }
      else
        {
          /* D_0 doesn't exist */
          hmm->tsc[CTDM][k] = -INFTY;
          hmm->tsc[CTDD][k] = -INFTY;
          hmm->tsc[CTDI][k] = -INFTY;
        }
    }
  hmm->el_selfsc = Prob2Score(hmm->el_self, 1.0);

  /* Finally, fill the efficiently reordered transition scores for this HMM.
   * NOTE(review): bsc[0] and esc[0] are not written by this function, so the
   * values copied at k == 0 are whatever was already stored there; confirm
   * they are initialized when the model is allocated.
   */
  for (k = 0; k <= hmm->M; k++)
    {
      int *otsc_k = hmm->otsc + k * cp9O_NTRANS;

      otsc_k[cp9O_MM]  = hmm->tsc[CTMM][k];
      otsc_k[cp9O_MI]  = hmm->tsc[CTMI][k];
      otsc_k[cp9O_MD]  = hmm->tsc[CTMD][k];
      otsc_k[cp9O_IM]  = hmm->tsc[CTIM][k];
      otsc_k[cp9O_II]  = hmm->tsc[CTII][k];
      otsc_k[cp9O_DM]  = hmm->tsc[CTDM][k];
      otsc_k[cp9O_DD]  = hmm->tsc[CTDD][k];
      otsc_k[cp9O_ID]  = hmm->tsc[CTID][k];
      otsc_k[cp9O_DI]  = hmm->tsc[CTDI][k];
      otsc_k[cp9O_BM]  = hmm->bsc[k];
      otsc_k[cp9O_MEL] = hmm->tsc[CTMEL][k];
      otsc_k[cp9O_ME]  = hmm->esc[k];
    }

  free(sc);
  hmm->flags |= CPLAN9_HASBITS;  /* raise the log-odds ready flag */
  return;

 ERROR:
  cm_Fail("Memory allocation error.");
}