/* yes LogSum2 and FLogsum are identical, this is for backwards compatibility */ float FLogsum(float s1, float s2) { const float max = ESL_MAX(s1, s2); const float min = ESL_MIN(s1, s2); #if 0 return (min == -eslINFINITY || (max-min) >= 23.f) ? max : max + sreLOG2(1.0 + sreEXP2(min-max)); /* EPN: While debugging. Replaces logsum table with analytical calculation. Remember to remove! */ #endif return (min == -eslINFINITY || (max-min) >= 23.f) ? max : max + flogsum_lookup[(int)((max-min)*INTSCALE)]; }
/* Function: init_ilogsum()
 *
 * Purpose:  Populate the <ilogsum_lookup> table. Entry <i> holds
 *           rint(INTSCALE * sreLOG2(1 + sreEXP2(-i / INTSCALE)))
 *           for i = 0..LOGSUM_TBL-1.
 *
 *           Only the first call does any work; a function-local static
 *           flag turns every later call into a no-op.
 */
void
init_ilogsum(void)
{
  static int table_ready = FALSE;
  int        idx;

  if (table_ready) return;
  table_ready = TRUE;

  for (idx = 0; idx < LOGSUM_TBL; idx++) {
    ilogsum_lookup[idx] = rint(INTSCALE * (sreLOG2(1. + sreEXP2((double) -idx / INTSCALE))));
  }
}
/* Function: FLogsumInit()
 *
 * Purpose:  Populate the <flogsum_lookup> table read by FLogsum().
 *           Entry <i> holds sreLOG2(1 + sreEXP2(-i / INTSCALE)) for
 *           i = 0..LOGSUM_TBL-1.
 *
 *           Only the first call does any work; a function-local static
 *           flag turns every later call into a no-op.
 */
void
FLogsumInit(void)
{
  static int already_built = FALSE;
  int        slot;

  if (already_built) return;
  already_built = TRUE;

  for (slot = 0; slot < LOGSUM_TBL; slot++) {
    flogsum_lookup[slot] = sreLOG2(1. + sreEXP2((double) -slot / INTSCALE));
  }
}
/* tr_validate_rooting_node()
 *
 * Helper for cm_tr_penalties_Validate(). Walks the nodes of <cm> from
 * cm->nodes-1 down to 1 and returns the index of the first BIF, MATP,
 * MATL or MATR node whose consensus span lpos..rpos accommodates the
 * fragment <g>..<h>. Node 0 is never examined.
 *
 * The emit map's lpos is one less than the span start for nodes that
 * are neither MATP nor MATL, and its rpos is one more than the span
 * end for nodes that are neither MATP nor MATR; both are adjusted
 * before the comparison.
 *
 * <exact_left>:  if TRUE require lpos == g, else lpos <= g.
 * <exact_right>: if TRUE require rpos == h, else rpos >= h.
 *
 * Returns the node index (> 0), or 0 if no node qualifies.
 */
static int
tr_validate_rooting_node(CM_t *cm, int g, int h, int exact_left, int exact_right)
{
  int nd;

  for (nd = cm->nodes - 1; nd > 0; nd--) {
    int lpos = cm->emap->lpos[nd];
    int rpos = cm->emap->rpos[nd];
    int left_ok;
    int right_ok;

    if (cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
    if (cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */

    /* only BIF and the emitting MATP/MATL/MATR nodes are candidates */
    if (cm->ndtype[nd] != BIF_nd  && cm->ndtype[nd] != MATP_nd &&
        cm->ndtype[nd] != MATL_nd && cm->ndtype[nd] != MATR_nd) continue;

    left_ok  = exact_left  ? (lpos == g) : (lpos <= g);
    right_ok = exact_right ? (rpos == h) : (rpos >= h);
    if (left_ok && right_ok) return nd;
  }
  return 0;
}

/* Function: cm_tr_penalties_Validate()
 * Date:     EPN, Fri Jan 27 14:57:04 2012
 *
 * Purpose:  Validate a CM_TR_PENALTIES object by checking that the
 *           probabilities of all possible local-mode fragments sum to
 *           1.0 in each of three scenarios: 5' and 3' truncation,
 *           5' truncation only, and 3' truncation only.
 *
 *           For every fragment g..h of the consensus yield 1..clen
 *           that can be generated in the scenario, we find the node
 *           with the smallest subtree yield that contains g..h (the
 *           node at which an alignment of that fragment would be
 *           rooted), map it to its state v via cm->nodemap, and add
 *           sreEXP2(trp->l_ptyAA[scenario][v]) to a running sum. When
 *           both ends may be truncated every fragment must be
 *           generable, so a fragment with no rooting node is an
 *           error. When only one end may be truncated, fragments with
 *           no suitable node (rpos == h required for 5'-only,
 *           lpos == g required for 3'-only) are impossible and are
 *           simply skipped.
 *
 *           This is an expensive test, written to exercise the
 *           fragment probability code in cm_tr_penalties_Create(). It
 *           can only be run if the <ignore_inserts> flag was TRUE
 *           when the penalties were created; since inserts should not
 *           be ignored in real use, this is a testing-only routine.
 *
 *           Each of the three sums is also printed to stdout.
 *
 * Args:     trp    - the truncation penalties to validate
 *           cm     - the model the penalties were computed for
 *           tol    - tolerance passed to esl_DCompare() for each sum
 *           errbuf - filled with a message on failure
 *
 * Returns:  eslOK if all checks pass within tolerance level.
 *           eslFAIL if any check fails, errbuf is filled.
 */
int
cm_tr_penalties_Validate(CM_TR_PENALTIES *trp, CM_t *cm, double tol, char *errbuf)
{
  double total;   /* running sum of fragment probabilities, should end at 1.0 */
  int    g, h;    /* fragment start/stop */
  int    nd;      /* node a g..h fragment is rooted at, 0 if none */

  if (! trp->ignored_inserts) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), trp->ignored_inserts flag is not TRUE");

  /* test 1: trp->l_ptyAA[TRPENALTY_5P_AND_3P]: every fragment must have a rooting node */
  total = 0.;
  for (g = 1; g <= cm->clen; g++) {
    for (h = g; h <= cm->clen; h++) {
      nd = tr_validate_rooting_node(cm, g, h, FALSE, FALSE);
      if (nd == 0) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate: 5' and 3' test, unable to find node that spans %d..%d\n", g, h);
      total += sreEXP2(trp->l_ptyAA[TRPENALTY_5P_AND_3P][cm->nodemap[nd]]);
    }
  }
  printf("L and R sump: %.5f\n", total);
  if (esl_DCompare(1.0, total, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 5' and 3' truncation test failed (%g != 1.0)", total);

  /* test 2: trp->l_ptyAA[TRPENALTY_5P_ONLY]: only the left end is truncated,
   * so the rooting node must end exactly at h; other fragments are impossible */
  total = 0.;
  for (g = 1; g <= cm->clen; g++) {
    for (h = g; h <= cm->clen; h++) {
      nd = tr_validate_rooting_node(cm, g, h, FALSE, TRUE);
      if (nd == 0) continue;
      total += sreEXP2(trp->l_ptyAA[TRPENALTY_5P_ONLY][cm->nodemap[nd]]);
    }
  }
  printf("L only sump: %.5f\n", total);
  if (esl_DCompare(1.0, total, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 5' only truncation test failed (%g != 1.0)", total);

  /* test 3: trp->l_ptyAA[TRPENALTY_3P_ONLY]: only the right end is truncated,
   * so the rooting node must start exactly at g; other fragments are impossible */
  total = 0.;
  for (g = 1; g <= cm->clen; g++) {
    for (h = g; h <= cm->clen; h++) {
      nd = tr_validate_rooting_node(cm, g, h, TRUE, FALSE);
      if (nd == 0) continue;
      total += sreEXP2(trp->l_ptyAA[TRPENALTY_3P_ONLY][cm->nodemap[nd]]);
    }
  }
  printf("R only sump: %.5f\n", total);
  if (esl_DCompare(1.0, total, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 3' only truncation test failed (%g != 1.0)", total);

  return eslOK;
}
/* Function: Score2Prob()
 *
 * Purpose:  Convert an integer log_2 odds score back to a probability.
 *           The score is divided by INTSCALE, exponentiated with
 *           sreEXP2() and multiplied by the null model probability
 *           <null>.
 *
 * Args:     sc   - scaled integer score; -INFTY is the sentinel for
 *                  probability zero
 *           null - null model probability to multiply back in
 *
 * Returns:  0 if <sc> is -INFTY, otherwise the recovered probability.
 */
float
Score2Prob(int sc, float null)
{
  return (sc == -INFTY) ? 0. : (null * sreEXP2((float) sc / INTSCALE));
}
/* cp9_el_node_qualifies()
 *
 * Helper for CPlan9InitEL(). Returns TRUE-like (nonzero) if CM node
 * <nd> is a MATP, MATL, MATR, BEGL or BEGR node that is not directly
 * followed by an END node, FALSE otherwise. The following node is
 * only inspected once <nd> itself has passed the type check.
 */
static int
cp9_el_node_qualifies(CM_t *cm, int nd)
{
  if (cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd &&
      cm->ndtype[nd] != MATR_nd && cm->ndtype[nd] != BEGL_nd &&
      cm->ndtype[nd] != BEGR_nd) return FALSE;

  return (cm->ndtype[nd+1] != END_nd);
}

/* Function: CPlan9InitEL()
 * Incept:   EPN, Tue Jun 19 13:10:56 2007
 *
 * Purpose:  Initialize a CP9 HMM for possible EL local ends by
 *           working out, from the CM node topology, how the EL states
 *           should be connected.
 *
 *           Each qualifying CM node <nd> contributes one transition:
 *           HMM node emap->rpos[nd] can be reached from the EL state
 *           of HMM node emap->lpos[nd]. A first pass counts these per
 *           destination node (cp9->el_from_ct) and flags the source
 *           nodes (cp9->has_el); the per-node arrays
 *           cp9->el_from_idx[k] and cp9->el_from_cmnd[k] are then
 *           allocated to exactly that count, and a second pass fills
 *           them with the source HMM node and the CM node
 *           respectively. Counting first means we only allocate what
 *           is needed.
 *
 *           cp9->el_self and cp9->el_selfsc are also set from
 *           cm->el_selfsc.
 *
 *           Dies via cm_Fail() if cp9->el_from_idx[k] is already
 *           non-NULL for any k, or if an allocation fails.
 *
 * Args:     cm  - the CM
 *           cp9 - the CP9 HMM, built from cm
 *
 * Return:   (void)
 */
void
CPlan9InitEL(CM_t *cm, CP9_t *cp9)
{
  int          status;    /* set by ESL_ALLOC on failure */
  CMEmitMap_t *emap;      /* consensus emit map for the CM */
  int         *nfilled;   /* per HMM node: slots filled so far in the second pass */
  int          k;         /* counter over HMM nodes */
  int          nd;        /* counter over CM nodes */

  /* Copy the CM EL self transition, as a probability and as a score. */
  cp9->el_self   = sreEXP2(cm->el_selfsc);
  cp9->el_selfsc = Prob2Score(cp9->el_self, 1.0);

  emap = CreateEmitMap(cm);

  /* Reset the counts and flags. */
  for (k = 0; k <= cp9->M; k++) {
    cp9->el_from_ct[k] = 0;
    cp9->has_el[k]     = FALSE;
  }
  cp9->el_from_ct[(cp9->M+1)] = 0; /* special case, we can get to E state from EL states */

  /* First pass: count the valid transitions into each HMM node. */
  for (nd = 0; nd < cm->nodes; nd++) {
    if (! cp9_el_node_qualifies(cm, nd)) continue;
    cp9->el_from_ct[emap->rpos[nd]]++;
    cp9->has_el[emap->lpos[nd]] = TRUE;
  }

  /* Allocate el_from_idx[k] and el_from_cmnd[k] where the count is
   * positive; the rest stay NULL. */
  for (k = 0; k <= (cp9->M+1); k++) {
    if (cp9->el_from_idx[k] != NULL) /* if !NULL we already filled it, shouldn't happen */
      cm_Fail("ERROR in CPlan9InitEL() el_from_idx has already been initialized\n");
    if (cp9->el_from_ct[k] > 0) {
      ESL_ALLOC(cp9->el_from_idx[k],  sizeof(int) * cp9->el_from_ct[k]);
      ESL_ALLOC(cp9->el_from_cmnd[k], sizeof(int) * cp9->el_from_ct[k]);
    }
  }

  /* Second pass: fill the arrays, tracking the next free slot per node. */
  ESL_ALLOC(nfilled, sizeof(int) * (cp9->M+2));
  for (k = 0; k <= (cp9->M+1); k++) {
    nfilled[k] = 0;
  }

  for (nd = 0; nd < cm->nodes; nd++) {
    if (! cp9_el_node_qualifies(cm, nd)) continue;
    k = emap->rpos[nd];
    cp9->el_from_idx[k][nfilled[k]]  = emap->lpos[nd];
    cp9->el_from_cmnd[k][nfilled[k]] = nd;
    nfilled[k]++;
  }

  /* Free memory and exit */
  free(nfilled);
  FreeEmitMap(emap);
  return;

 ERROR:
  cm_Fail("Memory allocation error.");
}