/* Function: CPlan9NoEL()
 * Incept:   EPN, Tue Jun 19 09:50:52 2007
 *
 * Purpose:  Turn EL local ends off in a CM Plan 9 HMM: every
 *           match->EL transition probability t[k][CTMEL], k = 0..M,
 *           is set to zero, CPlan9RenormalizeExits() is called on the
 *           HMM, and the log-odds scores are recomputed.
 *
 * Args:     cm - the CM, must have a valid CP9 HMM (cm->cp9) and
 *                CP9 map (cm->cp9map), the CMH_CP9 flag up, and
 *                the CPLAN9_EL flag up in cm->cp9->flags.
 *
 * Return:   (void)
 *           HMM probabilities and scores are modified; the CPLAN9_EL
 *           flag is lowered. Dies via cm_Fail() on contract violation.
 */
void
CPlan9NoEL(CM_t *cm)
{
  int k;                        /* counter over HMM nodes */

  /* Contract checks */
  if(cm->cp9 == NULL)
    cm_Fail("ERROR in CPlan9NoEL, cm->cp9 is NULL.\n");
  if(cm->cp9map == NULL)
    cm_Fail("ERROR in CPlan9NoEL, cm->cp9map is NULL.\n");
  if(!(cm->flags & CMH_CP9))
    cm_Fail("ERROR in CPlan9NoEL, CMH_CP9 flag is down.");
  if(!(cm->cp9->flags & CPLAN9_EL))
    cm_Fail("ERROR in CPlan9NoEL, CP9_EL flag is already down.");

  /* Make every transition into an EL state impossible */
  for(k = 0; k <= cm->cp9->M; k++)
    cm->cp9->t[k][CTMEL] = 0.;
  CPlan9RenormalizeExits(cm->cp9, 1);

  /* HASBITS must be lowered first, otherwise CP9Logoddsify() returns
   * immediately without recomputing any score. */
  cm->cp9->flags &= ~CPLAN9_HASBITS;    /* clear the log-odds ready flag */
  CP9Logoddsify(cm->cp9);

  cm->cp9->flags &= ~CPLAN9_EL;         /* EL end locals now off */
  return;
}
/* Function: CreateCP9Matrix() * based on CreatePlan7Matrix() <-- this function's comments below * Purpose: Create a dynamic programming matrix for standard Forward, * Backward, or Viterbi, with scores kept as scaled log-odds * integers. Keeps 2D arrays compact in RAM in an attempt * to maximize cache hits. * * The mx structure can be dynamically grown, if a new * HMM or seq exceeds the currently allocated size. Dynamic * growing is more efficient than an alloc/free of a whole * matrix for every new target. The ResizePlan7Matrix() * call does this reallocation, if needed. Here, in the * creation step, we set up some pads - to inform the resizing * call how much to overallocate when it realloc's. * * Args: N - N+1 rows are allocated, usually N == 1 for * scanning in memory efficient mode, or N == L, length of sequence. * M - size of model in nodes * * Return: mx * mx is allocated here. Caller frees with FreeCP9Matrix(mx). */ CP9_MX * CreateCP9Matrix(int N, int M) { int status; CP9_MX *mx; int i; ESL_ALLOC(mx, sizeof(CP9_MX)); ESL_ALLOC(mx->mmx, sizeof(int *) * (N+1)); ESL_ALLOC(mx->imx, sizeof(int *) * (N+1)); ESL_ALLOC(mx->dmx, sizeof(int *) * (N+1)); ESL_ALLOC(mx->elmx,sizeof(int *) * (N+1)); /* slightly wasteful, some nodes can't go to EL (for ex: right half of MATPs) */ ESL_ALLOC(mx->erow, sizeof(int) * (N+1)); ESL_ALLOC(mx->mmx_mem, sizeof(int) * ((N+1)*(M+1))); ESL_ALLOC(mx->imx_mem, sizeof(int) * ((N+1)*(M+1))); ESL_ALLOC(mx->dmx_mem, sizeof(int) * ((N+1)*(M+1))); ESL_ALLOC(mx->elmx_mem,sizeof(int) * ((N+1)*(M+1))); /* The indirect assignment below looks wasteful; it's actually * used for aligning data on 16-byte boundaries as a cache * optimization in the fast altivec implementation */ mx->mmx[0] = (int *) mx->mmx_mem; mx->imx[0] = (int *) mx->imx_mem; mx->dmx[0] = (int *) mx->dmx_mem; mx->elmx[0]= (int *) mx->elmx_mem; for (i = 1; i <= N; i++) { mx->mmx[i] = mx->mmx[0] + (i*(M+1)); mx->imx[i] = mx->imx[0] + (i*(M+1)); mx->dmx[i] = mx->dmx[0] + (i*(M+1)); 
mx->elmx[i]= mx->elmx[0]+ (i*(M+1)); } mx->M = M; mx->rows = N; mx->kmin = NULL; mx->kmax = NULL; mx->ncells_allocated = (M+1) * (N+1); mx->ncells_valid = (M+1) * (N+1); mx->size_Mb = (float) sizeof(CP9_MX); mx->size_Mb += (float) (sizeof(int *) * (mx->rows+1) * 4); /* mx->*mx ptrs */ mx->size_Mb += (float) (sizeof(int) * (mx->rows+1) * (M+1) * 4); /* mx->*mx_mem */ mx->size_Mb += (float) (sizeof(int) * (mx->rows+1)); /* mx->erow */ mx->size_Mb /= 1000000.; return mx; ERROR: cm_Fail("Memory allocation error."); return NULL; /* never reached */ }
CP9_t * AllocCPlan9Shell(void) { int status; CP9_t *hmm; ESL_ALLOC(hmm, sizeof(CP9_t)); hmm->abc = NULL; hmm->M = 0; hmm->t = NULL; hmm->mat = NULL; hmm->ins = NULL; hmm->tsc = hmm->msc = hmm->isc = NULL; hmm->tsc_mem = hmm->msc_mem = hmm->isc_mem = NULL; hmm->begin = NULL; hmm->end = NULL; hmm->bsc = hmm->bsc_mem = NULL; hmm->esc = hmm->esc_mem = NULL; hmm->otsc = NULL; hmm->has_el = NULL; hmm->el_from_ct = NULL; hmm->el_from_idx = NULL; hmm->el_from_cmnd= NULL; hmm->flags = 0; return hmm; ERROR: cm_Fail("Memory allocation error.\n"); return NULL; /* never reached */ }
/* Function: CPlan9CMLocalBeginConfig()
 * Incept:   EPN, Thu Jun 21 15:43:29 2007
 *           based on SRE's Plan7SWConfig() from HMMER's plan7.c
 *
 * Purpose:  Set up a CM Plan 9 HMM to mimic CM local begins as closely
 *           as it can. We can't enforce that a begin/end point are chosen
 *           the same way a CM's are, as the choice of a CM local begin
 *           (in non-truncated CYK mode) defines both a start and end point,
 *           and some start/end combinations are impossible. For the CP9
 *           we allow all possible start/end combos.
 *
 * Args:     cm - the CM, must have valid cm->cp9 and cm->cp9map, with
 *                the CMH_CP9, CMH_LOCAL_BEGIN and CMH_LOCAL_END flags
 *                up and CPLAN9_LOCAL_END down in cm->cp9->flags.
 *                The CM local begin probs (cm->begin) are used to
 *                set the cm->cp9 begin probs.
 *
 * Return:   (void)
 *           HMM begin probabilities and all log-odds scores are
 *           modified; CPLAN9_LOCAL_BEGIN and CPLAN9_LOCAL_END are
 *           raised. Dies via cm_Fail() on contract violation.
 */
void
CPlan9CMLocalBeginConfig(CM_t *cm)
{
  CMEmitMap_t *emap;            /* consensus emit map for the CM */
  int nd;

  /* Contract checks */
  if(cm->cp9 == NULL)
    cm_Fail("ERROR in CPlan9CMLocalBeginConfig, cm->cp9 is NULL.\n");
  if(cm->cp9map == NULL)
    cm_Fail("ERROR in CPlan9CMLocalBeginConfig, cm->cp9map is NULL.\n");
  if(!(cm->flags & CMH_CP9))
    cm_Fail("ERROR in CPlan9CMLocalBeginConfig, CMH_CP9 flag is down.");
  if(!(cm->flags & CMH_LOCAL_BEGIN))
    cm_Fail("ERROR in CPlan9CMLocalBeginConfig, CMH_LOCAL_BEGIN flag is down.");
  /* NOTE(review): this message used to read "CP9_LOCAL_BEGIN flag is
   * already up", which does not describe the condition tested. The
   * condition is kept as is and the message now matches it; confirm
   * whether a check of CPLAN9_LOCAL_BEGIN was intended here. */
  if(!(cm->flags & CMH_LOCAL_END))
    cm_Fail("ERROR in CPlan9CMLocalBeginConfig, CMH_LOCAL_END flag is down.");
  if(cm->cp9->flags & CPLAN9_LOCAL_END)
    cm_Fail("ERROR in CPlan9CMLocalBeginConfig, CP9_LOCAL_END flag is already up.");

  /* Configure entry.
   * To match CM, we enforce the only way out of the B state (M_0)
   * is through a local begin into a match state.
   * begin[] has M+1 entries (0..M); clear all of them so that the
   * accumulation below never adds onto a stale begin[M].
   */
  esl_vec_FSet(cm->cp9->begin, cm->cp9->M+1, 0.);
  emap = CreateEmitMap(cm);
  for (nd = 1; nd < cm->nodes; nd++)
    {
      if(NOT_IMPOSSIBLE(cm->begin[cm->nodemap[nd]]))
	{
	  /* we do += b/c for lpos of BIFs, there's > 1 way to enter there,
	   * the BIF and the first MATP or MATL of the left child of the BIF */
	  cm->cp9->begin[emap->lpos[nd]] += cm->begin[cm->nodemap[nd]];
	}
    }
  FreeEmitMap(emap);            /* only needed for the lpos lookups above */

  cm->cp9->flags &= ~CPLAN9_HASBITS;     /* reconfig invalidates log-odds scores */
  cm->cp9->flags |= CPLAN9_LOCAL_BEGIN;  /* local begins now on */
  cm->cp9->flags |= CPLAN9_LOCAL_END;    /* local ends now on */

  CP9Logoddsify(cm->cp9);
}
/* Function: AllocCPlan9Body()
 *
 * Purpose:  Allocate the per-node storage of a CP9_t for a model of
 *           M nodes over alphabet abc, and set hmm->abc and hmm->M.
 *           The 2D arrays are each allocated as one contiguous block
 *           with row pointers set into it:
 *             t, mat, ins      - indexed [node 0..M][transition or residue]
 *             tsc, msc, isc    - indexed [transition or symbol][node 0..M]
 *           Entry 0 of the begin/end arrays and column 0 of tsc are
 *           given boundary values; the EL bookkeeping arrays are
 *           zeroed / NULLed. No other probability or score is
 *           initialized here.
 *
 * Args:     hmm - the CP9 HMM whose members are allocated here
 *           M   - number of nodes in the model
 *           abc - alphabet; abc->K sizes the probability vectors and
 *                 abc->Kp the score vectors
 *
 * Return:   (void)
 *           Dies via cm_Fail() if any allocation fails.
 */
void
AllocCPlan9Body(CP9_t *hmm, int M, const ESL_ALPHABET *abc)
{
  int status;
  int k, x;

  hmm->abc = abc;

  hmm->M = M;

  ESL_ALLOC(hmm->t,   (M+1) * sizeof(float *));
  ESL_ALLOC(hmm->mat, (M+1) * sizeof(float *));
  ESL_ALLOC(hmm->ins, (M+1) * sizeof(float *));
  ESL_ALLOC(hmm->t[0],(cp9_NTRANS*(M+1)) * sizeof(float));
  ESL_ALLOC(hmm->mat[0],(abc->K*(M+1)) * sizeof(float));
  ESL_ALLOC(hmm->ins[0],(abc->K*(M+1)) * sizeof(float));

  ESL_ALLOC(hmm->tsc, cp9_NTRANS * sizeof(int *));
  ESL_ALLOC(hmm->msc, hmm->abc->Kp * sizeof(int *));
  ESL_ALLOC(hmm->isc, hmm->abc->Kp * sizeof(int *));
  ESL_ALLOC(hmm->tsc_mem,(cp9_NTRANS*(M+1)) * sizeof(int));
  ESL_ALLOC(hmm->msc_mem,(hmm->abc->Kp*(M+1)) * sizeof(int));
  ESL_ALLOC(hmm->isc_mem,(hmm->abc->Kp*(M+1)) * sizeof(int));

  hmm->tsc[0] = hmm->tsc_mem;
  hmm->msc[0] = hmm->msc_mem;
  hmm->isc[0] = hmm->isc_mem;

  /* transition scores reordered */
  ESL_ALLOC(hmm->otsc, sizeof(int) * (M+1) * cp9O_NTRANS);

  /* note allocation strategy for important 2D arrays -- trying
   * to keep locality as much as possible, cache efficiency etc.
   */
  for (k = 1; k <= M; k++) {
    hmm->mat[k] = hmm->mat[0] + k * abc->K;
    hmm->ins[k] = hmm->ins[0] + k * abc->K;
    hmm->t[k]   = hmm->t[0]   + k * cp9_NTRANS;
  }
  for (x = 1; x < hmm->abc->Kp; x++) {
    hmm->msc[x] = hmm->msc[0] + x * (M+1);
    hmm->isc[x] = hmm->isc[0] + x * (M+1);
  }
  for (x = 0; x < cp9_NTRANS; x++)
    hmm->tsc[x] = hmm->tsc[0] + x * (M+1);

  /* tsc[x][0] is used as a boundary condition sometimes [Viterbi()],
   * so set to -inf always.
   */
  for (x = 0; x < cp9_NTRANS; x++)
    hmm->tsc[x][0] = -INFTY;

  ESL_ALLOC(hmm->begin, (M+1) * sizeof(float));
  ESL_ALLOC(hmm->end,   (M+1) * sizeof(float));

  ESL_ALLOC(hmm->bsc_mem, (M+1) * sizeof(int));
  ESL_ALLOC(hmm->esc_mem, (M+1) * sizeof(int));

  ESL_ALLOC(hmm->null, (abc->K) * sizeof(float));

  hmm->bsc = hmm->bsc_mem;
  hmm->esc = hmm->esc_mem;

  /* end[0], begin[0], esc[0] and bsc[0] are never used.
   * The probabilities are set to 0. and the scores to -INFTY, so the
   * two representations of entry 0 agree (impossible). */
  hmm->end[0] = hmm->begin[0] = 0.;
  hmm->esc[0] = hmm->bsc[0] = -INFTY;

  ESL_ALLOC(hmm->has_el,     (M+1) * sizeof(int));
  ESL_ALLOC(hmm->el_from_ct, (M+2) * sizeof(int));
  ESL_ALLOC(hmm->el_from_idx,(M+2) * sizeof(int *));
  ESL_ALLOC(hmm->el_from_cmnd,(M+2) * sizeof(int *));
  esl_vec_ISet(hmm->has_el,     M+1, FALSE);
  /* el_from_ct has M+2 entries (0..M+1); zero every one of them */
  esl_vec_ISet(hmm->el_from_ct, M+2, 0);
  for(k = 0; k <= M+1; k++) {
    hmm->el_from_idx[k] = NULL;
    hmm->el_from_cmnd[k] = NULL;
  }

  return;
 ERROR:
  cm_Fail("Memory allocation error.");
}
/* Function: CP9Logoddsify()
 *
 * Purpose:  Convert the probabilities of a CP9 HMM into its integer
 *           score arrays. Does nothing if the CPLAN9_HASBITS flag is
 *           already up; raises that flag when done.
 *
 *           How each score is derived (simplified from plan7.c):
 *           parameter            probability   score
 *           -------------------  -----------   ------------------
 *           insert/match emit    p_x           log_2 p_x/null_x
 *           transition, begin,
 *           end, EL self         t_x           log_2 t_x
 *
 * Args:     hmm - the hmm to calculate scores in.
 *
 * Return:   (void)
 *           isc, msc, tsc, bsc, esc, el_selfsc and otsc are filled in.
 *           Dies via cm_Fail() if the scratch vector can't be allocated.
 */
void
CP9Logoddsify(CP9_t *hmm)
{
  int   status;
  int   node;                   /* model position, 0..M        */
  int   sym;                    /* alphabet symbol index       */
  int  *symsc;                  /* scratch: one score per symbol */
  int  *reordered;              /* walks hmm->otsc, one node at a time */

  /* Scores already current? Then there is nothing to do. */
  if (hmm->flags & CPLAN9_HASBITS)
    return;

  ESL_ALLOC(symsc, hmm->abc->Kp * sizeof(int));

  /* The special symbols are impossible to emit. These three slots are
   * written once, here, and are relied upon by both emission loops. */
  symsc[hmm->abc->K]    = -INFTY; /* gap character */
  symsc[hmm->abc->Kp-1] = -INFTY; /* missing data character */
  symsc[hmm->abc->Kp-2] = -INFTY; /* non-residue data character */

  /* Insert emissions: every node, including node 0 */
  for (node = 0; node <= hmm->M; node++)
    {
      for (sym = 0; sym < hmm->abc->K; sym++)
	symsc[sym] = Prob2Score(hmm->ins[node][sym], hmm->null[sym]);
      esl_abc_IExpectScVec(hmm->abc, symsc, hmm->null);
      for (sym = 0; sym < hmm->abc->Kp; sym++)
	hmm->isc[sym][node] = symsc[sym];
    }

  /* Match emissions: nodes 1..M only */
  for (node = 1; node <= hmm->M; node++)
    {
      for (sym = 0; sym < hmm->abc->K; sym++)
	symsc[sym] = Prob2Score(hmm->mat[node][sym], hmm->null[sym]);
      esl_abc_IExpectScVec(hmm->abc, symsc, hmm->null);
      for (sym = 0; sym < hmm->abc->Kp; sym++)
	hmm->msc[sym][node] = symsc[sym];
    }

  /* Transitions, plus local begin/end scores */
  for (node = 0; node <= hmm->M; node++)
    {
      float *tprob = hmm->t[node];

      hmm->tsc[CTMM][node]  = Prob2Score(tprob[CTMM],  1.0);
      hmm->tsc[CTMI][node]  = Prob2Score(tprob[CTMI],  1.0);
      hmm->tsc[CTMD][node]  = Prob2Score(tprob[CTMD],  1.0);
      hmm->tsc[CTMEL][node] = Prob2Score(tprob[CTMEL], 1.0);
      hmm->tsc[CTIM][node]  = Prob2Score(tprob[CTIM],  1.0);
      hmm->tsc[CTII][node]  = Prob2Score(tprob[CTII],  1.0);
      hmm->tsc[CTID][node]  = Prob2Score(tprob[CTID],  1.0);

      if (node == 0)
	{
	  /* D_0 doesn't exist, so nothing can leave it; bsc[0] and
	   * esc[0] are deliberately left as they are. */
	  hmm->tsc[CTDM][node] = -INFTY;
	  hmm->tsc[CTDD][node] = -INFTY;
	  hmm->tsc[CTDI][node] = -INFTY;
	}
      else
	{
	  hmm->tsc[CTDM][node] = Prob2Score(tprob[CTDM], 1.0);
	  hmm->tsc[CTDI][node] = Prob2Score(tprob[CTDI], 1.0);
	  hmm->tsc[CTDD][node] = Prob2Score(tprob[CTDD], 1.0);

	  hmm->bsc[node] = Prob2Score(hmm->begin[node], 1.0);
	  hmm->esc[node] = Prob2Score(hmm->end[node],   1.0);
	}
    }
  hmm->el_selfsc = Prob2Score(hmm->el_self, 1.0);

  /* Last, mirror the scores into otsc, which stores the cp9O_NTRANS
   * scores of one node next to each other. */
  reordered = hmm->otsc;
  for (node = 0; node <= hmm->M; node++, reordered += cp9O_NTRANS)
    {
      reordered[cp9O_MM] = hmm->tsc[CTMM][node];
      reordered[cp9O_MI] = hmm->tsc[CTMI][node];
      reordered[cp9O_MD] = hmm->tsc[CTMD][node];
      reordered[cp9O_IM] = hmm->tsc[CTIM][node];
      reordered[cp9O_II] = hmm->tsc[CTII][node];
      reordered[cp9O_DM] = hmm->tsc[CTDM][node];
      reordered[cp9O_DD] = hmm->tsc[CTDD][node];
      reordered[cp9O_ID] = hmm->tsc[CTID][node];
      reordered[cp9O_DI] = hmm->tsc[CTDI][node];
      reordered[cp9O_BM] = hmm->bsc[node];
      reordered[cp9O_MEL]= hmm->tsc[CTMEL][node];
      reordered[cp9O_ME] = hmm->esc[node];
    }

  hmm->flags |= CPLAN9_HASBITS; /* raise the log-odds ready flag */

  free(symsc);
  return;

 ERROR:
  cm_Fail("Memory allocation error.\n");
  return; /* never reached */
}
/* Function: CP9_2sub_cp9()
 * EPN 09.24.06
 *
 * Purpose:  Given a template CM Plan 9 HMM, build a sub-model that
 *           models only a subset of the consensus columns of the
 *           original alignment. This requires a bit of care for
 *           the initial and final node of the sub CP9, and
 *           straightforward copying of parameters for the rest.
 *
 *           The new CP9 is constructed in Global Needleman/Wunsch
 *           mode. The orig_hmm MUST be in global mode. THIS IS
 *           CHECKED FOR IN A VERY FRAGILE MANNER (only by requiring
 *           that the copied begin[i] is ~0 for every sub node i > 1).
 *
 *           The approach here is to allocate and fill the new
 *           sub CP9, copying node spos-1+i of the original into
 *           node i of the sub model, then calling CP9_reconfig2sub().
 *
 *           Probability vectors (null, mat, ins) are copied over the
 *           abc->K residues they are allocated for, and score vectors
 *           (msc, isc) over all abc->Kp symbols.
 *
 * Args:     orig_hmm    - the CP9 model w/ data-dep prob's valid
 *           ret_sub_hmm - the new sub CP9 hmm, allocated here, must
 *                         be freed by caller.
 *           spos        - first consensus column modelled by original
 *                         CP9 HMM the sub CP9 HMM models.
 *           epos        - final consensus column modelled by original
 *                         CP9 HMM the sub CP9 HMM models.
 *                         Requires 1 <= spos <= epos <= orig_hmm->M.
 *           orig_phi    - the 2D phi array for the original CP9 HMM.
 *
 * Return:   (void)
 *           *ret_sub_hmm is set to the new sub CP9 HMM.
 *           Dies via cm_Fail() on an invalid column range or if
 *           orig_hmm does not look like a global-mode model.
 */
void
CP9_2sub_cp9(CP9_t *orig_hmm, CP9_t **ret_sub_hmm, int spos, int epos, double **orig_phi)
{
  CP9_t       *sub_hmm;
  int i, x;
  int orig_pos;
  int K;                        /* # residues: size of null, mat[k], ins[k] */
  int Kp;                       /* # symbols:  number of msc/isc rows        */

  /* Node i of the sub model is read from node i+spos-1 of the original,
   * for i = 0..epos-spos+1, so the range must lie inside 1..M. */
  if(spos < 1 || epos > orig_hmm->M || spos > epos)
    cm_Fail("ERROR in cp9_2sub_cp9() invalid consensus column range, spos: %d epos: %d orig_hmm->M: %d\n", spos, epos, orig_hmm->M);

  K  = orig_hmm->abc->K;
  Kp = orig_hmm->abc->Kp;

  sub_hmm = AllocCPlan9((epos-spos+1), orig_hmm->abc);

  for(x = 0; x < K; x++)
    {
      sub_hmm->null[x] = orig_hmm->null[x];
    }
  /* No special (*x* states in Plan 7) states in CM Plan 9 */

  /* First we just copy the parameters for spos..epos from the template HMM.
   * This is *slightly* wasteful, as we'll overwrite a few of these later.
   */
  for(i = 0; i <= (epos-spos+1); i++)
    {
      orig_pos = i + spos - 1;

      if(i > 0)                 /* there is no match state in node 0 */
	{
	  for(x = 0; x < K; x++)
	    sub_hmm->mat[i][x] = orig_hmm->mat[orig_pos][x];
	  for(x = 0; x < Kp; x++)
	    sub_hmm->msc[x][i] = orig_hmm->msc[x][orig_pos];

	  sub_hmm->begin[i]   = orig_hmm->begin[orig_pos];
	  sub_hmm->end[i]     = orig_hmm->end[orig_pos];
	  sub_hmm->bsc[i]     = orig_hmm->bsc[orig_pos];
	  sub_hmm->esc[i]     = orig_hmm->esc[orig_pos];
	  if((i > 1) && ((0. - sub_hmm->begin[i] > 0.00000001) ||
			 (sub_hmm->begin[i] - 0. > 0.00000001)))
	    {
	      cm_Fail("ERROR in cp9_2sub_cp9() is original CP9 HMM not in global (NW) mode? i: %d\n", i);
	    }
	}

      for(x = 0; x < K; x++)
	sub_hmm->ins[i][x] = orig_hmm->ins[orig_pos][x];
      for(x = 0; x < Kp; x++)
	sub_hmm->isc[x][i] = orig_hmm->isc[x][orig_pos];

      for(x = 0; x < cp9_NTRANS; x++)
	{
	  sub_hmm->t[i][x]   = orig_hmm->t[orig_pos][x];
	  sub_hmm->tsc[x][i] = orig_hmm->tsc[x][orig_pos];
	}
    }

  /* Make the necessary modifications. */
  CP9_reconfig2sub(sub_hmm, spos, epos, 1, sub_hmm->M, orig_phi);

  sub_hmm->el_self   = orig_hmm->el_self;
  sub_hmm->el_selfsc = orig_hmm->el_selfsc;

  sub_hmm->flags |= CPLAN9_HASBITS;	/* raise the log-odds ready flag */
  *ret_sub_hmm = sub_hmm;

  return;
}
/* Function: CPlan9InitEL()
 * Incept:   EPN, Tue Jun 19 13:10:56 2007
 *
 * Purpose:  Prepare a CP9 HMM for possible EL local ends. Copies the
 *           CM's EL self transition into the HMM and then, from the
 *           CM node topology, records:
 *             has_el[k]          - TRUE if HMM node k has an EL state
 *             el_from_ct[k]      - how many EL states can reach node k
 *             el_from_idx[k][]   - the HMM nodes owning those EL states
 *             el_from_cmnd[k][]  - the CM nodes they were derived from
 *           for k = 0..M+1 (M+1 stands for the E state).
 *
 *           A CM node contributes an EL if it is a MATP, MATL, MATR,
 *           BEGL or BEGR node whose successor is not an END node. Its
 *           EL belongs to HMM node lpos[nd] and leads to rpos[nd].
 *
 * Args:     cm     - the CM
 *           cp9    - the CP9 HMM, built from cm
 *
 * Return:   (void)
 *           Dies via cm_Fail() if el_from_idx was already filled or
 *           on allocation failure.
 */
void
CPlan9InitEL(CM_t *cm, CP9_t *cp9)
{
  int          status;
  CMEmitMap_t *emap;            /* consensus emit map for the CM */
  int         *nfilled;         /* per HMM node: el_from slots written so far */
  int          hmm_nd;          /* counter over HMM nodes */
  int          cm_nd;           /* counter over CM nodes */
  int          dest;            /* HMM node an EL state leads to */
  int          slot;            /* next free slot in el_from_*[dest] */

  /* The EL self transition is taken straight from the CM */
  cp9->el_self   = sreEXP2(cm->el_selfsc);
  cp9->el_selfsc = Prob2Score(cp9->el_self, 1.0);

  /* Counting first and allocating second means each el_from list is
   * exactly as long as it needs to be. */
  emap = CreateEmitMap(cm);

  for(hmm_nd = 0; hmm_nd <= cp9->M; hmm_nd++)
    {
      cp9->has_el[hmm_nd]     = FALSE;
      cp9->el_from_ct[hmm_nd] = 0;
    }
  cp9->el_from_ct[(cp9->M+1)] = 0; /* special case, we can get to E state from EL states */

  /* Pass 1: count the EL states leading into each HMM node */
  for(cm_nd = 0; cm_nd < cm->nodes; cm_nd++)
    {
      if(cm->ndtype[cm_nd] != MATP_nd &&
	 cm->ndtype[cm_nd] != MATL_nd &&
	 cm->ndtype[cm_nd] != MATR_nd &&
	 cm->ndtype[cm_nd] != BEGL_nd &&
	 cm->ndtype[cm_nd] != BEGR_nd)
	continue;               /* node type never has an EL */
      if(cm->ndtype[cm_nd+1] == END_nd)
	continue;               /* nodes right before an END are excluded */

      cp9->el_from_ct[emap->rpos[cm_nd]]++;
      cp9->has_el[emap->lpos[cm_nd]] = TRUE;
    }

  /* Size each list; nodes no EL can reach keep their NULL pointers */
  for(hmm_nd = 0; hmm_nd <= (cp9->M+1); hmm_nd++)
    {
      if(cp9->el_from_idx[hmm_nd] != NULL) /* if !NULL we already filled it, shouldn't happen */
	cm_Fail("ERROR in CPlan9InitEL() el_from_idx has already been initialized\n");
      if(cp9->el_from_ct[hmm_nd] <= 0)
	continue;

      ESL_ALLOC(cp9->el_from_idx[hmm_nd], sizeof(int) * cp9->el_from_ct[hmm_nd]);
      ESL_ALLOC(cp9->el_from_cmnd[hmm_nd],sizeof(int) * cp9->el_from_ct[hmm_nd]);
    }

  /* Pass 2: fill the lists, tracking the write position per node */
  ESL_ALLOC(nfilled, sizeof(int) * (cp9->M+2));
  for(hmm_nd = 0; hmm_nd <= (cp9->M+1); hmm_nd++)
    nfilled[hmm_nd] = 0;

  for(cm_nd = 0; cm_nd < cm->nodes; cm_nd++)
    {
      if(cm->ndtype[cm_nd] != MATP_nd &&
	 cm->ndtype[cm_nd] != MATL_nd &&
	 cm->ndtype[cm_nd] != MATR_nd &&
	 cm->ndtype[cm_nd] != BEGL_nd &&
	 cm->ndtype[cm_nd] != BEGR_nd)
	continue;
      if(cm->ndtype[cm_nd+1] == END_nd)
	continue;

      dest = emap->rpos[cm_nd];
      slot = nfilled[dest];
      cp9->el_from_idx[dest][slot]  = emap->lpos[cm_nd];
      cp9->el_from_cmnd[dest][slot] = cm_nd;
      nfilled[dest] = slot + 1;
    }

  /* Free memory and exit */
  free(nfilled);
  FreeEmitMap(emap);
  return;

 ERROR:
  cm_Fail("Memory allocation error.");
}
/* Function: CPlan9ELConfig()
 * Incept:   EPN, Tue Jun 19 09:50:52 2007
 *
 * Purpose:  Turn EL local ends in a CM Plan 9 HMM on based on
 *           the local end probs in the CM. One match->EL probability
 *           is determined and assigned to t[k][CTMEL] of every HMM
 *           node k with has_el[k] set; that node's M->M, M->I and
 *           M->D probabilities are scaled down to make room for it.
 *           The log-odds scores are then recomputed.
 *
 * Args:     cm - the CM, must have valid CP9 HMM (cm->cp9) and CP9
 *                map (cm->cp9map), the CMH_CP9 flag up, and the
 *                CPLAN9_EL flag down in cm->cp9->flags.
 *
 * Return:   (void)
 *           HMM probabilities and scores are modified; the CPLAN9_EL
 *           flag is raised. Dies via cm_Fail() on contract violation
 *           or if the CM's local end probabilities are inconsistent.
 */
void
CPlan9ELConfig(CM_t *cm)
{
  int v;
  int k;                        /* counter over HMM nodes */
  int nd;
  int seen_exit;
  float to_el_prob;
  float norm_factor;
  int   nexits;

  /* Contract checks */
  if(cm->cp9 == NULL)
    cm_Fail("ERROR in CPlan9ELConfig, cm->cp9 is NULL.\n");
  if(cm->cp9map == NULL)
    cm_Fail("ERROR in CPlan9ELConfig, cm->cp9map is NULL.\n");
  if(!(cm->flags & CMH_CP9))
    cm_Fail("ERROR in CPlan9ELConfig, CMH_CP9 flag is down.");
  if(cm->cp9->flags & CPLAN9_EL)
    cm_Fail("ERROR in CPlan9ELConfig, CP9_EL flag is already up.");

  /* If the CM has local ends on, check to make sure all non-zero
   * local end probabilities in the CM are identical (within reasonable
   * precision), use that probability to set all HMM transitions to
   * EL states.
   */
  if(cm->flags & CMH_LOCAL_END)
    {
      seen_exit  = FALSE;
      to_el_prob = 0.;
      for(v = 0; v < cm->M; v++)
	{
	  nd = cm->ndidx[v];
	  if (((cm->ndtype[nd] == MATP_nd ||
		cm->ndtype[nd] == MATL_nd ||
		cm->ndtype[nd] == MATR_nd ||
		cm->ndtype[nd] == BEGL_nd ||
		cm->ndtype[nd] == BEGR_nd) &&
	       cm->ndtype[nd+1] != END_nd) &&
	      cm->nodemap[nd] == v)
	    {
	      /* this should have a non-zero local end probability */
	      if(fabs(cm->end[v] - 0.) < 0.00001) /* it is (effectively) zero */
		cm_Fail("In CPlan9ELConfig(), CM state %d should have non-zero local end prob, but it doesn't.\n", v);
	      if(! seen_exit)
		{
		  to_el_prob = cm->end[v];
		  seen_exit  = TRUE;
		}
	      else if(fabs(to_el_prob - cm->end[v]) > 0.00001)
		cm_Fail("In CPlan9ELConfig(), not all CM states EL probs are identical.\n");
	    }
	}
      if(! seen_exit && cm->nodes != 3)
	cm_Fail("In CPlan9ELConfig(), CM_LOCAL_END flag up, cm->nodes != 3, but all CM local end probs are zero.");
    }
  else
    {
      /* CM_LOCAL_END flag is down, local ends are off in the CM
       * We figure out what the local end prob would be given cm->pend
       * and set the HMM local end probs based on that.
       * First, count internal nodes MATP, MATL, MATR, BEGL, BEGR that aren't
       * adjacent to END nodes.
       */
      nexits = 0;
      for (nd = 1; nd < cm->nodes; nd++)
	{
	  if ((cm->ndtype[nd] == MATP_nd ||
	       cm->ndtype[nd] == MATL_nd ||
	       cm->ndtype[nd] == MATR_nd ||
	       cm->ndtype[nd] == BEGL_nd ||
	       cm->ndtype[nd] == BEGR_nd) &&
	      cm->ndtype[nd+1] != END_nd)
	    nexits++;
	}
      /* A model with no qualifying node has nowhere to spread cm->pend;
       * use 0 rather than dividing by zero. */
      to_el_prob = (nexits > 0) ? (cm->pend / (float) nexits) : 0.;
    }

  /* transitions from HMM node 0 to EL is impossible */
  cm->cp9->t[0][CTMEL] = 0.;
  for(k = 1; k <= cm->cp9->M; k++)
    {
      if(cm->cp9->has_el[k])
	{
	  cm->cp9->t[k][CTMEL] = to_el_prob;
	  /* Scale the other match transitions down by the share of the
	   * non-end probability mass that now goes to EL. */
	  norm_factor = 1. - (cm->cp9->t[k][CTMEL] / (1. - cm->cp9->end[k]));
	  cm->cp9->t[k][CTMM] *= norm_factor;
	  cm->cp9->t[k][CTMI] *= norm_factor;
	  cm->cp9->t[k][CTMD] *= norm_factor;
	  /* cm->cp9->end[k] untouched */
	}
    }

  /* HASBITS must be lowered first, otherwise CP9Logoddsify() returns
   * immediately without recomputing any score. */
  cm->cp9->flags &= ~CPLAN9_HASBITS;	/* clear the log-odds ready flag */
  CP9Logoddsify(cm->cp9);

  cm->cp9->flags |= CPLAN9_EL;          /* EL end locals now on */
  return;
}