/* Note that calculate_occupancy has moved to p7_hmm.c, but * unit tests over there aren't hooked up yet; so leave a copy of the unit test * here for now. */ static void utest_occupancy(P7_HMM *hmm) { char *msg = "modelconfig.c::calculate_occupancy() unit test failed"; float *occ; float x; occ = malloc(sizeof(float) * (hmm->M+1)); p7_hmm_CalculateOccupancy(hmm, occ, NULL); x = esl_vec_FSum(occ+1, hmm->M) / (float) hmm->M; if (esl_FCompare(x, 0.6, 0.1) != eslOK) esl_fatal(msg); free(occ); return; }
/* assumes heights is allocated with hmm->M floats*/ int hmmlogo_IndelValues (P7_HMM *hmm, float *insert_P, float *insert_expL, float *occupancy ) { int i; if (insert_P != NULL) insert_P[1] = hmm->t[1][p7H_MI]; //probability of inserting after this match if (insert_expL != NULL) insert_expL[1] = 1 / (1 - hmm->t[1][p7H_II]) ; //expected length of the insert, if it happens // if (delete_P != NULL) delete_P[1] = 0.0; //1st match state never deleted for (i = 2; i < hmm->M; i++) { if (insert_P != NULL) insert_P[i] = hmm->t[i][p7H_MI]; //probability of inserting after this match if (insert_expL != NULL) insert_expL[i] = 1 / (1 - hmm->t[i][p7H_II]) ; //expected length of the insert, if it happens // if (delete_P != NULL) delete_P[i] = ( (1.0-delete_P[i-1]) * hmm->t[i-1][p7H_MD] ) + ( delete_P[i-1] * hmm->t[i-1][p7H_DD]) ; //probability of missing this state, either due to DD or MD from previous position } if (insert_P != NULL) insert_P[hmm->M] = 0.0; //no inserts after final position if (insert_expL != NULL) insert_expL[hmm->M] = 0.0; //no inserts after final position // if (delete_P != NULL) delete_P[hmm->M] = ( (1.0-delete_P[hmm->M-1]) * hmm->t[hmm->M-1][p7H_MD] ) + ( delete_P[hmm->M-1] * hmm->t[hmm->M-1][p7H_DD]) ; //probability of missing this state, either due to DD or MD from previous position if (occupancy != NULL) p7_hmm_CalculateOccupancy(hmm, occupancy, NULL); return eslOK; }
/* Function: p7_ProfileConfig() * Synopsis: Configure a search profile. * * Purpose: Given a model <hmm> with core probabilities, the null1 * model <bg>, a desired search <mode> (one of <p7_LOCAL>, * <p7_GLOCAL>, <p7_UNILOCAL>, or <p7_UNIGLOCAL>), and an * expected target sequence length <L>; configure the * search model in <gm> with lod scores relative to the * background frequencies in <bg>. * * Returns: <eslOK> on success; the profile <gm> now contains * scores and is ready for searching target sequences. * * Throws: <eslEMEM> on allocation error. */ int p7_ProfileConfig(const P7_HMM *hmm, const P7_BG *bg, P7_PROFILE *gm, int L, int mode) { int k, x, z; /* counters over states, residues, annotation */ int status; float *occ = NULL; float *tp, *rp; float sc[p7_MAXCODE]; float Z; /* Contract checks */ if (gm->abc->type != hmm->abc->type) ESL_XEXCEPTION(eslEINVAL, "HMM and profile alphabet don't match"); if (hmm->M > gm->allocM) ESL_XEXCEPTION(eslEINVAL, "profile too small to hold HMM"); if (! (hmm->flags & p7H_CONS)) ESL_XEXCEPTION(eslEINVAL, "HMM must have a consensus to transfer to the profile"); /* Copy some pointer references and other info across from HMM */ gm->M = hmm->M; gm->max_length = hmm->max_length; gm->mode = mode; gm->roff = -1; gm->eoff = -1; gm->offs[p7_MOFFSET] = -1; gm->offs[p7_FOFFSET] = -1; gm->offs[p7_POFFSET] = -1; if (gm->name != NULL) free(gm->name); if (gm->acc != NULL) free(gm->acc); if (gm->desc != NULL) free(gm->desc); if ((status = esl_strdup(hmm->name, -1, &(gm->name))) != eslOK) goto ERROR; if ((status = esl_strdup(hmm->acc, -1, &(gm->acc))) != eslOK) goto ERROR; if ((status = esl_strdup(hmm->desc, -1, &(gm->desc))) != eslOK) goto ERROR; if (hmm->flags & p7H_RF) strcpy(gm->rf, hmm->rf); if (hmm->flags & p7H_MMASK) strcpy(gm->mm, hmm->mm); if (hmm->flags & p7H_CONS) strcpy(gm->consensus, hmm->consensus); /* must be present, actually, so the flag test is just for symmetry w/ other optional HMM fields */ if (hmm->flags & p7H_CS) strcpy(gm->cs, hmm->cs); for (z = 0; z < p7_NEVPARAM; z++) gm->evparam[z] = hmm->evparam[z]; for (z = 0; z < p7_NCUTOFFS; z++) gm->cutoff[z] = hmm->cutoff[z]; for (z = 0; z < p7_MAXABET; z++) gm->compo[z] = hmm->compo[z]; /* Entry scores. */ if (p7_profile_IsLocal(gm)) { /* Local mode entry: occ[k] /( \sum_i occ[i] * (M-i+1)) * (Reduces to uniform 2/(M(M+1)) for occupancies of 1.0) */ Z = 0.; ESL_ALLOC(occ, sizeof(float) * (hmm->M+1)); if ((status = p7_hmm_CalculateOccupancy(hmm, occ, NULL)) != eslOK) goto ERROR; for (k = 1; k <= hmm->M; k++) Z += occ[k] * (float) (hmm->M-k+1); for (k = 1; k <= hmm->M; k++) p7P_TSC(gm, k-1, p7P_BM) = log(occ[k] / Z); /* note off-by-one: entry at Mk stored as [k-1][BM] */ free(occ); } else /* glocal modes: left wing retraction; must be in log space for precision */ { Z = log(hmm->t[0][p7H_MD]); p7P_TSC(gm, 0, p7P_BM) = log(1.0 - hmm->t[0][p7H_MD]); for (k = 1; k < hmm->M; k++) { p7P_TSC(gm, k, p7P_BM) = Z + log(hmm->t[k][p7H_DM]); Z += log(hmm->t[k][p7H_DD]); } } /* E state loop/move probabilities: nonzero for MOVE allows loops/multihits * N,C,J transitions are set later by length config */ if (p7_profile_IsMultihit(gm)) { gm->xsc[p7P_E][p7P_MOVE] = -eslCONST_LOG2; gm->xsc[p7P_E][p7P_LOOP] = -eslCONST_LOG2; gm->nj = 1.0f; } else { gm->xsc[p7P_E][p7P_MOVE] = 0.0f; gm->xsc[p7P_E][p7P_LOOP] = -eslINFINITY; gm->nj = 0.0f; } /* Transition scores. */ for (k = 1; k < gm->M; k++) { tp = gm->tsc + k * p7P_NTRANS; tp[p7P_MM] = log(hmm->t[k][p7H_MM]); tp[p7P_MI] = log(hmm->t[k][p7H_MI]); tp[p7P_MD] = log(hmm->t[k][p7H_MD]); tp[p7P_IM] = log(hmm->t[k][p7H_IM]); tp[p7P_II] = log(hmm->t[k][p7H_II]); tp[p7P_DM] = log(hmm->t[k][p7H_DM]); tp[p7P_DD] = log(hmm->t[k][p7H_DD]); } /* Match emission scores. */ sc[hmm->abc->K] = -eslINFINITY; /* gap character */ sc[hmm->abc->Kp-2] = -eslINFINITY; /* nonresidue character */ sc[hmm->abc->Kp-1] = -eslINFINITY; /* missing data character */ for (k = 1; k <= hmm->M; k++) { for (x = 0; x < hmm->abc->K; x++) sc[x] = log((double)hmm->mat[k][x] / bg->f[x]); esl_abc_FExpectScVec(hmm->abc, sc, bg->f); for (x = 0; x < hmm->abc->Kp; x++) { rp = gm->rsc[x] + k * p7P_NR; rp[p7P_MSC] = sc[x]; } } /* Insert emission scores */ /* SRE, Fri Dec 5 08:41:08 2008: We currently hardwire insert scores * to 0, i.e. corresponding to the insertion emission probabilities * being equal to the background probabilities. Benchmarking shows * that setting inserts to informative emission distributions causes * more problems than it's worth: polar biased composition hits * driven by stretches of "insertion" occur, and are difficult to * correct for. */ for (x = 0; x < gm->abc->Kp; x++) { for (k = 1; k < hmm->M; k++) p7P_ISC(gm, k, x) = 0.0f; p7P_ISC(gm, hmm->M, x) = -eslINFINITY; /* init I_M to impossible. */ } for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->K) = -eslINFINITY; /* gap symbol */ for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->Kp-2) = -eslINFINITY; /* nonresidue symbol */ for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->Kp-1) = -eslINFINITY; /* missing data symbol */ #if 0 /* original (informative) insert setting: relies on sc[K, Kp-1] initialization to -inf above */ for (k = 1; k < hmm->M; k++) { for (x = 0; x < hmm->abc->K; x++) sc[x] = log(hmm->ins[k][x] / bg->f[x]); esl_abc_FExpectScVec(hmm->abc, sc, bg->f); for (x = 0; x < hmm->abc->Kp; x++) { rp = gm->rsc[x] + k*p7P_NR; rp[p7P_ISC] = sc[x]; } } for (x = 0; x < hmm->abc->Kp; x++) p7P_ISC(gm, hmm->M, x) = -eslINFINITY; /* init I_M to impossible. */ #endif /* Remaining specials, [NCJ][MOVE | LOOP] are set by ReconfigLength() */ gm->L = 0; /* force ReconfigLength to reconfig */ if ((status = p7_ReconfigLength(gm, L)) != eslOK) goto ERROR; return eslOK; ERROR: if (occ != NULL) free(occ); return status; }
/* Function: p7_ProfileConfig() * Synopsis: Configure a search profile. * Incept: SRE, Sun Sep 25 12:21:25 2005 [St. Louis] * * Purpose: Given a model <hmm> with core probabilities, the null1 * model <bg>, a desired search <mode> (one of <p7_LOCAL>, * <p7_GLOCAL>, <p7_UNILOCAL>, or <p7_UNIGLOCAL>), and an * expected target sequence length <L>; configure the * search model in <gm> with lod scores relative to the * background frequencies in <bg>. * * Returns: <eslOK> on success; the profile <gm> now contains * scores and is ready for searching target sequences. * * Throws: <eslEMEM> on allocation error. */ int p7_ProfileConfig(const P7_HMM *hmm, const P7_BG *bg, P7_PROFILE *gm, int L, int mode) { int k, x, z; /* counters over states, residues, annotation */ int status; float *occ = NULL; float *tp, *rp; float sc[p7_MAXCODE]; float mthresh; float Z; /* Contract checks */ if (gm->abc->type != hmm->abc->type) ESL_XEXCEPTION(eslEINVAL, "HMM and profile alphabet don't match"); if (hmm->M > gm->allocM) ESL_XEXCEPTION(eslEINVAL, "profile too small to hold HMM"); /* Copy some pointer references and other info across from HMM */ gm->M = hmm->M; gm->mode = mode; gm->roff = -1; gm->eoff = -1; gm->offs[p7_MOFFSET] = -1; gm->offs[p7_FOFFSET] = -1; gm->offs[p7_POFFSET] = -1; if (gm->name != NULL) free(gm->name); if (gm->acc != NULL) free(gm->acc); if (gm->desc != NULL) free(gm->desc); if ((status = esl_strdup(hmm->name, -1, &(gm->name))) != eslOK) goto ERROR; if ((status = esl_strdup(hmm->acc, -1, &(gm->acc))) != eslOK) goto ERROR; if ((status = esl_strdup(hmm->desc, -1, &(gm->desc))) != eslOK) goto ERROR; if (hmm->flags & p7H_RF) strcpy(gm->rf, hmm->rf); if (hmm->flags & p7H_CS) strcpy(gm->cs, hmm->cs); for (z = 0; z < p7_NEVPARAM; z++) gm->evparam[z] = hmm->evparam[z]; for (z = 0; z < p7_NCUTOFFS; z++) gm->cutoff[z] = hmm->cutoff[z]; for (z = 0; z < p7_MAXABET; z++) gm->compo[z] = hmm->compo[z]; /* Determine the "consensus" residue for each match position. * This is only used for alignment displays, not in any calculations. */ if (hmm->abc->type == eslAMINO) mthresh = 0.5; else if (hmm->abc->type == eslDNA) mthresh = 0.9; else if (hmm->abc->type == eslRNA) mthresh = 0.9; else mthresh = 0.5; gm->consensus[0] = ' '; for (k = 1; k <= hmm->M; k++) { x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K); gm->consensus[k] = ((hmm->mat[k][x] > mthresh) ? toupper(hmm->abc->sym[x]) : tolower(hmm->abc->sym[x])); } gm->consensus[hmm->M+1] = '\0'; /* Entry scores. */ if (p7_profile_IsLocal(gm)) { /* Local mode entry: occ[k] /( \sum_i occ[i] * (M-i+1)) * (Reduces to uniform 2/(M(M+1)) for occupancies of 1.0) */ Z = 0.; ESL_ALLOC_WITH_TYPE(occ, float*, sizeof(float) * (hmm->M+1)); if ((status = p7_hmm_CalculateOccupancy(hmm, occ, NULL)) != eslOK) goto ERROR; for (k = 1; k <= hmm->M; k++) Z += occ[k] * (float) (hmm->M-k+1); for (k = 1; k <= hmm->M; k++) p7P_TSC(gm, k-1, p7P_BM) = log((double)(occ[k] / Z)); /* note off-by-one: entry at Mk stored as [k-1][BM] */ free(occ); } else /* glocal modes: left wing retraction; must be in log space for precision */ {