/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1).
 *
 * Stochastic traceback selection for a match cell M(i,k) in a striped
 * SSE Forward matrix. Computes the (unnormalized) probability weight of
 * each of the four incoming paths, normalizes them, and samples one.
 *
 * <rng>  : random number source
 * <om>   : optimized profile (striped transition vectors in om->tfv)
 * <ox>   : filled Forward DP matrix
 * <i,k>  : current cell coordinates (sequence position i, model node k)
 *
 * Returns the sampled source state code: p7T_B, p7T_M, p7T_I, or p7T_D.
 */
static inline int
select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int     Q  = p7O_NQF(ox->M);
  int     q  = (k-1) % Q;      /* (q,r) is position of the current DP cell M(i,k) */
  int     r  = (k-1) / Q;
  __m128 *tp = om->tfv + 7*q;  /* *tp now at start of transitions to cur cell M(i,k) */
  __m128  xBv   = _mm_set1_ps(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  __m128  zerov = _mm_setzero_ps();
  __m128  mpv, dpv, ipv;
  union { __m128 v; float p[4]; } u;
  float   path[4];
  int     state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };

  /* Fetch M/D/I(i-1,k-1). In the striped layout, k-1 lives one vector
   * slot earlier; for q==0 it wraps around from the last vector of the
   * row, right-shifted with a zero filling the vacated slot. */
  if (q > 0) {
    mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
    dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
    ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
  } else {
    mpv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_M], zerov);
    dpv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_D], zerov);
    ipv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_I], zerov);
  }

  /* Path weight = source cell value * transition prob; <tp> walks the
   * transition vectors into M(k) in order B->M, M->M, I->M, D->M. */
  u.v = _mm_mul_ps(xBv, *tp); tp++;  path[0] = u.p[r];
  u.v = _mm_mul_ps(mpv, *tp); tp++;  path[1] = u.p[r];
  u.v = _mm_mul_ps(ipv, *tp); tp++;  path[2] = u.p[r];
  u.v = _mm_mul_ps(dpv, *tp);        path[3] = u.p[r];

  esl_vec_FNorm(path, 4);
  return state[esl_rnd_FChoose(rng, path, 4)];
}
/* D(i,k) is reached from M(i, k-1) or D(i,k-1).
 *
 * Stochastic traceback selection for a delete cell D(i,k) in a striped
 * VMX/Altivec Forward matrix: weight the two incoming paths by value *
 * transition, normalize, sample. Returns p7T_M or p7T_D.
 */
static inline int
select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int          Q = p7O_NQF(ox->M);
  int          q = (k-1) % Q;   /* (q,r) is position of the current DP cell D(i,k) */
  int          r = (k-1) / Q;
  vector float zerov;
  vector float mpv, dpv;
  vector float tmdv, tddv;
  union { vector float v; float p[4]; } u;
  float        path[2];
  int          state[2] = { p7T_M, p7T_D };

  zerov = (vector float) vec_splat_u32(0);

  /* Fetch M/D(i,k-1) and the M->D, D->D transitions. k-1 is one striped
   * slot earlier; for q==0 it wraps from the last vector, shifted right
   * (vec_sld by 12 bytes) with zero fill. D->D transitions are stored
   * separately, starting at om->tfv[7*Q]. */
  if (q > 0) {
    mpv  = ox->dpf[i][(q-1)*3 + p7X_M];
    dpv  = ox->dpf[i][(q-1)*3 + p7X_D];
    tmdv = om->tfv[7*(q-1) + p7O_MD];
    tddv = om->tfv[7*Q + (q-1)];
  } else {
    mpv  = vec_sld(zerov, ox->dpf[i][(Q-1)*3 + p7X_M], 12);
    dpv  = vec_sld(zerov, ox->dpf[i][(Q-1)*3 + p7X_D], 12);
    tmdv = vec_sld(zerov, om->tfv[7*(Q-1) + p7O_MD], 12);
    tddv = vec_sld(zerov, om->tfv[8*Q-1], 12);
  }

  /* vec_madd(a,b,0) == a*b : multiply-add with a zero addend. */
  u.v = vec_madd(mpv, tmdv, zerov);  path[0] = u.p[r];
  u.v = vec_madd(dpv, tddv, zerov);  path[1] = u.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths;
 * instead we use the fact that E(i) is itself the necessary normalization
 * factor, and implement FChoose's algorithm here for an on-the-fly
 * calculation.
 * Note that that means double-precision calculation, to be sure 0.0 <= roll < 1.0
 *
 * Samples which M(i,k) or D(i,k) cell the E state was reached from, and
 * returns the state code (p7T_M or p7T_D) with the sampled k in <*ret_k>.
 * The while(1) re-walks the row if accumulated roundoff left
 * sum < roll after one pass; the debug-only assertion below checks that
 * the row really did normalize to ~1.
 */
static inline int
select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int    Q    = p7O_NQF(ox->M);
  double sum  = 0.0;
  double roll = esl_random(rng);
  double norm = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E];   /* all M, D already scaled exactly the same */
  __m128 xEv  = _mm_set1_ps(norm);
  union { __m128 v; float p[4]; } u;
  int    q,r;

  while (1) {
    for (q = 0; q < Q; q++)
      {
        /* striped layout: cell (q,r) of vector q corresponds to model node k = r*Q + q + 1 */
        u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_M], xEv);
        for (r = 0; r < 4; r++) {
          sum += u.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M; }
        }
        u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_D], xEv);
        for (r = 0; r < 4; r++) {
          sum += u.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D; }
        }
      }
    ESL_DASSERT1((sum > 0.99));  /* debug builds only: catches an unnormalized row */
  }
  /*UNREACHED*/
  ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
}
/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1).
 *
 * Deterministic (argmax) traceback selection for M(i,k) in a VMX
 * optimal-accuracy matrix. A path whose transition probability is
 * exactly 0.0 is disallowed (scored -eslINFINITY) regardless of the
 * source cell value. Returns p7T_B, p7T_M, p7T_I, or p7T_D.
 */
static inline int
select_m(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q  = p7O_NQF(ox->M);
  int           q  = (k-1) % Q;      /* (q,r) is position of the current DP cell M(i,k) */
  int           r  = (k-1) / Q;
  vector float *tp = om->tfv + 7*q;  /* *tp now at start of transitions to cur cell M(i,k) */
  vector float  xBv;
  vector float  zerov;
  vector float  mpv, dpv, ipv;
  union { vector float v; float p[4]; } u, tv;
  float         path[4];
  int           state[4] = { p7T_M, p7T_I, p7T_D, p7T_B };

  xBv   = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  zerov = (vector float) vec_splat_u32(0);

  /* Fetch M/D/I(i-1,k-1); for q==0 wrap from last vector, right-shifted with zero fill. */
  if (q > 0) {
    mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
    dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
    ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
  } else {
    mpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_M], 12);
    dpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_D], 12);
    ipv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_I], 12);
  }

  /* paths are numbered so that most desirable choice in case of tie is first. */
  u.v = xBv;  tv.v = *tp;  path[3] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);  tp++;
  u.v = mpv;  tv.v = *tp;  path[0] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);  tp++;
  u.v = ipv;  tv.v = *tp;  path[1] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);  tp++;
  u.v = dpv;  tv.v = *tp;  path[2] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);

  return state[esl_vec_FArgMax(path, 4)];
}
/* D(i,k) is reached from M(i, k-1) or D(i,k-1).
 *
 * Deterministic (argmax) traceback selection for D(i,k) in a VMX
 * optimal-accuracy matrix. A zero transition probability disallows that
 * path (-eslINFINITY). Ties prefer M over D (>= comparison).
 * Returns p7T_M or p7T_D.
 */
static inline int
select_d(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int          Q = p7O_NQF(ox->M);
  int          q = (k-1) % Q;   /* (q,r) is position of the current DP cell D(i,k) */
  int          r = (k-1) / Q;
  vector float zerov;
  union { vector float v; float p[4]; } mpv, dpv, tmdv, tddv;
  float        path[2];

  zerov = (vector float) vec_splat_u32(0);

  /* M/D(i,k-1) and M->D, D->D transitions; q==0 wraps from the last
   * vector with a zero shifted in. D->D transitions start at om->tfv[7*Q]. */
  if (q > 0) {
    mpv.v  = ox->dpf[i][(q-1)*3 + p7X_M];
    dpv.v  = ox->dpf[i][(q-1)*3 + p7X_D];
    tmdv.v = om->tfv[7*(q-1) + p7O_MD];
    tddv.v = om->tfv[7*Q + (q-1)];
  } else {
    mpv.v  = vec_sld(zerov, ox->dpf[i][(Q-1)*3 + p7X_M], 12);
    dpv.v  = vec_sld(zerov, ox->dpf[i][(Q-1)*3 + p7X_D], 12);
    tmdv.v = vec_sld(zerov, om->tfv[7*(Q-1) + p7O_MD], 12);
    tddv.v = vec_sld(zerov, om->tfv[8*Q-1], 12);
  }

  path[0] = ((tmdv.p[r] == 0.0) ? -eslINFINITY : mpv.p[r]);
  path[1] = ((tddv.p[r] == 0.0) ? -eslINFINITY : dpv.p[r]);
  return ((path[0] >= path[1]) ? p7T_M : p7T_D);
}
/* D(i,k) is reached from M(i, k-1) or D(i,k-1).
 *
 * Stochastic traceback selection for D(i,k) in a striped SSE Forward
 * matrix: weight the two incoming paths by source value * transition,
 * normalize, sample. Returns p7T_M or p7T_D.
 */
static inline int
select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int    Q = p7O_NQF(ox->M);
  int    q = (k-1) % Q;   /* (q,r) is position of the current DP cell D(i,k) */
  int    r = (k-1) / Q;
  __m128 zerov = _mm_setzero_ps();
  __m128 mpv, dpv;
  __m128 tmdv, tddv;
  union { __m128 v; float p[4]; } u;
  float  path[2];
  int    state[2] = { p7T_M, p7T_D };

  /* M/D(i,k-1) and M->D, D->D transitions; q==0 wraps from the last
   * vector, right-shifted with a zero. D->D transitions start at om->tfv[7*Q]. */
  if (q > 0) {
    mpv  = ox->dpf[i][(q-1)*3 + p7X_M];
    dpv  = ox->dpf[i][(q-1)*3 + p7X_D];
    tmdv = om->tfv[7*(q-1) + p7O_MD];
    tddv = om->tfv[7*Q + (q-1)];
  } else {
    mpv  = esl_sse_rightshift_ps(ox->dpf[i][(Q-1)*3 + p7X_M], zerov);
    dpv  = esl_sse_rightshift_ps(ox->dpf[i][(Q-1)*3 + p7X_D], zerov);
    tmdv = esl_sse_rightshift_ps(om->tfv[7*(Q-1) + p7O_MD], zerov);
    tddv = esl_sse_rightshift_ps(om->tfv[8*Q-1], zerov);
  }

  u.v = _mm_mul_ps(mpv, tmdv);  path[0] = u.p[r];
  u.v = _mm_mul_ps(dpv, tddv);  path[1] = u.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1).
 *
 * Stochastic traceback selection for M(i,k): VMX/Altivec version of the
 * SSE select_m() above. Weights the four incoming paths, normalizes,
 * samples one. Returns p7T_B, p7T_M, p7T_I, or p7T_D.
 */
static inline int
select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q  = p7O_NQF(ox->M);
  int           q  = (k-1) % Q;      /* (q,r) is position of the current DP cell M(i,k) */
  int           r  = (k-1) / Q;
  vector float *tp = om->tfv + 7*q;  /* *tp now at start of transitions to cur cell M(i,k) */
  vector float  xBv;
  vector float  zerov;
  vector float  mpv, dpv, ipv;
  union { vector float v; float p[4]; } u;
  float         path[4];
  int           state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };

  xBv   = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  zerov = (vector float) vec_splat_u32(0);

  /* Fetch M/D/I(i-1,k-1); q==0 wraps from the last vector, shifted right with zero fill. */
  if (q > 0) {
    mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
    dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
    ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
  } else {
    mpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_M], 12);
    dpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_D], 12);
    ipv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_I], 12);
  }

  /* Path weight = source * transition; tp walks B->M, M->M, I->M, D->M. */
  u.v = vec_madd(xBv, *tp, zerov); tp++;  path[0] = u.p[r];
  u.v = vec_madd(mpv, *tp, zerov); tp++;  path[1] = u.p[r];
  u.v = vec_madd(ipv, *tp, zerov); tp++;  path[2] = u.p[r];
  u.v = vec_madd(dpv, *tp, zerov);        path[3] = u.p[r];

  esl_vec_FNorm(path, 4);
  return state[esl_rnd_FChoose(rng, path, 4)];
}
/* Function:  p7_omx_FDeconvert()
 * Synopsis:  Convert an optimized DP matrix to generic one.
 * Incept:    SRE, Tue Aug 19 17:58:13 2008 [Janelia]
 *
 * Purpose:   Convert the 32-bit float values in optimized DP matrix
 *            <ox> to a generic one <gx>. Caller provides <gx> with sufficient
 *            space to hold the <ox->M> by <ox->L> matrix.
 *
 *            This function is used to gain access to the
 *            somewhat more powerful debugging and display
 *            tools available for generic DP matrices.
 *
 *            The striped cell (q,r) maps to model node k = Q*r + q + 1;
 *            cells with k > M are padding and are skipped.
 *
 * Returns:   <eslOK> on success.
 */
int
p7_omx_FDeconvert(P7_OMX *ox, P7_GMX *gx)
{
  int Q = p7O_NQF(ox->M);
  int i, q, r, k;
  union { __m128 v; float p[4]; } u;
  float **dp  = gx->dp;    /* enables use of the MMX/DMX/IMX access macros */
  float  *xmx = gx->xmx;   /* enables use of the XMX access macro          */

  for (i = 0; i <= ox->L; i++)
    {
      MMX(i,0) = DMX(i,0) = IMX(i,0) = -eslINFINITY;
      for (q = 0; q < Q; q++)
	{
	  /* Use the k computed for the bounds check directly, instead of
	   * recomputing (Q*r)+q+1 in each store as the original did. */
	  u.v = MMO(ox->dpf[i],q);  for (r = 0; r < 4; r++) { k = (Q*r)+q+1;  if (k <= ox->M) MMX(i,k) = u.p[r]; }
	  u.v = DMO(ox->dpf[i],q);  for (r = 0; r < 4; r++) { k = (Q*r)+q+1;  if (k <= ox->M) DMX(i,k) = u.p[r]; }
	  u.v = IMO(ox->dpf[i],q);  for (r = 0; r < 4; r++) { k = (Q*r)+q+1;  if (k <= ox->M) IMX(i,k) = u.p[r]; }
	}
      /* special (X) states copy over directly */
      XMX(i,p7G_E) = ox->xmx[i*p7X_NXCELLS+p7X_E];
      XMX(i,p7G_N) = ox->xmx[i*p7X_NXCELLS+p7X_N];
      XMX(i,p7G_J) = ox->xmx[i*p7X_NXCELLS+p7X_J];
      XMX(i,p7G_B) = ox->xmx[i*p7X_NXCELLS+p7X_B];
      XMX(i,p7G_C) = ox->xmx[i*p7X_NXCELLS+p7X_C];
    }
  gx->L = ox->L;
  gx->M = ox->M;
  return eslOK;
}
/* Function:  p7_oprofile_MPIPackSize()
 * Synopsis:  Calculates size needed to pack an OPROFILE.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Calculate an upper bound on the number of bytes
 *            that <p7_oprofile_MPIPack()> will need to pack an
 *            OPROFILE <om> in a packed MPI message for MPI
 *            communicator <comm>; return that number of bytes
 *            in <*ret_n>.
 *
 * Returns:   <eslOK> on success, and <*ret_n> contains the answer.
 *
 * Throws:    <eslESYS> if an MPI call fails, and <*ret_n> is 0.
 */
int
p7_oprofile_MPIPackSize(P7_OPROFILE *om, MPI_Comm comm, int *ret_n)
{
  int   status;
  int   n = 0;                   /* running total of bytes */
  int   K = om->abc->Kp;         /* alphabet size incl. degeneracies */
  int   len = 0;                 /* total length of annotation strings */
  int   cnt;
  int   sz;
  int   Q4  = p7O_NQF(om->M);    /* # striped float vectors  */
  int   Q8  = p7O_NQW(om->M);    /* # striped short vectors  */
  int   Q16 = p7O_NQB(om->M);    /* # striped byte vectors   */
  int   vsz = sizeof(__m128i);   /* bytes per SIMD vector    */

  /* MSV Filter information */
  if (MPI_Pack_size(5, MPI_CHAR, comm, &sz)          != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;
  if (MPI_Pack_size(1, MPI_FLOAT, comm, &sz)         != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;
  if (MPI_Pack_size(vsz*Q16, MPI_CHAR, comm, &sz)    != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += (K*sz);  /* one byte-vector row per residue */

  /* Viterbi Filter information */
  if (MPI_Pack_size(1, MPI_SHORT, comm, &sz)         != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += ((p7O_NXSTATES*p7O_NXTRANS+2)*sz);  /* xw[][] plus base/ddbound-style shorts */
  if (MPI_Pack_size(2, MPI_FLOAT, comm, &sz)         != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;
  if (MPI_Pack_size(K*vsz*Q8, MPI_CHAR, comm, &sz)   != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;
  if (MPI_Pack_size(8*vsz*Q8, MPI_CHAR, comm, &sz)   != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;

  /* Forward/Backward information */
  if (MPI_Pack_size(1, MPI_FLOAT, comm, &sz)         != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += (p7O_NXSTATES*p7O_NXTRANS*sz);
  if (MPI_Pack_size(K*vsz*Q4, MPI_CHAR, comm, &sz)   != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;
  if (MPI_Pack_size(8*vsz*Q4, MPI_CHAR, comm, &sz)   != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;

  /* disk offsets */
  if (MPI_Pack_size(1, MPI_LONG_LONG_INT, comm, &sz) != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += ((p7_NOFFSETS+2)*sz);

  /* annotation info: each optional string packed with trailing NUL */
  if (om->name      != NULL) len += strlen(om->name) + 1;
  if (om->acc       != NULL) len += strlen(om->acc) + 1;
  if (om->desc      != NULL) len += strlen(om->desc) + 1;
  if (om->rf        != NULL) len += strlen(om->rf) + 1;
  if (om->mm        != NULL) len += strlen(om->mm) + 1;
  if (om->cs        != NULL) len += strlen(om->cs) + 1;
  if (om->consensus != NULL) len += strlen(om->consensus) + 1;
  if (MPI_Pack_size(7, MPI_INT, comm, &sz)           != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;  /* the 7 string lengths */
  if (MPI_Pack_size(len, MPI_CHAR, comm, &sz)        != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;

  cnt = p7_NEVPARAM + p7_NCUTOFFS + p7_MAXABET;      /* evparam[], cutoff[], compo[] */
  if (MPI_Pack_size(cnt, MPI_FLOAT, comm, &sz)       != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;

  /* current model size */
  if (MPI_Pack_size(4, MPI_INT, comm, &sz)           != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;
  if (MPI_Pack_size(1, MPI_FLOAT, comm, &sz)         != 0) ESL_XEXCEPTION(eslESYS, "pack size failed");  n += sz;

  *ret_n = n;
  return eslOK;

 ERROR:
  *ret_n = 0;
  return status;
}
/* Function:  p7_omx_Create()
 * Synopsis:  Create an optimized dynamic programming matrix.
 * Incept:    SRE, Tue Nov 27 08:48:20 2007 [Janelia]
 *
 * Purpose:   Allocates a reusable, resizeable <P7_OMX> for models up to
 *            size <allocM> and target sequences up to length
 *            <allocL/allocXL>, for use by any of the various optimized
 *            DP routines.
 *
 *            To allocate the very memory-efficient one-row matrix
 *            used by *Filter() and *Score() functions that only
 *            calculate scores, <allocM=M>, <allocL=0>, and
 *            <allocXL=0>.
 *
 *            To allocate the reasonably memory-efficient linear
 *            arrays used by *Parser() functions that only keep
 *            special (X) state scores, <allocM=M>, <allocL=0>,
 *            and <allocXL=L>.
 *
 *            To allocate a complete matrix suitable for functions
 *            that need the whole DP matrix for traceback, sampling,
 *            posterior decoding, or reestimation, <allocM=M> and
 *            <allocL=allocXL=L>.
 *
 * Returns:   a pointer to the new <P7_OMX>.
 *
 * Throws:    <NULL> on allocation failure.
 */
P7_OMX *
p7_omx_Create(int allocM, int allocL, int allocXL)
{
  P7_OMX  *ox = NULL;
  int      i;
  int      status;

  ESL_ALLOC(ox, sizeof(P7_OMX));
  ox->dp_mem = NULL;
  ox->dpb    = NULL;
  ox->dpw    = NULL;
  ox->dpf    = NULL;
  ox->xmx    = NULL;
  ox->x_mem  = NULL;

  /* DP matrix will be allocated for allocL+1 rows 0,1..L; allocQ4*p7X_NSCELLS columns */
  ox->allocR   = allocL+1;
  ox->validR   = ox->allocR;
  ox->allocQ4  = p7O_NQF(allocM);
  ox->allocQ8  = p7O_NQW(allocM);
  ox->allocQ16 = p7O_NQB(allocM);
  ox->ncells   = ox->allocR * ox->allocQ4 * 4;      /* # of DP cells allocated, where 1 cell contains MDI */

  /* One memory pool serves all three precisions (byte/word/float);
   * floats always dominate; +15 for alignment */
  ESL_ALLOC(ox->dp_mem, sizeof(vector float) * ox->allocR * ox->allocQ4 * p7X_NSCELLS + 15);
  ESL_ALLOC(ox->dpb,    sizeof(vector unsigned char *) * ox->allocR);
  ESL_ALLOC(ox->dpw,    sizeof(vector signed short *)  * ox->allocR);
  ESL_ALLOC(ox->dpf,    sizeof(vector float *)         * ox->allocR);

  /* DP memory shared by <dpb>, <dpw>, <dpf>: row 0 is the pool start,
   * rounded up to a 16-byte boundary. */
  ox->dpb[0] = (vector unsigned char *) ((unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf));
  ox->dpw[0] = (vector signed short *)  ox->dpb[0];
  ox->dpf[0] = (vector float *)         ox->dpb[0];

  /* Row pointers stride by row width, which differs per precision. */
  for (i = 1; i <= allocL; i++) {
    ox->dpf[i] = ox->dpf[0] + i * ox->allocQ4  * p7X_NSCELLS;
    ox->dpw[i] = ox->dpw[0] + i * ox->allocQ8  * p7X_NSCELLS;
    ox->dpb[i] = ox->dpb[0] + i * ox->allocQ16;
  }

  /* Special (X) state scores, also 16-byte aligned. */
  ox->allocXR = allocXL+1;
  ESL_ALLOC(ox->x_mem, sizeof(float) * ox->allocXR * p7X_NXCELLS + 15);
  ox->xmx = (float *) ((unsigned long int) ((char *) ox->x_mem + 15) & (~0xf));

  ox->M              = 0;
  ox->L              = 0;
  ox->totscale       = 0.0;
  ox->has_own_scales = TRUE;	/* most matrices are Forward, control their own scale factors */
#ifdef p7_DEBUGGING
  ox->debugging = FALSE;
  ox->dfp       = NULL;
#endif
  return ox;

 ERROR:
  p7_omx_Destroy(ox);
  return NULL;
}
/* I(i,k) is reached from M(i-1, k) or I(i-1,k).
 *
 * Deterministic (argmax) traceback selection for I(i,k) in a VMX
 * optimal-accuracy matrix. A zero transition probability disallows that
 * path; ties prefer M. Returns p7T_M or p7T_I.
 */
static inline int
select_i(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q  = p7O_NQF(ox->M);
  int           q  = (k-1) % Q;   /* (q,r) is position of the current DP cell D(i,k) */
  int           r  = (k-1) / Q;
  vector float *tp = om->tfv + 7*q + p7O_MI;  /* M->I transition; I->I follows it */
  union { vector float v; float p[4]; } tv, mpv, ipv;
  float         path[2];

  mpv.v = ox->dpf[i-1][q*3 + p7X_M];  tv.v = *tp;  path[0] = ((tv.p[r] == 0.0) ? -eslINFINITY : mpv.p[r]);  tp++;
  ipv.v = ox->dpf[i-1][q*3 + p7X_I];  tv.v = *tp;  path[1] = ((tv.p[r] == 0.0) ? -eslINFINITY : ipv.p[r]);
  return ((path[0] >= path[1]) ? p7T_M : p7T_I);
}
/* get_postprob()
 *
 * Look up the posterior probability of emitting residue i from state
 * <scur> (arriving from previous state <sprv>), in posterior decoding
 * matrix <pp>. For the special states N/C/J, a probability is only
 * defined for a self-loop (sprv == scur), which is the transition that
 * emits; otherwise 0.0 is returned via fallthrough to default.
 */
static inline float
get_postprob(const P7_OMX *pp, int scur, int sprv, int k, int i)
{
  int Q = p7O_NQF(pp->M);
  int q = (k-1) % Q;   /* (q,r) is position of the current DP cell M(i,k) */
  int r = (k-1) / Q;
  union { vector float v; float p[4]; } u;

  switch (scur) {
  case p7T_M: u.v = MMO(pp->dpf[i], q); return u.p[r];
  case p7T_I: u.v = IMO(pp->dpf[i], q); return u.p[r];
  case p7T_N: if (sprv == scur) return pp->xmx[i*p7X_NXCELLS+p7X_N]; /* fallthrough */
  case p7T_C: if (sprv == scur) return pp->xmx[i*p7X_NXCELLS+p7X_C]; /* fallthrough */
  case p7T_J: if (sprv == scur) return pp->xmx[i*p7X_NXCELLS+p7X_J]; /* fallthrough */
  default:    return 0.0;
  }
}
/* I(i,k) is reached from M(i-1, k) or I(i-1,k).
 *
 * Stochastic traceback selection for I(i,k) in a striped SSE Forward
 * matrix: weight the two incoming paths by source value * transition,
 * normalize, sample. Returns p7T_M or p7T_I.
 */
static inline int
select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int     Q   = p7O_NQF(ox->M);
  int     q   = (k-1) % Q;   /* (q,r) is position of the current DP cell D(i,k) */
  int     r   = (k-1) / Q;
  __m128  mpv = ox->dpf[i-1][q*3 + p7X_M];
  __m128  ipv = ox->dpf[i-1][q*3 + p7X_I];
  __m128 *tp  = om->tfv + 7*q + p7O_MI;   /* M->I transition; I->I follows it */
  union { __m128 v; float p[4]; } u;
  float   path[2];
  int     state[2] = { p7T_M, p7T_I };

  u.v = _mm_mul_ps(mpv, *tp); tp++;  path[0] = u.p[r];
  u.v = _mm_mul_ps(ipv, *tp);        path[1] = u.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* This assumes all M_k->E, D_k->E are 1.0
 *
 * Deterministic (argmax) selection of which M(i,k) or D(i,k) the E
 * state was reached from, scanning the whole striped row i. Returns
 * the winning state code (p7T_M or p7T_D) and its k in <*ret_k>.
 */
static inline int
select_e(const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int           Q  = p7O_NQF(ox->M);
  vector float *dp = ox->dpf[i];
  union { vector float v; float p[4]; } u;
  float         max = -eslINFINITY;
  int           smax, kmax;
  int           q,r;

  /* precedence rules in case of ties here are a little tricky: M beats D: note the >= max! */
  for (q = 0; q < Q; q++)
    {
      /* row layout per q is M,D,I: first vector is M ... */
      u.v = *dp; dp++;
      for (r = 0; r < 4; r++)
        if (u.p[r] >= max) { max = u.p[r]; smax = p7T_M; kmax = r*Q + q + 1; }

      /* ... second is D; dp+=2 then skips the I vector (I->E is impossible) */
      u.v = *dp; dp += 2;
      for (r = 0; r < 4; r++)
        if (u.p[r] > max)  { max = u.p[r]; smax = p7T_D; kmax = r*Q + q + 1; }
    }
  *ret_k = kmax;
  return smax;
}
/* I(i,k) is reached from M(i-1, k) or I(i-1,k).
 *
 * Stochastic traceback selection for I(i,k): VMX/Altivec version of the
 * SSE select_i() above. Returns p7T_M or p7T_I.
 */
static inline int
select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q   = p7O_NQF(ox->M);
  int           q   = (k-1) % Q;   /* (q,r) is position of the current DP cell D(i,k) */
  int           r   = (k-1) / Q;
  vector float  zerov;
  vector float  mpv = ox->dpf[i-1][q*3 + p7X_M];
  vector float  ipv = ox->dpf[i-1][q*3 + p7X_I];
  vector float *tp  = om->tfv + 7*q + p7O_MI;   /* M->I transition; I->I follows it */
  union { vector float v; float p[4]; } u;
  float         path[2];
  int           state[2] = { p7T_M, p7T_I };

  zerov = (vector float) vec_splat_u32(0);

  /* vec_madd(a,b,0) == a*b */
  u.v = vec_madd(mpv, *tp, zerov); tp++;  path[0] = u.p[r];
  u.v = vec_madd(ipv, *tp, zerov);        path[1] = u.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths;
 * instead we use the fact that E(i) is itself the necessary normalization
 * factor, and implement FChoose's algorithm here for an on-the-fly
 * calculation.
 *
 * As in the SSE implementation, the accumulation is done in double
 * precision, to be sure 0.0 <= roll < 1.0: the original float
 * <roll> could round a double from esl_random() up to exactly 1.0f,
 * and float summation of ~2M terms loses precision. (Fixed: was float.)
 *
 * Samples which M(i,k) or D(i,k) the E state was reached from; returns
 * the state code with k in <*ret_k>. Throws if the row fails to
 * normalize after a full pass.
 */
static inline int
select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int          Q    = p7O_NQF(ox->M);
  double       sum  = 0.0;
  double       roll = esl_random(rng);
  double       norm = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E];  /* all M, D already scaled exactly the same */
  vector float xEv;
  vector float zerov;
  union { vector float v; float p[4]; } u;
  int          q,r;

  xEv   = esl_vmx_set_float(norm);
  zerov = (vector float) vec_splat_u32(0);

  while (1) {
    for (q = 0; q < Q; q++)
      {
        /* striped cell (q,r) corresponds to model node k = r*Q + q + 1 */
        u.v = vec_madd(ox->dpf[i][q*3 + p7X_M], xEv, zerov);
        for (r = 0; r < 4; r++) {
          sum += u.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M; }
        }
        u.v = vec_madd(ox->dpf[i][q*3 + p7X_D], xEv, zerov);
        for (r = 0; r < 4; r++) {
          sum += u.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D; }
        }
      }
    if (sum < 0.99)
      ESL_EXCEPTION(-1, "Probabilities weren't normalized - failed to trace back from an E");
  }
  /*UNREACHED*/
  ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
}
/* Function:  p7_omx_GrowTo()
 * Synopsis:  Assure that a DP matrix is big enough.
 * Incept:    SRE, Thu Dec 20 09:27:07 2007 [Janelia]
 *
 * Purpose:   Assures that an optimized DP matrix <ox> is allocated for
 *            a model up to <allocM> in length; if not, reallocate to
 *            make it so.
 *
 *            Because the optimized matrix is one-row, only the model
 *            length matters; the target sequence length isn't
 *            relevant.
 *
 * Returns:   <eslOK> on success, and <gx> may be reallocated upon
 *            return; any data that may have been in <gx> must be
 *            assumed to be invalidated.
 *
 * Throws:    <eslEMEM> on allocation failure, and any data that may
 *            have been in <gx> must be assumed to be invalidated.
 */
int
p7_omx_GrowTo(P7_OMX *ox, int allocM, int allocL, int allocXL)
{
  void  *p;
  int    nqf    = p7O_NQF(allocM);	       /* segment length; total # of striped vectors for float  */
  int    nqw    = p7O_NQW(allocM);	       /* segment length; total # of striped vectors for sword  */
  int    nqb    = p7O_NQB(allocM);	       /* segment length; total # of striped vectors for uchar  */
  size_t ncells = (allocL+1) * nqf * 4;
  int    reset_row_pointers = FALSE;
  int    i;
  int    status;

  /* If all possible dimensions are already satisfied, the matrix is fine */
  if (ox->allocQ4*4 >= allocM && ox->validR > allocL && ox->allocXR >= allocXL+1) return eslOK;

  /* If the main matrix is too small in cells, reallocate it;
   * and we'll need to realign/reset the row pointers later.
   */
  if (ncells > ox->ncells)
    {
      ESL_RALLOC(ox->dp_mem, p, sizeof(vector float) * (allocL+1) * nqf * p7X_NSCELLS + 15);
      ox->ncells = ncells;
      reset_row_pointers = TRUE;
    }

  /* If the X beams are too small, reallocate them. */
  if (allocXL+1 >= ox->allocXR)
    {
      ESL_RALLOC(ox->x_mem, p, sizeof(float) * (allocXL+1) * p7X_NXCELLS + 15);
      ox->allocXR = allocXL+1;
      ox->xmx = (float *) ((unsigned long int) ((char *) ox->x_mem + 15) & (~0xf));  /* 16-byte align */
    }

  /* If there aren't enough rows, reallocate the row pointers; we'll
   * realign and reset them later.
   */
  if (allocL >= ox->allocR)
    {
      ESL_RALLOC(ox->dpb, p, sizeof(vector unsigned char *) * (allocL+1));
      ESL_RALLOC(ox->dpw, p, sizeof(vector signed short * ) * (allocL+1));
      ESL_RALLOC(ox->dpf, p, sizeof(vector float *)         * (allocL+1));
      ox->allocR = allocL+1;
      reset_row_pointers = TRUE;
    }

  /* must we widen the rows? */
  if (allocM > ox->allocQ4*4) reset_row_pointers = TRUE;

  /* must we set some more valid row pointers? */
  if (allocL >= ox->validR)   reset_row_pointers = TRUE;

  /* now reset the row pointers, if needed */
  if (reset_row_pointers)
    {
      /* all three precisions alias the same 16-byte-aligned pool */
      ox->dpb[0] = (vector unsigned char *) ((unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf));
      ox->dpw[0] = (vector signed short *)  ox->dpb[0];
      ox->dpf[0] = (vector float *)         ox->dpb[0];

      /* only as many rows as actually fit in the cell pool can be valid */
      ox->validR = ESL_MIN( ox->ncells / (nqf * 4), ox->allocR);
      for (i = 1; i < ox->validR; i++)
	{
	  ox->dpb[i] = ox->dpb[0] + i * nqb;
	  ox->dpw[i] = ox->dpw[0] + i * nqw * p7X_NSCELLS;
	  ox->dpf[i] = ox->dpf[0] + i * nqf * p7X_NSCELLS;
	}

      ox->allocQ4  = nqf;
      ox->allocQ8  = nqw;
      ox->allocQ16 = nqb;
    }

  ox->M = 0;
  ox->L = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_Null2_ByExpectation()
 * Synopsis:  Calculate null2 model from posterior probabilities.
 * Incept:    SRE, Mon Aug 18 08:32:55 2008 [Janelia]
 *
 * Purpose:   Identical to <p7_GNull2_ByExpectation()> except that
 *            <om>, <pp> are SSE optimized versions of the profile
 *            and the residue posterior probability matrix. See
 *            <p7_GNull2_ByExpectation()> documentation.
 *
 *            NOTE(review): although <pp> is declared const, row 0 of
 *            its DP matrix is used as scratch space and overwritten.
 *
 * Args:      om    - profile, in any mode, target length model set to <L>
 *            pp    - posterior prob matrix, for <om> against domain envelope <dsq+i-1> (offset)
 *            null2 - RETURN: null2 log odds scores per residue; <0..Kp-1>; caller allocated space
 */
int
p7_Null2_ByExpectation(const P7_OPROFILE *om, const P7_OMX *pp, float *null2)
{
  int      M   = om->M;
  int      Ld  = pp->L;
  int      Q   = p7O_NQF(M);
  float   *xmx = pp->xmx;	/* enables use of XMXo(i,s) macro */
  float    norm;
  __m128  *rp;
  __m128   sv;
  float    xfactor;
  int      i,q,x;

  /* Calculate expected # of times that each emitting state was used
   * in generating the Ld residues in this domain.
   * The 0 row in <wrk> is used to hold these numbers.
   */
  memcpy(pp->dpf[0], pp->dpf[1], sizeof(__m128) * 3 * Q);
  XMXo(0,p7X_N) = XMXo(1,p7X_N);
  XMXo(0,p7X_C) = XMXo(1,p7X_C);  /* 0.0 */
  XMXo(0,p7X_J) = XMXo(1,p7X_J);  /* 0.0 */

  for (i = 2; i <= Ld; i++)
    {
      for (q = 0; q < Q; q++)
	{
	  pp->dpf[0][q*3 + p7X_M] = _mm_add_ps(pp->dpf[i][q*3 + p7X_M], pp->dpf[0][q*3 + p7X_M]);
	  pp->dpf[0][q*3 + p7X_I] = _mm_add_ps(pp->dpf[i][q*3 + p7X_I], pp->dpf[0][q*3 + p7X_I]);
	}
      XMXo(0,p7X_N) += XMXo(i,p7X_N);
      XMXo(0,p7X_C) += XMXo(i,p7X_C);
      XMXo(0,p7X_J) += XMXo(i,p7X_J);
    }

  /* Convert those expected #'s to frequencies, to use as posterior weights.
   */
  norm = 1.0 / (float) Ld;
  sv   = _mm_set1_ps(norm);
  for (q = 0; q < Q; q++)
    {
      pp->dpf[0][q*3 + p7X_M] = _mm_mul_ps(pp->dpf[0][q*3 + p7X_M], sv);
      pp->dpf[0][q*3 + p7X_I] = _mm_mul_ps(pp->dpf[0][q*3 + p7X_I], sv);
    }
  XMXo(0,p7X_N) *= norm;
  XMXo(0,p7X_C) *= norm;
  XMXo(0,p7X_J) *= norm;

  /* Calculate null2's emission odds, by taking posterior weighted sum
   * over all emission vectors used in paths explaining the domain.
   */
  xfactor = XMXo(0, p7X_N) + XMXo(0, p7X_C) + XMXo(0, p7X_J);
  for (x = 0; x < om->abc->K; x++)
    {
      sv = _mm_setzero_ps();
      rp = om->rfv[x];
      for (q = 0; q < Q; q++)
	{
	  sv = _mm_add_ps(sv, _mm_mul_ps(pp->dpf[0][q*3 + p7X_M], *rp)); rp++;
	  sv = _mm_add_ps(sv, pp->dpf[0][q*3 + p7X_I]);  /* insert odds implicitly 1.0 */
	  //  sv = _mm_add_ps(sv, _mm_mul_ps(pp->dpf[0][q*3 + p7X_I], *rp));
	  rp++;
	}
      esl_sse_hsum_ps(sv, &(null2[x]));
      null2[x] += xfactor;
    }
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(om->abc, null2);
  null2[om->abc->K]    = 1.0;   /* gap character    */
  null2[om->abc->Kp-2] = 1.0;	/* nonresidue "*"   */
  null2[om->abc->Kp-1] = 1.0;	/* missing data "~" */

  return eslOK;
}
/* Function:  p7_Null2_ByTrace()
 * Synopsis:  Assign null2 scores to an envelope by the sampling method.
 * Incept:    SRE, Mon Aug 18 10:22:49 2008 [Janelia]
 *
 * Purpose:   Identical to <p7_GNull2_ByTrace()> except that
 *            <om>, <wrk> are SSE optimized versions of the profile
 *            and the residue posterior probability matrix. See
 *            <p7_GNull2_ByTrace()> documentation.
 */
int
p7_Null2_ByTrace(const P7_OPROFILE *om, const P7_TRACE *tr, int zstart, int zend, P7_OMX *wrk, float *null2)
{
  union { __m128 v; float p[4]; } u;
  int     Q  = p7O_NQF(om->M);
  int     Ld = 0;                 /* # of residues emitted in this trace segment */
  float  *xmx = wrk->xmx;	  /* enables use of XMXo macro */
  float   norm;
  float   xfactor;
  __m128  sv;
  __m128 *rp;
  int     q, r, s;
  int     x;
  int     z;

  /* We'll use the i=0 row in wrk for working space: dp[0][] and xmx[][0]. */
  for (q = 0; q < Q; q++)
    {
      wrk->dpf[0][q*3 + p7X_M] = _mm_setzero_ps();
      wrk->dpf[0][q*3 + p7X_I] = _mm_setzero_ps();
    }
  XMXo(0,p7X_N) = 0.0;
  XMXo(0,p7X_C) = 0.0;
  XMXo(0,p7X_J) = 0.0;

  /* Calculate emitting state usage in this particular trace segment */
  for (z = zstart; z <= zend; z++)
    {
      if (tr->i[z] == 0) continue; /* quick test for whether this trace elem emitted or not */
      Ld++;
      if (tr->k[z] > 0)	/* must be an M or I */
	{
	  /* surely there's an easier way? but our workspace is striped, interleaved quads... */
	  s = ( (tr->st[z] == p7T_M) ? p7X_M : p7X_I);
	  /* BUGFIX: index by <s>, not a hardcoded p7X_M; otherwise
	   * insert-state emissions were miscounted into the match cell. */
	  q = p7X_NSCELLS * ( (tr->k[z] - 1) % Q) + s;
	  r = (tr->k[z] - 1) / Q;
	  u.v       = wrk->dpf[0][q];
	  u.p[r]   += 1.0;	/* all this to increment a count by one! */
	  wrk->dpf[0][q] = u.v;
	}
      else /* emitted an x_i with no k; must be an N,C,J */
	{
	  switch (tr->st[z]) {
	  case p7T_N: XMXo(0,p7X_N) += 1.0; break;
	  case p7T_C: XMXo(0,p7X_C) += 1.0; break;
	  case p7T_J: XMXo(0,p7X_J) += 1.0; break;
	  }
	}
    }

  /* Convert counts to frequencies (posterior weights). */
  norm = 1.0 / (float) Ld;
  sv   = _mm_set1_ps(norm);
  for (q = 0; q < Q; q++)
    {
      wrk->dpf[0][q*3 + p7X_M] = _mm_mul_ps(wrk->dpf[0][q*3 + p7X_M], sv);
      wrk->dpf[0][q*3 + p7X_I] = _mm_mul_ps(wrk->dpf[0][q*3 + p7X_I], sv);
    }
  XMXo(0,p7X_N) *= norm;
  XMXo(0,p7X_C) *= norm;
  XMXo(0,p7X_J) *= norm;

  /* Calculate null2's emission odds, by taking posterior weighted sum
   * over all emission vectors used in paths explaining the domain.
   */
  xfactor = XMXo(0,p7X_N) + XMXo(0,p7X_C) + XMXo(0,p7X_J);
  for (x = 0; x < om->abc->K; x++)
    {
      sv = _mm_setzero_ps();
      rp = om->rfv[x];
      for (q = 0; q < Q; q++)
	{
	  sv = _mm_add_ps(sv, _mm_mul_ps(wrk->dpf[0][q*3 + p7X_M], *rp)); rp++;
	  sv = _mm_add_ps(sv, wrk->dpf[0][q*3 + p7X_I]);  /* insert emission odds implicitly 1.0 */
	  //  sv = _mm_add_ps(sv, _mm_mul_ps(wrk->dpf[0][q*3 + p7X_I], *rp));
	  rp++;
	}
      esl_sse_hsum_ps(sv, &(null2[x]));
      null2[x] += xfactor;
    }
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(om->abc, null2);
  null2[om->abc->K]    = 1.0;   /* gap character    */
  null2[om->abc->Kp-2] = 1.0;	/* nonresidue "*"   */
  null2[om->abc->Kp-1] = 1.0;	/* missing data "~" */

  return eslOK;
}
/* Function:  p7_OptimalAccuracy()
 * Synopsis:  DP fill of an optimal accuracy alignment calculation.
 * Incept:    SRE, Mon Aug 18 11:04:48 2008 [Janelia]
 *
 * Purpose:   Calculates the fill step of the optimal accuracy decoding
 *            algorithm \citep{Kall05}.
 *
 *            Caller provides the posterior decoding matrix <pp>,
 *            which was calculated by Forward/Backward on a target sequence
 *            of length <pp->L> using the query model <om>.
 *
 *            Caller also provides a DP matrix <ox>, allocated for a full
 *            <om->M> by <L> comparison. The routine fills this in
 *            with OA scores.
 *
 *            Throughout, vec_and(vec_cmpgt(*tp, zerov), v) implements
 *            "v if transition prob > 0, else 0": the comparison yields
 *            an all-ones/all-zeros mask that gates the path value.
 *
 * Args:      gm    - query profile
 *            pp    - posterior decoding matrix created by <p7_GPosteriorDecoding()>
 *            gx    - RESULT: caller provided DP matrix for <gm->M> by <L>
 *            ret_e - RETURN: expected number of correctly decoded positions
 *
 * Returns:   <eslOK> on success, and <*ret_e> contains the final OA
 *            score, which is the expected number of correctly decoded
 *            positions in the target sequence (up to <L>).
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_OptimalAccuracy(const P7_OPROFILE *om, const P7_OMX *pp, P7_OMX *ox, float *ret_e)
{
  vector float  mpv, dpv, ipv;      /* previous row values                                       */
  vector float  sv;		    /* temp storage of 1 curr row value in progress              */
  vector float  xEv;		    /* E state: keeps max for Mk->E as we go                     */
  vector float  xBv;		    /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float  dcv;
  float        *xmx = ox->xmx;
  vector float *dpc = ox->dpf[0];   /* current row, for use in {MDI}MO(dpp,q) access macro       */
  vector float *dpp;		    /* previous row, for use in {MDI}MO(dpp,q) access macro      */
  vector float *ppp;		    /* quads in the <pp> posterior probability matrix            */
  vector float *tp;		    /* quads in the <om->tfv> transition scores                  */
  vector float  zerov;
  vector float  infv;
  int           M = om->M;
  int           Q = p7O_NQF(M);
  int           q;
  int           j;
  int           i;
  float         t1, t2;

  zerov = (vector float) vec_splat_u32(0);
  infv  = esl_vmx_set_float(-eslINFINITY);

  ox->M = om->M;
  ox->L = pp->L;

  /* Initialize row 0: no paths into the model yet. */
  for (q = 0; q < Q; q++) MMO(dpc, q) = IMO(dpc,q) = DMO(dpc,q) = infv;
  XMXo(0, p7X_E) = -eslINFINITY;
  XMXo(0, p7X_N) = 0.;
  XMXo(0, p7X_J) = -eslINFINITY;
  XMXo(0, p7X_B) = 0.;
  XMXo(0, p7X_C) = -eslINFINITY;

  for (i = 1; i <= pp->L; i++)
    {
      dpp = dpc;		/* previous DP row in OA matrix */
      dpc = ox->dpf[i];         /* current DP row in OA matrix  */
      ppp = pp->dpf[i];         /* current row in the posterior probabilities per position */
      tp  = om->tfv;		/* transition probabilities     */
      dcv = infv;
      xEv = infv;
      xBv = esl_vmx_set_float(XMXo(i-1, p7X_B));

      mpv = vec_sld(infv, MMO(dpp,Q-1), 12);  /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
      dpv = vec_sld(infv, DMO(dpp,Q-1), 12);
      ipv = vec_sld(infv, IMO(dpp,Q-1), 12);
      for (q = 0; q < Q; q++)
	{
	  /* M(i,k): max over B/M/I/D sources whose transition is nonzero, plus posterior */
	  sv  =                vec_and(vec_cmpgt(*tp, zerov), xBv);  tp++;
	  sv  = vec_max(sv,    vec_and(vec_cmpgt(*tp, zerov), mpv)); tp++;
	  sv  = vec_max(sv,    vec_and(vec_cmpgt(*tp, zerov), ipv)); tp++;
	  sv  = vec_max(sv,    vec_and(vec_cmpgt(*tp, zerov), dpv)); tp++;
	  sv  = vec_add(sv, *ppp);  ppp += 2;   /* skip the D posterior quad: deletes don't emit */
	  xEv = vec_max(xEv, sv);

	  mpv = MMO(dpp,q);	/* hold these for the I(i,k) calculation below */
	  dpv = DMO(dpp,q);
	  ipv = IMO(dpp,q);

	  MMO(dpc,q) = sv;
	  DMO(dpc,q) = dcv;	/* delayed store: D(i,k) from M->D carried in dcv */

	  dcv = vec_and(vec_cmpgt(*tp, zerov), sv); tp++;

	  /* I(i,k): max over M/I sources, plus posterior */
	  sv = vec_and(vec_cmpgt(*tp, zerov), mpv);              tp++;
	  sv = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), ipv)); tp++;
	  IMO(dpc,q) = vec_add(sv, *ppp); ppp++;
	}

      /* dcv has carried through from end of q loop above; store it
       * in first pass, we add M->D and D->D path into DMX
       */
      dcv = vec_sld(infv, dcv, 12);
      tp  = om->tfv + 7*Q;	/* set tp to start of the DD's */
      for (q = 0; q < Q; q++)
	{
	  DMO(dpc, q) = vec_max(dcv, DMO(dpc, q));
	  dcv         = vec_and(vec_cmpgt(*tp, zerov), DMO(dpc,q)); tp++;
	}

      /* fully serialized D->D; can optimize later */
      for (j = 1; j < 4; j++)
	{
	  dcv = vec_sld(infv, dcv, 12);
	  tp  = om->tfv + 7*Q;
	  for (q = 0; q < Q; q++)
	    {
	      DMO(dpc, q) = vec_max(dcv, DMO(dpc, q));
	      dcv         = vec_and(vec_cmpgt(*tp, zerov), dcv); tp++;
	    }
	}

      /* D->E paths */
      for (q = 0; q < Q; q++) xEv = vec_max(xEv, DMO(dpc,q));

      /* Specials: each term only counts when the corresponding profile
       * transition is possible (nonzero). */
      XMXo(i,p7X_E) = esl_vmx_hmax_float(xEv);

      t1 = ( (om->xf[p7O_J][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_J] + pp->xmx[i*p7X_NXCELLS+p7X_J]);
      t2 = ( (om->xf[p7O_E][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[ i   *p7X_NXCELLS+p7X_E]);
      ox->xmx[i*p7X_NXCELLS+p7X_J] = ESL_MAX(t1, t2);

      t1 = ( (om->xf[p7O_C][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_C] + pp->xmx[i*p7X_NXCELLS+p7X_C]);
      t2 = ( (om->xf[p7O_E][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[ i   *p7X_NXCELLS+p7X_E]);
      ox->xmx[i*p7X_NXCELLS+p7X_C] = ESL_MAX(t1, t2);

      ox->xmx[i*p7X_NXCELLS+p7X_N] = ((om->xf[p7O_N][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_N] + pp->xmx[i*p7X_NXCELLS+p7X_N]);

      t1 = ( (om->xf[p7O_N][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_N]);
      t2 = ( (om->xf[p7O_J][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_J]);
      ox->xmx[i*p7X_NXCELLS+p7X_B] = ESL_MAX(t1, t2);
    }

  *ret_e = ox->xmx[pp->L*p7X_NXCELLS+p7X_C];
  return eslOK;
}
/* Function:  p7_Decoding()
 * Synopsis:  Posterior decoding of residue assignment.
 * Incept:    SRE, Fri Aug 8 14:29:42 2008 [UA217 to SFO]
 *
 * Purpose:   Identical to <p7_GDecoding()> except that <om>, <oxf>,
 *            <oxb> are SSE optimized versions. See <p7_GDecoding()>
 *            documentation for more info.
 *
 * Args:      om  - profile (must be the same that was used to fill <oxf>, <oxb>).
 *            oxf - filled Forward matrix
 *            oxb - filled Backward matrix
 *            pp  - RESULT: posterior decoding matrix.
 *
 * Returns:   <eslOK> on success.
 *
 *            Returns <eslERANGE> if numeric range of floating-point
 *            values is exceeded during posterior probability
 *            calculations. In this case, the <pp> matrix must not be
 *            used by the caller; it will contain <NaN> values. To be
 *            safe, the caller should recalculate a generic posterior
 *            decoding matrix instead -- generic calculations are done
 *            in log probability space and are robust.
 *
 *            However, I currently believe that this overflow only
 *            occurs on an unusual and ignorable situation: when a
 *            <p7_UNILOCAL> model is used on a region that contains
 *            two or more high scoring distinct alignments to the
 *            model. And that only happens if domain definition fails,
 *            after stochastic clustering, and an envelope that we
 *            pass to p7_domaindef.c::rescore_isolated_domain()
 *            erroneously contains 2+ distinct domains. (Note that
 *            this is different from having 2+ expected B states: that
 *            can happen normally, if a single consistent domain is
 *            better described by 2+ passes through the model). And I
 *            strongly believe all this only can happen on repetitive
 *            or biased-composition junk that we want to ignore anyway.
 *            Therefore the caller should be safe in ignoring any domain
 *            for which <p7_Decoding()> returns <eslERANGE>.
 *
 *            Exception (bug #h68): see hmmalign.c, where the model is
 *            in unilocal mode, and it is entirely possible for the
 *            user to give us a multidomain protein.
 *
 * Throws:    (no abnormal error conditions)
 *
 * Xref:      [J3/119-121]: for analysis of numeric range issues when
 *            <scaleproduct> overflows.
 */
int
p7_Decoding(const P7_OPROFILE *om, const P7_OMX *oxf, P7_OMX *oxb, P7_OMX *pp)
{
  __m128 *ppv;     /* walks the posterior row (M,D,I interleaved quads)  */
  __m128 *fv;      /* walks the Forward row                              */
  __m128 *bv;      /* walks the Backward row                             */
  __m128  totrv;   /* splatted per-row normalization factor              */
  int     L  = oxf->L;
  int     M  = om->M;
  int     Q  = p7O_NQF(M);
  int     i,q;
  /* 1/P(seq): Backward's N(0) holds the total sequence probability (scaled) */
  float   scaleproduct = 1.0 / oxb->xmx[p7X_N];

  pp->M = M;
  pp->L = L;

  /* Row 0: no residues emitted yet; all posteriors zero. */
  ppv = pp->dpf[0];
  for (q = 0; q < Q; q++) {
    *ppv = _mm_setzero_ps(); ppv++;
    *ppv = _mm_setzero_ps(); ppv++;
    *ppv = _mm_setzero_ps(); ppv++;
  }
  pp->xmx[p7X_E] = 0.0;
  pp->xmx[p7X_N] = 0.0;
  pp->xmx[p7X_J] = 0.0;
  pp->xmx[p7X_C] = 0.0;
  pp->xmx[p7X_B] = 0.0;

  for (i = 1; i <= L; i++)
    {
      ppv   = pp->dpf[i];
      fv    = oxf->dpf[i];
      bv    = oxb->dpf[i];
      /* fold the forward row's scale factor back in so F*B is correctly normalized */
      totrv = _mm_set1_ps(scaleproduct * oxf->xmx[i*p7X_NXCELLS+p7X_SCALE]);

      for (q = 0; q < Q; q++)
	{
	  /* M: posterior = F * B * norm */
	  *ppv = _mm_mul_ps(*fv,  *bv);
	  *ppv = _mm_mul_ps(*ppv, totrv);
	  ppv++;  fv++;  bv++;

	  /* D: D states emit nothing; posterior per position is zero */
	  *ppv = _mm_setzero_ps();
	  ppv++;  fv++;  bv++;

	  /* I */
	  *ppv = _mm_mul_ps(*fv,  *bv);
	  *ppv = _mm_mul_ps(*ppv, totrv);
	  ppv++;  fv++;  bv++;
	}

      /* Specials: only states that emit residue i (N/J/C loops) get nonzero posterior. */
      pp->xmx[i*p7X_NXCELLS+p7X_E] = 0.0;
      pp->xmx[i*p7X_NXCELLS+p7X_N] = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_N] * oxb->xmx[i*p7X_NXCELLS+p7X_N] * om->xf[p7O_N][p7O_LOOP] * scaleproduct;
      pp->xmx[i*p7X_NXCELLS+p7X_J] = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_J] * oxb->xmx[i*p7X_NXCELLS+p7X_J] * om->xf[p7O_J][p7O_LOOP] * scaleproduct;
      pp->xmx[i*p7X_NXCELLS+p7X_C] = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_C] * oxb->xmx[i*p7X_NXCELLS+p7X_C] * om->xf[p7O_C][p7O_LOOP] * scaleproduct;
      pp->xmx[i*p7X_NXCELLS+p7X_B] = 0.0;

      /* If Backward used its own scales, the fwd/bck scale ratio accumulates into the norm. */
      if (oxb->has_own_scales) scaleproduct *= oxf->xmx[i*p7X_NXCELLS+p7X_SCALE] / oxb->xmx[i*p7X_NXCELLS+p7X_SCALE];
    }

  if (isinf(scaleproduct)) return eslERANGE;
  else                     return eslOK;
}
/* Function:  p7_ViterbiScore()
 * Synopsis:  Calculates Viterbi score, correctly, and vewy vewy fast.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates the Viterbi score for sequence <dsq> of length <L>
 *            residues, using optimized profile <om>, and a preallocated
 *            one-row DP matrix <ox>. Return the Viterbi score (in nats)
 *            in <ret_sc>.
 *
 *            The model <om> must be configured specially to have
 *            lspace float scores, not its usual pspace float scores for
 *            <p7_ForwardFilter()>.
 *
 *            As with all <*Score()> implementations, the score is
 *            accurate (full range and precision) and can be
 *            calculated on models in any mode, not only local modes.
 *
 * Args:      dsq    - digital target sequence, 1..L
 *            L      - length of dsq in residues
 *            om     - optimized profile
 *            ox     - DP matrix
 *            ret_sc - RETURN: Viterbi score (in nats)
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_ViterbiScore(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector float mpv, dpv, ipv;   /* previous row values                                       */
  vector float sv;		/* temp storage of 1 curr row value in progress              */
  vector float dcv;		/* delayed storage of D(i,q+1)                               */
  vector float xEv;		/* E state: keeps max for Mk->E as we go                     */
  vector float xBv;		/* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float Dmaxv;           /* keeps track of maximum D cell on row                      */
  vector float infv;		/* -eslINFINITY in a vector                                  */
  float    xN, xE, xB, xC, xJ;	/* special states' scores                                    */
  float    Dmax;		/* maximum D cell on row                                     */
  int i;			/* counter over sequence positions 1..L                      */
  int q;			/* counter over vectors 0..nq-1                              */
  int Q = p7O_NQF(om->M);	/* segment length: # of vectors                              */
  vector float *dp = ox->dpf[0];/* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector float *rsc;		/* will point at om->rf[x] for residue x[i]                  */
  vector float *tsc;		/* will point into (and step thru) om->tf                    */

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M = om->M;

  /* Initialization. */
  infv = esl_vmx_set_float(-eslINFINITY);
  for (q = 0; q < Q; q++) MMXo(q) = IMXo(q) = DMXo(q) = infv;
  xN = 0.;
  xB = om->xf[p7O_N][p7O_MOVE];
  xE = -eslINFINITY;
  xJ = -eslINFINITY;
  xC = -eslINFINITY;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, 0, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=0, width=5, precision=2*/
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rf[dsq[i]];
      tsc   = om->tf;
      dcv   = infv;
      xEv   = infv;
      Dmaxv = infv;
      xBv   = esl_vmx_set_float(xB);

      mpv = vec_sld(infv, MMXo(Q-1), 12);  /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
      dpv = vec_sld(infv, DMXo(Q-1), 12);
      ipv = vec_sld(infv, IMXo(Q-1), 12);

      for (q = 0; q < Q; q++)
	{
	  /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
	  sv   =             vec_add(xBv, *tsc);  tsc++;
	  sv   = vec_max(sv, vec_add(mpv, *tsc)); tsc++;
	  sv   = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
	  sv   = vec_max(sv, vec_add(dpv, *tsc)); tsc++;
	  sv   = vec_add(sv, *rsc); rsc++;
	  xEv  = vec_max(xEv, sv);

	  /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
	   * {MDI}MX(q) is then the current, not the prev row
	   */
	  mpv = MMXo(q);
	  dpv = DMXo(q);
	  ipv = IMXo(q);

	  /* Do the delayed stores of {MD}(i,q) now that memory is usable */
	  MMXo(q) = sv;
	  DMXo(q) = dcv;

	  /* Calculate the next D(i,q+1) partially: M->D only;
	   * delay storage, holding it in dcv
	   */
	  dcv   = vec_add(sv, *tsc); tsc++;
	  Dmaxv = vec_max(dcv, Dmaxv);

	  /* Calculate and store I(i,q) */
	  sv      =             vec_add(mpv, *tsc);  tsc++;
	  sv      = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
	  IMXo(q) = vec_add(sv, *rsc); rsc++;
	}

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_float(xEv);
      xN = xN + om->xf[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xf[p7O_C][p7O_LOOP], xE + om->xf[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xf[p7O_J][p7O_LOOP], xE + om->xf[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xf[p7O_J][p7O_MOVE], xN + om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       *
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->dd_bound;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_float(Dmaxv);
      if (Dmax + om->ddbound_f > xB)
	{
	  /* Now we're obligated to do at least one complete DD path to be sure. */
	  /* dcv has carried through from end of q loop above */
	  dcv = vec_sld(infv, dcv, 12);
	  tsc = om->tf + 7*Q;	/* set tsc to start of the DD's */
	  for (q = 0; q < Q; q++)
	    {
	      DMXo(q) = vec_max(dcv, DMXo(q));
	      dcv     = vec_add(DMXo(q), *tsc); tsc++;
	    }

	  /* We may have to do up to three more passes; the check
	   * is for whether crossing a segment boundary can improve
	   * our score.
	   */
	  do {
	    dcv = vec_sld(infv, dcv, 12);
	    tsc = om->tf + 7*Q;	/* set tsc to start of the DD's */
	    for (q = 0; q < Q; q++)
	      {
		if (! vec_any_gt(dcv, DMXo(q))) break;  /* no improvement possible: stop early */
		DMXo(q) = vec_max(dcv, DMXo(q));
		dcv     = vec_add(DMXo(q), *tsc); tsc++;
	      }
	  } while (q == Q);    /* full pass completed => another boundary crossing might help */
	}
      else
	{ /* not calculating DD? then just store that last MD vector we calc'ed. */
	  dcv     = vec_sld(infv, dcv, 12);
	  DMXo(0) = dcv;
	}

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, i, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=i, width=5, precision=2*/
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  *ret_sc = xC + om->xf[p7O_C][p7O_MOVE];
  return eslOK;
}
/* Function:  p7_oprofile_MPIUnpack()
 * Synopsis:  Unpacks an OPROFILE from an MPI buffer.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Unpack a newly allocated OPROFILE from MPI packed buffer
 *            <buf>, starting from position <*pos>, where the total length
 *            of the buffer in bytes is <n>.
 *
 *            Caller may or may not already know what alphabet the OPROFILE
 *            is expected to be in. A reference to the current
 *            alphabet is passed in <abc>. If the alphabet is unknown,
 *            pass <*abc = NULL>, and when the OPROFILE is received, an
 *            appropriate new alphabet object is allocated and passed
 *            back to the caller via <*abc>. If the alphabet is
 *            already known, <*abc> is that alphabet, and the new
 *            OPROFILE's alphabet type is verified to agree with it. This
 *            mechanism allows an application to let the first OPROFILE
 *            determine the alphabet type for the application, while
 *            still keeping the alphabet under the application's scope
 *            of control.
 *
 * Returns:   <eslOK> on success. <*pos> is updated to the position of
 *            the next element in <buf> to unpack (if any). <*ret_om>
 *            contains a newly allocated OPROFILE, which the caller is
 *            responsible for free'ing. If <*abc> was passed as
 *            <NULL>, it now points to an <ESL_ALPHABET> object that
 *            was allocated here; caller is responsible for free'ing
 *            this.
 *
 *            Returns <eslEINCOMPAT> if the OPROFILE is in a different
 *            alphabet than <*abc> said to expect. In this case,
 *            <*abc> is unchanged, <*buf> and <*nalloc> may have been
 *            changed, and <*ret_om> is <NULL>.
 *
 * Throws:    <eslESYS> on an MPI call failure. <eslEMEM> on allocation failure.
 *            In either case, <*ret_om> is <NULL>, and the state of <buf>
 *            and <*pos> is undefined and should be considered to be corrupted.
 */
int
p7_oprofile_MPIUnpack(char *buf, int n, int *pos, MPI_Comm comm, ESL_ALPHABET **abc, P7_OPROFILE **ret_om)
{
  int   status;
  int   M, K, atype;
  int   len;
  int   x;
  int   Q4, Q8, Q16;                    /* stripe counts for float, word, byte vectors */
  int   vsz = sizeof(vector float);     /* vector width in bytes                       */
  P7_OPROFILE *om = NULL;

  /* NOTE: all failure exits after <om> is allocated must use ESL_XEXCEPTION
   * (goto ERROR) rather than ESL_EXCEPTION (direct return), so that <om> is
   * destroyed and not leaked on an MPI unpack failure.
   */
  if (MPI_Unpack(buf, n, pos, &M,     1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &atype, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  /* Set or verify the alphabet */
  if (*abc == NULL)	{	/* still unknown: set it, pass control of it back to caller */
    if ((*abc = esl_alphabet_Create(atype)) == NULL)       { status = eslEMEM;      goto ERROR; }
  } else {			/* already known: check it */
    if ((*abc)->type != atype)                             { status = eslEINCOMPAT; goto ERROR; }
  }

  Q4  = p7O_NQF(M);
  Q8  = p7O_NQW(M);
  Q16 = p7O_NQB(M);

  if ((om = p7_oprofile_Create(M, *abc)) == NULL) { status = eslEMEM; goto ERROR; }
  om->M = M;

  K = (*abc)->Kp;

  /* model configuration */
  if (MPI_Unpack(buf, n, pos, &om->L,    1, MPI_INT,   comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->mode, 1, MPI_INT,   comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->nj,   1, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  /* MSV Filter information */
  if (MPI_Unpack(buf, n, pos, &om->tbm_b,   1, MPI_CHAR,  comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->tec_b,   1, MPI_CHAR,  comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->tjb_b,   1, MPI_CHAR,  comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->scale_b, 1, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->base_b,  1, MPI_CHAR,  comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->bias_b,  1, MPI_CHAR,  comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  for (x = 0; x < K; x++)
    if (MPI_Unpack(buf, n, pos, om->rbv[x], vsz*Q16, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  /* Viterbi Filter information */
  if (MPI_Unpack(buf, n, pos, &om->scale_w,      1, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->base_w,       1, MPI_SHORT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->ddbound_w,    1, MPI_SHORT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->ncj_roundoff, 1, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, om->twv, 8*vsz*Q8, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  for (x = 0; x < p7O_NXSTATES; x++)
    if (MPI_Unpack(buf, n, pos, om->xw[x], p7O_NXTRANS, MPI_SHORT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  for (x = 0; x < K; x++)
    if (MPI_Unpack(buf, n, pos, om->rwv[x], vsz*Q8, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  /* Forward/Backward information */
  if (MPI_Unpack(buf, n, pos, om->tfv, 8*vsz*Q4, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  for (x = 0; x < p7O_NXSTATES; x++)
    if (MPI_Unpack(buf, n, pos, om->xf[x], p7O_NXTRANS, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  for (x = 0; x < K; x++)
    if (MPI_Unpack(buf, n, pos, om->rfv[x], vsz*Q4, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  /* Disk offset information */
  if (MPI_Unpack(buf, n, pos, om->offs, p7_NOFFSETS, MPI_LONG_LONG_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->roff,          1, MPI_LONG_LONG_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, &om->eoff,          1, MPI_LONG_LONG_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  /* Annotation information: each string was packed length-prefixed, NUL included. */
  if (MPI_Unpack(buf, n, pos, &len, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (len > 0) {
    ESL_ALLOC(om->name, len);
    if (MPI_Unpack(buf, n, pos, om->name, len, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
    om->name[len-1] = '\0';
  }
  if (MPI_Unpack(buf, n, pos, &len, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (len > 0) {
    ESL_ALLOC(om->acc, len);
    if (MPI_Unpack(buf, n, pos, om->acc, len, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
    om->acc[len-1] = '\0';
  }
  if (MPI_Unpack(buf, n, pos, &len, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (len > 0) {
    ESL_ALLOC(om->desc, len);
    if (MPI_Unpack(buf, n, pos, om->desc, len, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
    om->desc[len-1] = '\0';
  }
  if (MPI_Unpack(buf, n, pos, &len, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (len > 0) {
    ESL_ALLOC(om->rf, len);
    if (MPI_Unpack(buf, n, pos, om->rf, len, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
    om->rf[len-1] = '\0';
  }
  if (MPI_Unpack(buf, n, pos, &len, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (len > 0) {
    ESL_ALLOC(om->cs, len);
    if (MPI_Unpack(buf, n, pos, om->cs, len, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
    om->cs[len-1] = '\0';
  }
  if (MPI_Unpack(buf, n, pos, &len, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (len > 0) {
    ESL_ALLOC(om->consensus, len);
    if (MPI_Unpack(buf, n, pos, om->consensus, len, MPI_CHAR, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
    om->consensus[len-1] = '\0';
  }

  if (MPI_Unpack(buf, n, pos, om->evparam, p7_NEVPARAM, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, om->cutoff,  p7_NCUTOFFS, MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (MPI_Unpack(buf, n, pos, om->compo,   p7_MAXABET,  MPI_FLOAT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");

  *ret_om = om;
  return eslOK;

 ERROR:
  if (om != NULL) p7_oprofile_Destroy(om);
  return status;
}
/* Function:  p7_oprofile_Write()
 * Synopsis:  Write an optimized profile in two files.
 * Incept:    SRE, Wed Jan 21 10:35:28 2009 [Janelia]
 *
 * Purpose:   Write the MSV filter part of <om> to open binary stream
 *            <ffp>, and the rest of the model to <pfp>. These two
 *            streams will typically be <.h3f> and <.h3p> files
 *            being created by hmmpress.
 *
 *            The write order here defines the on-disk format; it must
 *            stay in lockstep with the corresponding reader.
 *
 * Args:      ffp - open binary stream for saving MSV filter part
 *            pfp - open binary stream for saving rest of profile
 *            om  - optimized profile to save
 *
 * Returns:   <eslOK> on success.
 *
 *            Returns <eslFAIL> on any write failure; for example,
 *            if disk is full.
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_oprofile_Write(FILE *ffp, FILE *pfp, P7_OPROFILE *om)
{
  int Q4  = p7O_NQF(om->M);
  int Q8  = p7O_NQW(om->M);
  int Q16 = p7O_NQB(om->M);
  int n   = strlen(om->name);
  int x;

  /* <ffp> is the part of the oprofile that MSVFilter() needs */
  if (fwrite((char *) &(v3b_fmagic),    sizeof(uint32_t), 1,   ffp) != 1)   return eslFAIL; /* format magic first, for file identification */
  if (fwrite((char *) &(om->M),         sizeof(int),      1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->abc->type), sizeof(int),      1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &n,               sizeof(int),      1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) om->name,         sizeof(char),     n+1, ffp) != n+1) return eslFAIL; /* name written with its NUL terminator */
  if (fwrite((char *) &(om->tbm_b),     sizeof(uint8_t),  1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->tec_b),     sizeof(uint8_t),  1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->tjb_b),     sizeof(uint8_t),  1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->scale_b),   sizeof(float),    1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->base_b),    sizeof(uint8_t),  1,   ffp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->bias_b),    sizeof(uint8_t),  1,   ffp) != 1)   return eslFAIL;

  for (x = 0; x < om->abc->Kp; x++)
    if (fwrite( (char *) om->rbv[x], sizeof(__m128i), Q16, ffp) != Q16) return eslFAIL;

  if (fwrite((char *) om->evparam, sizeof(float), p7_NEVPARAM, ffp) != p7_NEVPARAM) return eslFAIL;
  if (fwrite((char *) om->offs,    sizeof(off_t), p7_NOFFSETS, ffp) != p7_NOFFSETS) return eslFAIL;
  if (fwrite((char *) om->compo,   sizeof(float), p7_MAXABET,  ffp) != p7_MAXABET)  return eslFAIL;

  /* <pfp> gets the rest of the oprofile */
  if (fwrite((char *) &(v3b_pmagic),    sizeof(uint32_t), 1,   pfp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->M),         sizeof(int),      1,   pfp) != 1)   return eslFAIL;
  if (fwrite((char *) &(om->abc->type), sizeof(int),      1,   pfp) != 1)   return eslFAIL;
  if (fwrite((char *) &n,               sizeof(int),      1,   pfp) != 1)   return eslFAIL;
  if (fwrite((char *) om->name,         sizeof(char),     n+1, pfp) != n+1) return eslFAIL;

  /* optional annotation strings: length 0 means "absent"; string bytes only written when present */
  if (om->acc == NULL) {
    n = 0;
    if (fwrite((char *) &n, sizeof(int), 1, pfp) != 1) return eslFAIL;
  } else {
    n = strlen(om->acc);
    if (fwrite((char *) &n,       sizeof(int),  1,   pfp) != 1)   return eslFAIL;
    if (fwrite((char *) om->acc,  sizeof(char), n+1, pfp) != n+1) return eslFAIL;
  }
  if (om->desc == NULL) {
    n = 0;
    if (fwrite((char *) &n, sizeof(int), 1, pfp) != 1) return eslFAIL;
  } else {
    n = strlen(om->desc);
    if (fwrite((char *) &n,       sizeof(int),  1,   pfp) != 1)   return eslFAIL;
    if (fwrite((char *) om->desc, sizeof(char), n+1, pfp) != n+1) return eslFAIL;
  }

  /* per-position annotation lines are M+2 chars (0..M plus NUL convention) */
  if (fwrite((char *) om->rf,        sizeof(char), om->M+2, pfp) != om->M+2) return eslFAIL;
  if (fwrite((char *) om->cs,        sizeof(char), om->M+2, pfp) != om->M+2) return eslFAIL;
  if (fwrite((char *) om->consensus, sizeof(char), om->M+2, pfp) != om->M+2) return eslFAIL;

  /* ViterbiFilter part */
  if (fwrite((char *) om->twv, sizeof(__m128i), 8*Q8, pfp) != 8*Q8) return eslFAIL;
  for (x = 0; x < om->abc->Kp; x++)
    if (fwrite( (char *) om->rwv[x], sizeof(__m128i), Q8, pfp) != Q8) return eslFAIL;
  for (x = 0; x < p7O_NXSTATES; x++)
    if (fwrite( (char *) om->xw[x], sizeof(int16_t), p7O_NXTRANS, pfp) != p7O_NXTRANS) return eslFAIL;
  if (fwrite((char *) &(om->scale_w),      sizeof(float),   1, pfp) != 1) return eslFAIL;
  if (fwrite((char *) &(om->base_w),       sizeof(int16_t), 1, pfp) != 1) return eslFAIL;
  if (fwrite((char *) &(om->ddbound_w),    sizeof(int16_t), 1, pfp) != 1) return eslFAIL;
  if (fwrite((char *) &(om->ncj_roundoff), sizeof(float),   1, pfp) != 1) return eslFAIL;

  /* Forward/Backward part */
  if (fwrite((char *) om->tfv, sizeof(__m128), 8*Q4, pfp) != 8*Q4) return eslFAIL;
  for (x = 0; x < om->abc->Kp; x++)
    if (fwrite( (char *) om->rfv[x], sizeof(__m128), Q4, pfp) != Q4) return eslFAIL;
  for (x = 0; x < p7O_NXSTATES; x++)
    if (fwrite( (char *) om->xf[x], sizeof(float), p7O_NXTRANS, pfp) != p7O_NXTRANS) return eslFAIL;

  if (fwrite((char *) om->cutoff,  sizeof(float), p7_NCUTOFFS, pfp) != p7_NCUTOFFS) return eslFAIL;
  if (fwrite((char *) &(om->nj),   sizeof(float), 1,           pfp) != 1)           return eslFAIL;
  if (fwrite((char *) &(om->mode), sizeof(int),   1,           pfp) != 1)           return eslFAIL;
  if (fwrite((char *) &(om->L)   , sizeof(int),   1,           pfp) != 1)           return eslFAIL;
  return eslOK;
}
/* Function:  p7_omx_DumpFBRow()
 * Synopsis:  Dump one row from float part of a DP matrix.
 * Incept:    SRE, Wed Jul 30 16:45:16 2008 [Janelia]
 *
 * Purpose:   Dump current row of Forward/Backward (float) part of DP
 *            matrix <ox> for diagnostics, and include the values of
 *            specials <xE>, etc. The index <rowi> for the current row
 *            is used as a row label.
 *
 *            The output format of the floats is controlled by
 *            <width>, <precision>; 8,5 is good for pspace, 5,2 is
 *            fine for lspace.
 *
 *            If <rowi> is 0, print a header first too.
 *
 *            If <logify> is TRUE, then scores are printed as log(score); this is
 *            useful for comparing DP with pspace scores with other DP matrices
 *            (like generic P7_GMX ones) that use log-odds scores.
 *
 *            The output format is coordinated with <p7_gmx_Dump()> to
 *            facilitate comparison to a known answer.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_omx_DumpFBRow(P7_OMX *ox, int logify, int rowi, int width, int precision, float xE, float xN, float xJ, float xB, float xC)
{
  vector float *dp;
  int      M  = ox->M;
  int      Q  = p7O_NQF(M);
  float   *v  = NULL;		/* array of uninterleaved, unstriped scores  */
  int q,z,k;
  union { vector float v; float x[4]; } tmp;  /* for extracting scalar lanes from a vector */
  int status;

  /* a one-row matrix (allocR==1) always stores its data in dpf[0], whatever <rowi> is */
  dp = (ox->allocR == 1) ? ox->dpf[0] : ox->dpf[rowi];  /* must set <dp> before using {MDI}MX macros */

  ESL_ALLOC(v, sizeof(float) * ((Q*4)+1));
  v[0] = 0.;			/* k=0 column doesn't exist in striped storage; print as 0 */

  if (rowi == 0)
    {
      fprintf(ox->dfp, " ");
      for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*d ", width, k);
      fprintf(ox->dfp, "%*s %*s %*s %*s %*s\n", width, "E", width, "N", width, "J", width, "B", width, "C");
      fprintf(ox->dfp, " ");
      for (k = 0; k <= M+5; k++) fprintf(ox->dfp, "%*s ", width, "--------");
      fprintf(ox->dfp, "\n");
    }

  /* Unpack, unstripe, then print M's.
   * Striped element z of vector q is model position k = q + Q*z + 1.
   */
  for (q = 0; q < Q; q++) {
    tmp.v = MMXo(q);
    for (z = 0; z < 4; z++) v[q+Q*z+1] = tmp.x[z];
  }
  fprintf(ox->dfp, "%3d M ", rowi);
  if (logify) for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k] == 0. ? -eslINFINITY : log(v[k]));
  else        for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k]);

  /* The specials */
  if (logify)
    fprintf(ox->dfp, "%*.*f %*.*f %*.*f %*.*f %*.*f\n",
	    width, precision, xE == 0. ? -eslINFINITY : log(xE),
	    width, precision, xN == 0. ? -eslINFINITY : log(xN),
	    width, precision, xJ == 0. ? -eslINFINITY : log(xJ),
	    width, precision, xB == 0. ? -eslINFINITY : log(xB),
	    width, precision, xC == 0. ? -eslINFINITY : log(xC));
  else
    fprintf(ox->dfp, "%*.*f %*.*f %*.*f %*.*f %*.*f\n",
	    width, precision, xE,
	    width, precision, xN,
	    width, precision, xJ,
	    width, precision, xB,
	    width, precision, xC);

  /* Unpack, unstripe, then print I's. */
  for (q = 0; q < Q; q++) {
    tmp.v = IMXo(q);
    for (z = 0; z < 4; z++) v[q+Q*z+1] = tmp.x[z];
  }
  fprintf(ox->dfp, "%3d I ", rowi);
  if (logify) for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k] == 0. ? -eslINFINITY : log(v[k]));
  else        for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k]);
  fprintf(ox->dfp, "\n");

  /* Unpack, unstripe, then print D's. */
  for (q = 0; q < Q; q++) {
    tmp.v = DMXo(q);
    for (z = 0; z < 4; z++) v[q+Q*z+1] = tmp.x[z];
  }
  fprintf(ox->dfp, "%3d D ", rowi);
  if (logify) for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k] == 0. ? -eslINFINITY : log(v[k]));
  else        for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k]);
  fprintf(ox->dfp, "\n\n");

  free(v);
  return eslOK;

ERROR:
  free(v);
  return status;
}
/* backward_engine(): SSE Backward fill, shared by full-matrix and one-row
 * ("checkpointed-to-a-single-row") callers.
 *
 * <do_full> selects row addressing: 1 = store every row i in bck->dpf[i];
 * 0 = all row indices collapse to bck->dpf[0] (memory-efficient single row).
 * <fwd> supplies the sparse-rescaling scale factors (usually reused here);
 * <bck> receives the Backward matrix; <opt_sc> optionally receives the
 * Backward log score (nats).
 *
 * Returns eslOK on success; throws eslERANGE on NaN/underflow/overflow of
 * the final Backward value.
 */
static int
backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
{
  register __m128 mpv, ipv, dpv;      /* previous row values                                       */
  register __m128 mcv, dcv;           /* current row values                                        */
  register __m128 tmmv, timv, tdmv;   /* tmp vars for accessing rotated transition scores          */
  register __m128 xBv;		      /* collects B->Mk components of B(i)                         */
  register __m128 xEv;		      /* splatted E(i)                                             */
  __m128   zerov;		      /* splatted 0.0's in a vector                                */
  float    xN, xE, xB, xC, xJ;	      /* special states' scores                                    */
  int      i;			      /* counter over sequence positions 0,1..L                    */
  int      q;			      /* counter over quads 0..Q-1                                 */
  int      Q = p7O_NQF(om->M);	      /* segment length: # of vectors                              */
  int      j;			      /* DD segment iteration counter (4 = full serialization)     */
  __m128  *dpc;                       /* current DP row                                            */
  __m128  *dpp;	                      /* next ("previous") DP row                                  */
  __m128  *rp;			      /* will point into om->rfv[x] for residue x[i+1]             */
  __m128  *tp;		              /* will point into (and step thru) om->tfv transition scores */

  /* initialize the L row. */
  bck->M = om->M;
  bck->L = L;
  bck->has_own_scales = FALSE;	/* backwards scale factors are *usually* given by <fwd> */
  dpc    = bck->dpf[L * do_full];
  xJ     = 0.0;
  xB     = 0.0;
  xN     = 0.0;
  xC     = om->xf[p7O_C][p7O_MOVE];      /* C<-T          */
  xE     = xC * om->xf[p7O_E][p7O_MOVE]; /* E<-C, no tail */
  xEv    = _mm_set1_ps(xE);
  zerov  = _mm_setzero_ps();
  dcv    = zerov;		/* solely to silence a compiler warning */
  for (q = 0; q < Q; q++) MMO(dpc,q) = DMO(dpc,q) = xEv;
  for (q = 0; q < Q; q++) IMO(dpc,q) = zerov;

  /* init row L's DD paths, 1) first segment includes xE, from DMO(q) */
  tp  = om->tfv + 8*Q - 1;	                        /* <*tp> now the [4 8 12 x] TDD quad         */
  dpv = _mm_move_ss(DMO(dpc,Q-1), zerov);               /* start leftshift: [1 5 9 13] -> [x 5 9 13] */
  dpv = _mm_shuffle_ps(dpv, dpv, _MM_SHUFFLE(0,3,2,1)); /* finish leftshift:[x 5 9 13] -> [5 9 13 x] */
  for (q = Q-1; q >= 0; q--)
    {
      dcv        = _mm_mul_ps(dpv, *tp);      tp--;
      DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), dcv);
      dpv        = DMO(dpc,q);
    }
  /* 2) three more passes, only extending DD component (dcv only; no xE contrib from DMO(q)) */
  for (j = 1; j < 4; j++)
    {
      tp  = om->tfv + 8*Q - 1;	                            /* <*tp> now the [4 8 12 x] TDD quad         */
      dcv = _mm_move_ss(dcv, zerov);                        /* start leftshift: [1 5 9 13] -> [x 5 9 13] */
      dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1)); /* finish leftshift:[x 5 9 13] -> [5 9 13 x] */
      for (q = Q-1; q >= 0; q--)
	{
	  dcv        = _mm_mul_ps(dcv, *tp); tp--;
	  DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), dcv);
	}
    }
  /* now MD init */
  tp  = om->tfv + 7*Q - 3;	                        /* <*tp> now the [4 8 12 x] Mk->Dk+1 quad    */
  dcv = _mm_move_ss(DMO(dpc,0), zerov);                 /* start leftshift: [1 5 9 13] -> [x 5 9 13] */
  dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1)); /* finish leftshift:[x 5 9 13] -> [5 9 13 x] */
  for (q = Q-1; q >= 0; q--)
    {
      MMO(dpc,q) = _mm_add_ps(MMO(dpc,q), _mm_mul_ps(dcv, *tp)); tp -= 7;
      dcv        = DMO(dpc,q);
    }

  /* Sparse rescaling: same scale factors as fwd matrix */
  if (fwd->xmx[L*p7X_NXCELLS+p7X_SCALE] > 1.0)
    {
      xE  = xE / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xN  = xN / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xC  = xC / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xJ  = xJ / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xB  = xB / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xEv = _mm_set1_ps(1.0 / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE]);
      for (q = 0; q < Q; q++) {
	MMO(dpc,q) = _mm_mul_ps(MMO(dpc,q), xEv);
	DMO(dpc,q) = _mm_mul_ps(DMO(dpc,q), xEv);
	IMO(dpc,q) = _mm_mul_ps(IMO(dpc,q), xEv);
      }
    }
  bck->xmx[L*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
  bck->totscale                     = log(bck->xmx[L*p7X_NXCELLS+p7X_SCALE]);

  /* Stores */
  bck->xmx[L*p7X_NXCELLS+p7X_E] = xE;
  bck->xmx[L*p7X_NXCELLS+p7X_N] = xN;
  bck->xmx[L*p7X_NXCELLS+p7X_J] = xJ;
  bck->xmx[L*p7X_NXCELLS+p7X_B] = xB;
  bck->xmx[L*p7X_NXCELLS+p7X_C] = xC;

#if p7_DEBUGGING
  if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, L, 9, 4, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=L, width=9, precision=4*/
#endif

  /* main recursion */
  for (i = L-1; i >= 1; i--)	/* backwards stride */
    {
      /* phase 1. B(i) collected. Old row destroyed, new row contains
       *    complete I(i,k), partial {MD}(i,k) w/ no {MD}->{DE} paths yet.
       */
      dpc = bck->dpf[i     * do_full];
      dpp = bck->dpf[(i+1) * do_full];
      rp  = om->rfv[dsq[i+1]] + Q-1; /* <*rp> is now the [4 8 12 x] match emission quad   */
      tp  = om->tfv + 7*Q - 1;	     /* <*tp> is now the [4 8 12 x] TII transition quad   */

      /* leftshift the first transition quads */
      tmmv = _mm_move_ss(om->tfv[1], zerov); tmmv = _mm_shuffle_ps(tmmv, tmmv, _MM_SHUFFLE(0,3,2,1));
      timv = _mm_move_ss(om->tfv[2], zerov); timv = _mm_shuffle_ps(timv, timv, _MM_SHUFFLE(0,3,2,1));
      tdmv = _mm_move_ss(om->tfv[3], zerov); tdmv = _mm_shuffle_ps(tdmv, tdmv, _MM_SHUFFLE(0,3,2,1));

      mpv = _mm_mul_ps(MMO(dpp,0), om->rfv[dsq[i+1]][0]); /* precalc M(i+1,k+1) * e(M_k+1, x_{i+1}) */
      mpv = _mm_move_ss(mpv, zerov);
      mpv = _mm_shuffle_ps(mpv, mpv, _MM_SHUFFLE(0,3,2,1));

      xBv = zerov;
      for (q = Q-1; q >= 0; q--)     /* backwards stride */
	{
	  ipv = IMO(dpp,q); /* assumes emission odds ratio of 1.0; i+1's IMO(q) now free */
	  IMO(dpc,q) = _mm_add_ps(_mm_mul_ps(ipv, *tp), _mm_mul_ps(mpv, timv));   tp--;
	  DMO(dpc,q) = _mm_mul_ps(mpv, tdmv);
	  mcv        = _mm_add_ps(_mm_mul_ps(ipv, *tp), _mm_mul_ps(mpv, tmmv));   tp-= 2;

	  mpv        = _mm_mul_ps(MMO(dpp,q), *rp);  rp--;  /* obtain mpv for next q. i+1's MMO(q) is freed */
	  MMO(dpc,q) = mcv;

	  tdmv = *tp;   tp--;
	  timv = *tp;   tp--;
	  tmmv = *tp;   tp--;

	  xBv = _mm_add_ps(xBv, _mm_mul_ps(mpv, *tp)); tp--;
	}

      /* phase 2: now that we have accumulated the B->Mk transitions in xBv, we can do the specials */
      /* this incantation is a horiz sum of xBv elements: (_mm_hadd_ps() would require SSE3) */
      xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(0, 3, 2, 1)));
      xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(1, 0, 3, 2)));
      _mm_store_ss(&xB, xBv);

      xC = xC * om->xf[p7O_C][p7O_LOOP];
      xJ = (xB * om->xf[p7O_J][p7O_MOVE]) + (xJ * om->xf[p7O_J][p7O_LOOP]); /* must come after xB     */
      xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]); /* must come after xB     */
      xE = (xC * om->xf[p7O_E][p7O_MOVE]) + (xJ * om->xf[p7O_E][p7O_LOOP]); /* must come after xJ, xC */
      xEv = _mm_set1_ps(xE);	/* splat */

      /* phase 3: {MD}->E paths and one step of the D->D paths */
      tp  = om->tfv + 8*Q - 1;	/* <*tp> now the [4 8 12 x] TDD quad */
      dpv = _mm_add_ps(DMO(dpc,0), xEv);
      dpv = _mm_move_ss(dpv, zerov);
      dpv = _mm_shuffle_ps(dpv, dpv, _MM_SHUFFLE(0,3,2,1));
      for (q = Q-1; q >= 0; q--)
	{
	  dcv        = _mm_mul_ps(dpv, *tp); tp--;
	  DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), _mm_add_ps(dcv, xEv));
	  dpv        = DMO(dpc,q);
	  MMO(dpc,q) = _mm_add_ps(MMO(dpc,q), xEv);
	}

      /* phase 4: finish extending the DD paths */
      /* fully serialized for now */
      for (j = 1; j < 4; j++)	/* three passes: we've already done 1 segment, we need 4 total */
	{
	  dcv = _mm_move_ss(dcv, zerov);
	  dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1));
	  tp  = om->tfv + 8*Q - 1;	/* <*tp> now the [4 8 12 x] TDD quad */
	  for (q = Q-1; q >= 0; q--)
	    {
	      dcv        = _mm_mul_ps(dcv, *tp); tp--;
	      DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), dcv);
	    }
	}

      /* phase 5: add M->D paths */
      dcv = _mm_move_ss(DMO(dpc,0), zerov);
      dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1));
      tp  = om->tfv + 7*Q - 3;	/* <*tp> is now the [4 8 12 x] Mk->Dk+1 quad */
      for (q = Q-1; q >= 0; q--)
	{
	  MMO(dpc,q) = _mm_add_ps(MMO(dpc,q), _mm_mul_ps(dcv, *tp)); tp -= 7;
	  dcv        = DMO(dpc,q);
	}

      /* Sparse rescaling */
      /* In rare cases [J3/119] scale factors from <fwd> are
       * insufficient and backwards will overflow. In this case, we
       * switch on the fly to using our own scale factors, different
       * from those in <fwd>. This will complicate subsequent
       * posterior decoding routines.
       */
      if (xB > 1.0e16) bck->has_own_scales = TRUE;
      if      (bck->has_own_scales)  bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = (xB > 1.0e4) ? xB : 1.0;
      else                           bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[i*p7X_NXCELLS+p7X_SCALE];

      if (bck->xmx[i*p7X_NXCELLS+p7X_SCALE] > 1.0)
	{
	  xE /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
	  xN /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
	  xJ /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
	  xB /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
	  xC /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
	  xBv = _mm_set1_ps(1.0 / bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);  /* xBv reused here as the splatted rescale factor */
	  for (q = 0; q < Q; q++) {
	    MMO(dpc,q) = _mm_mul_ps(MMO(dpc,q), xBv);
	    DMO(dpc,q) = _mm_mul_ps(DMO(dpc,q), xBv);
	    IMO(dpc,q) = _mm_mul_ps(IMO(dpc,q), xBv);
	  }
	  bck->totscale += log(bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
	}

      /* Stores are separate only for pedagogical reasons: easy to
       * turn this into a more memory efficient version just by
       * deleting the stores.
       */
      bck->xmx[i*p7X_NXCELLS+p7X_E] = xE;
      bck->xmx[i*p7X_NXCELLS+p7X_N] = xN;
      bck->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
      bck->xmx[i*p7X_NXCELLS+p7X_B] = xB;
      bck->xmx[i*p7X_NXCELLS+p7X_C] = xC;

#if p7_DEBUGGING
      if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, i, 9, 4, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=i, width=9, precision=4*/
#endif
    } /* thus ends the loop over sequence positions i */

  /* Termination at i=0, where we can only reach N,B states. */
  dpp = bck->dpf[1 * do_full];
  tp  = om->tfv;          /* <*tp> is now the [1 5 9 13] TBMk transition quad  */
  rp  = om->rfv[dsq[1]];  /* <*rp> is now the [1 5 9 13] match emission quad   */
  xBv = zerov;
  for (q = 0; q < Q; q++)
    {
      mpv = _mm_mul_ps(MMO(dpp,q), *rp);  rp++;
      mpv = _mm_mul_ps(mpv,        *tp);  tp += 7;
      xBv = _mm_add_ps(xBv,        mpv);
    }
  /* horizontal sum of xBv */
  xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(0, 3, 2, 1)));
  xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(1, 0, 3, 2)));
  _mm_store_ss(&xB, xBv);

  xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]);

  bck->xmx[p7X_B]     = xB;
  bck->xmx[p7X_C]     = 0.0;
  bck->xmx[p7X_J]     = 0.0;
  bck->xmx[p7X_N]     = xN;
  bck->xmx[p7X_E]     = 0.0;
  bck->xmx[p7X_SCALE] = 1.0;

#if p7_DEBUGGING
  dpc = bck->dpf[0];
  for (q = 0; q < Q; q++) /* Not strictly necessary, but if someone's looking at DP matrices, this is nice to do: */
    MMO(dpc,q) = DMO(dpc,q) = IMO(dpc,q) = zerov;
  if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, 0, 9, 4, bck->xmx[p7X_E], bck->xmx[p7X_N], bck->xmx[p7X_J], bck->xmx[p7X_B], bck->xmx[p7X_C]);	/* logify=TRUE, <rowi>=0, width=9, precision=4*/
#endif

  if       (isnan(xN))        ESL_EXCEPTION(eslERANGE, "backward score is NaN");
  else if  (L>0 && xN == 0.0) ESL_EXCEPTION(eslERANGE, "backward score underflow (is 0.0)");  /* if L==0, xN *should* be 0.0 [J5/118]*/
  else if  (isinf(xN) == 1)   ESL_EXCEPTION(eslERANGE, "backward score overflow (is infinity)");

  if (opt_sc != NULL) *opt_sc = bck->totscale + log(xN);
  return eslOK;
}
/* forward_engine()
 * Striped SSE Forward recursion in probability space with sparse rescaling.
 *
 * do_full : TRUE to keep all DP rows (full matrix); FALSE to reuse row 0 (O(M) memory)
 * dsq     : digitized target sequence 1..L
 * L       : length of <dsq>
 * om      : optimized profile, Forward/Backward (float) part
 * ox      : DP matrix to fill; caller allocated, engine sets ox->M, ox->L
 * opt_sc  : optRETURN: raw Forward lod score in nats
 *
 * Returns <eslOK> on success; throws <eslERANGE> on numeric over/underflow.
 */
static int
forward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
{
  register __m128 mpv, dpv, ipv;   /* previous row values                                       */
  register __m128 sv;		   /* temp storage of 1 curr row value in progress              */
  register __m128 dcv;		   /* delayed storage of D(i,q+1)                               */
  register __m128 xEv;		   /* E state: keeps max for Mk->E as we go                     */
  register __m128 xBv;		   /* B state: splatted vector of B[i-1] for B->Mk calculations */
  __m128   zerov;		   /* splatted 0.0's in a vector                                */
  float    xN, xE, xB, xC, xJ;	   /* special states' scores                                    */
  int i;			   /* counter over sequence positions 1..L                      */
  int q;			   /* counter over quads 0..nq-1                                */
  int j;			   /* counter over DD iterations (4 is full serialization)      */
  int Q       = p7O_NQF(om->M);	   /* segment length: # of vectors                              */
  __m128 *dpc = ox->dpf[0];        /* current row, for use in {MDI}MO(dpp,q) access macro       */
  __m128 *dpp;                     /* previous row, for use in {MDI}MO(dpp,q) access macro      */
  __m128 *rp;			   /* will point at om->rfv[x] for residue x[i]                 */
  __m128 *tp;			   /* will point into (and step thru) om->tfv                   */

  /* Initialization: row 0 of the DP matrix is all zeros; only N (and B via N->B) is reachable. */
  ox->M  = om->M;
  ox->L  = L;
  ox->has_own_scales = TRUE; 	/* all forward matrices control their own scalefactors */
  zerov  = _mm_setzero_ps();
  for (q = 0; q < Q; q++)
    MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = zerov;
  xE    = ox->xmx[p7X_E] = 0.;
  xN    = ox->xmx[p7X_N] = 1.;
  xJ    = ox->xmx[p7X_J] = 0.;
  xB    = ox->xmx[p7X_B] = om->xf[p7O_N][p7O_MOVE];
  xC    = ox->xmx[p7X_C] = 0.;

  ox->xmx[p7X_SCALE] = 1.0;
  ox->totscale       = 0.0;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, 0, 9, 5, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=0, width=9, precision=5 */
#endif

  for (i = 1; i <= L; i++)
    {
      dpp   = dpc;
      dpc   = ox->dpf[do_full * i];     /* avoid conditional, use do_full as kronecker delta */
      rp    = om->rfv[dsq[i]];
      tp    = om->tfv;
      dcv   = _mm_setzero_ps();
      xEv   = _mm_setzero_ps();
      xBv   = _mm_set1_ps(xB);

      /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12.  Shift zeros on.
       * This sets up the k-1 (previous column) values for the striped recursion.
       */
      mpv = esl_sse_rightshift_ps(MMO(dpp,Q-1), zerov);
      dpv = esl_sse_rightshift_ps(DMO(dpp,Q-1), zerov);
      ipv = esl_sse_rightshift_ps(IMO(dpp,Q-1), zerov);

      for (q = 0; q < Q; q++)
	{
	  /* Calculate new MMO(i,q); don't store it yet, hold it in sv. */
	  sv   =                _mm_mul_ps(xBv, *tp);  tp++;
	  sv   = _mm_add_ps(sv, _mm_mul_ps(mpv, *tp)); tp++;
	  sv   = _mm_add_ps(sv, _mm_mul_ps(ipv, *tp)); tp++;
	  sv   = _mm_add_ps(sv, _mm_mul_ps(dpv, *tp)); tp++;
	  sv   = _mm_mul_ps(sv, *rp);                  rp++;
	  xEv  = _mm_add_ps(xEv, sv);

	  /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
	   * {MDI}MX(q) is then the current, not the prev row
	   */
	  mpv = MMO(dpp,q);
	  dpv = DMO(dpp,q);
	  ipv = IMO(dpp,q);

	  /* Do the delayed stores of {MD}(i,q) now that memory is usable */
	  MMO(dpc,q) = sv;
	  DMO(dpc,q) = dcv;

	  /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
	   */
	  dcv   = _mm_mul_ps(sv, *tp); tp++;

	  /* Calculate and store I(i,q); assumes odds ratio for emission is 1.0 */
	  sv         =                _mm_mul_ps(mpv, *tp);  tp++;
	  IMO(dpc,q) = _mm_add_ps(sv, _mm_mul_ps(ipv, *tp)); tp++;
	}

      /* Now the DD paths. We would rather not serialize them but
       * in an accurate Forward calculation, we have few options.
       */
      /* dcv has carried through from end of q loop above; store it
       * in first pass, we add M->D and D->D path into DMX
       */
      /* We're almost certainly obligated to do at least one complete
       * DD path to be sure:
       */
      dcv        = esl_sse_rightshift_ps(dcv, zerov);
      DMO(dpc,0) = zerov;
      tp         = om->tfv + 7*Q;	/* set tp to start of the DD's */
      for (q = 0; q < Q; q++)
	{
	  DMO(dpc,q) = _mm_add_ps(dcv, DMO(dpc,q));
	  dcv        = _mm_mul_ps(DMO(dpc,q), *tp); tp++; /* extend DMO(q), so we include M->D and D->D paths */
	}

      /* now. on small models, it seems best (empirically) to just go
       * ahead and serialize. on large models, we can do a bit better,
       * by testing for when dcv (DD path) accrued to DMO(q) is below
       * machine epsilon for all q, in which case we know DMO(q) are all
       * at their final values.
       *
       * The tradeoff point is (empirically) somewhere around M=100,
       * at least on my desktop. We don't worry about the conditional here;
       * it's outside any inner loops.
       */
      if (om->M < 100)
	{			/* Fully serialized version */
	  for (j = 1; j < 4; j++)
	    {
	      dcv = esl_sse_rightshift_ps(dcv, zerov);
	      tp  = om->tfv + 7*Q;	/* set tp to start of the DD's */
	      for (q = 0; q < Q; q++)
		{ /* note, extend dcv, not DMO(q); only adding DD paths now */
		  DMO(dpc,q) = _mm_add_ps(dcv, DMO(dpc,q));
		  dcv        = _mm_mul_ps(dcv, *tp);   tp++;
		}
	    }
	}
      else
	{			/* Slightly parallelized version, but which incurs some overhead */
	  for (j = 1; j < 4; j++)
	    {
	      register __m128 cv;	/* keeps track of whether any DD's change DMO(q) */

	      dcv = esl_sse_rightshift_ps(dcv, zerov);
	      tp  = om->tfv + 7*Q;	/* set tp to start of the DD's */
	      cv  = zerov;
	      for (q = 0; q < Q; q++)
		{ /* using cmpgt below tests if DD changed any DMO(q) *without* conditional branch */
		  sv         = _mm_add_ps(dcv, DMO(dpc,q));
		  cv         = _mm_or_ps(cv, _mm_cmpgt_ps(sv, DMO(dpc,q)));
		  DMO(dpc,q) = sv;	                                    /* store new DMO(q) */
		  dcv        = _mm_mul_ps(dcv, *tp);   tp++;                /* note, extend dcv, not DMO(q) */
		}
	      if (! _mm_movemask_ps(cv)) break; /* DD's didn't change any DMO(q)? Then done, break out. */
	    }
	}

      /* Add D's to xEv */
      for (q = 0; q < Q; q++) xEv = _mm_add_ps(DMO(dpc,q), xEv);

      /* Finally the "special" states, which start from Mk->E (->C, ->J->B) */
      /* The following incantation is a horizontal sum of xEv's elements  */
      /* These must follow DD calculations, because D's contribute to E in Forward
       * (as opposed to Viterbi)
       */
      xEv = _mm_add_ps(xEv, _mm_shuffle_ps(xEv, xEv, _MM_SHUFFLE(0, 3, 2, 1)));
      xEv = _mm_add_ps(xEv, _mm_shuffle_ps(xEv, xEv, _MM_SHUFFLE(1, 0, 3, 2)));
      _mm_store_ss(&xE, xEv);

      xN =  xN * om->xf[p7O_N][p7O_LOOP];
      xC = (xC * om->xf[p7O_C][p7O_LOOP]) +  (xE * om->xf[p7O_E][p7O_MOVE]);
      xJ = (xJ * om->xf[p7O_J][p7O_LOOP]) +  (xE * om->xf[p7O_E][p7O_LOOP]);
      xB = (xJ * om->xf[p7O_J][p7O_MOVE]) +  (xN * om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Sparse rescaling. xE above threshold? trigger a rescaling event.            */
      if (xE > 1.0e4)		/* that's a little less than e^10, ~10% of our dynamic range */
	{
	  xN  = xN / xE;
	  xC  = xC / xE;
	  xJ  = xJ / xE;
	  xB  = xB / xE;
	  xEv = _mm_set1_ps(1.0 / xE);
	  for (q = 0; q < Q; q++)
	    {
	      MMO(dpc,q) = _mm_mul_ps(MMO(dpc,q), xEv);
	      DMO(dpc,q) = _mm_mul_ps(DMO(dpc,q), xEv);
	      IMO(dpc,q) = _mm_mul_ps(IMO(dpc,q), xEv);
	    }
	  ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = xE;
	  ox->totscale += log(xE);
	  xE = 1.0;
	}
      else ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = 1.0;

      /* Storage of the specials.  We could've stored these already
       * but using xE, etc. variables makes it easy to convert this
       * code to O(M) memory versions just by deleting storage steps.
       */
      ox->xmx[i*p7X_NXCELLS+p7X_E] = xE;
      ox->xmx[i*p7X_NXCELLS+p7X_N] = xN;
      ox->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
      ox->xmx[i*p7X_NXCELLS+p7X_B] = xB;
      ox->xmx[i*p7X_NXCELLS+p7X_C] = xC;

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, i, 9, 5, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=i, width=9, precision=5 */
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T, and flip total score back to log space (nats) */
  /* On overflow, xC is inf or nan (nan arises because inf*0 = nan). */
  /* On an underflow (which shouldn't happen), we counterintuitively return infinity:
   * the effect of this is to force the caller to rescore us with full range.
   */
  if       (isnan(xC))        ESL_EXCEPTION(eslERANGE, "forward score is NaN");
  else if  (L>0 && xC == 0.0) ESL_EXCEPTION(eslERANGE, "forward score underflow (is 0.0)");	/* if L==0, xC *should* be 0.0; J5/118 */
  else if  (isinf(xC) == 1)   ESL_EXCEPTION(eslERANGE, "forward score overflow (is infinity)");

  if (opt_sc != NULL) *opt_sc = ox->totscale + log(xC * om->xf[p7O_C][p7O_MOVE]);
  return eslOK;
}
/* Function:  p7_oprofile_MPIPack()
 * Synopsis:  Packs an OPROFILE into MPI buffer.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Packs OPROFILE <om> into an MPI packed message buffer <buf>
 *            of length <n> bytes, starting at byte position <*position>,
 *            for MPI communicator <comm>.
 *
 *            The caller must know that <buf>'s allocation of <n>
 *            bytes is large enough to append the packed OPROFILE at
 *            position <*pos>. This typically requires a call to
 *            <p7_oprofile_MPIPackSize()> first, and reallocation if
 *            needed.
 *
 * Returns:   <eslOK> on success; <buf> now contains the
 *            packed <om>, and <*position> is set to the byte
 *            immediately following the last byte of the OPROFILE
 *            in <buf>.
 *
 * Throws:    <eslESYS> if an MPI call fails; or <eslEMEM> if the
 *            buffer's length <n> was overflowed in trying to pack
 *            <om> into <buf>. In either case, the state of
 *            <buf> and <*position> is undefined, and both should
 *            be considered to be corrupted.
 */
int
p7_oprofile_MPIPack(P7_OPROFILE *om, char *buf, int n, int *pos, MPI_Comm comm)
{
  int   K     = om->abc->Kp;       /* alphabet size incl. degeneracies */
  int   atype = om->abc->type;
  int   len;                       /* length (incl. NUL) of each optional annotation string */
  int   x;
  int   Q4    = p7O_NQF(om->M);    /* # vectors per row, float (Fwd/Bck) striping  */
  int   Q8    = p7O_NQW(om->M);    /* # vectors per row, int16 (ViterbiFilter)     */
  int   Q16   = p7O_NQB(om->M);    /* # vectors per row, int8  (MSVFilter)         */
  int   vsz   = sizeof(vector float);  /* bytes per SIMD vector; NOTE(review): Altivec type in otherwise SSE code — confirm this TU targets VMX, else should be sizeof(__m128) (same size, 16) */

  /* model configuration */
  if (MPI_Pack(&om->M,            1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&atype,            1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->L,            1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->mode,         1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->nj,           1, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  /* MSV Filter information */
  if (MPI_Pack(&om->tbm_b,        1, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->tec_b,        1, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->tjb_b,        1, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->scale_b,      1, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->base_b,       1, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->bias_b,       1, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  for (x = 0; x < K; x++)   /* vectors are packed as raw bytes: vsz bytes per vector */
    if (MPI_Pack( om->rbv[x],  vsz*Q16, MPI_CHAR, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  /* Viterbi Filter information */
  if (MPI_Pack(&om->scale_w,      1, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->base_w,       1, MPI_SHORT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->ddbound_w,    1, MPI_SHORT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->ncj_roundoff, 1, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack( om->twv,    8*vsz*Q8, MPI_CHAR, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  for (x = 0; x < p7O_NXSTATES; x++)
    if (MPI_Pack( om->xw[x], p7O_NXTRANS, MPI_SHORT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  for (x = 0; x < K; x++)
    if (MPI_Pack( om->rwv[x],    vsz*Q8, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  /* Forward/Backward information */
  if (MPI_Pack( om->tfv,    8*vsz*Q4, MPI_CHAR, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  for (x = 0; x < p7O_NXSTATES; x++)
    if (MPI_Pack( om->xf[x], p7O_NXTRANS, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  for (x = 0; x < K; x++)
    if (MPI_Pack( om->rfv[x],    vsz*Q4, MPI_CHAR, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  /* Disk offset information (was mislabeled "Forward/Backward information") */
  if (MPI_Pack( om->offs, p7_NOFFSETS, MPI_LONG_LONG_INT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->roff,           1, MPI_LONG_LONG_INT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack(&om->eoff,           1, MPI_LONG_LONG_INT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  /* Annotation information: each optional string is sent as (length incl. NUL, bytes); length 0 means absent */
  len = (om->name != NULL) ? strlen(om->name)+1 : 0;
  if (MPI_Pack(&len,              1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (len > 0)
    if (MPI_Pack( om->name,     len, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  len = (om->acc != NULL) ? strlen(om->acc)+1 : 0;
  if (MPI_Pack(&len,              1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (len > 0)
    if (MPI_Pack( om->acc,      len, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  len = (om->desc != NULL) ? strlen(om->desc)+1 : 0;
  if (MPI_Pack(&len,              1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (len > 0)
    if (MPI_Pack( om->desc,     len, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  len = (om->rf != NULL) ? strlen(om->rf)+1 : 0;
  if (MPI_Pack(&len,              1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (len > 0)
    if (MPI_Pack( om->rf,       len, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  len = (om->cs != NULL) ? strlen(om->cs)+1 : 0;
  if (MPI_Pack(&len,              1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (len > 0)
    if (MPI_Pack( om->cs,       len, MPI_CHAR,  buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  len = (om->consensus != NULL) ? strlen(om->consensus)+1 : 0;
  if (MPI_Pack(&len,              1, MPI_INT,   buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (len > 0)
    if (MPI_Pack( om->consensus, len, MPI_CHAR, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  if (MPI_Pack( om->evparam, p7_NEVPARAM, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack( om->cutoff,  p7_NCUTOFFS, MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");
  if (MPI_Pack( om->compo,   p7_MAXABET,  MPI_FLOAT, buf, n, pos, comm) != 0) ESL_EXCEPTION(eslESYS, "pack failed");

  /* final sanity: MPI_Pack does its own bounds checking, but belt-and-braces */
  if (*pos > n) ESL_EXCEPTION(eslEMEM, "buffer overflow");
  return eslOK;
}
/* Function:  p7_oprofile_ReadRest()
 * Synopsis:  Read the rest of an optimized profile.
 * Incept:    SRE, Wed Jan 21 11:04:56 2009 [Janelia]
 *
 * Purpose:   Read the rest of an optimized profile <om> from
 *            the <.h3p> file associated with an open HMM
 *            file <hfp>.
 *
 *            This is the second part of a two-part calling sequence.
 *            The <om> here must be the result of a previous
 *            successful <p7_oprofile_ReadMSV()> call on the same
 *            open <hfp>.
 *
 * Args:      hfp - open HMM file, from which we've previously
 *                  called <p7_oprofile_ReadMSV()>.
 *            om  - optimized profile that was successfully
 *                  returned by  <p7_oprofile_ReadMSV()>.
 *
 * Returns:   <eslOK> on success, and <om> is now a complete
 *            optimized profile.
 *
 *            Returns <eslEFORMAT> if <hfp> has no <.h3p> file open,
 *            or on any parsing error, and set <hfp->errbuf> to
 *            an informative error message.
 *
 * Throws:    <eslESYS> if an <fseek()> fails to reposition the
 *            binary <.h3p> file.
 *
 *            <eslEMEM> on allocation error.
 */
int
p7_oprofile_ReadRest(P7_HMMFILE *hfp, P7_OPROFILE *om)
{
  uint32_t magic;
  int      M, Q4, Q8;
  int      x,n;
  char    *name = NULL;   /* scratch copy of the stored model name, checked against om->name */
  int      alphatype;
  int      status;

#ifdef HMMER_THREADS
  /* lock the mutex to prevent other threads from reading from the optimized
   * profile at the same time.
   */
  if (hfp->syncRead)
    {
      if (pthread_mutex_lock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed");
    }
#endif

  if (hfp->errbuf != NULL) hfp->errbuf[0] = '\0';
  if (hfp->pfp == NULL) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "no MSV profile file; hmmpress probably wasn't run");

  /* Position the <hfp->pfp> using offset stored in <om> */
  if (fseeko(hfp->pfp, om->offs[p7_POFFSET], SEEK_SET) != 0) ESL_EXCEPTION(eslESYS, "fseeko() failed");

  /* Header: magic, model size, alphabet type, name; these must agree with
   * what p7_oprofile_ReadMSV() already put into <om> from the .h3f file.
   */
  if (! fread( (char *) &magic,          sizeof(uint32_t), 1,  hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read magic");
  if (magic == v3a_pmagic)                                                ESL_XFAIL(eslEFORMAT, hfp->errbuf, "this is an outdated HMM database (3/a format); please hmmpress your HMM file again");
  if (magic != v3b_pmagic)                                                ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad magic; not an HMM database file?");

  if (! fread( (char *) &M,              sizeof(int),      1,  hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read model size M");
  if (! fread( (char *) &alphatype,      sizeof(int),      1,  hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read alphabet type");
  if (! fread( (char *) &n,              sizeof(int),      1,  hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read name length");
  if (M         != om->M)                                                 ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f model length mismatch");
  if (alphatype != om->abc->type)                                         ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f alphabet type mismatch");

  ESL_ALLOC(name, sizeof(char) * (n+1));
  if (! fread( (char *) name,            sizeof(char),     n+1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read name");
  if (strcmp(name, om->name) != 0)                                         ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f name mismatch");

  /* Optional annotation strings: a stored length of 0 means "absent" */
  if (! fread((char *) &n, sizeof(int), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read accession length");
  if (n > 0) {
    ESL_ALLOC(om->acc, sizeof(char) * (n+1));
    if (! fread( (char *) om->acc,       sizeof(char),     n+1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read accession");
  }
  if (! fread((char *) &n, sizeof(int), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read description length");
  if (n > 0) {
    ESL_ALLOC(om->desc, sizeof(char) * (n+1));
    if (! fread( (char *) om->desc,      sizeof(char),     n+1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read description");
  }
  /* rf/cs/consensus are fixed-size (positions 1..M plus sentinels: M+2 chars) */
  if (! fread((char *) om->rf,           sizeof(char),     M+2, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read rf annotation");
  if (! fread((char *) om->cs,           sizeof(char),     M+2, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read cs annotation");
  if (! fread((char *) om->consensus,    sizeof(char),     M+2, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read consensus annotation");

  Q4 = p7O_NQF(om->M);
  Q8 = p7O_NQW(om->M);

  /* ViterbiFilter (int16) part */
  if (! fread((char *) om->twv,          sizeof(__m128i),  8*Q8, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read <tu>, vitfilter transitions");
  for (x = 0; x < om->abc->Kp; x++)
    if (! fread( (char *) om->rwv[x],    sizeof(__m128i),  Q8,   hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read <ru>[%d], vitfilter emissions for sym %c", x, om->abc->sym[x]);
  for (x = 0; x < p7O_NXSTATES; x++)
    if (! fread( (char *) om->xw[x],     sizeof(int16_t),  p7O_NXTRANS, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read <xu>[%d], vitfilter special transitions", x);
  if (! fread((char *) &(om->scale_w),      sizeof(float),   1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read scale_w");
  if (! fread((char *) &(om->base_w),       sizeof(int16_t), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read base_w");
  if (! fread((char *) &(om->ddbound_w),    sizeof(int16_t), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read ddbound_w");
  if (! fread((char *) &(om->ncj_roundoff), sizeof(float),   1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read ncj_roundoff"); /* was misreported as "ddbound_w" (copy/paste) */

  /* Forward/Backward (float) part */
  if (! fread((char *) om->tfv,          sizeof(__m128),   8*Q4, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read <tf> transitions");
  for (x = 0; x < om->abc->Kp; x++)
    if (! fread( (char *) om->rfv[x],    sizeof(__m128),   Q4,   hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read <rf>[%d] emissions for sym %c", x, om->abc->sym[x]);
  for (x = 0; x < p7O_NXSTATES; x++)
    if (! fread( (char *) om->xf[x],     sizeof(float),    p7O_NXTRANS, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read <xf>[%d] special transitions", x);

  if (! fread((char *) om->cutoff,       sizeof(float),    p7_NCUTOFFS, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read Pfam score cutoffs");
  if (! fread((char *) &(om->nj),        sizeof(float),    1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read nj");
  if (! fread((char *) &(om->mode),      sizeof(int),      1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read mode");
  if (! fread((char *) &(om->L)   ,      sizeof(int),      1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read L");

#ifdef HMMER_THREADS
  if (hfp->syncRead)
    {
      if (pthread_mutex_unlock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");
    }
#endif

  free(name);
  return eslOK;

 ERROR:
#ifdef HMMER_THREADS
  if (hfp->syncRead)
    {
      if (pthread_mutex_unlock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");
    }
#endif
  free(name);                   /* free(NULL) is a no-op; guard removed */
  return status;
}