/* Function: p7_filtermx_DumpMFRow() * Synopsis: Dump one row from MSV version of a DP matrix. * * Purpose: Dump current row of MSV calculations from DP matrix <fx> * for diagnostics, and include the values of specials * <xE>, etc. The index <rowi> for the current row is used * as a row label. This routine has to be specialized for * the layout of the MSVFilter() row, because it's all * match scores dp[0..q..Q-1], rather than triplets of * M,D,I. * * If <rowi> is 0, print a header first too. * * The output format is coordinated with <p7_refmx_Dump()> to * facilitate comparison to a known answer. * * This also works for an SSV filter row, for SSV implementations * that use a single row of DP memory (like <_longtarget>). * The Knudsen assembly code SSV does not use any RAM. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. */ int p7_filtermx_DumpMFRow_neon64(const P7_FILTERMX *fx, int rowi, uint8_t xE, uint8_t xN, uint8_t xJ, uint8_t xB, uint8_t xC) { #ifdef HAVE_NEON64 int Q = P7_NVB(fx->M); /* number of vectors in the MSV row */ uint8_t *v = NULL; /* array of scores after unstriping them */ int q,z,k; union { esl_neon_128i_t v; uint8_t i[16]; } tmp; int status; ESL_DASSERT1( (fx->type == p7F_MSVFILTER || fx->type == p7F_SSVFILTER) ); /* We'll unstripe the whole row; then print it in its normal order. */ ESL_ALLOC(v, sizeof(unsigned char) * ((Q*16)+1)); v[0] = 0; /* Header (if we're on the 0th row) */ if (rowi == 0) { fprintf(fx->dfp, " "); for (k = 0; k <= fx->M; k++) fprintf(fx->dfp, "%3d ", k); fprintf(fx->dfp, "%3s %3s %3s %3s %3s\n", "E", "N", "J", "B", "C"); fprintf(fx->dfp, " "); for (k = 0; k <= fx->M+5; k++) fprintf(fx->dfp, "%3s ", "---"); fprintf(fx->dfp, "\n"); } /* Unpack and unstripe, then print M's. 
*/ for (q = 0; q < Q; q++) { tmp.v = fx->dp[q]; for (z = 0; z < 16; z++) v[q+Q*z+1] = tmp.i[z]; } fprintf(fx->dfp, "%4d M ", rowi); for (k = 0; k <= fx->M; k++) fprintf(fx->dfp, "%3d ", v[k]); /* The specials */ fprintf(fx->dfp, "%3d %3d %3d %3d %3d\n", xE, xN, xJ, xB, xC); /* I's are all 0's; print just to facilitate comparison to refmx. */ fprintf(fx->dfp, "%4d I ", rowi); for (k = 0; k <= fx->M; k++) fprintf(fx->dfp, "%3d ", 0); fprintf(fx->dfp, "\n"); /* D's are all 0's too */ fprintf(fx->dfp, "%4d D ", rowi); for (k = 0; k <= fx->M; k++) fprintf(fx->dfp, "%3d ", 0); fprintf(fx->dfp, "\n\n"); free(v); return eslOK; ERROR: free(v); return status; #endif //HAVE_NEON64 #ifndef HAVE_NEON64 return eslENORESULT; #endif }
/* Write <count> items of type <type>, starting at <ptr>, to stream <fp>.
 * On a short write, ESL_EXCEPTION_SYS(eslEWRITE, ...) is invoked in the
 * enclosing function, exactly as the open-coded checks did. This is a
 * macro rather than a helper function because that failure path has to
 * act in the calling function.
 */
#define OPROFILE_WRITE_OR_THROW(fp, ptr, type, count) \
  do { \
    if (fwrite((char *) (ptr), sizeof(type), (count), (fp)) != (size_t) (count)) \
      ESL_EXCEPTION_SYS(eslEWRITE, "oprofile write failed"); \
  } while (0)

/* Function:  p7_oprofile_Write()
 * Synopsis:  Save an optimized profile as two binary records.
 *
 * Purpose:   Serialize <om> to two open binary streams. The MSV filter
 *            part goes to <ffp>; everything else (annotation, the
 *            ViterbiFilter part and the Forward/Backward part) goes to
 *            <pfp>. According to the original notes these are
 *            typically the <.h3f> and <.h3p> files produced by
 *            hmmpress.
 *
 *            Each record starts and ends with its magic number
 *            (<v3f_fmagic> for <ffp>, <v3f_pmagic> for <pfp>); the
 *            trailing copy is a sentinel. Strings are written as an
 *            int length followed by length+1 bytes (including the
 *            terminating NUL); a NULL <om->acc> or <om->desc> is
 *            written as length 0 with no bytes following.
 *
 * Args:      ffp - open binary stream for saving MSV filter part
 *            pfp - open binary stream for saving rest of profile
 *            om  - optimized profile to save
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEWRITE> on any write failure, such as filling
 *            the disk.
 */
int
p7_oprofile_Write(FILE *ffp, FILE *pfp, P7_OPROFILE *om)
{
  int nvf    = P7_NVF(om->M);                   /* # of float vectors per row  */
  int nvw    = P7_NVW(om->M);                   /* # of word vectors per row   */
  int nvb    = P7_NVB(om->M);                   /* # of byte vectors per row   */
  int nvb_sb = P7_NVB(om->M) + p7O_EXTRA_SB;    /* byte vectors + SSV padding  */
  int len    = strlen(om->name);                /* length of string being sent */
  int r;                                        /* residue / state index       */

  /* ---- <ffp>: the part of the oprofile that MSVFilter() needs ---- */
  OPROFILE_WRITE_OR_THROW(ffp, &(v3f_fmagic),     uint32_t, 1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->M),          int,      1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->abc->type),  int,      1);
  OPROFILE_WRITE_OR_THROW(ffp, &len,              int,      1);
  OPROFILE_WRITE_OR_THROW(ffp, om->name,          char,     len+1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->max_length), int,      1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->tbm_b),      uint8_t,  1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->tec_b),      uint8_t,  1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->tjb_b),      uint8_t,  1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->scale_b),    float,    1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->base_b),     uint8_t,  1);
  OPROFILE_WRITE_OR_THROW(ffp, &(om->bias_b),     uint8_t,  1);

  for (r = 0; r < om->abc->Kp; r++)
    OPROFILE_WRITE_OR_THROW(ffp, om->sbv[r], __m128i, nvb_sb);
  for (r = 0; r < om->abc->Kp; r++)
    OPROFILE_WRITE_OR_THROW(ffp, om->rbv[r], __m128i, nvb);

  OPROFILE_WRITE_OR_THROW(ffp, om->evparam,   float,    p7_NEVPARAM);
  OPROFILE_WRITE_OR_THROW(ffp, om->offs,      off_t,    p7_NOFFSETS);
  OPROFILE_WRITE_OR_THROW(ffp, om->compo,     float,    p7_MAXABET);
  OPROFILE_WRITE_OR_THROW(ffp, &(v3f_fmagic), uint32_t, 1);              /* sentinel */

  /* ---- <pfp>: the rest of the oprofile ---- */
  OPROFILE_WRITE_OR_THROW(pfp, &(v3f_pmagic),    uint32_t, 1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->M),         int,      1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->abc->type), int,      1);
  OPROFILE_WRITE_OR_THROW(pfp, &len,             int,      1);
  OPROFILE_WRITE_OR_THROW(pfp, om->name,         char,     len+1);

  /* Optional accession: length 0 and no string when absent. */
  len = (om->acc == NULL) ? 0 : strlen(om->acc);
  OPROFILE_WRITE_OR_THROW(pfp, &len, int, 1);
  if (om->acc != NULL)
    OPROFILE_WRITE_OR_THROW(pfp, om->acc, char, len+1);

  /* Optional description: same convention. */
  len = (om->desc == NULL) ? 0 : strlen(om->desc);
  OPROFILE_WRITE_OR_THROW(pfp, &len, int, 1);
  if (om->desc != NULL)
    OPROFILE_WRITE_OR_THROW(pfp, om->desc, char, len+1);

  /* Per-position annotation lines, each M+2 chars. */
  OPROFILE_WRITE_OR_THROW(pfp, om->rf,        char, om->M+2);
  OPROFILE_WRITE_OR_THROW(pfp, om->mm,        char, om->M+2);
  OPROFILE_WRITE_OR_THROW(pfp, om->cs,        char, om->M+2);
  OPROFILE_WRITE_OR_THROW(pfp, om->consensus, char, om->M+2);

  /* ViterbiFilter part */
  OPROFILE_WRITE_OR_THROW(pfp, om->twv, __m128i, 8*nvw);
  for (r = 0; r < om->abc->Kp; r++)
    OPROFILE_WRITE_OR_THROW(pfp, om->rwv[r], __m128i, nvw);
  for (r = 0; r < p7O_NXSTATES; r++)
    OPROFILE_WRITE_OR_THROW(pfp, om->xw[r], int16_t, p7O_NXTRANS);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->scale_w),      float,   1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->base_w),       int16_t, 1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->ddbound_w),    int16_t, 1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->ncj_roundoff), float,   1);

  /* Forward/Backward part */
  OPROFILE_WRITE_OR_THROW(pfp, om->tfv, __m128, 8*nvf);
  for (r = 0; r < om->abc->Kp; r++)
    OPROFILE_WRITE_OR_THROW(pfp, om->rfv[r], __m128, nvf);
  for (r = 0; r < p7O_NXSTATES; r++)
    OPROFILE_WRITE_OR_THROW(pfp, om->xf[r], float, p7O_NXTRANS);
  OPROFILE_WRITE_OR_THROW(pfp, om->cutoff,    float,    p7_NCUTOFFS);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->nj),     float,    1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->mode),   int,      1);
  OPROFILE_WRITE_OR_THROW(pfp, &(om->L),      int,      1);
  OPROFILE_WRITE_OR_THROW(pfp, &(v3f_pmagic), uint32_t, 1);              /* sentinel */

  return eslOK;
}

#undef OPROFILE_WRITE_OR_THROW
/* Function:  p7_oprofile_ReadMSV()
 * Synopsis:  Read MSV filter part of an optimized profile.
 *
 * Purpose:   Read the MSV filter part of a profile from the
 *            <.h3f> file associated with an open HMM file <hfp>.
 *            Allocate a new model, populate it with this minimal
 *            MSV filter information, and return a pointer to it
 *            in <*ret_om>.
 *
 *            Our alphabet may get set by the first HMM we read.  If
 *            <*byp_abc> is <NULL> at start, create a new alphabet and
 *            return a pointer to it in <*byp_abc>. If <*byp_abc> is
 *            non-<NULL>, it is assumed to be a pointer to an existing
 *            alphabet; we verify that the HMM's alphabet matches it
 *            and <*byp_abc> isn't changed.  This is the same
 *            convention used by <p7_hmmfile_Read()>.
 *
 *            The <.h3f> file was opened automatically, if it existed,
 *            when the HMM file was opened with <p7_hmmfile_OpenE()>.
 *
 *            When no more HMMs remain in the file, return <eslEOF>.
 *
 *            Every field is read with an exact item-count check, so a
 *            record that is truncated in the middle of a field is
 *            reported as <eslEFORMAT> at that field instead of being
 *            accepted with partially filled data.
 *
 * Args:      hfp     - open HMM file, with associated .h3f file
 *            byp_abc - BYPASS: <*byp_abc == ESL_ALPHABET *> if known;
 *                              <*byp_abc == NULL> if desired;
 *                              <NULL> if unwanted.
 *            ret_om  - RETURN: newly allocated <om> with MSV filter
 *                      data filled in.
 *
 * Returns:   <eslOK> on success. <*ret_om> is allocated here;
 *            caller free's with <p7_oprofile_Destroy()>.
 *            <*byp_abc> is allocated here if it was requested;
 *            caller free's with <esl_alphabet_Destroy()>.
 *
 *            Returns <eslEFORMAT> if <hfp> has no <.h3f> file open,
 *            or on any parsing error.
 *
 *            Returns <eslEINCOMPAT> if the HMM we read is incompatible
 *            with the existing alphabet <*byp_abc> led us to expect.
 *
 *            On any returned error, <hfp->errbuf> contains an
 *            informative error message, and <*ret_om> is <NULL>.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
p7_oprofile_ReadMSV(P7_HMMFILE *hfp, ESL_ALPHABET **byp_abc, P7_OPROFILE **ret_om)
{
  P7_OPROFILE  *om  = NULL;
  ESL_ALPHABET *abc = NULL;
  uint32_t      magic;
  off_t         roff;
  int           M, Q16, Q16x;
  int           x,n;
  int           alphatype;
  int           status;

  hfp->errbuf[0] = '\0';
  if (hfp->ffp == NULL) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "no MSV profile file; hmmpress probably wasn't run");
  if (feof(hfp->ffp))   { status = eslEOF; goto ERROR; }  /* normal EOF: no more profiles */

  /* keep track of the starting offset of the MSV model */
  roff = ftello(hfp->ffp);

  /* A failed read of the leading magic is treated as the normal end of the file. */
  if (fread( (char *) &magic, sizeof(uint32_t), 1, hfp->ffp) != 1) { status = eslEOF; goto ERROR; }
  if (magic == v3a_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/a); please hmmpress your HMM file again");
  if (magic == v3b_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/b); please hmmpress your HMM file again");
  if (magic == v3c_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/c); please hmmpress your HMM file again");
  if (magic == v3d_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/d); please hmmpress your HMM file again");
  if (magic == v3e_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/e); please hmmpress your HMM file again");
  if (magic != v3f_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad magic; not an HMM database?");

  if (fread( (char *) &M,         sizeof(int), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read model size M");
  if (fread( (char *) &alphatype, sizeof(int), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read alphabet type");
  Q16  = P7_NVB(M);
  Q16x = P7_NVB(M) + p7O_EXTRA_SB;

  /* Set or verify alphabet. */
  if (byp_abc == NULL || *byp_abc == NULL)
    { /* alphabet unknown: whether wanted or unwanted, make a new one */
      if ((abc = esl_alphabet_Create(alphatype)) == NULL) ESL_XFAIL(eslEMEM, hfp->errbuf, "allocation failed: alphabet");
    }
  else
    { /* alphabet already known: verify it against what we see in the HMM */
      abc = *byp_abc;
      if (abc->type != alphatype)
        ESL_XFAIL(eslEINCOMPAT, hfp->errbuf, "Alphabet type mismatch: was %s, but current profile says %s",
                  esl_abc_DecodeType(abc->type), esl_abc_DecodeType(alphatype));
    }

  /* Now we know the sizes of things, so we can allocate. */
  if ((om = p7_oprofile_Create(M, abc)) == NULL) ESL_XFAIL(eslEMEM, hfp->errbuf, "allocation failed: oprofile");
  om->M    = M;
  om->roff = roff;

  /* Name: an int length, then length+1 bytes. The length comes from the file,
   * so reject a negative value before using it to size an allocation.
   */
  if (fread((char *) &n, sizeof(int), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read name length");
  if (n < 0)                                             ESL_XFAIL(eslEFORMAT, hfp->errbuf, "invalid name length");
  ESL_ALLOC(om->name, sizeof(char) * (n+1));
  if (fread((char *) om->name, sizeof(char), n+1, hfp->ffp) != (size_t) (n+1)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read name");
  om->name[n] = '\0';   /* guarantee termination even if the stored terminator was corrupted */

  if (fread((char *) &(om->max_length), sizeof(int),     1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read max_length");
  if (fread((char *) &(om->tbm_b),      sizeof(uint8_t), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read tbm");
  if (fread((char *) &(om->tec_b),      sizeof(uint8_t), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read tec");
  if (fread((char *) &(om->tjb_b),      sizeof(uint8_t), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read tjb");
  if (fread((char *) &(om->scale_b),    sizeof(float),   1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read scale");
  if (fread((char *) &(om->base_b),     sizeof(uint8_t), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read base");
  if (fread((char *) &(om->bias_b),     sizeof(uint8_t), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read bias");

  /* Score vectors: a short read here means a truncated record, so demand the full count. */
  for (x = 0; x < abc->Kp; x++)
    {
      if (fread((char *) om->sbv[x], sizeof(__m128i), Q16x, hfp->ffp) != (size_t) Q16x)
        ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read ssv scores at %d [residue %c]", x, abc->sym[x]);
    }
  for (x = 0; x < abc->Kp; x++)
    {
      if (fread((char *) om->rbv[x], sizeof(__m128i), Q16, hfp->ffp) != (size_t) Q16)
        ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read msv scores at %d [residue %c]", x, abc->sym[x]);
    }

  if (fread((char *) om->evparam, sizeof(float), p7_NEVPARAM, hfp->ffp) != (size_t) p7_NEVPARAM) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read stat params");
  if (fread((char *) om->offs,    sizeof(off_t), p7_NOFFSETS, hfp->ffp) != (size_t) p7_NOFFSETS) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read hmmpfam offsets");
  if (fread((char *) om->compo,   sizeof(float), p7_MAXABET,  hfp->ffp) != (size_t) p7_MAXABET)  ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read model composition");

  /* record ends with magic sentinel, for detecting binary file corruption */
  if (fread( (char *) &magic, sizeof(uint32_t), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "no sentinel magic: .h3f file corrupted?");
  if (magic != v3f_fmagic)                                         ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad sentinel magic; .h3f file corrupted?");

  /* keep track of the ending offset of the MSV model */
  om->eoff = ftello(hfp->ffp) - 1;

  /* MSV models are always multilocal. ReadRest() might override this later; that's ok. */
  om->mode = p7_LOCAL;
  om->nj   = 1.0f;

  if (byp_abc != NULL) *byp_abc = abc;  /* pass alphabet (whether new or not) back to caller, if caller wanted it */
  *ret_om = om;
  return eslOK;

 ERROR:
  if (abc != NULL && (byp_abc == NULL || *byp_abc == NULL)) esl_alphabet_Destroy(abc); /* destroy alphabet if we created it here */
  if (om != NULL) p7_oprofile_Destroy(om);
  *ret_om = NULL;
  return status;
}
/* Function:  p7_oprofile_ReadInfoMSV()
 * Synopsis:  Read MSV filter info, but not the scores.
 *
 * Purpose:   Read just enough of the MSV filter header from the
 *            <.h3f> file associated with an open HMM file <hfp>
 *            to skip ahead to the next MSV filter. Allocate a new
 *            model, populate it with just the file offsets of this
 *            model and return a pointer to it in <*ret_om>.
 *
 *            Only the magic, model size, alphabet type and name
 *            length are actually read. The size of the rest of the
 *            record is computed from those, and the stream is
 *            repositioned to the end of the record. <om->name> is
 *            left <NULL>.
 *
 *            The <.h3f> file was opened automatically, if it existed,
 *            when the HMM file was opened with <p7_hmmfile_OpenE()>.
 *
 *            When no more HMMs remain in the file, return <eslEOF>.
 *
 * Args:      hfp     - open HMM file, with associated .h3f file
 *            byp_abc - BYPASS: <*byp_abc == ESL_ALPHABET *> if known;
 *                              <*byp_abc == NULL> if desired;
 *                              <NULL> if unwanted.
 *            ret_om  - RETURN: newly allocated <om> with partial MSV
 *                      filter data filled in.
 *
 * Returns:   <eslOK> on success. <*ret_om> is allocated here;
 *            caller free's with <p7_oprofile_Destroy()>.
 *            <*byp_abc> is allocated here if it was requested;
 *            caller free's with <esl_alphabet_Destroy()>.
 *
 *            Returns <eslEFORMAT> if <hfp> has no <.h3f> file open,
 *            or on any parsing error (including a negative name
 *            length).
 *
 *            Returns <eslEINCOMPAT> if the HMM we read is incompatible
 *            with the existing alphabet <*byp_abc> led us to expect.
 *
 *            If repositioning the stream fails, the status reported
 *            by <p7_oprofile_Position()> is returned.
 *
 *            On any returned error, <hfp->errbuf> contains an
 *            informative error message, and <*ret_om> is <NULL>.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
p7_oprofile_ReadInfoMSV(P7_HMMFILE *hfp, ESL_ALPHABET **byp_abc, P7_OPROFILE **ret_om)
{
  P7_OPROFILE  *om  = NULL;
  ESL_ALPHABET *abc = NULL;
  uint32_t      magic;
  off_t         roff;
  int           M, Q16, Q16x;
  int           n;
  int           alphatype;
  int           status;

  hfp->errbuf[0] = '\0';
  if (hfp->ffp == NULL) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "no MSV profile file; hmmpress probably wasn't run");
  if (feof(hfp->ffp))   { status = eslEOF; goto ERROR; }  /* normal EOF: no more profiles */

  /* keep track of the starting offset of the MSV model */
  roff = ftello(hfp->ffp);

  /* A failed read of the leading magic is treated as the normal end of the file. */
  if (fread( (char *) &magic, sizeof(uint32_t), 1, hfp->ffp) != 1) { status = eslEOF; goto ERROR; }
  if (magic == v3a_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/a); please hmmpress your HMM file again");
  if (magic == v3b_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/b); please hmmpress your HMM file again");
  if (magic == v3c_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/c); please hmmpress your HMM file again");
  if (magic == v3d_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/d); please hmmpress your HMM file again");
  if (magic == v3e_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/e); please hmmpress your HMM file again");
  if (magic != v3f_fmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad magic; not an HMM database?");

  if (fread( (char *) &M,         sizeof(int), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read model size M");
  if (fread( (char *) &alphatype, sizeof(int), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read alphabet type");
  Q16  = P7_NVB(M);
  Q16x = P7_NVB(M) + p7O_EXTRA_SB;

  /* Set or verify alphabet. */
  if (byp_abc == NULL || *byp_abc == NULL)
    { /* alphabet unknown: whether wanted or unwanted, make a new one */
      if ((abc = esl_alphabet_Create(alphatype)) == NULL) ESL_XFAIL(eslEMEM, hfp->errbuf, "allocation failed: alphabet");
    }
  else
    { /* alphabet already known: verify it against what we see in the HMM */
      abc = *byp_abc;
      if (abc->type != alphatype)
        ESL_XFAIL(eslEINCOMPAT, hfp->errbuf, "Alphabet type mismatch: was %s, but current profile says %s",
                  esl_abc_DecodeType(abc->type), esl_abc_DecodeType(alphatype));
    }

  /* Now we know the sizes of things, so we can allocate. */
  if ((om = p7_oprofile_Create(M, abc)) == NULL) ESL_XFAIL(eslEMEM, hfp->errbuf, "allocation failed: oprofile");
  om->M    = M;
  om->roff = roff;

  /* calculate the remaining length of the msv model */
  om->name = NULL;
  if (fread((char *) &n, sizeof(int), 1, hfp->ffp) != 1) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read name length");
  /* The length comes from the file; a negative value would corrupt the skip offset below. */
  if (n < 0)                                             ESL_XFAIL(eslEFORMAT, hfp->errbuf, "invalid name length");

  roff += (sizeof(int) * 5);                      /* magic, model size, alphabet type, max length, name length */
  roff += (sizeof(char) * (n + 1));               /* name string and terminator '\0'                           */
  roff += (sizeof(float) + sizeof(uint8_t) * 5);  /* transition costs, bias, scale and base                    */
  roff += (sizeof(__m128i) * abc->Kp * Q16x);     /* ssv scores                                                */
  roff += (sizeof(__m128i) * abc->Kp * Q16);      /* msv scores                                                */
  roff += (sizeof(float) * p7_NEVPARAM);          /* stat params                                               */
  roff += (sizeof(off_t) * p7_NOFFSETS);          /* hmmscan offsets                                           */
  roff += (sizeof(float) * p7_MAXABET);           /* model composition                                         */
  roff += sizeof(uint32_t);                       /* sentinel magic                                            */

  /* Skip to the end of this record, and keep track of the ending offset of the MSV model.
   * NOTE(review): assumes p7_oprofile_Position() returns an Easel status code
   * (eslOK on success), as the other routines in this file do - confirm against its prototype.
   */
  if ((status = p7_oprofile_Position(hfp, roff)) != eslOK)
    ESL_XFAIL(status, hfp->errbuf, "failed to skip to the end of the MSV model");
  om->eoff = ftello(hfp->ffp) - 1;

  /* MSV models are always multilocal. ReadRest() might override this later; that's ok. */
  om->mode = p7_LOCAL;
  om->nj   = 1.0f;

  if (byp_abc != NULL) *byp_abc = abc;  /* pass alphabet (whether new or not) back to caller, if caller wanted it */
  *ret_om = om;
  return eslOK;

 ERROR:
  if (abc != NULL && (byp_abc == NULL || *byp_abc == NULL)) esl_alphabet_Destroy(abc); /* destroy alphabet if we created it here */
  if (om != NULL) p7_oprofile_Destroy(om);
  *ret_om = NULL;
  return status;
}
/* Function: p7_SSVFilter_longtarget() * Synopsis: Finds windows with SSV scores above some threshold (vewy vewy fast, in limited precision) * * Purpose: Calculates an approximation of the SSV (single ungapped diagonal) * score for regions of sequence <dsq> of length <L> residues, using * optimized profile <om>, and a preallocated one-row DP matrix <ox>, * and captures the positions at which such regions exceed the score * required to be significant in the eyes of the calling function, * which depends on the <bg> and <p> (usually p=0.02 for nhmmer). * Note that this variant performs only SSV computations, never * passing through the J state - the score required to pass SSV at * the default threshold (or less restrictive) is sufficient to * pass MSV in essentially all DNA models we've tested. * * Above-threshold diagonals are captured into a preallocated list * <windowlist>. Rather than simply capturing positions at which a * score threshold is reached, this function establishes windows * around those high-scoring positions, using scores in <msvdata>. * These windows can be merged by the calling function. * * * Args: dsq - digital target sequence, 1..L * L - length of dsq in residues * om - optimized profile * ox - DP matrix * msvdata - compact representation of substitution scores, for backtracking diagonals * bg - the background model, required for translating a P-value threshold into a score threshold * P - p-value below which a region is captured as being above threshold * windowlist - preallocated container for all hits (resized if necessary) * * * Note: We misuse the matrix <ox> here, using only a third of the * first dp row, accessing it as <dp[0..Q-1]> rather than * in triplets via <{MDI}MX(q)> macros, since we only need * to store M state values. We know that if <ox> was big * enough for normal DP calculations, it must be big enough * to hold the MSVFilter calculation. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if <ox> allocation is too small. 
*/ int p7_SSVFilter_longtarget_avx(const ESL_DSQ *dsq, int L, P7_OPROFILE *om, P7_FILTERMX *ox, const P7_SCOREDATA *msvdata, P7_BG *bg, double P, P7_HMM_WINDOWLIST *windowlist) { #ifdef HAVE_AVX2 register __m128i mpv; /* previous row values */ register __m128i xEv; /* E state: keeps max for Mk->E for a single iteration */ register __m128i xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */ register __m128i sv; /* temp storage of 1 curr row value in progress */ register __m128i biasv; /* emission bias in a vector */ int i; /* counter over sequence positions 1..L */ int q; /* counter over vectors 0..nq-1 */ int Q = P7_NVB(om->M); /* segment length: # of vectors */ __m128i *dp = ox->dp; /* one DP row of memory */ __m128i *rsc; /* will point at om->rbv[x] for residue x[i] */ __m128i tjbmv; /* vector for J->B move cost + B->M move costs */ __m128i basev; /* offset for scores */ __m128i ceilingv; /* saturated simd value used to test for overflow */ __m128i tempv; /* work vector */ int cmp; int k; int n; int end; int rem_sc; int start; int target_end; int target_start; int max_end; int max_sc; int sc; int pos_since_max; float ret_sc; union { __m128i v; uint8_t b[16]; } u; int status; /* * Computing the score required to let P meet the F1 prob threshold * In original code, converting from a scaled int MSV * score S (the score getting to state E) to a probability goes like this: * usc = S - om->tec_b - om->tjb_b - om->base_b; * usc /= om->scale_b; * usc -= 3.0; * P = f ( (usc - nullsc) / eslCONST_LOG2 , mu, lambda) * and we're computing the threshold usc, so reverse it: * (usc - nullsc) / eslCONST_LOG2 = inv_f( P, mu, lambda) * usc = nullsc + eslCONST_LOG2 * inv_f( P, mu, lambda) * usc += 3 * usc *= om->scale_b * S = usc + om->tec_b + om->tjb_b + om->base_b * * Here, I compute threshold with length model based on max_length. 
Doesn't * matter much - in any case, both the bg and om models will change with roughly * 1 bit for each doubling of the length model, so they offset. */ float nullsc; __m128i sc_threshv; uint8_t sc_thresh; float invP = esl_gumbel_invsurv(P, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]); p7_bg_SetLength(bg, om->max_length); p7_oprofile_ReconfigMSVLength(om, om->max_length); p7_bg_NullOne (bg, dsq, om->max_length, &nullsc); sc_thresh = (int) ceil( ( ( nullsc + (invP * eslCONST_LOG2) + 3.0 ) * om->scale_b ) + om->base_b + om->tec_b + om->tjb_b ); sc_threshv = _mm_set1_epi8((int8_t) 255 - sc_thresh); /* Resize the filter mx as needed */ if (( status = p7_filtermx_GrowTo(ox, om->M)) != eslOK) ESL_EXCEPTION(status, "Reallocation of SSV filter matrix failed"); /* Matrix type and size must be set early, not late: debugging dump functions need this information. */ ox->M = om->M; ox->type = p7F_SSVFILTER; /* Initialization. In offset unsigned arithmetic, -infinity is 0, and 0 is om->base. */ biasv = _mm_set1_epi8((int8_t) om->bias_b); /* yes, you can set1() an unsigned char vector this way */ ceilingv = _mm_cmpeq_epi8(biasv, biasv); for (q = 0; q < Q; q++) dp[q] = _mm_setzero_si128(); basev = _mm_set1_epi8((int8_t) om->base_b); tjbmv = _mm_set1_epi8((int8_t) om->tjb_b + (int8_t) om->tbm_b); xBv = _mm_subs_epu8(basev, tjbmv); for (i = 1; i <= L; i++) { rsc = om->rbv[dsq[i]]; xEv = _mm_setzero_si128(); /* Right shifts by 1 byte. 4,8,12,x becomes x,4,8,12. * Because ia32 is littlendian, this means a left bit shift. * Zeros shift on automatically, which is our -infinity. */ mpv = _mm_slli_si128(dp[Q-1], 1); for (q = 0; q < Q; q++) { /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. 
*/ sv = _mm_max_epu8(mpv, xBv); sv = _mm_adds_epu8(sv, biasv); sv = _mm_subs_epu8(sv, *rsc); rsc++; xEv = _mm_max_epu8(xEv, sv); mpv = dp[q]; /* Load {MDI}(i-1,q) into mpv */ dp[q] = sv; /* Do delayed store of M(i,q) now that memory is usable */ } /* test if the pthresh significance threshold has been reached; * note: don't use _mm_cmpgt_epi8, because it's a signed comparison, which won't work on uint8s */ tempv = _mm_adds_epu8(xEv, sc_threshv); tempv = _mm_cmpeq_epi8(tempv, ceilingv); cmp = _mm_movemask_epi8(tempv); if (cmp != 0) { //hit pthresh, so add position to list and reset values //figure out which model state hit threshold end = -1; rem_sc = -1; for (q = 0; q < Q; q++) { /// Unpack and unstripe, so we can find the state that exceeded pthresh u.v = dp[q]; for (k = 0; k < 16; k++) { // unstripe //(q+Q*k+1) is the model position k at which the xE score is found if (u.b[k] >= sc_thresh && u.b[k] > rem_sc && (q+Q*k+1) <= om->M) { end = (q+Q*k+1); rem_sc = u.b[k]; } } dp[q] = _mm_set1_epi8(0); // while we're here ... 
this will cause values to get reset to xB in next dp iteration } //recover the diagonal that hit threshold start = end; target_end = target_start = i; sc = rem_sc; while (rem_sc > om->base_b - om->tjb_b - om->tbm_b) { rem_sc -= om->bias_b - msvdata->msv_scores[start*om->abc->Kp + dsq[target_start]]; --start; --target_start; } start++; target_start++; //extend diagonal further with single diagonal extension k = end+1; n = target_end+1; max_end = target_end; max_sc = sc; pos_since_max = 0; while (k<om->M && n<=L) { sc += om->bias_b - msvdata->msv_scores[k*om->abc->Kp + dsq[n]]; if (sc >= max_sc) { max_sc = sc; max_end = n; pos_since_max=0; } else { pos_since_max++; if (pos_since_max == 5) break; } k++; n++; } end += (max_end - target_end); k += (max_end - target_end); target_end = max_end; ret_sc = ((float) (max_sc - om->tjb_b) - (float) om->base_b); ret_sc /= om->scale_b; ret_sc -= 3.0; // that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ p7_hmmwindow_new(windowlist, 0, target_start, k, end, end-start+1 , ret_sc, p7_NOCOMPLEMENT); i = target_end; // skip forward } } /* end loop over sequence residues 1..L */ return eslOK; #endif /* HAVE_AVX2 */ #ifndef HAVE_AVX2 return 0; #endif }