/* annotate()
 * Transfer annotation information from MSA to new HMM.
 * Also sets model-specific residue composition (hmm->compo).
 *
 * Args:     bld - builder; only <bld->errbuf> is used here, as the
 *                 destination for an error message on failure.
 *           msa - source alignment. Its name, accession, description
 *                 and GA/TC/NC cutoffs are read.
 *           hmm - model to annotate.
 *
 * Returns:  <eslOK> on success.
 *
 *           <eslEINVAL> if the MSA has no name. Otherwise, the status
 *           of the first annotation call that fails is passed back
 *           unchanged. Either way the failure is reported through
 *           <ESL_XFAIL()> with <bld->errbuf> as the message buffer.
 */
static int
annotate(P7_BUILDER *bld, const ESL_MSA *msa, P7_HMM *hmm)
{
  int status;

  /* The name is mandatory: an alignment without one cannot be turned
   * into a named model. A failure to store the name is reported like
   * every other annotation failure below, rather than being ignored.
   */
  if (msa->name == NULL)
    ESL_XFAIL(eslEINVAL, bld->errbuf, "Unable to name the HMM.");
  if ((status = p7_hmm_SetName(hmm, msa->name)) != eslOK)
    ESL_XFAIL(status, bld->errbuf, "Failed to record MSA name");

  if ((status = p7_hmm_SetAccession(hmm, msa->acc)) != eslOK)
    ESL_XFAIL(status, bld->errbuf, "Failed to record MSA accession");
  if ((status = p7_hmm_SetDescription(hmm, msa->desc)) != eslOK)
    ESL_XFAIL(status, bld->errbuf, "Failed to record MSA description");

  /* No command log is appended here: this function receives no
   * argc/argv to record.
   */
  if ((status = p7_hmm_SetCtime(hmm)) != eslOK)
    ESL_XFAIL(status, bld->errbuf, "Failed to record timestamp");
  if ((status = p7_hmm_SetComposition(hmm)) != eslOK)
    ESL_XFAIL(status, bld->errbuf, "Failed to determine model composition");
  if ((status = p7_hmm_SetConsensus(hmm, NULL)) != eslOK)
    ESL_XFAIL(status, bld->errbuf, "Failed to set consensus line");

  /* Score cutoffs come in pairs. A pair is copied, and the matching
   * flag raised on the model, only when BOTH members are set in the MSA.
   */
  if (msa->cutset[eslMSA_GA1] && msa->cutset[eslMSA_GA2])
    {
      hmm->cutoff[p7_GA1] = msa->cutoff[eslMSA_GA1];
      hmm->cutoff[p7_GA2] = msa->cutoff[eslMSA_GA2];
      hmm->flags         |= p7H_GA;
    }
  if (msa->cutset[eslMSA_TC1] && msa->cutset[eslMSA_TC2])
    {
      hmm->cutoff[p7_TC1] = msa->cutoff[eslMSA_TC1];
      hmm->cutoff[p7_TC2] = msa->cutoff[eslMSA_TC2];
      hmm->flags         |= p7H_TC;
    }
  if (msa->cutset[eslMSA_NC1] && msa->cutset[eslMSA_NC2])
    {
      hmm->cutoff[p7_NC1] = msa->cutoff[eslMSA_NC1];
      hmm->cutoff[p7_NC2] = msa->cutoff[eslMSA_NC2];
      hmm->flags         |= p7H_NC;
    }

  return eslOK;

 ERROR:
  return status;
}
/* Function: p7_Seqmodel() * Synopsis: Make a profile HMM from a single sequence. * * Purpose: Make a profile HMM from a single sequence, for * probabilistic Smith/Waterman alignment, HMMER3-style. * * The query is digital sequence <dsq> of length <M> * residues in alphabet <abc>, named <name>. * * The scoring system is given by <Q>, <f>, <popen>, and * <pextend>. <Q> is a $K \times K$ matrix giving * conditional residue probabilities $P(a \mid b)}$; these * are typically obtained by reverse engineering a score * matrix like BLOSUM62. <f> is a vector of $K$ background * frequencies $p_a$. <popen> and <pextend> are the * probabilities assigned to gap-open ($t_{MI}$ and * $t_{MD}$) and gap-extend ($t_{II}$ and $t_{DD}$) * transitions. * * The <p7H_SINGLE> flag is set on the <hmm>. Model * configuration (<p7_profile_Config(), friends> detects * this flag. <B->Mk> entry transitions include a match * state occupancy term for profile HMMs, but for single * queries, that <occ[]> term is assumed 1.0 for all * positions. See commentary in modelconfig.c. * * Args: * * Returns: <eslOK> on success, and a newly allocated HMM is returned * in <ret_hmm>. * * Throws: <eslEMEM> on allocation error, and <*ret_hmm> is <NULL>. */ int p7_Seqmodel(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int M, char *name, ESL_DMATRIX *Q, float *f, double popen, double pextend, P7_HMM **ret_hmm) { int status; P7_HMM *hmm = NULL; char *logmsg = "[HMM created from a query sequence]"; int k; if ((hmm = p7_hmm_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; } for (k = 0; k <= M; k++) { /* Use rows of P matrix as source of match emission vectors */ if (k > 0) esl_vec_D2F(Q->mx[(int) dsq[k]], abc->K, hmm->mat[k]); /* Set inserts to background for now. This will be improved. 
*/ esl_vec_FCopy(f, abc->K, hmm->ins[k]); hmm->t[k][p7H_MM] = 1.0 - 2 * popen; hmm->t[k][p7H_MI] = popen; hmm->t[k][p7H_MD] = popen; hmm->t[k][p7H_IM] = 1.0 - pextend; hmm->t[k][p7H_II] = pextend; hmm->t[k][p7H_DM] = 1.0 - pextend; hmm->t[k][p7H_DD] = pextend; } /* Deal w/ special stuff at node M, overwriting a little of what we * just did. */ hmm->t[M][p7H_MM] = 1.0 - popen; hmm->t[M][p7H_MD] = 0.; hmm->t[M][p7H_DM] = 1.0; hmm->t[M][p7H_DD] = 0.; /* Add mandatory annotation */ p7_hmm_SetName(hmm, name); p7_hmm_AppendComlog(hmm, 1, &logmsg); hmm->nseq = 1; p7_hmm_SetCtime(hmm); hmm->checksum = 0; hmm->flags |= p7H_SINGLE; *ret_hmm = hmm; return eslOK; ERROR: if (hmm != NULL) p7_hmm_Destroy(hmm); *ret_hmm = NULL; return status; }