/* annotate()
 * Transfer annotation information from MSA to new HMM.
 * Also sets model-specific residue composition (hmm->compo).
 *
 * Args:    bld - build configuration; failure messages go to bld->errbuf
 *          msa - source alignment supplying name, accession,
 *                description and GA/TC/NC cutoffs
 *          hmm - model to annotate
 *
 * Returns: <eslOK> on success.
 *          <eslEINVAL> if <msa> has no name. Otherwise, the status
 *          returned by the first annotation step that did not
 *          return <eslOK>.
 */
static int
annotate(P7_BUILDER *bld, const ESL_MSA *msa, P7_HMM *hmm)
{
  int status;

  /* Name. A model without a name is not allowed, and a failure to
   * store the name must not be silently ignored.
   */
  if (msa->name == NULL) ESL_XFAIL(eslEINVAL, bld->errbuf, "Unable to name the HMM.");
  if ((status = p7_hmm_SetName(hmm, msa->name)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record MSA name");

  if ((status = p7_hmm_SetAccession  (hmm, msa->acc))  != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record MSA accession");
  if ((status = p7_hmm_SetDescription(hmm, msa->desc)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record MSA description");
  /* Command-log recording is disabled: the call needs the application's
   * getopts object, which is not passed to this function.
   */
  //  if ((status = p7_hmm_AppendComlog(hmm, go->argc, go->argv)) != eslOK) ESL_XFAIL(status, errbuf, "Failed to record command log");
  if ((status = p7_hmm_SetCtime(hmm))           != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record timestamp");
  if ((status = p7_hmm_SetComposition(hmm))     != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to determine model composition");
  if ((status = p7_hmm_SetConsensus(hmm, NULL)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to set consensus line");

  /* Score cutoffs come in pairs; a pair is transferred (and its flag
   * raised) only when both members are set in the MSA.
   */
  if (msa->cutset[eslMSA_GA1] && msa->cutset[eslMSA_GA2]) {
    hmm->cutoff[p7_GA1] = msa->cutoff[eslMSA_GA1];
    hmm->cutoff[p7_GA2] = msa->cutoff[eslMSA_GA2];
    hmm->flags |= p7H_GA;
  }
  if (msa->cutset[eslMSA_TC1] && msa->cutset[eslMSA_TC2]) {
    hmm->cutoff[p7_TC1] = msa->cutoff[eslMSA_TC1];
    hmm->cutoff[p7_TC2] = msa->cutoff[eslMSA_TC2];
    hmm->flags |= p7H_TC;
  }
  if (msa->cutset[eslMSA_NC1] && msa->cutset[eslMSA_NC2]) {
    hmm->cutoff[p7_NC1] = msa->cutoff[eslMSA_NC1];
    hmm->cutoff[p7_NC2] = msa->cutoff[eslMSA_NC2];
    hmm->flags |= p7H_NC;
  }

  return eslOK;

 ERROR:
  return status;
}
/* Function: p7_SingleBuilder() * Synopsis: Build a new HMM from a single sequence. * * Purpose: Take the sequence <sq> and a build configuration <bld>, and * build a new HMM. * * The single sequence scoring system in the <bld> * configuration must have been previously initialized by * <p7_builder_SetScoreSystem()>. * * Args: bld - build configuration * sq - query sequence * bg - null model (needed to paramaterize insert emission probs) * opt_hmm - optRETURN: new HMM * opt_gm - optRETURN: profile corresponding to <hmm> * opt_om - optRETURN: optimized profile corresponding to <gm> * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation error. * <eslEINVAL> if <bld> isn't properly configured somehow. */ int p7_SingleBuilder(P7_BUILDER *bld, ESL_SQ *sq, P7_BG *bg, P7_HMM **opt_hmm, P7_TRACE **opt_tr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om) { P7_HMM *hmm = NULL; P7_TRACE *tr = NULL; int k; int status; bld->errbuf[0] = '\0'; if (! bld->Q) ESL_XEXCEPTION(eslEINVAL, "score system not initialized"); if ((status = p7_Seqmodel(bld->abc, sq->dsq, sq->n, sq->name, bld->Q, bg->f, bld->popen, bld->pextend, &hmm)) != eslOK) goto ERROR; if ((status = p7_hmm_SetComposition(hmm)) != eslOK) goto ERROR; if ((status = p7_hmm_SetConsensus(hmm, sq)) != eslOK) goto ERROR; if ((status = calibrate(bld, hmm, bg, opt_gm, opt_om)) != eslOK) goto ERROR; if ( bld->abc->type == eslDNA || bld->abc->type == eslRNA ) { if (bld->w_len > 0) hmm->max_length = bld->w_len; else if (bld->w_beta == 0.0) hmm->max_length = hmm->M *4; else if ( (status = p7_Builder_MaxLength(hmm, bld->w_beta)) != eslOK) goto ERROR; } /* build a faux trace: relative to core model (B->M_1..M_L->E) */ if (opt_tr != NULL) { if ((tr = p7_trace_Create()) == NULL) goto ERROR; if ((status = p7_trace_Append(tr, p7T_B, 0, 0)) != eslOK) goto ERROR; for (k = 1; k <= sq->n; k++) if ((status = p7_trace_Append(tr, p7T_M, k, k)) != eslOK) goto ERROR; if ((status = p7_trace_Append(tr, p7T_E, 0, 0)) != eslOK) goto ERROR; tr->M 
= sq->n; tr->L = sq->n; } /* note that <opt_gm> and <opt_om> were already set by calibrate() call above. */ if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm); if (opt_tr != NULL) *opt_tr = tr; return eslOK; ERROR: p7_hmm_Destroy(hmm); if (tr != NULL) p7_trace_Destroy(tr); if (opt_gm != NULL) p7_profile_Destroy(*opt_gm); if (opt_om != NULL) p7_oprofile_Destroy(*opt_om); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); char *qfile = esl_opt_GetArg(go, 1); char *tfile = esl_opt_GetArg(go, 2); ESL_SQFILE *qfp = NULL; ESL_SQFILE *tfp = NULL; ESL_SQ *qsq = esl_sq_CreateDigital(abc); ESL_SQ *tsq = esl_sq_CreateDigital(abc); ESL_SCOREMATRIX *S = esl_scorematrix_Create(abc); ESL_DMATRIX *Q = NULL; P7_BG *bg = p7_bg_Create(abc); P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_REFMX *vit = p7_refmx_Create(200, 400); /* will grow as needed */ double *fa = malloc(sizeof(double) * abc->K); double popen = 0.02; double pextend = 0.4; double lambda; float vsc; float nullsc; int status; esl_composition_BL62(fa); esl_vec_D2F(fa, abc->K, bg->f); esl_scorematrix_Set("BLOSUM62", S); esl_scorematrix_ProbifyGivenBG(S, fa, fa, &lambda, &Q); esl_scorematrix_JointToConditionalOnQuery(abc, Q); if (esl_sqfile_OpenDigital(abc, qfile, eslSQFILE_UNKNOWN, NULL, &qfp) != eslOK) esl_fatal("failed to open %s", qfile); if (esl_sqio_Read(qfp, qsq) != eslOK) esl_fatal("failed to read query seq"); p7_Seqmodel(abc, qsq->dsq, qsq->n, qsq->name, Q, bg->f, popen, pextend, &hmm); p7_hmm_SetComposition(hmm); p7_hmm_SetConsensus(hmm, qsq); gm = p7_profile_Create(hmm->M, abc); p7_profile_ConfigUnilocal(gm, hmm, bg, 400); if (esl_sqfile_OpenDigital(abc, tfile, eslSQFILE_UNKNOWN, NULL, &tfp) != eslOK) esl_fatal("failed to open %s", tfile); while ((status = esl_sqio_Read(tfp, tsq)) == eslOK) { p7_bg_SetLength (bg, tsq->n); p7_profile_SetLength(gm, tsq->n); p7_ReferenceViterbi(tsq->dsq, tsq->n, gm, vit, NULL, &vsc); p7_bg_NullOne(bg, tsq->dsq, tsq->n, &nullsc); printf("%.4f %-25s %-25s\n", (vsc - nullsc) / eslCONST_LOG2, tsq->name, gm->name); esl_sq_Reuse(tsq); p7_refmx_Reuse(vit); } p7_refmx_Destroy(vit); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); p7_bg_Destroy(bg); esl_dmatrix_Destroy(Q); esl_scorematrix_Destroy(S); free(fa); esl_sq_Destroy(qsq); esl_sq_Destroy(tsq); 
esl_sqfile_Close(qfp); esl_sqfile_Close(tfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }