int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_PRIOR *prior = NULL; P7_TRACE **trarr = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; ESL_MSA *postmsa = NULL; int i; int status; /* Standard idioms for opening and reading a digital MSA. (See esl_msafile.c example). */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); if ((status = esl_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp)) != eslOK) esl_msafile_OpenFailure(afp, status); bg = p7_bg_Create(abc); switch (abc->type) { case eslAMINO: prior = p7_prior_CreateAmino(); break; case eslDNA: prior = p7_prior_CreateNucleic(); break; case eslRNA: prior = p7_prior_CreateNucleic(); break; default: prior = p7_prior_CreateLaplace(abc); break; } if (prior == NULL) esl_fatal("Failed to initialize prior"); while ((status = esl_msafile_Read(afp, &msa)) != eslEOF) { if (status != eslOK) esl_msafile_ReadFailure(afp, status); /* The modelmakers collect counts in an HMM structure */ status = p7_Handmodelmaker(msa, NULL, &hmm, &trarr); if (status == eslENORESULT) esl_fatal("no consensus columns in alignment %s\n", msa->name); else if (status != eslOK) esl_fatal("failed to build HMM from alignment %s\n", msa->name); printf("COUNTS:\n"); p7_hmm_Dump(stdout, hmm); /* These counts, in combination with a prior, are converted to probability parameters */ status = p7_ParameterEstimation(hmm, prior); if (status != eslOK) esl_fatal("failed to parameterize HMM for %s", msa->name); printf("PROBABILITIES:\n"); p7_hmm_Dump(stdout, hmm); /* Just so we can dump a more informatively annotated trace - build a profile */ gm = p7_profile_Create(hmm->M, abc); p7_profile_Config (gm, hmm, bg); p7_profile_SetLength(gm, 400); /* Dump the individual traces */ for (i = 0; i < msa->nseq; i++) { printf("Trace %d: %s\n", i+1, msa->sqname[i]); p7_trace_DumpAnnotated(stdout, trarr[i], gm, msa->ax[i]); } /* Create an MSA from the individual traces */ status = p7_tracealign_MSA(msa, trarr, hmm->M, p7_DEFAULT, &postmsa); if (status != eslOK) esl_fatal("failed to create new MSA from traces\n"); esl_msafile_Write(stdout, postmsa, eslMSAFILE_PFAM); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); p7_trace_DestroyArray(trarr, msa->nseq); esl_msa_Destroy(postmsa); esl_msa_Destroy(msa); } esl_msafile_Close(afp); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
/* Function: p7_builder_Create() * Synopsis: Create a default HMM construction configuration. * * Purpose: Create a construction configuration for building * HMMs in alphabet <abc>, and return a pointer to it. * * An application configuration <go> may optionally be * provided. If <go> is <NULL>, default parameters are * used. If <go> is non-<NULL>, it must include appropriate * settings for all of the following ``standard build options'': * * Model construction: --fast --hand --symfrac --fragthresh * Relative weighting: --wgsc --wblosum --wpb --wgiven --wid * Effective seq #: --eent --eclust --enone --eset --ere --esigma --eid * Prior scheme: --pnone --plaplace * E-val calibration: --EmL --EmN --EvL --EvN --EfL --EfN --Eft * run-to-run variation: --seed * * See <hmmbuild.c> or other big users of the build * pipeline for an example of appropriate <ESL_GETOPTS> * initializations of these 24 options. */ P7_BUILDER * p7_builder_Create(const ESL_GETOPTS *go, const ESL_ALPHABET *abc) { P7_BUILDER *bld = NULL; int seed; int status; ESL_ALLOC(bld, sizeof(P7_BUILDER)); bld->prior = NULL; bld->r = NULL; bld->S = NULL; bld->Q = NULL; bld->eset = -1.0; /* -1.0 = unset; must be set if effn_strategy is p7_EFFN_SET */ bld->re_target = -1.0; if (go == NULL) { bld->arch_strategy = p7_ARCH_FAST; bld->wgt_strategy = p7_WGT_PB; bld->effn_strategy = p7_EFFN_ENTROPY; seed = 42; } else { if (esl_opt_GetBoolean(go, "--fast")) bld->arch_strategy = p7_ARCH_FAST; else if (esl_opt_GetBoolean(go, "--hand")) bld->arch_strategy = p7_ARCH_HAND; if (esl_opt_GetBoolean(go, "--wpb")) bld->wgt_strategy = p7_WGT_PB; else if (esl_opt_GetBoolean(go, "--wgsc")) bld->wgt_strategy = p7_WGT_GSC; else if (esl_opt_GetBoolean(go, "--wblosum")) bld->wgt_strategy = p7_WGT_BLOSUM; else if (esl_opt_GetBoolean(go, "--wnone")) bld->wgt_strategy = p7_WGT_NONE; else if (esl_opt_GetBoolean(go, "--wgiven")) bld->wgt_strategy = p7_WGT_GIVEN; if (esl_opt_GetBoolean(go, "--eent")) bld->effn_strategy = p7_EFFN_ENTROPY; else if (esl_opt_GetBoolean(go, "--eclust")) bld->effn_strategy = p7_EFFN_CLUST; else if (esl_opt_GetBoolean(go, "--enone")) bld->effn_strategy = p7_EFFN_NONE; else if (esl_opt_IsOn (go, "--eset")) { bld->effn_strategy = p7_EFFN_SET; bld->eset = esl_opt_GetReal(go, "--eset"); } seed = esl_opt_GetInteger(go, "--seed"); } bld->max_insert_len = 0; /* The default RE target is alphabet dependent. */ if (go != NULL && esl_opt_IsOn (go, "--ere")) bld->re_target = esl_opt_GetReal(go, "--ere"); else { switch (abc->type) { case eslAMINO: bld->re_target = p7_ETARGET_AMINO; break; case eslDNA: bld->re_target = p7_ETARGET_DNA; break; case eslRNA: bld->re_target = p7_ETARGET_DNA; break; default: bld->re_target = p7_ETARGET_OTHER; break; } } bld->symfrac = (go != NULL) ? esl_opt_GetReal (go, "--symfrac") : 0.5; bld->fragthresh = (go != NULL) ? esl_opt_GetReal (go, "--fragthresh") : 0.5; bld->wid = (go != NULL) ? esl_opt_GetReal (go, "--wid") : 0.62; bld->esigma = (go != NULL) ? esl_opt_GetReal (go, "--esigma") : 45.0; bld->eid = (go != NULL) ? esl_opt_GetReal (go, "--eid") : 0.62; bld->EmL = (go != NULL) ? esl_opt_GetInteger(go, "--EmL") : 200; bld->EmN = (go != NULL) ? esl_opt_GetInteger(go, "--EmN") : 200; bld->EvL = (go != NULL) ? esl_opt_GetInteger(go, "--EvL") : 200; bld->EvN = (go != NULL) ? esl_opt_GetInteger(go, "--EvN") : 200; bld->EfL = (go != NULL) ? esl_opt_GetInteger(go, "--EfL") : 100; bld->EfN = (go != NULL) ? esl_opt_GetInteger(go, "--EfN") : 200; bld->Eft = (go != NULL) ? esl_opt_GetReal (go, "--Eft") : 0.04; /* Normally we reinitialize the RNG to original seed before calibrating each model. * This eliminates run-to-run variation. * As a special case, seed==0 means choose an arbitrary seed and shut off the * reinitialization; this allows run-to-run variation. */ bld->r = esl_randomness_CreateFast(seed); bld->do_reseeding = (seed == 0) ? FALSE : TRUE; if (go && esl_opt_GetBoolean(go, "--pnone") ) bld->prior = NULL; else if (go && esl_opt_GetBoolean(go, "--plaplace") ) bld->prior = p7_prior_CreateLaplace(abc); else { switch (abc->type) { case eslAMINO: bld->prior = p7_prior_CreateAmino(); break; case eslDNA: bld->prior = p7_prior_CreateNucleic(); break; case eslRNA: bld->prior = p7_prior_CreateNucleic(); break; default: bld->prior = p7_prior_CreateLaplace(abc); break; } if (bld->prior == NULL) goto ERROR; } bld->abc = abc; bld->errbuf[0] = '\0'; bld->popen = -1; bld->pextend = -1; return bld; ERROR: p7_builder_Destroy(bld); return NULL; }