/* Function:  p7_coords2_Sample()
 * Synopsis:  Sample random domain segment positions, start/end pairs, sorted and nonoverlapping.
 *
 * Purpose:   Roll a segment count <nseg> in 1..<maxseg>, then fill <c2>
 *            with <nseg> start/end pairs taken from the coordinates
 *            1..<L>. The pairs are obtained by shuffling 1..<L> and
 *            sorting the first 2*<nseg> values, so they are distinct,
 *            increasing, and do not overlap.
 *
 * Args:      rng     - random number generator
 *            c2      - coords structure to fill; grown to <nseg> segments
 *            maxseg  - upper bound on the number of segments sampled
 *            L       - coordinates are drawn from 1..L
 *            byp_wrk - BYPASS workspace of at least <L> int32_t's:
 *                      pass <NULL> to allocate and free one internally;
 *                      pass <*byp_wrk == NULL> to have a new one returned;
 *                      pass an existing allocation to have it reallocated
 *                      and reused (<*byp_wrk> is updated, since
 *                      reallocation may move it).
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if the sampled <nseg> needs more than <L>
 *            coordinates (2*nseg > L); the workspace is untouched.
 *            Allocation failures jump here from ESL_ALLOC()/ESL_REALLOC().
 *            Any non-OK status from p7_coords2_GrowTo() is passed through.
 *            On any failure a workspace that was to be returned is freed
 *            and <*byp_wrk> stays NULL; a provided workspace remains
 *            owned by the caller.
 */
int
p7_coords2_Sample(ESL_RANDOMNESS *rng, P7_COORDS2 *c2, int32_t maxseg, int32_t L, int32_t **byp_wrk)
{
  int32_t *wrk  = NULL;
  int32_t  nseg = 1 + esl_rnd_Roll(rng, maxseg); /* 1..maxseg */
  int32_t  i;
  int      status;

  /* We consume 2*nseg of the L shuffled coordinates below; refuse to
   * read past the end of the workspace. Since nseg >= 1 this also
   * rejects L < 2, so the allocation size below is always positive.
   */
  if (nseg > L / 2) ESL_XEXCEPTION(eslEINVAL, "need at least 2*nseg coords in 1..L to sample nseg segments");

  /* Using the bypass idiom, make sure we have a workspace for <L> coords */
  if      (esl_byp_IsInternal(byp_wrk) ) ESL_ALLOC(wrk, sizeof(int32_t) * L);
  else if (esl_byp_IsReturned(byp_wrk) ) ESL_ALLOC(wrk, sizeof(int32_t) * L);
  else if (esl_byp_IsProvided(byp_wrk) ) { wrk = *byp_wrk; ESL_REALLOC(wrk, sizeof(int32_t) * L); }

  /* We put the numbers 1..L into the workspace <wrk>; shuffle them;
   * then sort the top nseg*2 of them. This gives us <nseg>
   * nonoverlapping start/end coords, in order.
   */
  for (i = 0; i < L; i++) wrk[i] = i+1;
  esl_vec_IShuffle(rng, wrk, L);
  esl_vec_ISortIncreasing(wrk, nseg*2);

  /* Store those randomized coords now in the data structure.
   * The grow can fail; don't write into <c2->arr> unless it succeeded.
   */
  if ((status = p7_coords2_GrowTo(c2, nseg)) != eslOK) goto ERROR;
  c2->n = nseg;
  for (i = 0; i < nseg; i++)
    {
      c2->arr[i].n1 = wrk[i*2];
      c2->arr[i].n2 = wrk[i*2+1];
    }

  /* Using the bypass idiom, recycle workspace, if we're supposed to */
  if      (esl_byp_IsInternal(byp_wrk)) free(wrk);
  else if (esl_byp_IsReturned(byp_wrk)) *byp_wrk = wrk;
  else if (esl_byp_IsProvided(byp_wrk)) *byp_wrk = wrk;
  return eslOK;

 ERROR:
  if (esl_byp_IsProvided(byp_wrk))
    {
      /* The caller keeps ownership. If we got as far as reallocating,
       * the block may have moved, so republish its address; if we
       * failed before touching it, <wrk> is still NULL and the
       * caller's pointer must be left alone.
       */
      if (wrk != NULL) *byp_wrk = wrk;
    }
  else
    {
      /* Internal or to-be-returned workspace: ours to release.
       * For the returned case <*byp_wrk> is left NULL.
       */
      free(wrk);
    }
  return status;
}
/* Function: p7_Calibrate() * Synopsis: Calibrate the E-value parameters of a model. * Incept: SRE, Thu Dec 25 09:29:31 2008 [Magallon] * * Purpose: Calibrate the E-value parameters of a model with * one calculation ($\lambda$) and two brief simulations * (Viterbi $\mu$, Forward $\tau$). * * Args: hmm - HMM to be calibrated * cfg_b - OPTCFG: ptr to optional build configuration; * if <NULL>, use default parameters. * byp_rng - BYPASS optimization: pass ptr to <ESL_RANDOMNESS> generator * if already known; * <*byp_rng> == NULL> if <rng> return is desired; * pass <NULL> to use and discard internal default. * byp_bg - BYPASS optimization: pass ptr to <P7_BG> if already known; * <*byp_bg == NULL> if <bg> return is desired; * pass <NULL> to use and discard internal default. * byp_gm - BYPASS optimization: pass ptr to <gm> profile if already known; * pass <*byp_gm == NULL> if <gm> return desired; * pass <NULL> to use and discard internal default. * byp_om - BYPASS optimization: pass ptr to <om> profile if already known; * pass <*byp_om == NULL> if <om> return desired; * pass <NULL> to use and discard internal default. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINVAL> if <hmm>, <gm>, <om> aren't compatible somehow. * * Xref: J4/41 */ int p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om) { P7_BG *bg = (esl_byp_IsProvided(byp_bg) ? *byp_bg : NULL); P7_PROFILE *gm = (esl_byp_IsProvided(byp_gm) ? *byp_gm : NULL); P7_OPROFILE *om = (esl_byp_IsProvided(byp_om) ? *byp_om : NULL); ESL_RANDOMNESS *r = (esl_byp_IsProvided(byp_rng) ? *byp_rng : NULL); char *errbuf = ((cfg_b != NULL) ? cfg_b->errbuf : NULL); int EmL = ((cfg_b != NULL) ? cfg_b->EmL : 200); int EmN = ((cfg_b != NULL) ? cfg_b->EmN : 200); int EvL = ((cfg_b != NULL) ? cfg_b->EvL : 200); int EvN = ((cfg_b != NULL) ? cfg_b->EvN : 200); int EfL = ((cfg_b != NULL) ? 
cfg_b->EfL : 100); int EfN = ((cfg_b != NULL) ? cfg_b->EfN : 200); double Eft = ((cfg_b != NULL) ? cfg_b->Eft : 0.04); double lambda, mmu, vmu, tau; int status; /* Configure any objects we need * that weren't already passed to us as a bypass optimization */ if (r == NULL) { if ((r = esl_randomness_CreateFast(42)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create RNG"); } else if (cfg_b != NULL && cfg_b->do_reseeding) { esl_randomness_Init(r, esl_randomness_GetSeed(r)); } if (bg == NULL) { if ((bg = p7_bg_Create(hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate background"); } /* there's an odd case where the <om> is provided and a <gm> isn't going to be returned * where we don't need a <gm> at all, and <gm> stays <NULL> after the next block. * Note that the <EvL> length in the ProfileConfig doesn't matter; the individual * calibration routines MSVMu(), etc. contain their own length reconfig calls. */ if ((esl_byp_IsInternal(byp_gm) && ! esl_byp_IsProvided(byp_om)) || esl_byp_IsReturned(byp_gm)) { if ( (gm = p7_profile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate profile"); if ( (status = p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL)) != eslOK) ESL_XFAIL(status, errbuf, "failed to configure profile"); } if (om == NULL) { if ((om = p7_oprofile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create optimized profile"); if ((status = p7_oprofile_Convert(gm, om)) != eslOK) ESL_XFAIL(status, errbuf, "failed to convert to optimized profile"); } /* The calibration steps themselves */ if ((status = p7_Lambda(hmm, bg, &lambda)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine lambda"); if ((status = p7_MSVMu (r, om, bg, EmL, EmN, lambda, &mmu)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine msv mu"); if ((status = p7_ViterbiMu(r, om, bg, EvL, EvN, lambda, &vmu)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine vit mu"); if ((status = p7_Tau (r, om, bg, EfL, EfN, 
lambda, Eft, &tau)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine fwd tau"); /* Store results */ hmm->evparam[p7_MLAMBDA] = om->evparam[p7_MLAMBDA] = lambda; hmm->evparam[p7_VLAMBDA] = om->evparam[p7_VLAMBDA] = lambda; hmm->evparam[p7_FLAMBDA] = om->evparam[p7_FLAMBDA] = lambda; hmm->evparam[p7_MMU] = om->evparam[p7_MMU] = mmu; hmm->evparam[p7_VMU] = om->evparam[p7_VMU] = vmu; hmm->evparam[p7_FTAU] = om->evparam[p7_FTAU] = tau; hmm->flags |= p7H_STATS; if (gm != NULL) { gm->evparam[p7_MLAMBDA] = lambda; gm->evparam[p7_VLAMBDA] = lambda; gm->evparam[p7_FLAMBDA] = lambda; gm->evparam[p7_MMU] = mmu; gm->evparam[p7_VMU] = vmu; gm->evparam[p7_FTAU] = tau; } if (byp_rng != NULL) *byp_rng = r; else esl_randomness_Destroy(r); /* bypass convention: no-op if rng was provided.*/ if (byp_bg != NULL) *byp_bg = bg; else p7_bg_Destroy(bg); /* bypass convention: no-op if bg was provided. */ if (byp_gm != NULL) *byp_gm = gm; else p7_profile_Destroy(gm); /* bypass convention: no-op if gm was provided. */ if (byp_om != NULL) *byp_om = om; else p7_oprofile_Destroy(om); /* bypass convention: no-op if om was provided. */ return eslOK; ERROR: if (! esl_byp_IsProvided(byp_rng)) esl_randomness_Destroy(r); if (! esl_byp_IsProvided(byp_bg)) p7_bg_Destroy(bg); if (! esl_byp_IsProvided(byp_gm)) p7_profile_Destroy(gm); if (! esl_byp_IsProvided(byp_om)) p7_oprofile_Destroy(om); return status; }
/* All input sources funnel through here.
 * Here, <afp> is already allocated and initialized, and the input
 * <bf> is opened successfully.
 *
 * Determines the file format (guessing it if <format> is
 * <eslMSAFILE_UNKNOWN>), determines the alphabet according to the
 * <byp_abc> bypass argument, and configures the format-specific input
 * map in <afp>.
 *
 * Args:     byp_abc - <NULL> for text mode; <*byp_abc == NULL> to guess the
 *                     alphabet, create it, and return it; or a provided
 *                     alphabet to use as is.
 *           bf      - the opened input buffer. Not referenced directly in
 *                     this function; the buffer is read through <afp->bf>.
 *           format  - format code, or <eslMSAFILE_UNKNOWN> to guess
 *           fmtd    - optional format data to copy into <afp->fmtd>, or NULL
 *           afp     - the open alignment file being configured
 *
 * Returns:  <eslOK> on success.
 *           <eslENOFORMAT> if the format can't be guessed, and
 *           <eslENOALPHABET> if the alphabet can't be guessed; in both
 *           cases <afp->errmsg> is set. Any other non-OK status from the
 *           guessers or from the input map setup is passed through.
 *           On these failures <afp->abc> is NULL, an alphabet created
 *           here is destroyed, and a to-be-returned <*byp_abc> is NULL.
 *
 * Throws:   <eslEMEM> if the alphabet can't be created.
 *           <eslEINCONCEIVABLE> if digital mode is requested but the
 *           alphabet code is compiled out.
 *           <eslENOFORMAT> on an unrecognized format code.
 */
static int
profillic_msafile_OpenBuffer(ESL_ALPHABET **byp_abc, ESL_BUFFER *bf, int format, ESLX_MSAFILE_FMTDATA *fmtd, ESLX_MSAFILE *afp)
{
  ESL_ALPHABET *abc       = NULL;
  int           alphatype = eslUNKNOWN;
  int           status;

  /* if caller provided <fmtd>, copy it into afp->fmtd */
  if (fmtd) eslx_msafile_fmtdata_Copy(fmtd, &(afp->fmtd));

  /* Determine the format */
  if (format == eslMSAFILE_UNKNOWN)
    {
      status = eslx_msafile_GuessFileFormat(afp->bf, &format, &(afp->fmtd));
      if      (status == eslENOFORMAT) ESL_XFAIL(eslENOFORMAT, afp->errmsg, "couldn't determine alignment input format"); /* ENOFORMAT is normal failure */
      else if (status != eslOK)        goto ERROR;
    }
  afp->format = format;

  /* Determine the alphabet; set <abc>. (<abc> == NULL means text mode.)  */
  /* Note that GuessAlphabet() functions aren't allowed to use the inmap, because it isn't set yet */
#ifdef eslAUGMENT_ALPHABET
  if (byp_abc && *byp_abc)      /* Digital mode, and caller provided the alphabet */
    {
      abc       = *byp_abc;
      alphatype = abc->type;
    }
  else if (byp_abc)             /* Digital mode, and caller wants us to guess and create an alphabet */
    {
      status = eslx_msafile_GuessAlphabet(afp, &alphatype);
      if      (status == eslENOALPHABET) ESL_XFAIL(eslENOALPHABET, afp->errmsg, "couldn't guess alphabet (maybe try --dna/--rna/--amino if available)");
      else if (status != eslOK)          goto ERROR;
      if ( (abc = esl_alphabet_Create(alphatype)) == NULL) { status = eslEMEM; goto ERROR; }
    }
#endif
  /* This test interacts with the #ifdef above: when the alphabet code
   * is compiled in, a non-NULL <byp_abc> always leaves <abc> set (or we
   * have already jumped to ERROR), so this can only fire when digital
   * mode was requested but the code above was compiled out.
   */
  if (byp_abc && ! abc) ESL_XEXCEPTION(eslEINCONCEIVABLE, "Your version of Easel does not include digital alphabet code.");

  afp->abc = abc;       /* with afp->abc set, the inmap config functions know whether to do digital/text    */

  /* Configure the format-specific, digital or text mode character
   * input map in afp->inmap.
   * All of these must:
   *
   *    set inmap[0] to an appropriate 'unknown' character, to replace
   *       invalid input with.
   *    set ' ' to eslDSQ_IGNORE (if we're supposed to accept and skip
   *       it), or map it to a gap, or set it as eslDSQ_ILLEGAL.
   *    in digital mode, copy the abc->inmap
   *    in text mode, decide if we should accept most any
   *        non-whitespace character (isgraph()), or if the format is
   *        inherently restrictive and we should go with isalpha() +
   *        some other valid characters "_-.~*" instead.
   */
  switch (afp->format) {
  case eslMSAFILE_A2M:          status = esl_msafile_a2m_SetInmap(      afp); break;
  case eslMSAFILE_AFA:          status = esl_msafile_afa_SetInmap(      afp); break;
  case eslMSAFILE_CLUSTAL:      status = esl_msafile_clustal_SetInmap(  afp); break;
  case eslMSAFILE_CLUSTALLIKE:  status = esl_msafile_clustal_SetInmap(  afp); break;
  case eslMSAFILE_PFAM:         status = esl_msafile_stockholm_SetInmap(afp); break;
  case eslMSAFILE_PHYLIP:       status = esl_msafile_phylip_SetInmap(   afp); break;
  case eslMSAFILE_PHYLIPS:      status = esl_msafile_phylip_SetInmap(   afp); break;
  case eslMSAFILE_PSIBLAST:     status = esl_msafile_psiblast_SetInmap( afp); break;
  case eslMSAFILE_SELEX:        status = esl_msafile_selex_SetInmap(    afp); break;
  case eslMSAFILE_STOCKHOLM:    status = esl_msafile_stockholm_SetInmap(afp); break;
  case eslMSAFILE_PROFILLIC:    status = eslOK;                               break; /* TODO: status = profillic_esl_msafile_profile_SetInmap(afp); */
  default:                      ESL_XEXCEPTION(eslENOFORMAT, "no such alignment file format"); break;
  }
  /* Don't report success if the input map could not be configured. */
  if (status != eslOK) goto ERROR;

  if (esl_byp_IsReturned(byp_abc)) *byp_abc = abc;
  return eslOK;

 ERROR:  /* on normal errors, afp is returned in an error state */
  if (abc && ! esl_byp_IsProvided(byp_abc)) { esl_alphabet_Destroy(abc); }
  if (esl_byp_IsReturned(byp_abc)) *byp_abc = NULL;
  afp->abc = NULL;
  return status;
}