/* utest_SendRecv()
 * Exercises p7_hmm_mpi_Send() / p7_hmm_mpi_Recv() across MPI ranks.
 *
 * Rank 0 (the master) sends its RNG seed to every other rank, samples
 * an HMM, then receives one HMM per worker and requires each one to
 * pass p7_hmm_Validate() and p7_hmm_Compare() against its own sample.
 * Every other rank (a worker) receives the seed, creates a private RNG
 * from it, samples an HMM, and sends it to rank 0.
 *
 * NOTE(review): the comparison presumably relies on the master's <rng>
 * not having been advanced since it was seeded; that is not checked here.
 *
 * Any failure is fatal via esl_fatal().
 */
static void
utest_SendRecv(ESL_RANDOMNESS *rng, int my_rank, int nproc)
{
  char            msg[]   = "utest_SendRecv() failed";
  ESL_ALPHABET   *abc     = esl_alphabet_Create(eslAMINO);
  P7_HMM         *sampled = NULL;   /* this rank's own sampled model       */
  P7_HMM         *echoed  = NULL;   /* master only: model sent by a worker */
  int             M       = 200;
  char           *packbuf = NULL;   /* MPI pack/unpack working buffer      */
  int             packlen = 0;
  int             rank;
  uint32_t        seed;
  MPI_Status      mpistatus;
  char            errmsg[eslERRBUFSIZE];

  if (my_rank != 0)
    {
      /* Worker: obtain the master's seed first... */
      if (MPI_Recv(&seed, 1, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, &mpistatus) != MPI_SUCCESS) esl_fatal(msg);

      /* ...so a private RNG can be created from that same seed... */
      rng = esl_randomness_CreateFast(seed);

      /* ...and used to sample the model that gets shipped to the master. */
      if (p7_modelsample(rng, M, abc, &sampled) != eslOK) esl_fatal(msg);
      if (p7_hmm_mpi_Send(sampled, 0, 0, MPI_COMM_WORLD, &packbuf, &packlen) != eslOK) esl_fatal(msg);

      /* This RNG was created here, so it is released here; the caller's
       * RNG (shadowed by the assignment above) is left untouched.
       */
      esl_randomness_Destroy(rng);
    }
  else
    {
      /* Master: hand the seed to every worker. */
      seed = esl_randomness_GetSeed(rng);
      for (rank = 1; rank < nproc; rank++)
        {
          if (MPI_Send(&seed, 1, MPI_UNSIGNED, rank, 0, MPI_COMM_WORLD) != MPI_SUCCESS) esl_fatal(msg);
        }

      /* Sample the reference model with the caller's RNG. */
      if (p7_modelsample(rng, M, abc, &sampled) != eslOK) esl_fatal(msg);

      /* Collect one model per worker, in whatever order they arrive. */
      for (rank = 1; rank < nproc; rank++)
        {
          if (p7_hmm_mpi_Recv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &packbuf, &packlen, &abc, &echoed) != eslOK) esl_fatal(msg);

          if (p7_hmm_Validate(echoed, errmsg, 0.001) != eslOK) esl_fatal("%s:\n %s", msg, errmsg);
          if (p7_hmm_Compare(sampled, echoed, 0.001) != eslOK) esl_fatal(msg);

          p7_hmm_Destroy(echoed);
        }
    }

  p7_hmm_Destroy(sampled);
  esl_alphabet_Destroy(abc);
  free(packbuf);
  return;
}
/* mpi_worker()
 * The main control for an MPI worker process.
 *
 * After sizing and allocating its buffers, the worker repeatedly
 * receives an HMM from rank 0 with p7_hmm_mpi_Recv(), optionally
 * recalibrates it (--recal), processes the work unit, and sends back a
 * packed message: the status code, cfg->N scores, and (with -a) cfg->N
 * alignment lengths. The loop ends when p7_hmm_mpi_Recv() returns
 * anything other than eslOK.
 *
 * If recalibration or the work unit fails, the worker sends one packed
 * message containing the failing status and <errbuf>, cleans up, and
 * returns. Buffer sizing or allocation failures are reported through
 * p7_Fail().
 */
static void
mpi_worker(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int      status;
  P7_HMM  *hmm  = NULL;
  char    *wbuf = NULL;
  double  *xv   = NULL;	/* result: array of N scores                      */
  int     *av   = NULL;	/* optional result: array of N alignment lengths  */
  int      wn   = 0;
  char     errbuf[eslERRBUFSIZE];
  int      pos;

  /* Worker initializes.
   * A failure to size the pack buffer was previously stored in a
   * write-only variable and ignored, leaving <wn> unreliable for the
   * allocation and every MPI_Pack() below. Stop here instead, the same
   * way allocation failures are handled at ERROR.
   */
  if ((status = minimum_mpi_working_buffer(go, cfg->N, &wn)) != eslOK)
    p7_Fail("Failed to determine MPI working buffer size in mpi_worker");

  ESL_ALLOC(wbuf, wn * sizeof(char));
  /* NOTE(review): the extra 2 bytes are kept from the original; their purpose is not evident here. */
  ESL_ALLOC(xv,   cfg->N * sizeof(double) + 2);
  if (esl_opt_GetBoolean(go, "-a"))
    ESL_ALLOC(av, cfg->N * sizeof(int));

  /* Main worker loop */
  while (p7_hmm_mpi_Recv(0, 0, MPI_COMM_WORLD, &wbuf, &wn, &(cfg->abc), &hmm) == eslOK)
    {
      if (esl_opt_GetBoolean(go, "--recal"))
        {
          if ((status = recalibrate_model(go, cfg, errbuf, hmm)) != eslOK) goto CLEANERROR;
        }
      if ((status = process_workunit(go, cfg, errbuf, hmm, xv, av)) != eslOK) goto CLEANERROR;

      /* Normal result packet: status, scores, and optionally alignment lengths. */
      pos = 0;
      MPI_Pack(&status, 1,      MPI_INT,    wbuf, wn, &pos, MPI_COMM_WORLD);
      MPI_Pack(xv,      cfg->N, MPI_DOUBLE, wbuf, wn, &pos, MPI_COMM_WORLD);
      if (esl_opt_GetBoolean(go, "-a"))
        MPI_Pack(av,    cfg->N, MPI_INT,    wbuf, wn, &pos, MPI_COMM_WORLD);
      MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);

      p7_hmm_Destroy(hmm);
    }

  free(wbuf);
  free(xv);
  if (av != NULL) free(av);
  return;

 CLEANERROR:
  /* Error packet: the failing status followed by the whole error buffer. */
  pos = 0;
  MPI_Pack(&status, 1,             MPI_INT,  wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Pack(errbuf,  eslERRBUFSIZE, MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
  if (wbuf != NULL) free(wbuf);
  if (hmm  != NULL) p7_hmm_Destroy(hmm);
  if (xv   != NULL) free(xv);
  if (av   != NULL) free(av);
  return;

 ERROR:
  p7_Fail("Allocation error in mpi_worker");
}
/* Function: p7_Builder() * Synopsis: Build a new HMM from an MSA. * * Purpose: Take the multiple sequence alignment <msa> and a build configuration <bld>, * and build a new HMM. * * Effective sequence number determination and calibration steps require * additionally providing a null model <bg>. * * Args: bld - build configuration * msa - multiple sequence alignment * bg - null model * opt_hmm - optRETURN: new HMM * opt_trarr - optRETURN: array of faux tracebacks, <0..nseq-1> * opt_gm - optRETURN: profile corresponding to <hmm> * opt_om - optRETURN: optimized profile corresponding to <gm> * opt_postmsa - optRETURN: RF-annotated, possibly modified MSA * * Returns: <eslOK> on success. The new HMM is optionally returned in * <*opt_hmm>, along with optional returns of an array of faux tracebacks * for each sequence in <*opt_trarr>, the annotated MSA used to construct * the model in <*opt_postmsa>, a configured search profile in * <*opt_gm>, and an optimized search profile in <*opt_om>. These are * all optional returns because the caller may, for example, be interested * only in an optimized profile, or may only be interested in the HMM. * * Returns <eslENORESULT> if no consensus columns were annotated. * Returns <eslEFORMAT> on MSA format problems, such as a missing RF annotation * line in hand architecture construction. On any returned error, * <bld->errbuf> contains an informative error message. * * Throws: <eslEMEM> on allocation error. * <eslEINVAL> if relative weights couldn't be calculated from <msa>. * * Xref: J4/30. */ int p7_Builder(P7_BUILDER *bld, ESL_MSA *msa, P7_BG *bg, P7_HMM **opt_hmm, P7_TRACE ***opt_trarr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om, ESL_MSA **opt_postmsa) { int i,j; uint32_t checksum = 0; /* checksum calculated for the input MSA. hmmalign --mapali verifies against this. */ P7_HMM *hmm = NULL; P7_TRACE **tr = NULL; P7_TRACE ***tr_ptr = (opt_trarr != NULL || opt_postmsa != NULL) ? 
&tr : NULL; int status; if ((status = validate_msa (bld, msa)) != eslOK) goto ERROR; if ((status = esl_msa_Checksum (msa, &checksum)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to calculate checksum"); if ((status = relative_weights (bld, msa)) != eslOK) goto ERROR; if ((status = esl_msa_MarkFragments(msa, bld->fragthresh)) != eslOK) goto ERROR; if ((status = build_model (bld, msa, &hmm, tr_ptr)) != eslOK) goto ERROR; //Ensures that the weighted-average I->I count <= bld->max_insert_len //(MI currently contains the number of observed insert-starts) if (bld->max_insert_len>0) for (i=1; i<hmm->M; i++ ) hmm->t[i][p7H_II] = ESL_MIN(hmm->t[i][p7H_II], bld->max_insert_len*hmm->t[i][p7H_MI]); if ((status = effective_seqnumber (bld, msa, hmm, bg)) != eslOK) goto ERROR; if ((status = parameterize (bld, hmm)) != eslOK) goto ERROR; if ((status = annotate (bld, msa, hmm)) != eslOK) goto ERROR; if ((status = calibrate (bld, hmm, bg, opt_gm, opt_om)) != eslOK) goto ERROR; if ((status = make_post_msa (bld, msa, hmm, tr, opt_postmsa)) != eslOK) goto ERROR; //force masked positions to background (it'll be close already, so no relevant impact on weighting) if (hmm->mm != NULL) for (i=1; i<hmm->M; i++ ) if (hmm->mm[i] == 'm') for (j=0; j<hmm->abc->K; j++) hmm->mat[i][j] = bg->f[j]; if ( bld->abc->type == eslDNA || bld->abc->type == eslRNA ) { if (bld->w_len > 0) hmm->max_length = bld->w_len; else if (bld->w_beta == 0.0) hmm->max_length = hmm->M *4; else if ( (status = p7_Builder_MaxLength(hmm, bld->w_beta)) != eslOK) goto ERROR; } hmm->checksum = checksum; hmm->flags |= p7H_CHKSUM; if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm); if (opt_trarr != NULL) *opt_trarr = tr; else p7_trace_DestroyArray(tr, msa->nseq); return eslOK; ERROR: p7_hmm_Destroy(hmm); p7_trace_DestroyArray(tr, msa->nseq); if (opt_gm != NULL) p7_profile_Destroy(*opt_gm); if (opt_om != NULL) p7_oprofile_Destroy(*opt_om); return status; }
/* Function: p7_SingleBuilder() * Synopsis: Build a new HMM from a single sequence. * * Purpose: Take the sequence <sq> and a build configuration <bld>, and * build a new HMM. * * The single sequence scoring system in the <bld> * configuration must have been previously initialized by * <p7_builder_SetScoreSystem()>. * * Args: bld - build configuration * sq - query sequence * bg - null model (needed to paramaterize insert emission probs) * opt_hmm - optRETURN: new HMM * opt_gm - optRETURN: profile corresponding to <hmm> * opt_om - optRETURN: optimized profile corresponding to <gm> * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation error. * <eslEINVAL> if <bld> isn't properly configured somehow. */ int p7_SingleBuilder(P7_BUILDER *bld, ESL_SQ *sq, P7_BG *bg, P7_HMM **opt_hmm, P7_TRACE **opt_tr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om) { P7_HMM *hmm = NULL; P7_TRACE *tr = NULL; int k; int status; bld->errbuf[0] = '\0'; if (! bld->Q) ESL_XEXCEPTION(eslEINVAL, "score system not initialized"); if ((status = p7_Seqmodel(bld->abc, sq->dsq, sq->n, sq->name, bld->Q, bg->f, bld->popen, bld->pextend, &hmm)) != eslOK) goto ERROR; if ((status = p7_hmm_SetComposition(hmm)) != eslOK) goto ERROR; if ((status = p7_hmm_SetConsensus(hmm, sq)) != eslOK) goto ERROR; if ((status = calibrate(bld, hmm, bg, opt_gm, opt_om)) != eslOK) goto ERROR; if ( bld->abc->type == eslDNA || bld->abc->type == eslRNA ) { if (bld->w_len > 0) hmm->max_length = bld->w_len; else if (bld->w_beta == 0.0) hmm->max_length = hmm->M *4; else if ( (status = p7_Builder_MaxLength(hmm, bld->w_beta)) != eslOK) goto ERROR; } /* build a faux trace: relative to core model (B->M_1..M_L->E) */ if (opt_tr != NULL) { if ((tr = p7_trace_Create()) == NULL) goto ERROR; if ((status = p7_trace_Append(tr, p7T_B, 0, 0)) != eslOK) goto ERROR; for (k = 1; k <= sq->n; k++) if ((status = p7_trace_Append(tr, p7T_M, k, k)) != eslOK) goto ERROR; if ((status = p7_trace_Append(tr, p7T_E, 0, 0)) != eslOK) goto ERROR; tr->M 
= sq->n; tr->L = sq->n; } /* note that <opt_gm> and <opt_om> were already set by calibrate() call above. */ if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm); if (opt_tr != NULL) *opt_tr = tr; return eslOK; ERROR: p7_hmm_Destroy(hmm); if (tr != NULL) p7_trace_Destroy(tr); if (opt_gm != NULL) p7_profile_Destroy(*opt_gm); if (opt_om != NULL) p7_oprofile_Destroy(*opt_om); return status; }
/* utest_basic() * An MSA to ex{e,o}rcise past demons. * 1. seq2 gives an I->end transition. * 2. seq1 contains degenerate Z,X, exercising symbol counting * of degenerate residues. */ static void utest_basic(void) { char *failmsg = "failure in build.c::utest_basic() unit test"; char msafile[16] = "p7tmpXXXXXX"; /* tmpfile name template */ FILE *ofp = NULL; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; float symfrac = 0.5; if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg); fprintf(ofp, "# STOCKHOLM 1.0\n"); fprintf(ofp, "#=GC RF --xxxxxxxxxxxxxxxx-xxx-x--\n"); fprintf(ofp, "seq1 --ACDEFGHIKLMNPZXS-TVW-Yyy\n"); fprintf(ofp, "seq2 aaACDEFGHIKLMNPQRS-TVWw---\n"); fprintf(ofp, "seq3 aaAC-EFGHIKLMNPQRS-TVW-Y--\n"); fprintf(ofp, "seq4 aaAC-EFGHIKLMNPQRS-TVW-Y--\n"); fprintf(ofp, "//\n"); fclose(ofp); if (esl_msafile_Open(&abc, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp) != eslOK) esl_fatal(failmsg); if (esl_msafile_Read(afp, &msa) != eslOK) esl_fatal(failmsg); if (p7_Fastmodelmaker(msa, symfrac, NULL, &hmm, NULL) != eslOK) esl_fatal(failmsg); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msafile_Close(afp); esl_alphabet_Destroy(abc); remove(msafile); return; }
/* utest_normalization()
 * Builds a single-sequence model with p7_Seqmodel() from a query that
 * includes degenerate residue codes, using a BLOSUM62 score system, and
 * requires the resulting HMM to pass p7_hmm_Validate().
 * <go> is not used.
 */
static void
utest_normalization(ESL_GETOPTS *go)
{
  char         *msg        = "seqmodel normalization utest failed";
  ESL_ALPHABET *abc        = esl_alphabet_Create(eslAMINO);
  char         *query      = "ACDEFGHIKLMNPQRSTVWYBJZOUX";
  int           qlen       = strlen(query);
  ESL_DSQ      *digital    = NULL;
  float         gap_open   = 0.1;
  float         gap_extend = 0.4;
  P7_BUILDER   *bld        = NULL;
  P7_BG        *bg         = p7_bg_Create(abc);
  P7_HMM       *model      = NULL;
  char          errbuf[eslERRBUFSIZE];

  if (esl_abc_CreateDsq(abc, query, &digital) != eslOK)
    esl_fatal(msg);

  bld = p7_builder_Create(NULL, abc);
  if (bld == NULL)
    esl_fatal(msg);

  if (p7_builder_LoadScoreSystem(bld, "BLOSUM62", gap_open, gap_extend, bg) != eslOK)
    esl_fatal(msg);

  if (p7_Seqmodel(abc, digital, qlen, "aatest", bld->Q, bg->f, bld->popen, bld->pextend, &model) != eslOK)
    esl_fatal(msg);

  if (p7_hmm_Validate(model, errbuf, 0.0001) != eslOK)
    esl_fatal("normalization utest failed\n%s\n", errbuf);

  free(digital);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(model);
  p7_builder_Destroy(bld);
  esl_alphabet_Destroy(abc);
}
int main(int argc, char **argv) { char *msg = "p7_gmx unit test driver failed"; ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_BG *bg = p7_bg_Create(abc); P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); float tol = esl_opt_GetReal (go, "-t"); p7_FLogsumInit(); if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal(msg); if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal(msg); if (p7_bg_SetLength(bg, L) != eslOK) esl_fatal(msg); if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL) != eslOK) esl_fatal(msg); utest_GrowTo(); utest_Compare(r, gm, bg, L, tol); esl_getopts_Destroy(go); esl_randomness_Destroy(r); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_profile_Destroy(gm); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_BG *bg = NULL; P7_HMM *hmm = NULL; P7_OPROFILE *om = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); /* Sample a random HMM and optimized profile, in amino acid alphabet. */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (( p7_oprofile_Sample(r, abc, bg, M, L, &hmm, NULL, &om)) != eslOK) esl_fatal("failed to sample HMM and profile"); /* unit test(s) */ utest_ReadWrite(hmm, om); p7_oprofile_Destroy(om); p7_hmm_Destroy(hmm); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
/* serial_loop()
 * Reads alignments one at a time from <cfg->afp>, builds an HMM from
 * each with p7_Builder() using <info->bld> and <info->bg>, and passes
 * the result to output_result(). <cfg->nali> counts the alignments read.
 * A post-build MSA is requested only when <cfg->postmsafile> is set.
 *
 * Naming, build, and output failures are fatal via p7_Fail().
 *
 * Returns:   the first status from esl_msa_Read() that is not <eslOK>,
 *            i.e. whatever ended the read loop.
 */
static int
serial_loop(WORKER_INFO *info, struct cfg_s *cfg)
{
  ESL_MSA  *msa         = NULL;
  ESL_MSA  *postmsa     = NULL;
  ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL;
  P7_HMM   *hmm         = NULL;
  char      errmsg[eslERRBUFSIZE];
  int       status;
  double    entropy;

  cfg->nali = 0;
  while ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK)
    {
      cfg->nali++;

      if ((status = set_msa_name(cfg, errmsg, msa)) != eslOK) p7_Fail("%s\n", errmsg); /* cfg->nnamed gets incremented in this call */

      /* The build error text lives in the builder actually passed to
       * p7_Builder(), so report it from <info->bld>. (The original read
       * it through a local pointer that was always NULL.)
       *
       *                                             bg         new-HMM trarr gm    om   */
      if ((status = p7_Builder(info->bld, msa, info->bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK)
        p7_Fail("build failed: %s", info->bld->errbuf);

      entropy = p7_MeanMatchRelativeEntropy(hmm, info->bg);

      /* <errmsg> is runtime text; pass it as an argument, never as the format itself. */
      if ((status = output_result(cfg, errmsg, cfg->nali, msa, hmm, postmsa, entropy)) != eslOK)
        p7_Fail("%s", errmsg);

      p7_hmm_Destroy(hmm);
      esl_msa_Destroy(msa);
      esl_msa_Destroy(postmsa);
    }

  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GForward (dsq, L, gm, gx1, &fsc); p7_GBackward(dsq, L, gm, gx2, &bsc); p7_GDecoding(gm, gx1, gx2, gx2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_GNull2_ByExpectation(gm, gx2, null2); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); int N = esl_opt_GetInteger(go, "-N"); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); esl_stopwatch_Start(w); while (N--) { /* cfg rng bg gm om */ p7_Calibrate(hmm, NULL, NULL, NULL, NULL, NULL); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); char *hmmfile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); int i; /* Read one HMM from <hmmfile> */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); bg = p7_bg_Create(abc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_bg_SetFilterByHMM(bg, hmm); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
/* Function: p7_oprofile_Sample() * Synopsis: Sample a random profile. * Incept: MSF Tue Nov 3, 2009 [Janelia] * * Purpose: Sample a random profile of <M> nodes for alphabet <abc>, * using <r> as the source of random numbers. Parameterize * it for generation of target sequences of mean length * <L>. Calculate its log-odds scores using background * model <bg>. * * Args: r - random number generator * abc - emission alphabet * bg - background frequency model * M - size of sampled profile, in nodes * L - configured target seq mean length * opt_hmm - optRETURN: sampled HMM * opt_gm - optRETURN: sampled normal profile * opt_om - RETURN: optimized profile * * Returns: <eslOK> on success. * * Throws: (no abnormal error conditions) */ int p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L, P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om) { P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; int status; if ((gm = p7_profile_Create (M, abc)) == NULL) { status = eslEMEM; goto ERROR; } if ((om = p7_oprofile_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; } if ((status = p7_hmm_Sample(r, M, abc, &hmm)) != eslOK) goto ERROR; if ((status = p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)) != eslOK) goto ERROR; if ((status = p7_oprofile_Convert(gm, om)) != eslOK) goto ERROR; if ((status = p7_oprofile_ReconfigLength(om, L)) != eslOK) goto ERROR; if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm); if (opt_gm != NULL) *opt_gm = gm; else p7_profile_Destroy(gm); *ret_om = om; return eslOK; ERROR: if (opt_hmm != NULL) *opt_hmm = NULL; if (opt_gm != NULL) *opt_gm = NULL; *ret_om = NULL; return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_BG *bg = NULL; int M = 100; int L = 200; int nseq = 20; char errbuf[eslERRBUFSIZE]; if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal("failed to sample an HMM"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal("failed to create profile"); if (p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL) != eslOK) esl_fatal("failed to config profile"); if (p7_hmm_Validate (hmm, errbuf, 0.0001) != eslOK) esl_fatal("whoops, HMM is bad!: %s", errbuf); if (p7_profile_Validate(gm, errbuf, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!: %s", errbuf); utest_basic (go); utest_viterbi(go, r, abc, bg, gm, nseq, L); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
static void utest_Compare(void) { ESL_RANDOMNESS *r = esl_randomness_CreateFast(42); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_PROFILE *gm2 = NULL; int M = 200; int L = 400; p7_modelsample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_profile_Config(gm, hmm, bg); p7_profile_Config(gm2, hmm, bg); p7_profile_SetLength(gm, L); p7_profile_SetLength(gm2, L); if (p7_profile_Compare(gm, gm2, 0.001) != eslOK) p7_Die("identical profile comparison failed"); p7_profile_Destroy(gm); p7_profile_Destroy(gm2); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return; }
/* ViterbiScore() unit test
 *
 * Samples a profile and its optimized counterpart, converts the
 * optimized profile with p7_oprofile_Logify(), then for N random i.i.d.
 * sequences of length L compares the p7_ViterbiScore() result against
 * p7_GViterbi(). The two are expected to agree to within 0.001; a
 * larger difference is fatal.
 */
static void
utest_viterbi_score(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N)
{
  P7_HMM      *hmm      = NULL;
  P7_PROFILE  *gm       = NULL;
  P7_OPROFILE *om       = NULL;
  ESL_DSQ     *dsq      = malloc(sizeof(ESL_DSQ) * (L+2));
  P7_OMX      *ox       = p7_omx_Create(M, 0, 0);
  P7_GMX      *gx       = p7_gmx_Create(M, L);
  float        opt_sc;   /* score from the optimized implementation */
  float        ref_sc;   /* score from the generic implementation   */

  p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om);
  p7_oprofile_Logify(om);

  for (; N != 0; N--)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);

      p7_ViterbiScore(dsq, L, om, ox, &opt_sc);
      p7_GViterbi    (dsq, L, gm, gx, &ref_sc);

      if (fabs(opt_sc-ref_sc) > 0.001)
        esl_fatal("viterbi score unit test failed: scores differ");
    }

  free(dsq);
  p7_hmm_Destroy(hmm);
  p7_omx_Destroy(ox);
  p7_gmx_Destroy(gx);
  p7_profile_Destroy(gm);
  p7_oprofile_Destroy(om);
}
/* create_ssi_index()
 * Writes an SSI index, named "<hfp->fname>.ssi", for the open HMM file
 * <hfp>. Each HMM's name is added as a key and, when present, its
 * accession as an alias of that name. Progress and a summary are
 * printed to stdout; every failure is fatal. <go> is not used.
 */
static void
create_ssi_index(ESL_GETOPTS *go, P7_HMMFILE *hfp)
{
  ESL_NEWSSI   *index   = NULL;
  ESL_ALPHABET *abc     = NULL;
  P7_HMM       *hmm     = NULL;
  int           nhmm    = 0;
  char         *ssifile = NULL;
  uint16_t      fh;
  int           status;

  if (esl_sprintf(&ssifile, "%s.ssi", hfp->fname) != eslOK) p7_Die("esl_sprintf() failed");

  /* Open a brand new index. */
  status = esl_newssi_Open(ssifile, FALSE, &index);
  if (status == eslENOTFOUND)
    esl_fatal("failed to open SSI index %s", ssifile);
  else if (status == eslEOVERWRITE)
    esl_fatal("SSI index %s already exists; delete or rename it", ssifile);
  else if (status != eslOK)
    esl_fatal("failed to create a new SSI index");

  /* Register the HMM file itself; 0 = format code (HMMs don't have any yet) */
  if (esl_newssi_AddFile(index, hfp->fname, 0, &fh) != eslOK)
    esl_fatal("Failed to add HMM file %s to new SSI index\n", hfp->fname);

  printf("Working... ");
  fflush(stdout);

  /* One primary key (and possibly one alias) per HMM, until end of file. */
  for (;;)
    {
      status = p7_hmmfile_Read(hfp, &abc, &hmm);
      if (status == eslEOF) break;

      if (status == eslEOD)
        p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
      else if (status == eslEFORMAT)
        p7_Fail("bad file format in HMM file %s", hfp->fname);
      else if (status == eslEINCOMPAT)
        p7_Fail("HMM file %s contains different alphabets", hfp->fname);
      else if (status != eslOK)
        p7_Fail("Unexpected error in reading HMMs from %s", hfp->fname);

      nhmm++;

      if (hmm->name == NULL)
        p7_Fail("Every HMM must have a name to be indexed. Failed to find name of HMM #%d\n", nhmm);

      if (esl_newssi_AddKey(index, hmm->name, fh, hmm->offset, 0, 0) != eslOK)
        p7_Fail("Failed to add key %s to SSI index", hmm->name);

      if (hmm->acc)
        {
          if (esl_newssi_AddAlias(index, hmm->acc, hmm->name) != eslOK)
            p7_Fail("Failed to add secondary key %s to SSI index", hmm->acc);
        }

      p7_hmm_Destroy(hmm);
    }

  if (esl_newssi_Write(index) != eslOK)
    p7_Fail("Failed to write keys to ssi file %s\n", ssifile);

  /* Summary. */
  printf("done.\n");
  if (index->nsecondary > 0)
    printf("Indexed %d HMMs (%ld names and %ld accessions).\n", nhmm, (long) index->nprimary, (long) index->nsecondary);
  else
    printf("Indexed %d HMMs (%ld names).\n", nhmm, (long) index->nprimary);
  printf("SSI index written to file %s\n", ssifile);

  free(ssifile);
  esl_alphabet_Destroy(abc);
  esl_newssi_Close(index);
  return;
}
/* utest_basic()
 * Minimal end-to-end driver: builds a small DNA profile from a
 * two-sequence GAATTC alignment, then runs Viterbi, a traceback, and
 * Forward against the target GAATTC. The trace score must agree with
 * the Viterbi score. With -v, scores, DP matrices, and the trace are
 * dumped to stdout, which makes this handy for debugging.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char         *query  = "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int           fmt    = eslMSAFILE_STOCKHOLM;
  char         *target = "GAATTC";
  ESL_ALPHABET *abc    = NULL;
  ESL_MSA      *msa    = NULL;
  P7_HMM       *hmm    = NULL;
  P7_PROFILE   *gm     = NULL;
  P7_BG        *bg     = NULL;
  P7_PRIOR     *prior  = NULL;
  ESL_DSQ      *dsq    = NULL;
  P7_GMX       *gx     = NULL;
  P7_TRACE     *tr     = NULL;
  int           L      = strlen(target);
  float         vit_sc, trace_sc, fwd_sc;
  int           be_verbose;

  /* Build and parameterize the model. */
  if ((abc = esl_alphabet_Create(eslDNA)) == NULL)
    esl_fatal("failed to create alphabet");
  if ((prior = p7_prior_CreateNucleic()) == NULL)
    esl_fatal("failed to create prior");
  if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL)
    esl_fatal("failed to create MSA");
  if (esl_msa_Digitize(abc, msa, NULL) != eslOK)
    esl_fatal("failed to digitize MSA");
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK)
    esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, prior) != eslOK)
    esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL) != eslOK)
    esl_fatal("failed to make consensus");

  /* Configure the search profile. */
  if ((bg = p7_bg_Create(abc)) == NULL)
    esl_fatal("failed to create DNA null model");
  if ((gm = p7_profile_Create(hmm->M, abc)) == NULL)
    esl_fatal("failed to create GAATTC profile");
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL) != eslOK)
    esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK)
    esl_fatal("whoops, profile is bad!");

  /* Target sequence and DP workspace. */
  if (esl_abc_CreateDsq(abc, target, &dsq) != eslOK)
    esl_fatal("failed to create GAATTC digital sequence");
  if ((gx = p7_gmx_Create(gm->M, L)) == NULL)
    esl_fatal("failed to create DP matrix");
  if ((tr = p7_trace_Create()) == NULL)
    esl_fatal("trace creation failed");

  /* Viterbi, with optional dumps. */
  p7_GViterbi(dsq, L, gm, gx, &vit_sc);
  be_verbose = esl_opt_GetBoolean(go, "-v");
  if (be_verbose)
    {
      printf("Viterbi score: %.4f\n", vit_sc);
      p7_gmx_Dump(stdout, gx, p7_DEFAULT);
    }

  /* Traceback; its score has to reproduce the Viterbi score. */
  p7_GTrace     (dsq, L, gm, gx, tr);
  p7_trace_Score(tr, dsq, gm, &trace_sc);
  if (be_verbose)
    p7_trace_Dump(stdout, tr, gm, dsq);
  if (esl_FCompare(vit_sc, trace_sc, 1e-5) != eslOK)
    esl_fatal("trace score and Viterbi score don't agree.");

  /* Forward, with optional dumps. */
  p7_GForward(dsq, L, gm, gx, &fwd_sc);
  if (be_verbose)
    {
      printf("Forward score: %.4f\n", fwd_sc);
      p7_gmx_Dump(stdout, gx, p7_DEFAULT);
    }

  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(prior);
  esl_alphabet_Destroy(abc);
  return;
}
/* Example driver for p7_ProfileEmit().
 * Reads one HMM from the file named by the first argument, configures a
 * local profile for length -L, and emits -N sequences from it. Each
 * sequence is named "<hmmname>-sample<i>", written to stdout as FASTA,
 * and its trace is validated; a failed validation is fatal. The RNG is
 * seeded from -s.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng     = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  char           *hmmfile = esl_opt_GetArg(go, 1);
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_TRACE       *tr      = p7_trace_Create();
  ESL_SQ         *sq      = NULL;
  char            errbuf[eslERRBUFSIZE];
  int             i;
  int             status;

  /* Open the HMM file, reporting the specific kind of failure. */
  status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, hmmfile, errbuf);

  /* Read the first (only) HMM from it. */
  status = p7_hmmfile_Read(hfp, &abc, &hmm);
  if      (status == eslEFORMAT)   p7_Fail("Bad file format in HMM file %s:\n%s\n",          hfp->fname, hfp->errbuf);
  else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type));
  else if (status == eslEOF)       p7_Fail("Empty HMM file %s? No HMM data found.\n",        hfp->fname);
  else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s\n",     hfp->fname);
  p7_hmmfile_Close(hfp);

  /* Configure the null model and profile for length L. */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);

  sq = esl_sq_CreateDigital(abc);

  for (i = 0; i < N; i++)
    {
      p7_ProfileEmit(rng, hmm, gm, bg, sq, tr);
      esl_sq_FormatName(sq, "%s-sample%d", hmm->name, i);
      esl_sqio_Write(stdout, sq, eslSQFILE_FASTA, FALSE);

      /* <errbuf> is runtime text: pass it as an argument so any '%' in
       * it is printed literally rather than parsed as a conversion.
       */
      if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) esl_fatal("%s", errbuf);

      esl_sq_Reuse(sq);
      p7_trace_Reuse(tr);
    }

  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
static void utest_oprofileSendRecv(int my_rank, int nproc) { ESL_RANDOMNESS *r = esl_randomness_CreateFast(42); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OPROFILE *om2 = NULL; int M = 200; int L = 400; char *wbuf = NULL; int wn = 0; int i; char errbuf[eslERRBUFSIZE]; p7_hmm_Sample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); om = p7_oprofile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); p7_oprofile_Convert(gm, om); p7_bg_SetLength (bg, L); if (my_rank == 0) { for (i = 1; i < nproc; i++) { ESL_DPRINTF1(("Master: receiving test profile\n")); p7_oprofile_MPIRecv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &om2); ESL_DPRINTF1(("Master: test profile received\n")); if (p7_oprofile_Compare(om, om2, 0.001, errbuf) != eslOK) p7_Die("Received profile not identical to what was sent\n%s", errbuf); p7_oprofile_Destroy(om2); } } else { ESL_DPRINTF1(("Worker %d: sending test profile\n", my_rank)); p7_oprofile_MPISend(om, 0, 0, MPI_COMM_WORLD, &wbuf, &wn); ESL_DPRINTF1(("Worker %d: test profile sent\n", my_rank)); } free(wbuf); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return; }
int main(int argc, char **argv) { char *hmmfile = argv[1]; /* name of HMM file to read one HMM from */ ESL_ALPHABET *abc = NULL; /* sequence alphabet */ ESL_RANDOMNESS *r = NULL; /* source of randomness */ P7_HMMFILE *hfp = NULL; /* open hmmfile */ P7_HMM *hmm = NULL; /* HMM to emit from */ P7_PROFILE *gm = NULL; /* profile HMM (scores) */ P7_BG *bg = NULL; /* null model */ P7_TRACE *tr = NULL; /* sampled trace */ ESL_SQ *sq = NULL; /* sampled digital sequence */ int n = 1000; int counts[p7T_NSTATETYPES]; int i; float sc; float nullsc; double bitscore; r = esl_randomness_CreateFast(0); tr = p7_trace_Create(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("failed to open %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("failed to read HMM"); sq = esl_sq_CreateDigital(abc); bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); for (i = 0; i < n; i++) { p7_ProfileEmit(r, hmm, gm, bg, sq, tr); p7_trace_GetStateUseCounts(tr, counts); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_trace_Score(tr, sq->dsq, gm, &sc); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); bitscore = (sc - nullsc)/ eslCONST_LOG2; printf("%d %8.4f\n", counts[p7T_M] + (counts[p7T_I] + counts[p7T_D])/2, bitscore); } p7_profile_Destroy(gm); esl_sq_Destroy(sq); p7_trace_Destroy(tr); esl_randomness_Destroy(r); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); p7_hmm_Destroy(hmm); return eslOK; }
/* Function: p7_Seqmodel() * Synopsis: Make a profile HMM from a single sequence. * * Purpose: Make a profile HMM from a single sequence, for * probabilistic Smith/Waterman alignment, HMMER3-style. * * The query is digital sequence <dsq> of length <M> * residues in alphabet <abc>, named <name>. * * The scoring system is given by <Q>, <f>, <popen>, and * <pextend>. <Q> is a $K \times K$ matrix giving * conditional residue probabilities $P(a \mid b)}$; these * are typically obtained by reverse engineering a score * matrix like BLOSUM62. <f> is a vector of $K$ background * frequencies $p_a$. <popen> and <pextend> are the * probabilities assigned to gap-open ($t_{MI}$ and * $t_{MD}$) and gap-extend ($t_{II}$ and $t_{DD}$) * transitions. * * The <p7H_SINGLE> flag is set on the <hmm>. Model * configuration (<p7_profile_Config(), friends> detects * this flag. <B->Mk> entry transitions include a match * state occupancy term for profile HMMs, but for single * queries, that <occ[]> term is assumed 1.0 for all * positions. See commentary in modelconfig.c. * * Args: * * Returns: <eslOK> on success, and a newly allocated HMM is returned * in <ret_hmm>. * * Throws: <eslEMEM> on allocation error, and <*ret_hmm> is <NULL>. */ int p7_Seqmodel(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int M, char *name, ESL_DMATRIX *Q, float *f, double popen, double pextend, P7_HMM **ret_hmm) { int status; P7_HMM *hmm = NULL; char *logmsg = "[HMM created from a query sequence]"; int k; if ((hmm = p7_hmm_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; } for (k = 0; k <= M; k++) { /* Use rows of P matrix as source of match emission vectors */ if (k > 0) esl_vec_D2F(Q->mx[(int) dsq[k]], abc->K, hmm->mat[k]); /* Set inserts to background for now. This will be improved. 
*/ esl_vec_FCopy(f, abc->K, hmm->ins[k]); hmm->t[k][p7H_MM] = 1.0 - 2 * popen; hmm->t[k][p7H_MI] = popen; hmm->t[k][p7H_MD] = popen; hmm->t[k][p7H_IM] = 1.0 - pextend; hmm->t[k][p7H_II] = pextend; hmm->t[k][p7H_DM] = 1.0 - pextend; hmm->t[k][p7H_DD] = pextend; } /* Deal w/ special stuff at node M, overwriting a little of what we * just did. */ hmm->t[M][p7H_MM] = 1.0 - popen; hmm->t[M][p7H_MD] = 0.; hmm->t[M][p7H_DM] = 1.0; hmm->t[M][p7H_DD] = 0.; /* Add mandatory annotation */ p7_hmm_SetName(hmm, name); p7_hmm_AppendComlog(hmm, 1, &logmsg); hmm->nseq = 1; p7_hmm_SetCtime(hmm); hmm->checksum = 0; hmm->flags |= p7H_SINGLE; *ret_hmm = hmm; return eslOK; ERROR: if (hmm != NULL) p7_hmm_Destroy(hmm); *ret_hmm = NULL; return status; }
/* utest_fwdback()
 *
 * Forward/Backward unit test. Samples a model of length <M>, then for
 * each of <N> random i.i.d. sequences of length <L>:
 *   - runs the full-matrix and the parser versions of Forward and
 *     Backward and requires all four scores to agree within 0.0001;
 *   - compares to GForward() scores, with a tolerance that depends on
 *     whether FLogsum() is exact (0.0001) or approximate (1.0).
 *
 * Any disagreement, or failure to allocate the working structures,
 * is fatal.
 */
static void
utest_fwdback(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N)
{
  char        *msg = "forward/backward unit test failed";
  P7_HMM      *hmm = NULL;
  P7_PROFILE  *gm  = NULL;
  P7_OPROFILE *om  = NULL;
  ESL_DSQ     *dsq = malloc(sizeof(ESL_DSQ) * (L+2));
  P7_OMX      *fwd = p7_omx_Create(M, 0, L);	/* used by the parser versions      */
  P7_OMX      *bck = p7_omx_Create(M, 0, L);
  P7_OMX      *oxf = p7_omx_Create(M, L, L);	/* used by the full-matrix versions */
  P7_OMX      *oxb = p7_omx_Create(M, L, L);
  P7_GMX      *gx  = p7_gmx_Create(M, L);
  float        tolerance;
  float        fsc1, fsc2;
  float        bsc1, bsc2;
  float        generic_sc;

  /* Every one of these is dereferenced below; fail cleanly if any allocation failed. */
  if (dsq == NULL || fwd == NULL || bck == NULL || oxf == NULL || oxb == NULL || gx == NULL)
    esl_fatal(msg);

  p7_FLogsumInit();
  if (p7_FLogsumError(-0.4, -0.5) > 0.0001) tolerance = 1.0;     /* weaker test against GForward()                   */
  else                                      tolerance = 0.0001;  /* stronger test: FLogsum() is in slow exact mode.  */

  p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om);
  while (N--)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
      p7_Forward       (dsq, L, om, oxf,      &fsc1);
      p7_Backward      (dsq, L, om, oxf, oxb, &bsc1);
      p7_ForwardParser (dsq, L, om, fwd,      &fsc2);
      p7_BackwardParser(dsq, L, om, fwd, bck, &bsc2);
      p7_GForward      (dsq, L, gm, gx,       &generic_sc);

      /* Forward and Backward scores should agree with high tolerance */
      if (fabs(fsc1-bsc1) > 0.0001) esl_fatal(msg);
      if (fabs(fsc2-bsc2) > 0.0001) esl_fatal(msg);
      if (fabs(fsc1-fsc2) > 0.0001) esl_fatal(msg);

      /* GForward scores should approximate Forward scores,
       * with tolerance that depends on how logsum.c was compiled
       */
      if (fabs(fsc1-generic_sc) > tolerance) esl_fatal(msg);
    }

  free(dsq);
  p7_hmm_Destroy(hmm);
  p7_omx_Destroy(oxb);
  p7_omx_Destroy(oxf);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_profile_Destroy(gm);
  p7_oprofile_Destroy(om);
}
/* multifetch:
 * given a file containing lines with one name or key per line;
 * parse the file line-by-line;
 * if we have an SSI index available, retrieve the HMMs by key
 * as we see each line;
 * else, without an SSI index, store the keys in a hash, then
 * read the entire HMM file in a single pass, outputting HMMs
 * that are in our keylist.
 *
 * Note that with an SSI index, you get the HMMs in the order they
 * appear in the <keyfile>, but without an SSI index, you get HMMs in
 * the order they occur in the HMM file.
 *
 * A key that occurs twice in <keyfile>, or that cannot be stored in
 * the key hash for any other reason, is a fatal error. Lines starting
 * with '#' in <keyfile> are comments. When <ofp> is not stdout, a
 * summary count of retrieved HMMs is printed to stdout.
 */
static void
multifetch(ESL_GETOPTS *go, FILE *ofp, char *keyfile, P7_HMMFILE *hfp)
{
  ESL_KEYHASH    *keys   = esl_keyhash_Create();
  ESL_FILEPARSER *efp    = NULL;
  ESL_ALPHABET   *abc    = NULL;
  P7_HMM         *hmm    = NULL;
  int             nhmm   = 0;
  char           *key;
  int             keylen;
  int             keyidx;
  int             status;

  if (keys == NULL) p7_Fail("Failed to allocate key hash for key file %s\n", keyfile);

  if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK)  p7_Fail("Failed to open key file %s\n", keyfile);
  esl_fileparser_SetCommentChar(efp, '#');

  while (esl_fileparser_NextLine(efp) == eslOK)
    {
      if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK)
	p7_Fail("Failed to read HMM name on line %d of file %s\n", efp->linenumber, keyfile);

      /* Keys are hashed in both modes, so duplicates are always detected.
       * Any other failure would silently drop the key, so treat it as fatal too.
       */
      status = esl_key_Store(keys, key, &keyidx);
      if      (status == eslEDUP) p7_Fail("HMM key %s occurs more than once in file %s\n", key, keyfile);
      else if (status != eslOK)   p7_Fail("Failed to store HMM key %s from file %s\n", key, keyfile);

      if (hfp->ssi != NULL) { onefetch(go, ofp, key, hfp);  nhmm++; }
    }

  if (hfp->ssi == NULL)
    {
      /* No index: one sequential pass over the HMM file, keeping what is in the key list. */
      while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF)
	{
	  if      (status == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
	  else if (status == eslEFORMAT)   p7_Fail("bad file format in HMM file %s",             hfp->fname);
	  else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets",   hfp->fname);
	  else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s",   hfp->fname);

	  /* A model matches by name, or by accession when it has one. */
	  if (esl_key_Lookup(keys, hmm->name, &keyidx) == eslOK ||
	      ((hmm->acc) && esl_key_Lookup(keys, hmm->acc, &keyidx) == eslOK))
	    {
	      p7_hmmfile_WriteASCII(ofp, -1, hmm);
	      nhmm++;
	    }

	  p7_hmm_Destroy(hmm);
	}
    }

  if (ofp != stdout) printf("\nRetrieved %d HMMs.\n", nhmm);
  if (abc != NULL) esl_alphabet_Destroy(abc);
  esl_keyhash_Destroy(keys);
  esl_fileparser_Close(efp);
  return;
}
/* Releases what the test holds: removes the search task context if one was
 * added, and destroys the HMM if one is set. The <cleanuped> flag makes
 * every call after the first a no-op.
 */
void GTest_UHMM3Search::cleanup() {
    if( !cleanuped ) {
        if( ctxAdded ) {
            removeContext( searchTaskCtxName );
        }
        if( hmm != NULL ) {
            p7_hmm_Destroy( hmm );
        }
        cleanuped = true;
    }
}
/* free_QueueData()
 *
 * Release a QUEUE_DATA record together with the query objects it holds
 * (options, alphabet, HMM, sequence, and command string). The record is
 * zeroed before its own memory is freed.
 *
 * Passing a NULL <data> is a no-op.
 */
void
free_QueueData(QUEUE_DATA *data)
{
  if (data == NULL) return;

  /* free the query data */
  esl_getopts_Destroy(data->opts);

  if (data->abc != NULL) esl_alphabet_Destroy(data->abc);
  if (data->hmm != NULL) p7_hmm_Destroy(data->hmm);
  if (data->seq != NULL) esl_sq_Destroy(data->seq);
  free(data->cmd);		/* free(NULL) is a no-op, no guard needed */

  memset(data, 0, sizeof(*data));
  free(data);
}
/* utest_null2_expectation()
 *
 * Compare results of p7_Null2_ByExpectation() to the generic
 * p7_GNull2_ByExpectation().
 *
 * Samples a model of length <M>, then for each of <N> random i.i.d.
 * sequences of length <L> runs Forward, Backward and posterior decoding
 * in both the optimized and the generic implementation, computes the
 * null2 vector from each decoding, and requires the two vectors
 * (<abc->Kp> entries) to agree within <tolerance>.
 *
 * Any failure status, mismatch, or allocation failure is fatal.
 */
static void
utest_null2_expectation(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N, float tolerance)
{
  char        *msg  = "null2 expectation unit test failed";
  P7_HMM      *hmm  = NULL;
  P7_PROFILE  *gm   = NULL;
  P7_OPROFILE *om   = NULL;
  ESL_DSQ     *dsq  = malloc(sizeof(ESL_DSQ) * (L+2));
  P7_OMX      *fwd  = p7_omx_Create(M, L, L);
  P7_OMX      *bck  = p7_omx_Create(M, L, L);
  P7_OMX      *pp   = p7_omx_Create(M, L, L);
  P7_GMX      *gxf  = p7_gmx_Create(M, L);
  P7_GMX      *gxb  = p7_gmx_Create(M, L);
  P7_GMX      *gpp  = p7_gmx_Create(M, L);
  float       *on2  = malloc(sizeof(float) * abc->Kp);	/* null2 from the optimized decoding */
  float       *gn2  = malloc(sizeof(float) * abc->Kp);	/* null2 from the generic decoding   */
  float        fsc1, fsc2;
  float        bsc1, bsc2;

  /* All of these are dereferenced below; fail cleanly if any allocation failed. */
  if (!dsq || !gn2 || !on2)                   esl_fatal(msg);
  if (!fwd || !bck || !pp)                    esl_fatal(msg);
  if (!gxf || !gxb || !gpp)                   esl_fatal(msg);

  if (p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om) != eslOK) esl_fatal(msg);
  while (N--)
    {
      if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal(msg);

      /* optimized implementation */
      if (p7_Forward (dsq, L, om, fwd,      &fsc1) != eslOK) esl_fatal(msg);
      if (p7_Backward(dsq, L, om, fwd, bck, &bsc1) != eslOK) esl_fatal(msg);
      if (p7_Decoding(om, fwd, bck, pp)            != eslOK) esl_fatal(msg);
      if (p7_Null2_ByExpectation(om, pp, on2)      != eslOK) esl_fatal(msg);

      /* generic implementation */
      if (p7_GForward (dsq, L, gm, gxf, &fsc2)     != eslOK) esl_fatal(msg);
      if (p7_GBackward(dsq, L, gm, gxb, &bsc2)     != eslOK) esl_fatal(msg);
      if (p7_GDecoding(gm, gxf, gxb, gpp)          != eslOK) esl_fatal(msg);
      if (p7_GNull2_ByExpectation(gm, gpp, gn2)    != eslOK) esl_fatal(msg);

      if (esl_vec_FCompare(gn2, on2, abc->Kp, tolerance) != eslOK) esl_fatal(msg);
    }

  p7_gmx_Destroy(gpp);
  p7_gmx_Destroy(gxf);
  p7_gmx_Destroy(gxb);
  p7_omx_Destroy(pp);
  p7_omx_Destroy(fwd);
  p7_omx_Destroy(bck);
  free(on2);
  free(gn2);
  free(dsq);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_hmm_Destroy(hmm);
}
/* Function: p7_EntropyWeight() * Incept: SRE, Fri May 4 15:32:59 2007 [Janelia] * * Purpose: Use the "entropy weighting" algorithm to determine * what effective sequence number we should use, and * return it in <ret_Neff>. * * Caller provides a count-based <hmm>, and the * Dirichlet prior <pri> that's to be used to parameterize * models; neither of these will be modified. * Caller also provides the relative entropy * target in bits in <etarget>. * * <ret_Neff> will range from 0 to the true number of * sequences counted into the model, <hmm->nseq>. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. */ int p7_EntropyWeight(const P7_HMM *hmm, const P7_BG *bg, const P7_PRIOR *pri, double etarget, double *ret_Neff) { int status; ESL_ROOTFINDER *R = NULL; struct ew_param_s p; double Neff; double fx; /* Store parameters in the structure we'll pass to the rootfinder */ p.hmm = hmm; p.bg = bg; p.pri = pri; if ((p.h2 = p7_hmm_Clone(hmm)) == NULL) return eslEMEM; p.etarget = etarget; Neff = (double) hmm->nseq; if ((status = eweight_target_f(Neff, &p, &fx)) != eslOK) goto ERROR; if (fx > 0.) { if ((R = esl_rootfinder_Create(eweight_target_f, &p)) == NULL) {status = eslEMEM; goto ERROR;} esl_rootfinder_SetAbsoluteTolerance(R, 0.01); /* getting Neff to ~2 sig digits is fine */ if ((status = esl_root_Bisection(R, 0., (double) hmm->nseq, &Neff)) != eslOK) goto ERROR; esl_rootfinder_Destroy(R); } p7_hmm_Destroy(p.h2); *ret_Neff = Neff; return eslOK; ERROR: if (p.h2 != NULL) p7_hmm_Destroy(p.h2); if (R != NULL) esl_rootfinder_Destroy(R); *ret_Neff = (double) hmm->nseq; return status; }
/* utest_decoding()
 *
 * Posterior decoding unit test: compare results to GDecoding().
 *
 * Samples a model of length <M>, then for each of <N> random i.i.d.
 * sequences of length <L> decodes once with the optimized
 * implementation (converted to a generic matrix) and once with the
 * generic implementation. The two posterior matrices must agree within
 * <tolerance>. Any failure status or mismatch is fatal.
 */
static void
utest_decoding(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N, float tolerance)
{
  char        *failmsg = "decoding unit test failed";
  P7_HMM      *hmm     = NULL;
  P7_PROFILE  *gm      = NULL;
  P7_OPROFILE *om      = NULL;
  ESL_DSQ     *seq     = malloc(sizeof(ESL_DSQ) * (L+2));
  P7_OMX      *ofwd    = p7_omx_Create(M, L, L);
  P7_OMX      *obck    = p7_omx_Create(M, L, L);
  P7_OMX      *opp     = p7_omx_Create(M, L, L);
  P7_GMX      *gfwd    = p7_gmx_Create(M, L);
  P7_GMX      *gbck    = p7_gmx_Create(M, L);
  P7_GMX      *pp_opt  = p7_gmx_Create(M, L);	/* optimized posteriors, deconverted */
  P7_GMX      *pp_gen  = p7_gmx_Create(M, L);	/* generic posteriors                */
  float        fsc1, fsc2;
  float        bsc1, bsc2;

  if (p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om) != eslOK)
    esl_fatal(failmsg);

  for (; N != 0; N--)
    {
      if (esl_rsq_xfIID(r, bg->f, abc->K, L, seq) != eslOK)
	esl_fatal(failmsg);

      /* Optimized path: Forward, Backward, decoding, then conversion to a generic matrix. */
      if (p7_Forward (seq, L, om, ofwd,       &fsc1) != eslOK ||
	  p7_Backward(seq, L, om, ofwd, obck, &bsc1) != eslOK ||
	  p7_Decoding(om, ofwd, obck, opp)           != eslOK ||
	  p7_omx_FDeconvert(opp, pp_opt)             != eslOK)
	esl_fatal(failmsg);

      /* Generic path: the reference result. */
      if (p7_GForward (seq, L, gm, gfwd, &fsc2)      != eslOK ||
	  p7_GBackward(seq, L, gm, gbck, &bsc2)      != eslOK ||
	  p7_GDecoding(gm, gfwd, gbck, pp_gen)       != eslOK)
	esl_fatal(failmsg);

      /* When debugging a mismatch, dumping <pp_opt> and <pp_gen> with
       * p7_gmx_Dump(stdout, ..., p7_DEFAULT) at this point is useful.
       */
      if (p7_gmx_Compare(pp_opt, pp_gen, tolerance) != eslOK)
	esl_fatal(failmsg);
    }

  p7_gmx_Destroy(pp_opt);
  p7_gmx_Destroy(pp_gen);
  p7_gmx_Destroy(gfwd);
  p7_gmx_Destroy(gbck);
  p7_omx_Destroy(ofwd);
  p7_omx_Destroy(obck);
  p7_omx_Destroy(opp);
  free(seq);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_hmm_Destroy(hmm);
}
/* onefetch():
 * Retrieve the single HMM identified by <key> (a name or an accession)
 * from <hfp> and write it to <ofp>.
 *
 * With an SSI index, the file is first positioned by key, so the read
 * loop below starts at that position. Without one, the loop simply
 * reads HMMs sequentially from the current position until one matches.
 *
 * Failing to find the key, or any read/index error, is fatal.
 */
static void
onefetch(ESL_GETOPTS *go, FILE *ofp, char *key, P7_HMMFILE *hfp)
{
  ESL_ALPHABET *alphabet = NULL;
  P7_HMM       *model    = NULL;
  int           rc;

  if (hfp->ssi != NULL)
    {
      rc = p7_hmmfile_PositionByKey(hfp, key);
      if      (rc == eslENOTFOUND) p7_Fail("HMM %s not found in SSI index for file %s\n", key, hfp->fname);
      else if (rc == eslEFORMAT)   p7_Fail("Failed to parse SSI index for %s\n", hfp->fname);
      else if (rc != eslOK)        p7_Fail("Failed to look up location of HMM %s in SSI index of file %s\n", key, hfp->fname);
    }

  for (;;)
    {
      rc = p7_hmmfile_Read(hfp, &alphabet, &model);
      if (rc == eslEOF) break;			/* ran out of models: not found */

      if      (rc == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
      else if (rc == eslEFORMAT)   p7_Fail("bad file format in HMM file %s",             hfp->fname);
      else if (rc == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets",   hfp->fname);
      else if (rc != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s",   hfp->fname);

      /* Match on the name first, then on the accession if the model has one. */
      if (strcmp(key, model->name) == 0)               break;
      if (model->acc && strcmp(key, model->acc) == 0)  break;

      p7_hmm_Destroy(model);
      model = NULL;
    }

  /* Leaving the loop with eslOK means we stopped on a match. */
  if (rc == eslOK)
    {
      p7_hmmfile_WriteASCII(ofp, -1, model);
      p7_hmm_Destroy(model);
    }
  else
    {
      p7_Fail("HMM %s not found in file %s\n", key, hfp->fname);
    }

  esl_alphabet_Destroy(alphabet);
}