static void utest_Compare(void) { ESL_RANDOMNESS *r = esl_randomness_CreateFast(42); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_PROFILE *gm2 = NULL; int M = 200; int L = 400; p7_modelsample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_profile_Config(gm, hmm, bg); p7_profile_Config(gm2, hmm, bg); p7_profile_SetLength(gm, L); p7_profile_SetLength(gm2, L); if (p7_profile_Compare(gm, gm2, 0.001) != eslOK) p7_Die("identical profile comparison failed"); p7_profile_Destroy(gm); p7_profile_Destroy(gm2); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return; }
/* create_ssi_index()
 *
 * Build an SSI index file, named "<hfp->fname>.ssi", for the open HMM
 * file <hfp>. Each HMM's name is stored as a primary key; when an HMM
 * also has an accession, the accession is stored as an alias of that
 * name.
 *
 * <go> is accepted but not consulted here.
 *
 * Any failure is reported through esl_fatal()/p7_Fail()/p7_Die().
 * Progress and a summary are printed to stdout.
 */
static void
create_ssi_index(ESL_GETOPTS *go, P7_HMMFILE *hfp)
{
  ESL_NEWSSI   *ns       = NULL;
  ESL_ALPHABET *abc      = NULL;
  P7_HMM       *hmm      = NULL;
  char         *ssifile  = NULL;
  int           nhmm     = 0;     /* number of HMMs read so far */
  uint16_t      fh;               /* file handle assigned by the SSI index */
  int           status;

  if (esl_sprintf(&ssifile, "%s.ssi", hfp->fname) != eslOK) p7_Die("esl_sprintf() failed");

  /* FALSE: refuse to overwrite an index that already exists */
  status = esl_newssi_Open(ssifile, FALSE, &ns);
  switch (status) {
  case eslOK:         break;
  case eslENOTFOUND:  esl_fatal("failed to open SSI index %s", ssifile);                      break;
  case eslEOVERWRITE: esl_fatal("SSI index %s already exists; delete or rename it", ssifile); break;
  default:            esl_fatal("failed to create a new SSI index");                          break;
  }

  /* 0 = format code (HMMs don't have any yet) */
  if (esl_newssi_AddFile(ns, hfp->fname, 0, &fh) != eslOK)
    esl_fatal("Failed to add HMM file %s to new SSI index\n", hfp->fname);

  printf("Working... ");
  fflush(stdout);

  while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF)
    {
      switch (status) {
      case eslOK:        break;
      case eslEOD:       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname); break;
      case eslEFORMAT:   p7_Fail("bad file format in HMM file %s",             hfp->fname); break;
      case eslEINCOMPAT: p7_Fail("HMM file %s contains different alphabets",   hfp->fname); break;
      default:           p7_Fail("Unexpected error in reading HMMs from %s",   hfp->fname); break;
      }
      nhmm++;

      if (hmm->name == NULL)
        p7_Fail("Every HMM must have a name to be indexed. Failed to find name of HMM #%d\n", nhmm);

      /* primary key: the HMM's name, pointing at its offset in the file */
      if (esl_newssi_AddKey(ns, hmm->name, fh, hmm->offset, 0, 0) != eslOK)
        p7_Fail("Failed to add key %s to SSI index", hmm->name);

      /* secondary key: the accession, if there is one */
      if (hmm->acc && esl_newssi_AddAlias(ns, hmm->acc, hmm->name) != eslOK)
        p7_Fail("Failed to add secondary key %s to SSI index", hmm->acc);

      p7_hmm_Destroy(hmm);
    }

  if (esl_newssi_Write(ns) != eslOK)
    p7_Fail("Failed to write keys to ssi file %s\n", ssifile);

  printf("done.\n");
  if (ns->nsecondary > 0)
    printf("Indexed %d HMMs (%ld names and %ld accessions).\n", nhmm, (long) ns->nprimary, (long) ns->nsecondary);
  else
    printf("Indexed %d HMMs (%ld names).\n", nhmm, (long) ns->nprimary);
  printf("SSI index written to file %s\n", ssifile);

  free(ssifile);
  esl_alphabet_Destroy(abc);
  esl_newssi_Close(ns);
  return;
}
static void utest_oprofileSendRecv(int my_rank, int nproc) { ESL_RANDOMNESS *r = esl_randomness_CreateFast(42); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OPROFILE *om2 = NULL; int M = 200; int L = 400; char *wbuf = NULL; int wn = 0; int i; char errbuf[eslERRBUFSIZE]; p7_hmm_Sample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); om = p7_oprofile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); p7_oprofile_Convert(gm, om); p7_bg_SetLength (bg, L); if (my_rank == 0) { for (i = 1; i < nproc; i++) { ESL_DPRINTF1(("Master: receiving test profile\n")); p7_oprofile_MPIRecv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &om2); ESL_DPRINTF1(("Master: test profile received\n")); if (p7_oprofile_Compare(om, om2, 0.001, errbuf) != eslOK) p7_Die("Received profile not identical to what was sent\n%s", errbuf); p7_oprofile_Destroy(om2); } } else { ESL_DPRINTF1(("Worker %d: sending test profile\n", my_rank)); p7_oprofile_MPISend(om, 0, 0, MPI_COMM_WORLD, &wbuf, &wn); ESL_DPRINTF1(("Worker %d: test profile sent\n", my_rank)); } free(wbuf); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return; }
/* process_commandline()
 *
 * Parse the environment and command line into a new ESL_GETOPTS
 * object, and fetch the two required positional arguments.
 *
 * On success, returns the options object in <*ret_go>, the first
 * argument in <*ret_hmmfile> and the second in <*ret_alifile>.
 *
 * With -h, prints the banner, usage, and all help sections to stdout
 * and exits with status 0.
 *
 * On any user error (bad option, wrong argument count, ...), prints a
 * message plus brief usage to stdout and exits with status 1.
 */
static void
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_hmmfile, char **ret_alifile)
{
  /* Help sections shown for -h: heading text and the option group it introduces. */
  static const struct { const char *heading; int group; } help_sections[] = {
    { "\nwhere basic options are:",                                 1 },
    { "\nOptions for selecting alphabet rather than guessing it:",  2 },
    { "\nAlternative model construction strategies:",               3 },
    { "\nAlternative relative sequence weighting strategies:",      4 },
    { "\nAlternate effective sequence weighting strategies:",       5 },
    { "\nControl of E-value calibration:",                          6 },
    { "\nOther options:",                                           8 },
  };
  const int    nsections = (int) (sizeof(help_sections) / sizeof(help_sections[0]));
  ESL_GETOPTS *go        = NULL;
  int          s;

  go = esl_getopts_Create(options);
  if (go == NULL) p7_Die("problem with options structure");

  if (esl_opt_ProcessEnvironment(go) != eslOK)
    {
      printf("Failed to process environment: %s\n", go->errbuf);
      goto ERROR;
    }
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      goto ERROR;
    }
  if (esl_opt_VerifyConfig(go) != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      goto ERROR;
    }

  /* help format: */
  if (esl_opt_GetBoolean(go, "-h") == TRUE)
    {
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);
      for (s = 0; s < nsections; s++)
        {
          puts(help_sections[s].heading);
          esl_opt_DisplayHelp(stdout, go, help_sections[s].group, 2, 80);
        }
      exit(0);
    }

  if (esl_opt_ArgNumber(go) != 2)
    {
      puts("Incorrect number of command line arguments.");
      goto ERROR;
    }

  *ret_hmmfile = esl_opt_GetArg(go, 1);
  if (*ret_hmmfile == NULL)
    {
      puts("Failed to get <hmmfile> argument on command line");
      goto ERROR;
    }

  *ret_alifile = esl_opt_GetArg(go, 2);
  if (*ret_alifile == NULL)
    {
      puts("Failed to get <alifile> argument on command line");
      goto ERROR;
    }

  *ret_go = go;
  return;

 ERROR:  /* all errors handled here are user errors, so be polite. */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, go, 1, 2, 80);
  printf("\nTo see more help on other available options, do %s -h\n\n", argv[0]);
  exit(1);
}
/* Example driver: Viterbi alignment of one sequence to one HMM.
 *
 * Reads the first HMM from <hmmfile> (arg 1) and the first sequence
 * from <seqfile> (arg 2), configures a p7_LOCAL profile for that
 * sequence length, runs p7_GViterbi() and p7_GTrace(), then dumps and
 * validates the resulting trace and prints the coordinates of each
 * domain found in it.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmmfile = esl_opt_GetArg(go, 1);
  char         *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc     = NULL;
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  P7_GMX       *vmx     = NULL;   /* DP matrix filled by Viterbi */
  ESL_SQ       *sq      = NULL;
  ESL_SQFILE   *sqfp    = NULL;
  P7_TRACE     *tr      = NULL;
  int           format  = eslSQFILE_UNKNOWN;
  char          errbuf[eslERRBUFSIZE];
  float         vsc;              /* Viterbi score */
  int           dom;
  int           status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  /* Read in one sequence.
   * NOTE(review): the file is opened with esl_sqfile_Open() while <sq> is a
   * digital sequence object -- confirm the reader digitizes in this setup.
   */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  switch (status) {
  case eslOK:        break;
  case eslENOTFOUND: p7_Fail("No such file.");                  break;
  case eslEFORMAT:   p7_Fail("Format unrecognized.");           break;
  case eslEINVAL:    p7_Fail("Can't autodetect stdin or .gz."); break;
  default:           p7_Fail("Open failed, code %d.", status);  break;
  }
  if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
  esl_sqfile_Close(sqfp);

  /* Configure a profile from the HMM */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);

  /* Allocate matrix and a trace */
  vmx = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_Create();

  /* Run Viterbi; do traceback */
  p7_GViterbi(sq->dsq, sq->n, gm, vmx, &vsc);
  p7_GTrace  (sq->dsq, sq->n, gm, vmx, tr);

  /* Dump and validate the trace. */
  p7_trace_Dump(stdout, tr, gm, sq->dsq);
  if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK)
    p7_Die("trace fails validation:\n%s\n", errbuf);

  /* Domain info in the trace. */
  p7_trace_Index(tr);
  printf("# Viterbi: %d domains : ", tr->ndom);
  for (dom = 0; dom < tr->ndom; dom++)
    printf("%6d %6d %6d %6d ", tr->sqfrom[dom], tr->sqto[dom], tr->hmmfrom[dom], tr->hmmto[dom]);
  printf("\n");

  /* Cleanup */
  p7_trace_Destroy(tr);
  p7_gmx_Destroy(vmx);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_sq_Destroy(sq);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
/* Example driver: optimal accuracy (OA) alignment of one sequence to one HMM.
 *
 * Reads the first HMM from <hmmfile> (arg 1) and the first sequence
 * from <seqfile> (arg 2), then:
 *   1. runs Forward and Backward, posterior decoding, the OA fill and
 *      the OA traceback; dumps/validates the OA trace and prints the
 *      Forward, Backward and accuracy scores;
 *   2. reruns with Viterbi, annotates that trace with posterior
 *      probabilities from the decoding matrix, and prints its score
 *      and expected accuracy for comparison.
 *
 * Options used: -d dumps the decoding matrix, -m dumps the OA matrix.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmmfile = esl_opt_GetArg(go, 1);
  char         *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc     = NULL;
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  P7_GMX       *gx1     = NULL;   /* Forward, then OA, then Viterbi matrix */
  P7_GMX       *gx2     = NULL;   /* Backward, then posterior decoding     */
  ESL_SQ       *sq      = NULL;
  ESL_SQFILE   *sqfp    = NULL;
  P7_TRACE     *tr      = NULL;
  int           format  = eslSQFILE_UNKNOWN;
  char          errbuf[eslERRBUFSIZE];
  float         fsc, bsc, vsc;    /* Forward, Backward, Viterbi scores */
  float         accscore;
  int           status;

  /* Read in one HMM */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp);
  switch (status) {
  case eslOK:        break;
  case eslENOTFOUND: p7_Fail("No such file.");                  break;
  case eslEFORMAT:   p7_Fail("Format unrecognized.");           break;
  case eslEINVAL:    p7_Fail("Can't autodetect stdin or .gz."); break;
  default:           p7_Fail("Open failed, code %d.", status);  break;
  }
  if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
  esl_sqfile_Close(sqfp);

  /* Configure a profile from the HMM */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); /* multihit local: H3 default */

  /* Allocations */
  gx1 = p7_gmx_Create(gm->M, sq->n);
  gx2 = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_CreateWithPP();
  p7_FLogsumInit();

  /* Run Forward, Backward; do OA fill and trace */
  p7_GForward (sq->dsq, sq->n, gm, gx1, &fsc);
  p7_GBackward(sq->dsq, sq->n, gm, gx2, &bsc);
  p7_GDecoding(gm, gx1, gx2, gx2);                /* <gx2> is now the posterior decoding matrix */
  p7_GOptimalAccuracy(gm, gx2, gx1, &accscore);   /* <gx1> is now the OA matrix */
  p7_GOATrace(gm, gx2, gx1, tr);

  if (esl_opt_GetBoolean(go, "-d")) p7_gmx_Dump(stdout, gx2, p7_DEFAULT);
  if (esl_opt_GetBoolean(go, "-m")) p7_gmx_Dump(stdout, gx1, p7_DEFAULT);

  p7_trace_Dump(stdout, tr, gm, sq->dsq);
  if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK)
    p7_Die("trace fails validation:\n%s\n", errbuf);

  printf("fwd = %.4f nats\n", fsc);
  printf("bck = %.4f nats\n", bsc);
  printf("acc = %.4f (%.2f%%)\n", accscore, accscore * 100. / (float) sq->n);

  /* Same sequence again, this time with a Viterbi trace, scored against
   * the posterior decoding matrix still held in <gx2>.
   */
  p7_trace_Reuse(tr);
  p7_GViterbi(sq->dsq, sq->n, gm, gx1, &vsc);
  p7_GTrace  (sq->dsq, sq->n, gm, gx1, tr);
  p7_trace_SetPP(tr, gx2);
  p7_trace_Dump(stdout, tr, gm, sq->dsq);

  printf("vit = %.4f nats\n", vsc);
  printf("acc = %.4f\n", p7_trace_GetExpectedAccuracy(tr));

  /* Cleanup */
  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx1);
  p7_gmx_Destroy(gx2);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
/* Example driver: stochastic traceback sampling.
 *
 * Reads the first HMM from <hmmfile> (arg 1) and the first sequence
 * from <seqfile> (arg 2), builds a p7_LOCAL profile and its optimized
 * counterpart, runs generic Viterbi and optimized Forward, then draws
 * <-N> stochastic traces from the Forward matrix. Each sampled trace
 * is scored, optionally dumped (-t), validated, and its score printed;
 * the Forward and Viterbi scores are printed at the end.
 *
 * Other options used: -p dumps the optimized profile, -m turns on
 * dump mode for the Forward matrix.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  ESL_RANDOMNESS *rng     = esl_randomness_CreateFast(0);
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;   /* generic matrix for Viterbi   */
  P7_OMX         *fwd     = NULL;   /* optimized matrix for Forward */
  P7_TRACE       *tr      = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  int             N       = esl_opt_GetInteger(go, "-N");   /* number of traces to sample */
  int             do_dump;                                  /* TRUE when -t asks for trace dumps */
  int             i;
  float           vsc, fsc, tsc;                            /* Viterbi, Forward, sampled-trace scores */
  char            errbuf[eslERRBUFSIZE];
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  /* Read in one sequence.
   * NOTE(review): the file is opened with esl_sqfile_Open() while <sq> is a
   * digital sequence object -- confirm the reader digitizes in this setup.
   */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  switch (status) {
  case eslOK:        break;
  case eslENOTFOUND: p7_Fail("No such file.");                  break;
  case eslEFORMAT:   p7_Fail("Format unrecognized.");           break;
  case eslEINVAL:    p7_Fail("Can't autodetect stdin or .gz."); break;
  default:           p7_Fail("Open failed, code %d.", status);  break;
  }
  if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");

  /* create default null model, then create and optimize profile */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  if (esl_opt_GetBoolean(go, "-p")) p7_oprofile_Dump(stdout, om);

  fwd = p7_omx_Create(gm->M, sq->n, sq->n);
  gx  = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_Create();

  if (esl_opt_GetBoolean(go, "-m") == TRUE) p7_omx_SetDumpMode(stdout, fwd, TRUE);

  p7_GViterbi(sq->dsq, sq->n, gm, gx,  &vsc);
  p7_Forward (sq->dsq, sq->n, om, fwd, &fsc);

  do_dump = (esl_opt_GetBoolean(go, "-t") == TRUE);
  for (i = 0; i < N; i++)
    {
      p7_StochasticTrace(rng, sq->dsq, sq->n, om, fwd, tr);
      p7_trace_Score(tr, sq->dsq, gm, &tsc);

      if (do_dump) p7_trace_Dump(stdout, tr, gm, sq->dsq);
      if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK)
        p7_Die("trace %d fails validation:\n%s\n", i, errbuf);

      printf("Sampled trace: %.4f nats\n", tsc);
      p7_trace_Reuse(tr);
    }

  printf("Forward score: %.4f nats\n", fsc);
  printf("Viterbi score: %.4f nats\n", vsc);

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_trace_Destroy(tr);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_randomness_Destroy(rng);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
/* process_workunit()
 *
 * This is the routine that actually does the work.
 *
 * A work unit consists of one HMM, <hmm>.
 * The result is the <scores> array, which contains an array of N scores;
 * caller provides this memory.
 * How those scores are generated is controlled by the application
 * configuration in <cfg> and the options in <go>.
 *
 * Args:     go      - application options (-L, --bgcomp, --multi/--uni,
 *                     --dual/--local/--glocal, --vector, --vit/--fwd/--msv,
 *                     --x-no-lengthmodel, -a)
 *           cfg     - application configuration; <cfg->bg>, <cfg->abc>,
 *                     <cfg->r> and <cfg->N> are used. <cfg->bg> has its
 *                     length set to L, and with --bgcomp its residue
 *                     frequencies <cfg->bg->f> are overwritten.
 *           errbuf  - receives "allocation failure" when eslEMEM is returned
 *           hmm     - the model to score against
 *           scores  - RETURN: scores[0..N-1], in bits: (sc - nullsc) / log 2
 *           alilens - RETURN: with -a only, alilens[0..N-1] = number of
 *                     M, D and I states (local + glocal) used by each trace
 *
 * Returns:  <eslOK> on success.
 *           Otherwise the status set by a failed ESL_ALLOC(); <errbuf> is
 *           filled in when that status is <eslEMEM>.
 */
static int
process_workunit(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores, int *alilens)
{
  int            L   = esl_opt_GetInteger(go, "-L");
  P7_PROFILE    *gm  = NULL;
  P7_OPROFILE   *om  = NULL;
  P7_REFMX      *rmx = NULL;
  P7_CHECKPTMX  *cx  = NULL;
  P7_FILTERMX   *fx  = NULL;
  P7_TRACE      *tr  = NULL;
  ESL_DSQ       *dsq = NULL;
  P7_HARDWARE   *hw  = NULL;
  int            i;
  int            scounts[p7T_NSTATETYPES]; /* state usage counts from a trace */
  float          sc;
  float          nullsc;
  int            status;

  /* NOTE(review): <hw> is never released in this function. No destructor for
   * P7_HARDWARE is visible from here; add the matching release call in the
   * cleanup section once the right API is confirmed.
   */
  if ((hw = p7_hardware_Create ()) == NULL) p7_Fail("Couldn't get HW information data structure");

  /* Optionally set a custom background, determined by model composition;
   * an experimental hack.
   */
  if (esl_opt_GetBoolean(go, "--bgcomp"))
    {
      float *p = NULL;
      float  KL;

      p7_hmm_CompositionKLDist(hmm, cfg->bg, &KL, &p);
      esl_vec_FCopy(p, cfg->abc->K, cfg->bg->f);
      /* <p> comes back through an out-parameter. NOTE(review): per HMMER's
       * optional-return convention the caller owns it -- confirm. Previously
       * it was leaked on every work unit.
       */
      free(p);
    }

  /* Create and configure our generic profile, as requested */
  gm = p7_profile_Create(hmm->M, cfg->abc);
  if (esl_opt_GetBoolean(go, "--multi"))
    {
      if      (esl_opt_GetBoolean(go, "--dual"))   { p7_profile_Config      (gm, hmm, cfg->bg);    }
      else if (esl_opt_GetBoolean(go, "--local"))  { p7_profile_ConfigLocal (gm, hmm, cfg->bg, L); }
      else if (esl_opt_GetBoolean(go, "--glocal")) { p7_profile_ConfigGlocal(gm, hmm, cfg->bg, L); }
    }
  else if (esl_opt_GetBoolean(go, "--uni"))
    {
      if      (esl_opt_GetBoolean(go, "--dual"))   { p7_profile_ConfigCustom   (gm, hmm, cfg->bg, L, 0.0, 0.5); }
      else if (esl_opt_GetBoolean(go, "--local"))  { p7_profile_ConfigUnilocal (gm, hmm, cfg->bg, L);           }
      else if (esl_opt_GetBoolean(go, "--glocal")) { p7_profile_ConfigUniglocal(gm, hmm, cfg->bg, L);           }
    }

  p7_profile_SetLength(gm, L);
  p7_bg_SetLength(cfg->bg, L);

  if (esl_opt_GetBoolean(go, "--x-no-lengthmodel")) elide_length_model(gm, cfg->bg);

  /* Allocate DP matrix for <gm>. */
  rmx = p7_refmx_Create(gm->M, L);

  /* Create and configure the vectorized profile, if needed;
   * and allocate its DP matrix
   */
  if (esl_opt_GetBoolean(go, "--vector"))
    {
      /* The SIMD flavor must come from the hardware description: <om> is
       * still NULL at this point, so reading om->simd here (as the code
       * used to) dereferences a NULL pointer.
       * NOTE(review): assumes P7_HARDWARE exposes the flavor as <simd> -- confirm.
       */
      om = p7_oprofile_Create(gm->M, cfg->abc, hw->simd);
      p7_oprofile_Convert(gm, om);
      cx = p7_checkptmx_Create(gm->M, L, ESL_MBYTES(32), om->simd);
      fx = p7_filtermx_Create(gm->M, om->simd);
    }

  /* Remaining allocation */
  ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2));
  tr = p7_trace_Create();

  /* Collect scores from N random sequences of length L */
  for (i = 0; i < cfg->N; i++)
    {
      esl_rsq_xfIID(cfg->r, cfg->bg->f, cfg->abc->K, L, dsq);
      sc = eslINFINITY;

      /* Vectorized implementations of Viterbi, MSV may overflow.
       * In this case, they'll leave sc=eslINFINITY.
       * Then we fail over to the nonvector "generic" implementation.
       * That's why this next block isn't an if/else.
       */
      if (esl_opt_GetBoolean(go, "--vector"))
        {
          if      (esl_opt_GetBoolean(go, "--vit")) p7_ViterbiFilter(dsq, L, om, fx, &sc);
          else if (esl_opt_GetBoolean(go, "--fwd")) p7_ForwardFilter(dsq, L, om, cx, &sc);
          else if (esl_opt_GetBoolean(go, "--msv")) p7_MSVFilter    (dsq, L, om, fx, &sc);
        }

      /* If we tried a vector calculation above but it overflowed,
       * or if we're to do --generic DP calculations, sc==eslINFINITY now;
       * hence the if condition here:
       */
      if (sc == eslINFINITY)
        {
          if      (esl_opt_GetBoolean(go, "--fwd")) p7_ReferenceForward(dsq, L, gm, rmx,     &sc); /* any mode: dual,local,glocal; gm's config takes care of this */
          else if (esl_opt_GetBoolean(go, "--vit")) p7_ReferenceViterbi(dsq, L, gm, rmx, tr, &sc); /* local-only mode. cmdline opts processing has already assured that --local set */
          else if (esl_opt_GetBoolean(go, "--msv")) p7_Die("We used to be able to do a generic MSV algorithm - but no longer");
        }

      /* Optional: get Viterbi alignment length too. */
      if (esl_opt_GetBoolean(go, "-a")) /* -a only works with Viterbi; getopts has checked this already; <tr> must be valid */
        {
          p7_trace_GetStateUseCounts(tr, scounts);

          /* there's various ways we could counts "alignment length".
           * Here we'll use the total length of model used, in nodes: M+D states.
           * score vs al would gives us relative entropy / model position.
           */
          /* alilens[i] = scounts[p7T_D] + scounts[p7T_I]; SRE: temporarily testing this instead */
          alilens[i] = scounts[p7T_ML] + scounts[p7T_DL] + scounts[p7T_IL]
                     + scounts[p7T_MG] + scounts[p7T_DG] + scounts[p7T_IG];
          p7_trace_Reuse(tr);
        }

      p7_bg_NullOne(cfg->bg, dsq, L, &nullsc);
      scores[i] = (sc - nullsc) / eslCONST_LOG2;

      if (cx) p7_checkptmx_Reuse(cx);
      if (fx) p7_filtermx_Reuse(fx);
      p7_refmx_Reuse(rmx);
    }
  status = eslOK;
  /* deliberate flowthru */

 ERROR:
  free(dsq);
  p7_checkptmx_Destroy(cx);
  p7_filtermx_Destroy(fx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_refmx_Destroy(rmx);
  p7_trace_Destroy(tr);
  if (status == eslEMEM) sprintf(errbuf, "allocation failure");
  return status;
}
/* map_new_msa()
 *
 * Build the <inscount[0..M]>, <matuse[1..M]> and <matmap[1..M]> arrays
 * that map model consensus nodes <1..M> onto columns <1..alen> of a
 * new MSA, and determine <alen>.
 *
 * The problem: match states must line up in columns, but individual
 * sequences carry inserted residues, so we need global knowledge of
 * where inserts occur and how long they get.
 *
 * The trick: <inscount[k]>, for k=0..M, holds the maximum number of
 * times insert position k was used by any trace. That is the number
 * of gap columns to place between consensus columns k and k+1.
 * <inscount[0]> is the N-terminal tail; <inscount[M]> is the
 * C-terminal tail.
 *
 * <matuse[k]>, k=1..M, says whether consensus position k gets an
 * alignment column. By default that is <TRUE> only if at least one
 * trace has a residue there (an M state at node k). If the
 * <p7_ALL_CONSENSUS_COLS> flag is set in <optflags>, every
 * <matuse[1..M]> is <TRUE>. <matuse[0]> is unused and always <FALSE>.
 *
 * From those two arrays we derive <matmap[]> and <alen>:
 *   - if match state k has a column, <matmap[k]> is that column, 1..alen;
 *   - if it does not (<matuse[k] == FALSE>), <matmap[k] = matmap[k-1]>,
 *     i.e. the last column a match state did map to. This makes some
 *     apos coordinate setting work cleanly, but it means a nonzero
 *     <matmap[k]> alone does not prove node k is in the alignment:
 *     check <matuse[k] == TRUE> first.
 *   - <matmap[0]> is unused; by convention it is 0.
 *
 * N and C emit on transition, hence the check for an N->N or C->C
 * transition before counting an inserted residue in the tails.
 *
 * If <p7_TRIM> is set in <optflags>, the N and C tails are dropped:
 * <inscount[0]> and <inscount[M]> are reset to 0.
 *
 * Returns <eslOK> and hands the three newly allocated arrays plus
 * <alen> back through the <ret_*> arguments. If an allocation fails,
 * frees whatever was allocated, sets the array returns to NULL and
 * <*ret_alen> to 0, and returns the ESL_ALLOC() failure status.
 */
static int
map_new_msa(P7_TRACE **tr, int nseq, int M, int optflags, int **ret_inscount, int **ret_matuse, int **ret_matmap, int *ret_alen)
{
  int *inscount = NULL;   /* inscount[k=0..M] == max # of inserts in node k */
  int *matuse   = NULL;   /* matuse[k=1..M] == TRUE|FALSE: does node k map to an alignment column */
  int *matmap   = NULL;   /* matmap[k=1..M]: if matuse[k] TRUE, what column 1..alen does node k map to */
  int  idx;               /* counter over sequences */
  int  nins;              /* counter for inserted residues observed */
  int  z;                 /* index into trace positions */
  int  alen;              /* length of alignment */
  int  k;                 /* counter over nodes 1..M */
  int  status;

  ESL_ALLOC(inscount, sizeof(int) * (M+1));
  ESL_ALLOC(matuse,   sizeof(int) * (M+1));
  matuse[0] = 0;
  ESL_ALLOC(matmap,   sizeof(int) * (M+1));
  matmap[0] = 0;

  esl_vec_ISet(inscount, M+1, 0);
  esl_vec_ISet(matuse+1, M, (optflags & p7_ALL_CONSENSUS_COLS) ? TRUE : FALSE);

  for (idx = 0; idx < nseq; idx++)
    {
      P7_TRACE *t = tr[idx];

      nins = 0;
      k    = 0;
      for (z = 1; z < t->N; z++)
        {
          switch (t->st[z]) {
          case p7T_I:
            nins++;
            break;

          case p7T_N:   /* emits on transition: only an N->N step carries a residue */
            if (t->st[z-1] == p7T_N) nins++;
            break;

          case p7T_C:   /* likewise, only C->C carries a residue */
            if (t->st[z-1] == p7T_C) nins++;
            break;

          case p7T_M:   /* M only: node is used by at least one residue */
            matuse[t->k[z]] = TRUE;
            /* fallthrough: M and D share the insert bookkeeping */
          case p7T_D:   /* M,D: record max, reset counter. Can handle I->D transitions
                         * even though currently not in H3 models */
            k = t->k[z];  /* k++ doesn't work. May be a B->X->Mk fragment entry */
            inscount[k-1] = ESL_MAX(nins, inscount[k-1]);
            nins = 0;
            break;

          case p7T_T:   /* T: record C-tail max, for a profile trace */
            inscount[M] = ESL_MAX(nins, inscount[M]);
            break;

          case p7T_E:   /* this handles case of core traces, which do have I_M state */
            inscount[k] = ESL_MAX(nins, inscount[k]);  /* [M] doesn't work, because of {DMI}k->X->E frag exit */
            break;

          case p7T_B:   /* B: record N-tail max for a profile trace; I0 for a core trace */
            inscount[0] = ESL_MAX(nins, inscount[0]);
            nins = 0;
            break;

          case p7T_S:   /* don't need to do anything on S,X states */
          case p7T_X:
            break;

          case p7T_J:
            p7_Die("J state unsupported");
            /* fallthrough (unreachable if p7_Die() exits, as presumed) */
          default:
            p7_Die("Unrecognized statetype %d", t->st[z]);
          }
        }
    }

  /* if we're trimming N and C off, reset inscount[0], inscount[M] to 0. */
  if (optflags & p7_TRIM)
    {
      inscount[0] = 0;
      inscount[M] = 0;
    }

  /* Use inscount, matuse to set the matmap[] */
  alen = inscount[0];
  for (k = 1; k <= M; k++)
    {
      if (matuse[k]) alen++;     /* node k gets its own column...             */
      matmap[k] = alen;          /* ...else it inherits the last used column  */
      alen += inscount[k];       /* then come the insert columns after node k */
    }

  *ret_inscount = inscount;
  *ret_matuse   = matuse;
  *ret_matmap   = matmap;
  *ret_alen     = alen;
  return eslOK;

 ERROR:
  free(inscount);
  free(matuse);
  free(matmap);
  *ret_inscount = NULL;
  *ret_matuse   = NULL;
  *ret_matmap   = NULL;
  *ret_alen     = 0;
  return status;
}