static int serial_loop(WORKER_INFO *info, struct cfg_s *cfg) { P7_BUILDER *bld = NULL; ESL_MSA *msa = NULL; ESL_MSA *postmsa = NULL; ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL; P7_HMM *hmm = NULL; char errmsg[eslERRBUFSIZE]; int status; double entropy; cfg->nali = 0; while ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) { cfg->nali++; if ((status = set_msa_name(cfg, errmsg, msa)) != eslOK) p7_Fail("%s\n", errmsg); /* cfg->nnamed gets incremented in this call */ /* bg new-HMM trarr gm om */ if ((status = p7_Builder(info->bld, msa, info->bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) p7_Fail("build failed: %s", bld->errbuf); entropy = p7_MeanMatchRelativeEntropy(hmm, info->bg); if ((status = output_result(cfg, errmsg, cfg->nali, msa, hmm, postmsa, entropy)) != eslOK) p7_Fail(errmsg); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msa_Destroy(postmsa); } return status; }
/* Function: p7_Lambda() * Synopsis: Determines length-corrected local lambda parameter. * Incept: SRE, Wed Aug 8 17:54:55 2007 [Janelia] * * Purpose: Determine the effective scale parameter $\hat{\lambda}$ to * use for model <hmm>. This will be applied both to * Viterbi Gumbel distributions and Forward exponential * tails. * * The 'true' $\lambda$ is always $\log 2 = 0.693$. The effective * lambda is corrected for edge effect, using the equation * * \[ * \hat{\lambda} = \lambda + \frac{1.44}{MH} * \] * * where $M$ is the model length and $H$ is the model * relative entropy. The model relative entropy is * approximated by the average relative entropy of match * emission distributions. The 1.44 is an empirically * determined fudge factor [J1/125]. This edge-effect * correction is based largely on \citep{Altschul01}, * except for the fudge factor, which we don't understand * and can't theoretically justify. * * Args: hmm : model to calculate corrected lambda for * bg : null model (source of background frequencies) * ret_lambda : RETURN: edge-corrected lambda * * Returns: <eslOK> on success, and <*ret_lambda> is the result. * * Throws: (no abnormal error conditions) */ int p7_Lambda(P7_HMM *hmm, P7_BG *bg, double *ret_lambda) { double H = p7_MeanMatchRelativeEntropy(hmm, bg); *ret_lambda = eslCONST_LOG2 + 1.44 / ((double) hmm->M * H); return eslOK; }
/* Evaluate fx = rel entropy - etarget, which we want to be = 0, * for effective sequence number <x>. */ static int eweight_target_f(double Neff, void *params, double *ret_fx) { struct ew_param_s *p = (struct ew_param_s *) params; p7_hmm_CopyParameters(p->hmm, p->h2); p7_hmm_Scale(p->h2, Neff / (double) p->h2->nseq); p7_ParameterEstimation(p->h2, p->pri); *ret_fx = p7_MeanMatchRelativeEntropy(p->h2, p->bg) - p->etarget; return eslOK; }
static void pipeline_thread(void *arg) { int workeridx; int status; WORK_ITEM *item; void *newItem; WORKER_INFO *info; ESL_THREADS *obj; obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); status = esl_workqueue_WorkerUpdate(info->queue, NULL, &newItem); if (status != eslOK) esl_fatal("Work queue worker failed"); /* loop until all blocks have been processed */ item = (WORK_ITEM *) newItem; while (item->msa != NULL) { status = p7_Builder(info->bld, item->msa, info->bg, &item->hmm, NULL, NULL, NULL, &item->postmsa); if (status != eslOK) p7_Fail("build failed: %s", info->bld->errbuf); item->entropy = p7_MeanMatchRelativeEntropy(item->hmm, info->bg); item->processed = TRUE; status = esl_workqueue_WorkerUpdate(info->queue, item, &newItem); if (status != eslOK) esl_fatal("Work queue worker failed"); item = (WORK_ITEM *) newItem; } status = esl_workqueue_WorkerUpdate(info->queue, item, NULL); if (status != eslOK) esl_fatal("Work queue worker failed"); esl_threads_Finished(obj, workeridx); return; }
/* mpi_master() * The MPI version of hmmbuild. * Follows standard pattern for a master/worker load-balanced MPI program (J1/78-79). * * A master can only return if it's successful. * Errors in an MPI master come in two classes: recoverable and nonrecoverable. * * Recoverable errors include all worker-side errors, and any * master-side error that do not affect MPI communication. Error * messages from recoverable messages are delayed until we've cleanly * shut down the workers. * * Unrecoverable errors are master-side errors that may affect MPI * communication, meaning we cannot count on being able to reach the * workers and shut them down. Unrecoverable errors result in immediate * p7_Fail()'s, which will cause MPI to shut down the worker processes * uncleanly. */ static void mpi_master(const ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; /* changes from OK on recoverable error */ int status; int have_work = TRUE; /* TRUE while alignments remain */ int nproc_working = 0; /* number of worker processes working, up to nproc-1 */ int wi; /* rank of next worker to get an alignment to work on */ char *buf = NULL; /* input/output buffer, for packed MPI messages */ int bn = 0; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; ESL_MSA **msalist = NULL; ESL_MSA *postmsa = NULL; int *msaidx = NULL; char errmsg[eslERRBUFSIZE]; MPI_Status mpistatus; int n; int pos; double entropy; /* Master initialization: including, figure out the alphabet type. * If any failure occurs, delay printing error message until we've shut down workers. */ if (xstatus == eslOK) { if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) xstatus = status; } if (xstatus == eslOK) { bn = 4096; if ((buf = malloc(sizeof(char) * bn)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } if (xstatus == eslOK) { if ((msalist = malloc(sizeof(ESL_MSA *) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } if (xstatus == eslOK) { if ((msaidx = malloc(sizeof(int) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); } ESL_DPRINTF1(("MPI master is initialized\n")); bg = p7_bg_Create(cfg->abc); for (wi = 0; wi < cfg->nproc; wi++) { msalist[wi] = NULL; msaidx[wi] = 0; } /* Worker initialization: * Because we've already successfully initialized the master before we start * initializing the workers, we don't expect worker initialization to fail; * so we just receive a quick OK/error code reply from each worker to be sure, * and don't worry about an informative message. */ MPI_Bcast(&(cfg->abc->type), 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if (status != eslOK) { MPI_Finalize(); p7_Fail("One or more MPI worker processes failed to initialize."); } ESL_DPRINTF1(("%d workers are initialized\n", cfg->nproc-1)); /* Main loop: combining load workers, send/receive, clear workers loops; * also, catch error states and die later, after clean shutdown of workers. * * When a recoverable error occurs, have_work = FALSE, xstatus != * eslOK, and errmsg is set to an informative message. No more * errmsg's can be received after the first one. We wait for all the * workers to clear their work units, then send them shutdown signals, * then finally print our errmsg and exit. * * Unrecoverable errors just crash us out with p7_Fail(). */ wi = 1; while (have_work || nproc_working) { if (have_work) { if ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) { cfg->nali++; ESL_DPRINTF1(("MPI master read MSA %s\n", msa->name == NULL? "" : msa->name)); } else { have_work = FALSE; if (status == eslEFORMAT) { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); } else if (status == eslEINVAL) { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); } else if (status != eslEOF) { xstatus = status; snprintf(errmsg, eslERRBUFSIZE, "Alignment file read unexpectedly failed with code %d\n", status); } ESL_DPRINTF1(("MPI master has run out of MSAs (having read %d)\n", cfg->nali)); } } if ((have_work && nproc_working == cfg->nproc-1) || (!have_work && nproc_working > 0)) { if (MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi probe failed"); } if (MPI_Get_count(&mpistatus, MPI_PACKED, &n) != 0) { MPI_Finalize(); p7_Fail("mpi get count failed"); } wi = mpistatus.MPI_SOURCE; ESL_DPRINTF1(("MPI master sees a result of %d bytes from worker %d\n", n, wi)); if (n > bn) { if ((buf = realloc(buf, sizeof(char) * n)) == NULL) p7_Fail("reallocation failed"); bn = n; } if (MPI_Recv(buf, bn, MPI_PACKED, wi, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi recv failed"); } ESL_DPRINTF1(("MPI master has received the buffer\n")); /* If we're in a recoverable error state, we're only clearing worker results; * just receive them, don't unpack them or print them. * But if our xstatus is OK, go ahead and process the result buffer. */ if (xstatus == eslOK) { pos = 0; if (MPI_Unpack(buf, bn, &pos, &xstatus, 1, MPI_INT, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack failed");} if (xstatus == eslOK) /* worker reported success. Get the HMM. */ { ESL_DPRINTF1(("MPI master sees that the result buffer contains an HMM\n")); if (p7_hmm_MPIUnpack(buf, bn, &pos, MPI_COMM_WORLD, &(cfg->abc), &hmm) != eslOK) { MPI_Finalize(); p7_Fail("HMM unpack failed"); } ESL_DPRINTF1(("MPI master has unpacked the HMM\n")); if (cfg->postmsafile != NULL) { if (esl_msa_MPIUnpack(cfg->abc, buf, bn, &pos, MPI_COMM_WORLD, &postmsa) != eslOK) { MPI_Finalize(); p7_Fail("postmsa unpack failed");} } entropy = p7_MeanMatchRelativeEntropy(hmm, bg); if ((status = output_result(cfg, errmsg, msaidx[wi], msalist[wi], hmm, postmsa, entropy)) != eslOK) xstatus = status; esl_msa_Destroy(postmsa); postmsa = NULL; p7_hmm_Destroy(hmm); hmm = NULL; } else /* worker reported an error. Get the errmsg. */ { if (MPI_Unpack(buf, bn, &pos, errmsg, eslERRBUFSIZE, MPI_CHAR, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack of errmsg failed"); } ESL_DPRINTF1(("MPI master sees that the result buffer contains an error message\n")); } } esl_msa_Destroy(msalist[wi]); msalist[wi] = NULL; msaidx[wi] = 0; nproc_working--; } if (have_work) { ESL_DPRINTF1(("MPI master is sending MSA %s to worker %d\n", msa->name == NULL ? "":msa->name, wi)); if (esl_msa_MPISend(msa, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed"); msalist[wi] = msa; msaidx[wi] = cfg->nali; /* 1..N for N alignments in the MSA database */ msa = NULL; wi++; nproc_working++; } } /* On success or recoverable errors: * Shut down workers cleanly. */ ESL_DPRINTF1(("MPI master is done. Shutting down all the workers cleanly\n")); for (wi = 1; wi < cfg->nproc; wi++) if (esl_msa_MPISend(NULL, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed"); free(buf); free(msaidx); free(msalist); p7_bg_Destroy(bg); if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); } else return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* command line processing */ ESL_ALPHABET *abc = NULL; char *hmmfile = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; int nhmm; double x; float KL; int status; char errbuf[eslERRBUFSIZE]; float nseq; int do_eval2score = 0; int do_score2eval = 0; int z_val; float e_val; float s_val; /* Process the command line options. */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) { printf("Failed to parse command line: %s\n", go->errbuf); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_GetBoolean(go, "-h") == TRUE) { p7_banner(stdout, argv[0], banner); esl_usage(stdout, argv[0], usage); puts("\nOptions:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/ exit(0); } if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL) { puts("Failed to read <hmmfile> argument from command line."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } output_header(stdout, go); if ( esl_opt_IsOn(go, "--eval2score") ) { do_eval2score = TRUE; e_val = esl_opt_GetReal(go, "-E"); } else if ( esl_opt_IsOn(go, "--score2eval") ) { do_score2eval = TRUE; s_val = esl_opt_GetReal(go, "-S"); } else if ( esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1") || esl_opt_IsUsed(go, "-Z") ) { puts("The flags -Z, --baseZ, and --baseZ1 are for use with --eval2score and --score2eval."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_IsUsed(go, "--baseZ") ) { z_val = 1000000 * 2 * (long)(esl_opt_GetInteger(go, "--baseZ")); } else if (esl_opt_IsUsed(go, "--baseZ1") ) { z_val = 1000000 * (long)(esl_opt_GetInteger(go, "--baseZ1")); } else { z_val = esl_opt_GetInteger(go, "-Z"); } /* Initializations: open the HMM file */ status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); /* Main body: read HMMs one at a time, print one line of stats */ printf("#\n"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s", "idx", "name", "accession", "nseq", "eff_nseq", "M", "relent", "info", "p relE", "compKL"); if (do_eval2score) printf (" %6s %6.2g", "sc for", e_val); if (do_score2eval) printf (" %6s %6.2f", "E-val for", s_val); printf("\n"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------"); if (do_eval2score) printf (" %13s", "-------------"); if (do_score2eval) printf (" %13s", "-------------"); printf("\n"); nhmm = 0; while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) esl_fatal("read failed, HMM file %s may be truncated?", hmmfile); else if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else if (status != eslOK) esl_fatal("Unexpected error in reading HMMs from %s", hmmfile); nhmm++; if ( esl_opt_IsOn(go, "--eval2score") || esl_opt_IsOn(go, "--score2eval") ) { if (esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1" ) ) { if ( hmm->abc->type != eslRNA && hmm->abc->type != eslDNA) { puts("The flags --baseZ and --baseZ1 can't be used with non-nucleotide models."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } } else if ( hmm->abc->type != eslAMINO && hmm->abc->type != eslRNA && hmm->abc->type != eslDNA) { puts("The flags --eval2score and --score2eval can't be used with non-sequence models."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } } if (esl_opt_IsUsed(go, "--baseZ") ) { nseq = (float)z_val / (float)(hmm->max_length); } else if (esl_opt_IsUsed(go, "--baseZ1") ) { nseq = (float)z_val / (float)(hmm->max_length); } else { nseq = z_val; } if (bg == NULL) bg = p7_bg_Create(abc); p7_MeanPositionRelativeEntropy(hmm, bg, &x); p7_hmm_CompositionKLDist(hmm, bg, &KL, NULL); printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f", nhmm, hmm->name, hmm->acc == NULL ? "-" : hmm->acc, hmm->nseq, hmm->eff_nseq, hmm->M, p7_MeanMatchRelativeEntropy(hmm, bg), p7_MeanMatchInfo(hmm, bg), x, KL); if ( do_eval2score ) { float sc; sc = esl_exp_invsurv( e_val / nseq , hmm->evparam[p7_FTAU], hmm->evparam[p7_FLAMBDA]); printf("%13.2f", sc); } else if ( do_score2eval) { float e; e = nseq * esl_exp_surv( s_val , hmm->evparam[p7_FTAU], hmm->evparam[p7_FLAMBDA]); printf("%13.2g", e); } printf("\n"); /* p7_MeanForwardScore(hmm, bg)); */ p7_hmm_Destroy(hmm); } p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); esl_getopts_Destroy(go); exit(0); }
/** * int main(int argc, char **argv) * Main driver */ int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* command line processing */ ESL_ALPHABET *abc = NULL; char *hmmfile = NULL; char *outhmmfile = NULL; P7_HMMFILE *hfp = NULL; FILE *outhmmfp; /* HMM output file handle */ P7_HMM *hmm = NULL; P7_BG *bg = NULL; int nhmm; double x; float KL; int status; char errbuf[eslERRBUFSIZE]; float average_internal_transitions[ p7H_NTRANSITIONS ]; int k; char errmsg[eslERRBUFSIZE]; /* Process the command line options. */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) { printf("Failed to parse command line: %s\n", go->errbuf); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_GetBoolean(go, "-h") == TRUE) { profillic_p7_banner(stdout, argv[0], banner); esl_usage(stdout, argv[0], usage); puts("\nOptions:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/ exit(0); } if (esl_opt_ArgNumber(go) != 2) { puts("Incorrect number of command line arguments."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL) { puts("Failed to read <input hmmfile> argument from command line."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if ((outhmmfile = esl_opt_GetArg(go, 2)) == NULL) { puts("Failed to read <output hmmfile> argument from command line."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } profillic_p7_banner(stdout, argv[0], banner); /* Initializations: open the input HMM file for reading */ status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); /* Initializations: open the output HMM file for writing */ if ((outhmmfp = fopen(outhmmfile, "w")) == NULL) ESL_FAIL(status, errmsg, "Failed to open HMM file %s for writing", outhmmfile); /* Main body: read HMMs one at a time, print one line of stats */ printf("#\n"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "idx", "name", "accession", "nseq", "eff_nseq", "M", "relent", "info", "p relE", "compKL"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------"); nhmm = 0; while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) esl_fatal("read failed, HMM file %s may be truncated?", hmmfile); else if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else if (status != eslOK) esl_fatal("Unexpected error in reading HMMs from %s", hmmfile); nhmm++; if (bg == NULL) bg = p7_bg_Create(abc); esl_vec_FSet(average_internal_transitions, p7H_NTRANSITIONS, 0.); for( k = 1; k < hmm->M; k++ ) { esl_vec_FAdd(average_internal_transitions, hmm->t[k], p7H_NTRANSITIONS); } // Match transitions esl_vec_FNorm(average_internal_transitions, 3); // Insert transitions esl_vec_FNorm(average_internal_transitions + 3, 2); // Delete transitions esl_vec_FNorm(average_internal_transitions + 5, 2); // Ok now set them. for( k = 1; k < hmm->M; k++ ) { esl_vec_FCopy( average_internal_transitions, p7H_NTRANSITIONS, hmm->t[k] ); } if ((status = p7_hmm_Validate(hmm, errmsg, 0.0001)) != eslOK) return status; if ((status = p7_hmmfile_WriteASCII(outhmmfp, -1, hmm)) != eslOK) ESL_FAIL(status, errmsg, "HMM save failed"); p7_MeanPositionRelativeEntropy(hmm, bg, &x); p7_hmm_CompositionKLDist(hmm, bg, &KL, NULL); printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f\n", nhmm, hmm->name, hmm->acc == NULL ? "-" : hmm->acc, hmm->nseq, hmm->eff_nseq, hmm->M, p7_MeanMatchRelativeEntropy(hmm, bg), p7_MeanMatchInfo(hmm, bg), x, KL); /* p7_MeanForwardScore(hmm, bg)); */ p7_hmm_Destroy(hmm); } p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); if (outhmmfp != NULL) fclose(outhmmfp); esl_getopts_Destroy(go); exit(0); }
int main(int argc, char **argv) { int status; ESL_GETOPTS *go = NULL; /* command line processing */ ESL_ALPHABET *abc = NULL; char *hmmfile = NULL; char *outhmmfile = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; int nhmm; double x; float KL; char errmsg[eslERRBUFSIZE]; /* Process the command line options. */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) { printf("Failed to parse command line: %s\n", go->errbuf); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_GetBoolean(go, "-h") == TRUE) { profillic_p7_banner(stdout, argv[0], banner); esl_usage(stdout, argv[0], usage); puts("\nwhere options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/ exit(0); } if (esl_opt_ArgNumber(go) != 2) { puts("Incorrect number of command line arguments."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL) { puts("Failed to read <input hmmfile> argument from command line."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if ((outhmmfile = esl_opt_GetArg(go, 2)) == NULL) { puts("Failed to read <output hmmfile> argument from command line."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } profillic_p7_banner(stdout, argv[0], banner); /* Initializations: open the input HMM file for reading */ status = p7_hmmfile_Open(hmmfile, NULL, &hfp); if (status == eslENOTFOUND) p7_Fail("Failed to open HMM file %s for reading.\n", hmmfile); else if (status == eslEFORMAT) p7_Fail("File %s does not appear to be in a recognized HMM format.\n", hmmfile); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n", status, hmmfile); /* Main body: read HMMs one at a time, print one line of stats */ printf("#\n"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "idx", "name", "accession", "nseq", "eff_nseq", "M", "relent", "info", "p relE", "compKL"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------"); nhmm = 0; if ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) esl_fatal("read failed, HMM file %s may be truncated?", hmmfile); else if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else if (status != eslOK) esl_fatal("Unexpected error in reading HMMs from %s", hmmfile); nhmm++; if (bg == NULL) bg = p7_bg_Create(abc); if( abc->type == eslDNA ) { galosh::ProfileTreeRoot<seqan::Dna, floatrealspace> profile; if( (status = convert_to_galosh_profile( hmm, profile )) != eslOK ) esl_fatal("Unexpected error in converting HMM from file %s to a dna galosh profile", hmmfile); std::ofstream fs ( outhmmfile ); if( !fs.is_open() ) { esl_fatal("Unexpected error in opening the file %s for writing", outhmmfile); } else { fs << profile; fs.close(); } } else if( abc->type == eslAMINO ) { galosh::ProfileTreeRoot<seqan::AminoAcid20, floatrealspace> profile; if( (status = convert_to_galosh_profile( hmm, profile )) != eslOK ) esl_fatal("Unexpected error in converting HMM from file %s to an amino galosh profile", hmmfile); std::ofstream fs ( outhmmfile ); if( !fs.is_open() ) { esl_fatal("Unexpected error in opening the file %s for writing", outhmmfile); } else { fs << profile; fs.close(); } } else { ESL_EXCEPTION(eslEUNIMPLEMENTED, "Sorry, at present the profillic-hmmtoprofile software can only handle amino and dna."); } p7_MeanPositionRelativeEntropy(hmm, bg, &x); p7_hmm_CompositionKLDist(hmm, bg, &KL, NULL); printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f\n", nhmm, hmm->name, hmm->acc == NULL ? "-" : hmm->acc, hmm->nseq, hmm->eff_nseq, hmm->M, p7_MeanMatchRelativeEntropy(hmm, bg), p7_MeanMatchInfo(hmm, bg), x, KL); /* p7_MeanForwardScore(hmm, bg)); */ p7_hmm_Destroy(hmm); } p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); esl_getopts_Destroy(go); exit(0); }