/* Benchmark driver: read every profile from an HMM file with
 * p7_oprofile_ReadMSV() and report timing and totals.
 *
 * Takes one command-line argument, the HMM file name. Each profile is
 * destroyed as soon as its model length has been tallied. On completion,
 * prints the stopwatch reading, the number of profiles read, and the sum of
 * their <M> fields to stdout.
 *
 * Any failure to open or read the file is fatal, via p7_Fail().
 * Returns 0 on success.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS   *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_STOPWATCH *w       = esl_stopwatch_Create();
  ESL_ALPHABET  *abc     = NULL;
  char          *hmmfile = esl_opt_GetArg(go, 1);
  P7_HMMFILE    *hfp     = NULL;
  P7_OPROFILE   *om      = NULL;
  int            nmodel  = 0;
  uint64_t       totM    = 0;
  int            status;
  char           errbuf[eslERRBUFSIZE];

  esl_stopwatch_Start(w);

  /* Open the HMM file; the stopwatch is already running, so open time is included. */
  status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, hmmfile, errbuf);

  /* Read profiles one at a time until something other than eslOK comes back. */
  while ((status = p7_oprofile_ReadMSV(hfp, &abc, &om)) == eslOK)
    {
      nmodel++;
      totM += om->M;

      p7_oprofile_Destroy(om);
    }
  /* Normal termination is eslEOF; anything else is a read failure. */
  if      (status == eslEFORMAT)   p7_Fail("bad file format in profile file %s",           hmmfile);
  else if (status == eslEINCOMPAT) p7_Fail("profile file %s contains different alphabets", hmmfile);
  else if (status != eslEOF)       p7_Fail("Unexpected error in reading profiles from %s", hmmfile);

  esl_stopwatch_Stop(w);
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# number of models: %d\n", nmodel);
  /* totM is unsigned 64-bit, so it must be printed with the unsigned format macro. */
  printf("# total M: %" PRIu64 "\n", totM);

  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_getopts_Destroy(go);
  return 0;
}
/* Function:  p7_oprofile_ReadBlockMSV()
 * Synopsis:  Read the next block of optimized profiles from a hmm file.
 *
 * Purpose:   Reads up to <hmmBlock->listSize> optimized profiles from the
 *            open hmm file <hfp> into <hmmBlock->list>, one call to
 *            <p7_oprofile_ReadMSV()> per profile, stopping at the first
 *            call that does not return <eslOK>. <hmmBlock->count> is set
 *            to the number of profiles successfully read.
 *
 * Args:      hfp      - open hmm file to read from
 *            byp_abc  - alphabet pointer, passed through unchanged to
 *                       <p7_oprofile_ReadMSV()>
 *            hmmBlock - block to fill; <list> must have room for
 *                       <listSize> profile pointers
 *
 * Returns:   <eslOK> on success; the profiles are stored in
 *            <hmmBlock->list[0..count-1]>. A block that was only
 *            partially filled because the file ran out still returns
 *            <eslOK>.
 *
 *            Returns <eslEOF> when there are no profiles left in the
 *            file (including the first attempt to read an empty file);
 *            <hmmBlock->count> is 0.
 *
 *            Otherwise returns the status of the failing
 *            <p7_oprofile_ReadMSV()> call; <hmmBlock->count> then holds
 *            the number of profiles read before the failure.
 */
int
p7_oprofile_ReadBlockMSV(P7_HMMFILE *hfp, ESL_ALPHABET **byp_abc, P7_OM_BLOCK *hmmBlock)
{
  int i;
  int status = eslOK;

  hmmBlock->count = 0;
  for (i = 0; i < hmmBlock->listSize; ++i)
    {
      status = p7_oprofile_ReadMSV(hfp, byp_abc, &hmmBlock->list[i]);
      if (status != eslOK) break;
      ++hmmBlock->count;
    }

  /* EOF will be returned only in the case where no profiles were read */
  if (status == eslEOF && i > 0) status = eslOK;

  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_ALPHABET *abc = NULL; char *msvfile = esl_opt_GetArg(go, 1); FILE *msvfp = NULL; P7_OPROFILE *om = NULL; int nmodel = 0; uint64_t totM = 0; int status; esl_stopwatch_Start(w); if ((msvfp = fopen(msvfile, "r")) == NULL) p7_Fail("Failed to open MSV file %s for reading.\n", msvfile); while ((status = p7_oprofile_ReadMSV(msvfp, &abc, NULL, &om)) == eslOK) { nmodel++; totM += om->M; p7_oprofile_Destroy(om); } if (status == eslEFORMAT) p7_Fail("bad file format in profile file %s", msvfile); else if (status == eslEINCOMPAT) p7_Fail("profile file %s contains different alphabets", msvfile); else if (status != eslEOF) p7_Fail("Unexpected error in reading profiles from %s", msvfile); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# number of models: %d\n", nmodel); printf("# total M: %" PRId64 "\n", totM); fclose(msvfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_BG *bg = p7_bg_Create(abc); int my_rank; int nproc; char *buf = NULL; int nbuf = 0; int subtotalM = 0; int allM = 0; int stalling = esl_opt_GetBoolean(go, "--stall"); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); while (stalling); /* Master MPI process: */ if (my_rank == 0) { ESL_STOPWATCH *w = esl_stopwatch_Create(); P7_HMMFILE *hfp = NULL; P7_OPROFILE *om = NULL; P7_HMM *hmm = NULL; /* Read HMMs from a file. */ if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); esl_stopwatch_Start(w); while (p7_oprofile_ReadMSV(hfp, &abc, &om) == eslOK && p7_oprofile_ReadRest(hfp, om) == eslOK) { if (!esl_opt_GetBoolean(go, "-b")) p7_oprofile_MPISend(om, 1, 0, MPI_COMM_WORLD, &buf, &nbuf); /* 1 = dest; 0 = tag */ p7_hmm_Destroy(hmm); p7_oprofile_Destroy(om); } p7_oprofile_MPISend(NULL, 1, 0, MPI_COMM_WORLD, &buf, &nbuf); /* send the "no more HMMs" sign */ MPI_Reduce(&subtotalM, &allM, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); printf("total: %d\n", allM); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "CPU Time: "); esl_stopwatch_Destroy(w); } /* Worker MPI process: */ else { P7_OPROFILE *om_recd = NULL; while (p7_oprofile_MPIRecv(0, 0, MPI_COMM_WORLD, &buf, &nbuf, &abc, &om_recd) == eslOK) { subtotalM += om_recd->M; p7_oprofile_Destroy(om_recd); } MPI_Reduce(&subtotalM, &allM, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); } free(buf); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); MPI_Finalize(); exit(0); }
/* serial_loop()
 * Single-threaded search of one query sequence against every profile that
 * can be read from <hfp>.
 *
 * For each profile read with p7_oprofile_ReadMSV(), the pipeline in <info>
 * is pointed at the new model and the query <info->qsq> is run through
 * p7_Pipeline_LongTarget(): first on its reverse complement (only when
 * built with eslAUGMENT_ALPHABET, the strand setting is not top-only, and
 * the alphabet has a complement), then on the sequence as given (unless the
 * strand setting is bottom-only). Hits added to <info->th> for this model
 * then have their lnP, sortkey and alignment-display length updated.
 *
 * Returns the first status from p7_oprofile_ReadMSV() that is not <eslOK>.
 */
static int
serial_loop(WORKER_INFO *info, P7_HMMFILE *hfp)
{
  int            status;
  int            h;                     /* index into info->th->unsrt              */
  int            nres_searched = 0;     /* residues searched for the current model */
  int            first_new_hit = 0;     /* index of first hit from current model   */
  P7_OPROFILE   *om            = NULL;
  P7_SCOREDATA  *scoredata     = NULL;  /* hmm-specific data used by nhmmer */
  ESL_ALPHABET  *abc           = NULL;
  P7_DOMAIN     *dcl;

#ifdef eslAUGMENT_ALPHABET
  ESL_SQ        *sq_revcmp     = NULL;

  /* Build the reverse-complemented query once, up front, and count its
   * residues in the pipeline's total.
   */
  if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL)
    {
      sq_revcmp = esl_sq_CreateDigital(info->qsq->abc);
      esl_sq_Copy(info->qsq, sq_revcmp);
      esl_sq_ReverseComplement(sq_revcmp);
      info->pli->nres += info->qsq->n;
    }
#endif /*eslAUGMENT_ALPHABET*/

  /* Main loop: one iteration per profile in the file. */
  for (;;)
    {
      status = p7_oprofile_ReadMSV(hfp, &abc, &om);
      if (status != eslOK) break;

      nres_searched = 0;

      p7_pli_NewModel(info->pli, om, info->bg);
      p7_bg_SetLength(info->bg, info->qsq->n);
      p7_oprofile_ReconfigLength(om, info->qsq->n);

      scoredata = p7_hmm_ScoreDataCreate(om, FALSE);

#ifdef eslAUGMENT_ALPHABET
      /* Reverse-complement strand. */
      if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL)
        {
          p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, sq_revcmp, info->th, 0);
          p7_pipeline_Reuse(info->pli);   /* prepare for next search */

          nres_searched = info->qsq->n;

          /* Flip the coordinates of the hits just added: position x
           * becomes (length - x + 1).
           */
          for (h = first_new_hit; h < info->th->N; h++)
            {
              dcl = info->th->unsrt[h].dcl;

              dcl->ienv       = nres_searched - dcl->ienv       + 1;
              dcl->jenv       = nres_searched - dcl->jenv       + 1;
              dcl->iali       = nres_searched - dcl->iali       + 1;
              dcl->jali       = nres_searched - dcl->jali       + 1;
              dcl->ad->sqfrom = nres_searched - dcl->ad->sqfrom + 1;
              dcl->ad->sqto   = nres_searched - dcl->ad->sqto   + 1;
            }
        }
#endif

      /* Sequence as given. */
      if (info->pli->strand != p7_STRAND_BOTTOMONLY)
        {
          p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, info->qsq, info->th, 0);
          p7_pipeline_Reuse(info->pli);
          nres_searched += info->qsq->n;
        }

      /* Adjust every hit found for this model: add the log of
       * (residues searched / om->max_length) to lnP, mirror it into the
       * first domain and the sort key, and record the model length.
       */
      for (h = first_new_hit; h < info->th->N; h++)
        {
          dcl = info->th->unsrt[h].dcl;

          info->th->unsrt[h].lnP     += log((float)nres_searched / (float)om->max_length);
          dcl->lnP                    = info->th->unsrt[h].lnP;
          info->th->unsrt[h].sortkey  = -1.0 * info->th->unsrt[h].lnP;
          dcl->ad->L                  = om->M;
        }
      first_new_hit = info->th->N;

      p7_oprofile_Destroy(om);
      p7_hmm_ScoreDataDestroy(scoredata);
    }

  esl_alphabet_Destroy(abc);
#ifdef eslAUGMENT_ALPHABET
  esl_sq_Destroy(sq_revcmp);
#endif
  return status;
}
/* serial_master()
 * The non-MPI driver. For each query sequence in <cfg->seqfile>, scan the
 * profile database <cfg->hmmfile> for hits and write the results.
 *
 * The profile database is opened once up front, only to learn the
 * alphabet from its first profile, and then reopened for every query
 * sequence. Searching is done by thread_loop() when built with
 * HMMER_THREADS and at least one CPU is in use, and by serial_loop()
 * otherwise. Per-worker results are merged into <info[0]> before
 * thresholding and output.
 *
 * Returns <eslOK> on success. Problems with input files and options are
 * handled immediately and fatally with p7_Fail() or esl_fatal(). A failed
 * allocation of the worker array returns through the ERROR label with the
 * failing status; a failed write raises <eslEWRITE> through
 * ESL_EXCEPTION_SYS().
 */
static int
serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  FILE            *ofp         = stdout;             /* output file for results (default stdout)                */
  FILE            *tblfp       = NULL;               /* output stream for tabular per-seq (--tblout)            */
  FILE            *dfamtblfp   = NULL;               /* output stream for tabular Dfam format (--dfamtblout)    */
  FILE            *aliscoresfp = NULL;               /* output stream for alignment scores (--aliscoresout)     */

  // P7_HMM          *hmm         = NULL;            /* one HMM query                                           */
  // P7_SCOREDATA    *scoredata   = NULL;

  int              seqfmt      = eslSQFILE_UNKNOWN;  /* format of seqfile                                       */
  ESL_SQFILE      *sqfp        = NULL;               /* open seqfile                                            */
  P7_HMMFILE      *hfp         = NULL;               /* open HMM database file                                  */
  ESL_ALPHABET    *abc         = NULL;               /* sequence alphabet                                       */
  P7_OPROFILE     *om          = NULL;               /* target profile                                          */
  ESL_STOPWATCH   *w           = NULL;               /* timing                                                  */
  ESL_SQ          *qsq         = NULL;               /* query sequence                                          */
  int              nquery      = 0;
  int              textw;
  int              status      = eslOK;
  int              hstatus     = eslOK;
  int              sstatus     = eslOK;
  int              i;

  int              ncpus       = 0;

  int              infocnt     = 0;
  WORKER_INFO     *info        = NULL;
#ifdef HMMER_THREADS
  P7_OM_BLOCK     *block       = NULL;
  ESL_THREADS     *threadObj   = NULL;
  ESL_WORK_QUEUE  *queue       = NULL;
#endif
  char             errbuf[eslERRBUFSIZE];

  /* These two options are range-checked here; the values are not used
   * again within this function.
   */
  double           window_beta   = -1.0;
  int              window_length = -1;

  if (esl_opt_IsUsed(go, "--w_beta")) {
    if ( ( window_beta = esl_opt_GetReal(go, "--w_beta") ) < 0 || window_beta > 1 )
      esl_fatal("Invalid window-length beta value\n");
  }
  if (esl_opt_IsUsed(go, "--w_length")) {
    if (( window_length = esl_opt_GetInteger(go, "--w_length")) < 4 )
      esl_fatal("Invalid window length value\n");
  }

  w = esl_stopwatch_Create();

  if (esl_opt_GetBoolean(go, "--notextw")) textw = 0;
  else                                     textw = esl_opt_GetInteger(go, "--textw");

  /* If caller declared an input format, decode it */
  if (esl_opt_IsOn(go, "--qformat")) {
    seqfmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--qformat"));
    if (seqfmt == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--qformat"));
  }

  /* validate options if running as a daemon */
  //  if (esl_opt_IsOn(go, "--daemon")) {
  /* running as a daemon, the input format must be type daemon */
  //    if (seqfmt != eslSQFILE_UNKNOWN && seqfmt != eslSQFILE_DAEMON)
  //      esl_fatal("Input format %s not supported.  Must be daemon\n", esl_opt_GetString(go, "--qformat"));
  //    seqfmt = eslSQFILE_DAEMON;
  //    if (strcmp(cfg->seqfile, "-") != 0) esl_fatal("Query sequence file must be '-'\n");
  //  }

  /* Open the target profile database to get the sequence alphabet */
  status = p7_hmmfile_OpenE(cfg->hmmfile, p7_HMMDBENV, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", cfg->hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem, trying to open HMM file %s.\n%s\n",                  cfg->hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, cfg->hmmfile, errbuf);
  if (! hfp->is_pressed)           p7_Fail("Failed to open binary auxfiles for %s: use hmmpress first\n",             hfp->fname);

  /* Read one profile just to have <abc> set, then discard it and close the file. */
  hstatus = p7_oprofile_ReadMSV(hfp, &abc, &om);
  if      (hstatus == eslEFORMAT)   p7_Fail("bad format, binary auxfiles, %s:\n%s",     cfg->hmmfile, hfp->errbuf);
  else if (hstatus == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", cfg->hmmfile);
  else if (hstatus != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s", cfg->hmmfile);

  p7_oprofile_Destroy(om);
  p7_hmmfile_Close(hfp);

  /* Open the query sequence database */
  status = esl_sqfile_OpenDigital(abc, cfg->seqfile, seqfmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n",      cfg->seqfile);
  else if (status == eslEFORMAT)   p7_Fail("Sequence file %s is empty or misformatted\n",        cfg->seqfile);
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect format of a stdin or .gz seqfile");
  else if (status != eslOK)        p7_Fail("Unexpected error %d opening sequence file %s\n", status, cfg->seqfile);

  if (sqfp->format > 100) // breaking the law!  That range is reserved for msa, for aligned formats
    p7_Fail("%s contains a multiple sequence alignment; expect unaligned sequences, like FASTA\n", cfg->seqfile);

  qsq = esl_sq_CreateDigital(abc);

  /* Open the results output files */
  if (esl_opt_IsOn(go, "-o")) {
    if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
      esl_fatal("Failed to open output file %s for writing\n", esl_opt_GetString(go, "-o"));
  }
  if (esl_opt_IsOn(go, "--tblout")) {
    /* The message must look up the same option that was just opened. */
    if ((tblfp = fopen(esl_opt_GetString(go, "--tblout"), "w")) == NULL)
      esl_fatal("Failed to open tabular per-seq output file %s for writing\n", esl_opt_GetString(go, "--tblout"));
  }
  if (esl_opt_IsOn(go, "--dfamtblout")) {
    if ((dfamtblfp = fopen(esl_opt_GetString(go, "--dfamtblout"),"w")) == NULL)
      esl_fatal("Failed to open tabular dfam output file %s for writing\n", esl_opt_GetString(go, "--dfamtblout"));
  }
  if (esl_opt_IsOn(go, "--aliscoresout")) {
    if ((aliscoresfp = fopen(esl_opt_GetString(go, "--aliscoresout"),"w")) == NULL)
      esl_fatal("Failed to open alignment scores output file %s for writing\n", esl_opt_GetString(go, "--aliscoresout"));
  }

  output_header(ofp, go, cfg->hmmfile, cfg->seqfile);

#ifdef HMMER_THREADS
  /* initialize thread data */
  if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu");
  else                           esl_threads_CPUCount(&ncpus);

  if (ncpus > 0)
    {
      threadObj = esl_threads_Create(&pipeline_thread);
      queue     = esl_workqueue_Create(ncpus * 2);
    }
#endif

  /* One WORKER_INFO per worker thread, or exactly one when unthreaded. */
  infocnt = (ncpus == 0) ? 1 : ncpus;
  ESL_ALLOC(info, sizeof(*info) * infocnt);

  for (i = 0; i < infocnt; ++i)
    {
      info[i].bg = p7_bg_Create(abc);
#ifdef HMMER_THREADS
      info[i].queue = queue;
#endif
    }

#ifdef HMMER_THREADS
  for (i = 0; i < ncpus * 2; ++i)
    {
      block = p7_oprofile_CreateBlock(BLOCK_SIZE);
      if (block == NULL) esl_fatal("Failed to allocate sequence block");

      status = esl_workqueue_Init(queue, block);
      if (status != eslOK) esl_fatal("Failed to add block to work queue");
    }
#endif

  /* Outside loop: over each query sequence in <seqfile>.
   * Any read status other than eslOK ends the loop and is dealt with
   * immediately after it.
   */
  while ((sstatus = esl_sqio_Read(sqfp, qsq)) == eslOK)
    {
      // if (qsq->L > NHMMER_MAX_RESIDUE_COUNT) p7_Fail("Input sequence %s in file %s exceeds maximum length of %d bases.\n", qsq->name, cfg->seqfile, NHMMER_MAX_RESIDUE_COUNT);

      nquery++;
      esl_stopwatch_Start(w);

      /* Open the target profile database */
      status = p7_hmmfile_OpenE(cfg->hmmfile, p7_HMMDBENV, &hfp, NULL);
      if (status != eslOK) p7_Fail("Unexpected error %d in opening hmm file %s.\n", status, cfg->hmmfile);

#ifdef HMMER_THREADS
      /* if we are threaded, create a lock to prevent multiple readers */
      if (ncpus > 0)
        {
          status = p7_hmmfile_CreateLock(hfp);
          if (status != eslOK) p7_Fail("Unexpected error %d creating lock\n", status);
        }
#endif

      if (fprintf(ofp, "Query:       %s  [L=%ld]\n", qsq->name, (long) qsq->n) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      if (qsq->acc[0]  != 0 && fprintf(ofp, "Accession:   %s\n", qsq->acc)     < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      if (qsq->desc[0] != 0 && fprintf(ofp, "Description: %s\n", qsq->desc)    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

      for (i = 0; i < infocnt; ++i)
        {
          /* Create processing pipeline and hit list */
          info[i].th  = p7_tophits_Create();
          info[i].pli = p7_pipeline_Create(go, 100, 100, TRUE, p7_SCAN_MODELS); /* M_hint = 100, L_hint = 100 are just dummies for now */
          info[i].pli->hfp = hfp;  /* for two-stage input, pipeline needs <hfp> */

          p7_pli_NewSeq(info[i].pli, qsq);
          info[i].qsq = qsq;

          if      ( esl_opt_IsUsed(go, "--toponly") )    info[i].pli->strand = p7_STRAND_TOPONLY;
          else if ( esl_opt_IsUsed(go, "--bottomonly") ) info[i].pli->strand = p7_STRAND_BOTTOMONLY;
          else                                           info[i].pli->strand = p7_STRAND_BOTH;

#ifdef HMMER_THREADS
          if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]);
#endif
        }

#ifdef HMMER_THREADS
      if (ncpus > 0) hstatus = thread_loop(threadObj, queue, hfp);
      else           hstatus = serial_loop(info, hfp);
#else
      hstatus = serial_loop(info, hfp);
#endif
      switch (hstatus)
        {
        case eslEFORMAT:   p7_Fail("bad file format in HMM file %s",             cfg->hmmfile); break;
        case eslEINCOMPAT: p7_Fail("HMM file %s contains different alphabets",   cfg->hmmfile); break;
        case eslEOF:
        case eslOK:        /* do nothing */                                                     break;
        default:           p7_Fail("Unexpected error in reading HMMs from %s",   cfg->hmmfile);
        }

      /* merge the results of the search results */
      for (i = 1; i < infocnt; ++i)
        {
          p7_tophits_Merge(info[0].th, info[i].th);
          p7_pipeline_Merge(info[0].pli, info[i].pli);

          p7_pipeline_Destroy(info[i].pli);
          p7_tophits_Destroy(info[i].th);
        }

      /* modify e-value to account for number of models */
      for (i = 0; i < info->th->N ; i++)
        {
          info->th->unsrt[i].lnP         += log((float)info->pli->nmodels);
          info->th->unsrt[i].dcl[0].lnP   = info->th->unsrt[i].lnP;
          info->th->unsrt[i].sortkey      = -1.0 * info->th->unsrt[i].lnP;
        }

      /* it's possible to have duplicates based on how viterbi ranges can overlap */
      p7_tophits_SortByModelnameAndAlipos(info->th);
      p7_tophits_RemoveDuplicates(info->th, info->pli->use_bit_cutoffs);

      /* Print results */
      p7_tophits_SortBySortkey(info->th);
      p7_tophits_Threshold(info->th, info->pli);

      //tally up total number of hits and target coverage
      info->pli->n_output = info->pli->pos_output = 0;
      for (i = 0; i < info->th->N; i++) {
        if ( (info->th->hit[i]->flags & p7_IS_REPORTED) || info->th->hit[i]->flags & p7_IS_INCLUDED) {
          info->pli->n_output++;
          info->pli->pos_output += abs(info->th->hit[i]->dcl[0].jali - info->th->hit[i]->dcl[0].iali) + 1;
        }
      }

      p7_tophits_Targets(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      p7_tophits_Domains(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

      /* The last argument to TabularTargets is true only for the first query. */
      if (tblfp)       p7_tophits_TabularTargets(tblfp,    qsq->name, qsq->acc, info->th, info->pli, (nquery == 1));
      if (dfamtblfp)   p7_tophits_TabularXfam(dfamtblfp,   qsq->name, NULL,     info->th, info->pli);
      if (aliscoresfp) p7_tophits_AliScores(aliscoresfp,   qsq->name, info->th );

      esl_stopwatch_Stop(w);
      info->pli->nseqs = 1;
      p7_pli_Statistics(ofp, info->pli, w);
      if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
      fflush(ofp);

      p7_hmmfile_Close(hfp);
      p7_pipeline_Destroy(info->pli);
      p7_tophits_Destroy(info->th);
      esl_sq_Reuse(qsq);
    }

  /* The read loop ended on something other than eslOK: only eslEOF is a normal finish. */
  if      (sstatus == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
                                            sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
  else if (sstatus != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
                                            sstatus, sqfp->filename);

  /* Terminate outputs - any last words? */
  if (tblfp) p7_tophits_TabularTail(tblfp, "hmmscan", p7_SCAN_MODELS, cfg->seqfile, cfg->hmmfile, go);
  if (ofp)   { if (fprintf(ofp, "[ok]\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }

  /* Cleanup - prepare for successful exit */
  for (i = 0; i < infocnt; ++i)
    p7_bg_Destroy(info[i].bg);

#ifdef HMMER_THREADS
  if (ncpus > 0)
    {
      esl_workqueue_Reset(queue);
      while (esl_workqueue_Remove(queue, (void **) &block) == eslOK)
        p7_oprofile_DestroyBlock(block);
      esl_workqueue_Destroy(queue);
      esl_threads_Destroy(threadObj);
    }
#endif

  free(info);

  esl_sq_Destroy(qsq);
  esl_stopwatch_Destroy(w);
  esl_alphabet_Destroy(abc);
  esl_sqfile_Close(sqfp);

  if (ofp != stdout) fclose(ofp);
  if (tblfp)         fclose(tblfp);
  if (dfamtblfp)     fclose(dfamtblfp);
  if (aliscoresfp)   fclose(aliscoresfp);

  return eslOK;

 ERROR:
  /* Reached when ESL_ALLOC of <info> fails: close the output streams and report <status>. */
  if (ofp != stdout) fclose(ofp);
  if (tblfp)         fclose(tblfp);
  if (dfamtblfp)     fclose(dfamtblfp);
  if (aliscoresfp)   fclose(aliscoresfp);

  return status;
}
/* utest_ReadWrite()
 * Round-trip unit test for optimized-profile file I/O.
 *
 * Writes <hmm> and its optimized profile <om> to a temporary HMM file plus
 * the companion .h3m, .h3f, .h3p and .h3i files, reads the optimized
 * profile back through the HMM file interface, and checks with
 * p7_oprofile_Compare() that it matches <om>. Any failure is fatal via
 * esl_fatal(). All files created are removed before returning.
 *
 * Side effect: <om->offs[]> entries for the three binary files are set to
 * the current positions of the freshly opened streams.
 */
static void
utest_ReadWrite(P7_HMM *hmm, P7_OPROFILE *om)
{
  char         *failmsg      = "oprofile read/write unit test failure";
  char          tmpname[16]  = "esltmpXXXXXX";
  char         *h3m_path     = NULL;
  char         *h3f_path     = NULL;
  char         *h3p_path     = NULL;
  char         *h3i_path     = NULL;
  FILE         *hmm_fp       = NULL;
  FILE         *h3m_fp       = NULL;
  FILE         *h3f_fp       = NULL;
  FILE         *h3p_fp       = NULL;
  ESL_NEWSSI   *ssi          = NULL;
  P7_HMMFILE   *hfp          = NULL;
  P7_OPROFILE  *om_readback  = NULL;
  ESL_ALPHABET *abc          = NULL;
  uint16_t      fh           = 0;
  float         tolerance    = 0.001;
  char          errbuf[eslERRBUFSIZE];

  /* 1. A mini version of hmmpress: save the test HMM to a file along with
   *    its associated .h3{mfpi} files.
   */
  if (esl_tmpfile_named(tmpname, &hmm_fp)           != eslOK ||
      esl_sprintf(&h3m_path, "%s.h3m", tmpname)     != eslOK ||
      esl_sprintf(&h3f_path, "%s.h3f", tmpname)     != eslOK ||
      esl_sprintf(&h3p_path, "%s.h3p", tmpname)     != eslOK ||
      esl_sprintf(&h3i_path, "%s.h3i", tmpname)     != eslOK ||
      esl_newssi_Open(h3i_path, TRUE, &ssi)         != eslOK)
    esl_fatal(failmsg);

  h3m_fp = fopen(h3m_path, "wb");
  if (h3m_fp == NULL) esl_fatal(failmsg);
  h3f_fp = fopen(h3f_path, "wb");
  if (h3f_fp == NULL) esl_fatal(failmsg);
  h3p_fp = fopen(h3p_path, "wb");
  if (h3p_fp == NULL) esl_fatal(failmsg);

  /* the disk offsets are all 0 by construction, if there's only one
   * HMM in the file - but don't want to forget them, if we change the
   * unit test in the future to be multi HMM
   */
  om->offs[p7_MOFFSET] = ftello(h3m_fp);
  if (om->offs[p7_MOFFSET] == -1) esl_fatal(failmsg);
  om->offs[p7_FOFFSET] = ftello(h3f_fp);
  if (om->offs[p7_FOFFSET] == -1) esl_fatal(failmsg);
  om->offs[p7_POFFSET] = ftello(h3p_fp);
  if (om->offs[p7_POFFSET] == -1) esl_fatal(failmsg);

  if (p7_hmmfile_WriteASCII(hmm_fp, -1, hmm)                                     != eslOK ||
      p7_hmmfile_WriteBinary(h3m_fp, -1, hmm)                                    != eslOK ||
      p7_oprofile_Write(h3f_fp, h3p_fp, om)                                      != eslOK ||
      esl_newssi_AddFile(ssi, tmpname, 0, &fh)                                   != eslOK ||
      esl_newssi_AddKey (ssi, hmm->name, fh, om->offs[p7_MOFFSET], 0, 0)         != eslOK ||
      esl_newssi_Write(ssi)                                                      != eslOK)
    esl_fatal(failmsg);

  fclose(hmm_fp);
  fclose(h3m_fp);
  fclose(h3f_fp);
  fclose(h3p_fp);
  esl_newssi_Close(ssi);

  /* 2. read the optimized profile back in */
  if (p7_hmmfile_Open(tmpname, NULL, &hfp)          != eslOK ||
      p7_oprofile_ReadMSV(hfp, &abc, &om_readback)  != eslOK ||
      p7_oprofile_ReadRest(hfp, om_readback)        != eslOK)
    esl_fatal(failmsg);

  /* 3. it should be identical to the original */
  if (p7_oprofile_Compare(om, om_readback, tolerance, errbuf) != eslOK)
    esl_fatal("%s\n%s", failmsg, errbuf);

  p7_oprofile_Destroy(om_readback);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);

  remove(h3i_path);
  remove(h3f_path);
  remove(h3p_path);
  remove(h3m_path);
  remove(tmpname);

  free(h3i_path);
  free(h3m_path);
  free(h3f_path);
  free(h3p_path);
}
/* Function:  p7_hmmcache_Open()
 * Synopsis:  Cache a profile database.
 *
 * Purpose:   Open <hmmfile> and read all of its contents, creating
 *            a cached profile database in memory. Return a ptr to the
 *            cached profile database in <*ret_cache>.
 *
 *            For every HMM read from the file, a generic profile is
 *            created and configured against a background model, and
 *            the matching optimized profile is read from the file with
 *            <p7_oprofile_ReadMSV()> and <p7_oprofile_ReadRest()>. Both
 *            are appended to the cache's lists, which double in size
 *            whenever they fill up.
 *
 *            Caller may optionally provide an <errbuf> ptr to
 *            at least <eslERRBUFSIZE> bytes, to capture an
 *            informative error message on failure.
 *
 * Args:      hmmfile   - (base) name of profile file to open
 *            ret_cache - RETURN: cached profile database
 *            errbuf    - optRETURN: error message for a failure
 *
 * Returns:   <eslOK> on success. <*ret_cache> points to the
 *            cached db. <errbuf>, if provided, is an empty string.
 *
 *            Failure codes:
 *            <eslENOTFOUND> : <hmmfile> couldn't be opened for reading
 *            <eslEFORMAT>   : <hmmfile> isn't in recognized HMMER file format,
 *                             or the vectorized profile files end before the
 *                             HMM file does
 *            <eslEINCOMPAT> : profiles in <hmmfile> have different alphabets
 *
 *            On any failure, <*ret_cache> is <NULL>, and everything
 *            allocated here has been released.
 *
 * Throws:    <eslEMEM> : memory allocation error.
 */
int
p7_hmmcache_Open(char *hmmfile, P7_HMMCACHE **ret_cache, char *errbuf)
{
  P7_HMMCACHE *cache = NULL;
  P7_HMMFILE  *hfp   = NULL;
  P7_HMM      *hmm   = NULL;
  P7_BG       *bg    = NULL;
  P7_PROFILE  *gm    = NULL;
  P7_OPROFILE *om    = NULL;
  int          status;

  if (errbuf) errbuf[0] = '\0';

  ESL_ALLOC(cache, sizeof(P7_HMMCACHE));
  cache->name   = NULL;
  cache->abc    = NULL;
  cache->omlist = NULL;
  cache->gmlist = NULL;
  cache->lalloc = 4096;   /* allocation chunk size for <list> of ptrs  */
  cache->n      = 0;

  /* Parenthesize the assignment so <status> holds the real error code,
   * not the result of the comparison.
   */
  if ( (status = esl_strdup(hmmfile, -1, &cache->name)) != eslOK) goto ERROR;

  ESL_ALLOC(cache->omlist, sizeof(P7_OPROFILE *) * cache->lalloc);
  ESL_ALLOC(cache->gmlist, sizeof(P7_PROFILE  *) * cache->lalloc);

  if ( (status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf)) != eslOK) goto ERROR;  // eslENOTFOUND | eslEFORMAT; <errbuf>

  while ((status = p7_hmmfile_Read(hfp, &(cache->abc), &hmm)) != eslEOF)  // eslEFORMAT | eslEINCOMPAT; <errbuf>
    {
      if (status != eslOK) ESL_XFAIL(status, errbuf, "%s", hfp->errbuf);

      /* The background model is created once, on the first HMM, when the alphabet is known. */
      if (!bg && (bg = p7_bg_Create(cache->abc)) == NULL)            { status = eslEMEM; goto ERROR; }

      if ( (     gm = p7_profile_Create(hmm->M, cache->abc)) == NULL) { status = eslEMEM; goto ERROR; }
      if ( (status = p7_profile_Config(gm, hmm, bg))         != eslOK) goto ERROR;

      if ( (status = p7_oprofile_ReadMSV (hfp, &(cache->abc), &om)) != eslOK ||  /* eslEFORMAT: hfp->errbuf | eslEINCOMPAT | eslEOF */
           (status = p7_oprofile_ReadRest(hfp, om))                 != eslOK)    /* eslEFORMAT: hfp->errbuf */
        {
          /* An HMM was read but its optimized profile is missing: the aux files are short. */
          if      (status == eslEOF)     ESL_XFAIL(eslEFORMAT, errbuf, "Premature EOF in vectorized profile files");
          /* Pass the reader's own message on to the caller. */
          else if (status == eslEFORMAT) ESL_XFAIL(eslEFORMAT, errbuf, "%s", hfp->errbuf);
          else                           goto ERROR;
        }

      ESL_DASSERT1(( strcmp(gm->name, om->name) == 0 ));

      if (cache->n >= cache->lalloc)
        {
          ESL_REALLOC(cache->gmlist, sizeof(P7_PROFILE  *) * cache->lalloc * 2);
          ESL_REALLOC(cache->omlist, sizeof(P7_OPROFILE *) * cache->lalloc * 2);
          cache->lalloc *= 2;
        }

      /* Ownership of <om> and <gm> passes to the cache; clear the locals
       * so the ERROR path cannot free them a second time.
       */
      cache->omlist[cache->n] = om;
      cache->gmlist[cache->n] = gm;
      cache->n++;

      om = NULL;
      gm = NULL;

      /* Same reasoning for <hmm>: don't leave a dangling pointer for the ERROR path. */
      p7_hmm_Destroy(hmm);
      hmm = NULL;
    }

  //printf("\nfinal:: %d  memory %" PRId64 "\n", inx, total_mem);

  p7_hmmfile_Close(hfp);
  p7_bg_Destroy(bg);
  *ret_cache = cache;
  return eslOK;

 ERROR:
  if (cache) p7_hmmcache_Close(cache);
  if (om)    p7_oprofile_Destroy(om);
  if (gm)    p7_profile_Destroy(gm);
  if (hmm)   p7_hmm_Destroy(hmm);
  if (bg)    p7_bg_Destroy(bg);
  if (hfp)   p7_hmmfile_Close(hfp);
  if (ret_cache) *ret_cache = NULL;   /* as promised to the caller on any failure */
  return status;
}