/* Create an SSI index file for open MSA file <afp>.
 * Both name and accession of MSAs are stored as keys.
 *
 * The index is written to "<afp->bf->filename>.ssi". Every alignment
 * read from <afp> must have a name: the name is added as a primary
 * key, and the accession (when present) is added as an alias of that
 * name. Progress and a short summary are printed to stdout.
 *
 * Errors are reported through esl_fatal() (read errors through
 * eslx_msafile_ReadFailure()); nothing is returned to the caller.
 *
 * <go> is not referenced by this function.
 */
static void
create_ssi_index(ESL_GETOPTS *go, ESLX_MSAFILE *afp)
{
  ESL_NEWSSI *ns      = NULL;
  ESL_MSA    *msa     = NULL;
  int         nali    = 0;
  char       *ssifile = NULL;
  uint16_t    fh;
  int         status;

  /* Only inputs backed by a named file can be indexed. */
  if (afp->bf->mode_is != eslBUFFER_FILE    &&
      afp->bf->mode_is != eslBUFFER_ALLFILE &&
      afp->bf->mode_is != eslBUFFER_MMAP)
    esl_fatal("<msafile> must be a regular file to be SSI indexed");

  /* Stop here if the index filename could not be built, rather than
   * handing a NULL name to esl_newssi_Open() below.
   */
  if (esl_sprintf(&ssifile, "%s.ssi", afp->bf->filename) != eslOK)
    esl_fatal("esl_sprintf() failed");

  status = esl_newssi_Open(ssifile, FALSE, &ns);
  if      (status == eslENOTFOUND)   esl_fatal("failed to open SSI index %s", ssifile);
  else if (status == eslEOVERWRITE)  esl_fatal("SSI index %s already exists; delete or rename it", ssifile);
  else if (status != eslOK)          esl_fatal("failed to create a new SSI index");

  if (esl_newssi_AddFile(ns, afp->bf->filename, afp->format, &fh) != eslOK)
    esl_fatal("Failed to add MSA file %s to new SSI index\n", afp->bf->filename);

  printf("Working... ");
  fflush(stdout);

  while ((status = eslx_msafile_Read(afp, &msa)) != eslEOF)
    {
      if (status != eslOK) eslx_msafile_ReadFailure(afp, status);
      nali++;

      if (! msa->name)
        esl_fatal("Every alignment in file must have a name to be indexed. Failed to find name of alignment #%d\n", nali);

      /* Primary key: alignment name, stored with msa->offset. */
      if (esl_newssi_AddKey(ns, msa->name, fh, msa->offset, 0, 0) != eslOK)
        esl_fatal("Failed to add key %s to SSI index", msa->name);

      /* Secondary key: accession, as an alias of the name. */
      if (msa->acc && esl_newssi_AddAlias(ns, msa->acc, msa->name) != eslOK)
        esl_fatal("Failed to add secondary key %s to SSI index", msa->acc);

      esl_msa_Destroy(msa);
    }

  if (esl_newssi_Write(ns) != eslOK)
    esl_fatal("Failed to write keys to ssi file %s\n", ssifile);

  printf("done.\n");
  if (ns->nsecondary)
    printf("Indexed %d alignments (%ld names and %ld accessions).\n", nali, (long) ns->nprimary, (long) ns->nsecondary);
  else
    printf("Indexed %d alignments (%ld names).\n", nali, (long) ns->nprimary);
  printf("SSI index written to file %s\n", ssifile);

  free(ssifile);
  esl_newssi_Close(ns);
  return;
}
/* Build an SSI index for the open HMM file <hfp>.
 *
 * The index goes to "<hfp->fname>.ssi". Each HMM's name is stored as
 * a primary key and, when the HMM has an accession, the accession is
 * stored as an alias of that name. Progress and a summary are printed
 * to stdout; problems are reported through esl_fatal(), p7_Fail() or
 * p7_Die().
 *
 * <go> is not referenced by this function.
 */
static void
create_ssi_index(ESL_GETOPTS *go, P7_HMMFILE *hfp)
{
  ESL_NEWSSI   *index    = NULL;
  ESL_ALPHABET *alphabet = NULL;
  P7_HMM       *model    = NULL;
  char         *ssifile  = NULL;
  int           nread    = 0;
  uint16_t      fhandle;
  int           rc;

  if (esl_sprintf(&ssifile, "%s.ssi", hfp->fname) != eslOK)
    p7_Die("esl_sprintf() failed");

  rc = esl_newssi_Open(ssifile, FALSE, &index);
  if (rc != eslOK)
    {
      if      (rc == eslENOTFOUND)  esl_fatal("failed to open SSI index %s", ssifile);
      else if (rc == eslEOVERWRITE) esl_fatal("SSI index %s already exists; delete or rename it", ssifile);
      else                          esl_fatal("failed to create a new SSI index");
    }

  /* 0 = format code (HMMs don't have any yet) */
  if (esl_newssi_AddFile(index, hfp->fname, 0, &fhandle) != eslOK)
    esl_fatal("Failed to add HMM file %s to new SSI index\n", hfp->fname);

  printf("Working... ");
  fflush(stdout);

  for (;;)
    {
      rc = p7_hmmfile_Read(hfp, &alphabet, &model);
      if (rc == eslEOF) break;

      if (rc != eslOK)
        {
          if      (rc == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
          else if (rc == eslEFORMAT)   p7_Fail("bad file format in HMM file %s", hfp->fname);
          else if (rc == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", hfp->fname);
          else                         p7_Fail("Unexpected error in reading HMMs from %s", hfp->fname);
        }
      nread++;

      if (model->name == NULL)
        p7_Fail("Every HMM must have a name to be indexed. Failed to find name of HMM #%d\n", nread);

      /* Name is the primary key. */
      if (esl_newssi_AddKey(index, model->name, fhandle, model->offset, 0, 0) != eslOK)
        p7_Fail("Failed to add key %s to SSI index", model->name);

      /* Accession, if any, is an alias of the name. */
      if (model->acc != NULL &&
          esl_newssi_AddAlias(index, model->acc, model->name) != eslOK)
        p7_Fail("Failed to add secondary key %s to SSI index", model->acc);

      p7_hmm_Destroy(model);
    }

  if (esl_newssi_Write(index) != eslOK)
    p7_Fail("Failed to write keys to ssi file %s\n", ssifile);

  printf("done.\n");
  if (index->nsecondary > 0)
    printf("Indexed %d HMMs (%ld names and %ld accessions).\n", nread, (long) index->nprimary, (long) index->nsecondary);
  else
    printf("Indexed %d HMMs (%ld names).\n", nread, (long) index->nprimary);
  printf("SSI index written to file %s\n", ssifile);

  free(ssifile);
  esl_alphabet_Destroy(alphabet);
  esl_newssi_Close(index);
  return;
}
/* Function:  p7_hmmcache_SetNumericNames()
 * Synopsis:  Replace every profile name in a cache with a numeric code.
 *
 * Purpose:   For each position <i> in profile cache <cache>, rename
 *            both the profile in <gmlist> and the profile in <omlist>
 *            to the decimal value <i+1>, written nine digits wide and
 *            left padded with zeros: "000000001", "000000002", ...
 *            Any existing name is freed before it is replaced.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure. The status of the
 *            failing <esl_sprintf()> call is returned unchanged, and
 *            renaming stops at that profile.
 */
int
p7_hmmcache_SetNumericNames(P7_HMMCACHE *cache)
{
  const int width = 9;   /* digits in the code: 000000001, 000000002... */
  int       idx;
  int       rc;

  for (idx = 0; idx < cache->n; idx++)
    {
      P7_PROFILE  *generic;
      P7_OPROFILE *optimized;

      generic = cache->gmlist[idx];
      if (generic->name != NULL) free(generic->name);
      rc = esl_sprintf(&(generic->name), "%0*d", width, idx+1);
      if (rc != eslOK) return rc;

      optimized = cache->omlist[idx];
      if (optimized->name != NULL) free(optimized->name);
      rc = esl_sprintf(&(optimized->name), "%0*d", width, idx+1);
      if (rc != eslOK) return rc;
    }

  return eslOK;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* application configuration */ char *alifile = NULL; /* alignment file name */ int infmt = eslMSAFILE_UNKNOWN; /* format code for alifile */ int outfmt = eslMSAFILE_UNKNOWN; /* output format for fetched msa's */ ESLX_MSAFILE *afp = NULL; /* open alignment file */ FILE *ofp = NULL; /* output stream for alignments */ int status; /* easel return code */ /*********************************************** * Parse command line ***********************************************/ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") ) cmdline_help (argv[0], go); if (esl_opt_ArgNumber(go) < 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); if (esl_opt_IsOn(go, "--informat")) { infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat")); if (infmt == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid input alignment file format for --informat", esl_opt_GetString(go, "--informat")); } outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat")); if (outfmt == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid output alignment file format for --outformat", esl_opt_GetString(go, "--outformat")); alifile = esl_opt_GetArg(go, 1); /* Open the alignment file. */ if ( (status = eslx_msafile_Open(NULL, alifile, NULL, infmt, NULL, &afp)) != eslOK) eslx_msafile_OpenFailure(afp, status); /* Open the SSI index, if any */ if (! 
esl_opt_GetBoolean(go, "--index")) { if (afp->bf->mode_is == eslBUFFER_FILE || afp->bf->mode_is == eslBUFFER_ALLFILE || afp->bf->mode_is == eslBUFFER_MMAP) { char *ssifile = NULL; esl_sprintf(&ssifile, "%s.ssi", afp->bf->filename); status = esl_ssi_Open(ssifile, &(afp->ssi)); if (status == eslERANGE ) esl_fatal("SSI index %s has 64-bit offsets; this system doesn't support them", ssifile); else if (status == eslEFORMAT) esl_fatal("SSI index %s has an unrecognized format. Try recreating, w/ esl-afetch --index", ssifile); else if (status == eslENOTFOUND) afp->ssi = NULL; else if (status != eslOK) esl_fatal("SSI index %s: open failed, error code %d\n", ssifile, status); free(ssifile); } } /* Open the output file, if any */ if (esl_opt_GetBoolean(go, "-O")) { if ((ofp = fopen(esl_opt_GetArg(go, 2), "w")) == NULL) esl_fatal("Failed to open output file %s\n", esl_opt_GetArg(go, 2)); } else if (esl_opt_GetString(go, "-o") != NULL) { if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL) esl_fatal("Failed to open output file %s\n", esl_opt_GetString(go, "-o")); } else ofp = stdout; /* Hand off control flow as appropriate */ if (esl_opt_GetBoolean(go, "--index")) { if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); create_ssi_index(go, afp); } else if (esl_opt_GetBoolean(go, "-f")) { if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); multifetch(go, ofp, outfmt, esl_opt_GetArg(go, 2), afp); } else { if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); onefetch(go, ofp, outfmt, esl_opt_GetArg(go, 2), afp); if (ofp != stdout) printf("\n\nRetrieved alignment %s.\n", esl_opt_GetArg(go, 2)); } eslx_msafile_Close(afp); esl_getopts_Destroy(go); exit(0); }
/* Driver: read every HMM from the file named by argument 1, convert
 * each one to an optimized profile, and write the profiles out with
 * p7_oprofile_Write() to two companion files, "<hmmfile>.h3f" and
 * "<hmmfile>.h3p".
 *
 * With -v, the name of each HMM is printed as it is processed.
 * Profiles are configured with p7_LOCAL and length 400, against a
 * background model whose length is also set to 400.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_ALPHABET *abc     = NULL;
  char         *hmmfile = esl_opt_GetArg(go, 1);
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  P7_OPROFILE  *om      = NULL;
  char         *fname   = NULL;
  char         *pname   = NULL;
  FILE         *ffp     = NULL;
  FILE         *pfp     = NULL;
  int           nmodel  = 0;
  uint64_t      totM    = 0;   /* running sum of hmm->M; accumulated but not reported here */
  int           status;

  status = p7_hmmfile_Open(hmmfile, NULL, &hfp);
  if      (status == eslENOTFOUND) esl_fatal("Failed to open HMM file %s for reading.\n", hmmfile);
  else if (status == eslEFORMAT)   esl_fatal("File %s does not appear to be in a recognized HMM format.\n", hmmfile);
  else if (status != eslOK)        esl_fatal("Unexpected error %d in opening HMM file %s.\n", status, hmmfile);

  /* The two output streams need distinct paths: opening one path
   * twice with "wb" would make the two streams overwrite each other.
   */
  esl_sprintf(&fname, "%s.h3f", hmmfile);
  esl_sprintf(&pname, "%s.h3p", hmmfile);
  if ((ffp = fopen(fname, "wb")) == NULL) esl_fatal("failed to open %s\n", fname);
  if ((pfp = fopen(pname, "wb")) == NULL) esl_fatal("failed to open %s\n", pname);
  free(fname);
  free(pname);

  while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) == eslOK)
    {
      if (nmodel == 0)
        { /* first time initialization, now that alphabet known */
          bg = p7_bg_Create(abc);
          p7_bg_SetLength(bg, 400);
        }

      if (esl_opt_GetBoolean(go, "-v")) printf("%s\n", hmm->name);
      nmodel++;
      totM += hmm->M;

      gm = p7_profile_Create(hmm->M, abc);
      p7_ProfileConfig(hmm, bg, gm, 400, p7_LOCAL);
      om = p7_oprofile_Create(gm->M, abc);
      p7_oprofile_Convert(gm, om);

      /* A failed write would leave truncated output; stop rather than continue silently. */
      if (p7_oprofile_Write(ffp, pfp, om) != eslOK)
        esl_fatal("Failed to write optimized profile #%d from HMM file %s\n", nmodel, hmmfile);

      p7_profile_Destroy(gm);
      p7_oprofile_Destroy(om);
      p7_hmm_Destroy(hmm);
    }
  if      (status == eslEFORMAT)   esl_fatal("bad file format in HMM file %s", hmmfile);
  else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile);
  else if (status != eslEOF)       esl_fatal("Unexpected error in reading HMMs from %s", hmmfile);

  fclose(ffp);
  fclose(pfp);
  p7_bg_Destroy(bg);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
/* utest_ReadWrite()
 *
 * Round-trip unit test for optimized profile I/O. The test HMM <hmm>
 * and its optimized profile <om> are saved to a temporary file plus
 * companion .h3m/.h3f/.h3p/.h3i files; the optimized profile is then
 * read back and compared against <om> with p7_oprofile_Compare().
 * Any failure ends the test through esl_fatal(). All files created
 * here are removed before returning.
 *
 * Note: <om->offs[]> is overwritten with the current positions of the
 * freshly opened output files.
 */
static void
utest_ReadWrite(P7_HMM *hmm, P7_OPROFILE *om)
{
  char         *failmsg     = "oprofile read/write unit test failure";
  char          tmpfile[16] = "esltmpXXXXXX";
  float         tolerance   = 0.001;
  uint16_t      fh          = 0;
  ESL_ALPHABET *abc         = NULL;
  P7_OPROFILE  *om_back     = NULL;
  P7_HMMFILE   *hfp         = NULL;
  ESL_NEWSSI   *nssi        = NULL;
  char         *h3m_name    = NULL;
  char         *h3f_name    = NULL;
  char         *h3p_name    = NULL;
  char         *h3i_name    = NULL;
  FILE         *ascii_fp    = NULL;
  FILE         *h3m_fp      = NULL;
  FILE         *h3f_fp      = NULL;
  FILE         *h3p_fp      = NULL;
  char          errbuf[eslERRBUFSIZE];

  /* 1. A mini version of hmmpress: save the test HMM to a file along
   *    with its associated .h3{mfpi} files.
   */
  if (esl_tmpfile_named(tmpfile, &ascii_fp)       != eslOK) esl_fatal(failmsg);
  if (esl_sprintf(&h3m_name, "%s.h3m", tmpfile)   != eslOK) esl_fatal(failmsg);
  if (esl_sprintf(&h3f_name, "%s.h3f", tmpfile)   != eslOK) esl_fatal(failmsg);
  if (esl_sprintf(&h3p_name, "%s.h3p", tmpfile)   != eslOK) esl_fatal(failmsg);
  if (esl_sprintf(&h3i_name, "%s.h3i", tmpfile)   != eslOK) esl_fatal(failmsg);

  if (esl_newssi_Open(h3i_name, TRUE, &nssi)      != eslOK) esl_fatal(failmsg);

  h3m_fp = fopen(h3m_name, "wb");
  if (h3m_fp == NULL) esl_fatal(failmsg);
  h3f_fp = fopen(h3f_name, "wb");
  if (h3f_fp == NULL) esl_fatal(failmsg);
  h3p_fp = fopen(h3p_name, "wb");
  if (h3p_fp == NULL) esl_fatal(failmsg);

  /* The disk offsets are all 0 by construction while there is only one
   * HMM in the file; they are still recorded so that a future multi-HMM
   * version of this test does not forget them.
   */
  om->offs[p7_MOFFSET] = ftello(h3m_fp);
  if (om->offs[p7_MOFFSET] == -1) esl_fatal(failmsg);
  om->offs[p7_FOFFSET] = ftello(h3f_fp);
  if (om->offs[p7_FOFFSET] == -1) esl_fatal(failmsg);
  om->offs[p7_POFFSET] = ftello(h3p_fp);
  if (om->offs[p7_POFFSET] == -1) esl_fatal(failmsg);

  if (p7_hmmfile_WriteASCII(ascii_fp, -1, hmm)    != eslOK) esl_fatal(failmsg);
  if (p7_hmmfile_WriteBinary(h3m_fp, -1, hmm)     != eslOK) esl_fatal(failmsg);
  if (p7_oprofile_Write(h3f_fp, h3p_fp, om)       != eslOK) esl_fatal(failmsg);

  if (esl_newssi_AddFile(nssi, tmpfile, 0, &fh)   != eslOK) esl_fatal(failmsg);
  if (esl_newssi_AddKey (nssi, hmm->name, fh, om->offs[p7_MOFFSET], 0, 0) != eslOK) esl_fatal(failmsg);
  if (esl_newssi_Write(nssi)                      != eslOK) esl_fatal(failmsg);

  fclose(ascii_fp);
  fclose(h3m_fp);
  fclose(h3f_fp);
  fclose(h3p_fp);
  esl_newssi_Close(nssi);

  /* 2. Read the optimized profile back in. */
  if (p7_hmmfile_Open(tmpfile, NULL, &hfp)        != eslOK) esl_fatal(failmsg);
  if (p7_oprofile_ReadMSV(hfp, &abc, &om_back)    != eslOK) esl_fatal(failmsg);
  if (p7_oprofile_ReadRest(hfp, om_back)          != eslOK) esl_fatal(failmsg);

  /* 3. It should be identical to the original. */
  if (p7_oprofile_Compare(om, om_back, tolerance, errbuf) != eslOK)
    esl_fatal("%s\n%s", failmsg, errbuf);

  /* Release what was read back, then delete the files and their names. */
  p7_oprofile_Destroy(om_back);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);

  remove(h3i_name);
  remove(h3f_name);
  remove(h3p_name);
  remove(h3m_name);
  remove(tmpfile);

  free(h3i_name);
  free(h3m_name);
  free(h3f_name);
  free(h3p_name);
}