int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; int infmt = eslMSAFILE_UNKNOWN; ESLX_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; FILE *ofp = stdout; int nali = 0; int namewidth; double pid; int nid, n; int i,j; int status; /* allow user to assert the input MSA alphabet */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* allow user to assert the input MSA format */ if (esl_opt_IsOn(go, "--informat") && (infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid MSA file format for --informat", esl_opt_GetString(go, "--informat")); /* digital open */ if ( ( status = eslx_msafile_Open(&abc, msafile, NULL, infmt, NULL, &afp)) != eslOK) eslx_msafile_OpenFailure(afp, status); while ((status = eslx_msafile_Read(afp, &msa)) == eslOK) { nali++; namewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq); for (i = 0; i < msa->nseq; i++) for (j = i+1; j < msa->nseq; j++) { esl_dst_XPairId(abc, msa->ax[i], msa->ax[j], &pid, &nid, &n); fprintf(ofp, "%-*s %-*s %6.2f %6d %6d\n", namewidth, msa->sqname[i], namewidth, msa->sqname[j], pid*100.0, nid, n); } esl_msa_Destroy(msa); } if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); eslx_msafile_Close(afp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(0); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESLX_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; int textmode = esl_opt_GetBoolean(go, "--text"); int nali = 0; int status; /* If you know the alphabet you want, create it - you'll pass it to eslx_msafile_Open() */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* Open in text or digital mode. * To let the Open() function autoguess the format, you pass <infmt=eslMSAFILE_UNKNOWN>. * To let it autoguess the alphabet, you set <abc=NULL> and pass <&abc>. * To open in text mode instead of digital, you pass <NULL> for the alphabet argument. * eslx_msafile_OpenFailure() is a convenience, printing various diagnostics of any * open failure to <stderr>. You can of course handle your own diagnostics instead. */ if (textmode) status = eslx_msafile_Open(NULL, msafile, NULL, fmt, NULL, &afp); else status = eslx_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp); if (status != eslOK) eslx_msafile_OpenFailure(afp, status); fmt = afp->format; while ((status = eslx_msafile_Read(afp, &msa)) == eslOK) { /* if digital MSA: msa->ax[idx=0..nseq-1][acol=1..alen] is the alignment data; * if text MSA: msa->aseq[idx=0..nseq-1][acol=0..alen-1] */ nali++; /* permute it */ esl_msashuffle_PermuteSequenceOrder(rng, msa); eslx_msafile_Write(stdout, msa, fmt); esl_msa_Destroy(msa); } if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); /* a convenience, like eslx_msafile_OpenFailure() */ esl_alphabet_Destroy(abc); eslx_msafile_Close(afp); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); exit(0); }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_BG *bg = NULL; int M = 100; int L = 200; int nseq = 20; char errbuf[eslERRBUFSIZE]; if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal("failed to sample an HMM"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal("failed to create profile"); if (p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL) != eslOK) esl_fatal("failed to config profile"); if (p7_hmm_Validate (hmm, errbuf, 0.0001) != eslOK) esl_fatal("whoops, HMM is bad!: %s", errbuf); if (p7_profile_Validate(gm, errbuf, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!: %s", errbuf); utest_basic (go); utest_viterbi(go, r, abc, bg, gm, nseq, L); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_BG *bg = NULL; P7_HMM *hmm = NULL; P7_OPROFILE *om = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); /* Sample a random HMM and optimized profile, in amino acid alphabet. */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (( p7_oprofile_Sample(r, abc, bg, M, L, &hmm, NULL, &om)) != eslOK) esl_fatal("failed to sample HMM and profile"); /* unit test(s) */ utest_ReadWrite(hmm, om); p7_oprofile_Destroy(om); p7_hmm_Destroy(hmm); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
static int make_occasionally_dishonest_casino(ESL_HMM **ret_hmm, ESL_ALPHABET **ret_abc) { ESL_ALPHABET *abc = esl_alphabet_Create(eslDICE); ESL_HMM *hmm = esl_hmm_Create(abc, 2); int x; /* State 0 = fair die */ hmm->pi[0] = 1.0; hmm->pi[1] = 0.0; hmm->pi[2] = 0.0; /* no L=0 seqs */ hmm->t[0][0] = 0.96; hmm->t[0][1] = 0.03; hmm->t[0][2] = 0.01; /* end from state 0; mean length 100 */ for (x = 0; x < abc->K; x++) hmm->e[0][x] = 1.0 / (float) abc->K; /* State 1 = loaded die */ hmm->t[1][0] = 0.05; hmm->t[1][1] = 0.95; hmm->t[1][2] = 0.0; /* no end from state 1 */ for (x = 0; x < abc->K-1; x++) hmm->e[1][x] = 0.5 / ((float) abc->K-1); hmm->e[1][abc->K-1] = 0.5; esl_hmm_Configure(hmm, NULL); *ret_hmm = hmm; *ret_abc = abc; return eslOK; }
int main(int argc, char **argv) { char *msg = "p7_gmx unit test driver failed"; ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_BG *bg = p7_bg_Create(abc); P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); float tol = esl_opt_GetReal (go, "-t"); p7_FLogsumInit(); if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal(msg); if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal(msg); if (p7_bg_SetLength(bg, L) != eslOK) esl_fatal(msg); if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL) != eslOK) esl_fatal(msg); utest_GrowTo(); utest_Compare(r, gm, bg, L, tol); esl_getopts_Destroy(go); esl_randomness_Destroy(r); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_profile_Destroy(gm); return eslOK; }
static void utest_ReadWrite(ESL_RANDOMNESS *rng) { char msg[] = "bg Read/Write unit test failed"; char tmpfile[32] = "esltmpXXXXXX"; FILE *fp = NULL; ESL_ALPHABET *abc = NULL; /* random alphabet choice eslRNA..eslDICE */ float *fq = NULL; P7_BG *bg = NULL; if ((abc = esl_alphabet_Create(esl_rnd_Roll(rng, 5) + 1)) == NULL) esl_fatal(msg); if (( bg = p7_bg_Create(abc)) == NULL) esl_fatal(msg); if (( fq = malloc(sizeof(float) * abc->K)) == NULL) esl_fatal(msg); do { if (esl_dirichlet_FSampleUniform(rng, abc->K, fq) != eslOK) esl_fatal(msg); } while (esl_vec_FMin(fq, abc->K) < 0.001); /* small p's will get rounded off and fail FCompare() */ esl_vec_FCopy(fq, abc->K, bg->f); if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg); if ( p7_bg_Write(fp, bg) != eslOK) esl_fatal(msg); fclose(fp); esl_vec_FSet(bg->f, bg->abc->K, 0.0); if ( p7_bg_Read(tmpfile, bg, NULL) != eslOK) esl_fatal(msg); if ( esl_vec_FCompare(fq, bg->f, bg->abc->K, 0.01) != eslOK) esl_fatal(msg); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); free(fq); remove(tmpfile); }
static void utest_Compare(void) { ESL_RANDOMNESS *r = esl_randomness_CreateFast(42); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_PROFILE *gm2 = NULL; int M = 200; int L = 400; p7_modelsample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_profile_Config(gm, hmm, bg); p7_profile_Config(gm2, hmm, bg); p7_profile_SetLength(gm, L); p7_profile_SetLength(gm2, L); if (p7_profile_Compare(gm, gm2, 0.001) != eslOK) p7_Die("identical profile comparison failed"); p7_profile_Destroy(gm); p7_profile_Destroy(gm2); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return; }
/* utest_basic() * An MSA to ex{e,o}rcise past demons. * 1. seq2 gives an I->end transition. * 2. seq1 contains degenerate Z,X, exercising symbol counting * of degenerate residues. */ static void utest_basic(void) { char *failmsg = "failure in build.c::utest_basic() unit test"; char msafile[16] = "p7tmpXXXXXX"; /* tmpfile name template */ FILE *ofp = NULL; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; float symfrac = 0.5; if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg); fprintf(ofp, "# STOCKHOLM 1.0\n"); fprintf(ofp, "#=GC RF --xxxxxxxxxxxxxxxx-xxx-x--\n"); fprintf(ofp, "seq1 --ACDEFGHIKLMNPZXS-TVW-Yyy\n"); fprintf(ofp, "seq2 aaACDEFGHIKLMNPQRS-TVWw---\n"); fprintf(ofp, "seq3 aaAC-EFGHIKLMNPQRS-TVW-Y--\n"); fprintf(ofp, "seq4 aaAC-EFGHIKLMNPQRS-TVW-Y--\n"); fprintf(ofp, "//\n"); fclose(ofp); if (esl_msafile_Open(&abc, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp) != eslOK) esl_fatal(failmsg); if (esl_msafile_Read(afp, &msa) != eslOK) esl_fatal(failmsg); if (p7_Fastmodelmaker(msa, symfrac, NULL, &hmm, NULL) != eslOK) esl_fatal(failmsg); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msafile_Close(afp); esl_alphabet_Destroy(abc); remove(msafile); return; }
static void utest_normalization(ESL_GETOPTS *go) { char *msg = "seqmodel normalization utest failed"; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); char *seq = "ACDEFGHIKLMNPQRSTVWYBJZOUX"; int L = strlen(seq); ESL_DSQ *dsq = NULL; float popen = 0.1; float pextend = 0.4; P7_BUILDER *bld = NULL; P7_BG *bg = p7_bg_Create(abc); P7_HMM *hmm = NULL; char errbuf[eslERRBUFSIZE]; if ( esl_abc_CreateDsq(abc, seq, &dsq) != eslOK) esl_fatal(msg); if ( (bld = p7_builder_Create(NULL, abc)) == NULL) esl_fatal(msg); if ( p7_builder_LoadScoreSystem(bld, "BLOSUM62", popen, pextend, bg) != eslOK) esl_fatal(msg); if ( p7_Seqmodel(abc, dsq, L, "aatest", bld->Q, bg->f, bld->popen, bld->pextend, &hmm) != eslOK) esl_fatal(msg); if (p7_hmm_Validate(hmm, errbuf, 0.0001) != eslOK) esl_fatal("normalization utest failed\n%s\n", errbuf); free(dsq); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_builder_Destroy(bld); esl_alphabet_Destroy(abc); }
static void utest_SendRecv(ESL_RANDOMNESS *rng, int my_rank, int nproc) { char msg[] = "utest_SendRecv() failed"; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_HMM *xhmm = NULL; int M = 200; char *wbuf = NULL; int wn = 0; int i; uint32_t rngseed; MPI_Status mpistatus; char errmsg[eslERRBUFSIZE]; if (my_rank == 0) { /* First we send our RNG seed to all workers */ rngseed = esl_randomness_GetSeed(rng); for (i = 1; i < nproc; i++) if (MPI_Send( &rngseed, 1, MPI_UNSIGNED, i, 0, MPI_COMM_WORLD) != MPI_SUCCESS) esl_fatal(msg); /* We sample an HMM that's going to be identical to the workers' */ if (p7_modelsample(rng, M, abc, &hmm) != eslOK) esl_fatal(msg); for (i = 1; i < nproc; i++) { if (p7_hmm_mpi_Recv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &xhmm) != eslOK) esl_fatal(msg); if (p7_hmm_Validate(xhmm, errmsg, 0.001) != eslOK) esl_fatal("%s:\n %s", msg, errmsg); if (p7_hmm_Compare(hmm, xhmm, 0.001) != eslOK) esl_fatal(msg); p7_hmm_Destroy(xhmm); } } else { /* Worker(s) must first receive the exact same RNG seed that the master is using. */ if (MPI_Recv(&rngseed, 1, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, &mpistatus) != MPI_SUCCESS) esl_fatal(msg); /* and then the worker(s) can create the exact same RNG (and random number sequence) that the master has */ rng = esl_randomness_CreateFast(rngseed); /* so when the worker samples this HMM, the master has independently sampled an exact duplicate of it... */ if (p7_modelsample(rng, M, abc, &hmm) != eslOK) esl_fatal(msg); /* each worker sends the HMM to the master (it's the same HMM for each worker. The test is intended for one master, one worker.) */ if (p7_hmm_mpi_Send(hmm, 0, 0, MPI_COMM_WORLD, &wbuf, &wn) != eslOK) esl_fatal(msg); /* worker's RNG is a private copy; destroy it. Master keeps its RNG, which the caller is responsible for. */ esl_randomness_Destroy(rng); } p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); free(wbuf); return; }
/* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence, * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging. */ static void utest_basic(ESL_GETOPTS *go) { char *query= "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n"; int fmt = eslMSAFILE_STOCKHOLM; char *targ = "GAATTC"; ESL_ALPHABET *abc = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_BG *bg = NULL; P7_PRIOR *pri = NULL; ESL_DSQ *dsq = NULL; P7_GMX *gx = NULL; P7_TRACE *tr = NULL; int L = strlen(targ); float vsc, vsc2, fsc; if ((abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create alphabet"); if ((pri = p7_prior_CreateNucleic()) == NULL) esl_fatal("failed to create prior"); if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL) esl_fatal("failed to create MSA"); if (esl_msa_Digitize(abc, msa, NULL) != eslOK) esl_fatal("failed to digitize MSA"); if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK) esl_fatal("failed to create GAATTC model"); if (p7_ParameterEstimation(hmm, pri) != eslOK) esl_fatal("failed to parameterize GAATTC model"); if (p7_hmm_SetConsensus(hmm, NULL) != eslOK) esl_fatal("failed to make consensus"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create DNA null model"); if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal("failed to create GAATTC profile"); if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)!= eslOK) esl_fatal("failed to config profile"); if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!"); if (esl_abc_CreateDsq(abc, targ, &dsq) != eslOK) esl_fatal("failed to create GAATTC digital sequence"); if ((gx = p7_gmx_Create(gm->M, L)) == NULL) esl_fatal("failed to create DP matrix"); if ((tr = p7_trace_Create()) == NULL) esl_fatal("trace creation failed"); p7_GViterbi (dsq, L, gm, gx, &vsc); if (esl_opt_GetBoolean(go, "-v")) printf("Viterbi score: %.4f\n", vsc); if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT); p7_GTrace (dsq, L, gm, gx, tr); p7_trace_Score(tr, dsq, gm, &vsc2); if (esl_opt_GetBoolean(go, "-v")) p7_trace_Dump(stdout, tr, gm, dsq); if (esl_FCompare(vsc, vsc2, 1e-5) != eslOK) esl_fatal("trace score and Viterbi score don't agree."); p7_GForward (dsq, L, gm, gx, &fsc); if (esl_opt_GetBoolean(go, "-v")) printf("Forward score: %.4f\n", fsc); if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT); p7_trace_Destroy(tr); p7_gmx_Destroy(gx); free(dsq); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); p7_prior_Destroy(pri); esl_alphabet_Destroy(abc); return; }
static void utest_oprofileSendRecv(int my_rank, int nproc) { ESL_RANDOMNESS *r = esl_randomness_CreateFast(42); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OPROFILE *om2 = NULL; int M = 200; int L = 400; char *wbuf = NULL; int wn = 0; int i; char errbuf[eslERRBUFSIZE]; p7_hmm_Sample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); om = p7_oprofile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); p7_oprofile_Convert(gm, om); p7_bg_SetLength (bg, L); if (my_rank == 0) { for (i = 1; i < nproc; i++) { ESL_DPRINTF1(("Master: receiving test profile\n")); p7_oprofile_MPIRecv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &om2); ESL_DPRINTF1(("Master: test profile received\n")); if (p7_oprofile_Compare(om, om2, 0.001, errbuf) != eslOK) p7_Die("Received profile not identical to what was sent\n%s", errbuf); p7_oprofile_Destroy(om2); } } else { ESL_DPRINTF1(("Worker %d: sending test profile\n", my_rank)); p7_oprofile_MPISend(om, 0, 0, MPI_COMM_WORLD, &wbuf, &wn); ESL_DPRINTF1(("Worker %d: test profile sent\n", my_rank)); } free(wbuf); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return; }
static void utest_alphabet_config(int alphatype) { char *msg = "HMMER alphabet config unit test failed"; ESL_ALPHABET *abc = NULL; if ((abc = esl_alphabet_Create(alphatype)) == NULL) esl_fatal(msg); if (abc->K > p7_MAXABET) esl_fatal(msg); if (abc->Kp > p7_MAXCODE) esl_fatal(msg); esl_alphabet_Destroy(abc); }
/* init_master_cfg() * Called by masters, mpi or serial. * Already set: * cfg->hmmfile - command line arg 1 * cfg->alifile - command line arg 2 * cfg->postmsafile - option -O (default NULL) * cfg->fmt - format of alignment file * Sets: * cfg->afp - open alignment file * cfg->abc - digital alphabet * cfg->hmmfp - open HMM file * cfg->postmsafp - open MSA resave file, or NULL * * Errors in the MPI master here are considered to be "recoverable", * in the sense that we'll try to delay output of the error message * until we've cleanly shut down the worker processes. Therefore * errors return (code, errmsg) by the ESL_FAIL mech. */ static int init_master_cfg(const ESL_GETOPTS *go, struct cfg_s *cfg, char *errmsg) { int status; if (esl_opt_GetString(go, "-o") != NULL) { if ((cfg->ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL) ESL_FAIL(eslFAIL, errmsg, "Failed to open -o output file %s\n", esl_opt_GetString(go, "-o")); } else cfg->ofp = stdout; status = esl_msafile_Open(cfg->alifile, cfg->fmt, NULL, &(cfg->afp)); if (status == eslENOTFOUND) ESL_FAIL(status, errmsg, "Alignment file %s doesn't exist or is not readable\n", cfg->alifile); else if (status == eslEFORMAT) ESL_FAIL(status, errmsg, "Couldn't determine format of alignment %s\n", cfg->alifile); else if (status != eslOK) ESL_FAIL(status, errmsg, "Alignment file open failed with error %d\n", status); if (esl_opt_GetBoolean(go, "--amino")) cfg->abc = esl_alphabet_Create(eslAMINO); else if (esl_opt_GetBoolean(go, "--dna")) cfg->abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--rna")) cfg->abc = esl_alphabet_Create(eslRNA); else { int type; status = esl_msafile_GuessAlphabet(cfg->afp, &type); if (status == eslEAMBIGUOUS) ESL_FAIL(status, errmsg, "Failed to guess the bio alphabet used in %s.\nUse --dna, --rna, or --amino option to specify it.", cfg->alifile); else if (status == eslEFORMAT) ESL_FAIL(status, errmsg, "Alignment file parse failed: %s\n", cfg->afp->errbuf); else if (status == eslENODATA) ESL_FAIL(status, errmsg, "Alignment file %s is empty\n", cfg->alifile); else if (status != eslOK) ESL_FAIL(status, errmsg, "Failed to read alignment file %s\n", cfg->alifile); cfg->abc = esl_alphabet_Create(type); } esl_msafile_SetDigital(cfg->afp, cfg->abc); if ((cfg->hmmfp = fopen(cfg->hmmfile, "w")) == NULL) ESL_FAIL(status, errmsg, "Failed to open HMM file %s for writing", cfg->hmmfile); if (cfg->postmsafile != NULL) { if ((cfg->postmsafp = fopen(cfg->postmsafile, "w")) == NULL) ESL_FAIL(status, errmsg, "Failed to MSA resave file %s for writing", cfg->postmsafile); } else cfg->postmsafp = NULL; output_header(go, cfg); /* with msa == NULL, output_result() prints the tabular results header, if needed */ output_result(cfg, errmsg, 0, NULL, NULL, NULL, 0.0); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *filename = esl_opt_GetArg(go, 1); int infmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; int status; if (esl_opt_GetBoolean(go, "-1")) infmt = eslMSAFILE_A2M; /* override format autodetection */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* Text mode: pass NULL for alphabet. * Digital mode: pass ptr to expected ESL_ALPHABET; and if abc=NULL, alphabet is guessed */ if (esl_opt_GetBoolean(go, "-t")) status = esl_msafile_Open(NULL, filename, NULL, infmt, NULL, &afp); else status = esl_msafile_Open(&abc, filename, NULL, infmt, NULL, &afp); if (status != eslOK) esl_msafile_OpenFailure(afp, status); if ((status = esl_msafile_a2m_Read(afp, &msa)) != eslOK) esl_msafile_ReadFailure(afp, status); printf("alphabet: %s\n", (abc ? esl_abc_DecodeType(abc->type) : "none (text mode)")); printf("# of seqs: %d\n", msa->nseq); printf("# of cols: %d\n", (int) msa->alen); printf("\n"); if (! esl_opt_GetBoolean(go, "-q")) esl_msafile_a2m_Write(stdout, msa); esl_msa_Destroy(msa); esl_msafile_Close(afp); if (abc) esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); exit(0); }
int main(int argc, char **argv) { char *filename = argv[1]; int fmt = eslMSAFILE_UNKNOWN; int type = eslUNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; double maxid = 0.62; /* cluster at 62% identity: the BLOSUM62 rule */ int *assignment = NULL; int *nin = NULL; int nclusters; int c, i; int status; /* Open; guess alphabet; set to digital mode */ status = esl_msafile_Open(filename, fmt, NULL, &afp); if (status == eslENOTFOUND) esl_fatal("Alignment file %s isn't readable", filename); else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of %s", filename); else if (status != eslOK) esl_fatal("Alignment file open failed (error code %d)", status); status = esl_msafile_GuessAlphabet(afp, &type); if (status == eslEAMBIGUOUS) esl_fatal("Couldn't guess alphabet from first alignment in %s", filename); else if (status == eslEFORMAT) esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nBad line is: %s\n", afp->linenumber, afp->fname, afp->errbuf, afp->buf); else if (status == eslENODATA) esl_fatal("Alignment file %s contains no data?", filename); else if (status != eslOK) esl_fatal("Failed to guess alphabet (error code %d)\n", status); abc = esl_alphabet_Create(type); esl_msafile_SetDigital(afp, abc); /* read one alignment */ status = esl_msa_Read(afp, &msa); if (status == eslEFORMAT) esl_fatal("alignment file %s: %s\n", afp->fname, afp->errbuf); else if (status != eslOK) esl_fatal("Alignment file read failed with error code %d\n", status); /* do the clustering */ esl_msacluster_SingleLinkage(msa, maxid, &assignment, &nin, &nclusters); printf("%d clusters at threshold of %f fractional identity\n", nclusters, maxid); for (c = 0; c < nclusters; c++) { printf("cluster %d:\n", c); for (i = 0; i < msa->nseq; i++) if (assignment[i] == c) printf(" %s\n", msa->sqname[i]); printf("(%d sequences)\n\n", nin[c]); } esl_msa_Destroy(msa); esl_msafile_Close(afp); free(assignment); free(nin); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_BG *bg = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); /* First round of tests for DNA alphabets. */ if ((abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (esl_opt_GetBoolean(go, "-v")) printf("ViterbiFilter() tests, DNA\n"); utest_viterbi_filter(r, abc, bg, M, L, N); utest_viterbi_filter(r, abc, bg, 1, L, 10); utest_viterbi_filter(r, abc, bg, M, 1, 10); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); /* Second round of tests for amino alphabets. */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (esl_opt_GetBoolean(go, "-v")) printf("ViterbiFilter() tests, protein\n"); utest_viterbi_filter(r, abc, bg, M, L, N); utest_viterbi_filter(r, abc, bg, 1, L, 10); utest_viterbi_filter(r, abc, bg, M, 1, 10); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); esl_getopts_Destroy(go); esl_randomness_Destroy(r); return eslOK; }
/* seq_generation() * * Generating sequences. */ static int seq_generation(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt) { ESL_ALPHABET *abc = NULL; ESL_SQ *sq = NULL; double *fq = NULL; int alphatype = eslUNKNOWN; // static checkers can't see that 1 of --rna, --dna, --amino must be true int N = esl_opt_GetInteger(go, "-N"); int L = esl_opt_GetInteger(go, "-L"); int i; int status; if (L <= 0) esl_fatal("To generate sequences, set -L option (length of generated seqs) > 0 "); if (esl_opt_GetBoolean(go, "--rna")) alphatype = eslRNA; if (esl_opt_GetBoolean(go, "--dna")) alphatype = eslDNA; if (esl_opt_GetBoolean(go, "--amino")) alphatype = eslAMINO; abc = esl_alphabet_Create(alphatype); sq = esl_sq_CreateDigital(abc); esl_sq_GrowTo(sq, L); /* Pick the iid frequency distribution to use */ ESL_ALLOC(fq, sizeof(double) * abc->K); switch (alphatype) { case eslRNA: case eslDNA: esl_vec_DSet(fq, 4, 0.25); break; case eslAMINO: esl_composition_SW34(fq); break; default: esl_vec_DSet(fq, abc->K, 1.0 / (double) abc->K); break; } /* generate */ for (i = 0; i < N; i++) { esl_rsq_xIID(r, fq, abc->K, L, sq->dsq); if (N > 1) esl_sq_FormatName(sq, "random%d", i); else esl_sq_SetName(sq, "random"); sq->n = L; esl_sqio_Write(ofp, sq, outfmt, FALSE); } free(fq); esl_alphabet_Destroy(abc); esl_sq_Destroy(sq); return eslOK; ERROR: if (fq != NULL) free(fq); esl_alphabet_Destroy(abc); esl_sq_Destroy(sq); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_BG *bg = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); /* first round of tests for DNA alphabets. */ if ((abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); utest_optacc(go, r, abc, bg, M, L, N); /* normal sized models */ utest_optacc(go, r, abc, bg, 1, L, 10); /* size 1 models */ utest_optacc(go, r, abc, bg, M, 1, 10); /* size 1 sequences */ esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); /* Second round of tests for amino alphabets. */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); utest_optacc(go, r, abc, bg, M, L, N); utest_optacc(go, r, abc, bg, 1, L, 10); utest_optacc(go, r, abc, bg, M, 1, 10); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); esl_getopts_Destroy(go); esl_randomness_Destroy(r); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_SQ *sq = esl_sq_CreateDigital(abc); ESL_SQFILE *sqfp = NULL; ESL_HMM *hmm = create_test_hmm(abc); ESL_HMM *bg = create_null_hmm(abc); ESL_HMX *hmx = esl_hmx_Create(400, hmm->M); int format = eslSQFILE_UNKNOWN; char *seqfile = esl_opt_GetArg(go, 1); float fwdsc, nullsc; int status; status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) esl_fatal("No such file."); else if (status == eslEFORMAT) esl_fatal("Format unrecognized."); else if (status != eslOK) esl_fatal("Open failed, code %d.", status); while ((status = esl_sqio_Read(sqfp, sq)) == eslOK) { esl_hmx_GrowTo(hmx, sq->n, hmm->M); esl_hmm_Forward(sq->dsq, sq->n, hmm, hmx, &fwdsc); esl_hmm_Forward(sq->dsq, sq->n, bg, hmx, &nullsc); printf("%-16s %5d %11.4f %8.4f %11.4f %8.4f %11.4f %8.4f\n", sq->name, (int) sq->n, fwdsc * eslCONST_LOG2R, (fwdsc * eslCONST_LOG2R) / sq->n, nullsc * eslCONST_LOG2R, (nullsc * eslCONST_LOG2R) / sq->n, (fwdsc - nullsc) * eslCONST_LOG2R, (fwdsc-nullsc) * eslCONST_LOG2R / sq->n); esl_sq_Reuse(sq); } if (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s)\n%s\n", sqfp->filename, sqfp->get_error(sqfp)); else if (status != eslEOF) esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename); esl_sqfile_Close(sqfp); esl_sq_Destroy(sq); esl_hmm_Destroy(hmm); esl_hmm_Destroy(bg); esl_hmx_Destroy(hmx); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_BG *bg = NULL; ESL_DSQ *dsq = NULL; ESL_SQ *sq = NULL; int M = 6; int L = 10; int ntrace = 1000; if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal("failed to sample an HMM"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal("failed to create profile"); if (p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL) != eslOK) esl_fatal("failed to config profile"); if ((om = p7_oprofile_Create(gm->M, abc)) == NULL) esl_fatal("failed to create optimized profile"); if (p7_oprofile_Convert(gm, om) != eslOK) esl_fatal("failed to convert profile"); /* Test with randomly generated (iid) sequence */ if ((dsq = malloc(sizeof(ESL_DSQ) *(L+2))) == NULL) esl_fatal("malloc failed"); if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal("seq generation failed"); utest_stotrace(go, r, abc, gm, om, dsq, L, ntrace); /* Test with seq sampled from profile */ if ((sq = esl_sq_CreateDigital(abc)) == NULL) esl_fatal("sequence allocation failed"); if (p7_ProfileEmit(r, hmm, gm, bg, sq, NULL) != eslOK) esl_fatal("profile emission failed"); utest_stotrace(go, r, abc, gm, om, sq->dsq, sq->n, ntrace); esl_sq_Destroy(sq); free(dsq); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { char *bgfile = argv[1]; char *alphabet = argv[2]; ESL_ALPHABET *abc = esl_alphabet_Create(esl_abc_EncodeType(alphabet)); P7_BG *bg = p7_bg_Create(abc); char errbuf[eslERRBUFSIZE]; int status; status = p7_bg_Read(bgfile, bg, errbuf); if (status == eslENOTFOUND) esl_fatal("open failed: %s", errbuf); else if (status == eslEFORMAT) esl_fatal("parse failed: %s", errbuf); else if (status != eslOK) esl_fatal("failed to read bg file %s (error %d)\n", bgfile, status); p7_bg_Write(stdout, bg); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *fwd = NULL; P7_GMX *bck = NULL; ESL_DSQ *dsq = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); /* Sample a random HMM */ p7_hmm_Sample(r, M, abc, &hmm); /* Configure a profile from the sampled HMM */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); /* Other initial allocations */ dsq = malloc(sizeof(ESL_DSQ) * (L+2)); fwd = p7_gmx_Create(gm->M, L); bck = p7_gmx_Create(gm->M, L); p7_FLogsumInit(); utest_correct_normalization(r, gm, bg, dsq, L, fwd, bck); free(dsq); p7_gmx_Destroy(fwd); p7_gmx_Destroy(bck); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); int M = 50; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_generation (rng, M, abc, 10); // test a bunch of seqs to try to make sure we exercise exact domain score recalculation utest_singlemulti(rng, M, abc, 10); fprintf(stderr, "# status = ok\n"); esl_alphabet_Destroy(abc); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_BG *bg = p7_bg_Create(abc); int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); float tol = esl_opt_GetReal (go, "-t"); p7_FLogsumInit(); utest_decoding(r, abc, bg, M, L, N, tol); esl_getopts_Destroy(go); esl_randomness_Destroy(r); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSA *msa = esl_msa_CreateFromString("\ # STOCKHOLM 1.0\n\ \n\ seq0 AAAAAAAAAA\n\ seq1 AAAAAAAAAA\n\ seq2 AAAAAAAAAC\n\ seq3 AAAAAAAADD\n\ seq4 AAAAAAAEEE\n\ seq5 AAAAAAFFFF\n\ seq6 AAAAAGGGGG\n\ seq7 AAAAHHHHHH\n\ seq8 AAAIIIIIII\n\ seq9 AAKKKKKKKK\n\ seq10 ALLLLLLLLL\n\ seq11 MMMMMMMMMM\n\ //", eslMSAFILE_STOCKHOLM); utest_SingleLinkage(go, msa, 1.0, 11, 10); /* at 100% id, only seq0/seq1 cluster */ utest_SingleLinkage(go, msa, 0.5, 6, 5); /* at 50% id, seq0-seq6 cluster */ utest_SingleLinkage(go, msa, 0.0, 1, 0); /* at 0% id, everything clusters */ /* Do the same tests, but now with a digital MSA */ esl_msa_Digitize(abc, msa, NULL); utest_SingleLinkage(go, msa, 1.0, 11, 10); /* at 100% id, only seq0/seq1 cluster */ utest_SingleLinkage(go, msa, 0.5, 6, 5); /* at 50% id, seq0-seq6 cluster */ utest_SingleLinkage(go, msa, 0.0, 1, 0); /* at 0% id, everything clusters */ esl_msa_Destroy(msa); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_ALPHABET *abc = NULL; ESL_RANDOMNESS *r = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; int M = 10000; if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create amino alphabet"); if ((r = esl_randomness_CreateFast(0)) == NULL) esl_fatal("failed to create randomness"); if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal("failed to sample random HMM"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to created null model"); utest_Config(hmm, bg); utest_occupancy(hmm); p7_hmm_Destroy(hmm); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); return eslOK; }
int main(int argc, char **argv) { ESL_SQFILE *sqfp = NULL; ESL_SQ *sq = NULL; ESL_SQ *dsq = NULL; ESL_SQ **prot; int c; ESL_ALPHABET *abc, *prot_abc; ESL_SQ *prot6[6]; int x; abc = esl_alphabet_Create(eslDNA); prot_abc = esl_alphabet_Create(eslAMINO); if(argc != 2) { printf("You need to pass an argument for a filepath to a dna/rna fasta file\n"); exit(0); } if(eslOK != esl_sqfile_Open(argv[1], eslSQFILE_FASTA, NULL, &sqfp)) { printf("Invalid filepath: %s\n", argv[1]); exit(0); } sq = esl_sq_Create(); if(sq == NULL) { printf("could not allocate new sequence\n"); exit(0); } if(esl_sqio_Read(sqfp, sq) != eslOK) { printf("Not a valid fasta file %s\n", argv[1]); exit(0); } dsq = esl_sq_Create(); if(dsq == NULL) { printf("could not allocate digital sequence\n"); exit(0); } if(esl_sq_Copy(sq, dsq) != eslOK) { printf("could not copy sequence\n"); exit(0); } if(esl_sq_Digitize(abc, dsq) != eslOK) { printf("could not digitize sequence\n"); exit(0); } esl_sqio_Write(stdout, sq, eslSQFILE_FASTA, 0); if(esl_trans_6frame(sq, prot6) != eslOK) { printf("could not generate six frame translation\n"); exit(0); } for(x = 0; x < 6; x++) { esl_sqio_Write(stdout, prot6[x], eslSQFILE_FASTA, 0); } if(esl_trans_orf(dsq, &prot, &c, 10) != eslOK) { printf("could not translate open reading frames\n"); exit(0); } for(x = 0; x < c; x++) { esl_sqio_Write(stdout, prot[x], eslSQFILE_FASTA, 0); } return 0; }
int esl_trans_s2p(ESL_SQ *in, ESL_SQ **out, int frameshift, int rcFlag) { // The encoding for this is taken from squid: A=0, C=1, G=2, U/T=3, // code[0] corresponds to AAA, code[1] is AAC... code[4] is ACA... // and so on up to 63 being UUU. 64 is a sentinel. Regular 20 amino codes and '*' for stop // the nucleotide indices match well with the easel alphabet index // but the actual translation still needs to be hard coded char code[] = {'K','N','K','N','T','T','T','T','R','S','R','S', 'I','I','M','I','Q','H','Q','H','P','P','P','P', 'R','R','R','R','L','L','L','L','E','D','E','D', 'A','A','A','A','G','G','G','G','V','V','V','V', '*','Y','*','Y','L','F','L','F','*','C','W','C', 'L','F','L','F'}; int status; int codon; //progress in counting current codon char *aaseq; //hold the protein sequence to be output char *aaptr; //pointer records progress in writing to output char *readseq; //pointer records progress in reading nucleotide sequence int read_dg; //index into digital sequence ESL_ALPHABET *abc = esl_alphabet_Create(eslDNA); char errbuf[256]; //validateseq demands this char namestring[256]; (*out) = NULL; if(frameshift >= in->n) return eslFAIL; if(!abc) goto ERROR; //make sure we have a nucleotide sequence; could use esl_abc_ValidateSeq but that wants too //much boilerplate for the simple bit I need done. doesn't help that i don't care if there are U or T //characters but that would test against two alphabets if(in->seq) { if(eslOK != esl_abc_ValidateSeq(abc, in->seq, in->n, errbuf)) goto ERROR; } else if(in->dsq) { if(in->abc->type != eslRNA && in->abc->type != eslDNA) goto ERROR; } else { goto ERROR; } //apply the reverse compliment if(rcFlag) {if(esl_sq_ReverseComplement(in) != eslOK) goto ERROR;} ESL_ALLOC(aaseq, (in->n+1) * sizeof(char)); aaptr = aaseq; if(in->seq) //text sequence { //get an alphabet to do the lookup with. //an ordinary text sequence doesn't have in->abc //if it has one that is not a standard dna/rna alphabet //then this code won't work. I wanted to use an alphabet if available, could save some allocating time that way //if we're calling this repeatedly //but the compiler complains about "pointer qualifiers" so nevermind readseq = in->seq+frameshift; //as long as there are at least 3 nucleotides left, pull and translate another codon for (; *readseq != '\0' && *(readseq+1) != '\0' && *(readseq+2) != '\0'; readseq += 3) { codon = abc->inmap[(int)*(readseq)] * 16 + abc->inmap[(int)*(readseq+1)] * 4 + abc->inmap[(int)*(readseq+2)]; if(codon > 63 || codon < 0) break; *aaptr = code[codon]; aaptr += 1; } *aaptr = '\0'; } else if(in->dsq) //do it digitally { if(in->dsq == NULL) goto ERROR; read_dg = 1+frameshift; //add one here because digital index 0 is a sentinel for(;in->dsq[read_dg] != 255 && in->dsq[read_dg+1] != 255 && in->dsq[read_dg+2] != 255; read_dg += 3) { codon = in->dsq[read_dg] * 16 + in->dsq[read_dg+1] * 4 + in->dsq[read_dg+2]; if(codon > 63 || codon < 0) break; *aaptr = code[codon]; aaptr += 1; } *aaptr = '\0'; } else { goto ERROR; } //modify name to record any reading frame adjustments sprintf(namestring, "%s_s%d", in->name, frameshift); if(rcFlag) strcat(namestring, "_rc"); *out = esl_sq_CreateFrom(namestring, aaseq, in->desc, in->acc, in->ss); if(aaseq != NULL) free(aaseq); //return the input to its original state if(rcFlag) {if(esl_sq_ReverseComplement(in) != eslOK) goto ERROR;} if(abc) esl_alphabet_Destroy(abc); if(*out) return eslOK; ERROR: if(abc) esl_alphabet_Destroy(abc); if(aaseq != NULL) free(aaseq); (*out) = NULL; return eslEMEM; }