static void onefetch_subseq(ESL_GETOPTS *go, FILE *ofp, ESL_SQFILE *sqfp, char *newname, char *key, uint32_t given_start, uint32_t given_end) { int start, end; int do_revcomp; ESL_SQ *sq = esl_sq_Create(); if (sqfp->data.ascii.ssi == NULL) esl_fatal("no ssi index"); /* reverse complement indicated by coords. */ /* -c 52: would be 52,0, so watch out for given_end = 0 case */ if (given_end != 0 && given_start > given_end) { start = given_end; end = given_start; do_revcomp = TRUE; } else { start = given_start; end = given_end; do_revcomp = FALSE; } if (esl_sqio_FetchSubseq(sqfp, key, start, end, sq) != eslOK) esl_fatal(esl_sqfile_GetErrorBuf(sqfp)); if (newname != NULL) esl_sq_SetName(sq, newname); else esl_sq_FormatName(sq, "%s/%d-%d", key, given_start, (given_end == 0) ? sq->L : given_end); /* Two ways we might have been asked to revcomp: by coord, or by -r option */ /* (If both happen, they'll cancel each other out) */ if (do_revcomp) if (esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name); if (esl_opt_GetBoolean(go, "-r")) if (esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name); esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE); esl_sq_Destroy(sq); }
/* onefetch(): * Given one <key> (a seq name or accession), retrieve the corresponding sequence. * In SSI mode, we can do this quickly by positioning the file, then regurgitating * every line until the end-of-record marker; we don't even have to parse. * Without an SSI index, we have to parse the file sequentially 'til we find * the one we're after. */ static void onefetch(ESL_GETOPTS *go, FILE *ofp, char *key, ESL_SQFILE *sqfp) { ESL_SQ *sq = esl_sq_Create(); int do_revcomp = esl_opt_GetBoolean(go, "-r"); char *newname = esl_opt_GetString(go, "-n"); int status; /* Try to position the file at the desired sequence with SSI. */ if (sqfp->data.ascii.ssi != NULL) { status = esl_sqfile_PositionByKey(sqfp, key); if (status == eslENOTFOUND) esl_fatal("seq %s not found in SSI index for file %s\n", key, sqfp->filename); else if (status == eslEFORMAT) esl_fatal("Failed to parse SSI index for %s\n", sqfp->filename); else if (status != eslOK) esl_fatal("Failed to look up location of seq %s in SSI index of file %s\n", key, sqfp->filename); status = esl_sqio_Read(sqfp, sq); if (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", sqfp->filename, esl_sqfile_GetErrorBuf(sqfp)); else if (status == eslEOF) esl_fatal("Unexpected EOF reading sequence file %s", status, sqfp->filename); else if (status != eslOK) esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename); if (strcmp(key, sq->name) != 0 && strcmp(key, sq->acc) != 0) esl_fatal("whoa, internal error; found the wrong sequence %s, not %s", sq->name, key); } else { /* Else, we have to read the whole damn file sequentially until we find the seq */ while ((status = esl_sqio_Read(sqfp, sq)) != eslEOF) { if (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", sqfp->filename, esl_sqfile_GetErrorBuf(sqfp)); else if (status != eslOK) esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename); if (strcmp(key, sq->name) == 0 || strcmp(key, sq->acc) == 0) break; esl_sq_Reuse(sq); } if (status == eslEOF) esl_fatal("Failed to find sequence %s in file %s\n", key, sqfp->filename); } if (do_revcomp == FALSE && newname == NULL && ! esl_sqio_IsAlignment(sqfp->format)) { /* If we're not manipulating the sequence in any way, and it's not from an alignment file, we can Echo() it. */ if (esl_sqio_Echo(sqfp, sq, ofp) != eslOK) esl_fatal("Echo failed: %s\n", esl_sqfile_GetErrorBuf(sqfp)); } else { /* Otherwise we Write() the parsed version. */ if (do_revcomp && esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name); if (newname != NULL) esl_sq_SetName(sq, newname); esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE); } esl_sq_Destroy(sq); }
/* seq_generation() * * Generating sequences. */ static int seq_generation(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt) { ESL_ALPHABET *abc = NULL; ESL_SQ *sq = NULL; double *fq = NULL; int alphatype = eslUNKNOWN; // static checkers can't see that 1 of --rna, --dna, --amino must be true int N = esl_opt_GetInteger(go, "-N"); int L = esl_opt_GetInteger(go, "-L"); int i; int status; if (L <= 0) esl_fatal("To generate sequences, set -L option (length of generated seqs) > 0 "); if (esl_opt_GetBoolean(go, "--rna")) alphatype = eslRNA; if (esl_opt_GetBoolean(go, "--dna")) alphatype = eslDNA; if (esl_opt_GetBoolean(go, "--amino")) alphatype = eslAMINO; abc = esl_alphabet_Create(alphatype); sq = esl_sq_CreateDigital(abc); esl_sq_GrowTo(sq, L); /* Pick the iid frequency distribution to use */ ESL_ALLOC(fq, sizeof(double) * abc->K); switch (alphatype) { case eslRNA: case eslDNA: esl_vec_DSet(fq, 4, 0.25); break; case eslAMINO: esl_composition_SW34(fq); break; default: esl_vec_DSet(fq, abc->K, 1.0 / (double) abc->K); break; } /* generate */ for (i = 0; i < N; i++) { esl_rsq_xIID(r, fq, abc->K, L, sq->dsq); if (N > 1) esl_sq_FormatName(sq, "random%d", i); else esl_sq_SetName(sq, "random"); sq->n = L; esl_sqio_Write(ofp, sq, outfmt, FALSE); } free(fq); esl_alphabet_Destroy(abc); esl_sq_Destroy(sq); return eslOK; ERROR: if (fq != NULL) free(fq); esl_alphabet_Destroy(abc); esl_sq_Destroy(sq); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); int N = esl_opt_GetInteger(go, "-N"); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_TRACE *tr = NULL; ESL_SQ *sq = NULL; P7_ALIDISPLAY *ad = NULL; int i,z; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, 0); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, 0, p7_UNIGLOCAL); /* that sets N,C,J to generate nothing */ om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); if (esl_opt_GetBoolean(go, "-p")) tr = p7_trace_CreateWithPP(); else tr = p7_trace_Create(); sq = esl_sq_CreateDigital(abc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_ProfileEmit(r, hmm, gm, bg, sq, tr); esl_sq_SetName(sq, "random"); if (! esl_opt_GetBoolean(go, "-b")) { if (esl_opt_GetBoolean(go, "-p")) for (z = 0; z < tr->N; z++) if (tr->i[z] > 0) tr->pp[z] = esl_random(r); ad = p7_alidisplay_Create(tr, 0, om, sq); p7_alidisplay_Print(stdout, ad, 40, 80, FALSE); p7_alidisplay_Destroy(ad); } p7_trace_Reuse(tr); esl_sq_Reuse(sq); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); esl_sq_Destroy(sq); p7_trace_Destroy(tr); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }