Exemplo n.º 1
0
static void
onefetch_subseq(ESL_GETOPTS *go, FILE *ofp, ESL_SQFILE *sqfp, char *newname, char *key, uint32_t given_start, uint32_t given_end)
{
  int    start, end;
  int    do_revcomp;
  ESL_SQ *sq = esl_sq_Create();

  if (sqfp->data.ascii.ssi == NULL) esl_fatal("no ssi index");

  /* reverse complement indicated by coords. */
  /* -c 52: would be 52,0, so watch out for given_end = 0 case */
  if (given_end != 0 && given_start > given_end)
    { start = given_end;   end = given_start; do_revcomp = TRUE;  }
  else
    { start = given_start; end = given_end;   do_revcomp = FALSE; }

  if (esl_sqio_FetchSubseq(sqfp, key, start, end, sq) != eslOK) esl_fatal(esl_sqfile_GetErrorBuf(sqfp));

  if      (newname != NULL) esl_sq_SetName(sq, newname);
  else                      esl_sq_FormatName(sq, "%s/%d-%d", key, given_start, (given_end == 0) ? sq->L : given_end);

  /* Two ways we might have been asked to revcomp: by coord, or by -r option */
  /* (If both happen, they'll cancel each other out) */
  if (do_revcomp) 
    if (esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name);
  if (esl_opt_GetBoolean(go, "-r"))
    if (esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name);

  esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE);
  esl_sq_Destroy(sq);
}
Exemplo n.º 2
0
/* Fetch in a random sequence of length <L> from the the pre-digitized
 * concatenated sequence database, select a random subseq, shuffle it
 * by the chosen algorithm; set dsq[1..L] to the resulting randomized
 * segment.
 *
 * If <logfp> is non-NULL, append one or more "<sqname> <from> <to>"
 * fields to current line, to record where the random segment was
 * selected from. This is useful in cases where we want to track back
 * the origin of a high-scoring segment, in case the randomization
 * wasn't good enough to obscure the identity of a segment.
 *
 */
static int
set_random_segment(ESL_GETOPTS *go, struct cfg_s *cfg, FILE *logfp, ESL_DSQ *dsq, int L)
{
    ESL_SQ  *sq           = esl_sq_CreateDigital(cfg->abc);
    int      minDPL       = esl_opt_GetInteger(go, "--minDPL");
    int      db_dependent = (esl_opt_GetBoolean(go, "--iid") == TRUE ? FALSE : TRUE);
    char    *pkey         = NULL;
    int      start, end;
    int64_t  Lseq;
    int      status;

    if (L==0) return eslOK;
    if (L > cfg->db_maxL) esl_fatal("can't fetch a segment of length %d; database max is %d\n", L, cfg->db_maxL);

    /* fetch a random subseq from the source database */
    esl_sq_GrowTo(sq, L);
    if (db_dependent)
    {
        do {
            if (pkey != NULL) free(pkey);
            if (esl_ssi_FindNumber(cfg->dbfp->data.ascii.ssi, esl_rnd_Roll(cfg->r, cfg->db_nseq), NULL, NULL, NULL, &Lseq, &pkey) != eslOK)
                esl_fatal("failed to look up a random seq");
        } while (Lseq < L);

        start = 1 + esl_rnd_Roll(cfg->r, Lseq-L);
        end   = start + L - 1;
        if (esl_sqio_FetchSubseq(cfg->dbfp, pkey, start, end, sq) != eslOK) esl_fatal("failed to fetch subseq");
        esl_sq_ConvertDegen2X(sq);
    }

    /* log sequence source info: <name> <start> <end> */
    if (logfp != NULL && db_dependent)
        fprintf(logfp, " %-15s %5d %5d", pkey, start, end);

    /* Now apply the appropriate randomization algorithm */
    if      (esl_opt_GetBoolean(go, "--mono"))    status = esl_rsq_XShuffle  (cfg->r, sq->dsq, L, sq->dsq);
    else if (esl_opt_GetBoolean(go, "--di")) {
        if (L < minDPL)                             status = esl_rsq_XShuffle  (cfg->r, sq->dsq, L, sq->dsq);
        else                                        status = esl_rsq_XShuffleDP(cfg->r, sq->dsq, L, cfg->abc->Kp, sq->dsq);
    }
    else if (esl_opt_GetBoolean(go, "--markov0")) status = esl_rsq_XMarkov0  (cfg->r, sq->dsq, L, cfg->abc->Kp, sq->dsq);
    else if (esl_opt_GetBoolean(go, "--markov1")) status = esl_rsq_XMarkov1  (cfg->r, sq->dsq, L, cfg->abc->Kp, sq->dsq);
    else if (esl_opt_GetBoolean(go, "--reverse")) status = esl_rsq_XReverse  (sq->dsq, L, sq->dsq);
    else if (esl_opt_GetBoolean(go, "--iid"))     status = esl_rsq_xIID      (cfg->r, cfg->fq, cfg->abc->K, L, sq->dsq);
    else                                          status = eslEINCONCEIVABLE;
    if (status != eslOK) esl_fatal("esl's shuffling failed");

    memcpy(dsq, sq->dsq+1, sizeof(ESL_DSQ) * L);
    esl_sq_Destroy(sq);
    free(pkey);
    return eslOK;
}