Exemplo n.º 1
0
/* onefetch():
 * Given one <key> (a seq name or accession), retrieve the corresponding sequence.
 * In SSI mode, we can do this quickly by positioning the file, then regurgitating
 * every line until the end-of-record marker; we don't even have to parse.
 * Without an SSI index, we have to parse the file sequentially 'til we find
 * the one we're after.
 */
static void
onefetch(ESL_GETOPTS *go, FILE *ofp, char *key, ESL_SQFILE *sqfp)
{
  ESL_SQ  *sq            = esl_sq_Create();
  int      do_revcomp    = esl_opt_GetBoolean(go, "-r");
  char    *newname       = esl_opt_GetString(go, "-n");
  int      status;

  /* Try to position the file at the desired sequence with SSI. */
  if (sqfp->data.ascii.ssi != NULL)	
    {
      status = esl_sqfile_PositionByKey(sqfp, key);
      if      (status == eslENOTFOUND) esl_fatal("seq %s not found in SSI index for file %s\n", key, sqfp->filename);
      else if (status == eslEFORMAT)   esl_fatal("Failed to parse SSI index for %s\n", sqfp->filename);
      else if (status != eslOK)        esl_fatal("Failed to look up location of seq %s in SSI index of file %s\n", key, sqfp->filename);

      status = esl_sqio_Read(sqfp, sq);
      if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
					       sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
      else if (status == eslEOF)     esl_fatal("Unexpected EOF reading sequence file %s",
					       status, sqfp->filename);
      else if (status != eslOK)      esl_fatal("Unexpected error %d reading sequence file %s",
					       status, sqfp->filename);

      if (strcmp(key, sq->name) != 0 && strcmp(key, sq->acc) != 0) 
	esl_fatal("whoa, internal error; found the wrong sequence %s, not %s", sq->name, key);
    }  
  else 
    { /* Else, we have to read the whole damn file sequentially until we find the seq */
      while ((status = esl_sqio_Read(sqfp, sq)) != eslEOF) {
	if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
						 sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
	else if (status != eslOK)      esl_fatal("Unexpected error %d reading sequence file %s",
						 status, sqfp->filename);

	if (strcmp(key, sq->name) == 0 || strcmp(key, sq->acc) == 0) break;
	esl_sq_Reuse(sq);
      }
      if (status == eslEOF) esl_fatal("Failed to find sequence %s in file %s\n", key, sqfp->filename);

    }

  if (do_revcomp == FALSE && newname == NULL && ! esl_sqio_IsAlignment(sqfp->format)) 
    { /* If we're not manipulating the sequence in any way, and it's not from an alignment file, we can Echo() it. */
      if (esl_sqio_Echo(sqfp, sq, ofp) != eslOK) esl_fatal("Echo failed: %s\n", esl_sqfile_GetErrorBuf(sqfp));
    }
  else
    { /* Otherwise we Write() the parsed version. */
      if (do_revcomp && esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name);
      if (newname != NULL) esl_sq_SetName(sq, newname);
      esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE);
    }

  esl_sq_Destroy(sq);
}
Exemplo n.º 2
0
/* serial_master()
 * The serial version of hmmsearch.
 * For each query HMM in <hmmfile> search the database for hits.
 *
 * A master can only return if it's successful. All errors are handled
 * immediately and fatally with p7_Fail().  We also use the
 * ESL_EXCEPTION and ERROR: mechanisms, but only because we know we're
 * using a fatal exception handler.
 */
static int
serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  FILE            *ofp      = stdout;            /* results output file (-o)                        */
  P7_HMMFILE      *hfp      = NULL;              /* open input HMM file                             */
  ESL_SQFILE      *dbfp     = NULL;              /* open input sequence file                        */
  P7_HMM          *hmm      = NULL;              /* one HMM query                                   */
  ESL_ALPHABET    *abc      = NULL;              /* digital alphabet                                */
  int              dbfmt    = eslSQFILE_UNKNOWN; /* format code for sequence database file          */
  ESL_STOPWATCH   *w;
  int              textw    = 0;
  int              nquery   = 0;
  int              status   = eslOK;
  int              hstatus  = eslOK;
  int              sstatus  = eslOK;
  int              i;

  int              ncpus    = 0;

  int              infocnt  = 0;
  WORKER_INFO     *info     = NULL;
 char             errbuf[eslERRBUFSIZE];

  w = esl_stopwatch_Create();

  if (esl_opt_GetBoolean(go, "--notextw")) textw = 0;
  else                                     textw = esl_opt_GetInteger(go, "--textw");

  if (esl_opt_IsOn(go, "--tformat")) {
    dbfmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--tformat"));
    if (dbfmt == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized sequence database file format\n", esl_opt_GetString(go, "--tformat"));
  }

  /* Open the target sequence database */
  status = esl_sqfile_Open(cfg->dbfile, dbfmt, p7_SEQDBENV, &dbfp);
  if      (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n",          cfg->dbfile);
  else if (status == eslEFORMAT)   p7_Fail("Sequence file %s is empty or misformatted\n",            cfg->dbfile);
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect format of a stdin or .gz seqfile");
  else if (status != eslOK)        p7_Fail("Unexpected error %d opening sequence file %s\n", status, cfg->dbfile);


  if (esl_opt_IsUsed(go, "--restrictdb_stkey") || esl_opt_IsUsed(go, "--restrictdb_n")) {
    if (esl_opt_IsUsed(go, "--ssifile"))
      esl_sqfile_OpenSSI(dbfp, esl_opt_GetString(go, "--ssifile"));
    else
      esl_sqfile_OpenSSI(dbfp, NULL);
  }



  /* Open the query profile HMM file */
  status = p7_hmmfile_OpenE(cfg->hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", cfg->hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                cfg->hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, cfg->hmmfile, errbuf);

  /* Open the results output files */
  if (esl_opt_IsOn(go, "-o"))          { if ((ofp      = fopen(esl_opt_GetString(go, "-o"), "w+")) == NULL) p7_Fail("Failed to open output file %s for writing\n",    esl_opt_GetString(go, "-o")); }


  infocnt = 1;
  ESL_ALLOC(info, sizeof(*info) * infocnt);

  /* <abc> is not known 'til first HMM is read. */
  hstatus = p7_hmmfile_Read(hfp, &abc, &hmm);
  if (hstatus == eslOK)
    {
      /* One-time initializations after alphabet <abc> becomes known */
//      output_header(ofp, go, cfg->hmmfile, cfg->dbfile);
//      dbfp->abc = abc; //ReadBlock requires knowledge of the alphabet to decide how best to read blocks

//      for (i = 0; i < infocnt; ++i)
//	{
//	  info[i].bg    = p7_bg_Create(abc);
//	}

    }

  /* Outer loop: over each query HMM in <hmmfile>. */
//  while (hstatus == eslOK)
//    {
      P7_PROFILE      *gm      = NULL;
      P7_OPROFILE     *om      = NULL;       /* optimized query profile                  */

      nquery++;
      esl_stopwatch_Start(w);

      /* seqfile may need to be rewound (multiquery mode) */
      if (nquery > 1)
      {
        if (! esl_sqfile_IsRewindable(dbfp))
          esl_fatal("Target sequence file %s isn't rewindable; can't search it with multiple queries", cfg->dbfile);

        if (! esl_opt_IsUsed(go, "--restrictdb_stkey") )
          esl_sqfile_Position(dbfp, 0); //only re-set current position to 0 if we're not planning to set it in a moment
      }

      if ( cfg->firstseq_key != NULL ) { //it's tempting to want to do this once and capture the offset position for future passes, but ncbi files make this non-trivial, so this keeps it general
        sstatus = esl_sqfile_PositionByKey(dbfp, cfg->firstseq_key);
        if (sstatus != eslOK)
          p7_Fail("Failure setting restrictdb_stkey to %d\n", cfg->firstseq_key);
      }

//      if (fprintf(ofp, "Query:       %s  [M=%d]\n", hmm->name, hmm->M)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
//      if (hmm->acc)  { if (fprintf(ofp, "Accession:   %s\n", hmm->acc)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }
//      if (hmm->desc) { if (fprintf(ofp, "Description: %s\n", hmm->desc) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }

      /* Convert to an optimized model */
      gm = p7_profile_Create (hmm->M, abc);
      om = p7_oprofile_Create(hmm->M, abc);
//      p7_ProfileConfig(hmm, info->bg, gm, 100, p7_LOCAL); /* 100 is a dummy length for now; and MSVFilter requires local mode */
      p7_oprofile_Convert(gm, om);                  /* <om> is now p7_LOCAL, multihit */

      for (i = 0; i < infocnt; ++i)
      {
        /* Create processing pipeline and hit list */
        info[i].th  = p7_tophits_Create();
        info[i].om  = p7_oprofile_Clone(om);
        info[i].pli = p7_pipeline_Create(go, om->M, 100, FALSE, p7_SEARCH_SEQS); /* L_hint = 100 is just a dummy for now */

        P7_PIPELINE *pli = info[i].pli;

        pli->nmodels++;
        pli->nnodes += info[i].om->M;
//        if (pli->Z_setby == p7_ZSETBY_NTARGETS && pli->mode == p7_SCAN_MODELS) pli->Z = pli->nmodels;

//        if (pli->do_biasfilter) p7_bg_SetFilter(info[i].bg, info[i].om->M, info[i].om->compo);

//        if (pli->mode == p7_SEARCH_SEQS)
//          status = p7_pli_NewModelThresholds(pli, info[i].om);

        pli->W = info[i].om->max_length;

     }

      sstatus = serial_loop(info, dbfp, cfg->n_targetseq, ofp);

      switch(sstatus)
      {
      case eslEFORMAT:
        esl_fatal("Parse failed (sequence file %s):\n%s\n",
            dbfp->filename, esl_sqfile_GetErrorBuf(dbfp));
        break;
      case eslEOF:
        /* do nothing */
        break;
      default:
        esl_fatal("Unexpected error %d reading sequence file %s", sstatus, dbfp->filename);
      }

      /* merge the results of the search results */
      for (i = 1; i < infocnt; ++i)
      {
        p7_tophits_Merge(info[0].th, info[i].th);
        p7_pipeline_Merge(info[0].pli, info[i].pli);

        p7_pipeline_Destroy(info[i].pli);
        p7_tophits_Destroy(info[i].th);
        p7_oprofile_Destroy(info[i].om);
      }

      /* Print the results.  */
      p7_tophits_SortBySortkey(info->th);
      p7_tophits_Threshold(info->th, info->pli);
//      p7_tophits_Targets(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
//      p7_tophits_Domains(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");


      esl_stopwatch_Stop(w);
//      p7_pli_Statistics(ofp, info->pli, w);
//      if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");


      p7_pipeline_Destroy(info->pli);
      p7_tophits_Destroy(info->th);
      p7_oprofile_Destroy(info->om);
      p7_oprofile_Destroy(om);
      p7_profile_Destroy(gm);
      p7_hmm_Destroy(hmm);

//      hstatus = p7_hmmfile_Read(hfp, &abc, &hmm);
//    } /* end outer loop over query HMMs */

  switch(hstatus) {
  case eslEOD:       p7_Fail("read failed, HMM file %s may be truncated?", cfg->hmmfile);      break;
  case eslEFORMAT:   p7_Fail("bad file format in HMM file %s",             cfg->hmmfile);      break;
  case eslEINCOMPAT: p7_Fail("HMM file %s contains different alphabets",   cfg->hmmfile);      break;
  case eslEOF:
  case eslOK:        /* do nothing. EOF is what we want. */                                    break;
  default:           p7_Fail("Unexpected error (%d) in reading HMMs from %s", hstatus, cfg->hmmfile);
  }


  /* Terminate outputs... any last words?
   */

  /* Cleanup - prepare for exit
   */
//  for (i = 0; i < infocnt; ++i)
//    p7_bg_Destroy(info[i].bg);

  free(info);
  p7_hmmfile_Close(hfp);
  esl_sqfile_Close(dbfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);

  if (ofp != stdout) fclose(ofp);

  printf("44HHHH \n");

  return eslOK;

 ERROR:
  return eslFAIL;
}