static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_fmfile, char **ret_qfile)
{
  ESL_GETOPTS *go = esl_getopts_Create(options);
  int          status;

  if (esl_opt_ProcessEnvironment(go)         != eslOK)  { if (printf("Failed to process environment: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)  { if (printf("Failed to parse command line: %s\n", go->errbuf)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_VerifyConfig(go)               != eslOK)  { if (printf("Failed to parse command line: %s\n", go->errbuf)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  /* help format: */
  if (esl_opt_GetBoolean(go, "-h") == TRUE) 
    {
      esl_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);

      if (puts("\nBasic options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 120=textwidth*/

      if (puts("\nSpecial options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 2, 2, 80); /* 2= group; 2 = indentation; 120=textwidth*/

      exit(0);
  }

  if (esl_opt_ArgNumber(go)                  != 2)    { if (puts("Incorrect number of command line arguments.")     < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_qfile  = esl_opt_GetArg(go, 1)) == NULL)  { if (puts("Failed to get <qfile> argument on command line")  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_fmfile = esl_opt_GetArg(go, 2)) == NULL)  { if (puts("Failed to get <fmfile> argument on command line") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  /* Validate any attempted use of stdin streams */
  if (esl_strcmp(*ret_fmfile, "-") == 0 && esl_strcmp(*ret_qfile, "-") == 0) 
    { if (puts("Either <fmfile> or <qfile> may be '-' (to read from stdin), but not both.") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  *ret_go = go;
  return eslOK;

 FAILURE:  /* all errors handled here are user errors, so be polite.  */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/
  printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
  esl_getopts_Destroy(go);
  exit(1);

 ERROR:
  if (go) esl_getopts_Destroy(go);
  exit(status);
}
Beispiel #2
0
/* process_commandline()
 * 
 * Processes the commandline, filling in fields in <cfg> and creating and returning
 * an <ESL_GETOPTS> options structure. The help page (hmmsearch -h) is formatted
 * here.
 */
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_hmmfile, char **ret_seqfile)
{
  ESL_GETOPTS *go = esl_getopts_Create(options);
  int          status;

  if (esl_opt_ProcessEnvironment(go)         != eslOK)  { if (printf("Failed to process environment: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)  { if (printf("Failed to parse command line: %s\n",  go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_VerifyConfig(go)               != eslOK)  { if (printf("Failed to parse command line: %s\n",  go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
 
  /* help format: */
  if (esl_opt_GetBoolean(go, "-h") == TRUE) 
    {
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);
      if (puts("\nBasic options:")                                           < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/

      if (puts("\nOptions controlling output:")                              < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 2, 2, 80); 

      if (puts("\nOptions controlling reporting thresholds:")                < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 4, 2, 80); 

      if (puts("\nOptions controlling inclusion (significance) thresholds:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 5, 2, 80); 

      if (puts("\nOptions for model-specific thresholding:")                 < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 6, 2, 80); 

      if (puts("\nOptions controlling acceleration heuristics:")             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 7, 2, 80); 

      if (puts("\nOther expert options:")                                    < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 12, 2, 80); 
      exit(0);
    }

  if (esl_opt_ArgNumber(go)                 != 2)      { if (puts("Incorrect number of command line arguments.")      < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_hmmfile = esl_opt_GetArg(go, 1)) == NULL)  { if (puts("Failed to get <hmmdb> argument on command line")   < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_seqfile = esl_opt_GetArg(go, 2)) == NULL)  { if (puts("Failed to get <seqfile> argument on command line") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  /* Validate any attempted use of stdin streams */
  if (strcmp(*ret_hmmfile, "-") == 0) 
    { if (puts("nhmmscan cannot read <hmm database> from stdin stream, because it must have hmmpress'ed auxfiles") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");   goto FAILURE;  }

  *ret_go = go;
  return eslOK;
  
 FAILURE:  /* all errors handled here are user errors, so be polite.  */
  esl_usage(stdout, argv[0], usage);
  if (puts("\nwhere most common options are:")                                 < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/
  if (printf("\nTo see more help on available options, do %s -h\n\n", argv[0]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  esl_getopts_Destroy(go);
  exit(1);  

 ERROR:
  if (go) esl_getopts_Destroy(go);
  exit(status);
}
static int
output_header(FM_METADATA *meta, FILE *ofp, const ESL_GETOPTS *go, char *fmfile, char *qfile)
{
  char *alph;
  char *appname = NULL;
  int   status;

  if      (meta->alph_type == fm_DNA)       alph = "dna";
  else if (meta->alph_type == fm_AMINO)     alph = "amino";

  if ((status = esl_FileTail(go->argv[0], FALSE, &appname)) != eslOK) return status;



  if (fprintf(ofp, "# %s :: %s\n", appname, banner)                                               < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# %s\n", EASEL_COPYRIGHT)                                                     < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# %s\n", EASEL_LICENSE)                                                       < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");


  if (fprintf(ofp, "# input binary-formatted HMMER database:   %s\n", fmfile) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# input file of query sequences:           %s\n", qfile)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

  if (esl_opt_IsUsed(go, "--out")) {
    char *outfile = esl_opt_GetString(go, "--out");
    if (fprintf(ofp, "# output file containing list of hits:     %s\n", (esl_strcmp(outfile, "-") == 0 ? "stdout" : outfile)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); 
  }

  if (esl_opt_IsUsed(go, "--count_only") && fprintf(ofp, "# output only counts, not hit locations\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");

  if (fprintf(ofp, "# alphabet     :                           %s\n", alph)                         < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# bin_length   :                           %d\n", meta->freq_cnt_b)             < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# suffix array sample rate:                %d\n", meta->freq_SA)                < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");


  if (appname) free(appname);
    return eslOK;

ERROR:
if (appname) free(appname);
return status;

}
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_alifile, char **ret_postalifile)
{
  ESL_GETOPTS *go = esl_getopts_Create(options);
  int          status;

  if (esl_opt_ProcessEnvironment(go)         != eslOK) { if (printf("Failed to process environment:\n%s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) { if (printf("Failed to parse command line:\n%s\n",  go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_VerifyConfig(go)               != eslOK) { if (printf("Failed to parse command line:\n%s\n",  go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  /* help format: */
  if (esl_opt_GetBoolean(go, "-h") == TRUE) 
    {
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);

      if (puts("\nBasic options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 1, 2, 80);

      if (puts("\nMask range options (format:  --xxx 10-20,30-40 ) :") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 5, 2, 80);

      if (puts("\nOptions for selecting alphabet rather than guessing it:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 2, 2, 80);

      if (puts("\nAlternative model construction strategies:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 3, 2, 80);

      if (puts("\nAlternative relative sequence weighting strategies:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 4, 2, 80);

      if (puts("\nOther options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 8, 2, 80);
      exit(0);
    }

  if (esl_opt_ArgNumber(go)                  > 2)    { if (puts("Incorrect number of command line arguments.")          < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_alifile     = esl_opt_GetArg(go, 1)) == NULL) { if (puts("Failed to get <msafile> argument on command line")     < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) {
    if ((*ret_postalifile = esl_opt_GetArg(go, 2)) == NULL) { if (puts("Failed to get <postmsafile> argument on command line")     < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  }

  if (strcmp(*ret_alifile, "-") == 0 && ! esl_opt_IsOn(go, "--informat"))
    { if (puts("Must specify --informat to read <alifile> from stdin ('-')") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }


  *ret_go = go;
  return eslOK;
  
 FAILURE:  /* all errors handled here are user errors, so be polite.  */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, go, 1, 2, 80);
  printf("\nTo see more help on other available options, do:\n  %s -h\n\n", argv[0]);
  esl_getopts_Destroy(go);
  exit(1);  

 ERROR:
  if (go) esl_getopts_Destroy(go);
  exit(status);
}
int
main(int argc, char **argv)
{

  int i,j;

  ESL_GETOPTS     *go = NULL;	/* command line processing                 */
  ESL_STOPWATCH   *w  = esl_stopwatch_Create();

  int              status;
  ESL_MSA      *msa         = NULL;
  FILE         *ofp         = NULL;    /* output file (default is stdout) */
  ESL_ALPHABET *abc         = NULL;    /* digital alphabet */

  char         *alifile;  /* name of the alignment file we're building HMMs from  */
  ESLX_MSAFILE *afp         = NULL;            /* open alifile  */
  int           infmt       = eslMSAFILE_UNKNOWN;    /* autodetect alignment format by default. */
  int           outfmt      = eslMSAFILE_STOCKHOLM;


  char         *postmsafile;  /* optional file to resave annotated, modified MSAs to  */
  FILE         *postmsafp = NULL;  /* open <postmsafile>, or NULL */

  int           mask_range_cnt = 0;
  uint32_t      mask_starts[100]; // over-the-top allocation.
  uint32_t      mask_ends[100];

  char         *rangestr;
  char         *range;


  int     *map = NULL; /* map[i]=j,  means model position i comes from column j of the alignment; 1..alen */

  int    keep_mm;

  /* Set processor specific flags */
  impl_Init();
  alifile     = NULL;
  postmsafile = NULL;

  /* Parse the command line
   */
  process_commandline(argc, argv, &go, &alifile, &postmsafile);
  keep_mm = esl_opt_IsUsed(go, "--apendmask");

  /* Initialize what we can in the config structure (without knowing the alphabet yet).
   * Fields controlled by masters are set up in usual_master() or mpi_master()
   * Fields used by workers are set up in mpi_worker()
   */
  ofp         = NULL;
  infmt         = eslMSAFILE_UNKNOWN;
  afp         = NULL;
  abc         = NULL;

  if (esl_opt_IsOn(go, "--informat")) {
    infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslMSAFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--informat"));
  }

  /* Determine output alignment file format */
  outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat"));
  if (outfmt == eslMSAFILE_UNKNOWN)    p7_Fail(argv[0], "%s is not a recognized output MSA file format\n", esl_opt_GetString(go, "--outformat"));



  /* Parse the ranges */

  if (esl_opt_IsUsed(go, "--alirange")) {
    esl_strdup(esl_opt_GetString(go, "--alirange"), -1, &rangestr) ;
  } else if (esl_opt_IsUsed(go, "--modelrange")) {
    esl_strdup(esl_opt_GetString(go, "--modelrange"), -1, &rangestr) ;
  } else if (esl_opt_IsUsed(go, "--model2ali")) {
    esl_strdup(esl_opt_GetString(go, "--model2ali"), -1, &rangestr) ;
  } else if (esl_opt_IsUsed(go, "--ali2model")) {
    esl_strdup(esl_opt_GetString(go, "--ali2model"), -1, &rangestr) ;
  } else {
    if (puts("Must specify mask range with --modelrange, --alirange, --model2ali, or --ali2model\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto ERROR;
  }

  while ( (status = esl_strtok(&rangestr, ",", &range) ) == eslOK) {
    status = esl_regexp_ParseCoordString(range, mask_starts + mask_range_cnt, mask_ends + mask_range_cnt );
    if (status == eslESYNTAX) esl_fatal("range flags take coords <from>..<to>; %s not recognized", range);
    if (status == eslFAIL)    esl_fatal("Failed to find <from> or <to> coord in %s", range);

    mask_range_cnt++;
  }


  /* Start timing. */
  esl_stopwatch_Start(w);


  /* Open files, set alphabet.
   *   afp       - open alignment file for input
   *   abc       - alphabet expected or guessed in ali file
   *   postmsafp - open MSA output file
   *   ofp       - optional open output file, or stdout
   */
  if      (esl_opt_GetBoolean(go, "--amino"))   abc = esl_alphabet_Create(eslAMINO);
  else if (esl_opt_GetBoolean(go, "--dna"))     abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--rna"))     abc = esl_alphabet_Create(eslRNA);
  else                                          abc = NULL;
  
  status = eslx_msafile_Open(&abc, alifile, NULL, infmt, NULL, &afp);
  if (status != eslOK) eslx_msafile_OpenFailure(afp, status);

  if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) {
    postmsafp = fopen(postmsafile, "w");
    if (postmsafp == NULL) p7_Fail("Failed to MSA output file %s for writing", postmsafile);
  }

  if (esl_opt_IsUsed(go, "-o")) 
    {
      ofp = fopen(esl_opt_GetString(go, "-o"), "w");
      if (ofp == NULL) p7_Fail("Failed to open -o output file %s\n", esl_opt_GetString(go, "-o"));
    } 
  else ofp = stdout;


  /* Looks like the i/o is set up successfully...
   * Initial output to the user
   */
  output_header(go, ofp, alifile, postmsafile);                                  /* cheery output header                                */

  /* read the alignment */
  if ((status = eslx_msafile_Read(afp, &msa)) != eslOK)  eslx_msafile_ReadFailure(afp, status);


  if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) {
    /* add/modify mmline for the mask */
    if (msa->mm == NULL) {
      ESL_ALLOC(msa->mm, msa->alen);
      keep_mm = FALSE;
    }

    if (!keep_mm)
      for (i=0; i<msa->alen; i++) msa->mm[i] = '.';

  }

  // convert model coordinates to alignment coordinates, if necessary
  if (esl_opt_IsUsed(go, "--modelrange") || esl_opt_IsUsed(go, "--model2ali") || esl_opt_IsUsed(go, "--ali2model") ) {

    float symfrac = esl_opt_GetReal(go, "--symfrac");
    int do_hand  =  esl_opt_IsOn(go, "--hand");
    int L;

    //same as p7_builder relative_weights
    if      (esl_opt_IsOn(go, "--wnone")  )                  { esl_vec_DSet(msa->wgt, msa->nseq, 1.); }
    else if (esl_opt_IsOn(go, "--wgiven") )                  ;
    else if (esl_opt_IsOn(go, "--wpb")    )                  status = esl_msaweight_PB(msa);
    else if (esl_opt_IsOn(go, "--wgsc")   )                  status = esl_msaweight_GSC(msa);
    else if (esl_opt_IsOn(go, "--wblosum"))                  status = esl_msaweight_BLOSUM(msa, esl_opt_GetReal(go, "--wid"));

    if ((status =  esl_msa_MarkFragments(msa, esl_opt_GetReal(go, "--fragthresh")))           != eslOK) goto ERROR;

    //build a map of model mask coordinates to alignment coords
    ESL_ALLOC(map, sizeof(int)     * (msa->alen+1));
    L = p7_Alimask_MakeModel2AliMap(msa, do_hand, symfrac, map );


    if ( esl_opt_IsUsed(go, "--model2ali") ) {
      //print mapping
      printf ("model coordinates     alignment coordinates\n");
      for (i=0; i<mask_range_cnt; i++)
        printf ("%8d..%-8d -> %8d..%-8d\n", mask_starts[i], mask_ends[i], map[mask_starts[i]-1], map[mask_ends[i]-1]);
      /* If I wanted to, I could print all the map values independently:
        printf("\n\n-----------\n");
        printf("Map\n");
        printf("---\n");
        for (i=0; i<L; i++)
          printf("%d -> %d\n", i+1, map[i]);
      */
    } else if ( esl_opt_IsUsed(go, "--ali2model") ) {
      //print mapping  (requires scanning the inverted map
      int alistart = 0;
      int aliend = 0;
      printf ("alignment coordinates     model coordinates\n");
      for (i=0; i<mask_range_cnt; i++) {
        //find j for ali positions
        while (map[alistart] < mask_starts[i] )
          alistart++;
        aliend = alistart;
        while (map[aliend] < mask_ends[i] )
          aliend++;

        printf ("   %8d..%-8d -> %8d..%-8d\n", map[alistart], map[aliend], alistart+1, aliend+1);
      }
    } else {
      //convert the mask coords based on map
      for (i=0; i<mask_range_cnt; i++) {
          mask_starts[i] = map[mask_starts[i]-1]; //-1 because mmline is offset by one relative to the 1-base alignment
          mask_ends[i]   = map[mask_ends[i]-1];
      }
    }
  }

  if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) {
    //overwrite '.' with 'm' everywhere the range says to do it
    for (i=0; i<mask_range_cnt; i++)
      for (j=mask_starts[i]; j<=mask_ends[i]; j++)
        msa->mm[j-1] = 'm';

    if ((status = eslx_msafile_Write(postmsafp, msa, outfmt))  != eslOK) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  }

  esl_stopwatch_Stop(w);

  if (esl_opt_IsOn(go, "-o"))  fclose(ofp);
  if (postmsafp) fclose(postmsafp);
  if (afp)   eslx_msafile_Close(afp);
  if (abc)   esl_alphabet_Destroy(abc);

  esl_getopts_Destroy(go);
  esl_stopwatch_Destroy(w);
  return 0;


  ERROR:
   return eslFAIL;
}
Beispiel #6
0
/* Function:  esl_msafile_a2m_Write()
 * Synopsis:  Write an A2M (UCSC SAM) dotless format alignment to a stream.
 *
 * Purpose:   Write alignment <msa> in dotless UCSC A2M format to a
 *            stream <fp>.
 *            
 *            The <msa> should have a valid reference line <msa->rf>,
 *            with alphanumeric characters marking consensus (match)
 *            columns, and non-alphanumeric characters marking
 *            nonconsensus (insert) columns. If it does not,
 *            then as a fallback, the first sequence in the alignment is
 *            considered to be the consensus.
 *            
 *            In "dotless" A2M format, gap characters (.) in insert
 *            columns are omitted; therefore sequences can be of
 *            different lengths, but each sequence has the same number
 *            of consensus columns (residue or -).
 *            
 *            A2M format cannot represent missing data symbols
 *            (Easel's ~). Any missing data symbols are converted to
 *            gaps.
 *            
 *            A2M format cannot represent pyrrolysine residues in
 *            amino acid sequences, because it treats 'O' symbols
 *            specially, as indicating a position at which a
 *            free-insertion module (FIM) should be created. Any 'O'
 *            in the <msa> is written instead as an unknown
 *            residue ('X', in protein sequences).
 *            
 * Args:      fp  - open output stream
 *            msa - MSA to write       
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEWRITE> on any system write error, such as filled disk.
 */
int
esl_msafile_a2m_Write(FILE *fp, const ESL_MSA *msa)
{
  char   *buf = NULL;
  int     cpl = 60;
  int     bpos;
  int     pos;
  int     is_consensus;
  int     is_residue;
  int     do_dotless = TRUE;	/* just changing this to FALSE makes it write dots too */
  int     i;
  int     sym;
  int     status;

  ESL_ALLOC(buf, sizeof(char) * (cpl+1));

  for (i = 0; i < msa->nseq; i++)
    {
      /* Construct the name/description line */
      if (fprintf(fp, ">%s", msa->sqname[i])                                                      < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed");
      if (msa->sqacc  != NULL && msa->sqacc[i]  != NULL) { if (fprintf(fp, " %s", msa->sqacc[i])  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); }
      if (msa->sqdesc != NULL && msa->sqdesc[i] != NULL) { if (fprintf(fp, " %s", msa->sqdesc[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); }
      if (fputc('\n', fp)                                                                         < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); 

#ifdef eslAUGMENT_ALPHABET
      if (msa->abc)
	{
	  pos = 0;
	  while (pos < msa->alen)
	    {
	      for (bpos = 0; pos < msa->alen && bpos < cpl; pos++)
		{
		  sym          = msa->abc->sym[msa->ax[i][pos+1]]; /* note off-by-one in digitized aseq: 1..alen */
		  is_residue   = esl_abc_XIsResidue(msa->abc, msa->ax[i][pos+1]);
		  if (msa->rf) is_consensus = (isalnum(msa->rf[pos]) ? TRUE : FALSE);
		  else         is_consensus = (esl_abc_XIsResidue(msa->abc, msa->ax[0][pos+1]) ? TRUE : FALSE);

		  if (sym == 'O') sym = esl_abc_XGetUnknown(msa->abc); /* watch out: O means "insert a FIM" in a2m format, not pyrrolysine */
		  
		  if      (is_consensus) { buf[bpos++] = (is_residue ? toupper(sym) : '-'); }
		  else if (is_residue)   { buf[bpos++] = tolower(sym); }
		  else if (! do_dotless) { buf[bpos++] = '.'; }
		}
	      buf[bpos] = '\0';
	      if (bpos) { if (fprintf(fp, "%s\n", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed");}
	    }
	}
#endif
      if (! msa->abc)
	{
	  pos = 0;
	  while (pos < msa->alen)
	    {
	      for (bpos = 0; pos < msa->alen && bpos < cpl; pos++)
		{
		  sym          = msa->aseq[i][pos];
		  is_residue   = isalpha(msa->aseq[i][pos]);
		  if (msa->rf) is_consensus = (isalnum(msa->rf[pos]) ? TRUE : FALSE);
		  else         is_consensus = (isalnum(msa->aseq[0][pos]) ? TRUE : FALSE);

		  if (sym == 'O') sym = 'X';

		  if      (is_consensus) { buf[bpos++] = ( is_residue ? toupper(sym) : '-'); }
		  else if (is_residue)   { buf[bpos++] = tolower(sym); }
		  else if (! do_dotless) { buf[bpos++] = '.'; }
		  
		}
	      buf[bpos] = '\0';
	      if (bpos) { if (fprintf(fp, "%s\n", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); }
	    } 
	}
    } /* end, loop over sequences in the MSA */

  free(buf);
  return eslOK;

 ERROR:
  if (buf) free(buf);
  return status;
}
Beispiel #7
0
/* Function:  esl_msafile_psiblast_Write()
 * Synopsis:  Write an MSA to a stream in PSI-BLAST format
 *
 * Purpose:   Write alignment <msa> in NCBI PSI-BLAST format to 
 *            stream <fp>.
 *            
 *            The <msa> should have a valid reference line <msa->rf>,
 *            with alphanumeric characters marking consensus (match)
 *            columns, and non-alphanumeric characters marking
 *            nonconsensus (insert) columns. If it does not have RF
 *            annotation, then the first sequence in the <msa> 
 *            defines the "consensus".
 *            
 *            PSI-BLAST format allows only one symbol ('-') for gaps,
 *            and cannot represent missing data symbols (Easel's
 *            '~'). Any missing data symbols are converted to gaps.
 *
 * Args:      fp  - open output stream
 *            msa - MSA to write       
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEWRITE> on any system write failure, such as filled disk.
 */
int
esl_msafile_psiblast_Write(FILE *fp, const ESL_MSA *msa)
{
  char    *buf = NULL;
  int      cpl = 60;
  int      acpl;
  int      i;
  int      sym;
  int64_t  pos, bpos;
  int      maxnamewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq);
  int      is_consensus;
  int      is_residue;
  int      status;

  ESL_ALLOC(buf, sizeof(char) * (cpl+1));

  for (pos = 0; pos < msa->alen; pos += cpl)
    {
      for (i = 0; i < msa->nseq; i++)
	{
	  acpl =  (msa->alen - pos > cpl)? cpl : msa->alen - pos;

#ifdef eslAUGMENT_ALPHABET
	  if (msa->abc)
	    {
	      for (bpos = 0; bpos < acpl; bpos++)
		{
		  sym          = msa->abc->sym[msa->ax[i][pos + bpos + 1]];
		  is_residue   = esl_abc_XIsResidue(msa->abc, msa->ax[i][pos+bpos+1]);
		  if (msa->rf) is_consensus = (isalnum(msa->rf[pos + bpos]) ? TRUE : FALSE);
		  else         is_consensus = (esl_abc_XIsResidue(msa->abc, msa->ax[0][pos+bpos+1]) ? TRUE : FALSE);
				      
		  if (is_consensus) { buf[bpos] = (is_residue ? toupper(sym) : '-'); }
		  else              { buf[bpos] = (is_residue ? tolower(sym) : '-'); }
		}
	    }
#endif
	  if (! msa->abc)
	    {
	      for (bpos = 0; bpos < acpl; bpos++)
		{
		  sym          = msa->aseq[i][pos + bpos];
		  is_residue   = isalnum(sym);
		  if (msa->rf) is_consensus = (isalnum(msa->rf[pos + bpos]) ? TRUE : FALSE);
		  else         is_consensus = (isalnum(msa->aseq[0][pos+bpos]) ? TRUE : FALSE);

		  if (is_consensus) { buf[bpos] = (is_residue ? toupper(sym) : '-'); }
		  else              { buf[bpos] = (is_residue ? tolower(sym) : '-'); }
		}
	    }
	  buf[acpl] = '\0';	      
	  if (fprintf(fp, "%-*s  %s\n", maxnamewidth, msa->sqname[i], buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "psiblast msa write failed");
	}  /* end loop over sequences */

      if (pos + cpl < msa->alen) 
	{ if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "psiblast msa write failed"); }
    }
  free(buf);
  return eslOK;

 ERROR:
  if (buf) free(buf);
  return status;
}
Beispiel #8
0
/* Function:  p7_h2io_WriteASCII()
 * Synopsis:  Write an H3 HMM in HMMER2 compatible format
 *
 * Purpose:   Write HMM <hmm> to stream <fp> in HMMER2 ASCII save
 *            file format.
 *            
 *            HMMER2 saved the null model and the search configuration
 *            (local vs. glocal, for example) as part of its HMM file;
 *            H3 only saves the core HMM. The HMMER2 file is created
 *            for HMMER2's default ``ls mode'' (glocal) with default
 *            null model transitions and default special state
 *            transitions (NECJ).
 *            
 *            Optional statistical calibration and alignment checksum
 *            are not written, because for these H3 and H2 differ too
 *            much.
 *
 * Args:      fp   - stream to write save file format to
 *            hmm  - HMM to save
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error. 
 *
 *            <eslEINVAL> if <hmm> can't be converted; for example, if
 *            it is not in a protein or nucleic acid alphabet (H2
 *            requires biosequence in its save files).
 * 
 *            <eslEWRITE> if any write fails; for example, if the
 *            disk fills up.
 */
int
p7_h2io_WriteASCII(FILE *fp, P7_HMM *hmm)
{
  P7_BG *bg;			/* H2 saves null model in HMM file   */
  int    k;                     /* counter for nodes                 */
  int    x;                     /* counter for symbols               */
  int    ts;			/* counter for state transitions     */
  float  pmove,ploop;		/* default H2 null model transitions */
  int    status;

  if ((bg = p7_bg_Create(hmm->abc)) == NULL) { status = eslEMEM; goto ERROR; }

  /* magic header */
  if (fprintf(fp, "HMMER2.0  [converted from %s]\n", HMMER_VERSION) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "NAME  %s\n", hmm->name)                          < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (hmm->acc  && fprintf(fp, "ACC   %s\n", hmm->acc)              < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (hmm->desc && fprintf(fp, "DESC  %s\n", hmm->desc)             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "LENG  %d\n", hmm->M)                             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  if      (hmm->abc->type == eslAMINO)   { if (fprintf(fp, "ALPH  Amino\n")   < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  else if (hmm->abc->type == eslDNA)     { if (fprintf(fp, "ALPH  Nucleic\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  else if (hmm->abc->type == eslRNA)     { if (fprintf(fp, "ALPH  Nucleic\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  else    ESL_XEXCEPTION(eslEINVAL, "Only protein, DNA, RNA HMMs can be saved in H2 format");

  if (fprintf(fp, "RF    %s\n", (hmm->flags & p7H_RF)  ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "CS    %s\n", (hmm->flags & p7H_CS)  ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "MAP   %s\n", (hmm->flags & p7H_MAP) ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  /* H3 consensus line has no counterpart in H2 */

  if (hmm->comlog != NULL)  { if ( (status = h2_multiline(fp, "COM   ",     hmm->comlog)) != eslOK) goto ERROR; }
  if (hmm->nseq   != -1)    { if (           fprintf     (fp, "NSEQ  %d\n", hmm->nseq)     < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (hmm->ctime  != NULL)  { if (           fprintf     (fp, "DATE  %s\n", hmm->ctime)    < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }

  /* Checksum is not written; H2 and H3 use different checksum algorithms */

  if (hmm->flags & p7H_GA) { if (fprintf(fp, "GA    %.1f %.1f\n", hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (hmm->flags & p7H_TC) { if (fprintf(fp, "TC    %.1f %.1f\n", hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (hmm->flags & p7H_NC) { if (fprintf(fp, "NC    %.1f %.1f\n", hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }

  /* in H3, the HMM does not include NECJ; these are part of the profile.
   * for emulating H2 output, assume default LS config */
  if (fputs("XT     ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  pmove = ( (hmm->abc->type == eslAMINO) ?   1./351. :    1./1001.); 
  ploop = ( (hmm->abc->type == eslAMINO) ? 350./351. : 1000./1001.); 
  if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* NB */
  if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* NN */
  if ( (status = printprob(fp, 6,   0.5, 1.0)) != eslOK) goto ERROR;	/* EC */
  if ( (status = printprob(fp, 6,   0.5, 1.0)) != eslOK) goto ERROR;	/* EJ */
  if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* CT */
  if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* CC */
  if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* JB */
  if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* JJ */
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* Save the default H2 null model transitions, not H3's null model transitions  */
  if (fprintf(fp, "NULT   ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* 1-p1 */
  if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* p1   */
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  
  /* but null emissions really are the H3 null model emissions */
  if (fputs("NULE   ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  for (x = 0; x < hmm->abc->K; x++)
    { if ( (status = printprob(fp, 6, bg->f[x], 1./(float)hmm->abc->K)) != eslOK) goto ERROR; }
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* Don't save stats; H3 local alignment stats are different from H2 calibration */
  
  /* The main model section */
  if (fprintf(fp, "HMM      ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  for (x = 0; x < hmm->abc->K; x++) 
    { if (fprintf(fp, "  %c    ", hmm->abc->sym[x]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (fprintf(fp, "\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "       %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
	      "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d", "b->m", "m->e") < 0) 
    ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* Print HMM parameters (main section of the save file) */
  if (fprintf(fp, "      ")                                    < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if ( (status = printprob(fp, 6, 1.-hmm->t[0][p7H_MD], 1.0)) != eslOK) goto ERROR;
  if (fprintf(fp, " %6s", "*")                                 < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if ( (status = printprob(fp, 6, hmm->t[0][p7H_MD], 1.0))    != eslOK) goto ERROR;
  if (fputc('\n', fp)                                          < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  for (k = 1; k <= hmm->M; k++)
    {
				/* Line 1: k, match emissions, map */
      if (fprintf(fp, " %5d ", k)              < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
      for (x = 0; x < hmm->abc->K; x++) 
	if ( (status = printprob(fp, 6, hmm->mat[k][x], bg->f[x])) != eslOK) goto ERROR;
      if (hmm->flags & p7H_MAP) 
	{ if (fprintf(fp, " %5d", hmm->map[k]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
      if (fputc('\n', fp)                      < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
				/* Line 2: RF and insert emissions */
      if (fprintf(fp, " %5c ", hmm->flags & p7H_RF ? hmm->rf[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
      for (x = 0; x < hmm->abc->K; x++) 
	if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->ins[k][x] : 0.0), bg->f[x])) != eslOK) goto ERROR;
      if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
				/* Line 3: CS and transition probs */
      if (fprintf(fp, " %5c ", hmm->flags & p7H_CS ? hmm->cs[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
      for (ts = 0; ts < 7; ts++)
	if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->t[k][ts] : 0.0), 1.0)) != eslOK) goto ERROR;
      if ( (status = printprob(fp, 6, ((k==1)     ? hmm->t[0][p7H_MM] : 0.0), 1.0)) != eslOK) goto ERROR;
      if ( (status = printprob(fp, 6, ((k<hmm->M) ? 0.0: 1.0), 1.0))                != eslOK) goto ERROR;
      if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

    }
  if (fputs("//\n", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  p7_bg_Destroy(bg);
  return eslOK;

 ERROR:
  p7_bg_Destroy(bg);
  return status;
}