Пример #1
0
/* Function: MSAFileOpen()
 * Date:     SRE, Tue May 18 13:22:01 1999 [St. Louis]
 *
 * Purpose:  Open an alignment database file and prepare
 *           for reading one alignment, or sequentially
 *           in the (rare) case of multiple MSA databases
 *           (e.g. Stockholm format).
 *
 *           Three sources are handled:
 *             - "-"           : read from stdin (no SSI index).
 *             - name ends .gz : read from a "gzip -dc" pipe (no SSI index);
 *                               compiled out if SRE_STRICT_ANSI is defined.
 *             - anything else : fopen() the name as given; if that fails,
 *                               try EnvFileOpen() with <env>. On success an
 *                               attempt is made to open "<path>.ssi" as an
 *                               SSI index; afp->ssi is initialized to NULL
 *                               before that attempt.
 *
 * Args:     filename - name of file to open
 *                      if "-", read stdin
 *                      if it ends in ".gz", read from pipe to gunzip -dc
 *           format   - format of file (e.g. MSAFILE_STOCKHOLM), or
 *                      MSAFILE_UNKNOWN to request autodetection
 *           env      - environment variable for path (e.g. BLASTDB)
 *
 * Returns:  opened MSAFILE * on success.
 *           NULL on failure:
 *             - a regular file could not be opened either directly or
 *               via <env>;
 *             - for gzip'ed files, popen() failed.
 *           On a NULL return nothing allocated here is left behind.
 *
 *           The following conditions are reported through Die() rather
 *           than by returning NULL:
 *             - a ".gz" file that does not exist;
 *             - a ".gz" filename too long for the command buffer;
 *             - autodetection requested on stdin or a gzip pipe;
 *             - autodetection could not determine the format.
 */
MSAFILE *
MSAFileOpen(char *filename, int format, char *env)
{
  MSAFILE *afp;

  afp = MallocOrDie(sizeof(MSAFILE));

  if (strcmp(filename, "-") == 0)
    {
      afp->f        = stdin;
      afp->do_stdin = TRUE;
      afp->do_gzip  = FALSE;
      afp->fname    = sre_strdup("[STDIN]", -1);
      afp->ssi      = NULL;     /* can't index stdin because we can't seek */
    }
#ifndef SRE_STRICT_ANSI
  /* popen(), pclose() aren't portable to non-POSIX systems; disable */
  else if (Strparse("^.*\\.gz$", filename, 0))
    {
      char cmd[256];

      /* popen() will return "successfully" even if the file doesn't
       * exist, because the shell starts gzip fine and gzip merely
       * prints an error. So we check for existence of the file ourself.
       */
      if (! FileExists(filename))
        Die("%s: file does not exist", filename);

      /* Guarantees the sprintf() below fits in cmd, including the NUL. */
      if (strlen(filename) + strlen("gzip -dc ") >= 256)
        Die("filename > 255 char in MSAFileOpen()");

      /* NOTE(review): filename is passed to the shell unquoted, so names
       * containing spaces or shell metacharacters will be misparsed (and
       * are a command-injection risk if filename is untrusted). Behavior
       * is left as-is here; callers should only pass trusted names.
       */
      sprintf(cmd, "gzip -dc %s", filename);
      if ((afp->f = popen(cmd, "r")) == NULL)
        {
          free(afp);            /* don't leak the handle on failure */
          return NULL;
        }

      afp->do_stdin = FALSE;
      afp->do_gzip  = TRUE;
      afp->fname    = sre_strdup(filename, -1);
      /* we can't index a .gz file, because we can't seek in a pipe afaik */
      afp->ssi      = NULL;
    }
#endif /*SRE_STRICT_ANSI*/
  else
    {
      char *ssifile;
      char *dir;

      /* When we open a file, it may be either in the current
       * directory, or in the directory indicated by the env
       * argument - and we have to construct the SSI filename accordingly.
       */
      if ((afp->f = fopen(filename, "r")) != NULL)
        {
          /* +5: room for ".ssi" and the terminating NUL */
          ssifile = MallocOrDie(sizeof(char) * (strlen(filename) + 5));
          sprintf(ssifile, "%s.ssi", filename);
        }
      else if ((afp->f = EnvFileOpen(filename, env, &dir)) != NULL)
        {
          char *full;

          full    = FileConcat(dir, filename);
          /* +5: room for ".ssi" and the terminating NUL */
          ssifile = MallocOrDie(sizeof(char) * (strlen(full) + 5));
          sprintf(ssifile, "%s.ssi", full);
          free(full);           /* only needed to build ssifile */
          free(dir);
        }
      else
        {
          free(afp);            /* don't leak the handle on failure */
          return NULL;
        }

      afp->do_stdin = FALSE;
      afp->do_gzip  = FALSE;
      afp->fname    = sre_strdup(filename, -1);
      afp->ssi      = NULL;

      /* Open the SSI index file. The return value is deliberately
       * ignored: an index is optional, and afp->ssi was set to NULL
       * above for the case where none can be opened.
       */
      SSIOpen(ssifile, &(afp->ssi));
      free(ssifile);
    }

  /* Invoke autodetection if we haven't already been told what
   * to expect.
   */
  if (format == MSAFILE_UNKNOWN)
    {
      if (afp->do_stdin == TRUE || afp->do_gzip)
        Die("Can't autodetect alignment file format from a stdin or gzip pipe");
      format = MSAFileFormat(afp);
      if (format == MSAFILE_UNKNOWN)
        Die("Can't determine format of multiple alignment file %s", afp->fname);
    }

  afp->format     = format;
  afp->linenumber = 0;
  afp->buf        = NULL;
  afp->buflen     = 0;

  return afp;
}
Пример #2
0
/* Function: CP9_HMMFileOpen()
 * 
 * Purpose:  Open an HMM file for reading. The file may be either
 *           an index for a library of HMMs, or an HMM. 
 *           
 * Args:     hmmfile - name of file
 *           env     - NULL, or environment variable for HMM database.
 *           
 * Return:   Valid CP9HMMFILE *, or NULL on failure.
 */
CP9HMMFILE * 
CP9_HMMFileOpen(char *hmmfile, char *env)
{
  CP9HMMFILE     *hmmfp;
  unsigned int magic;
  char         buf[512];
  char        *ssifile;
  char        *dir;        /* dir name in which HMM file was found */
  int          status;

  hmmfp = (CP9HMMFILE *) MallocOrDie (sizeof(CP9HMMFILE));
  hmmfp->f          = NULL; 
  hmmfp->parser     = NULL;
  hmmfp->is_binary  = FALSE;
  hmmfp->byteswap   = FALSE;
  hmmfp->is_seekable= TRUE;	/* always; right now, an HMM must always be in a file. */
  
  /* Open the file. Look in current directory.
   * If that doesn't work, check environment var for
   * a second possible directory (usually the location
   * of a system-wide HMM library).
   * Using dir name if necessary, construct correct SSI file name.
   */
  hmmfp->f   = NULL;
  hmmfp->ssi = NULL;
  if ((hmmfp->f = fopen(hmmfile, "r")) != NULL)
    {
      ssifile = MallocOrDie(sizeof(char) * (strlen(hmmfile) + 5));
      sprintf(ssifile, "%s.ssi", hmmfile);

      if ((hmmfp->mode = SSIRecommendMode(hmmfile)) == -1)
	Die("SSIRecommendMode() failed");
    }
  else if ((hmmfp->f = EnvFileOpen(hmmfile, env, &dir)) != NULL)
    {
      char *full;
      full    = FileConcat(dir, hmmfile);

      ssifile = MallocOrDie(sizeof(char) * (strlen(full) + strlen(hmmfile) + 5));
      sprintf(ssifile, "%s.ssi", full);

      if ((hmmfp->mode = SSIRecommendMode(full)) == -1)
	Die("SSIRecommendMode() failed");

      free(full);
      free(dir);
    }
  else return NULL;
  
  /* Open the SSI index file. If it doesn't exist, or it's corrupt, or 
   * some error happens, hmmfp->ssi stays NULL.
   */
  SQD_DPRINTF1(("Opening ssifile %s...\n", ssifile));
  SSIOpen(ssifile, &(hmmfp->ssi));
  free(ssifile);

  /* Initialize the disk offset stuff.
   */
  status = SSIGetFilePosition(hmmfp->f, hmmfp->mode, &(hmmfp->offset));
  if (status != 0) Die("SSIGetFilePosition() failed");

  /* Check for binary or byteswapped binary format
   * by peeking at first 4 bytes.
   */ 
  if (! fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) {
    CP9_HMMFileClose(hmmfp);
    return NULL;
  }
  rewind(hmmfp->f);

  if (magic == vCP9magic) { 
    hmmfp->parser    = CP9_read_bin_hmm;
    hmmfp->is_binary = TRUE;
    return hmmfp;
  } 
  else if (magic == vCP9swap) { 
    SQD_DPRINTF1(("Opened an Infernal CP9 HMM binary file [byteswapped]\n"));
    hmmfp->parser    = CP9_read_bin_hmm;
    hmmfp->is_binary = TRUE;
    hmmfp->byteswap  = TRUE;
    return hmmfp;
  }
  /* else we fall thru; it may be an ASCII file. */

  /* If magic looks binary but we don't recognize it, choke and die.
   */
  if (magic & 0x80000000) {
    Warn("\
%s appears to be a binary but not a CM plan 9 format that we recognize\n\
It may be from HMMER,\n\
or may be a different kind of binary altogether.\n", hmmfile);
    CP9_HMMFileClose(hmmfp);
    return NULL;
  }