コード例 #1
0
ファイル: esl_keyhash.c プロジェクト: kelkar/Bio-HMM-Logo
/* Function:  esl_keyhash_Clone()
 * Synopsis:  Duplicates a keyhash.
 *
 * Purpose:   Allocates and duplicates a keyhash <kh>. Returns a
 *            pointer to the duplicate.
 *
 * Throws:    <NULL> on allocation failure.
 */
ESL_KEYHASH *
esl_keyhash_Clone(const ESL_KEYHASH *kh)
{
  ESL_KEYHASH *nw;		
  int          h;

  if ((nw = keyhash_create(kh->hashsize, kh->kalloc, kh->salloc)) == NULL) goto ERROR;

  for (h = 0; h < kh->hashsize; h++)
    nw->hashtable[h] = kh->hashtable[h];

  for (h = 0; h < kh->nkeys; h++)  
    {
      nw->nxt[h]        = kh->nxt[h];
      nw->key_offset[h] = kh->key_offset[h];
    }
  nw->nkeys = kh->nkeys;

  memcpy(nw->smem, kh->smem, sizeof(char) * kh->sn);
  nw->sn = kh->sn;
  return nw;
  
 ERROR:
  esl_keyhash_Destroy(nw);
  return NULL;
}
コード例 #2
0
ファイル: esl_keyhash.c プロジェクト: kelkar/Bio-HMM-Logo
/* keyhash_create()
 * 
 * The real creation function, which takes arguments for memory sizes.
 * This is abstracted to a static function because it's used by both
 * Create() and Clone() but slightly differently.
 *
 * Args:  hashsize          - size of hash table; this must be a power of two.
 *        init_key_alloc    - initial allocation for # of keys.
 *        init_string_alloc - initial allocation for total size of key strings.
 *
 * Returns:  An allocated hash table structure; or NULL on failure.
 */
ESL_KEYHASH *
keyhash_create(uint32_t hashsize, int init_key_alloc, int init_string_alloc)
{
  ESL_KEYHASH *kh = NULL;
  int  i;
  int  status;

  ESL_ALLOC(kh, sizeof(ESL_KEYHASH));
  kh->hashtable  = NULL;
  kh->key_offset = NULL;
  kh->nxt        = NULL;
  kh->smem       = NULL;

  kh->hashsize  = hashsize;
  kh->kalloc    = init_key_alloc;
  kh->salloc    = init_string_alloc;

  ESL_ALLOC(kh->hashtable, sizeof(int) * kh->hashsize);
  for (i = 0; i < kh->hashsize; i++)  kh->hashtable[i] = -1;

  ESL_ALLOC(kh->key_offset, sizeof(int) * kh->kalloc);
  ESL_ALLOC(kh->nxt,        sizeof(int) * kh->kalloc);
  for (i = 0; i < kh->kalloc; i++)  kh->nxt[i] = -1;

  ESL_ALLOC(kh->smem,   sizeof(char) * kh->salloc);
  kh->nkeys = 0;
  kh->sn    = 0;
  return kh;

 ERROR:
  esl_keyhash_Destroy(kh);
  return NULL;
}
コード例 #3
0
ファイル: hmmfetch.c プロジェクト: Denis84/EPA-WorkBench
/* multifetch:
 * given a file containing lines with one name or key per line;
 * parse the file line-by-line;
 * if we have an SSI index available, retrieve the HMMs by key
 * as we see each line;
 * else, without an SSI index, store the keys in a hash, then
 * read the entire HMM file in a single pass, outputting HMMs
 * that are in our keylist. 
 * 
 * Note that with an SSI index, you get the HMMs in the order they
 * appear in the <keyfile>, but without an SSI index, you get HMMs in
 * the order they occur in the HMM file.
 */
static void
multifetch(ESL_GETOPTS *go, FILE *ofp, char *keyfile, P7_HMMFILE *hfp)
{
  ESL_KEYHASH    *keys   = esl_keyhash_Create();
  ESL_FILEPARSER *efp    = NULL;
  ESL_ALPHABET   *abc    = NULL;
  P7_HMM         *hmm    = NULL;
  int             nhmm   = 0;
  char           *key;
  int             keylen;
  int             keyidx;
  int             status;
  
  if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK)  p7_Fail("Failed to open key file %s\n", keyfile);
  esl_fileparser_SetCommentChar(efp, '#');

  while (esl_fileparser_NextLine(efp) == eslOK)
    {
      if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK)
	p7_Fail("Failed to read HMM name on line %d of file %s\n", efp->linenumber, keyfile);
      
      status = esl_key_Store(keys, key, &keyidx);
      if (status == eslEDUP) p7_Fail("HMM key %s occurs more than once in file %s\n", key, keyfile);
	
      if (hfp->ssi != NULL) { onefetch(go, ofp, key, hfp);  nhmm++; }
    }

  if (hfp->ssi == NULL) 
    {
      while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF)
	{
	  if      (status == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
	  else if (status == eslEFORMAT)   p7_Fail("bad file format in HMM file %s",             hfp->fname);
	  else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets",   hfp->fname);
	  else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s",   hfp->fname);

	  if (esl_key_Lookup(keys, hmm->name, &keyidx) == eslOK || 
	      ((hmm->acc) && esl_key_Lookup(keys, hmm->acc, &keyidx) == eslOK))
	    {
	      p7_hmmfile_WriteASCII(ofp, -1, hmm);
	      nhmm++;
	    }

	  p7_hmm_Destroy(hmm);
	}
    }
  
  if (ofp != stdout) printf("\nRetrieved %d HMMs.\n", nhmm);
  if (abc != NULL) esl_alphabet_Destroy(abc);
  esl_keyhash_Destroy(keys);
  esl_fileparser_Close(efp);
  return;
}
コード例 #4
0
ファイル: esl-afetch.c プロジェクト: ElofssonLab/TOPCONS2
/* multifetch:
 * given a file containing lines with one name or key per line;
 * parse the file line-by-line;
 * if we have an SSI index available, retrieve the MSAs by key
 * as we see each line;
 * else, without an SSI index, store the keys in a hash, then
 * read the entire MSA file in a single pass, outputting MSAs
 * that are in our keylist. 
 * 
 * Note that with an SSI index, you get the MSAs in the order they
 * appear in the <keyfile>, but without an SSI index, you get MSAs in
 * the order they occur in the MSA file.
 */
static void
multifetch(ESL_GETOPTS *go, FILE *ofp, int outfmt, char *keyfile, ESLX_MSAFILE *afp)
{
  ESL_KEYHASH    *keys   = esl_keyhash_Create();
  ESL_FILEPARSER *efp    = NULL;
  ESL_MSA        *msa    = NULL;
  int             nali   = 0;
  char           *key;
  int             keylen;
  int             keyidx;
  int             status;
  
  if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK) 
    esl_fatal("Failed to open key file %s\n", keyfile);
  esl_fileparser_SetCommentChar(efp, '#');

  while (esl_fileparser_NextLine(efp) == eslOK)
    {
      if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK)
	esl_fatal("Failed to read MSA name on line %d of file %s\n", efp->linenumber, keyfile);
      
      status = esl_keyhash_Store(keys, key, keylen, &keyidx);
      if (status == eslEDUP) esl_fatal("MSA key %s occurs more than once in file %s\n", key, keyfile);
	
      if (afp->ssi) { onefetch(go, ofp, outfmt, key, afp);  nali++; }

    }

  if (! afp->ssi)
    {
      while ((status = eslx_msafile_Read(afp, &msa)) != eslEOF)
	{
	  if (status != eslOK) eslx_msafile_ReadFailure(afp, status);
	  nali++;

	  if (msa->name == NULL) 
	    esl_fatal("Every alignment in file must have a name to be retrievable. Failed to find name of alignment #%d\n", nali);

	  if ( (esl_keyhash_Lookup(keys, msa->name, -1, NULL) == eslOK) ||
	       (msa->acc != NULL && esl_keyhash_Lookup(keys, msa->acc, -1, NULL) == eslOK))
	    eslx_msafile_Write(ofp, msa, outfmt);

	  esl_msa_Destroy(msa);
	}
    }
  
  if (ofp != stdout) printf("\nRetrieved %d alignments.\n", nali);
  esl_keyhash_Destroy(keys);
  esl_fileparser_Close(efp);
  return;
}
コード例 #5
0
ファイル: esl-cluster.c プロジェクト: ElofssonLab/TOPCONS2
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = NULL; 
  char           *keyfile  = NULL;
  char           *tabfile  = NULL;
  ESL_KEYHASH    *kh       = esl_keyhash_Create();
  int             nkeys    = 0;
  ESL_DMATRIX    *D        = NULL;
  ESL_TREE       *T        = NULL;

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], go, "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], go, "Error in app configuration: %s\n",   go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
  if (esl_opt_ArgNumber(go) != 2)                      cmdline_failure(argv[0], go, "Incorrect number of command line arguments.\n");
  keyfile = esl_opt_GetArg(go, 1);
  tabfile = esl_opt_GetArg(go, 2);

  read_keyfile(go, keyfile, kh);
  nkeys = esl_keyhash_GetNumber(kh);

  D = esl_dmatrix_Create(nkeys, nkeys);
  read_tabfile(go, tabfile, kh, D);

  esl_tree_SingleLinkage(D, &T);
    
  //esl_tree_WriteNewick(stdout, T);
  output_clusters(go, T, kh);


  esl_tree_Destroy(T);
  esl_dmatrix_Destroy(D);
  esl_keyhash_Destroy(kh);
  esl_getopts_Destroy(go);
  return 0;
}
コード例 #6
0
ファイル: esl-sfetch.c プロジェクト: ElofssonLab/TOPCONS2
/* multifetch:
 * given a file containing lines with one name or key per line;
 * parse the file line-by-line;
 * if we have an SSI index available, retrieve the seqs by key
 * as we see each line;
 * else, without an SSI index, store the keys in a hash, then
 * read the entire seq file in a single pass, outputting seqs
 * that are in our keylist. 
 * 
 * Note that with an SSI index, you get the seqs in the order they
 * appear in the <keyfile>, but without an SSI index, you get seqs in
 * the order they occur in the seq file.
 */
static void
multifetch(ESL_GETOPTS *go, FILE *ofp, char *keyfile, ESL_SQFILE *sqfp)
{
  ESL_KEYHASH    *keys   = esl_keyhash_Create();
  ESL_FILEPARSER *efp    = NULL;
  int             nseq   = 0;
  int             nkeys  = 0;
  char           *key;
  int             keylen;
  int             keyidx;
  int             status;

  
  if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK)  esl_fatal("Failed to open key file %s\n", keyfile);
  esl_fileparser_SetCommentChar(efp, '#');

  while (esl_fileparser_NextLine(efp) == eslOK)
    {
      if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK)
	esl_fatal("Failed to read seq name on line %d of file %s\n", efp->linenumber, keyfile);
      
      status = esl_keyhash_Store(keys, key, keylen, &keyidx);
      if (status == eslEDUP) esl_fatal("seq key %s occurs more than once in file %s\n", key, keyfile);
	
      /* if we have an SSI index, just fetch them as we go. */
      if (sqfp->data.ascii.ssi != NULL) { onefetch(go, ofp, key, sqfp);  nseq++; }
      nkeys++;
    }

  /* If we don't have an SSI index, we haven't fetched anything yet; do it now. */
  if (sqfp->data.ascii.ssi == NULL) 
    {
      ESL_SQ *sq     = esl_sq_Create();

      while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
	{
	  if ( (sq->name[0] != '\0' && esl_keyhash_Lookup(keys, sq->name, -1, NULL) == eslOK) ||
	       (sq->acc[0]  != '\0' && esl_keyhash_Lookup(keys, sq->acc,  -1, NULL) == eslOK))
	    {
	      if (esl_opt_GetBoolean(go, "-r") )
		if (esl_sq_ReverseComplement(sq) != eslOK) 
		  esl_fatal("Failed to reverse complement %s\n", sq->name);
	      esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE);
	      nseq++;
	    }
	  esl_sq_Reuse(sq);
	}
      if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
					       sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
      else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
					       status, sqfp->filename);
      esl_sq_Destroy(sq);
    }
  
  if (nkeys != nseq) esl_fatal("Tried to retrieve %d keys, but only retrieved %d sequences\n", nkeys, nseq);

  if (ofp != stdout) printf("\nRetrieved %d sequences.\n", nseq);

  esl_keyhash_Destroy(keys);
  esl_fileparser_Close(efp);
  return;
}