Example #1
0
/* keyhash_create()
 * 
 * The real creation function, which takes arguments for memory sizes.
 * This is abstracted to a static function because it's used by both
 * Create() and Clone() but slightly differently.
 *
 * Args:  hashsize          - size of hash table; this must be a power of two.
 *        init_key_alloc    - initial allocation for # of keys.
 *        init_string_alloc - initial allocation for total size of key strings.
 *
 * Returns:  An allocated hash table structure; or NULL on failure.
 */
ESL_KEYHASH *
keyhash_create(uint32_t hashsize, int init_key_alloc, int init_string_alloc)
{
  ESL_KEYHASH *kh = NULL;
  int  i;
  int  status;

  ESL_ALLOC(kh, sizeof(ESL_KEYHASH));
  kh->hashtable  = NULL;
  kh->key_offset = NULL;
  kh->nxt        = NULL;
  kh->smem       = NULL;

  kh->hashsize  = hashsize;
  kh->kalloc    = init_key_alloc;
  kh->salloc    = init_string_alloc;

  ESL_ALLOC(kh->hashtable, sizeof(int) * kh->hashsize);
  for (i = 0; i < kh->hashsize; i++)  kh->hashtable[i] = -1;

  ESL_ALLOC(kh->key_offset, sizeof(int) * kh->kalloc);
  ESL_ALLOC(kh->nxt,        sizeof(int) * kh->kalloc);
  for (i = 0; i < kh->kalloc; i++)  kh->nxt[i] = -1;

  ESL_ALLOC(kh->smem,   sizeof(char) * kh->salloc);
  kh->nkeys = 0;
  kh->sn    = 0;
  return kh;

 ERROR:
  esl_keyhash_Destroy(kh);
  return NULL;
}
Example #2
0
/* Function:  esl_dmatrix_Create()
 *
 * Purpose:   Creates a general <n> x <m> matrix (<n> rows, <m> 
 *            columns).
 *
 * Args:      <n> - number of rows;    $>= 1$
 *            <m> - number of columns; $>= 1$
 * 
 * Returns:   a pointer to a new <ESL_DMATRIX> object. Caller frees
 *            with <esl_dmatrix_Destroy()>.
 *
 * Throws:    <NULL> if an allocation failed.
 */
ESL_DMATRIX *
esl_dmatrix_Create(int n, int m)
{
  ESL_DMATRIX *A = NULL;
  int r;
  int status;

  ESL_ALLOC(A, sizeof(ESL_DMATRIX));
  A->mx = NULL;
  A->n  = n;
  A->m  = m;

  ESL_ALLOC(A->mx, sizeof(double *) * n);
  A->mx[0] = NULL;

  ESL_ALLOC(A->mx[0], sizeof(double) * n * m);
  for (r = 1; r < n; r++)
    A->mx[r] = A->mx[0] + r*m;

  A->type   = eslGENERAL;
  A->ncells = n * m; 
  return A;
  
 ERROR:
  esl_dmatrix_Destroy(A);
  return NULL;
}
Example #3
0
/* Function:  p7_coords2_hash_Create()
 * Synopsis:  Create a <P7_COORDS2_HASH>
 *
 * Purpose:   Allocate and initialize a <P7_COORDS2_HASH> hash table for storing
 *            lots of coord2 arrays (i.e. domain annotations).
 * 
 *            The <init_*> arguments let you set non-default initial
 *            allocation sizes. To use the default for any of these,
 *            pass a 0 value. Defaults are 128 for the initial 
 *            hashtable size <init_hashsize>; 128 for the initial
 *            allocation for number of keys to be stored <init_nkeyalloc>;
 *            and 2048 for the initial allocation for the number
 *            of integers to be stored in key data. 
 *            
 *            In general the initialization defaults should be
 *            fine. All three are grown automatically as needed, as
 *            you add keys to the hash.
 *            
 *            "key data" means <n> <start>/<end> pairs, plus <n>
 *            itself: it takes 2n+1 integers to store a <P7_COORD2>
 *            array of length <n>.
 *            
 *            <hashsize> must be a power of 2; remember that if you
 *            pass a non-default value.
 *            
 * Args:      init_hashsize : initial hashtable size. Power of 2; >0.
 *            init_keyalloc : initial allocation for # keys. >0.
 *            init_calloc   : initial allocation for key data. >0.
 *
 * Returns:   pointer to the new <P7_COORDS2_HASH> object on success.
 *
 * Throws:    <NULL> on allocation failure.
 */
P7_COORDS2_HASH *
p7_coords2_hash_Create(int32_t init_hashsize, int32_t init_nkeyalloc, int32_t init_calloc)
{
  P7_COORDS2_HASH *ch = NULL;
  int32_t          i;
  int              status;

  ESL_DASSERT1(( init_hashsize == 0 || (init_hashsize && ((init_hashsize & (init_hashsize-1)) == 0)))); /* hashsize is a power of 2 (bitshifting trickery) */
  
  ESL_ALLOC(ch, sizeof(P7_COORDS2_HASH));
  ch->hashtable  = NULL;
  ch->key_offset = NULL;
  ch->nxt        = NULL;
  ch->cmem       = NULL;

  ch->nkeys      = 0;
  ch->cn         = 0;

  ch->hashsize   = (init_hashsize  > 0 ? init_hashsize  : 128);
  ch->kalloc     = (init_nkeyalloc > 0 ? init_nkeyalloc : 128);
  ch->calloc     = (init_calloc    > 0 ? init_calloc    : 2048);
  
  ESL_ALLOC(ch->hashtable, sizeof(int32_t) * ch->hashsize);
  for (i = 0; i < ch->hashsize; i++) ch->hashtable[i] = -1;

  ESL_ALLOC(ch->key_offset, sizeof(int32_t) * ch->kalloc);
  ESL_ALLOC(ch->nxt,        sizeof(int32_t) * ch->kalloc);
  ESL_ALLOC(ch->cmem,       sizeof(int32_t) * ch->calloc);
  return ch;
  
 ERROR:
  p7_coords2_hash_Destroy(ch);
  return NULL;
}
Example #4
0
P7_GBANDS *
p7_gbands_Create(void)
{
  P7_GBANDS *bnd           = NULL;
  int        init_segalloc = 4;
  int        init_rowalloc = 64;
  int        status;

  ESL_ALLOC(bnd, sizeof(P7_GBANDS));
  bnd->nseg  = 0;
  bnd->nrow  = 0;
  bnd->L     = 0;
  bnd->M     = 0;
  bnd->ncell = 0;
  bnd->imem  = NULL;
  bnd->kmem  = NULL;


  ESL_ALLOC(bnd->imem, sizeof(int) * init_segalloc * 2); /* *2: for ia, ib pairs */
  ESL_ALLOC(bnd->kmem, sizeof(int) * init_rowalloc * p7_GBANDS_NK);
  bnd->segalloc = init_segalloc;
  bnd->rowalloc = init_rowalloc;

  return bnd;
  
 ERROR:
  p7_gbands_Destroy(bnd);
  return NULL;
}
Example #5
0
ESL_HMX *
esl_hmx_Create(int allocL, int allocM)
{
  ESL_HMX *mx = NULL;
  int      i;
  int      status;
  
  ESL_ALLOC(mx, sizeof(ESL_HMX));
  mx->dp_mem = NULL;
  mx->dp     = NULL;
  mx->sc     = NULL;

  ESL_ALLOC(mx->dp_mem, sizeof(float) * (allocL+1) * allocM);
  mx->ncells = (allocL+1) * allocM;
  
  ESL_ALLOC(mx->dp, sizeof (float *) * (allocL+1));
  ESL_ALLOC(mx->sc, sizeof (float)   * (allocL+2));
  mx->allocR = allocL+1;

  for (i = 0; i <= allocL; i++)
    mx->dp[i] = mx->dp_mem + i*allocM;
  mx->validR = allocL+1;
  mx->allocM = allocM;

  mx->L = 0;
  mx->M = 0;
  return mx;

 ERROR:
  esl_hmx_Destroy(mx);
  return NULL;
}
Example #6
0
/* Function:  esl_recorder_Create()
 * Synopsis:  Create an <ESL_RECORDER>.
 * Incept:    SRE, Fri Dec 25 16:27:40 2009 [Casa de Gatos]
 *
 * Purpose:   Allocate a new <ESL_RECORDER> that will read
 *            line-by-line from input stream <fp>, saving
 *            a history of up to <maxlines> lines.
 *
 * Returns:   pointer to the new <ESL_RECORDER> on success.
 *
 * Throws:    <NULL> on allocation failure.
 */
ESL_RECORDER *
esl_recorder_Create(FILE *fp, int maxlines)
{
  ESL_RECORDER *rc = NULL;
  int           i;
  int           status;

  ESL_ALLOC(rc, sizeof(ESL_RECORDER));
  rc->fp         = fp;
  rc->line       = NULL;
  rc->nalloc     = maxlines;
  rc->lalloc     = NULL;
  rc->offset     = NULL;
  rc->nread      = 0;
  rc->ncurr      = 0;
  rc->baseline   = 0;
  rc->markline   = -1;

  ESL_ALLOC(rc->line,   sizeof(char *) * rc->nalloc);
  for (i = 0; i < rc->nalloc; i++) rc->line[i]   = NULL;

  ESL_ALLOC(rc->lalloc, sizeof(int)    * rc->nalloc);
  for (i = 0; i < rc->nalloc; i++) rc->lalloc[i] = 0;

  ESL_ALLOC(rc->offset, sizeof(off_t)  * rc->nalloc);
  for (i = 0; i < rc->nalloc; i++) rc->offset[i] = 0;

  return rc;

 ERROR:
  esl_recorder_Destroy(rc);
  return NULL;
}
Example #7
0
/* Function:  p7_bg_Create()
 * Incept:    SRE, Fri Jan 12 13:32:51 2007 [Janelia]
 *
 * Purpose:   Allocate a <P7_BG> object for digital alphabet <abc>,
 *            initializes it to appropriate default values, and
 *            returns a pointer to it.
 *            
 *            For protein models, default iid background frequencies
 *            are set (by <p7_AminoFrequencies()>) to average
 *            SwissProt residue composition. For DNA, RNA and other
 *            alphabets, default frequencies are set to a uniform
 *            distribution.
 *            
 *            The model composition <bg->mcomp[]> is not initialized
 *            here; neither is the filter null model <bg->fhmm>.  To
 *            use the filter null model, caller will want to
 *            initialize these fields by calling
 *            <p7_bg_SetFilterByHMM()>.
 *
 * Throws:    <NULL> on allocation failure.
 *
 * Xref:      STL11/125.
 */
P7_BG *
p7_bg_Create(const ESL_ALPHABET *abc)
{
  P7_BG *bg = NULL;
  int    status;

  ESL_ALLOC(bg, sizeof(P7_BG));
  bg->f     = NULL;
  bg->fhmm  = NULL;

  ESL_ALLOC(bg->f,     sizeof(float) * abc->K);
  if ((bg->fhmm = esl_hmm_Create(abc, 2)) == NULL) goto ERROR;

  if       (abc->type == eslAMINO)
    {
      if (p7_AminoFrequencies(bg->f) != eslOK) goto ERROR;
    }
  else
    esl_vec_FSet(bg->f, abc->K, 1. / (float) abc->K);

  bg->p1    = 350./351.;
  bg->omega = 1./256.;
  bg->abc   = abc;
  return bg;

 ERROR:
  p7_bg_Destroy(bg);
  return NULL;
}
Example #8
0
/* Function:  p7_oprofile_CreateBlock()
 * Synopsis:  Create a new block of empty <P7_OM_BLOCK>.
 * Incept:    
 *
 * Purpose:   Creates a block of empty <P7_OM_BLOCK> profile objects.
 *            
 * Returns:   a pointer to the new <P7_OM_BLOCK>. Caller frees this
 *            with <p7_oprofile_DestroyBlock()>.
 *
 * Throws:    <NULL> if allocation fails.
 */
P7_OM_BLOCK *
p7_oprofile_CreateBlock(int count)
{
  int i = 0;

  P7_OM_BLOCK *block = NULL;
  int status = eslOK;

  ESL_ALLOC(block, sizeof(*block));

  block->count = 0;
  block->listSize = 0;
  block->list  = NULL;

  ESL_ALLOC(block->list, sizeof(P7_OPROFILE *) * count);
  block->listSize = count;

  for (i = 0; i < count; ++i)
    {
      block->list[i] = NULL;
    }

  return block;

 ERROR:
  if (block != NULL)
    {
      if (block->list != NULL)  free(block->list);
      free(block);
    }
  
  return NULL;
}
Example #9
0
static int
parse_replace_string(const char *rstring, char **ret_from, char **ret_to)
{
  int    status;
  int    rlen, mid, i;
  int    is_valid = FALSE;
  char  *from = NULL;
  char  *to   = NULL;

  /* Note: we could use ESL_REGEXP but then multiple ':'s in rstring could cause problems */
  rlen = strlen(rstring);
  /* check validity of rstring: must be "<s1>:<s2>" with len(<s1>)==len(<s2>) */
  if((rlen % 2) != 0) { /* odd num chars, good */
    mid = rlen / 2;
    if(rstring[mid] == ':') { /* middle character is ':', good */
      ESL_ALLOC(from, sizeof(char) * (mid+1));
      ESL_ALLOC(to,   sizeof(char) * (mid+1));
      for(i = 0;     i < mid;  i++) from[i]       = rstring[i];
      for(i = mid+1; i < rlen; i++) to[i-(mid+1)] = rstring[i];
      from[mid] = '\0';
      to[mid]   = '\0';
      is_valid = TRUE;
    }
  }
  if(! is_valid) esl_fatal("--replace takes arg of <s1>:<s2> with len(<s1>) == len(<s2>); %s not recognized", rstring);
  *ret_from = from;
  *ret_to   = to;

  return eslOK;

 ERROR: 
  if(from != NULL) free(from);
  if(to   != NULL) free(to);
  return status;
}
Example #10
0
/* Function:  esl_msaweight_IDFilter()
 * Synopsis:  Filter by %ID.
 * Incept:    ER, Wed Oct 29 10:06:43 2008 [Janelia]
 * 
 * Purpose:   Constructs a new alignment by removing near-identical 
 *            sequences from a given alignment (where identity is 
 *            calculated *based on the alignment*).
 *            Does not affect the given alignment.
 *            Keeps earlier sequence, discards later one. 
 *           
 *            Usually called as an ad hoc sequence "weighting" mechanism.
 *           
 * Limitations:
 *            Unparsed Stockholm markup is not propagated into the
 *            new alignment.
 *           
 * Return:    <eslOK> on success, and the <newmsa>.
 *
 * Throws:    <eslEMEM> on allocation error. <eslEINVAL> if a pairwise
 *            identity calculation fails because of corrupted sequence 
 *            data. In either case, the <msa> is unmodified.
 *
 * Xref:      squid::weight.c::FilterAlignment().
 */
int
esl_msaweight_IDFilter(const ESL_MSA *msa, double maxid, ESL_MSA **ret_newmsa)
{
  int     *list   = NULL;               /* array of seqs in new msa */
  int     *useme  = NULL;               /* TRUE if seq is kept in new msa */
  int      nnew;			/* number of seqs in new alignment */
  double   ident;                       /* pairwise percentage id */
  int      i,j;                         /* seqs counters*/
  int      remove;                      /* TRUE if sq is to be removed */
  int      status;
  
  /* Contract checks
   */
  ESL_DASSERT1( (msa       != NULL) );
  ESL_DASSERT1( (msa->nseq >= 1)    );
  ESL_DASSERT1( (msa->alen >= 1)    );

  /* allocate */
  ESL_ALLOC(list,  sizeof(int) * msa->nseq);
  ESL_ALLOC(useme, sizeof(int) * msa->nseq);
  esl_vec_ISet(useme, msa->nseq, 0); /* initialize array */

  /* find which seqs to keep (list) */
  nnew = 0;
  for (i = 0; i < msa->nseq; i++)
    {
      remove = FALSE;
      for (j = 0; j < nnew; j++)
	{
	  if (! (msa->flags & eslMSA_DIGITAL)) {
	    if ((status = esl_dst_CPairId(msa->aseq[i], msa->aseq[list[j]], &ident, NULL, NULL))       != eslOK) goto ERROR;
	  } 
#ifdef eslAUGMENT_ALPHABET
	  else {
	    if ((status = esl_dst_XPairId(msa->abc, msa->ax[i], msa->ax[list[j]], &ident, NULL, NULL)) != eslOK) goto ERROR;
	  }
#endif
	  
	  if (ident > maxid)
	    { 
	      remove = TRUE; 
	      break; 
	    }
	}
      if (remove == FALSE) {
	list[nnew++] = i;
	useme[i]     = TRUE;
      }
    }
  if ((status = esl_msa_SequenceSubset(msa, useme, ret_newmsa)) != eslOK) goto ERROR;
 
  free(list);
  free(useme);
  return eslOK;

 ERROR:
  if (list  != NULL) free(list);
  if (useme != NULL) free(useme);
  return status;
}
Example #11
0
/* Function:  esl_hyperexp_Create()
 *
 * Purpose:   Creates an object to hold parameters for a <K>-component
 *            hyperexponential. 
 *
 *            Parameters in the object are initialized 
 *            ($q_k = \frac{1}{K}$, $\lambda_k = 1$, $\mu = 0$), but
 *            the caller will want to set these according to its own
 *            purposes.
 *
 * Args:      K  - number of components in the mixture
 *
 * Returns:   ptr to newly allocated/initialized <ESL_HYPEREXP> object.
 *
 * Throws:    NULL on allocation failure.
 */
ESL_HYPEREXP *
esl_hyperexp_Create(int K)
{
  int           status;
  ESL_HYPEREXP *h = NULL;
  int           k;

  ESL_ALLOC(h, sizeof(ESL_HYPEREXP));
  h->q = h->lambda = h->wrk = NULL;
  h->fixlambda = NULL;
  h->K         = K;
  h->fixmix    = FALSE;

  ESL_ALLOC(h->q,         sizeof(double) * K);
  ESL_ALLOC(h->lambda,    sizeof(double) * K);
  ESL_ALLOC(h->wrk,       sizeof(double) * K);
  ESL_ALLOC(h->fixlambda, sizeof(char)   * K);

  for (k = 0; k < K; k++)
    {
      h->q[k]        = 1. / (double) K;
      h->lambda[k]   = 1.;
      h->fixlambda[k]= 0;
    }
  h->mu = 0.;
  return h;
  
 ERROR:
  esl_hyperexp_Destroy(h);
  return NULL;
}
Example #12
0
/* Function:  p7_tophits_Create()
 * Synopsis:  Allocate a hit list.
 *
 * Purpose:   Allocates a new <P7_TOPHITS> hit list, for an initial
 *            allocation of <int_hit_alloc> hits (this will be grown
 *            later as needed). Return a pointer to it.
 *            
 * Args:      init_hit_alloc  - initial allocation size, # of hits.
 *                              Often p7_TOPHITS_DEFAULT_INIT_ALLOC.
 *
 * Throws:    <NULL> on allocation failure.
 */
P7_TOPHITS *
p7_tophits_Create(int init_hit_alloc)
{
  P7_TOPHITS *h = NULL;
  int         status;

  ESL_ALLOC(h, sizeof(P7_TOPHITS));
  h->hit    = NULL;
  h->unsrt  = NULL;

  if (( h->unsrt = p7_hit_Create(init_hit_alloc) ) == NULL) goto ERROR;
  ESL_ALLOC(h->hit, sizeof(P7_HIT *) * init_hit_alloc);

  h->Nalloc               = init_hit_alloc;
  h->N                    = 0;
  h->nreported            = 0;
  h->nincluded            = 0;
  h->is_sorted_by_sortkey = TRUE;     /* but only because there's 0 hits */
  h->is_sorted_by_seqidx  = FALSE;
  h->hit[0]               = h->unsrt; /* if you're going to call it "sorted" when it contains just one hit, you need this */
  return h;

 ERROR:
  p7_tophits_Destroy(h);
  return NULL;
}
Example #13
0
/* Function: annotate_model()
 * 
 * Purpose:  Transfer rf, cs, and other optional annotation from the alignment
 *           to the new model.
 * 
 * Args:     hmm       - [M] new model to annotate
 *           matassign - which alignment columns are MAT; [1..alen]
 *           msa       - alignment, including annotation to transfer
 *           
 * Return:   <eslOK> on success.
 *
 * Throws:   <eslEMEM> on allocation error.
 */
static int
annotate_model(P7_HMM *hmm, int *matassign, ESL_MSA *msa)
{                      
  int   apos;			/* position in matassign, 1.alen  */
  int   k;			/* position in model, 1.M         */
  int   status;

  /* Reference coord annotation  */
  if (msa->rf != NULL) {
    ESL_ALLOC(hmm->rf, sizeof(char) * (hmm->M+2));
    hmm->rf[0] = ' ';
    for (apos = k = 1; apos <= msa->alen; apos++) 
      if (matassign[apos]) hmm->rf[k++] = msa->rf[apos-1]; /* watch off-by-one in msa's rf */
    hmm->rf[k] = '\0';
    hmm->flags |= p7H_RF;
  }

  /* Model mask annotation  */
  if (msa->mm != NULL) {
    ESL_ALLOC(hmm->mm, sizeof(char) * (hmm->M+2));
    hmm->mm[0] = ' ';
    for (apos = k = 1; apos <= msa->alen; apos++)
      if (matassign[apos]) hmm->mm[k++] = msa->mm[apos-1];
    hmm->mm[k] = '\0';
    hmm->flags |= p7H_MMASK;
  }

  /* Consensus structure annotation */
  if (msa->ss_cons != NULL) {
    ESL_ALLOC(hmm->cs, sizeof(char) * (hmm->M+2));
    hmm->cs[0] = ' ';
    for (apos = k = 1; apos <= msa->alen; apos++)
      if (matassign[apos]) hmm->cs[k++] = msa->ss_cons[apos-1];
    hmm->cs[k] = '\0';
    hmm->flags |= p7H_CS;
  }

  /* Surface accessibility annotation */
  if (msa->sa_cons != NULL) {
    ESL_ALLOC(hmm->ca, sizeof(char) * (hmm->M+2));
    hmm->ca[0] = ' ';
    for (apos = k = 1; apos <= msa->alen; apos++)
      if (matassign[apos]) hmm->ca[k++] = msa->sa_cons[apos-1];
    hmm->ca[k] = '\0';
    hmm->flags |= p7H_CA;
  }

  /* The alignment map (1..M in model, 1..alen in alignment) */
  ESL_ALLOC(hmm->map, sizeof(int) * (hmm->M+1));
  hmm->map[0] = 0;
  for (apos = k = 1; apos <= msa->alen; apos++)
    if (matassign[apos]) hmm->map[k++] = apos;
  hmm->flags |= p7H_MAP;

  return eslOK;

 ERROR:
  return status;
}
Example #14
0
/* Function:  cp9_Copy()
 * Synopsis:  Copy a CM plan 9 HMM.
 *
 * Purpose:   Copies cp9 hmm <src> to cp9 hmm <dst>, where <dst>
 *            has already been allocated to be of sufficient size.
 *
 *            <src> should be properly normalized, no check is done to
 *            ensure that. If <src> is logoddsified (src->flags &
 *            CPLAN9_HASBITS) its bit scores will be copied to <dst>,
 *            otherwise they are invalid and won't be copied.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error; <eslEINVAL> if <dst> is too small
 *            to fit <src>.
 */
int
cp9_Copy(const CP9_t *src, CP9_t *dst)
{
    int status;
    int k;
    int src_has_bits = (src->flags & CPLAN9_HASBITS) ? TRUE : FALSE;

    if (src->M != dst->M) return eslEINVAL;

    dst->abc = src->abc;

    for(k = 0; k <= src->M; k++) {
        esl_vec_FCopy(src->t[k],   cp9_NTRANS,  dst->t[k]);
        esl_vec_FCopy(src->mat[k], src->abc->K, dst->mat[k]);
        esl_vec_FCopy(src->ins[k], src->abc->K, dst->ins[k]);
    }
    esl_vec_FCopy(src->begin, src->M+1, dst->begin);
    esl_vec_FCopy(src->end,   src->M+1, dst->end);
    if(src_has_bits) {
        esl_vec_ICopy(src->bsc_mem, src->M+1, dst->bsc_mem);
        esl_vec_ICopy(src->esc_mem, src->M+1, dst->esc_mem);
    }

    /* exploit linear-memory of these 2d arrays */
    if(src_has_bits) {
        esl_vec_ICopy(src->tsc_mem, cp9_NTRANS   * (src->M+1), dst->tsc_mem);
        esl_vec_ICopy(src->msc_mem, src->abc->Kp * (src->M+1), dst->msc_mem);
        esl_vec_ICopy(src->isc_mem, src->abc->Kp * (src->M+1), dst->isc_mem);
        esl_vec_ICopy(src->otsc,    cp9O_NTRANS  * (src->M+1), dst->otsc);
    }

    /* EL info */
    dst->el_self     = src->el_self;
    dst->el_selfsc   = src->el_selfsc;
    esl_vec_ICopy(src->has_el,     src->M+1,    dst->has_el);
    esl_vec_ICopy(src->el_from_ct, src->M+2,    dst->el_from_ct);
    for(k = 0; k <= src->M+1; k++) {
        if(src->el_from_ct[k] > 0) {
            ESL_ALLOC(dst->el_from_idx[k],  sizeof(int) * src->el_from_ct[k]);
            ESL_ALLOC(dst->el_from_cmnd[k], sizeof(int) * src->el_from_ct[k]);
            esl_vec_ICopy(src->el_from_idx[k],  src->el_from_ct[k], dst->el_from_idx[k]);
            esl_vec_ICopy(src->el_from_cmnd[k], src->el_from_ct[k], dst->el_from_cmnd[k]);
        }
    }

    dst->null2_omega = src->null2_omega;
    dst->null3_omega = src->null3_omega;
    esl_vec_FCopy(src->null, src->abc->K, dst->null);

    dst->p1    = src->p1;
    dst->flags = src->flags;

    return eslOK;

ERROR:
    return status;
}
Example #15
0
/* Function:  esl_msacluster_SingleLinkage()
 * Synopsis:  Single linkage clustering by percent identity.
 * Incept:    SRE, Sun Nov  5 10:11:45 2006 [Janelia]
 *
 * Purpose:   Perform single link clustering of the sequences in 
 *            multiple alignment <msa>. Any pair of sequences with
 *            percent identity $\geq$ <maxid> are linked (using
 *            the definition from the \eslmod{distance} module).
 *            
 *            The resulting clustering is optionally returned in one
 *            or more of <opt_c>, <opt_nin>, and <opt_nc>.  The
 *            <opt_c[0..nseq-1]> array assigns a cluster index
 *            <(0..nc-1)> to each sequence. For example, <c[4] = 1>
 *            means that sequence 4 is assigned to cluster 1.  The
 *            <opt_nin[0..nc-1]> array is the number of sequences
 *            in each cluster. <opt_nc> is the number of clusters.
 *
 *            Importantly, this algorithm runs in $O(N)$ memory, and
 *            produces one discrete clustering. Compare to
 *            <esl_tree_SingleLinkage()>, which requires an $O(N^2)$ 
 *            adjacency matrix, and produces a hierarchical clustering
 *            tree.
 *            
 *            The algorithm is worst case $O(LN^2)$ time, for $N$
 *            sequences of length $L$. However, the worst case is no
 *            links at all, and this is unusual. More typically, time
 *            scales as about $LN \log N$. The best case scales as
 *            $LN$, when there is just one cluster in a completely
 *            connected graph.
 *            
 * Args:      msa     - multiple alignment to cluster
 *            maxid   - pairwise identity threshold: cluster if $\geq$ <maxid>
 *            opt_c   - optRETURN: cluster assignments for each sequence, [0..nseq-1]
 *            opt_nin - optRETURN: number of seqs in each cluster, [0..nc-1] 
 *            opt_nc  - optRETURN: number of clusters        
 *
 * Returns:   <eslOK> on success; the <opt_c[0..nseq-1]> array contains
 *            cluster indices <0..nc-1> assigned to each sequence; the
 *            <opt_nin[0..nc-1]> array contains the number of seqs in
 *            each cluster; and <opt_nc> contains the number of
 *            clusters. The <opt_c> array and <opt_nin> arrays will be
 *            allocated here, if non-<NULL>, and must be free'd by the
 *            caller. The input <msa> is unmodified.
 *            
 *            The caller may pass <NULL> for either <opt_c> or
 *            <opt_nc> if it is only interested in one of the two
 *            results.
 *
 * Throws:    <eslEMEM> on allocation failure, and <eslEINVAL> if a pairwise
 *            comparison is invalid (which means the MSA is corrupted, so it
 *            shouldn't happen). In either case, <opt_c> and <opt_nin> are set to <NULL>
 *            and <opt_nc> is set to 0, and the <msa> is unmodified.
 */
int
esl_msacluster_SingleLinkage(const ESL_MSA *msa, double maxid, 
			     int **opt_c, int **opt_nin, int *opt_nc)

{
  int   status;
  int  *workspace  = NULL;
  int  *assignment = NULL;
  int  *nin        = NULL;
  int   nc;
  int   i;
#ifdef eslAUGMENT_ALPHABET
  struct msa_param_s param;
#endif

  /* Allocations */
  ESL_ALLOC(workspace,  sizeof(int) * msa->nseq * 2);
  ESL_ALLOC(assignment, sizeof(int) * msa->nseq);

  /* call to SLC API: */
  if (! (msa->flags & eslMSA_DIGITAL))
    status = esl_cluster_SingleLinkage((void *) msa->aseq, (size_t) msa->nseq, sizeof(char *),
				       msacluster_clinkage, (void *) &maxid, 
				       workspace, assignment, &nc);
#ifdef eslAUGMENT_ALPHABET
  else {
    param.maxid = maxid;
    param.abc   = msa->abc;
    status = esl_cluster_SingleLinkage((void *) msa->ax, (size_t) msa->nseq, sizeof(ESL_DSQ *),
				       msacluster_xlinkage, (void *) &param, 
				       workspace, assignment, &nc);
  }
#endif

  if (opt_nin != NULL) 
    {
      ESL_ALLOC(nin, sizeof(int) * nc);
      for (i = 0; i < nc; i++) nin[i] = 0;
      for (i = 0; i < msa->nseq; i++)
	nin[assignment[i]]++;
      *opt_nin = nin;
    }

  /* cleanup and return */
  free(workspace);
  if (opt_c  != NULL) *opt_c  = assignment; else free(assignment);
  if (opt_nc != NULL) *opt_nc = nc;
  return eslOK;

 ERROR:
  if (workspace  != NULL) free(workspace);
  if (assignment != NULL) free(assignment);
  if (nin        != NULL) free(nin);
  if (opt_c  != NULL) *opt_c  = NULL;
  if (opt_nc != NULL) *opt_nc = 0;
  return status;
}
Example #16
0
/* Function:  esl_hxp_FitCompleteBinned()
 *
 * Purpose:   Given a histogram <g> with binned observations, where each
 *            bin i holds some number of observed samples x with values from 
 *            lower bound l to upper bound u (that is, $l < x \leq u$),
 *            and given a starting guess <h> for hyperexponential parameters;
 *
 *            Find maximum likelihood parameters <h> by conjugate gradient
 *            descent, starting from the initial <h> and leaving the
 *            optimized solution in <h>.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error, and <h> is left in its
 *            initial state.
 */
int 
esl_hxp_FitCompleteBinned(ESL_HISTOGRAM *g, ESL_HYPEREXP *h)
{
  struct hyperexp_binned_data data;
  int     status;
  double *p   = NULL;
  double *u   = NULL;
  double *wrk = NULL;
  double  fx;
  int     i;
  double  tol = 1e-6;
  int     np;

  np = 0;
  if (! h->fixmix) np = h->K-1;  /* K-1 mix coefficients...      */
  for (i = 0; i < h->K; i++)     /* ...and up to K lambdas free. */
    if (! h->fixlambda[i]) np++;

  ESL_ALLOC(p,   sizeof(double) * np);
  ESL_ALLOC(u,   sizeof(double) * np);
  ESL_ALLOC(wrk, sizeof(double) * np * 4);

  /* Copy shared info into the "data" structure  */
  data.g     = g;
  data.h     = h;

  /* From h, create the parameter vector. */
  hyperexp_pack_paramvector(p, np, h);

  /* Define the step size vector u.
   */
  for (i = 0; i < np; i++) u[i] = 1.0;

  /* Feed it all to the mighty optimizer.
   */
  status = esl_min_ConjugateGradientDescent(p, u, np, 
					    &hyperexp_complete_binned_func, 
					    &hyperexp_complete_binned_gradient,
					    (void *) (&data), tol, wrk, &fx);
  if (status != eslOK) goto ERROR;

  /* Convert the final parameter vector back to a hyperexponential
   */
  hyperexp_unpack_paramvector(p, np, h);
  
  free(p);
  free(u);
  free(wrk);
  esl_hyperexp_SortComponents(h);
  return eslOK;

 ERROR:
  if (p   != NULL) free(p);
  if (u   != NULL) free(u);
  if (wrk != NULL) free(wrk);
  return status;
}
Example #17
0
/* mpi_worker()
 * The main control for an MPI worker process.
 */
static void
mpi_worker(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int             xstatus = eslOK;
  int             status;
  P7_HMM         *hmm     = NULL;
  char           *wbuf    = NULL;
  double         *xv      = NULL; /* result: array of N scores */
  int            *av      = NULL; /* optional result: array of N alignment lengths */
  int             wn      = 0;
  char            errbuf[eslERRBUFSIZE];
  int             pos;
 
  /* Worker initializes */
  if ((status = minimum_mpi_working_buffer(go, cfg->N, &wn)) != eslOK) xstatus = status;
  ESL_ALLOC(wbuf, wn * sizeof(char));
  ESL_ALLOC(xv,   cfg->N * sizeof(double) + 2);	
  if (esl_opt_GetBoolean(go, "-a"))
    ESL_ALLOC(av, cfg->N * sizeof(int));

  /* Main worker loop */
  while (p7_hmm_mpi_Recv(0, 0, MPI_COMM_WORLD, &wbuf, &wn, &(cfg->abc), &hmm) == eslOK) 
    {
      if (esl_opt_GetBoolean(go, "--recal")) {
	if (( status = recalibrate_model(go, cfg, errbuf, hmm))     != eslOK) goto CLEANERROR;
      }
      if ((status = process_workunit(go, cfg, errbuf, hmm, xv, av)) != eslOK) goto CLEANERROR;

      pos = 0;
      MPI_Pack(&status, 1,      MPI_INT,    wbuf, wn, &pos, MPI_COMM_WORLD);
      MPI_Pack(xv,      cfg->N, MPI_DOUBLE, wbuf, wn, &pos, MPI_COMM_WORLD);
      if (esl_opt_GetBoolean(go, "-a"))
	MPI_Pack(av,    cfg->N, MPI_INT,    wbuf, wn, &pos, MPI_COMM_WORLD);
      MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
      p7_hmm_Destroy(hmm);
    }

  free(wbuf);
  free(xv);
  if (av != NULL) free(av);
  return;

 CLEANERROR:
  pos = 0;
  MPI_Pack(&status, 1,                MPI_INT,  wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Pack(errbuf,  eslERRBUFSIZE,    MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
  if (wbuf != NULL) free(wbuf);
  if (hmm  != NULL) p7_hmm_Destroy(hmm);
  if (xv   != NULL) free(xv);
  if (av   != NULL) free(av);
  return;

 ERROR:
  p7_Fail("Allocation error in mpi_worker");
}
Example #18
0
/* Function: CreateCP9Matrix()
 * based on  CreatePlan7Matrix() <-- this function's comments below  
 * Purpose:  Create a dynamic programming matrix for standard Forward,
 *           Backward, or Viterbi, with scores kept as scaled log-odds
 *           integers. Keeps 2D arrays compact in RAM in an attempt 
 *           to maximize cache hits. 
 *           
 *           The mx structure can be dynamically grown, if a new
 *           HMM or seq exceeds the currently allocated size. Dynamic
 *           growing is more efficient than an alloc/free of a whole
 *           matrix for every new target. The ResizePlan7Matrix()
 *           call does this reallocation, if needed. Here, in the
 *           creation step, we set up some pads - to inform the resizing
 *           call how much to overallocate when it realloc's. 
 *           
 * Args:     N     - N+1 rows are allocated, usually N == 1 for 
 *                   scanning in memory efficient mode, or N == L, length of sequence.  
 *           M     - size of model in nodes
 *                 
 * Return:   mx
 *           mx is allocated here. Caller frees with FreeCP9Matrix(mx).
 */
CP9_MX *
CreateCP9Matrix(int N, int M)
{
  int status;
  CP9_MX *mx;
  int i;

  ESL_ALLOC(mx,      sizeof(CP9_MX));
  ESL_ALLOC(mx->mmx, sizeof(int *) * (N+1));
  ESL_ALLOC(mx->imx, sizeof(int *) * (N+1));
  ESL_ALLOC(mx->dmx, sizeof(int *) * (N+1));
  ESL_ALLOC(mx->elmx,sizeof(int *) * (N+1)); 
  /* slightly wasteful, some nodes can't go to EL (for ex: right half of MATPs) */
  ESL_ALLOC(mx->erow,    sizeof(int) * (N+1));
  ESL_ALLOC(mx->mmx_mem, sizeof(int) * ((N+1)*(M+1)));
  ESL_ALLOC(mx->imx_mem, sizeof(int) * ((N+1)*(M+1)));
  ESL_ALLOC(mx->dmx_mem, sizeof(int) * ((N+1)*(M+1)));
  ESL_ALLOC(mx->elmx_mem,sizeof(int) * ((N+1)*(M+1)));

  /* The indirect assignment below looks wasteful; it's actually
   * used for aligning data on 16-byte boundaries as a cache 
   * optimization in the fast altivec implementation
   */
  mx->mmx[0] = (int *) mx->mmx_mem;
  mx->imx[0] = (int *) mx->imx_mem;
  mx->dmx[0] = (int *) mx->dmx_mem;
  mx->elmx[0]= (int *) mx->elmx_mem;
  for (i = 1; i <= N; i++)
    {
      mx->mmx[i] = mx->mmx[0] + (i*(M+1));
      mx->imx[i] = mx->imx[0] + (i*(M+1));
      mx->dmx[i] = mx->dmx[0] + (i*(M+1));
      mx->elmx[i]= mx->elmx[0]+ (i*(M+1));
    }

  mx->M = M;
  mx->rows = N;
  mx->kmin = NULL;
  mx->kmax = NULL;
  mx->ncells_allocated = (M+1) * (N+1);
  mx->ncells_valid     = (M+1) * (N+1);
  mx->size_Mb =  (float) sizeof(CP9_MX);
  mx->size_Mb += (float) (sizeof(int *) * (mx->rows+1) * 4); /* mx->*mx ptrs */
  mx->size_Mb += (float) (sizeof(int)   * (mx->rows+1) * (M+1) * 4); /* mx->*mx_mem */
  mx->size_Mb += (float) (sizeof(int)   * (mx->rows+1));             /* mx->erow */
  mx->size_Mb /= 1000000.;

  return mx;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
Example #19
0
/* map_sub_msas
 *                   
 * msa1 and msa2 contain the same named sequences, msa1 contains a superset 
 * of the columns in msa2. Determine which of the msa1 columns the msa2
 * columns correspond to.
 */
static int
map_sub_msas(const ESL_GETOPTS *go, char *errbuf, ESL_MSA *msa1, ESL_MSA *msa2, char **ret_msa1_to_msa2_mask)
{
  int status;
  int  apos1, apos2;          /* counters over alignment position in msa1, msa2 respectively */
  int i;
  int *msa1_to_msa2_map;    /* [0..apos1..msa1->alen] msa2 alignment position that apos1 corresponds to */
  char *mask;

  /* contract check */
  if(! (msa1->flags & eslMSA_DIGITAL)) ESL_FAIL(eslEINVAL, errbuf, "in map_sub_msas() msa1 (%s) not digitized.\n", esl_opt_GetArg(go, 1));
  if(! (msa2->flags & eslMSA_DIGITAL)) ESL_FAIL(eslEINVAL, errbuf, "in map_sub_msas() msa2 (%s) not digitized.\n", esl_opt_GetString(go, "--submap"));
  if(msa1->alen <= msa2->alen) ESL_FAIL(eslEINVAL, errbuf, "in map_sub_msas() alignment length for msa1 (%" PRId64 "d) <= length for msa2 (%" PRId64 ")\n", msa1->alen, msa2->alen);
  
  ESL_ALLOC(mask, sizeof(char) * (msa1->alen+1));
  for(apos1 = 0; apos1 < msa1->alen; apos1++) mask[apos1] = '0';
  mask[msa1->alen] = '\0';

  ESL_ALLOC(msa1_to_msa2_map, sizeof(int) * (msa1->alen+1));
  esl_vec_ISet(msa1_to_msa2_map, (msa1->alen+1), -1);

  /* both alignments must have same 'named' sequences in same order */
  if(msa1->nseq != msa2->nseq) ESL_FAIL(eslEINVAL, errbuf, "in map_sub_msas() msa1 has %d sequences, msa2 has %d sequences\n", msa1->nseq, msa2->nseq);
  for(i = 0; i < msa1->nseq; i++) { 
    if(strcmp(msa1->sqname[i], msa2->sqname[i]) != 0) ESL_FAIL(eslEINVAL, errbuf, "in map_sub_msas() msa1 seq %d is named %s, msa2 seq %d is named %s\n", i, msa1->sqname[i], i, msa2->sqname[i]);
  }

  apos1 = 1;
  apos2 = 1;
  while((apos2 <= msa2->alen) || (apos1 <= msa1->alen)) { /* determine which apos1 (alignment column in msa1), apos2 (alignment column in msa2) corresponds to */
    for(i = 0; i < msa1->nseq; i++) { 
      if(msa1->ax[i][apos1] != msa2->ax[i][apos2]) { 
	apos1++; 
	break; /* try next apos1 */ 
      }
    }	
    if(i == msa1->nseq) { /* found a match */
      msa1_to_msa2_map[apos1] = apos2;
      mask[(apos1-1)] = '1';
      apos1++;
      apos2++;
    }
  }
  if((apos1 != (msa1->alen+1)) || (apos2 != (msa2->alen+1))) ESL_FAIL(eslEINVAL, errbuf, "in map_sub_msas(), failure mapping alignments, end of loop apos1-1 = %d (msa1->alen: %" PRId64 ") and apos2-1 = %d (msa2->alen: %" PRId64 ")\n", apos1-1, msa1->alen, apos2-1, msa2->alen);

  free(msa1_to_msa2_map);
  *ret_msa1_to_msa2_mask = mask;
  return eslOK;
  
 ERROR: 
  return status;
}
Example #20
0
static int
a2m_padding_digital(ESL_MSA *msa, char **csflag, int *nins, int ncons)
{
  ESL_DSQ *ax     = NULL;		/* new aligned sequence - will be swapped into msa->ax[] */
  ESL_DSQ  gapsym = esl_abc_XGetGap(msa->abc);
  int      apos, cpos, spos;	/* position counters for alignment 0..alen, consensus cols 0..cpos-1, sequence position 0..slen-1 */
  int      alen;
  int      icount;
  int      idx;
  int      status;

  alen = ncons;
  for (cpos = 0; cpos <= ncons; cpos++)
    alen += nins[cpos];

  ESL_ALLOC(msa->rf, sizeof(char) * (alen+1));
  for (apos = 0, cpos = 0; cpos <= ncons; cpos++)
    {
      for (icount = 0; icount < nins[cpos]; icount++) msa->rf[apos++] = '.';
      if  (cpos < ncons) msa->rf[apos++] = 'x';
    }
  msa->rf[apos] = '\0';

  for (idx = 0; idx < msa->nseq; idx++)
    {
      ESL_ALLOC(ax, sizeof(ESL_DSQ) * (alen + 2));    
      ax[0] = eslDSQ_SENTINEL;
      apos = spos  = 0; 
      for (cpos = 0; cpos <= ncons; cpos++)
	{
	  icount = 0;   
	  while (csflag[idx][spos] == FALSE)  { ax[apos+1] = msa->ax[idx][spos+1]; apos++; spos++; icount++; }
	  while (icount < nins[cpos]) 	      { ax[apos+1] = gapsym;               apos++;         icount++; }
	  if (cpos < ncons)                   { ax[apos+1] = msa->ax[idx][spos+1]; apos++; spos++;           }
	}
      ESL_DASSERT1( (msa->ax[idx][spos+1] == eslDSQ_SENTINEL) );
      ESL_DASSERT1( (apos == alen) );
      ax[alen+1] = eslDSQ_SENTINEL;
      free(msa->ax[idx]);
      msa->ax[idx] = ax;
      ax = NULL;
    }
  msa->alen = alen;



  return eslOK;
  
 ERROR:
  if (ax) free(ax);
  return status;
}
Example #21
0
/* Function:  p7_ViterbiMu()
 * Synopsis:  Determines the local Viterbi Gumbel mu parameter for a model.
 * Incept:    SRE, Tue May 19 10:26:19 2009 [Janelia]
 *
 * Purpose:   Identical to p7_MSVMu(), above, except that it fits
 *            Viterbi scores instead of MSV scores. 
 *
 *            The difference between the two mus is small, but can be
 *            up to ~1 bit or so for large, low-info models [J4/126] so
 *            decided to calibrate the two mus separately [J5/8]. 
 *            
 * Args:      r       :  source of random numbers
 *            om      :  score profile (length config is changed upon return!)
 *            bg      :  null model    (length config is changed upon return!)
 *            L       :  length of sequences to simulate
 *            N	      :  number of sequences to simulate		
 *            lambda  :  known Gumbel lambda parameter
 *            ret_vmu :  RETURN: ML estimate of location param mu
 *
 * Returns:   <eslOK> on success, and <ret_mu> contains the ML estimate
 *            of $\mu$.
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_ViterbiMu(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_vmu)
{
  P7_OMX  *ox      = p7_omx_Create(om->M, 0, 0); /* DP matrix: 1 row version */
  ESL_DSQ *dsq     = NULL;
  double  *xv      = NULL;
  int      i;
  float    sc, nullsc;
#ifndef p7_IMPL_DUMMY
  float    maxsc   = (32767.0 - om->base_w) / om->scale_w; /* if score overflows, use this [J4/139] */
#endif
  int      status;

  if (ox == NULL) { status = eslEMEM; goto ERROR; }
  ESL_ALLOC(xv,  sizeof(double)  * N);
  ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2));

  p7_oprofile_ReconfigLength(om, L);
  p7_bg_SetLength(bg, L);

  for (i = 0; i < N; i++)
    {
      if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR;
      if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc))          != eslOK) goto ERROR;   

      status = p7_ViterbiFilter(dsq, L, om, ox, &sc); 
#ifndef p7_IMPL_DUMMY
      if (status == eslERANGE) { sc = maxsc; status = eslOK; }
#endif
      if (status != eslOK)     goto ERROR;

      xv[i] = (sc - nullsc) / eslCONST_LOG2;
    }

  if ((status = esl_gumbel_FitCompleteLoc(xv, N, lambda, ret_vmu))  != eslOK) goto ERROR;
  p7_omx_Destroy(ox);
  free(xv);
  free(dsq);
  return eslOK;

 ERROR:
  *ret_vmu = 0.0;
  if (ox  != NULL) p7_omx_Destroy(ox);
  if (xv  != NULL) free(xv);
  if (dsq != NULL) free(dsq);
  return status;

}
Example #22
0
/* Step 1. Label all sequence fragments < fragfrac of average raw length */
static int
remove_fragments(struct cfg_s *cfg, ESL_MSA *msa, ESL_MSA **ret_filteredmsa, int *ret_nfrags)
{
  int     *useme    = NULL;
  double   len      = 0.0;
  int      i;
  int      nfrags;
  int      status;

  for (i = 0; i < msa->nseq; i++) 
    len += esl_abc_dsqrlen(msa->abc, msa->ax[i]);
  len *= cfg->fragfrac / (double) msa->nseq;

  ESL_ALLOC(useme, sizeof(int) * msa->nseq);
  for (nfrags = 0, i = 0; i < msa->nseq; i++) 
    useme[i] = (esl_abc_dsqrlen(msa->abc, msa->ax[i]) < len) ? 0 : 1;

  if ((status = esl_msa_SequenceSubset(msa, useme, ret_filteredmsa)) != eslOK) goto ERROR;
  *ret_nfrags = msa->nseq - esl_vec_ISum(useme, msa->nseq);

  free(useme);
  return eslOK;

 ERROR:
  if (useme != NULL) free(useme); 
  *ret_filteredmsa = NULL;
  return status;
}
Example #23
0
/* Function:  p7_checkptmx_DumpFBRow()
 * Synopsis:  Dump one row from fwd or bck version of the matrix.
 *
 * Purpose:   Dump current row <dpc> of forward or backward calculations from
 *            DP matrix <ox> for diagnostics. The index <rowi> is used
 *            as a row label, along with an additional free-text label
 *            <pfx>.  (The checkpointed backward implementation
 *            interleaves backward row calculations with recalculated
 *            fwd rows, both of which it is dumping; they need to be
 *            labeled something like "fwd" and "bck" to distinguish
 *            them in the debugging dump.)
 */
int
p7_checkptmx_DumpFBRow(P7_CHECKPTMX *ox, int rowi, __m128 *dpc, char *pfx)
{
  union { __m128 v; float x[p7_VNF]; } u;
  float *v         = NULL;		/*  */
  int    Q         = ox->Qf;
  int    M         = ox->M;
  float *xc        = (float *) (dpc + Q*p7C_NSCELLS);
  int    logify    = (ox->dump_flags & p7_SHOW_LOG) ? TRUE : FALSE;
  int    maxpfx    = ox->dump_maxpfx;
  int    width     = ox->dump_width;
  int    precision = ox->dump_precision;
  int    k,q,z;
  int    status;

  ESL_ALLOC(v, sizeof(float) * ( (Q*p7_VNF) + 1));
  v[0] = 0.;

  /* Line 1. M cells: unpack, unstripe, print */
  for (q = 0; q < Q; q++) {
    u.v = P7C_MQ(dpc, q);
    for (z = 0; z < p7_VNF; z++) v[q+Q*z+1] = u.x[z];
  }
  fprintf(ox->dfp, "%*s %3d M", maxpfx, pfx, rowi);
  for (k = 0; k <= M; k++) fprintf(ox->dfp, " %*.*f", width, precision, (logify ? esl_logf(v[k]) : v[k]));
  /* a static analyzer may complain about v[k] being uninitialized
   * if it isn't smart enough to see that M,Q are linked.
   */

  /* Line 1 end: Specials */
  for (z = 0; z < p7C_NXCELLS; z++)
    fprintf(ox->dfp, " %*.*f", width, precision, (logify ? esl_logf(xc[z]) : xc[z]));
  fputc('\n', ox->dfp);

  /* Line 2: I cells: unpack, unstripe, print */
  for (q = 0; q < Q; q++) {
    u.v = P7C_IQ(dpc, q);
    for (z = 0; z < p7_VNF; z++) v[q+Q*z+1] = u.x[z];
  }
  fprintf(ox->dfp, "%*s %3d I", maxpfx, pfx, rowi);
  for (k = 0; k <= M; k++) fprintf(ox->dfp, " %*.*f", width, precision, (logify ? esl_logf(v[k]) : v[k]));
  fputc('\n', ox->dfp);

  /* Line 3. D cells: unpack, unstripe, print */
  for (q = 0; q < Q; q++) {
    u.v = P7C_DQ(dpc, q);
    for (z = 0; z < p7_VNF; z++) v[q+Q*z+1] = u.x[z];
  }
  fprintf(ox->dfp, "%*s %3d D", maxpfx, pfx, rowi);
  for (k = 0; k <= M; k++) fprintf(ox->dfp, " %*.*f", width, precision, (logify ? esl_logf(v[k]) : v[k]));
  fputc('\n', ox->dfp);
  fputc('\n', ox->dfp);

  free(v);
  return eslOK;

 ERROR:
  if (v) free(v);
  return status;
}
Example #24
0
/* Function:  p7_prior_CreateLaplace()
 * Synopsis:  Creates Laplace plus-one prior.
 * Incept:    SRE, Sat Jun 30 09:48:13 2007 [Janelia]
 *
 * Purpose:   Create a Laplace plus-one prior for alphabet <abc>.
 */
P7_PRIOR *
p7_prior_CreateLaplace(const ESL_ALPHABET *abc)
{
  P7_PRIOR *pri = NULL;
  int        status;

  ESL_ALLOC(pri, sizeof(P7_PRIOR));
  pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;

  pri->tm = esl_mixdchlet_Create(1, 3);	     /* single component; 3 params */
  pri->ti = esl_mixdchlet_Create(1, 2);	     /* single component; 2 params */
  pri->td = esl_mixdchlet_Create(1, 2);	     /* single component; 2 params */
  pri->em = esl_mixdchlet_Create(1, abc->K); /* single component; K params */
  pri->ei = esl_mixdchlet_Create(1, abc->K); /* single component; K params */

  if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;

  pri->tm->pq[0] = 1.0;   esl_vec_DSet(pri->tm->alpha[0], 3,      1.0);  /* match transitions  */
  pri->ti->pq[0] = 1.0;   esl_vec_DSet(pri->ti->alpha[0], 2,      1.0);  /* insert transitions */
  pri->td->pq[0] = 1.0;   esl_vec_DSet(pri->td->alpha[0], 2,      1.0);  /* delete transitions */
  pri->em->pq[0] = 1.0;   esl_vec_DSet(pri->em->alpha[0], abc->K, 1.0);  /* match emissions    */
  pri->ei->pq[0] = 1.0;   esl_vec_DSet(pri->ei->alpha[0], abc->K, 1.0);  /* insert emissions   */
  return pri;

 ERROR:
  p7_prior_Destroy(pri);
  return NULL;
}
Example #25
0
/* Function:  p7_hmm_ScoreDataCreate()
 * Synopsis:  Create a <P7_SCOREDATA> model object, based on MSV-filter
 *            part of profile
 *
 * Purpose:   Allocate a <P7_SCOREDATA> object, then populate
 *            it with data based on the given optimized matrix.
 *
 *            Once a hit passes the MSV filter, and the prefix/suffix
 *            values of P7_SCOREDATA are required, p7_hmm_ScoreDataComputeRest()
 *            must be called.
 *
 * Args:      om         - P7_OPROFILE containing scores used to produce SCOREDATA contents
 *            do_opt_ext - boolean, TRUE if optimal-extension scores are required (for FM-MSV)
 *
 * Returns:   a pointer to the new <P7_SCOREDATA> object.
 *
 * Throws:    <NULL> on allocation failure.
 */
P7_SCOREDATA *
p7_hmm_ScoreDataCreate(P7_OPROFILE *om, P7_PROFILE *gm )
{
  P7_SCOREDATA *data = NULL;
  int    status;

  ESL_ALLOC(data, sizeof(P7_SCOREDATA));

  data->ssv_scores      = NULL;
  data->ssv_scores_f    = NULL;
  data->opt_ext_fwd     = NULL;
  data->opt_ext_rev     = NULL;
  data->prefix_lengths  = NULL;
  data->suffix_lengths  = NULL;
  data->fwd_scores      = NULL;
  data->fwd_transitions = NULL;

  scoredata_GetSSVScoreArrays(om, gm, data);

  return data;

ERROR:
 p7_hmm_ScoreDataDestroy(data);
 return NULL;
}
Example #26
0
/* Function: p7_Handmodelmaker()
 * 
 * Purpose:  Manual model construction.
 *           Construct an HMM from a digital alignment, where the
 *           <#=RF> line of the alignment file is used to indicate the
 *           columns assigned to matches vs. inserts.
 *           
 *           The <msa> must be in digital mode, and it must have
 *           a reference annotation line.
 *           
 *           NOTE: <p7_Handmodelmaker()> will slightly revise the
 *           alignment if necessary, if the assignment of columns
 *           implies DI and ID transitions.
 *           
 *           Returns both the HMM in counts form (ready for applying
 *           Dirichlet priors as the next step), and fake tracebacks
 *           for each aligned sequence. 
 *
 *           Models must have at least one node, so if the <msa> defined 
 *           no consensus columns, a <eslENORESULT> error is returned.
 *           
 * Args:     msa     - multiple sequence alignment
 *           bld       - holds information on regions requiring masking, optionally NULL -> no masking
 *           ret_hmm - RETURN: counts-form HMM
 *           opt_tr  - optRETURN: array of tracebacks for aseq's
 *           
 * Return:   <eslOK> on success. <ret_hmm> and <opt_tr> are allocated 
 *           here, and must be free'd by caller.
 *
 *           Returns <eslENORESULT> if no consensus columns were annotated;
 *           in this case, <ret_hmm> and <opt_tr> are returned NULL. 
 *           
 *           Returns <eslEFORMAT> if the <msa> doesn't have a reference
 *           annotation line.
 *           
 * Throws:   <eslEMEM> on allocation failure. Throws <eslEINVAL> if the <msa>
 *           isn't in digital mode.
 */            
int
p7_Handmodelmaker(ESL_MSA *msa, P7_BUILDER *bld, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
{
  int        status;
  int       *matassign = NULL;    /* MAT state assignments if 1; 1..alen */
  int        apos;                /* counter for aligned columns         */

  if (! (msa->flags & eslMSA_DIGITAL)) ESL_XEXCEPTION(eslEINVAL, "need a digital msa");
  if (msa->rf == NULL)                 return eslEFORMAT;

  ESL_ALLOC(matassign, sizeof(int) * (msa->alen+1));
 
  /* Watch for off-by-one. rf is [0..alen-1]; matassign is [1..alen] */
  for (apos = 1; apos <= msa->alen; apos++)
    matassign[apos] = (esl_abc_CIsGap(msa->abc, msa->rf[apos-1])? FALSE : TRUE);

  /* matassign2hmm leaves ret_hmm, opt_tr in their proper state: */
  if ((status = matassign2hmm(msa, matassign, ret_hmm, opt_tr)) != eslOK) goto ERROR;

  free(matassign);
  return eslOK;

 ERROR:
  if (matassign != NULL) free(matassign);
  return status;
}
Example #27
0
/* read_mask_file
 *
 * Given an open file pointer, read the first token of the
 * file and return it as *ret_mask. It must contain only
 * '0' or '1' characters.
 *
 * Returns:  eslOK on success.
 */
int
read_mask_file(char *filename, char *errbuf, char **ret_mask, int *ret_mask_len)
{
  ESL_FILEPARSER *efp    = NULL;
  char           *mask   = NULL;
  char           *tok;
  int             toklen;
  int             n;
  int             status;


  if (esl_fileparser_Open(filename, NULL, &efp) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "failed to open %s in read_mask_file\n", filename);
  esl_fileparser_SetCommentChar(efp, '#');
  
  if((status = esl_fileparser_GetToken(efp, &tok, &toklen)) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "failed to read a single token from %s\n", filename);

  ESL_ALLOC(mask, sizeof(char) * (toklen+1));
  for(n = 0; n < toklen; n++) { 
    if((tok[n] == '0') || (tok[n] == '1')) { 
      mask[n] = tok[n];
    }
    else { ESL_XFAIL(eslFAIL, errbuf, "read a non-0 and non-1 character (%c) in the mask file %s\n", tok[n], filename); }
  }
  mask[n] = '\0';

  *ret_mask     = mask;
  *ret_mask_len = n;
  esl_fileparser_Close(efp);
  return eslOK;
  
 ERROR:
  if (efp)  esl_fileparser_Close(efp);
  if (mask) free(mask);
  return status;
}
Example #28
0
/* sample_endpoints()
 * Incept:    SRE, Mon Jan 22 10:43:20 2007 [Janelia]
 *
 * Purpose:   Given a profile <gm> and random number source <r>, sample
 *            a begin transition from the implicit probabilistic profile
 *            model, yielding a sampled start and end node; return these
 *            via <ret_kstart> and <ret_kend>.
 *            
 *            By construction, the entry at node <kstart> is into a
 *            match state, but the exit from node <kend> might turn
 *            out to be from either a match or delete state.
 *            
 *            We assume that exits j are uniformly distributed for a
 *            particular entry point i: $a_{ij} =$ constant $\forall
 *            j$.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error.
 *            
 * Xref:      STL11/138           
 */
static int
sample_endpoints(ESL_RANDOMNESS *r, const P7_PROFILE *gm, int *ret_kstart, int *ret_kend)
{
  float *pstart = NULL;
  int    k;
  int    kstart, kend;
  int    status;

  /* We have to backcalculate a probability distribution from the
   * lod B->Mk scores in a local model; this is a little time consuming,
   * but we don't have to do it often.
   */
  ESL_ALLOC(pstart, sizeof(float) * (gm->M+1));
  pstart[0] = 0.0f;
  for (k = 1; k <= gm->M; k++)
    pstart[k] = exp(p7P_TSC(gm, k-1, p7P_BM)) * (gm->M - k + 1); /* multiply p_ij by the number of exits j */
  kstart = esl_rnd_FChoose(r, pstart, gm->M+1);          	 /* sample the starting position from that distribution */
  kend   = kstart + esl_rnd_Roll(r, gm->M-kstart+1);           /* and the exit uniformly from possible exits for it */

  free(pstart);
  *ret_kstart = kstart;
  *ret_kend   = kend;
  return eslOK;
  
 ERROR:
  if (pstart != NULL) free(pstart);
  *ret_kstart = 0;
  *ret_kend   = 0;
  return status;
}
Example #29
0
/* Function:  cm_alndata_Create()
 * Synopsis:  Allocate a CM_ALNDATA object.
 * Incept:    EPN, Fri Jan  6 09:01:33 2012
 *            
 * Purpose:   Allocates a new <CM_ALNDATA> and returns a pointer
 *            to it.
 *
 * Throws:    <NULL> on allocation failure.
 */
CM_ALNDATA *
cm_alndata_Create(void)
{
  int status;
  CM_ALNDATA *data = NULL;

  ESL_ALLOC(data, sizeof(CM_ALNDATA));
  data->sq         = NULL;
  data->idx        = -1;
  data->tr         = NULL;
  data->sc         = 0.;
  data->pp         = 0.;
  data->ppstr      = NULL;
  data->spos       = -1;
  data->epos       = -1;
  data->secs_bands = 0.;
  data->secs_aln   = 0.;
  data->mb_tot     = 0.;
  data->tau        = -1.;
  
  return data;

 ERROR: 
  return NULL;
}
Example #30
0
static int
a2m_padding_text(ESL_MSA *msa, char **csflag, int *nins, int ncons)
{
  char   *aseq = NULL;		/* new aligned sequence - will be swapped into msa->aseq[] */
  int     apos, cpos, spos;	/* position counters for alignment 0..alen, consensus cols 0..cpos-1, sequence position 0..slen-1 */
  int     alen;
  int     icount;
  int     idx;
  int     status;

  alen = ncons;
  for (cpos = 0; cpos <= ncons; cpos++)
    alen += nins[cpos];

  ESL_ALLOC(msa->rf, sizeof(char) * (alen+1));
  for (apos = 0, cpos = 0; cpos <= ncons; cpos++)
    {
      for (icount = 0; icount < nins[cpos]; icount++) msa->rf[apos++] = '.';
      if  (cpos < ncons) msa->rf[apos++] = 'x';
    }
  msa->rf[apos] = '\0';
  
  for (idx = 0; idx < msa->nseq; idx++)
    {
      ESL_ALLOC(aseq, sizeof(char) * (alen + 1));    
      apos = spos  = 0; 
      for (cpos = 0; cpos <= ncons; cpos++)
	{
	  icount = 0;   
	  while (csflag[idx][spos] == FALSE)  { aseq[apos] = msa->aseq[idx][spos]; apos++; spos++; icount++; }
	  while (icount < nins[cpos]) 	      { aseq[apos] = '.';                  apos++;         icount++; }
	  if (cpos < ncons) 	              { aseq[apos] = msa->aseq[idx][spos]; apos++; spos++;           }
	}
      ESL_DASSERT1( (msa->aseq[idx][spos] == '\0') );
      ESL_DASSERT1( (apos == alen) );
      aseq[alen] = '\0';
      free(msa->aseq[idx]);
      msa->aseq[idx] = aseq;
      aseq = NULL;
    }
  msa->alen = alen;
  return eslOK;
  
 ERROR:
  if (aseq) free(aseq);
  return status;
}