コード例 #1
0
ファイル: selex.c プロジェクト: Denis84/EPA-WorkBench
/* Function: DealignAseqs()
 *
 * Purpose:  Make gap-free ("raw") copies of a set of aligned sequences.
 *           Every character for which isgap() is true is dropped; all
 *           other characters are copied in order.
 *
 * Args:     aseqs     - array of num NUL-terminated aligned sequences
 *           num       - number of sequences in aseqs
 *           ret_rseqs - RETURN: newly allocated array of num raw sequences
 *
 * Returns:  1. Both the pointer array and each string are obtained from
 *           MallocOrDie(); the caller frees each rseqs[i] and then rseqs.
 *           No code path in this function returns 0.
 */
int
DealignAseqs(char **aseqs, int num, char ***ret_rseqs)
{
  char      **raw;              /* array of de-aligned copies        */
  const char *src;              /* read cursor in aligned sequence   */
  char       *dst;              /* write cursor in raw sequence      */
  int         i;                /* sequence counter                  */

  raw = (char **) MallocOrDie (num * sizeof(char *));

  for (i = 0; i < num; i++)
    {
      src = aseqs[i];

      /* The raw copy can never be longer than the aligned original,
       * so the aligned length (+1 for the NUL) is always enough.
       */
      raw[i] = (char *) MallocOrDie ((strlen(src) + 1) * sizeof(char));
      dst    = raw[i];

      for (; *src != '\0'; src++)
        {
          if (isgap(*src)) continue;
          *dst = *src;
          dst++;
        }
      *dst = '\0';
    }

  *ret_rseqs = raw;
  return 1;
}
コード例 #2
0
/* Function: EVDBasicFit()
 * Date:     SRE, Wed Nov 12 11:02:27 1997 [St. Louis]
 *
 * Purpose:  Fit a score histogram to the extreme value distribution
 *           by simple linear regression (no outlier pruning), then
 *           hand the resulting mu and lambda to
 *           ExtremeValueSetHistogram() for the score range
 *           lowscore..highscore.
 *
 *           This is the basic, nonrobust counterpart of
 *           ExtremeValueFitHistogram().
 *
 * Methods:  Linear regression fitting method [Collins88,Lawless82]:
 *           the cumulative distribution P(S<x) is transformed to
 *           log[-log(P(S<x))], which is linear in x with
 *           slope -lambda and intercept lambda*mu.
 *
 * Args:     h         - histogram to fit
 *
 * Return:   (void)
 */
void
EVDBasicFit(struct histogram_s *h)
{
  float *cdf;                   /* P(S < x), later log[-log(P(S<x))]        */
  float *xval;                  /* x-axis values handed to FLinefit()       */
  float  slope, intercept;      /* m,b fit from FLinefit()                  */
  float  corr;                  /* correlation coeff of line fit, not used  */
  float  lambda, mu;            /* slope, intercept converted to EVD params */
  int    nbins;                 /* number of bins in lowscore..highscore    */
  int    running;               /* cumulative count of scores seen so far   */
  int    i;                     /* bin index, i.e. score - h->lowscore      */
  int    sc;                    /* score corresponding to bin i             */

  /* Both axes are indexed 0..highscore-lowscore.
   */
  nbins = h->highscore - h->lowscore + 1;
  cdf   = (float *) MallocOrDie(sizeof(float) * nbins);
  xval  = (float *) MallocOrDie(sizeof(float) * nbins);
  for (i = 0; i < nbins; i++)
    {
      xval[i] = 0.;
      cdf[i]  = 0.;
    }

  /* Accumulate the histogram into P(S < x). The x value is sc+1
   * because a bin holds scores between x and x+1. Note that the
   * histogram itself is indexed relative to h->min, not h->lowscore.
   */
  running = 0;
  for (i = 0; i < nbins; i++)
    {
      sc       = h->lowscore + i;
      running += h->histogram[sc - h->min];
      cdf[i]   = (float) running / (float) h->total;
      xval[i]  = (float) (sc + 1);
    }

  /* Convert the y axis to log[-log(P(S<x))]. The last bin is left
   * out, both here and in the regression below (nbins-1 points).
   */
  for (i = 0; i < nbins - 1; i++)
    cdf[i] = log(-1. * log(cdf[i]));

  /* Regress, then turn slope/intercept into EVD parameters:
   * lambda = -m and mu = b/lambda.
   */
  FLinefit(xval, cdf, nbins-1, &intercept, &slope, &corr);
  lambda = -1. * slope;
  mu     = intercept / lambda;

  /* Pass 2 for the additional degrees of freedom lost by fitting
   * mu and lambda.
   */
  ExtremeValueSetHistogram(h, mu, lambda, h->lowscore, h->highscore, 2);

  free(xval);
  free(cdf);
  return;
}
コード例 #3
0
ファイル: msa.c プロジェクト: hyphaltip/subopt-kaks
/* Function: MSAAddGS()
 * Date:     SRE, Wed Jun  2 06:57:03 1999 [St. Louis]
 *
 * Purpose:  Attach an unparsed #=GS markup line to one sequence of
 *           an MSA, growing the tag tables as needed.
 *
 *           The same tag may occur more than once for a sequence
 *           (for example "DR PDB;" structure links in Pfam). Such
 *           repeats are appended to the existing annotation string,
 *           separated by '\n'.
 *
 * Args:     msa    - multiple alignment structure
 *           tag    - markup tag (e.g. "AC")
 *           sqidx  - index of sequence to assoc markup with (0..nseq-1)
 *           value  - markup (e.g. "P00666")
 *
 * Returns:  (void). Calls Die() if sre_strcat() reports failure.
 */
void
MSAAddGS(MSA *msa, char *tag, int sqidx, char *value)
{
  int     tagidx;               /* index of tag in msa->gs_tag / msa->gs */
  int     s;                    /* counter over per-sequence slots       */
  int     len;                  /* annotation length after adding '\n'   */
  char  **row;                  /* freshly allocated per-sequence row    */
  char  **slot;                 /* annotation slot for (tagidx, sqidx)   */

  if (msa->gs_tag != NULL)
    {
      /* Tables already exist: look the tag up, and grow the tables
       * by exactly one entry if it is new.
       */
      tagidx = GKIKeyIndex(msa->gs_idx, tag);
      if (tagidx < 0)
        {
          tagidx = GKIStoreKey(msa->gs_idx, tag);
          SQD_DASSERT1((tagidx == msa->ngs));
          msa->gs_tag = ReallocOrDie(msa->gs_tag, (msa->ngs+1) * sizeof(char *));
          msa->gs     = ReallocOrDie(msa->gs, (msa->ngs+1) * sizeof(char **));

          row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
          for (s = 0; s < msa->nseqalloc; s++)
            row[s] = NULL;
          msa->gs[msa->ngs] = row;
        }
    }
  else
    {
      /* Very first GS tag: create the key index and one-entry tables.
       */
      msa->gs_idx = GKIInit();
      tagidx      = GKIStoreKey(msa->gs_idx, tag);
      SQD_DASSERT1((tagidx == 0));
      msa->gs_tag = MallocOrDie(sizeof(char *));
      msa->gs     = MallocOrDie(sizeof(char **));

      row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      for (s = 0; s < msa->nseqalloc; s++)
        row[s] = NULL;
      msa->gs[0] = row;
    }

  /* A tag index equal to the current tag count marks a tag that was
   * just added above; record its name and count it.
   */
  if (tagidx == msa->ngs)
    {
      msa->gs_tag[tagidx] = sre_strdup(tag, -1);
      msa->ngs++;
    }

  slot = &(msa->gs[tagidx][sqidx]);
  if (*slot != NULL)
    {
      /* >1 annotation of this seq with this tag; append after a '\n' */
      len = sre_strcat(slot, -1, "\n", 1);
      if (len < 0)
        Die("failed to sre_strcat()");
      if (sre_strcat(slot, len, value, -1) < 0)
        Die("failed to sre_strcat()");
    }
  else
    {
      /* first annotation of this seq with this tag */
      *slot = sre_strdup(value, -1);
    }
  return;
}
コード例 #4
0
ファイル: sqio.c プロジェクト: obbila/CustomWise
/* Function: ReadMultipleRseqs()
 *
 * Purpose:  Open a data file and parse it into an array of rseqs
 *           (raw, unaligned sequences) plus a parallel array of
 *           SQINFO records.
 *
 *           Alignment formats (kSelex, kMSF, kClustal) are read
 *           with ReadAlignment() and then de-aligned; every other
 *           format is read sequentially with ReadSeq().
 *
 * Args:     seqfile    - name of the file to read
 *           fformat    - format code of the file
 *           ret_rseqs  - RETURN: array of raw sequences
 *           ret_sqinfo - RETURN: array of per-sequence info
 *           ret_num    - RETURN: number of sequences read
 *
 * Returns:  1 on success; the caller is then responsible for
 *           free'ing the memory returned in ret_rseqs and ret_sqinfo.
 *           0 if the file could not be opened or read; in that case
 *           the RETURN arguments are not written and nothing
 *           allocated here is left behind.
 */
int
ReadMultipleRseqs(char              *seqfile,
		  int                fformat,
		  char            ***ret_rseqs,
		  SQINFO **ret_sqinfo,
		  int               *ret_num)
{
  SQINFO *sqinfo;               /* array of sequence optional info         */
  SQFILE *dbfp;                 /* open ptr for sequential access of file  */
  char  **rseqs;                /* sequence array                          */
  char  **aseqs;                /* aligned sequences, if file is aligned   */
  AINFO   ainfo;                /* alignment-associated information        */
  int     numalloced;           /* num of seqs currently alloced for       */
  int     idx;
  int     num;

  if (fformat == kSelex || fformat == kMSF || fformat == kClustal)
    {
      if (! ReadAlignment(seqfile, fformat, &aseqs, &ainfo)) return 0;
      if (! DealignAseqs(aseqs, ainfo.nseq, &rseqs))
        {
          /* don't leak the alignment we just read */
          FreeAlignment(aseqs, &ainfo);
          return 0;
        }

      /* copy the sqinfo array, since the alignment is freed below
       */
      num    = ainfo.nseq;
      sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO)*ainfo.nseq);
      for (idx = 0; idx < ainfo.nseq; idx++)
        SeqinfoCopy(&(sqinfo[idx]), &(ainfo.sqinfo[idx]));
      FreeAlignment(aseqs, &ainfo);
    }
  else
    {
      /* Open the file before allocating anything, so that a failed
       * open has nothing to clean up.
       */
      if ((dbfp = SeqfileOpen(seqfile, fformat, NULL)) == NULL) return 0;

				/* initial alloc */
      num        = 0;
      numalloced = 16;
      rseqs  = (char **) MallocOrDie (numalloced * sizeof(char *));
      sqinfo = (SQINFO *) MallocOrDie (numalloced * sizeof(SQINFO));

      /* ReadSeq() fills slot [num]; there is always one free slot
       * because the arrays are grown as soon as they become full.
       */
      while (ReadSeq(dbfp, fformat, &rseqs[num], &(sqinfo[num])))
        {
          num++;
          if (num == numalloced) /* more seqs coming, alloc more room */
            {
              numalloced += 16;
              rseqs  = (char **) ReallocOrDie (rseqs, numalloced*sizeof(char *));
              sqinfo = (SQINFO *) ReallocOrDie (sqinfo, numalloced * sizeof(SQINFO));
            }
        }
      SeqfileClose(dbfp);
    }

  *ret_rseqs  = rseqs;
  *ret_sqinfo = sqinfo;
  *ret_num    = num;
  return 1;
}
コード例 #5
0
/* Function: make_ref_alilist()
 *
 * Purpose:  Build a list mapping raw symbols of s1 to the raw symbol
 *           index they are aligned to in s2 (or -1 where s2 has a
 *           gap), restricted to those s1 symbols flagged as
 *           canonical. A raw symbol is canonical when the column it
 *           occupies in k1 has a nonzero entry in ref.
 *
 * Args:     ref         - per-column flags, indexed by column of k1
 *           k1          - aligned sequence used to locate canonical symbols
 *           k2          - not used by this function
 *           s1          - aligned sequence to construct the list for
 *           s2          - sequence s1 is aligned to
 *           ret_s1_list - RETURN: the constructed list (caller must free)
 *           ret_listlen - RETURN: number of entries in the list
 *
 * Returns:  1 (always).
 *
 * NOTE(review): the flag array is sized from strlen(s1) but filled
 * while walking k1; this presumably relies on k1 and s1 holding the
 * same raw sequence -- confirm against the callers.
 */
/*ARGSUSED*/
static int
make_ref_alilist(int *ref, char *k1, char *k2,
		 char *s1, char *s2, int **ret_s1_list, int *ret_listlen)
{
  int    *list;                 /* the list being built                      */
  int    *is_canon;             /* is_canon[i]: raw symbol i of s1 countable */
  int     c;                    /* column position in alignment              */
  int     raw1, raw2;           /* raw symbol index at current col in s1, s2 */
  int     nlist;                /* number of list entries written            */
  int     gap1, gap2;           /* does s1 / s2 have a gap in this column?   */
  size_t  alen;                 /* aligned length of s1                      */

  /* Neither array can need more entries than s1 has columns.
   */
  alen     = strlen(s1);
  list     = (int *) MallocOrDie (sizeof(int) * alen);
  is_canon = (int *) MallocOrDie (sizeof(int) * alen);

  /* Pass 1: flag each raw symbol (walking k1) by whether its column
   * is marked in ref.
   */
  raw1 = 0;
  for (c = 0; k1[c] != '\0'; c++)
    if (! isgap(k1[c]))
      {
        is_canon[raw1] = (ref[c] != 0);
        raw1++;
      }

  /* Pass 2: walk the s1/s2 alignment. The list position (nlist)
   * advances only for canonical s1 symbols, while raw1/raw2 advance
   * for every non-gap symbol in their own sequence.
   */
  raw1 = raw2 = nlist = 0;
  for (c = 0; s1[c] != '\0'; c++)
    {
      gap1 = isgap(s1[c]);
      gap2 = isgap(s2[c]);

      if (! gap1)
        {
          if (is_canon[raw1])
            {
              list[nlist] = gap2 ? -1 : raw2;
              nlist++;
            }
          raw1++;
        }
      if (! gap2)
        raw2++;
    }

  free(is_canon);
  *ret_listlen = nlist;
  *ret_s1_list = list;
  return 1;
}
コード例 #6
0
ファイル: emit.c プロジェクト: Denis84/EPA-WorkBench
/* Function: StateOccupancy()
 * Date:     SRE, Wed Nov 11 09:46:15 1998 [St. Louis]
 *
 * Purpose:  Calculate the expected state occupancy for
 *           a given HMM in generated traces.
 *
 *           Note that expected prob of getting into
 *           any special state in a trace is trivial:
 *              S,N,B,E,C,T = 1.0
 *              J = E->J transition prob
 *
 * Args:     hmm    - the model
 *           ret_mp - RETURN: [1..M] prob's of occupying M
 *           ret_ip - RETURN: [1..M-1] prob's of occupying I
 *           ret_dp - RETURN: [1..M] prob's of occupying D
 *
 * Returns:  void
 *           mp, ip, dp are malloc'ed here. Caller must free().
 *           Element 0 of each array is allocated but never written.
 *
 * NOTE(review): the code indexes node 1 unconditionally, so it
 * presumably requires hmm->M >= 1 -- confirm.
 */
void
StateOccupancy(struct plan7_s *hmm, float **ret_mp, float **ret_ip, float **ret_dp)
{
  float *fmp, *fip, *fdp;       /* forward probabilities  */
  float  norm;                  /* fmp[k]+fdp[k] before renormalization */
  int k;			/* counter for nodes      */

  /* Initial allocations. fip has one element fewer because there
   * is no insert state at node M.
   */
  fmp = MallocOrDie (sizeof(float) * (hmm->M+1));
  fip = MallocOrDie (sizeof(float) * (hmm->M));
  fdp = MallocOrDie (sizeof(float) * (hmm->M+1));

  /* Forward pass.
   */
  fdp[1] = hmm->tbd1;
  fmp[1] = hmm->begin[1];
  /* With a single-node model fip only has element 0, so there is no
   * fip[1] to fill in.
   */
  if (hmm->M > 1)
    fip[1] = fmp[1] * hmm->t[1][TMI];
  for (k = 2; k <= hmm->M; k++)
    {
			/* M: from M,D,I at k-1, or B; count t_II as 1.0 */
      fmp[k] = fmp[k-1] * hmm->t[k-1][TMM] +
	       fip[k-1] +
               fdp[k-1] * hmm->t[k-1][TDM] +
	       hmm->begin[k];
			/* D: from M,D at k-1 */
      fdp[k] = fmp[k-1] * hmm->t[k-1][TMD] +
	       fdp[k-1] * hmm->t[k-1][TDD];
			/* I: from M at k; don't count II */
      if (k < hmm->M) {
	fip[k] = fmp[k] * hmm->t[k][TMI];
      }

      SQD_DASSERT2((fabs(1.0f - fmp[k] - fdp[k]) < 1e-6f));

      /* Renormalize to prevent propagating fp errors. The sum is
       * taken once, up front, so that both terms are divided by the
       * same value (dividing fdp[k] by a sum that already contains
       * the rescaled fmp[k] would not make them add up to 1).
       */
      norm    = fmp[k] + fdp[k];
      fmp[k] /= norm;
      fdp[k] /= norm;
    }
  /* We don't need a backward pass; all backwards P's are 1.0
   * by definition (you can always get out of a state with P=1).
   * The only situation where this might not be true is for
   * a TII of 1.0, when TIM = 0 -- but in that case, if there's
   * a finite chance of getting into that insert state, the model
   * generates infinitely long sequences, so we can consider this
   * situation "perverse" and disallow it elsewhere in building
   * profile HMMs.
   */

  /* Return.
   */
  *ret_mp = fmp;
  *ret_dp = fdp;
  *ret_ip = fip;
}
コード例 #7
0
/* Function: AllocTophits()
 *
 * Purpose:  Allocate and initialize an empty struct tophit_s, for
 *           maintaining a list of top-scoring hits in a database
 *           search.
 *
 * Args:     lumpsize - allocation lumpsize; also the number of
 *                      struct hit_s slots allocated up front
 *
 * Return:   An allocated struct tophit_s holding zero hits.
 *           Caller must free.
 */
struct tophit_s *
AllocTophits(int lumpsize)
{
  struct tophit_s *h;

  h = MallocOrDie (sizeof(*h));

  /* bookkeeping: nothing stored yet, room for one lump */
  h->num   = 0;
  h->lump  = lumpsize;
  h->alloc = lumpsize;

  /* storage: unsorted hits get their first lump now; the hit
   * pointer starts out unset
   */
  h->hit   = NULL;
  h->unsrt = MallocOrDie (lumpsize * sizeof(struct hit_s));

  return h;
}
コード例 #8
0
ファイル: gsi64.c プロジェクト: BioInfoTools/MACSE
/*****************************************************************
 * GSI64 index construction routines
 * SRE, Wed Nov 10 11:49:14 1999 [St. Louis]
 * 
 * API:
 *       g = GSI64AllocIndex();
 *       
 *       [foreach filename, <32 char, no directory path]
 *          GSI64AddFileToIndex(g, filename);
 *          filenum++;
 *          [foreach key, <32 char, w/ filenum 1..nfiles, w/ 64bit offset]
 *             GSI64AddKeyToIndex(g, key, filenum, offset);
 *            
 *       GSI64SortIndex(g);
 *       GSI64WriteIndex(fp, g);
 *       GSI64FreeIndex(g);
 *****************************************************************/
/* Function: GSI64AllocIndex()
 *
 * Purpose:  Allocate an empty GSI64 index: room for 10 file names,
 *           10 file format codes and 100 keys, with both counters
 *           set to zero.
 *
 * Returns:  the new index. Allocation goes through MallocOrDie().
 */
struct gsi64index_s *
GSI64AllocIndex(void)
{
  enum { INITIAL_FILES = 10, INITIAL_KEYS = 100 };
  struct gsi64index_s *idx;

  idx            = MallocOrDie(sizeof(*idx));
  idx->filenames = MallocOrDie(sizeof(char *) * INITIAL_FILES);
  idx->fmt       = MallocOrDie(sizeof(int) * INITIAL_FILES);
  idx->elems     = MallocOrDie(sizeof(struct gsi64key_s) * INITIAL_KEYS);

  /* nothing has been added yet */
  idx->nkeys     = 0;
  idx->nfiles    = 0;
  return idx;
}
コード例 #9
0
ファイル: lists.c プロジェクト: acherm/chess-analysis
        /* Add a copy of str to the list of strings in list.
         * List may be a new (empty) list, in which case space is
         * allocated for it; a full list is grown by MORE_LIST_SPACE.
         * One spare element is always kept past the used entries so
         * that the list can be terminated by a NULL tag_string.
         * Return the index of the new entry on success, otherwise -1.
         * On failure to grow, the existing list is left intact.
         */
static int
add_to_taglist(const char *str,StringArray *list)
{   unsigned ix;

    if(list->num_allocated_elements == list->num_used_elements){
        /* We need more space. */
        if(list->num_allocated_elements == 0){
            /* No elements in the list. */
            list->tag_strings = (TagSelection *)MallocOrDie((INIT_LIST_SPACE+1)*
                                                sizeof(TagSelection));
            if(list->tag_strings == NULL){
                return -1;
            }
            list->num_allocated_elements = INIT_LIST_SPACE;
            list->num_used_elements = 0;
        }
        else{
            /* Grow through a temporary: assigning realloc's result
             * straight to list->tag_strings would lose (and leak) the
             * existing entries if realloc fails.
             */
            TagSelection *grown = (TagSelection *)realloc((void *)list->tag_strings,
                                (list->num_allocated_elements+MORE_LIST_SPACE+1)*
                                                        sizeof(TagSelection));
            if(grown == NULL){
                return -1;
            }
            list->tag_strings = grown;
            list->num_allocated_elements += MORE_LIST_SPACE;
        }
    }

    /* There is space. */
    ix = list->num_used_elements;
    list->tag_strings[ix].operator = NONE;
    list->tag_strings[ix].tag_string = (char *) MallocOrDie(strlen(str)+1);
    if(list->tag_strings[ix].tag_string == NULL){
        return -1;
    }
    strcpy(list->tag_strings[ix].tag_string,str);
    list->num_used_elements++;
    /* Make sure that the list is properly terminated at all times. */
    list->tag_strings[ix+1].tag_string = NULL;
    return (int) ix;
}
コード例 #10
0
ファイル: gsi.c プロジェクト: iu-parfunc/prof_apps
/* Function: GSIOpen()
 *
 * Purpose:  Open a GSI index file and read its header record:
 *           the magic string "GSI", the number of files, and the
 *           number of records. The two counts are converted from
 *           network byte order.
 *
 * Args:     gsifile - name of the GSI file to open
 *
 * Returns:  an allocated GSIFILE with gsifp open and nfiles/recnum
 *           filled in. The caller owns both the structure and the
 *           open stream inside it.
 *           NULL on failure, with squid_errno set to SQERR_NOFILE
 *           (can't open), SQERR_NODATA (short read) or SQERR_FORMAT
 *           (bad magic); nothing is left open or allocated then.
 */
GSIFILE *
GSIOpen(char *gsifile)
{
  GSIFILE    *gsi;
  char        magic[GSI_KEYSIZE];

  gsi = (GSIFILE *) MallocOrDie (sizeof(GSIFILE));
  /* the index is binary data, so open it in binary mode */
  if ((gsi->gsifp = fopen(gsifile, "rb")) == NULL)
    { free(gsi); squid_errno = SQERR_NOFILE; return NULL; }

  /* Zero the buffer first so that a partial read still leaves
   * well-defined contents, and compare with a bound so that a
   * header lacking a NUL cannot make us read past magic[].
   */
  memset(magic, 0, sizeof(magic));
  if (! fread(magic, sizeof(char), GSI_KEYSIZE, gsi->gsifp))
    { squid_errno = SQERR_NODATA; goto FAILURE; }
  if (strncmp(magic, "GSI", GSI_KEYSIZE) != 0)
    { squid_errno = SQERR_FORMAT; goto FAILURE; }

  if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp))
    { squid_errno = SQERR_NODATA; goto FAILURE; }
  if (! fread(&(gsi->recnum), sizeof(sqd_uint32), 1, gsi->gsifp))
    { squid_errno = SQERR_NODATA; goto FAILURE; }

  gsi->nfiles = sre_ntoh16(gsi->nfiles); /* convert from network short */
  gsi->recnum = sre_ntoh32(gsi->recnum); /* convert from network long  */

  return gsi;

 FAILURE:
  /* the stream was opened successfully; close it before dropping gsi */
  fclose(gsi->gsifp);
  free(gsi);
  return NULL;
}
コード例 #11
0
ファイル: end.c プロジェクト: kudrix/pgn-extract-16-7
        /* Allocate a fresh Ending_details record and set every
         * requirement to its default before any data is read in:
         * for both colours, each piece from PAWN to KING must occur
         * EXACTLY zero times, except that exactly one KING is
         * required.
         */
static Ending_details *
new_ending_details(void)
{   Ending_details *d = (Ending_details *) MallocOrDie(sizeof(*d));
    int colour;
    Piece p;

    for(colour = 0; colour < 2; colour++){
        /* Start with no pieces of any kind required ... */
        for(p = PAWN; p <= KING; p++){
            d->num_pieces[colour][p] = 0;
            d->occurs[colour][p] = EXACTLY;
        }
        /* ... then add the KING, the only requirement for each side. */
        d->num_pieces[colour][KING] = 1;

        /* Miscellaneous colour based information. */
        d->match_depth[colour] = 0;
        /* How many general minor pieces to match. */
        d->num_minor_pieces[colour] = 0;
        d->minor_occurs[colour] = EXACTLY;
    }

    /* Assume that the match must always have a depth of at least two for
     * two half-move stability.
     */
    d->move_depth = 2;
    d->next = NULL;
    return d;
}
コード例 #12
0
ファイル: plan7.c プロジェクト: Denis84/EPA-WorkBench
/* Function: Plan7ComlogAppend()
 * Date:     SRE, Wed Oct 29 09:57:30 1997 [TWA 721 over Greenland]
 *
 * Purpose:  Concatenate command line options and append to the
 *           command line log. The appended text is a '\n' followed
 *           by the arguments separated by single spaces.
 *
 * Args:     hmm  - model whose comlog is extended; comlog may be
 *                  NULL, in which case it is created
 *           argc - number of arguments in argv (0 is allowed and
 *                  appends just the '\n')
 *           argv - the arguments
 *
 * Returns:  (void)
 */
void
Plan7ComlogAppend(struct plan7_s *hmm, int argc, char **argv)
{
  size_t  oldlen;               /* length of the existing log, 0 if none */
  size_t  addlen;               /* number of characters to be appended   */
  size_t  n;                    /* length of the current argument        */
  char   *p;                    /* write position in the log             */
  int     i;

  /* Length of what we append: the leading '\n', every argument, and
   * one space between adjacent arguments. Counting the '\n'
   * explicitly (rather than as "argc separators") keeps the size
   * right even when argc is 0.
   */
  addlen = 1;
  for (i = 0; i < argc; i++)
    {
      addlen += strlen(argv[i]);
      if (i < argc-1) addlen++;
    }

  /* allocate */
  if (hmm->comlog != NULL)
    {
      oldlen      = strlen(hmm->comlog);
      hmm->comlog = ReallocOrDie(hmm->comlog, sizeof(char) * (oldlen + addlen + 1));
    }
  else
    {
      oldlen      = 0;
      hmm->comlog = MallocOrDie(sizeof(char) * (oldlen + addlen + 1));
    }

  /* Append at the known end of the log instead of letting strcat()
   * rescan the whole string for every piece.
   */
  p    = hmm->comlog + oldlen;
  *p++ = '\n';
  for (i = 0; i < argc; i++)
    {
      n = strlen(argv[i]);
      memcpy(p, argv[i], n);
      p += n;
      if (i < argc-1) *p++ = ' ';
    }
  *p = '\0';
}
コード例 #13
0
ファイル: sqio.c プロジェクト: obbila/CustomWise
/* Function: addstruc()
 *
 * Purpose:  Append the characters of s that satisfy isSeqChar() to
 *           the secondary structure string V->sqinfo->ss. On first
 *           use (SQINFO_SS not yet set in the flags) the string is
 *           allocated and the flag is set; afterwards the new
 *           characters go after the existing contents.
 *
 * Args:     s - NUL-terminated text holding structure characters
 *           V - reader state; supplies sqinfo and maxseq
 *
 * NOTE(review): the buffer is sized to V->maxseq+1 and nothing in
 * this function checks the write position against that size;
 * presumably the caller keeps maxseq large enough -- confirm.
 */
static void
addstruc(char *s, struct ReadSeqVars *V)
{
  char *dst;                    /* next write position in sqinfo->ss */

  if (V->sqinfo->flags & SQINFO_SS)
    {
      /* continuing an existing string: resize, then find its end */
      V->sqinfo->ss = (char *) ReallocOrDie (V->sqinfo->ss, (V->maxseq+1) * sizeof(char));
      for (dst = V->sqinfo->ss; *dst != '\0'; dst++)
        ;
    }
  else
    {
      /* first structure line for this sequence: start from scratch */
      V->sqinfo->ss = (char *) MallocOrDie ((V->maxseq+1) * sizeof(char));
      V->sqinfo->flags |= SQINFO_SS;
      dst = V->sqinfo->ss;
    }

  for (; *s != 0; s++)
    {
      if (! isSeqChar((int)*s)) continue;
      *dst = *s;
      dst++;
    }
  *dst = '\0';
}
コード例 #14
0
/* Function: make_alilist()
 *
 * Purpose:  For every raw (non-gap) symbol of s1, record which raw
 *           symbol of s2 it is aligned to, or -1 if s2 has a gap in
 *           that column. Entry i of the list belongs to the i'th
 *           raw symbol of s1.
 *
 * Args:     s1          - sequence to construct the list for
 *           s2          - sequence s1 is aligned to
 *           ret_s1_list - RETURN: the constructed list (caller must free)
 *           ret_listlen - RETURN: length of the list
 *
 * Returns:  1 (always).
 */
static int
make_alilist(char *s1, char *s2, int **ret_s1_list, int *ret_listlen)
{
  int        *map;              /* the list being built               */
  int         n1 = 0;           /* raw symbols of s1 seen so far      */
  int         n2 = 0;           /* raw symbols of s2 seen so far      */
  const char *p1;               /* current column in s1               */
  const char *p2;               /* same column in s2                  */
  int         s2_is_gap;

  /* The list can't be longer than s1 itself; allocating the full
   * aligned length wastes a little space but needs no second pass.
   */
  map = (int *) MallocOrDie (sizeof(int) * strlen(s1));

  for (p1 = s1, p2 = s2; *p1 != '\0'; p1++, p2++)
    {
      s2_is_gap = isgap(*p2);

      /* symbol in s1? record its partner and count it */
      if (! isgap(*p1))
        {
          map[n1] = s2_is_gap ? -1 : n2;
          n1++;
        }

      /* symbol in s2? count it */
      if (! s2_is_gap)
        n2++;
    }

  *ret_listlen = n1;
  *ret_s1_list = map;
  return 1;
}
コード例 #15
0
ファイル: gsi64.c プロジェクト: BioInfoTools/MACSE
/* Function: GSI64Open()
 *
 * Purpose:  Open a GSI64 index file and read its header record:
 *           the magic string "GSI64", the number of files, and the
 *           number of records. The counts are used as read; no
 *           byte swapping is done.
 *
 * Args:     gsifile - name of the GSI64 file to open
 *
 * Returns:  an allocated GSI64FILE with gsifp open and nfiles/recnum
 *           filled in. The caller owns both the structure and the
 *           open stream inside it.
 *           NULL on failure, with squid_errno set to SQERR_NOFILE
 *           (can't open), SQERR_NODATA (short read) or SQERR_FORMAT
 *           (bad magic); nothing is left open or allocated then.
 */
GSI64FILE *
GSI64Open(char *gsifile)
{
  GSI64FILE  *gsi;
  char        magic[GSI64_KEYSIZE];

  gsi = (GSI64FILE *) MallocOrDie (sizeof(GSI64FILE));
  /* the index is binary data, so open it in binary mode */
  if ((gsi->gsifp = fopen(gsifile, "rb")) == NULL)
    { free(gsi); squid_errno = SQERR_NOFILE; return NULL; }

  /* Zero the buffer first so that a partial read still leaves
   * well-defined contents, and compare with a bound so that a
   * header lacking a NUL cannot make us read past magic[].
   */
  memset(magic, 0, sizeof(magic));
  if (! fread(magic, sizeof(char), GSI64_KEYSIZE, gsi->gsifp))
    { squid_errno = SQERR_NODATA; goto FAILURE; }
  if (strncmp(magic, "GSI64", GSI64_KEYSIZE) != 0)
    { squid_errno = SQERR_FORMAT; goto FAILURE; }

  if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp))
    { squid_errno = SQERR_NODATA; goto FAILURE; }
  if (! fread(&(gsi->recnum), sizeof(sqd_uint64), 1, gsi->gsifp))
    { squid_errno = SQERR_NODATA; goto FAILURE; }

#if 0			/* HACK! we don't byteswap */
  gsi->nfiles = sre_ntohs(gsi->nfiles); /* convert from network short */
  gsi->recnum = sre_ntohl(gsi->recnum); /* convert from network long  */
#endif

  return gsi;

 FAILURE:
  /* the stream was opened successfully; close it before dropping gsi */
  fclose(gsi->gsifp);
  free(gsi);
  return NULL;
}
コード例 #16
0
ファイル: msa.c プロジェクト: hyphaltip/subopt-kaks
/* Function: MSAAppendGR()
 * Date:     SRE, Thu Jun  3 06:34:38 1999 [Madison]
 *
 * Purpose:  Attach an unparsed #=GR markup line to one sequence of
 *           an MSA, growing the tag tables as needed.
 *
 *           Calling this repeatedly for the same tag and sequence
 *           concatenates the value strings -- used when parsing
 *           multiblock alignment files, for example.
 *
 * Args:     msa    - multiple alignment structure
 *           tag    - markup tag (e.g. "SS")
 *           sqidx  - index of seq to assoc markup with (0..nseq-1)
 *           value  - markup, one char per aligned column
 *
 * Returns:  (void). The return value of sre_strcat() is not checked.
 */
void
MSAAppendGR(MSA *msa, char *tag, int sqidx, char *value)
{
  int     tagidx;               /* index of tag in msa->gr_tag / msa->gr */
  int     s;                    /* counter over per-sequence slots       */
  char  **row;                  /* freshly allocated per-sequence row    */

  if (msa->gr_tag != NULL)
    {
      /* Tables already exist: look the tag up, and grow the tables
       * by exactly one entry if it is new.
       */
      tagidx = GKIKeyIndex(msa->gr_idx, tag);
      if (tagidx < 0)
        {
          tagidx = GKIStoreKey(msa->gr_idx, tag);
          SQD_DASSERT1((tagidx == msa->ngr));
          msa->gr_tag = ReallocOrDie(msa->gr_tag, (msa->ngr+1) * sizeof(char *));
          msa->gr     = ReallocOrDie(msa->gr, (msa->ngr+1) * sizeof(char **));

          row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
          for (s = 0; s < msa->nseqalloc; s++)
            row[s] = NULL;
          msa->gr[msa->ngr] = row;
        }
    }
  else
    {
      /* Very first GR tag: create one-entry tables and the key index.
       */
      msa->gr_tag = MallocOrDie(sizeof(char *));
      msa->gr     = MallocOrDie(sizeof(char **));

      row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      for (s = 0; s < msa->nseqalloc; s++)
        row[s] = NULL;
      msa->gr[0] = row;

      msa->gr_idx = GKIInit();
      tagidx      = GKIStoreKey(msa->gr_idx, tag);
      SQD_DASSERT1((tagidx == 0));
    }

  /* A tag index equal to the current tag count marks a tag that was
   * just added above; record its name and count it.
   */
  if (tagidx == msa->ngr)
    {
      msa->gr_tag[tagidx] = sre_strdup(tag, -1);
      msa->ngr++;
    }

  /* Append to whatever this sequence already has for this tag
   * (the slot is NULL the first time).
   */
  sre_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1);
  return;
}
コード例 #17
0
/* Function: PrintNewHampshireTree()
 *
 * Purpose:  Print out a tree in the "New Hampshire" standard
 *           format. See PHYLIP's draw.doc for a definition of
 *           the New Hampshire format.
 *
 *           The output is generated left to right by a preorder
 *           traversal driven by an explicit integer stack.
 *
 * Args:     fp   - file to print to
 *           ainfo- alignment info, including sequence names
 *           tree - tree to print
 *           N    - number of leaves
 *
 * Node codes on the stack:
 *     0..N-1   = leaves; indexes of sequences.
 *     N..2N-2  = interior nodes; code-N = index of node in tree.
 *                Code N is the root.
 *     2N..3N-2 = "close this interior node" markers; code-2N = index
 *                in tree.
 */
void
PrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N)
{
  struct intstack_s *todo;      /* codes still to be printed              */
  struct phylo_s    *node;      /* interior node currently being opened   */
  float             *blen;      /* branch length leading to each code     */
  int                code;
  int                docomma;   /* does the next item need a separator?   */

  blen = (float *) MallocOrDie (sizeof(float) * (2*N-1));
  todo = InitIntStack();
  PushIntStack(todo, N);        /* push root on stack */
  docomma = FALSE;

  while (PopIntStack(todo, &code))
    {
      if (code >= 2*N)
        {
          /* Closing an interior node: the root (marker 2N) ends the
           * tree, anything else gets its branch length.
           */
          if (code == 2*N) fprintf(fp, ");\n");
          else             fprintf(fp, "):%.5f", blen[code-N]);
          docomma = TRUE;
          continue;
        }

      if (code >= N)
        {
          /* Opening an interior node: print '(', then push the
           * closing marker, right child and left child, so that the
           * left child is popped first.
           */
          node = &tree[code-N];
          if (docomma) fputs(",\n", fp);
          fputs("(", fp);
          PushIntStack(todo, code+N);
          PushIntStack(todo, node->right);
          PushIntStack(todo, node->left);

          /* remember the children's branch lengths for when they print */
          blen[node->right] = node->rblen;
          blen[node->left]  = node->lblen;
          docomma = FALSE;
          continue;
        }

      /* Leaf: print name:branchlength */
      if (docomma) fputs(",", fp);
      fprintf(fp, "%s:%.5f", ainfo->sqinfo[code].name, blen[code]);
      docomma = TRUE;
    }

  FreeIntStack(todo);
  free(blen);
  return;
}
コード例 #18
0
ファイル: grammar.c プロジェクト: acherm/chess-analysis
/* Set up the global game header with room for
 * ORIGINAL_NUMBER_OF_TAGS tag values, all of them initially NULL.
 * The space will have to be increased if new tags are
 * identified in the program source.
 */
void
init_game_header(void)
{
    unsigned remaining;

    GameHeader.header_tags_length = ORIGINAL_NUMBER_OF_TAGS;
    GameHeader.Tags = (char **) MallocOrDie(GameHeader.header_tags_length*
                                            sizeof(*GameHeader.Tags));

    /* Clear every slot, working back from the end. */
    remaining = GameHeader.header_tags_length;
    while(remaining > 0) {
        remaining--;
        GameHeader.Tags[remaining] = (char *) NULL;
    }
}
コード例 #19
0
/* Function: AllocFancyAli()
 *
 * Purpose:  Allocate a struct fancyali_s shell: every line and name
 *           pointer is NULL and the sequence coordinates are zero.
 *
 * Returns:  the new structure, obtained from MallocOrDie().
 */
struct fancyali_s *
AllocFancyAli(void)
{
  struct fancyali_s *ali = MallocOrDie (sizeof(*ali));

  /* alignment display lines: nothing attached yet */
  ali->aseq   = NULL;
  ali->mline  = NULL;
  ali->model  = NULL;
  ali->csline = NULL;
  ali->rfline = NULL;

  /* names */
  ali->target = NULL;
  ali->query  = NULL;

  /* sequence coordinates */
  ali->sqto   = 0;
  ali->sqfrom = 0;
  return ali;
}
コード例 #20
0
ファイル: lists.c プロジェクト: acherm/chess-analysis
void init_tag_lists(void)
{
    int i;
    tag_list_length = ORIGINAL_NUMBER_OF_TAGS;
    TagLists = (StringArray *) MallocOrDie(tag_list_length*sizeof(*TagLists));
    for(i = 0; i < tag_list_length; i++){
	 TagLists[i].num_allocated_elements  = 0;
	 TagLists[i].num_used_elements  = 0;
	 TagLists[i].tag_strings  = (TagSelection *) NULL;
    }
}
コード例 #21
0
ファイル: alphabet.c プロジェクト: Denis84/EPA-WorkBench
/* Function: DigitizeSequence()
 *
 * Purpose:  Convert a character sequence to HMMER's internal
 *           digitized form, a char array of length L+2.
 *           Positions 1..L hold SymbolIndex() of the corresponding
 *           sequence character; positions 0 and L+1 are sentinel
 *           bytes set to Alphabet_iupac -- i.e. one more than the
 *           maximum allowed index.
 *
 *           Assumes that 'X', the fully degenerate character,
 *           is the last character in the allowed alphabet.
 *
 * Args:     seq - sequence to be digitized (0..L-1)
 *           L   - length of sequence
 *
 * Return:   digitized sequence, dsq.
 *           dsq is allocated here and must be free'd by caller.
 */
char *
DigitizeSequence(char *seq, int L)
{
  char *dsq = MallocOrDie (sizeof(char) * (L+2));
  int   pos;                    /* position in seq, 0..L-1 */

  /* leading sentinel, residues shifted up by one, trailing sentinel */
  dsq[0] = (char) Alphabet_iupac;
  for (pos = 0; pos < L; pos++)
    dsq[pos+1] = SymbolIndex(seq[pos]);
  dsq[L+1] = (char) Alphabet_iupac;

  return dsq;
}
コード例 #22
0
ファイル: alphabet.c プロジェクト: Denis84/EPA-WorkBench
/* Function: DedigitizeSequence()
 * Date:     SRE, Tue Dec 16 10:39:19 1997 [StL]
 *
 * Purpose:  Convert a digitized sequence back to the real alphabet.
 *           Digitized positions 1..L are looked up in Alphabet[] and
 *           written to positions 0..L-1 of a new NUL-terminated
 *           string.
 *
 * Args:     dsq - digitized sequence; positions 1..L are read
 *           L   - length of sequence
 *
 * Return:   the character string, allocated here with MallocOrDie().
 */
char *
DedigitizeSequence(char *dsq, int L)
{
  char *seq = MallocOrDie(sizeof(char) * (L+1));
  int   pos;                    /* position in dsq, 1..L */

  seq[L] = '\0';
  for (pos = 1; pos <= L; pos++)
    seq[pos-1] = Alphabet[(int) dsq[pos]];

  return seq;
}
コード例 #23
0
// Record the atlas dimensions, pixel format and padding, then allocate the
// backing pixel buffer: `height` rows of TexStride bytes each, where
// TexStride is width * TexFormatBytesPerPixel(format).
// PosTop, PosBottom and PosRight all start out equal to the padding.
void TextureAtlas::Initialize(uint32_t width, uint32_t height, xo::TexFormat format, uint32_t padding) {
	TexWidth      = width;
	TexHeight     = height;
	Padding       = padding;
	TexFormat     = format;
	TexStride     = (int) (width * TexFormatBytesPerPixel(format));
	// Widen both operands before multiplying. If TexStride is no wider than
	// 32 bits, `height * TexStride` would be computed in 32-bit arithmetic and
	// could wrap, under-sizing the allocation for very large atlases.
	size_t nbytes = (size_t) height * (size_t) TexStride;
	TexData       = (uint8_t*) MallocOrDie(nbytes);
	PosTop        = Padding;
	PosBottom     = Padding;
	PosRight      = Padding;
}
コード例 #24
0
ファイル: msa.c プロジェクト: hyphaltip/subopt-kaks
/* Function: MSASetSeqAccession()
 * Date:     SRE, Mon Jun 21 04:13:33 1999 [Sanger Centre]
 *
 * Purpose:  Attach an accession to one sequence of an MSA.
 *           If the MSA has no accession table yet (msa->sqacc is
 *           NULL), one is allocated with msa->nseqalloc slots,
 *           all initialized to NULL, before the entry is set.
 *
 * Args:     msa      - multiple alignment to add accession to
 *           seqidx   - index of sequence to attach accession to
 *           acc      - accession; stored as a copy made by sre_strdup()
 *
 * Returns:  void
 */
void
MSASetSeqAccession(MSA *msa, int seqidx, char *acc)
{
  if (msa->sqacc == NULL)
    {				/* first accession: build an all-NULL table */
      int slot = 0;

      msa->sqacc = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      while (slot < msa->nseqalloc)
	{
	  msa->sqacc[slot] = NULL;
	  slot++;
	}
    }

  msa->sqacc[seqidx] = sre_strdup(acc, -1);
}
コード例 #25
0
ファイル: msa.c プロジェクト: hyphaltip/subopt-kaks
/* Function: MSASetSeqDescription()
 * Date:     SRE, Mon Jun 21 04:21:09 1999 [Sanger Centre]
 *
 * Purpose:  Attach a description to one sequence of an MSA.
 *           If the MSA has no description table yet (msa->sqdesc
 *           is NULL), one is allocated with msa->nseqalloc slots,
 *           all initialized to NULL, before the entry is set.
 *
 * Args:     msa      - multiple alignment to add description to
 *           seqidx   - index of sequence to attach description to
 *           desc     - description; stored as a copy made by sre_strdup()
 *
 * Returns:  void
 */
void
MSASetSeqDescription(MSA *msa, int seqidx, char *desc)
{
  if (msa->sqdesc == NULL)
    {				/* first description: build an all-NULL table */
      int slot = 0;

      msa->sqdesc = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      while (slot < msa->nseqalloc)
	{
	  msa->sqdesc[slot] = NULL;
	  slot++;
	}
    }

  msa->sqdesc[seqidx] = sre_strdup(desc, -1);
}
コード例 #26
0
ファイル: msa.c プロジェクト: hyphaltip/subopt-kaks
/* Function: MSAAppendGC()
 * Date:     SRE, Thu Jun  3 06:25:14 1999 [Madison]
 *
 * Purpose:  Add an unparsed #=GC markup line to the MSA
 *           structure, allocating as necessary. 
 *           
 *           When called multiple times for the same tag,
 *           appends value strings together -- used when
 *           parsing multiblock alignment files, for
 *           example.
 *
 *           Tags are looked up through the msa->gc_idx key index.
 *           msa->gc_tag[] and msa->gc[] are parallel arrays of
 *           strings (tag name, accumulated markup), one slot per
 *           distinct tag, and msa->ngc counts the slots in use.
 *
 * Args:     msa   - multiple alignment structure
 *           tag   - markup tag (e.g. "CS")
 *           value - markup, one char per aligned column      
 *
 * Returns:  (void)
 */
void
MSAAppendGC(MSA *msa, char *tag, char *value)
{
  int tagidx;

  /* Is this an unparsed tag name that we recognize?
   * If not, handle adding it to index, and reallocating
   * as needed.
   */
  if (msa->gc_tag == NULL)	/* first tag? init w/ malloc  */
    {
      msa->gc_tag = MallocOrDie(sizeof(char *));
      msa->gc     = MallocOrDie(sizeof(char *));
      msa->gc_idx = GKIInit();
      tagidx      = GKIStoreKey(msa->gc_idx, tag);
      SQD_DASSERT1((tagidx == 0));
      msa->gc[0]  = NULL;
    }
  else
    {			/* new tag? */
      tagidx  = GKIKeyIndex(msa->gc_idx, tag); 
      if (tagidx < 0) {		/* it's a new tag name; realloc */
	tagidx = GKIStoreKey(msa->gc_idx, tag);
				/* since we alloc in blocks of 1,
				   we always realloc upon seeing 
				   a new tag. */
	SQD_DASSERT1((tagidx == msa->ngc));
				/* elements are strings (char *), so size
				   the arrays by sizeof(char *), matching
				   the initial allocation above. */
	msa->gc_tag = ReallocOrDie(msa->gc_tag, (msa->ngc+1) * sizeof(char *));
	msa->gc     = ReallocOrDie(msa->gc, (msa->ngc+1) * sizeof(char *));
	msa->gc[tagidx] = NULL;
      }
    }

  /* A tag index equal to ngc means the slot was just created above:
   * record the tag's name and count the new slot.
   */
  if (tagidx == msa->ngc) {
    msa->gc_tag[tagidx] = sre_strdup(tag, -1);
    msa->ngc++;
  }

  /* Append this block's markup to whatever is already stored for the tag. */
  sre_strcat(&(msa->gc[tagidx]), -1, value, -1);
  return;
}
コード例 #27
0
ファイル: alphabet.c プロジェクト: Denis84/EPA-WorkBench
/* Function: DigitizeAlignment() 
 * 
 * Purpose:  Given an alignment, return digitized unaligned
 *           sequence array. (Tracebacks are always relative
 *           to digitized unaligned seqs, even if they are
 *           faked from an existing alignment in modelmakers.c.)
 *
 *           Each output row is allocated with alen+2 bytes. It
 *           starts with an Alphabet_iupac sentinel, followed by
 *           SymbolIndex() of every non-gap column of the aligned
 *           sequence, followed by a second Alphabet_iupac sentinel.
 *           
 * Args:     msa      - alignment to digitize
 *           ret_dsqs - RETURN: array of digitized unaligned sequences
 *           
 * Return:   (void)
 *           The array is alloced here. Free2DArray(dsqs, nseq).
 */ 
void
DigitizeAlignment(MSA *msa, char ***ret_dsqs)
{
  char **digital;		/* one digitized row per sequence  */
  char  *row;			/* row currently being filled      */
  int    seqno;			/* which sequence                  */
  int    col;			/* column in the alignment         */
  int    used;			/* next free position in row       */

  digital = (char **) MallocOrDie (sizeof(char *) * msa->nseq);

  for (seqno = 0; seqno < msa->nseq; seqno++)
    {
      row = (char *) MallocOrDie (sizeof(char) * (msa->alen+2));
      digital[seqno] = row;

      row[0] = (char) Alphabet_iupac;	/* leading sentinel */
      used   = 1;

      for (col = 0; col < msa->alen; col++)
	{
	  if (isgap(msa->aseq[seqno][col]))
	    continue;			/* gap columns contribute nothing */
	  row[used] = SymbolIndex(msa->aseq[seqno][col]);
	  used++;
	}

      row[used] = (char) Alphabet_iupac;	/* trailing sentinel */
    }

  *ret_dsqs = digital;
}
コード例 #28
0
/* Function: GaussianSetHistogram()
 * 
 * Purpose:  Instead of fitting the histogram to a Gaussian,
 *           simply set the Gaussian parameters from an external source.
 *
 *           Calls UnfitHistogram() first, then stores mean and sd,
 *           allocates h->expect with one slot per integer score in
 *           h->min..h->max, fills in the expected counts, and
 *           computes a chi-square statistic (h->chisq) and its
 *           probability (h->chip) over h->lowscore..h->highscore.
 *
 * Args:     h    - the histogram to set
 *           mean - Gaussian mean
 *           sd   - Gaussian standard deviation
 */
void
GaussianSetHistogram(struct histogram_s *h, float mean, float sd)
{
  float *expect;		/* the newly allocated h->expect array   */
  float  delta;			/* score offset, then observed-expected  */
  int    nslots;		/* number of slots in h->expect          */
  int    slot;			/* index into expect/histogram           */
  int    score;			/* integer score being visited           */
  int    nbins;			/* bins that entered the chi-square sum  */

  UnfitHistogram(h);
  h->fit_type          = HISTFIT_GAUSSIAN;
  h->param[GAUSS_MEAN] = mean;
  h->param[GAUSS_SD]   = sd;

  /* One expectation slot per integer score, zeroed to start with.
   */
  nslots    = h->max - h->min + 1;
  expect    = (float *) MallocOrDie(sizeof(float) * nslots);
  h->expect = expect;
  for (slot = 0; slot < nslots; slot++)
    expect[slot] = 0.;

  /* Ideally the occupancy of the window sc..sc+1 would come from the
   * Gaussian distribution function, but that is hard to calculate.
   * Estimate it instead by the density at the window midpoint, sc+0.5,
   * scaled by the total count.
   */
  score = h->min;
  while (score <= h->max)
    { 
      delta = ((float)score + 0.5) - h->param[GAUSS_MEAN];
      expect[score - h->min] =
	(float) h->total * ((1. / (h->param[GAUSS_SD] * sqrt(2.*3.14159))) * 
	    (exp(-1.*delta*delta / (2. * h->param[GAUSS_SD] * h->param[GAUSS_SD]))));
      score++;
    }

  /* Goodness of fit over the scored region. Only bins where both the
   * expected and the observed count are at least 5 are used.
   */
  h->chisq = 0.;
  nbins    = 0;
  for (score = h->lowscore; score <= h->highscore; score++)
    {
      slot = score - h->min;
      if (!(expect[slot] >= 5. && h->histogram[slot] >= 5))
	continue;

      delta = (float) h->histogram[slot] - expect[slot];
      h->chisq += delta * delta / expect[slot];
      nbins++;
    }

  /* -1 d.f. for normalization; with too few bins there is no test. */
  if (nbins <= 1)
    {
      h->chip = 0.;
      return;
    }
  h->chip = (float) IncompleteGamma((double)(nbins-1)/2., 
				    (double) h->chisq/2.);
}
コード例 #29
0
ファイル: plan7.c プロジェクト: Denis84/EPA-WorkBench
/* Function: AllocPlan7Shell()
 *
 * Purpose:  Allocate an empty plan7_s structure. No arrays are
 *           attached: every pointer field set here is NULL, the
 *           counters, checksum, flags, mu, lambda and the ga/tc/nc
 *           pairs are zero, and dna2/dna4 are -INFTY.
 *
 * Return:   the new structure, allocated with MallocOrDie().
 */
struct plan7_s *
AllocPlan7Shell(void) 
{
  struct plan7_s *shell = (struct plan7_s *) MallocOrDie (sizeof(struct plan7_s));

  shell->M = 0;

  /* annotation */
  shell->name     = NULL;
  shell->acc      = NULL;
  shell->desc     = NULL;
  shell->rf       = NULL;
  shell->cs       = NULL;
  shell->ca       = NULL;
  shell->comlog   = NULL; 
  shell->nseq     = 0;
  shell->ctime    = NULL;
  shell->map      = NULL;
  shell->checksum = 0;

  /* tpri, mpri, ipri */
  shell->tpri = NULL;
  shell->mpri = NULL;
  shell->ipri = NULL;

  /* ga, tc, nc pairs */
  shell->ga2 = 0.0;
  shell->ga1 = shell->ga2;
  shell->tc2 = 0.0;
  shell->tc1 = shell->tc2;
  shell->nc2 = 0.0;
  shell->nc1 = shell->nc2;

  /* t/mat/ins and their *sc counterparts */
  shell->t   = NULL;
  shell->tsc = NULL;
  shell->mat = NULL;
  shell->ins = NULL;
  shell->msc = NULL;
  shell->isc = NULL;

  /* begin/end and their *sc counterparts */
  shell->begin = NULL;
  shell->bsc   = NULL;
  shell->end   = NULL;
  shell->esc   = NULL;

  /* DNA translation is not enabled by default */
  shell->dnam = NULL;
  shell->dnai = NULL;
  shell->dna2 = -INFTY;
  shell->dna4 = -INFTY;

  /* statistical parameters set to innocuous empty values */
  shell->mu     = 0.; 
  shell->lambda = 0.;

  shell->flags = 0;
  return shell;
}  
コード例 #30
0
/* Function: ExtremeValueSetHistogram()
 * 
 * Purpose:  Instead of fitting the histogram to an EVD,
 *           simply set the EVD parameters from an external source.
 *
 *           Calls UnfitHistogram() first, then stores mu and lambda,
 *           allocates h->expect with one slot per integer score in
 *           h->min..h->max, fills in the expected counts, and
 *           computes a chi-square statistic (h->chisq) and its
 *           probability (h->chip) over lowbound..highbound.
 *
 * Args:     h        - the histogram to set
 *           mu       - mu location parameter                
 *           lambda   - lambda scale parameter
 *           lowbound - low bound of the histogram that was fit
 *           highbound- high bound of histogram that was fit
 *           ndegrees - extra degrees of freedom to subtract in X^2 test:
 *                        typically 0 if mu, lambda are parametric,
 *                        else 2 if mu, lambda are estimated from data
 */
void
ExtremeValueSetHistogram(struct histogram_s *h, float mu, float lambda, 
			 float lowbound, float highbound, int ndegrees)
{
  float *expect;		/* the newly allocated h->expect array   */
  float  delta;			/* observed minus expected count         */
  int    nslots;		/* number of slots in h->expect          */
  int    slot;			/* index into expect/histogram           */
  int    score;			/* integer score being visited           */
  int    nbins;			/* bins that entered the chi-square sum  */

  UnfitHistogram(h);
  h->fit_type          = HISTFIT_EVD;
  h->param[EVD_LAMBDA] = lambda;
  h->param[EVD_MU]     = mu;

  /* One expectation slot per integer score, zeroed to start with.
   */
  nslots    = h->max - h->min + 1;
  expect    = (float *) MallocOrDie(sizeof(float) * nslots);
  h->expect = expect;
  for (slot = 0; slot < nslots; slot++)
    expect[slot] = 0.;

  /* Expected occupancy of the window sc..sc+1 is the difference
   * between ExtremeValueE() at sc and at sc+1.
   */
  score = h->min;
  while (score <= h->max)
    {
      expect[score - h->min] =
	ExtremeValueE((float)(score), h->param[EVD_MU], h->param[EVD_LAMBDA], 
		      h->total) -
	ExtremeValueE((float)(score+1), h->param[EVD_MU], h->param[EVD_LAMBDA],
		      h->total);
      score++;
    }
  
  /* Goodness of fit between the given bounds. Only bins where both the
   * expected and the observed count are at least 5 are used.
   */
  h->chisq = 0.;
  nbins    = 0;
  for (score = lowbound; score <= highbound; score++)
    {
      slot = score - h->min;
      if (!(expect[slot] >= 5. && h->histogram[slot] >= 5))
	continue;

      delta = (float) h->histogram[slot] - expect[slot];
      h->chisq += delta * delta / expect[slot];
      nbins++;
    }

  /* Since we fit the whole histogram, there is at least 
   * one constraint on chi-square: the normalization to h->total.
   * The caller's ndegrees are subtracted on top of that.
   */
  if (nbins <= 1 + ndegrees)
    {
      h->chip = 0.;
      return;
    }
  h->chip = (float) IncompleteGamma((double)(nbins-1-ndegrees)/2., 
				    (double) h->chisq/2.);
}