/* Function: DealignAseqs()
 *
 * Purpose:  Build gap-free ("raw") copies of a set of aligned sequences.
 *           Each character for which isgap() is false is copied, in
 *           order, into a freshly allocated NUL-terminated string.
 *
 * Args:     aseqs     - array of num NUL-terminated aligned sequences
 *           num       - number of sequences in aseqs
 *           ret_rseqs - RETURN: new array of num raw sequences
 *
 * Return:   Always 1; no failure path exists in this function.
 *           All memory is obtained through MallocOrDie(). The caller
 *           frees every rseqs[i] and then the rseqs array itself.
 */
int
DealignAseqs(char **aseqs, int num, char ***ret_rseqs)
{
  char **raw;                   /* gap-free copies, one per input seq */
  int    i;                     /* sequence counter                   */

  raw = (char **) MallocOrDie (num * sizeof(char *));

  for (i = 0; i < num; i++)
    {
      const char *src = aseqs[i];
      char       *dst;

      /* A raw sequence is never longer than its aligned parent, so
       * the aligned length (+1 for the terminator) is always enough.
       */
      raw[i] = (char *) MallocOrDie ((strlen(src) + 1) * sizeof(char));
      dst    = raw[i];

      while (*src != '\0')
        {
          if (! isgap(*src))
            {
              *dst = *src;
              dst++;
            }
          src++;
        }
      *dst = '\0';
    }

  *ret_rseqs = raw;
  return 1;
}
/* Function: EVDBasicFit()
 * Date:     SRE, Wed Nov 12 11:02:27 1997 [St. Louis]
 *
 * Purpose:  Fit a score histogram to an extreme value distribution
 *           using one plain linear regression (no outlier pruning).
 *           The fitted mu and lambda are handed to
 *           ExtremeValueSetHistogram(), which stores them in the
 *           histogram and computes expected counts and chi-square.
 *
 * Method:   The cumulative fraction P(S<x) is tabulated for the bins
 *           lowscore..highscore, transformed to log(-log(P)), and
 *           regressed against x with FLinefit(). Then
 *              lambda = -slope,   mu = intercept / lambda.
 *           The last bin is left out of the regression (hsize-1
 *           points); its cumulative fraction is not transformed.
 *
 * Args:     h - histogram to fit
 *
 * Return:   (void)
 */
void
EVDBasicFit(struct histogram_s *h)
{
  float *cdf;                   /* P(S<x), later log(-log(P(S<x)))       */
  float *xval;                  /* x axis handed to FLinefit()           */
  int    nbins;                 /* number of bins in lowscore..highscore */
  int    running;               /* cumulative count of scores so far     */
  int    i;                     /* bin index, 0 == lowscore              */
  float  slope, intercept;      /* regression result                     */
  float  corr;                  /* correlation coefficient; unused       */
  float  lambda, mu;            /* EVD parameters derived from the line  */

  /* Both axes are indexed by (score - lowscore). */
  nbins = h->highscore - h->lowscore + 1;
  cdf   = (float *) MallocOrDie(sizeof(float) * nbins);
  xval  = (float *) MallocOrDie(sizeof(float) * nbins);
  for (i = 0; i < nbins; i++)
    {
      xval[i] = 0.;
      cdf[i]  = 0.;
    }

  /* Cumulative distribution. The histogram itself is indexed by
   * (score - h->min). x is score+1 because a bin holds the scores
   * between x and x+1.
   */
  running = 0;
  for (i = 0; i < nbins; i++)
    {
      int score = h->lowscore + i;

      running += h->histogram[score - h->min];
      cdf[i]   = (float) running / (float) h->total;
      xval[i]  = (float) (score + 1);
    }

  /* Linearize: log[-log(P(S<x))] = -lambda * x + lambda * mu.
   * The final bin is skipped here and excluded from the fit below.
   */
  for (i = 0; i < nbins - 1; i++)
    cdf[i] = log(-1. * log(cdf[i]));

  FLinefit(xval, cdf, nbins-1, &intercept, &slope, &corr);

  lambda = -1. * slope;
  mu     = intercept / lambda;

  /* 2 extra lost degrees of freedom: mu and lambda were both fitted. */
  ExtremeValueSetHistogram(h, mu, lambda, h->lowscore, h->highscore, 2);

  free(xval);
  free(cdf);
  return;
}
/* Function: MSAAddGS()
 * Date:     SRE, Wed Jun  2 06:57:03 1999 [St. Louis]
 *
 * Purpose:  Attach one unparsed #=GS markup line to an MSA,
 *           growing the per-tag tables as needed.
 *
 *           The same tag may occur more than once for a sequence
 *           (e.g. several "DR PDB;" links in Pfam). Repeats are
 *           appended to the existing string, separated by '\n'.
 *
 * Args:     msa   - multiple alignment structure
 *           tag   - markup tag (e.g. "AC")
 *           sqidx - index of the sequence the markup belongs to
 *           value - markup text (e.g. "P00666")
 *
 * Returns:  (void). Calls Die() if sre_strcat() reports failure.
 */
void
MSAAddGS(MSA *msa, char *tag, int sqidx, char *value)
{
  char **row;                   /* freshly allocated per-sequence row */
  int    tagidx;
  int    i;

  if (msa->gs_tag != NULL)
    {
      /* Tables exist already; is this a tag we have seen before? */
      tagidx = GKIKeyIndex(msa->gs_idx, tag);
      if (tagidx < 0)
        {
          /* New tag. The tables grow one tag at a time, so a new
           * tag always means a realloc.
           */
          tagidx = GKIStoreKey(msa->gs_idx, tag);
          SQD_DASSERT1((tagidx == msa->ngs));
          msa->gs_tag = ReallocOrDie(msa->gs_tag, (msa->ngs+1) * sizeof(char *));
          msa->gs     = ReallocOrDie(msa->gs,     (msa->ngs+1) * sizeof(char **));

          row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
          msa->gs[msa->ngs] = row;
          for (i = 0; i < msa->nseqalloc; i++)
            msa->gs[msa->ngs][i] = NULL;
        }
    }
  else
    {
      /* Very first GS tag: create the key index and the tables. */
      msa->gs_idx = GKIInit();
      tagidx      = GKIStoreKey(msa->gs_idx, tag);
      SQD_DASSERT1((tagidx == 0));
      msa->gs_tag = MallocOrDie(sizeof(char *));
      msa->gs     = MallocOrDie(sizeof(char **));

      row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      msa->gs[0] = row;
      for (i = 0; i < msa->nseqalloc; i++)
        msa->gs[0][i] = NULL;
    }

  /* A tag index equal to ngs marks a tag that was just added. */
  if (tagidx == msa->ngs)
    {
      msa->gs_tag[tagidx] = sre_strdup(tag, -1);
      msa->ngs++;
    }

  if (msa->gs[tagidx][sqidx] != NULL)
    {
      /* Repeat annotation: append "\n" and then the new value. */
      int len;

      len = sre_strcat(&(msa->gs[tagidx][sqidx]), -1, "\n", 1);
      if (len < 0)
        Die("failed to sre_strcat()");
      if (sre_strcat(&(msa->gs[tagidx][sqidx]), len, value, -1) < 0)
        Die("failed to sre_strcat()");
    }
  else
    {
      /* First annotation of this sequence with this tag. */
      msa->gs[tagidx][sqidx] = sre_strdup(value, -1);
    }
  return;
}
/* Function: ReadMultipleRseqs()
 *
 * Purpose:  Open a sequence file and load every sequence in it as a
 *           raw (unaligned) sequence.
 *
 *           For the alignment formats kSelex, kMSF and kClustal the
 *           file is read with ReadAlignment() and the gaps are
 *           stripped with DealignAseqs(). Any other format is read
 *           sequentially with ReadSeq(), growing the arrays in steps
 *           of 16.
 *
 * Args:     seqfile    - name of the file to read
 *           fformat    - format code of the file
 *           ret_rseqs  - RETURN: array of raw sequences
 *           ret_sqinfo - RETURN: array of per-sequence info
 *           ret_num    - RETURN: number of sequences read
 *
 *           Note: this interface has no per-sequence weight argument.
 *
 * Return:   1 on success. 0 on failure, in which case the RETURN
 *           arguments are not written and nothing allocated here is
 *           left behind. The helpers are expected to set squid_errno.
 *           On success the caller frees rseqs and sqinfo.
 */
int
ReadMultipleRseqs(char *seqfile, int fformat, char ***ret_rseqs, SQINFO **ret_sqinfo, int *ret_num)
{
  SQINFO *sqinfo;               /* array of sequence optional info         */
  SQFILE *dbfp;                 /* open ptr for sequential access of file  */
  char  **rseqs;                /* sequence array                          */
  char  **aseqs;                /* aligned sequences, if file is aligned   */
  AINFO   ainfo;                /* alignment-associated information        */
  int     numalloced;           /* num of seqs currently alloced for       */
  int     idx;
  int     num;

  if (fformat == kSelex || fformat == kMSF || fformat == kClustal)
    {
      if (! ReadAlignment(seqfile, fformat, &aseqs, &ainfo)) return 0;
      if (! DealignAseqs(aseqs, ainfo.nseq, &rseqs))
        {
          /* don't leak the alignment we just read */
          FreeAlignment(aseqs, &ainfo);
          return 0;
        }

      /* copy the sqinfo array before the alignment is released */
      num    = ainfo.nseq;
      sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO)*ainfo.nseq);
      for (idx = 0; idx < ainfo.nseq; idx++)
        SeqinfoCopy(&(sqinfo[idx]), &(ainfo.sqinfo[idx]));
      FreeAlignment(aseqs, &ainfo);
    }
  else
    {
      /* Open the file BEFORE allocating anything: a failed open used
       * to return with rseqs and sqinfo already allocated and lost.
       */
      if ((dbfp = SeqfileOpen(seqfile, fformat, NULL)) == NULL) return 0;

      /* initial alloc */
      num        = 0;
      numalloced = 16;
      rseqs  = (char **) MallocOrDie (numalloced * sizeof(char *));
      sqinfo = (SQINFO *) MallocOrDie (numalloced * sizeof(SQINFO));

      while (ReadSeq(dbfp, fformat, &rseqs[num], &(sqinfo[num])))
        {
          num++;
          if (num == numalloced) /* more seqs coming, alloc more room */
            {
              numalloced += 16;
              rseqs  = (char **) ReallocOrDie (rseqs, numalloced*sizeof(char *));
              sqinfo = (SQINFO *) ReallocOrDie (sqinfo, numalloced * sizeof(SQINFO));
            }
        }
      SeqfileClose(dbfp);
    }

  *ret_rseqs  = rseqs;
  *ret_sqinfo = sqinfo;
  *ret_num    = num;
  return 1;
}
/* make_ref_alilist()
 *
 * Like a plain alignment list, but restricted to "canonical" columns.
 * For every raw symbol of s1 that lies under a column flagged in ref,
 * record the raw index of the s2 symbol aligned to it, or -1 when s2
 * has a gap there.
 *
 * ref         - per-column flags, indexed by column of k1; nonzero = canonical
 * k1          - aligned sequence whose columns ref refers to
 * k2          - not used
 * s1, s2      - the aligned pair the list is built for
 * ret_s1_list - RETURN: the list (caller must free)
 * ret_listlen - RETURN: number of entries in the list
 *
 * Always returns 1.
 */
/*ARGSUSED*/
static int
make_ref_alilist(int *ref, char *k1, char *k2, char *s1, char *s2, int **ret_s1_list, int *ret_listlen)
{
  int *list;                    /* the list under construction           */
  int *is_canon;                /* is_canon[i]: raw symbol i is counted  */
  int  col;                     /* column position in alignment          */
  int  r1, r2;                  /* raw symbol index in s1, s2            */
  int  n;                       /* entries written to list so far        */

  /* Neither array is sized beyond the length of s1. */
  list     = (int *) MallocOrDie (sizeof(int) * strlen(s1));
  is_canon = (int *) MallocOrDie (sizeof(int) * strlen(s1));

  /* Pass 1: walk k1 against ref to flag which raw symbol numbers
   * sit under a canonical column.
   */
  r1 = 0;
  for (col = 0; k1[col] != '\0'; col++)
    {
      if (isgap(k1[col])) continue;
      is_canon[r1] = ref[col] ? 1 : 0;
      r1++;
    }

  /* Pass 2: walk the s1/s2 pair. The list position (n) advances
   * only for canonical s1 symbols, independently of r1 and r2.
   */
  r1 = 0;
  r2 = 0;
  n  = 0;
  for (col = 0; s1[col] != '\0'; col++)
    {
      int s1_has_sym = ! isgap(s1[col]);
      int s2_has_sym = ! isgap(s2[col]);

      if (s1_has_sym)
        {
          if (is_canon[r1])
            {
              list[n] = s2_has_sym ? r2 : -1;
              n++;
            }
          r1++;
        }
      if (s2_has_sym) r2++;
    }

  free(is_canon);
  *ret_listlen = n;
  *ret_s1_list = list;
  return 1;
}
/* Function: StateOccupancy() * Date: SRE, Wed Nov 11 09:46:15 1998 [St. Louis] * * Purpose: Calculate the expected state occupancy for * a given HMM in generated traces. * * Note that expected prob of getting into * any special state in a trace is trivial: * S,N,B,E,C,T = 1.0 * J = E->J transition prob * * Args: hmm - the model * ret_mp - RETURN: [1..M] prob's of occupying M * ret_ip - RETURN: [1..M-1] prob's of occupying I * ret_dp - RETURN: [1..M] prob's of occupying D * * Returns: void * mp, ip, dp are malloc'ed here. Caller must free(). */ void StateOccupancy(struct plan7_s *hmm, float **ret_mp, float **ret_ip, float **ret_dp) { float *fmp, *fip, *fdp; /* forward probabilities */ int k; /* counter for nodes */ /* Initial allocations */ fmp = MallocOrDie (sizeof(float) * (hmm->M+1)); fip = MallocOrDie (sizeof(float) * (hmm->M)); fdp = MallocOrDie (sizeof(float) * (hmm->M+1)); /* Forward pass. */ fdp[1] = hmm->tbd1; fmp[1] = hmm->begin[1]; fip[1] = fmp[1] * hmm->t[1][TMI]; for (k = 2; k <= hmm->M; k++) { /* M: from M,D,I at k-1, or B; count t_II as 1.0 */ fmp[k] = fmp[k-1] * hmm->t[k-1][TMM] + fip[k-1] + fdp[k-1] * hmm->t[k-1][TDM] + hmm->begin[k]; /* D: from M,D at k-1 */ fdp[k] = fmp[k-1] * hmm->t[k-1][TMD] + fdp[k-1] * hmm->t[k-1][TDD]; /* I: from M at k; don't count II */ if (k < hmm->M) { fip[k] = fmp[k] * hmm->t[k][TMI]; } SQD_DASSERT2((fabs(1.0f - fmp[k] - fdp[k]) < 1e-6f)); fmp[k] /= fmp[k]+fdp[k]; /* prevent propagating fp errors */ fdp[k] /= fmp[k]+fdp[k]; } /* We don't need a backward pass; all backwards P's are 1.0 * by definition (you can always get out of a state with P=1). * The only situation where this might not be true is for * a TII of 1.0, when TIM = 0 -- but in that case, if there's * a finite chance of getting into that insert state, the model * generates infinitely long sequences, so we can consider this * situation "perverse" and disallow it elsewhere in building * profile HMMs. */ /* Return. 
*/ *ret_mp = fmp; *ret_dp = fdp; *ret_ip = fip; }
/* Function: AllocTophits()
 *
 * Purpose:  Create an empty struct tophit_s, used to keep a list of
 *           the top-scoring hits of a database search.
 *
 * Args:     lumpsize - number of hit slots to allocate now; also
 *                      stored as the list's lump size
 *
 * Return:   The new struct tophit_s: no hits yet (num == 0), no
 *           sorted list (hit == NULL), and room for lumpsize unsorted
 *           hits. Caller must free.
 */
struct tophit_s *
AllocTophits(int lumpsize)
{
  struct tophit_s *list;

  list = MallocOrDie (sizeof(struct tophit_s));

  /* bookkeeping */
  list->lump  = lumpsize;
  list->alloc = lumpsize;
  list->num   = 0;

  /* storage: unsorted hits only, nothing sorted yet */
  list->unsrt = MallocOrDie (lumpsize * sizeof(struct hit_s));
  list->hit   = NULL;

  return list;
}
/***************************************************************** * GSI64 index construction routines * SRE, Wed Nov 10 11:49:14 1999 [St. Louis] * * API: * g = GSI64AllocIndex(); * * [foreach filename, <32 char, no directory path] * GSI64AddFileToIndex(g, filename); * filenum++; * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 64bit offset] * GSI64AddKeyToIndex(g, key, filenum, offset); * * GSI64SortIndex(g); * GSI64WriteIndex(fp, g); * GSI64FreeIndex(g); *****************************************************************/ struct gsi64index_s * GSI64AllocIndex(void) { struct gsi64index_s *g; g = MallocOrDie(sizeof(struct gsi64index_s)); g->filenames = MallocOrDie(sizeof(char *) * 10); g->fmt = MallocOrDie(sizeof(int) * 10); g->elems = MallocOrDie(sizeof(struct gsi64key_s) * 100); g->nfiles = 0; g->nkeys = 0; return g; }
/* Add a copy of str to the list of strings in list.
 * The list may be new (no elements allocated), in which case space
 * is allocated for it; a full list is grown by MORE_LIST_SPACE.
 * One slot beyond the allocated count is always reserved so that the
 * entry after the last used one can carry a NULL tag_string as a
 * terminator.
 *
 * Return the index of the new entry on success, otherwise -1.
 * On failure the list is left exactly as it was: a failed realloc()
 * no longer overwrites (and so loses) the existing array.
 */
static int
add_to_taglist(const char *str,StringArray *list)
{
    unsigned ix;

    if(list->num_allocated_elements == list->num_used_elements){
        /* We need more space. */
        if(list->num_allocated_elements == 0){
            /* No elements in the list. */
            list->tag_strings = (TagSelection *)MallocOrDie((INIT_LIST_SPACE+1)*
                                                     sizeof(TagSelection));
            if(list->tag_strings == NULL){
                return -1;
            }
            list->num_allocated_elements = INIT_LIST_SPACE;
            list->num_used_elements = 0;
        }
        else{
            /* Grow through a temporary: if realloc fails the old
             * block is still valid and must stay reachable.
             */
            TagSelection *grown = (TagSelection *)realloc((void *)list->tag_strings,
                        (list->num_allocated_elements+MORE_LIST_SPACE+1)*
                                                     sizeof(TagSelection));
            if(grown == NULL){
                return -1;
            }
            list->tag_strings = grown;
            list->num_allocated_elements += MORE_LIST_SPACE;
        }
    }

    /* There is space. */
    ix = list->num_used_elements;
    list->tag_strings[ix].operator = NONE;
    list->tag_strings[ix].tag_string = (char *) MallocOrDie(strlen(str)+1);
    if(list->tag_strings[ix].tag_string == NULL){
        return -1;
    }
    strcpy(list->tag_strings[ix].tag_string,str);
    list->num_used_elements++;
    /* Make sure that the list is properly terminated at all times. */
    list->tag_strings[ix+1].tag_string = NULL;
    return (int) ix;
}
/* Function: GSIOpen() * * Purpose: Open a GSI file. Returns the number of records in * the file and a file pointer. Returns NULL on failure. * The file pointer should be fclose()'d normally. */ GSIFILE * GSIOpen(char *gsifile) { GSIFILE *gsi; char magic[GSI_KEYSIZE]; gsi = (GSIFILE *) MallocOrDie (sizeof(GSIFILE)); if ((gsi->gsifp = fopen(gsifile, "r")) == NULL) { free(gsi); squid_errno = SQERR_NOFILE; return NULL; } if (! fread(magic, sizeof(char), GSI_KEYSIZE, gsi->gsifp)) { free(gsi); squid_errno = SQERR_NODATA; return NULL; } if (strcmp(magic, "GSI") != 0) { free(gsi); squid_errno = SQERR_FORMAT; return NULL; } if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp)) { free(gsi); squid_errno = SQERR_NODATA; return NULL; } if (! fread(&(gsi->recnum), sizeof(sqd_uint32), 1, gsi->gsifp)) { free(gsi); squid_errno = SQERR_NODATA; return NULL; } gsi->nfiles = sre_ntoh16(gsi->nfiles); /* convert from network short */ gsi->recnum = sre_ntoh32(gsi->recnum); /* convert from network long */ return gsi; }
/* Initialise the count of required pieces prior to reading * in the data. */ static Ending_details * new_ending_details(void) { Ending_details *details = (Ending_details *) MallocOrDie(sizeof(Ending_details)); int c; Piece piece; for(piece = PAWN; piece <= KING; piece++){ for(c = 0; c < 2; c++){ details->num_pieces[c][piece] = 0; details->occurs[c][piece] = EXACTLY; } } /* Fill out some miscellaneous colour based information. */ for(c = 0; c < 2; c++){ /* Only the KING is a requirement for each side. */ details->num_pieces[c][KING] = 1; details->match_depth[c] = 0; /* How many general minor pieces to match. */ details->num_minor_pieces[c] = 0; details->minor_occurs[c] = EXACTLY; } /* Assume that the match must always have a depth of at least two for * two half-move stability. */ details->move_depth = 2; details->next = NULL; return details; }
/* Function: Plan7ComlogAppend()
 * Date:     SRE, Wed Oct 29 09:57:30 1997 [TWA 721 over Greenland]
 *
 * Purpose:  Append one command line to the model's command line log.
 *           The new entry is written as a '\n' followed by the argv
 *           strings joined with single spaces. hmm->comlog is
 *           allocated on first use and reallocated afterwards.
 *
 * Args:     hmm  - model whose comlog is extended
 *           argc - number of strings in argv
 *           argv - command line words
 *
 * Return:   (void)
 */
void
Plan7ComlogAppend(struct plan7_s *hmm, int argc, char **argv)
{
  int len;
  int i;

  /* Length of the new entry: the leading '\n', every word, and one
   * space between neighbours. Counting this as plain "argc" would
   * reserve no byte for the '\n' when argc is 0.
   */
  len = 1;
  for (i = 0; i < argc; i++)
    len += strlen(argv[i]);
  if (argc > 1)
    len += argc - 1;

  /* allocate */
  if (hmm->comlog != NULL)
    {
      len        += strlen(hmm->comlog);
      hmm->comlog = ReallocOrDie(hmm->comlog, sizeof(char)* (len+1));
    }
  else
    {
      hmm->comlog = MallocOrDie(sizeof(char)* (len+1));
      *(hmm->comlog) = '\0'; /* need this to make strcat work */
    }

  /* append */
  strcat(hmm->comlog, "\n");
  for (i = 0; i < argc; i++)
    {
      strcat(hmm->comlog, argv[i]);
      if (i < argc-1) strcat(hmm->comlog, " ");
    }
}
/* addstruc()
 *
 * Append the sequence characters of s (those accepted by isSeqChar())
 * to the secondary structure string V->sqinfo->ss.
 *
 * On the first call for a sequence (SQINFO_SS not yet set in the
 * flags) the string is allocated with V->maxseq+1 bytes and the flag
 * is set. On later calls it is reallocated to V->maxseq+1 bytes and
 * writing resumes at its current terminator.
 *
 * NOTE(review): nothing in this function checks the number of
 * characters written against V->maxseq; presumably the caller keeps
 * maxseq large enough -- confirm.
 */
static void
addstruc(char *s, struct ReadSeqVars *V)
{
  char *out;                    /* next write position in ss   */
  char *in;                     /* next read position in s     */

  if (V->sqinfo->flags & SQINFO_SS)
    {
      /* continuing an existing string: resize, then find its end */
      V->sqinfo->ss = (char *) ReallocOrDie (V->sqinfo->ss, (V->maxseq+1) * sizeof(char));
      for (out = V->sqinfo->ss; *out != '\0'; out++)
        ;
    }
  else
    {
      /* first structure line seen for this sequence */
      V->sqinfo->ss = (char *) MallocOrDie ((V->maxseq+1) * sizeof(char));
      V->sqinfo->flags |= SQINFO_SS;
      out = V->sqinfo->ss;
    }

  for (in = s; *in != 0; in++)
    {
      if (isSeqChar((int) *in))
        {
          *out = *in;
          out++;
        }
    }
  *out = '\0';
}
/* Function: make_alilist()
 *
 * Purpose:  For every raw (non-gap) symbol of s1, record the raw
 *           index of the s2 symbol it is aligned to, or -1 when s2
 *           has a gap in that column. The list has one entry per raw
 *           symbol of s1.
 *
 * Args:     s1          - sequence to construct the list for
 *           s2          - sequence s1 is aligned to
 *           ret_s1_list - RETURN: the constructed list (caller must free)
 *           ret_listlen - RETURN: length of the list
 *
 * Returns:  Always 1.
 */
static int
make_alilist(char *s1, char *s2, int **ret_s1_list, int *ret_listlen)
{
  int *list;
  int  col;                     /* column position in alignment        */
  int  n1;                      /* raw symbols of s1 seen so far       */
  int  n2;                      /* raw symbols of s2 seen so far       */

  /* The list cannot outgrow s1, so the aligned length of s1 is a
   * (slightly generous) upper bound.
   */
  list = (int *) MallocOrDie (sizeof(int) * strlen(s1));

  n1 = 0;
  n2 = 0;
  for (col = 0; s1[col] != '\0'; col++)
    {
      int s2_has_sym = ! isgap(s2[col]);

      /* An s1 symbol gets an entry: its partner's raw index or -1. */
      if (! isgap(s1[col]))
        {
          list[n1] = s2_has_sym ? n2 : -1;
          n1++;
        }
      /* An s2 symbol advances the s2 raw counter. */
      if (s2_has_sym)
        n2++;
    }

  *ret_listlen = n1;
  *ret_s1_list = list;
  return 1;
}
/* Function: GSI64Open() * * Purpose: Open a GSI64 file. Returns the number of records in * the file and a file pointer. Returns NULL on failure. * The file pointer should be fclose()'d normally. */ GSI64FILE * GSI64Open(char *gsifile) { GSI64FILE *gsi; char magic[GSI64_KEYSIZE]; gsi = (GSI64FILE *) MallocOrDie (sizeof(GSI64FILE)); if ((gsi->gsifp = fopen(gsifile, "r")) == NULL) { free(gsi); squid_errno = SQERR_NOFILE; return NULL; } if (! fread(magic, sizeof(char), GSI64_KEYSIZE, gsi->gsifp)) { free(gsi); squid_errno = SQERR_NODATA; return NULL; } if (strcmp(magic, "GSI64") != 0) { free(gsi); squid_errno = SQERR_FORMAT; return NULL; } if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp)) { free(gsi); squid_errno = SQERR_NODATA; return NULL; } if (! fread(&(gsi->recnum), sizeof(sqd_uint64), 1, gsi->gsifp)) { free(gsi); squid_errno = SQERR_NODATA; return NULL; } #if 0 /* HACK! we don't byteswap */ gsi->nfiles = sre_ntohs(gsi->nfiles); /* convert from network short */ gsi->recnum = sre_ntohl(gsi->recnum); /* convert from network long */ #endif return gsi; }
/* Function: MSAAppendGR()
 * Date:     SRE, Thu Jun  3 06:34:38 1999 [Madison]
 *
 * Purpose:  Attach one unparsed #=GR markup line to an MSA, growing
 *           the per-tag tables as needed.
 *
 *           Repeated calls with the same tag and sequence concatenate
 *           the value strings, which is what parsing a multiblock
 *           alignment file requires.
 *
 * Args:     msa   - multiple alignment structure
 *           tag   - markup tag (e.g. "SS")
 *           sqidx - index of the sequence the markup belongs to
 *           value - markup, one char per aligned column
 *
 * Returns:  (void)
 */
void
MSAAppendGR(MSA *msa, char *tag, int sqidx, char *value)
{
  char **row;                   /* freshly allocated per-sequence row */
  int    tagidx;
  int    i;

  if (msa->gr_tag != NULL)
    {
      /* Tables exist already; is this a tag we have seen before? */
      tagidx = GKIKeyIndex(msa->gr_idx, tag);
      if (tagidx < 0)
        {
          /* New tag. The tables grow one tag at a time, so a new
           * tag always means a realloc.
           */
          tagidx = GKIStoreKey(msa->gr_idx, tag);
          SQD_DASSERT1((tagidx == msa->ngr));
          msa->gr_tag = ReallocOrDie(msa->gr_tag, (msa->ngr+1) * sizeof(char *));
          msa->gr     = ReallocOrDie(msa->gr,     (msa->ngr+1) * sizeof(char **));

          row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
          msa->gr[msa->ngr] = row;
          for (i = 0; i < msa->nseqalloc; i++)
            msa->gr[msa->ngr][i] = NULL;
        }
    }
  else
    {
      /* Very first GR tag: create the tables, then the key index. */
      msa->gr_tag = MallocOrDie(sizeof(char *));
      msa->gr     = MallocOrDie(sizeof(char **));

      row = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      msa->gr[0] = row;
      for (i = 0; i < msa->nseqalloc; i++)
        msa->gr[0][i] = NULL;

      msa->gr_idx = GKIInit();
      tagidx      = GKIStoreKey(msa->gr_idx, tag);
      SQD_DASSERT1((tagidx == 0));
    }

  /* A tag index equal to ngr marks a tag that was just added. */
  if (tagidx == msa->ngr)
    {
      msa->gr_tag[tagidx] = sre_strdup(tag, -1);
      msa->ngr++;
    }

  sre_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1);
  return;
}
/* Function: PrintNewHampshireTree()
 *
 * Purpose:  Print a tree in the "New Hampshire" standard format (see
 *           PHYLIP's draw.doc for the definition).
 *
 *           The text is produced left to right by a preorder walk of
 *           the tree, driven by an explicit stack of node codes:
 *              0..N-1   leaf; the code is the sequence index
 *              N..2N-2  interior node; code-N indexes tree[];
 *                       code N is the root
 *              2N..3N-2 marker for closing an interior node;
 *                       code-2N indexes tree[]
 *
 * Args:     fp    - file to print to
 *           ainfo - alignment info, including sequence names
 *           tree  - tree to print
 *           N     - number of leaves
 *
 * Return:   (void)
 */
void
PrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N)
{
  struct intstack_s *pending;   /* node codes still to be printed      */
  float *branch;                /* branch length above each node code  */
  int    code;
  int    need_comma;            /* TRUE if a separator must come first */

  branch  = (float *) MallocOrDie (sizeof(float) * (2*N-1));
  pending = InitIntStack();
  PushIntStack(pending, N);     /* start at the root */
  need_comma = FALSE;

  while (PopIntStack(pending, &code))
    {
      if (code >= 2*N)
        {
          /* Closing an interior node: the root ends the whole tree,
           * anything else prints its own branch length.
           */
          if (code == 2*N) fprintf(fp, ");\n");
          else             fprintf(fp, "):%.5f", branch[code-N]);
          need_comma = TRUE;
        }
      else if (code >= N)
        {
          /* Opening an interior node. */
          int node = code - N;

          if (need_comma) fputs(",\n", fp);
          fputs("(", fp);

          /* Stack order: closing marker, right child, left child --
           * so the left child is popped (printed) first.
           */
          PushIntStack(pending, code+N);
          PushIntStack(pending, tree[node].right);
          PushIntStack(pending, tree[node].left);

          /* Remember the children's branch lengths for later. */
          branch[tree[node].right] = tree[node].rblen;
          branch[tree[node].left]  = tree[node].lblen;
          need_comma = FALSE;
        }
      else
        {
          /* Leaf: name:branchlength */
          if (need_comma) fputs(",", fp);
          fprintf(fp, "%s:%.5f", ainfo->sqinfo[code].name, branch[code]);
          need_comma = TRUE;
        }
    }

  FreeIntStack(pending);
  free(branch);
  return;
}
/* Set up the global game header with room for the default number of
 * tags (ORIGINAL_NUMBER_OF_TAGS), every tag slot empty (NULL).
 * The space will have to be increased if new tags are
 * identified in the program source.
 */
void
init_game_header(void)
{
    unsigned slot = 0;

    GameHeader.header_tags_length = ORIGINAL_NUMBER_OF_TAGS;
    GameHeader.Tags = (char **) MallocOrDie(GameHeader.header_tags_length*
                                            sizeof(*GameHeader.Tags));
    /* No tag has a value yet. */
    while(slot < GameHeader.header_tags_length){
        GameHeader.Tags[slot] = (char *) NULL;
        slot++;
    }
}
/* AllocFancyAli()
 *
 * Allocate an empty struct fancyali_s: every text line and both name
 * pointers are NULL, and the sequence coordinates sqfrom/sqto are 0.
 * Returns the new structure.
 */
struct fancyali_s *
AllocFancyAli(void)
{
  struct fancyali_s *ali;

  ali = MallocOrDie (sizeof(struct fancyali_s));

  /* display lines */
  ali->rfline = NULL;
  ali->csline = NULL;
  ali->model  = NULL;
  ali->mline  = NULL;
  ali->aseq   = NULL;

  /* names */
  ali->query  = NULL;
  ali->target = NULL;

  /* coordinates */
  ali->sqfrom = 0;
  ali->sqto   = 0;

  return ali;
}
void init_tag_lists(void) { int i; tag_list_length = ORIGINAL_NUMBER_OF_TAGS; TagLists = (StringArray *) MallocOrDie(tag_list_length*sizeof(*TagLists)); for(i = 0; i < tag_list_length; i++){ TagLists[i].num_allocated_elements = 0; TagLists[i].num_used_elements = 0; TagLists[i].tag_strings = (TagSelection *) NULL; } }
/* Function: DigitizeSequence()
 *
 * Purpose:  Convert a text sequence to its digitized form: a char
 *           array in which positions 1..L hold SymbolIndex() of the
 *           residues and positions 0 and L+1 hold a sentinel byte
 *           equal to Alphabet_iupac (described upstream as one more
 *           than the largest allowed symbol index).
 *
 *           Assumes that 'X', the fully degenerate character,
 *           is the last character in the allowed alphabet.
 *
 * Args:     seq - sequence to be digitized (0..L-1)
 *           L   - length of sequence
 *
 * Return:   digitized sequence, L+2 bytes.
 *           Allocated here; must be free'd by the caller.
 */
char *
DigitizeSequence(char *seq, int L)
{
  char *dsq;
  int   pos;                    /* 0-based position in seq */

  dsq = MallocOrDie (sizeof(char) * (L+2));

  dsq[L+1] = (char) Alphabet_iupac;     /* trailing sentinel */
  dsq[0]   = dsq[L+1];                  /* leading sentinel  */

  /* seq is 0-based, the digitized copy is 1-based */
  for (pos = 0; pos < L; pos++)
    dsq[pos+1] = SymbolIndex(seq[pos]);

  return dsq;
}
/* Function: DedigitizeSequence()
 * Date:     SRE, Tue Dec 16 10:39:19 1997 [StL]
 *
 * Purpose:  Turn a digitized sequence back into text. Digitized
 *           positions 1..L are looked up in Alphabet[] and written to
 *           positions 0..L-1 of a new NUL-terminated string.
 *
 * Args:     dsq - digitized sequence (residues at 1..L)
 *           L   - length of sequence
 *
 * Return:   the text sequence, L+1 bytes, allocated here.
 */
char *
DedigitizeSequence(char *dsq, int L)
{
  char *text;
  int   pos;                    /* 1-based position in dsq */

  text = MallocOrDie(sizeof(char) * (L+1));

  for (pos = 1; pos <= L; pos++)
    text[pos-1] = Alphabet[(int) dsq[pos]];
  text[L] = '\0';

  return text;
}
void TextureAtlas::Initialize(uint32_t width, uint32_t height, xo::TexFormat format, uint32_t padding) { TexWidth = width; TexHeight = height; Padding = padding; TexFormat = format; TexStride = (int) (width * TexFormatBytesPerPixel(format)); size_t nbytes = height * TexStride; TexData = (uint8_t*) MallocOrDie(nbytes); PosTop = Padding; PosBottom = Padding; PosRight = Padding; }
/* Function: MSASetSeqAccession()
 * Date:     SRE, Mon Jun 21 04:13:33 1999 [Sanger Centre]
 *
 * Purpose:  Set a sequence accession in an MSA structure.
 *           The accession array is created (all entries NULL) on
 *           first use. The MSA stores its own copy of acc. If the
 *           sequence already had an accession, the old copy is
 *           released rather than leaked.
 *
 * Args:     msa    - multiple alignment to add accession to
 *           seqidx - index of sequence to attach accession to
 *           acc    - accession
 *
 * Returns:  void
 */
void
MSASetSeqAccession(MSA *msa, int seqidx, char *acc)
{
  char *copy;
  int   x;

  if (msa->sqacc == NULL)
    {
      msa->sqacc = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      for (x = 0; x < msa->nseqalloc; x++)
        msa->sqacc[x] = NULL;
    }

  /* Duplicate first, free second: acc may be the very string that is
   * currently stored. Assumes every slot is either NULL or a string
   * owned by the MSA, as established by the initialization above.
   */
  copy = sre_strdup(acc, -1);
  free(msa->sqacc[seqidx]);
  msa->sqacc[seqidx] = copy;
}
/* Function: MSASetSeqDescription()
 * Date:     SRE, Mon Jun 21 04:21:09 1999 [Sanger Centre]
 *
 * Purpose:  Set a sequence description in an MSA structure.
 *           The description array is created (all entries NULL) on
 *           first use. The MSA stores its own copy of desc. If the
 *           sequence already had a description, the old copy is
 *           released rather than leaked.
 *
 * Args:     msa    - multiple alignment to add description to
 *           seqidx - index of sequence to attach description to
 *           desc   - description
 *
 * Returns:  void
 */
void
MSASetSeqDescription(MSA *msa, int seqidx, char *desc)
{
  char *copy;
  int   x;

  if (msa->sqdesc == NULL)
    {
      msa->sqdesc = MallocOrDie(sizeof(char *) * msa->nseqalloc);
      for (x = 0; x < msa->nseqalloc; x++)
        msa->sqdesc[x] = NULL;
    }

  /* Duplicate first, free second: desc may be the very string that is
   * currently stored. Assumes every slot is either NULL or a string
   * owned by the MSA, as established by the initialization above.
   */
  copy = sre_strdup(desc, -1);
  free(msa->sqdesc[seqidx]);
  msa->sqdesc[seqidx] = copy;
}
/* Function: MSAAppendGC()
 * Date:     SRE, Thu Jun  3 06:25:14 1999 [Madison]
 *
 * Purpose:  Add an unparsed #=GC markup line to the MSA
 *           structure, allocating as necessary.
 *
 *           When called multiple times for the same tag,
 *           appends value strings together -- used when
 *           parsing multiblock alignment files, for
 *           example.
 *
 * Args:     msa   - multiple alignment structure
 *           tag   - markup tag (e.g. "CS")
 *           value - markup, one char per aligned column
 *
 * Returns:  (void)
 */
void
MSAAppendGC(MSA *msa, char *tag, char *value)
{
  int tagidx;

  /* Is this an unparsed tag name that we recognize?
   * If not, handle adding it to index, and reallocating
   * as needed.
   */
  if (msa->gc_tag == NULL)      /* first tag? init w/ malloc  */
    {
      msa->gc_tag = MallocOrDie(sizeof(char *));
      msa->gc     = MallocOrDie(sizeof(char *));
      msa->gc_idx = GKIInit();
      tagidx      = GKIStoreKey(msa->gc_idx, tag);
      SQD_DASSERT1((tagidx == 0));
      msa->gc[0]  = NULL;
    }
  else
    {
      /* new tag? */
      tagidx = GKIKeyIndex(msa->gc_idx, tag);
      if (tagidx < 0)
        {
          /* it's a new tag name; realloc */
          tagidx = GKIStoreKey(msa->gc_idx, tag);
          /* since we alloc in blocks of 1,
             we always realloc upon seeing a new tag. */
          SQD_DASSERT1((tagidx == msa->ngc));
          /* Both tables hold one string pointer per tag, so each
           * element is a char *, matching the initial allocation
           * above (these were previously sized as char **).
           */
          msa->gc_tag = ReallocOrDie(msa->gc_tag, (msa->ngc+1) * sizeof(char *));
          msa->gc     = ReallocOrDie(msa->gc,     (msa->ngc+1) * sizeof(char *));
          msa->gc[tagidx] = NULL;
        }
    }

  /* A tag index equal to ngc marks a tag that was just added. */
  if (tagidx == msa->ngc)
    {
      msa->gc_tag[tagidx] = sre_strdup(tag, -1);
      msa->ngc++;
    }

  sre_strcat(&(msa->gc[tagidx]), -1, value, -1);
  return;
}
/* Function: DigitizeAlignment()
 *
 * Purpose:  Produce the digitized, unaligned form of every sequence
 *           in an alignment. (Tracebacks are always relative
 *           to digitized unaligned seqs, even if they are
 *           faked from an existing alignment in modelmakers.c.)
 *
 *           Each output sequence starts with a sentinel byte
 *           (Alphabet_iupac), followed by SymbolIndex() of every
 *           non-gap residue, followed by a second sentinel byte.
 *
 * Args:     msa      - alignment to digitize
 *           ret_dsqs - RETURN: array of digitized unaligned sequences
 *
 * Return:   (void)
 *           dsqs is alloced here. Free2DArray(dseqs, nseq).
 */
void
DigitizeAlignment(MSA *msa, char ***ret_dsqs)
{
  char **dsqs;
  int    i;                     /* counter for sequences        */
  int    col;                   /* column in the aligned seq    */
  int    out;                   /* write position in one dsq    */

  dsqs = (char **) MallocOrDie (sizeof(char *) * msa->nseq);

  for (i = 0; i < msa->nseq; i++)
    {
      char *one;

      /* alen residues at most, plus the two sentinels */
      one = (char *) MallocOrDie (sizeof(char) * (msa->alen+2));
      dsqs[i] = one;

      one[0] = (char) Alphabet_iupac;   /* sentinel byte at start */
      out    = 1;
      for (col = 0; col < msa->alen; col++)
        {
          if (isgap(msa->aseq[i][col])) continue;   /* skip gaps */
          one[out] = SymbolIndex(msa->aseq[i][col]);
          out++;
        }
      one[out] = (char) Alphabet_iupac; /* sentinel byte at end */
    }

  *ret_dsqs = dsqs;
}
/* Function: GaussianSetHistogram() * * Purpose: Instead of fitting the histogram to a Gaussian, * simply set the Gaussian parameters from an external source. */ void GaussianSetHistogram(struct histogram_s *h, float mean, float sd) { int sc; int hsize, idx; int nbins; float delta; UnfitHistogram(h); h->fit_type = HISTFIT_GAUSSIAN; h->param[GAUSS_MEAN] = mean; h->param[GAUSS_SD] = sd; /* Calculate the expected values for the histogram. */ hsize = h->max - h->min + 1; h->expect = (float *) MallocOrDie(sizeof(float) * hsize); for (idx = 0; idx < hsize; idx++) h->expect[idx] = 0.; /* Note: ideally we'd use the Gaussian distribution function * to find the histogram occupancy in the window sc..sc+1. * However, the distribution function is hard to calculate. * Instead, estimate the histogram by taking the density at sc+0.5. */ for (sc = h->min; sc <= h->max; sc++) { delta = ((float)sc + 0.5) - h->param[GAUSS_MEAN]; h->expect[sc - h->min] = (float) h->total * ((1. / (h->param[GAUSS_SD] * sqrt(2.*3.14159))) * (exp(-1.*delta*delta / (2. * h->param[GAUSS_SD] * h->param[GAUSS_SD])))); } /* Calculate the goodness-of-fit (within whole region) */ h->chisq = 0.; nbins = 0; for (sc = h->lowscore; sc <= h->highscore; sc++) if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) { delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; h->chisq += delta * delta / h->expect[sc-h->min]; nbins++; } /* -1 d.f. for normalization */ if (nbins > 1) h->chip = (float) IncompleteGamma((double)(nbins-1)/2., (double) h->chisq/2.); else h->chip = 0.; }
struct plan7_s * AllocPlan7Shell(void) { struct plan7_s *hmm; hmm = (struct plan7_s *) MallocOrDie (sizeof(struct plan7_s)); hmm->M = 0; hmm->name = NULL; hmm->acc = NULL; hmm->desc = NULL; hmm->rf = NULL; hmm->cs = NULL; hmm->ca = NULL; hmm->comlog = NULL; hmm->nseq = 0; hmm->ctime = NULL; hmm->map = NULL; hmm->checksum = 0; hmm->tpri = NULL; hmm->mpri = NULL; hmm->ipri = NULL; hmm->ga1 = hmm->ga2 = 0.0; hmm->tc1 = hmm->tc2 = 0.0; hmm->nc1 = hmm->nc2 = 0.0; hmm->t = NULL; hmm->tsc = NULL; hmm->mat = NULL; hmm->ins = NULL; hmm->msc = NULL; hmm->isc = NULL; hmm->begin = NULL; hmm->bsc = NULL; hmm->end = NULL; hmm->esc = NULL; /* DNA translation is not enabled by default */ hmm->dnam = NULL; hmm->dnai = NULL; hmm->dna2 = -INFTY; hmm->dna4 = -INFTY; /* statistical parameters set to innocuous empty values */ hmm->mu = 0.; hmm->lambda = 0.; hmm->flags = 0; return hmm; }
/* Function: ExtremeValueSetHistogram() * * Purpose: Instead of fitting the histogram to an EVD, * simply set the EVD parameters from an external source. * * Args: h - the histogram to set * mu - mu location parameter * lambda - lambda scale parameter * lowbound - low bound of the histogram that was fit * highbound- high bound of histogram that was fit * ndegrees - extra degrees of freedom to subtract in X^2 test: * typically 0 if mu, lambda are parametric, * else 2 if mu, lambda are estimated from data */ void ExtremeValueSetHistogram(struct histogram_s *h, float mu, float lambda, float lowbound, float highbound, int ndegrees) { int sc; int hsize, idx; int nbins; float delta; UnfitHistogram(h); h->fit_type = HISTFIT_EVD; h->param[EVD_LAMBDA] = lambda; h->param[EVD_MU] = mu; hsize = h->max - h->min + 1; h->expect = (float *) MallocOrDie(sizeof(float) * hsize); for (idx = 0; idx < hsize; idx++) h->expect[idx] = 0.; /* Calculate the expected values for the histogram. */ for (sc = h->min; sc <= h->max; sc++) h->expect[sc - h->min] = ExtremeValueE((float)(sc), h->param[EVD_MU], h->param[EVD_LAMBDA], h->total) - ExtremeValueE((float)(sc+1), h->param[EVD_MU], h->param[EVD_LAMBDA], h->total); /* Calculate the goodness-of-fit (within whole region) */ h->chisq = 0.; nbins = 0; for (sc = lowbound; sc <= highbound; sc++) if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) { delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; h->chisq += delta * delta / h->expect[sc-h->min]; nbins++; } /* Since we fit the whole histogram, there is at least * one constraint on chi-square: the normalization to h->total. */ if (nbins > 1 + ndegrees) h->chip = (float) IncompleteGamma((double)(nbins-1-ndegrees)/2., (double) h->chisq/2.); else h->chip = 0.; }