void countcat(int *tags, int n,int *ncat,int nclass) /* simple frequency count of integer array */ { int i, k; ivzero(ncat, nclass) ; for (i=0 ; i<n ; i++) { k = tags[i] ; if ( (k<0) || (k >= nclass)) fatalx("(countcat) bounds error\n") ; ++ncat[k] ; } }
void ransamp(int *samp, int nsamp, double *p, int plen) /** pick nsamp elements from random distribution uses randis but array is at least sorted optimally */ { double *px ; int *indx ; double y ; int i, j, k ; if (plen<=1) { ivzero(samp, nsamp) ; return ; } ZALLOC(px, plen, double) ; ZALLOC(indx, plen, int) ; y = asum(p, plen) ; vst(px, p, -1.0/y, plen) ; sortit(px, indx, plen) ; vst(px, px, -1.0, plen) ; for (i=0; i<nsamp; i++) { /** really need binary chop picker */ j = randis(px, plen) ; if (j<0) { for (k=0; k<plen; k++) { printf("zz %d %d %12.6f %12.6f\n",k, indx[k], p[k], px[k]) ; } fatalx("bad ransamp\n") ; } k = indx[j] ; samp[i] = k ; } free (px) ; free (indx) ; }
int main() { int n=50, m=16, a=5 ; int k, t, iter ; int hist[100] ; double y ; ivzero(hist, 100) ; for (iter =1 ; iter <= 10000; ++iter) { t = ranhprob(n, a, m) ; ++hist[t] ; } for (k=0; k<=a ; ++k) { y = exp(loghprob(n, a, m, k)) ; printf("%3d %3d %9.3f\n", k, hist[k], y) ; } }
void ranmultinom(int *samp, int n, double *p, int len) // multinomial sample p is prob dist n samples returned // work is O(len^2) which is silly { int x ; double *pp ; if (len==0) return ; ivzero(samp, len) ; if (n<=0) return ; if (len==1) { samp[0] = n ; return ; } ZALLOC(pp, len, double) ; copyarr(p, pp, len) ; bal1(pp, len) ; samp[0] = x = ranbinom(n, pp[0]) ; ranmultinom(samp+1, n-x, p+1, len-1) ; free(pp) ; }
double unbiasedest(int *ndx, int ndsize, int **counts) { /** ndx is ndsize array containing small integers coding pop index of each bracket (pop0 assumed) thus ndsize = 4 ndx = (1,1,1,1) codes (p_0-p_1)^4 thus ndsize = 4 ndx = (1,1,2,3) codes (p_0-p_1)^2 (p_0-p_2) (p_0-p_3) counts [][] is integer array containing counts[k][0] is count for variant allele for pop k counts[k][1] is count for reference allele for pop k */ double xtop, xbot, yest, y ; int popind[20] ; int popmax, j, k, n, nmax, a, t, s ; int *tcounts ; double **xmomest, yp ; ivmaxmin(ndx, ndsize, &popmax, NULL) ; //printf("popmax: %d\n", popmax) ; ZALLOC(tcounts, popmax+1, int) ; for (j=0; j <= popmax; ++j) { tcounts[j] = counts[j][0] + counts[j][1] ; } /** unbiased estimate of p_j^k */ xmomest = initarray_2Ddouble(popmax+1, ndsize, 0.0) ; for (j=0; j<= popmax; ++j) { xmomest[j][0] = 1.0 ; for (k=1; k<=ndsize; ++k) { xtop = ifall(counts[j][0], k) ; xbot = ifall(tcounts[j], k) ; if (xbot <= 0.1) xmomest[j][k] = -10000.0 ; else xmomest[j][k] = (double) xtop / (double) xbot ; //printf("zz %3d %3d %9.3f\n", j, k, xmomest[j][k] ) ; } } nmax = (1<<(ndsize)) -1 ; yest = 0.0 ; //printf("nmax: %d\n", nmax) ; for (n=0; n<= nmax; ++n) { t = n ; ivzero(popind, popmax+1) ; for (k=0; k<ndsize; ++k) { a = 0 ; s = t & 1 ; t = t >> 1 ; if (s==1) a = ndx[k] ; ++popind[a] ; } yp = 1.0 ; for (j=0; j<=popmax; ++j) { t = popind[j] ; s = 0 ; if (j>0) s = t % 2 ; // flags sign y = xmomest[j][t] ; if (y < -1.0) { free(tcounts) ; free2D(&xmomest, popmax+1) ; return (-10000.0) ; } if (s==1) y = -y ; yp *= y ; } //printf(" %12.6f ", yp) ; //printimat(popind, 1, popmax+1) ; yest += yp ; } if (fabs(yest) >= 100) yest = -10000 ; free(tcounts) ; free2D(&xmomest, popmax+1) ; return (yest) ; }
double fstcol(double *estn, double *estd, SNP *cupt, int *xindex, int *xtypes, int nrows, int type1, int type2) { int c1[2], c2[2], *cc ; int *rawcol ; int k, g, i ; double ya, yb, yaa, ybb, p1, p2, en, ed ; double z, zz, h1, h2, yt ; int **ccc ; static int ncall = 0 ; ++ncall ; ccc = initarray_2Dint(nrows, 2, 0) ; ZALLOC(rawcol, nrows, int) ; getrawcolx(ccc, cupt, xindex, nrows, indivmarkers) ; getrawcol(rawcol, cupt, xindex, nrows) ; ivzero(c1, 2) ; ivzero(c2, 2) ; for (i=0; i< nrows; i++) { k = xtypes[i] ; cc = NULL ; if (k==type1) cc = c1 ; if (k==type2) cc = c2 ; if (cc == NULL) continue ; g = ccc[i][0] ; if (ncall < 1000) { // printf("zz %d %d %d\n", rawcol[i], ccc[i][0], ccc[i][1]) ; } if (g<0) continue ; ivvp(cc, cc, ccc[i], 2) ; } if (ncall < 0) { printf("qqq\n") ; printimat(c1, 1, 2) ; printimat(c2, 1, 2) ; } ya = c1[0] ; yb = c1[1] ; yaa = c2[0] ; ybb = c2[1] ; z = ya + yb ; zz = yaa+ybb ; if ((z<1.1) || (zz<1.1)) { *estn = 0.0 ; *estd = -1.0 ; free(rawcol) ; free2Dint(&ccc, nrows) ; return 0.0; } yt = ya+yb ; p1 = ya/yt ; h1 = ya*yb/(yt*(yt-1.0)) ; yt = yaa+ybb ; p2 = yaa/yt ; h2 = yaa*ybb/(yt*(yt-1.0)) ; en = (p1-p2)*(p1-p2) ; en -= h1/z ; en -= h2/zz ; ed = en ; ed += h1 ; ed += h2 ; *estn = en ; *estd = ed ; free(rawcol) ; free2Dint(&ccc, nrows) ; return z + zz ; }
double oldfstcol(double *estn, double *estd, SNP *cupt, int *xindex, int *xtypes, int nrows, int type1, int type2) { int c1[2], c2[2], *cc ; int *rawcol ; int k, g, i ; double ya, yb, yaa, ybb, p1, p2, en, ed ; double z, zz, h1, h2, yt ; static int ncall = 0; ++ncall ; ZALLOC(rawcol, nrows, int) ; getrawcol(rawcol, cupt, xindex, nrows) ; ivzero(c1, 2) ; ivzero(c2, 2) ; for (i=0; i< nrows; i++) { k = xtypes[i] ; cc = NULL ; if (k==type1) cc = c1 ; if (k==type2) cc = c2 ; if (cc == NULL) continue ; g = rawcol[i] ; if (g<0) continue ; cc[0] += g ; cc[1] += 2-g ; } if (ncall < 0) { printf("qq2\n") ; printimat(c1, 1, 2) ; printimat(c2, 1, 2) ; } ya = c1[0] ; yb = c1[1] ; yaa = c2[0] ; ybb = c2[1] ; z = ya + yb ; zz = yaa+ybb ; if ((z<0.1) || (zz<0.1)) { *estn = 0.0 ; *estd = -1.0 ; free(rawcol) ; return 0.0; } yt = ya+yb ; p1 = ya/yt ; h1 = ya*yb/(yt*(yt-1.0)) ; yt = yaa+ybb ; p2 = yaa/yt ; h2 = yaa*ybb/(yt*(yt-1.0)) ; en = (p1-p2)*(p1-p2) ; en -= h1/z ; en -= h2/zz ; ed = en ; ed += h1 ; ed += h2 ; *estn = en ; *estd = ed ; free(rawcol) ; return z + zz ; }
void dotpops(double *X, char **eglist, int numeg, int *xtypes, int nrows) { double *pp, *npp, val, yy ; int *popsize ; int i, j, k1, k2 ; if (fstonly) return ; ZALLOC(pp, numeg * numeg, double) ; ZALLOC(npp, numeg * numeg, double) ; popsize = xpopsize; ivzero(popsize, numeg) ; for (i=0; i<nrows; i++) { k1 = xtypes[i] ; ++popsize[k1] ; for (j=i+1; j<nrows; j++) { k2 = xtypes[j] ; if (k1 < 0) fatalx("bug\n") ; if (k2 < 0) fatalx("bug\n") ; if (k1>=numeg) fatalx("bug\n") ; if (k2>=numeg) fatalx("bug\n") ; val = X[i*nrows+i] + X[j*nrows+j] - 2.0*X[i*nrows+j] ; pp[k1*numeg+k2] += val ; pp[k2*numeg+k1] += val ; ++npp[k1*numeg+k2] ; ++npp[k2*numeg+k1] ; } } vsp(npp, npp, 1.0e-8, numeg*numeg) ; vvd(pp, pp, npp, numeg*numeg) ; // and normalize so that mean on diagonal is 1 yy = trace(pp, numeg) / (double) numeg ; vst(pp, pp, 1.0/yy, numeg*numeg) ; printf("\n## Average divergence between populations:"); if (numeg<=10) { printf("\n") ; printf("%10s", "") ; for (k1=0; k1<numeg; ++k1) { printf(" %10s", eglist[k1]) ; } printf(" %10s", "popsize") ; printf("\n") ; for (k2=0; k2<numeg; ++k2) { printf("%10s", eglist[k2]) ; for (k1=0; k1<numeg; ++k1) { val = pp[k1*numeg+k2] ; printf(" %10.3f", val) ; } printf(" %10d", popsize[k2]) ; printf("\n") ; } } else { // numeg >= 10 printf("\n") ; for (k2=0; k2<numeg; ++k2) { for (k1=k2; k1<numeg; ++k1) { printf("dotp: %10s", eglist[k2]) ; printf(" %10s", eglist[k1]) ; val = pp[k1*numeg+k2] ; printf(" %10.3f", val) ; printf(" %10d", popsize[k2]) ; printf(" %10d", popsize[k1]) ; printf("\n") ; } } } printf("\n") ; printf("\n") ; fflush(stdout) ; free(pp) ; free(npp) ; }