/*
 * writesnpeigs: rescale SNP eigenvector loadings, report the strongest SNPs
 * of each eigenvector on stdout, and optionally dump all loadings to a file.
 *
 *   snpeigname  output file name; when NULL only the stdout report is produced
 *   xsnplist    ncols SNP pointers; entries with ->ignore set are skipped
 *   ffvecs      numeigs * ncols loadings, eigenvector j starting at
 *               ffvecs + j*ncols.  OVERWRITTEN: every eigenvector is scaled by
 *               sqrt(ncols / asum2(vector)) (asum2 is presumably the sum of
 *               squares -- confirm against its definition)
 *   numeigs     number of eigenvectors
 *   ncols       number of SNPs per eigenvector
 *
 * This is called at end and ffvecs overwritten.
 */
void writesnpeigs(char *snpeigname, SNP **xsnplist, double *ffvecs, int numeigs, int ncols)
{
  enum { NUMBEST = 11 } ;          // the report lists at most 11 SNPs per eigenvector
  double *xpt, y, yscal, *snpsc ;
  int i, j, k, kmax, kmin ;
  SNP *cupt ;
  FILE *fff ;

  for (j=0; j<numeigs; ++j) {
    xpt = ffvecs+j*ncols ;
    y = asum2(xpt, ncols) ;
    // an all-zero vector cannot be rescaled; dividing would fill it with NaN
    if (y <= 0.0) continue ;
    yscal = (double) ncols / y ;
    yscal = sqrt(yscal) ;
    vst(xpt, xpt, yscal, ncols) ;
  }

  // with no SNPs there is nothing to rank (and kmax would index an empty list)
  if (ncols > 0) {
    ZALLOC(snpsc, ncols, double) ;
    vclear(snpsc, -99999, ncols) ;   // ignored SNPs keep this negative sentinel

    for (j=0; j<numeigs; ++j) {
      for (i=0; i<ncols; ++i) {
        cupt = xsnplist[i] ;
        if (cupt -> ignore) continue ;
        y = ffvecs[j*ncols+i] ;
        snpsc[i] = fabs(y) ;
      }
      for (k=0; k<NUMBEST; ++k) {
        vlmaxmin(snpsc, ncols, &kmax, &kmin) ;
        // real scores are >= 0; a negative maximum means only sentinels
        // (ignored or already reported SNPs) are left, so stop here
        if (snpsc[kmax] < 0.0) break ;
        cupt = xsnplist[kmax] ;
        printf("eigbestsnp %4d %20s %2d %12d %9.3f\n", j+1, cupt -> ID, cupt -> chrom, nnint(cupt -> physpos), snpsc[kmax]) ;
        snpsc[kmax] = -1.0 ;         // mark as reported
      }
    }
    free(snpsc) ;
  }

  if (snpeigname == NULL) return ;

  openit (snpeigname, &fff, "w") ;
  for (i=0; i<ncols; ++i) {
    cupt = xsnplist[i] ;
    if (cupt -> ignore) continue ;
    fprintf(fff, "%20s", cupt -> ID) ;
    fprintf(fff, " %2d", cupt -> chrom) ;
    fprintf(fff, " %12d", nnint(cupt -> physpos)) ;
    for (j=0; j<numeigs; ++j) {
      fprintf(fff, " %9.3f", ffvecs[j*ncols+i]) ;
    }
    fprintf(fff, "\n") ;
  }
  fclose(fff) ;
}
/*
 * ranhprob: hypergeometric sampling.
 *
 * Urn with n balls, a of them black.  Pick m without replacement.
 * Returns the number of black balls picked.
 *
 * Rejection sampling after Devroye, Computing (1987): general method for
 * log-concave densities where the mode is known.  The mode comes from
 * modehprob() and log-probabilities from loghprob().
 */
int ranhprob(int n, int a, int m)
{
  int peak, offset, draw ;
  double logpeak, ppeak, width ;
  double upick, uaccept, ycand, ratio ;

  peak = modehprob(n, a, m) ;
  logpeak = loghprob(n, a, m, peak) ;
  ppeak = exp(logpeak) ;
  width = 1 + ppeak ;

  while (1) {
    upick = DRAND() ;
    uaccept = DRAND() ;

    // flat part of the envelope with probability width/(1+width), else exponential tail
    ycand = (upick <= width/(1+width)) ? DRAND()*width/ppeak
                                       : (width+ranexp())/ppeak ;

    offset = nnint(ycand) ;
    if (ranmod(2)==0) offset = -offset ;     // random sign

    draw = peak+offset ;
    if ((draw<0) || (draw>a)) continue ;     // outside 0..a: try again

    ratio = exp(loghprob(n, a, m, draw)-logpeak) ;
    uaccept *= MIN(1, exp(width-ppeak*ycand)) ;
    if (uaccept <= ratio) return draw ;
  }
}
/*
 * fixit: convert n doubles to ints.
 * a[i] receives nnint(b[i]) for i = 0 .. n-1; nothing is written when n <= 0.
 */
void fixit(int *a, double *b, int n)
{
  int left ;

  for (left = n; left > 0; --left) {
    *a++ = nnint(*b++) ;
  }
}
int isprime(long num) // naive algorithm. Implement Pollard rho at some time { int top, x, t ; if (num < 2) return NO ; if (num == 2) return YES ; top = nnint(sqrt(num)) ; for (x=2; x <= top; ++x) { t = num % x ; if (t == 0) return NO ; } return YES ; }
/*
 * ranhprob: hypergeometric sampling by rejection.  Devroye
 *
 * Presumably (from the 0..a range check and the loghprob(n, a, m, x) calls):
 * urn with n balls, a of them black, m drawn without replacement; the return
 * value is the number of black balls drawn.
 *
 * The proposal is uniform on [0, w/pm) with probability w/(1+w) and otherwise
 * w/pm plus an exponential tail, i.e. it follows min(1, exp(w - pm*y)).
 */
int ranhprob(int n, int a, int m)
{
 double v, y ;
 double pm, logpm, w, ru, rw, rat ;
 int mode, x, zans ;

 // mode of the hypergeometric is floor((a+1)(m+1)/(n+2)); a denominator of
 // n+1 can land one past the mode (e.g. n == a == m gives n+1, an impossible value)
 v = (double) (a+1)*(m+1) / (double) (n+2) ;
 mode = (int) v ;

 logpm = loghprob(n, a, m, mode) ;
 pm = exp(logpm) ;
 w = 1 + pm ;
 for (;;) {
  ru = DRAND() ;
  rw = DRAND() ;
  if (ru <= w/(1+w)) y = DRAND()*w/pm ;
  else y = (w+ranexp())/pm ;
  x = nnint(y) ;
  if (ranmod(2)==0) x = -x ;     // random sign
  zans = mode+x ;
  if (zans<0) continue ;
  if (zans>a) continue ;
  rat = exp(loghprob(n, a, m, zans)-logpm) ;
  // the acceptance test must use the same envelope the proposal was drawn
  // from, min(1, exp(w - pm*y)); using exp(1.0 - pm*y) here biases the sampler
  rw *= MIN(1, exp(w-pm*y)) ;
  if (rw <= rat) break ;
 }
 return zans ;
}
/*
 * smartrel driver.
 *
 * Outline of what the code below does:
 *   1. read parameters, SNPs, individuals and genotypes;
 *   2. flag SNPs and samples to ignore (chromosome / position window, no data,
 *      optional killhir2 pass) and drop the ignored SNPs;
 *   3. accumulate the nrows x nrows matrix XTX over the retained SNP columns,
 *      in blocks of at most blocksize columns, optionally applying per-SNP
 *      weights and an LD regression step;
 *   4. scale XTX by trace/(nrows-1), call eigvecs() on it and, while outlier
 *      passes remain, mark samples reported by ridoutlier() as ignored and repeat;
 *   5. apply subouter() for the first numeigs eigenvectors, then print each
 *      sample pair whose XTX[i][j]/sqrt(XTX[i][i]*XTX[j][j]) + 1/(nrows-1)
 *      is at least relthresh.
 *
 * Returns 0 on every path that returns.  fatalx() is called on unusable data
 * (presumably it does not return -- confirm).
 */
int main(int argc, char **argv)
{

  char **eglist ;
  int numeg ;
  int i, j, k, pos;
  int *vv ;
  SNP *cupt, *cupt2 ;
  Indiv *indx ;
  double y1, y2, y ;

  int n0, n1, nkill ;

  int nindiv = 0 ;
  int nignore, numrisks = 1 ;
  SNP **xsnplist ;
  Indiv **xindlist ;
  int *xindex ;
  int nrows, ncols, m ;
  double *XTX, *cc, *evecs, *ww ;
  double *lambda ;
  double *tvecs ;
  int weightmode = NO ;
  int t ;
  double *xmean, *xfancy ;
  double *ldmat = NULL, *ldmat2 = NULL;
  double *ldvv = NULL, *ldvv2 = NULL, *vv2 = NULL ;
  int chrom, numclear ;
  double gdis ;
  int outliter, numoutiter, *badlist, nbad ;
  int a, b, n ;
  FILE *outlfile ;

  int xblock, blocksize=10000 ;
  double *tblock ;

  OUTLINFO *outpt ;
  int *idperm, *vecind ;   // for sort

  readcommands(argc, argv) ;
  printf("## smartrel version: %s\n", WVERSION) ;
  packmode = YES ;
  setomode(&outputmode, omode) ;

  // nothing to do without a parameter file name
  if (parname == NULL) return 0 ;
  if (xchrom == (numchrom+1)) noxdata = NO ;

  // fstonly: no eigenvectors, no outlier passes, no output files
  if (fstonly) {
    printf("fstonly\n") ;
    numeigs = 0 ;
    numoutliter = 0 ;
    numoutiter = 0 ;
    outputname = NULL ;
    snpeigname = NULL ;
  }

  if (fancynorm) printf("norm used\n\n") ;
  else printf("no norm used\n\n") ;

  nostatslim = MAX(nostatslim, 3) ;

  // both streams default to stdout and are redirected only when a name is given
  outlfile = ofile = stdout;

  if (outputname != NULL)  openit(outputname, &ofile, "w") ;
  if (outliername != NULL) openit(outliername, &outlfile, "w") ;
  if (fstdetailsname != NULL) openit(fstdetailsname, &fstdetails, "w") ;

  numsnps =
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;

  numindivs = getindivs(indivname, &indivmarkers) ;
  k = getgenos(genotypename, snpmarkers, indivmarkers,
    numsnps, numindivs, nignore) ;

  // population list: either loaded from poplistname or built from the individuals
  if (poplistname != NULL)
  {
    ZALLOC(eglist, numindivs, char *) ;
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
  }
  else
  {
    setstatus(indivmarkers, numindivs, NULL) ;
    ZALLOC(eglist, MAXPOPS, char *) ;
    numeg = makeeglist(eglist, MAXPOPS, indivmarkers, numindivs) ;
  }
  for (i=0; i<numeg; i++)
  {
    /* printf("%3d %s\n",i, eglist[i]) ; */
  }

  // count individuals whose affstatus is YES
  nindiv=0 ;
  for (i=0; i<numindivs; i++)
  {
    indx = indivmarkers[i] ;
    if(indx -> affstatus == YES) ++nindiv  ;
  }

  // chromosome filter: chromosome 0, anything above numchrom+1, and
  // chromosome numchrom+1 when noxdata is set
  for (i=0; i<numsnps; i++)
  {
    cupt = snpmarkers[i] ;
    chrom = cupt -> chrom ;
    if ((noxdata) && (chrom == (numchrom+1))) cupt-> ignore = YES ;
    if (chrom == 0) cupt -> ignore = YES ;
    if (chrom > (numchrom+1)) cupt -> ignore = YES ;
  }
  // optional single-chromosome window [lopos, hipos] (active when xchrom > 0),
  // then drop SNPs for which numvalidgtx() reports at most one
  for (i=0; i<numsnps; i++)
  {
    cupt = snpmarkers[i] ;
    pos = nnint(cupt -> physpos) ;
    if ((xchrom>0) && (cupt -> chrom != xchrom)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos < lopos)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos > hipos)) cupt -> ignore = YES ;
    if (cupt -> ignore) continue ;
    if (numvalidgtx(indivmarkers, cupt, YES) <= 1)
    {
      printf("nodata: %20s\n", cupt -> ID) ;
      cupt -> ignore = YES ;
    }
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ;
  }

  // ignore individuals for which numvalidgtallind() stored a count of 0
  ZALLOC(vv, numindivs, int) ;
  numvalidgtallind(vv, snpmarkers, numsnps,  numindivs) ;
  for (i=0; i<numindivs; ++i)  {
    if (vv[i] == 0) {
      indx = indivmarkers[i] ;
      indx -> ignore = YES ;
    }
  }
  free(vv) ;

  numsnps = rmsnps(snpmarkers, numsnps, NULL) ;  //  rid ignorable snps

  // missingmode also switches fancynorm off
  if (missingmode) {
    setmiss(snpmarkers, numsnps) ;
    fancynorm = NO ;
  }

  if  (weightname != NULL)
  {
    weightmode = YES ;
    getweights(weightname, snpmarkers, numsnps) ;
  }
  // work space for LD regression; ldmat starts as 1.0e-6 times the identity
  if (ldregress>0) {
    ZALLOC(ldvv,  ldregress*numindivs, double) ;
    ZALLOC(ldvv2,  ldregress*numindivs, double) ;
    ZALLOC(vv2,  numindivs, double) ;
    ZALLOC(ldmat,  ldregress*ldregress, double) ;
    ZALLOC(ldmat2,  ldregress*ldregress, double) ;
    setidmat(ldmat, ldregress) ;
    vst(ldmat, ldmat, 1.0e-6, ldregress*ldregress) ;
  }

  ZALLOC(xindex, numindivs, int) ;
  ZALLOC(xindlist, numindivs, Indiv *) ;
  ZALLOC(xsnplist, numsnps, SNP *) ;

  if (popsizelimit > 0) {
    setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ;
  }

  // nrows = samples used, ncols = snps used (see the message printed below)
  nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
  ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
  printf("number of samples used: %d number of snps used: %d\n", nrows, ncols) ;

/**
  cupt = xsnplist[0] ;
  for (j=0; j<nrows; ++j) {
   k = xindex[j] ;
   g = getgtypes(cupt, k) ;
   indx = indivmarkers[k] ;
   t = indxindex(eglist, numeg, indx -> egroup) ;
   printf("yy1 %20s %20s %20s %d %d %d\n", cupt ->ID, indx -> ID, indx -> egroup, j, k, g) ;
  }
  printf("yya: ") ; printimat(xindex, 1, nrows) ;
  printf("zzindxa:  %s\n", indivmarkers[230] -> egroup) ;
*/

  /* printf("## nrows: %d  ncols  %d\n", nrows, ncols) ; */
  ZALLOC(xmean, ncols, double) ;
  ZALLOC(xfancy, ncols, double) ;
  ZALLOC(XTX, nrows*nrows, double) ;
  ZALLOC(evecs, nrows*nrows, double) ;
  ZALLOC(tvecs, nrows*nrows, double) ;
  ZALLOC(lambda, nrows, double) ;
  ZALLOC(cc, nrows, double) ;
  ZALLOC(ww, nrows, double) ;
  ZALLOC(badlist, nrows, int) ;

  blocksize = MIN(blocksize, ncols) ;
  ZALLOC(tblock, nrows*blocksize, double) ;

  // xfancy is multiplier for column xmean is mean to take off
  // badlist is list of rows to delete (outlier removal)

  // one pass by default; numoutliter outlier passes plus one final pass otherwise
  numoutiter = 1 ;

  if (numoutliter>=1)
  {
    numoutiter = numoutliter+1 ;
    ZALLOC(outinfo, nrows,  OUTLINFO *) ;
    for (k=0; k<nrows; k++)
    {
      ZALLOC(outinfo[k], 1, OUTLINFO) ;
    }
    /* fprintf(outlfile, "##%18s %4s %6s %9s\n", "ID", "iter","eigvec", "score") ; */
  }

  for (outliter = 1; outliter <= numoutiter ; ++outliter)  {
    // fstonly skips the computation: identity matrix, all eigenvalues 1
    if (fstonly) {
     setidmat(XTX, nrows) ;
     vclear(lambda, 1.0, nrows) ;
     break ;
    }
    // later passes reload the SNP list (samples have been marked ignored since)
    if (outliter>1) {
     ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
    }
    vzero(XTX, nrows*nrows) ;
    vzero(tblock, nrows*blocksize) ;
    xblock = 0 ;

    vzero(xmean, ncols) ;
    vclear(xfancy, 1.0, ncols) ;

    for (i=0; i<ncols; i++)
    {
      cupt = xsnplist[i] ;
      chrom = cupt -> chrom ;
      getcolxz(cc, cupt, xindex, nrows, i, xmean, xfancy, &n0, &n1) ;
      // n0, n1 are filled by getcolxz (presumably the two allele counts --
      // confirm); columns at or below minallelecnt contribute zeros
      t = MIN(n0, n1) ;

      if (t <= minallelecnt)  {
       cupt -> ignore = YES ;
       vzero(cc, nrows) ;
      }

      if (weightmode)
      {
        vst(cc, cc, xsnplist[i] -> weight, nrows) ;
      }
      if (ldregress>0) {
        // walk back over up to ldregress earlier SNPs; the walk stops at the
        // start of the list, at a chromosome change, or at genetic distance
        // >= ldlimit, and numclear = ldregress-k+1 is handed to clearld()
        numclear = 0 ;
        for (k=1; k<= ldregress; ++k)  {
         j = i-k ;
         if (j<0) {
          numclear = ldregress-k+1 ;
          break ;
         }
         cupt2 = xsnplist[j] ;
         if (cupt2 -> chrom != chrom) gdis = ldlimit + 1.0 ;
         else gdis = cupt -> genpos - cupt2 -> genpos ;
         if (gdis>=ldlimit) {
           numclear = ldregress-k+1 ;
           break ;
         }
        }
        if (numclear>0) clearld(ldmat, ldvv, ldregress, nrows, numclear) ;
        ldreg(ldmat, ldmat2, cc, vv2, ldvv, ldvv2, ldregress, nrows) ;
        // the results (ldmat2, vv2, ldvv2) become the current state
        copyarr(ldmat2, ldmat, ldregress*ldregress) ;
        copyarr(vv2, cc, nrows) ;
        copyarr(ldvv2, ldvv, ldregress*nrows) ;
      }
      // queue the column; a full block is multiplied out and added to XTX
      copyarr(cc, tblock+xblock*nrows, nrows) ;
      ++xblock ;

/** this is the key code to parallelize */
      if (xblock==blocksize)
      {
        domult(tvecs, tblock, xblock, nrows) ;
        vvp(XTX, XTX, tvecs, nrows*nrows) ;
        xblock = 0 ;
        vzero(tblock, nrows*blocksize) ;
      }
    }

    // flush the last, partially filled block
    if (xblock>0)
    {
     domult(tvecs, tblock, xblock, nrows) ;
     vvp(XTX, XTX, tvecs, nrows*nrows) ;
    }
    symit(XTX, nrows) ;

/**
    a = 0; b=0 ;
    printf("zz1 %12.6f ", XTX[a*nrows+b]) ;
    a = nrows-1; b=nrows-1 ;
    printf(" %12.6f %15.9g\n", XTX[a*nrows+b], asum(XTX, nrows*nrows)) ;
*/

    if (verbose) {
     printdiag(XTX, nrows) ;
    }

    // NOTE(review): nrows == 1 makes this divisor zero
    y = trace(XTX, nrows) / (double) (nrows-1) ;
    if (isnan(y)) fatalx("bad XTX matrix\n") ;
    /* printf("trace:  %9.3f\n", y) ; */
    if (y<=0.0) fatalx("XTX has zero trace (perhaps no data)\n") ;
    vst(XTX, XTX, 1.0/y, nrows * nrows) ;
    /// mean eigenvalue is 1
    eigvecs(XTX, lambda, evecs, nrows) ;
    // eigenvalues are in decreasing order

    if (outliter > numoutliter) break ;
    // last pass skips outliers
    numoutleigs = MIN(numoutleigs, nrows-1) ;
    nbad = ridoutlier(evecs, nrows, numoutleigs, outlthresh, badlist, outinfo) ;
    if (nbad == 0) break ;
    // report each outlier, mark it ignored, then rebuild the sample index
    for (i=0; i<nbad; i++)
    {
      j = badlist[i] ;
      indx = xindlist[j] ;
      outpt = outinfo[j] ;
      fprintf(outlfile, "REMOVED outlier %s iter %d evec %d sigmage %.3f\n", indx -> ID, outliter, outpt -> vecno, outpt -> score) ;
      indx -> ignore = YES ;
    }
    nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
    printf("number of samples after outlier removal: %d\n", nrows) ;
  }

  if (outliername != NULL) fclose(outlfile) ;

  m = numgtz(lambda, nrows)  ;
  /* printf("matrix rank: %d\n", m) ; */
  if (m==0) fatalx("no data\n") ;

/** smartrel code */
  // ww = sqrt(lambda[i]) * eigenvector i, handed to subouter() (presumably
  // subtracts the outer product ww ww' from XTX -- confirm).
  // NOTE(review): nothing here bounds numeigs by nrows.
  for (i=0; i<numeigs; i++) {
   y = sqrt(lambda[i]) ;
   vst(ww, evecs+i*nrows, y, nrows) ;
   subouter(XTX, ww, nrows) ;
  }
  free(tvecs) ;

  // collect pairs i<j at or above relthresh; evecs is reused as the score
  // array (stored negated) and vecind keeps the flat index i*nrows + j
  n = 0 ;
  ZALLOC(vecind, nrows*nrows/2, int) ;
  for (i=0; i<nrows; i++) {
   for (j=i+1; j<nrows; j++) {
    k = i*nrows + j ;
    y1 = XTX[i*nrows+i] ;
    y2 = XTX[j*nrows+j] ;
    y = XTX[k]/sqrt(y1*y2) ;
    y += 1/(double)(nrows-1);
    if (y<relthresh) continue ;
    vecind[n] = k ;
    evecs[n] = -y ;
    ++n ;
   }
  }
  free(XTX) ;
  if (n==0) {
   printf("## nothing above relthresh!\n") ;
   printf("##end of smartrel run\n") ;
   return 0 ;
  }
  // sortit() fills idperm (presumably an ascending sort of the negated
  // scores, so the largest score prints first -- confirm)
  ZALLOC(idperm, n, int) ;
  sortit(evecs, idperm, n) ;
  for (i=0; i<n; i++) {
   j = idperm[i] ;
   k = vecind[j] ;
   a = k/nrows ;       // recover the pair (a, b) from the flat index
   b = k%nrows ;
   printf("rel: %20s ",  xindlist[a] ->ID) ;
   printf("%20s ",  xindlist[b] ->ID) ;
   printf(" %9.3f", -evecs[i]) ;
   printnl() ;
  }

  printf("##end of smartrel run\n") ;
  return 0 ;
}
/*
 * mergeit: merge the genotypes of a second data set into the first.
 *
 *   snpm1, nums1   SNPs of the first set; each ->tagnumber indexes snpm2
 *                  (a negative tag marks the SNP as ignored)
 *   snpm2, nums2   SNPs of the second set (nums2 itself is not read)
 *   pindm1         in: individuals of the first set; out: the merged list
 *   indm2          individuals of the second set
 *   numi1, numi2   number of individuals in each set
 *
 * Every non-ignored SNP of the first set gets a freshly packed genotype row
 * covering numi1 + numi2 individuals (first set, then second set) and its
 * ngtypes / pbuff fields are updated to point at it.
 * Returns the merged number of individuals.
 *
 * The merged list and the packed buffer are never freed here
 * ("we don't bother with a destructor here.  Sloppy code").
 */
int mergeit(SNP **snpm1, SNP **snpm2, Indiv ***pindm1, Indiv **indm2, int nums1, int nums2, int numi1, int numi2)
{
  static unsigned char *packed ;
  static Indiv **merged ;
  Indiv **first, **out ;
  SNP *dst, *src ;
  unsigned char *row ;
  double bytes ;
  long rowlen, nbytes ;
  int total, s, idx, tag, gt ;

  first = *pindm1 ;
  total = numi1 + numi2 ;

  // concatenate the two individual lists
  ZALLOC(merged, total, Indiv *) ;
  out = merged ;
  for (idx = 0; idx < numi1; ++idx) {
    *out++ = first[idx] ;
  }
  for (idx = 0; idx < numi2; ++idx) {
    *out++ = indm2[idx] ;
  }

  // two bits per individual, rounded up to whole bytes, at least 48 bytes per SNP
  bytes = (double) (total * 2) / (8 * (double) sizeof (char)) ;
  rowlen = nnint(ceil(bytes)) ;
  rowlen = MAX(rowlen, 48) ;
  nbytes = nums1*rowlen ;

  ZALLOC(packed, nbytes, unsigned char) ;
  cclear((unsigned char *) packed, 0XFF, nbytes) ;   // wipe to invalid

  row = packed ;
  for (s = 0; s < nums1; s++) {
    dst = snpm1[s] ;
    tag = dst -> tagnumber ;
    if (tag < 0) dst -> ignore = YES ;
    if (dst -> ignore) continue ;

    src = snpm2[tag] ;
    if (src -> isrfake) {
      if (phasedmode == NO) flipalleles(src) ;
      if (phasedmode == YES) flipalleles_phased(src) ;
    }

    // first set occupies slots 0 .. numi1-1; negative genotypes are left untouched
    for (idx = 0; idx < numi1; ++idx) {
      gt = getgtypes(dst, idx) ;
      if (gt >= 0) wbuff((unsigned char *) row, idx, gt) ;
    }
    // second set follows at slots numi1 .. numi1+numi2-1
    for (idx = 0; idx < numi2; ++idx) {
      gt = getgtypes(src, idx) ;
      if (gt >= 0) wbuff((unsigned char *) row, numi1+idx, gt) ;
    }

    dst -> ngtypes = total ;
    dst -> pbuff = (char *) row ;
    row += rowlen ;
  }

  *pindm1 = merged ;
  return total ;
}
int main(int argc, char **argv) { int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ; double rthresh, zt ; int mpflag, ret, numvalidind, nvalid, numvalidsnps ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k, g ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; double ychi, zscore, zthresh = 20.0 ; double y1, y2 ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; char c1, c2 ; int t1, t2 ; malexhet = YES ; // convertf default is don't change the data tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; if (r2thresh > 0.0) killr2 = YES ; if (badpedignore) setbadpedignore() ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; for (i=0; i<numsnps; i++) { if (xchrom == -1) break ; cupt = snpmarkers[i] ; if (cupt -> chrom != xchrom) cupt -> ignore = YES ; if (cupt -> ignore) continue ; t = nnint(cupt -> physpos) ; if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ; } nignore = 0 ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom > maxchrom) cupt -> ignore = YES ; if (cupt -> chrom < minchrom) cupt -> ignore = YES ; if (cupt -> ignore) ++nignore ; } if (numsnps == nignore) fatalx("no valid snps\n") ; numindivs = getindivs(indivname, &indivmarkers) ; if (polarid != NULL) { polarindex = indindex(indivmarkers, numindivs, polarid) ; if (polarindex<0) fatalx("polarid %s not found\n") ; } if (genotypelist!= NULL) { getgenos_list(genotypelist, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } else { setgenotypename(&genotypename, indivname) ; getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } if (newsnpname != NULL) { nums2 = getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ; 
remap(snpmarkers, numsnps, snpm2, nums2) ; snpmarkers = snpm2 ; numsnps = nums2 ; } if (newindivname != NULL) { numind2 = getindivs(newindivname, &indm2) ; remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ; indivmarkers = indm2 ; numindivs = numind2 ; if (polarid != NULL) { polarindex = indindex(indivmarkers, numindivs, polarid) ; } } if (mkdiploid) { numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ; numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ; remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ; indivmarkers = indm2 ; numindivs = numind2 ; } if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (zerodistance) cupt -> genpos = 0.0 ; c1 = cupt -> alleles[0] ; c2 = cupt -> alleles[1] ; t1 = pedval(&c1) % 5 ; t2 = pedval(&c2) % 5 ; // 0 and 5 are no good if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ; } flipstrand(flipstrandname, snpmarkers, numsnps) ; flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ; if (polarindex>=0) { for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; g = getgtypes(cupt, polarindex) ; if (g==0) { printf("polarizing %s\n", cupt -> ID) ; flip1(cupt, NO, YES) ; g = getgtypes(cupt, polarindex) ; if (g!=2) fatalx("badbug\n") ; } if (g != 2) cupt -> ignore = YES ; } } if (outputall) { outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run (outputall mode)\n") ; return 0 ; } if (poplistname != NULL) { ZALLOC(eglist, numindivs, char *) ; numeg = loadlist(eglist, poplistname) ; seteglist(indivmarkers, numindivs, poplistname); for (i=0; i<numindivs; ++i) { indx = indivmarkers[i] ; if (indx -> affstatus == NO) indx -> ignore = YES ; } } else setstatus(indivmarkers, numindivs, "Case") ; numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ; numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, 
numindivs) ; if (killr2) { nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } if ( nhwfilter > 0 ) { hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname); } if ( xregionname ) { excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname); } numvalidind = 0 ; for (i=0; i<numindivs; ++i) { indx = indivmarkers[i] ; if (indx -> ignore) continue ; if (numvalidgtind(snpmarkers, numsnps, i) ==0) { indx -> ignore = YES ; printf("no data for individual: %s\n", indx -> ID) ; } if (indx -> ignore == NO) ++numvalidind ; } if (maxmiss<0) maxmiss = (int) (maxmissfrac * (double) numvalidind+1) ; printf("numvalidind: %5d maxmiss: %5d\n", numvalidind, maxmiss) ; if (numvalidind == 0) fatalx("no valid samples!\n") ; for (k=0; k<numsnps; ++k) { if (maxmiss>numvalidind) break ; cupt = snpmarkers[k] ; t = numvalidind - numvalidgtypes(cupt) ; // printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ; if (maxmiss < t) { cupt -> ignore = YES ; } /** if (numvalidind == t) { printf("no data for snp: %s\n", cupt -> ID) ; cupt -> ignore = YES ; } */ } if (fastdup) { printf("fastdup set %d\n", fastdupnum) ; if (fastdupnum > 0) { setfastdupnum(fastdupnum) ; setfastdupthresh(fastdupthresh, fastdupkill) ; fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ; } } if (decim>0) { snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ; } outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run\n") ; return 0 ; }
/*
 * ipow2: 2 raised to the power l, computed with pow() and rounded with nnint().
 */
int ipow2 (int l)
{
  double power ;

  power = pow(2.0, l) ;
  return nnint(power) ;
}
int main(int argc, char **argv) { int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ; double rthresh, zt ; int mpflag, ret, nvalid; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; double ychi, zscore, zthresh = 20.0 ; double y1, y2 ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; int maxmiss ; malexhet = YES ; // convertf default is don't change the data tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; for (i=0; i<numsnps; i++) { if (xchrom == -1) break ; cupt = snpmarkers[i] ; if (cupt -> chrom != xchrom) cupt -> ignore = YES ; if (cupt -> ignore) continue ; t = nnint(cupt -> physpos) ; if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ; } nignore = 0 ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom > maxchrom) cupt -> ignore = YES ; if (cupt -> chrom < minchrom) cupt -> ignore = YES ; if (cupt -> ignore) ++nignore ; } if (numsnps == nignore) fatalx("no valid snps\n") ; /** cupt = snpmarkers[0] ; printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ; */ numindivs = getindivs(indivname, &indivmarkers) ; if (genotypelist!= NULL) { getgenos_list(genotypelist, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } else { setgenotypename(&genotypename, indivname) ; getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } if (outputall) { outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run (outputall mode)\n") ; return 0 ; } if (killr2) { nkill = 
killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } setstatus(indivmarkers, numindivs, "Case") ; /******************************************************************/ /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, maxmissfracind, maxmissfracsnp); */ /******************************************************************/ if (fastdup) { if (fastdupnum > 0) setfastdupnum(fastdupnum) ; fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ; } if (decim>0) { snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ; } outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run\n") ; return 0 ; }