int main(int argc, char **argv) { char **eglist ; int numeg ; int i, j, k, pos; int *vv ; SNP *cupt, *cupt2 ; Indiv *indx ; double y1, y2, y ; int n0, n1, nkill ; int nindiv = 0 ; int nignore, numrisks = 1 ; SNP **xsnplist ; Indiv **xindlist ; int *xindex ; int nrows, ncols, m ; double *XTX, *cc, *evecs, *ww ; double *lambda ; double *tvecs ; int weightmode = NO ; int t ; double *xmean, *xfancy ; double *ldmat = NULL, *ldmat2 = NULL; double *ldvv = NULL, *ldvv2 = NULL, *vv2 = NULL ; int chrom, numclear ; double gdis ; int outliter, numoutiter, *badlist, nbad ; int a, b, n ; FILE *outlfile ; int xblock, blocksize=10000 ; double *tblock ; OUTLINFO *outpt ; int *idperm, *vecind ; // for sort readcommands(argc, argv) ; printf("## smartrel version: %s\n", WVERSION) ; packmode = YES ; setomode(&outputmode, omode) ; if (parname == NULL) return 0 ; if (xchrom == (numchrom+1)) noxdata = NO ; if (fstonly) { printf("fstonly\n") ; numeigs = 0 ; numoutliter = 0 ; numoutiter = 0 ; outputname = NULL ; snpeigname = NULL ; } if (fancynorm) printf("norm used\n\n") ; else printf("no norm used\n\n") ; nostatslim = MAX(nostatslim, 3) ; outlfile = ofile = stdout; if (outputname != NULL) openit(outputname, &ofile, "w") ; if (outliername != NULL) openit(outliername, &outlfile, "w") ; if (fstdetailsname != NULL) openit(fstdetailsname, &fstdetails, "w") ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; numindivs = getindivs(indivname, &indivmarkers) ; k = getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; if (poplistname != NULL) { ZALLOC(eglist, numindivs, char *) ; numeg = loadlist(eglist, poplistname) ; seteglist(indivmarkers, numindivs, poplistname); } else { setstatus(indivmarkers, numindivs, NULL) ; ZALLOC(eglist, MAXPOPS, char *) ; numeg = makeeglist(eglist, MAXPOPS, indivmarkers, numindivs) ; } for (i=0; i<numeg; i++) { /* printf("%3d %s\n",i, eglist[i]) ; */ } nindiv=0 ; for (i=0; i<numindivs; i++) { indx = indivmarkers[i] ; if(indx -> affstatus == YES) ++nindiv ; } for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; chrom = cupt -> chrom ; if ((noxdata) && (chrom == (numchrom+1))) cupt-> ignore = YES ; if (chrom == 0) cupt -> ignore = YES ; if (chrom > (numchrom+1)) cupt -> ignore = YES ; } for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; pos = nnint(cupt -> physpos) ; if ((xchrom>0) && (cupt -> chrom != xchrom)) cupt -> ignore = YES ; if ((xchrom > 0) && (pos < lopos)) cupt -> ignore = YES ; if ((xchrom > 0) && (pos > hipos)) cupt -> ignore = YES ; if (cupt -> ignore) continue ; if (numvalidgtx(indivmarkers, cupt, YES) <= 1) { printf("nodata: %20s\n", cupt -> ID) ; cupt -> ignore = YES ; } } if (killr2) { nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } ZALLOC(vv, numindivs, int) ; numvalidgtallind(vv, snpmarkers, numsnps, numindivs) ; for (i=0; i<numindivs; ++i) { if (vv[i] == 0) { indx = indivmarkers[i] ; indx -> ignore = YES ; } } free(vv) ; numsnps = rmsnps(snpmarkers, numsnps, NULL) ; // rid ignorable snps if (missingmode) { setmiss(snpmarkers, numsnps) ; fancynorm = NO ; } if (weightname != NULL) { weightmode = YES ; getweights(weightname, snpmarkers, numsnps) ; } if (ldregress>0) { ZALLOC(ldvv, ldregress*numindivs, double) ; ZALLOC(ldvv2, ldregress*numindivs, double) ; ZALLOC(vv2, numindivs, double) ; ZALLOC(ldmat, ldregress*ldregress, double) ; ZALLOC(ldmat2, ldregress*ldregress, double) ; setidmat(ldmat, ldregress) ; vst(ldmat, ldmat, 1.0e-6, ldregress*ldregress) ; } ZALLOC(xindex, numindivs, int) ; ZALLOC(xindlist, numindivs, Indiv *) ; ZALLOC(xsnplist, numsnps, SNP *) ; if (popsizelimit > 0) { setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ; } nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ; ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ; printf("number of samples used: %d number of snps used: %d\n", nrows, ncols) ; /** cupt = xsnplist[0] ; for (j=0; j<nrows; ++j) { k = xindex[j] ; g = getgtypes(cupt, k) ; indx = indivmarkers[k] ; t = indxindex(eglist, numeg, indx -> egroup) ; printf("yy1 %20s %20s %20s %d %d %d\n", cupt ->ID, indx -> ID, indx -> egroup, j, k, g) ; } printf("yya: ") ; printimat(xindex, 1, nrows) ; printf("zzindxa: %s\n", indivmarkers[230] -> egroup) ; */ /* printf("## nrows: %d ncols %d\n", nrows, ncols) ; */ ZALLOC(xmean, ncols, double) ; ZALLOC(xfancy, ncols, double) ; ZALLOC(XTX, nrows*nrows, double) ; ZALLOC(evecs, nrows*nrows, double) ; ZALLOC(tvecs, nrows*nrows, double) ; ZALLOC(lambda, nrows, double) ; ZALLOC(cc, nrows, double) ; ZALLOC(ww, nrows, double) ; ZALLOC(badlist, nrows, int) ; blocksize = MIN(blocksize, ncols) ; ZALLOC(tblock, nrows*blocksize, double) ; // xfancy is multiplier for column xmean is mean to take off // badlist is list of rows to delete (outlier removal) numoutiter = 1 ; if (numoutliter>=1) { numoutiter = numoutliter+1 ; ZALLOC(outinfo, nrows, OUTLINFO *) ; for (k=0; k<nrows; k++) { ZALLOC(outinfo[k], 1, OUTLINFO) ; } /* fprintf(outlfile, "##%18s %4s %6s %9s\n", "ID", "iter","eigvec", "score") ; */ } for (outliter = 1; outliter <= numoutiter ; ++outliter) { if (fstonly) { setidmat(XTX, nrows) ; vclear(lambda, 1.0, nrows) ; break ; } if (outliter>1) { ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ; } vzero(XTX, nrows*nrows) ; vzero(tblock, nrows*blocksize) ; xblock = 0 ; vzero(xmean, ncols) ; vclear(xfancy, 1.0, ncols) ; for (i=0; i<ncols; i++) { cupt = xsnplist[i] ; chrom = cupt -> chrom ; getcolxz(cc, cupt, xindex, nrows, i, xmean, xfancy, &n0, &n1) ; t = MIN(n0, n1) ; if (t <= minallelecnt) { cupt -> ignore = YES ; vzero(cc, nrows) ; } if (weightmode) { vst(cc, cc, xsnplist[i] -> weight, nrows) ; } if (ldregress>0) { numclear = 0 ; for (k=1; k<= ldregress; ++k) { j = i-k ; if (j<0) { numclear = ldregress-k+1 ; break ; } cupt2 = xsnplist[j] ; if (cupt2 -> chrom != chrom) gdis = ldlimit + 1.0 ; else gdis = cupt -> genpos - cupt2 -> genpos ; if (gdis>=ldlimit) { numclear = ldregress-k+1 ; break ; } } if (numclear>0) clearld(ldmat, ldvv, ldregress, nrows, numclear) ; ldreg(ldmat, ldmat2, cc, vv2, ldvv, ldvv2, ldregress, nrows) ; copyarr(ldmat2, ldmat, ldregress*ldregress) ; copyarr(vv2, cc, nrows) ; copyarr(ldvv2, ldvv, ldregress*nrows) ; } copyarr(cc, tblock+xblock*nrows, nrows) ; ++xblock ; /** this is the key code to parallelize */ if (xblock==blocksize) { domult(tvecs, tblock, xblock, nrows) ; vvp(XTX, XTX, tvecs, nrows*nrows) ; xblock = 0 ; vzero(tblock, nrows*blocksize) ; } } if (xblock>0) { domult(tvecs, tblock, xblock, nrows) ; vvp(XTX, XTX, tvecs, nrows*nrows) ; } symit(XTX, nrows) ; /** a = 0; b=0 ; printf("zz1 %12.6f ", XTX[a*nrows+b]) ; a = nrows-1; b=nrows-1 ; printf(" %12.6f %15.9g\n", XTX[a*nrows+b], asum(XTX, nrows*nrows)) ; */ if (verbose) { printdiag(XTX, nrows) ; } y = trace(XTX, nrows) / (double) (nrows-1) ; if (isnan(y)) fatalx("bad XTX matrix\n") ; /* printf("trace: %9.3f\n", y) ; */ if (y<=0.0) fatalx("XTX has zero trace (perhaps no data)\n") ; vst(XTX, XTX, 1.0/y, nrows * nrows) ; /// mean eigenvalue is 1 eigvecs(XTX, lambda, evecs, nrows) ; // eigenvalues are in decreasing order if (outliter > numoutliter) break ; // last pass skips outliers numoutleigs = MIN(numoutleigs, nrows-1) ; nbad = ridoutlier(evecs, nrows, numoutleigs, outlthresh, badlist, outinfo) ; if (nbad == 0) break ; for (i=0; i<nbad; i++) { j = badlist[i] ; indx = xindlist[j] ; outpt = outinfo[j] ; fprintf(outlfile, "REMOVED outlier %s iter %d evec %d sigmage %.3f\n", indx -> ID, outliter, outpt -> vecno, outpt -> score) ; indx -> ignore = YES ; } nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ; printf("number of samples after outlier removal: %d\n", nrows) ; } if (outliername != NULL) fclose(outlfile) ; m = numgtz(lambda, nrows) ; /* printf("matrix rank: %d\n", m) ; */ if (m==0) fatalx("no data\n") ; /** smartrel code */ for (i=0; i<numeigs; i++) { y = sqrt(lambda[i]) ; vst(ww, evecs+i*nrows, y, nrows) ; subouter(XTX, ww, nrows) ; } free(tvecs) ; n = 0 ; ZALLOC(vecind, nrows*nrows/2, int) ; for (i=0; i<nrows; i++) { for (j=i+1; j<nrows; j++) { k = i*nrows + j ; y1 = XTX[i*nrows+i] ; y2 = XTX[j*nrows+j] ; y = XTX[k]/sqrt(y1*y2) ; y += 1/(double)(nrows-1); if (y<relthresh) continue ; vecind[n] = k ; evecs[n] = -y ; ++n ; } } free(XTX) ; if (n==0) { printf("## nothing above relthresh!\n") ; printf("##end of smartrel run\n") ; return 0 ; } ZALLOC(idperm, n, int) ; sortit(evecs, idperm, n) ; for (i=0; i<n; i++) { j = idperm[i] ; k = vecind[j] ; a = k/nrows ; b = k%nrows ; printf("rel: %20s ", xindlist[a] ->ID) ; printf("%20s ", xindlist[b] ->ID) ; printf(" %9.3f", -evecs[i]) ; printnl() ; } printf("##end of smartrel run\n") ; return 0 ; }
int main(int argc, char **argv) { SNP **snpmarkers ; Indiv **indivmarkers ; int numsnps, numindivs ; unsigned char *packg1, *packg2 ; int **snppos ; int *snpindx ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k, x ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; int maxmiss ; tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; nums1 = getsnps(snp1, &snpm1, 0.0, NULL, &nignore, numrisks) ; putped(1) ; freeped() ; nums2 = getsnps(snp2, &snpm2, 0.0, NULL, &nignore, numrisks) ; putped(2) ; freeped() ; for (x=0; x<nums1; ++x) { cupt1 = snpm1[x] ; cupt1 -> tagnumber = -1 ; } for (x=0; x<nums2; ++x) { cupt2 = snpm2[x] ; t = x %1000 ; // if (t==0) printf("zz %d %d\n", x, nums2) ; k = snpindex(snpm1, nums1, cupt2 -> ID) ; if (k<0) { cupt2 -> ignore = YES ; continue ; } cupt1 = snpm1[k] ; cupt1 -> tagnumber = x ; t = checkmatch(cupt1, cupt2) ; if (t==1) continue ; if (t==2) { cupt2 -> isrfake = YES ; continue ; } if (t<0) { cupt1 -> ignore = cupt2 -> ignore = YES ; continue ; } printf("allele funny: %s", cupt1 -> ID) ; printalleles(cupt1, stdout) ; printalleles(cupt2, stdout) ; printnl() ; cupt1 -> ignore = cupt2 -> ignore = YES ; continue ; } freesnpindex() ; numi1 = getindivs(ind1, &indm1) ; numi2 = getindivs(ind2, &indm2) ; for (x=0; x<numi2; ++x) { k = indindex(indm1, numi1, indm2[x] -> ID) ; // this code could be modified to allow duplicate individuals if (k>=0) fatalx("dup ind: %s\n", indm2[x] -> ID) ; // fix later? } setgenotypename(&geno1, ind1) ; getped(1) ; getgenos(geno1, snpm1, indm1, nums1, numi1, nignore) ; packg1 = (unsigned char *) getpackgenos() ; clearpackgenos() ; setgenotypename(&geno2, ind2) ; getped(2) ; getgenos(geno2, snpm2, indm2, nums2, numi2, nignore) ; packg2 = (unsigned char *) getpackgenos() ; numindivs = mergeit(snpm1, snpm2, &indm1, indm2, nums1, nums2, numi1, numi2) ; snpmarkers = snpm1 ; numsnps = nums1 ; indivmarkers = indm1 ; free(packg1) ; free(packg2) ; outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of mergeit run\n") ; return 0 ; }
int main(int argc, char **argv) { char sss[MAXSTR] ; int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; int lsnplist, lindlist ; int i, j, k, k1, k2, k3, k4, kk ; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double y1, y2, y, sig, tail, yy1, yy2 ; char ss[11] ; int *blstart, *blsize, nblocks ; int xnblocks ; /* for xsnplist */ int *bcols ; int **subsets ; double maxgendis ; char **eglist ; int numeg ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; int nignore, numrisks = 1 ; double *xrow, *xpt ; SNP **xsnplist ; int *tagnums ; Indiv **xindlist ; int *xindex, *xtypes ; int nedge, m, nc ; double zn, zvar ; int weightmode = NO ; double chisq, ynrows ; int *numhits, t ; double *xmean, *xfancy ; double *divans, *divsd ; double *hettop, *hetbot ; int chrom, numclear ; double gdis ; int outliter, *badlist, nbad ; int ***counts ; char ***plists ; int nplist, trun ; int nrows, ncols ; readcommands(argc, argv) ; printf("## qpBound version: %s\n", WVERSION) ; if (parname == NULL) return 0 ; if (xchrom == 23) noxdata = NO ; if (outpop == NULL) fatalx("no outpop\n") ; setinbreed(inbreed) ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; numindivs = getindivs(indivname, &indivmarkers) ; setindm(indivmarkers) ; k = getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; chrom = cupt -> chrom ; if ((xchrom>0) && (chrom != xchrom)) cupt -> ignore = YES ; if ((noxdata) && (chrom == 23)) cupt-> ignore = YES ; if (chrom == 0) cupt -> ignore = YES ; if (chrom > 23) cupt -> ignore = YES ; if (chrom == zchrom) cupt -> ignore = YES ; } nplist = numlines(popfilename) ; ZALLOC(plists, nplist, char **) ; ZALLOC(lines, nplist, char *) ; num = readpopx(popfilename, plists, 3) ; nplist = num ; printf("nplist: %d\n", nplist) ; if (nplist == 0) return 0; ZALLOC(eglist, nplist*3, char *) ; numeg = 0 ; for (trun=0; trun<nplist; ++trun) { for (k=0; k<3; ++k) { t = indxindex(eglist, numeg, plists[trun][k]) ; if (t<0) { eglist[numeg] = strdup(plists[trun][k]) ; ++numeg ; } } } if (popsizelimit > 0) { setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ; } if (outputname != NULL) openit (outputname, &ofile, "w") ; outnum = 0 ; ZALLOC(xindex, numindivs, int) ; ZALLOC(xindlist, numindivs, Indiv *) ; nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ; ZALLOC(xtypes, nrows, int) ; for (i=0; i<nrows; i++) { indx = xindlist[i] ; k = indxindex(eglist, numeg, indx -> egroup) ; xtypes[i] = k+1 ; // dangerous bend t = strcmp(indx -> egroup, outpop) ; if (t==0) xtypes[i] = outnum ; else fatalx("outpop bug\n") ; } ZALLOC(xsnplist, numsnps, SNP *) ; ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ; /** ZALLOC(counts, ncols, int **) ; for (k=0; k<ncols; ++k) { counts[k] = initarray_2Dint( numeg, 2, 0) ; } countpops(counts, xsnplist, xindex, xtypes, nrows, ncols) ; */ for (trun=0; trun<nplist; ++trun) { dopop3out(plists[trun], xsnplist, ncols, lines[trun], outpop) ; } if (outputname != NULL) fclose(ofile) ; printf("##end of qpBound\n") ; return 0 ; }
int main(int argc, char **argv) { char sss[MAXSTR] ; int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int *nsamppops ; int *ztypes ; int lsnplist, lindlist, numeg ; int i, j, k, k1, k2, k3, k4, kk; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double y1, y2, y, sig, tail, yy1, yy2 ; char ss[11] ; int *blstart, *blsize, nblocks ; int xnblocks ; /* for xsnplist */ int *bcols ; double maxgendis ; int xind[4] ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; int nignore, numrisks = 1 ; double *xrow, *xpt ; SNP **xsnplist ; int *tagnums ; Indiv **xindlist ; int *xindex, *xtypes ; int nrows, ncols, m, nc ; double zn, zvar ; int weightmode = NO ; double chisq, ynrows ; int *numhits, t ; double *xmean, *xfancy ; double *divans, *divsd ; double *hettop, *hetbot ; int chrom, numclear ; double gdis ; int outliter, *badlist, nbad ; double **zdata, *z1, *z2 ; int maxtag = -1 ; double **zz ; double *pmean, *pnum, rscore[3], dstat[3], hscore[3], rrr[3], ww[3], serr[3] ; int ssize[3][3], *sz ; int tpat[3][4] , rpat[3][4], *rrtmp, *rp ; int *rawcol ; ; int a, b, c, d, col ; int aa, bb, cc, dd ; double *qpscores ; double *hest, *hsig ; double mingenpos, maxgenpos ; int *qhit ; /* number of times pair is clade in quartet */ int *qmiss ; /* number of times pair migration event implied */ int **qplist, numqp = 0, maxqp=10000 ; double *qpscore ; char ***qlist, *sx ; int nqlist = 0 ; int bbest[3] ; double absscore[3] ; double ascore[4], astat[4] ; double **dsctop, **dscbot ; double **abx, **bax, **f2 ; int popx[4] ; double tn[4*5], td[4*4] ; double zzsig[5], zzest[5], zsc[5] ; double ymin ; double *f3, *f4, *f3sig, *f4sig ; int t1, t2, tt ; int ***counts, **ccc ; double tlenz[5], tlen[5] ; int lenz[5] ; readcommands(argc, argv) ; printf("## qpDstat version: %s\n", WVERSION) ; if (parname == NULL) return 0 ; if ((poplistname == NULL) && (popfilename == NULL)) fatalx("poplistname, popfilename both null\n") ; if (!bankermode) forceclade = NO ; //if (fancynorm) printf("fancynorm used\n") ; //else printf("no fancynorm used\n") ; setjquart(NO, jackweight, jackquart) ; nostatslim = MAX(nostatslim, 3) ; setinbreed(inbreed) ; if (outputname != NULL) openit(outputname, &ofile, "w") ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; numindivs = getindivs(indivname, &indivmarkers) ; if (id2pops != NULL) { setid2pops(id2pops, indivmarkers, numindivs) ; } setindm(indivmarkers) ; k = getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom >= 23) cupt -> ignore = YES ; if (cupt -> chrom == zchrom) cupt -> ignore = YES ; } ZALLOC(eglist, numindivs, char *) ; ZALLOC(ztypes, numindivs, int) ; if (popfilename == NULL) { if (bankermode == NO) numeg = loadlist(eglist, poplistname) ; else { numeg = loadlist_type(eglist, poplistname, ztypes, 0) ; numbanker = 0 ; for (k=0; k<numeg; ++k) { if (ztypes[k] == 2) ++numbanker ; } printf("bankermode: ") ; printimat(ztypes, 1, numeg) ; } } if (popfilename != NULL) { bbestmode = NO ; nqlist = numlines(popfilename) ; ZALLOC(qlist, 4, char **) ; for (k=0; k<4; ++k) { ZALLOC(qlist[k], nqlist, char *) ; } nqlist = getnamesstripcolon(&qlist, nqlist, 4, popfilename, locount, hicount) ; numeg = 0 ; printf("number of quadruples %d\n", nqlist) ; fflush(stdout) ; for (k=0; k<4; ++k) { for (j=0; j<nqlist; ++j) { sx = qlist[k][j] ; t1 = indxstring(eglist, numeg, sx) ; if (t1 >=0) continue ; eglist[numeg] = strdup(sx) ; ++numeg ; setstatus(indivmarkers, numindivs, sx) ; } } }
int main(int argc, char **argv) { double *V; double *xx; double *iscase; double *iscasecorr; int K; int k,m,n; int nignore; double rowsum, rowsum1; double chisq, Echisq, gamma, denom; readcommands(argc, argv) ; if (outputname != NULL) openit(outputname, &fpout, "w") ; else fpout = stdout; fprintf(fpout, "Chisq EIGENSTRAT\n"); setinmode(&inmode, imode); packmode = YES; numsnps = getsnps(snpname, &snpmarkers, 0.0, NULL, &nignore, 1) ; NSAMPLES = getindivs(indivname, &indivmarkers) ; setstatus(indivmarkers, NSAMPLES, "Case") ; setgenotypename(&genotypename, indivname) ; if (genotypename != NULL) { getgenos(genotypename, snpmarkers, indivmarkers, numsnps, NSAMPLES, nignore) ; } /*******************************************************************/ /* Free memory: Usually this is done in outfiles: */ /* */ /* nind = rmindivs(&snpmarkers, numsnps, &indmarkers, NSAMPLES); */ /* */ /* But where is the snpmarkers array released? */ /*******************************************************************/ L = numpc; readpcafile(&V, &outlier, &K, L, NSAMPLES); getphenos(NSAMPLES, &iscase, outlier, &iscasecorr, L, V); /* main eigenstrat loop here */ if ((xx = (double *)malloc(NSAMPLES*sizeof(*xx))) == NULL) { fprintf(stderr,"CM\n"); exit(1); } for(m=0;m<numsnps;m++) { SNP *cupt = snpmarkers[m]; for(n=0; n<NSAMPLES; n++) { int j = getgtypes(cupt,n); if(j == 0) { xx[n] = 0.0; } else if(j == 1) { xx[n] = 0.5; } else if(j == 2 ) { xx[n] = 1.0; } else if(j == -1) { xx[n] = -100.0; } if(outlier[n] == 1) xx[n] = -100.0; } /* mean-adjust xx */ rowsum = 0.0; rowsum1 = 0.0; for(n=0; n<NSAMPLES; n++) { if(qtmode == NO && ((outlier[n]) || (xx[n] < -99.0))) continue; if(qtmode == YES && ((outlier[n]) || (xx[n] == -100.0))) continue; rowsum += xx[n]; rowsum1 += 1.0; } for(n=0; n<NSAMPLES; n++) { if(outlier[n]) continue; if(qtmode == NO) { if (xx[n] < -99.0) xx[n] = -100.0; /* still keep track */ else xx[n] -= rowsum/rowsum1; } else { if (xx[n] == -100.0) xx[n] = -100.0; /* still keep track */ else xx[n] -= rowsum/rowsum1; } } /* Chisq */ chisq = compute_chisq(xx,iscase); /* EIGENSTRAT */ for(k=0; k<L; k++) { gamma = 0.0; denom = 0.0; for(n=0; n<NSAMPLES; n++) { if(qtmode == NO && (outlier[n] || xx[n] < -99.0)) continue; if(qtmode == YES && (outlier[n] || xx[n] == -100.0)) continue; gamma += xx[n]*V[NSAMPLES*n+k]; denom += V[NSAMPLES*n+k]*V[NSAMPLES*n+k]; } gamma /= denom; for(n=0; n<NSAMPLES; n++) { if(qtmode == NO && (outlier[n] || xx[n] < -99.0)) continue; if(qtmode == YES && (outlier[n] || xx[n] == -100.0)) continue; xx[n] -= gamma*V[NSAMPLES*n+k]; } } Echisq = compute_chisqE(xx,iscasecorr); if(rowsum1 == 0.0) { chisq = -1.0; Echisq = -1.0; } if(chisq >= 0.0) fprintf(fpout,"%.04f",chisq); else fprintf(fpout,"NA"); if(Echisq >= 0.0) fprintf(fpout," %.04f\n",Echisq); else fprintf(fpout," NA\n"); if(NSAMPLES*m > MAXSIZE) { fprintf(stderr,"OOPS genotype file has > %d genotypes\n",MAXSIZE); fprintf(fpout,"OOPS genotype file has > %d genotypes\n",MAXSIZE); exit(1); } } }
int main(int argc, char* argv[]) { program_name = argv[0]; if (argc < 3) { print_help(); exit(EXIT_FAILURE); } char binfile[50]; base = argv[1]; strapp(binfile, ".nc"); int total_ids = atoi(argv[2]); FILE *fp; if (argc > 2) { infile = argv[3]; fp = fopen(infile, "r"); if (!fp) error(1, errno, "%s", infile); } else { infile = "stdin"; fp = stdin; } /* Get the first line with the SNP names */ char *line = NULL; size_t n = 0; int read = getlin(&line, &n, fp); if (read == -1) error(1, errno, "%s", infile); int nsnps = getsnps(line); free(line); line = NULL; printf("%d markers\n", nsnps); printf("Converting intensity values to binary\n"); int ncid, stat, sampid, varid; size_t nids = 0, perkey; size_t count[3] = { 1, 0, 0 }; size_t start[3] = { 0, 0, 0 }; char *id; float *vals; while (getlin(&line, &n, fp) != -1) { if (nids == 0) { /* Count number of vals in first line to alloc indv_dat */ read = getstrings(line, NULL, DELIM) - 1; if (read < 1) error(2, 0, "No values found"); if (read % nsnps != 0) error(2, 0, "Not same number of values for each marker"); perkey = read / nsnps; printf("%zu values per marker\n", perkey); vals = malloc(sizeof(float) * read); if (vals == NULL) error(1, errno, "dat.vals"); /* Create netcdf file and write snp names */ ncid = create_nc(binfile, total_ids, nsnps, perkey); stat = nc_inq_varid(ncid, "sample", &sampid); check_err(stat,__LINE__,__FILE__); stat = nc_inq_varid(ncid, "intensity", &varid); check_err(stat,__LINE__,__FILE__); writesnps(ncid, nsnps); count[1] = nsnps; count[2] = perkey; } /* Read valuess from line */ id = linevals(line, vals); /* Write sample id */ stat = nc_put_var1_string(ncid, sampid, &nids, (const char **) &id); check_err(stat,__LINE__,__FILE__); /* Write intensity values */ start[0] = nids; stat = nc_put_vara_float(ncid, varid, start, count, vals); check_err(stat,__LINE__,__FILE__); free(line); line = NULL; nids++; printf("Read %zu\r", nids); fflush(stdout); } fclose(fp); stat = nc_close(ncid); check_err(stat,__LINE__,__FILE__); free(vals); free(line); printf("Wrote %zu samples to [ %s ]\n", nids, binfile); exit(EXIT_SUCCESS); }
int main(int argc, char **argv) { int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ; double rthresh, zt ; int mpflag, ret, numvalidind, nvalid, numvalidsnps ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k, g ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; double ychi, zscore, zthresh = 20.0 ; double y1, y2 ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; char c1, c2 ; int t1, t2 ; malexhet = YES ; // convertf default is don't change the data tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; if (r2thresh > 0.0) killr2 = YES ; if (badpedignore) setbadpedignore() ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; for (i=0; i<numsnps; i++) { if (xchrom == -1) break ; cupt = snpmarkers[i] ; if (cupt -> chrom != xchrom) cupt -> ignore = YES ; if (cupt -> ignore) continue ; t = nnint(cupt -> physpos) ; if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ; } nignore = 0 ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom > maxchrom) cupt -> ignore = YES ; if (cupt -> chrom < minchrom) cupt -> ignore = YES ; if (cupt -> ignore) ++nignore ; } if (numsnps == nignore) fatalx("no valid snps\n") ; numindivs = getindivs(indivname, &indivmarkers) ; if (polarid != NULL) { polarindex = indindex(indivmarkers, numindivs, polarid) ; if (polarindex<0) fatalx("polarid %s not found\n") ; } if (genotypelist!= NULL) { getgenos_list(genotypelist, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } else { setgenotypename(&genotypename, indivname) ; getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } if (newsnpname != NULL) { nums2 = getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ; remap(snpmarkers, numsnps, snpm2, nums2) ; snpmarkers = snpm2 ; numsnps = nums2 ; } if (newindivname != NULL) { numind2 = getindivs(newindivname, &indm2) ; remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ; indivmarkers = indm2 ; numindivs = numind2 ; if (polarid != NULL) { polarindex = indindex(indivmarkers, numindivs, polarid) ; } } if (mkdiploid) { numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ; numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ; remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ; indivmarkers = indm2 ; numindivs = numind2 ; } if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (zerodistance) cupt -> genpos = 0.0 ; c1 = cupt -> alleles[0] ; c2 = cupt -> alleles[1] ; t1 = pedval(&c1) % 5 ; t2 = pedval(&c2) % 5 ; // 0 and 5 are no good if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ; } flipstrand(flipstrandname, snpmarkers, numsnps) ; flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ; if (polarindex>=0) { for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; g = getgtypes(cupt, polarindex) ; if (g==0) { printf("polarizing %s\n", cupt -> ID) ; flip1(cupt, NO, YES) ; g = getgtypes(cupt, polarindex) ; if (g!=2) fatalx("badbug\n") ; } if (g != 2) cupt -> ignore = YES ; } } if (outputall) { outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run (outputall mode)\n") ; return 0 ; } if (poplistname != NULL) { ZALLOC(eglist, numindivs, char *) ; numeg = loadlist(eglist, poplistname) ; seteglist(indivmarkers, numindivs, poplistname); for (i=0; i<numindivs; ++i) { indx = indivmarkers[i] ; if (indx -> affstatus == NO) indx -> ignore = YES ; } } else setstatus(indivmarkers, numindivs, "Case") ; numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ; numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ; if (killr2) { nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } if ( nhwfilter > 0 ) { hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname); } if ( xregionname ) { excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname); } numvalidind = 0 ; for (i=0; i<numindivs; ++i) { indx = indivmarkers[i] ; if (indx -> ignore) continue ; if (numvalidgtind(snpmarkers, numsnps, i) ==0) { indx -> ignore = YES ; printf("no data for individual: %s\n", indx -> ID) ; } if (indx -> ignore == NO) ++numvalidind ; } if (maxmiss<0) maxmiss = (int) (maxmissfrac * (double) numvalidind+1) ; printf("numvalidind: %5d maxmiss: %5d\n", numvalidind, maxmiss) ; if (numvalidind == 0) fatalx("no valid samples!\n") ; for (k=0; k<numsnps; ++k) { if (maxmiss>numvalidind) break ; cupt = snpmarkers[k] ; t = numvalidind - numvalidgtypes(cupt) ; // printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ; if (maxmiss < t) { cupt -> ignore = YES ; } /** if (numvalidind == t) { printf("no data for snp: %s\n", cupt -> ID) ; cupt -> ignore = YES ; } */ } if (fastdup) { printf("fastdup set %d\n", fastdupnum) ; if (fastdupnum > 0) { setfastdupnum(fastdupnum) ; setfastdupthresh(fastdupthresh, fastdupkill) ; fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ; } } if (decim>0) { snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ; } outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run\n") ; return 0 ; }
int main(int argc, char **argv) { int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ; double rthresh, zt ; int mpflag, ret, nvalid; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; double ychi, zscore, zthresh = 20.0 ; double y1, y2 ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; int maxmiss ; malexhet = YES ; // convertf default is don't change the data tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; for (i=0; i<numsnps; i++) { if (xchrom == -1) break ; cupt = snpmarkers[i] ; if (cupt -> chrom != xchrom) cupt -> ignore = YES ; if (cupt -> ignore) continue ; t = nnint(cupt -> physpos) ; if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ; } nignore = 0 ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom > maxchrom) cupt -> ignore = YES ; if (cupt -> chrom < minchrom) cupt -> ignore = YES ; if (cupt -> ignore) ++nignore ; } if (numsnps == nignore) fatalx("no valid snps\n") ; /** cupt = snpmarkers[0] ; printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ; */ numindivs = getindivs(indivname, &indivmarkers) ; if (genotypelist!= NULL) { getgenos_list(genotypelist, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } else { setgenotypename(&genotypename, indivname) ; getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } if (outputall) { outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run (outputall mode)\n") ; return 0 ; } if (killr2) { nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } setstatus(indivmarkers, numindivs, "Case") ; /******************************************************************/ /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, maxmissfracind, maxmissfracsnp); */ /******************************************************************/ if (fastdup) { if (fastdupnum > 0) setfastdupnum(fastdupnum) ; fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ; } if (decim>0) { snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ; } outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run\n") ; return 0 ; }