int main(int argc, char **argv) { SNP **snpmarkers ; Indiv **indivmarkers ; int numsnps, numindivs ; unsigned char *packg1, *packg2 ; int **snppos ; int *snpindx ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k, x ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; int maxmiss ; tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; nums1 = getsnps(snp1, &snpm1, 0.0, NULL, &nignore, numrisks) ; putped(1) ; freeped() ; nums2 = getsnps(snp2, &snpm2, 0.0, NULL, &nignore, numrisks) ; putped(2) ; freeped() ; for (x=0; x<nums1; ++x) { cupt1 = snpm1[x] ; cupt1 -> tagnumber = -1 ; } for (x=0; x<nums2; ++x) { cupt2 = snpm2[x] ; t = x %1000 ; // if (t==0) printf("zz %d %d\n", x, nums2) ; k = snpindex(snpm1, nums1, cupt2 -> ID) ; if (k<0) { cupt2 -> ignore = YES ; continue ; } cupt1 = snpm1[k] ; cupt1 -> tagnumber = x ; t = checkmatch(cupt1, cupt2) ; if (t==1) continue ; if (t==2) { cupt2 -> isrfake = YES ; continue ; } if (t<0) { cupt1 -> ignore = cupt2 -> ignore = YES ; continue ; } printf("allele funny: %s", cupt1 -> ID) ; printalleles(cupt1, stdout) ; printalleles(cupt2, stdout) ; printnl() ; cupt1 -> ignore = cupt2 -> ignore = YES ; continue ; } freesnpindex() ; numi1 = getindivs(ind1, &indm1) ; numi2 = getindivs(ind2, &indm2) ; for (x=0; x<numi2; ++x) { k = indindex(indm1, numi1, indm2[x] -> ID) ; // this code could be modified to allow duplicate individuals if (k>=0) fatalx("dup ind: %s\n", indm2[x] -> ID) ; // fix later? } setgenotypename(&geno1, ind1) ; getped(1) ; getgenos(geno1, snpm1, indm1, nums1, numi1, nignore) ; packg1 = (unsigned char *) getpackgenos() ; clearpackgenos() ; setgenotypename(&geno2, ind2) ; getped(2) ; getgenos(geno2, snpm2, indm2, nums2, numi2, nignore) ; packg2 = (unsigned char *) getpackgenos() ; numindivs = mergeit(snpm1, snpm2, &indm1, indm2, nums1, nums2, numi1, numi2) ; snpmarkers = snpm1 ; numsnps = nums1 ; indivmarkers = indm1 ; free(packg1) ; free(packg2) ; outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of mergeit run\n") ; return 0 ; }
int main(int argc, char **argv) { int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ; double rthresh, zt ; int mpflag, ret, numvalidind, nvalid, numvalidsnps ; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k, g ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; double ychi, zscore, zthresh = 20.0 ; double y1, y2 ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; char c1, c2 ; int t1, t2 ; malexhet = YES ; // convertf default is don't change the data tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; if (r2thresh > 0.0) killr2 = YES ; if (badpedignore) setbadpedignore() ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; for (i=0; i<numsnps; i++) { if (xchrom == -1) break ; cupt = snpmarkers[i] ; if (cupt -> chrom != xchrom) cupt -> ignore = YES ; if (cupt -> ignore) continue ; t = nnint(cupt -> physpos) ; if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ; } nignore = 0 ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom > maxchrom) cupt -> ignore = YES ; if (cupt -> chrom < minchrom) cupt -> ignore = YES ; if (cupt -> ignore) ++nignore ; } if (numsnps == nignore) fatalx("no valid snps\n") ; numindivs = getindivs(indivname, &indivmarkers) ; if (polarid != NULL) { polarindex = indindex(indivmarkers, numindivs, polarid) ; if (polarindex<0) fatalx("polarid %s not found\n") ; } if (genotypelist!= NULL) { getgenos_list(genotypelist, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } else { setgenotypename(&genotypename, indivname) ; getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } if (newsnpname != NULL) { nums2 = getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ; remap(snpmarkers, numsnps, snpm2, nums2) ; snpmarkers = snpm2 ; numsnps = nums2 ; } if (newindivname != NULL) { numind2 = getindivs(newindivname, &indm2) ; remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ; indivmarkers = indm2 ; numindivs = numind2 ; if (polarid != NULL) { polarindex = indindex(indivmarkers, numindivs, polarid) ; } } if (mkdiploid) { numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ; numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ; remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ; indivmarkers = indm2 ; numindivs = numind2 ; } if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (zerodistance) cupt -> genpos = 0.0 ; c1 = cupt -> alleles[0] ; c2 = cupt -> alleles[1] ; t1 = pedval(&c1) % 5 ; t2 = pedval(&c2) % 5 ; // 0 and 5 are no good if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ; } flipstrand(flipstrandname, snpmarkers, numsnps) ; flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ; if (polarindex>=0) { for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; g = getgtypes(cupt, polarindex) ; if (g==0) { printf("polarizing %s\n", cupt -> ID) ; flip1(cupt, NO, YES) ; g = getgtypes(cupt, polarindex) ; if (g!=2) fatalx("badbug\n") ; } if (g != 2) cupt -> ignore = YES ; } } if (outputall) { outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run (outputall mode)\n") ; return 0 ; } if (poplistname != NULL) { ZALLOC(eglist, numindivs, char *) ; numeg = loadlist(eglist, poplistname) ; seteglist(indivmarkers, numindivs, poplistname); for (i=0; i<numindivs; ++i) { indx = indivmarkers[i] ; if (indx -> affstatus == NO) indx -> ignore = YES ; } } else setstatus(indivmarkers, numindivs, "Case") ; numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ; numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ; if (killr2) { nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } if ( nhwfilter > 0 ) { hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname); } if ( xregionname ) { excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname); } numvalidind = 0 ; for (i=0; i<numindivs; ++i) { indx = indivmarkers[i] ; if (indx -> ignore) continue ; if (numvalidgtind(snpmarkers, numsnps, i) ==0) { indx -> ignore = YES ; printf("no data for individual: %s\n", indx -> ID) ; } if (indx -> ignore == NO) ++numvalidind ; } if (maxmiss<0) maxmiss = (int) (maxmissfrac * (double) numvalidind+1) ; printf("numvalidind: %5d maxmiss: %5d\n", numvalidind, maxmiss) ; if (numvalidind == 0) fatalx("no valid samples!\n") ; for (k=0; k<numsnps; ++k) { if (maxmiss>numvalidind) break ; cupt = snpmarkers[k] ; t = numvalidind - numvalidgtypes(cupt) ; // printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ; if (maxmiss < t) { cupt -> ignore = YES ; } /** if (numvalidind == t) { printf("no data for snp: %s\n", cupt -> ID) ; cupt -> ignore = YES ; } */ } if (fastdup) { printf("fastdup set %d\n", fastdupnum) ; if (fastdupnum > 0) { setfastdupnum(fastdupnum) ; setfastdupthresh(fastdupthresh, fastdupkill) ; fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ; } } if (decim>0) { snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ; } outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run\n") ; return 0 ; }
int main(int argc, char **argv) { double *V; double *xx; double *iscase; double *iscasecorr; int K; int k,m,n; int nignore; double rowsum, rowsum1; double chisq, Echisq, gamma, denom; readcommands(argc, argv) ; if (outputname != NULL) openit(outputname, &fpout, "w") ; else fpout = stdout; fprintf(fpout, "Chisq EIGENSTRAT\n"); setinmode(&inmode, imode); packmode = YES; numsnps = getsnps(snpname, &snpmarkers, 0.0, NULL, &nignore, 1) ; NSAMPLES = getindivs(indivname, &indivmarkers) ; setstatus(indivmarkers, NSAMPLES, "Case") ; setgenotypename(&genotypename, indivname) ; if (genotypename != NULL) { getgenos(genotypename, snpmarkers, indivmarkers, numsnps, NSAMPLES, nignore) ; } /*******************************************************************/ /* Free memory: Usually this is done in outfiles: */ /* */ /* nind = rmindivs(&snpmarkers, numsnps, &indmarkers, NSAMPLES); */ /* */ /* But where is the snpmarkers array released? */ /*******************************************************************/ L = numpc; readpcafile(&V, &outlier, &K, L, NSAMPLES); getphenos(NSAMPLES, &iscase, outlier, &iscasecorr, L, V); /* main eigenstrat loop here */ if ((xx = (double *)malloc(NSAMPLES*sizeof(*xx))) == NULL) { fprintf(stderr,"CM\n"); exit(1); } for(m=0;m<numsnps;m++) { SNP *cupt = snpmarkers[m]; for(n=0; n<NSAMPLES; n++) { int j = getgtypes(cupt,n); if(j == 0) { xx[n] = 0.0; } else if(j == 1) { xx[n] = 0.5; } else if(j == 2 ) { xx[n] = 1.0; } else if(j == -1) { xx[n] = -100.0; } if(outlier[n] == 1) xx[n] = -100.0; } /* mean-adjust xx */ rowsum = 0.0; rowsum1 = 0.0; for(n=0; n<NSAMPLES; n++) { if(qtmode == NO && ((outlier[n]) || (xx[n] < -99.0))) continue; if(qtmode == YES && ((outlier[n]) || (xx[n] == -100.0))) continue; rowsum += xx[n]; rowsum1 += 1.0; } for(n=0; n<NSAMPLES; n++) { if(outlier[n]) continue; if(qtmode == NO) { if (xx[n] < -99.0) xx[n] = -100.0; /* still keep track */ else xx[n] -= rowsum/rowsum1; } else { if (xx[n] == -100.0) xx[n] = -100.0; /* still keep track */ else xx[n] -= rowsum/rowsum1; } } /* Chisq */ chisq = compute_chisq(xx,iscase); /* EIGENSTRAT */ for(k=0; k<L; k++) { gamma = 0.0; denom = 0.0; for(n=0; n<NSAMPLES; n++) { if(qtmode == NO && (outlier[n] || xx[n] < -99.0)) continue; if(qtmode == YES && (outlier[n] || xx[n] == -100.0)) continue; gamma += xx[n]*V[NSAMPLES*n+k]; denom += V[NSAMPLES*n+k]*V[NSAMPLES*n+k]; } gamma /= denom; for(n=0; n<NSAMPLES; n++) { if(qtmode == NO && (outlier[n] || xx[n] < -99.0)) continue; if(qtmode == YES && (outlier[n] || xx[n] == -100.0)) continue; xx[n] -= gamma*V[NSAMPLES*n+k]; } } Echisq = compute_chisqE(xx,iscasecorr); if(rowsum1 == 0.0) { chisq = -1.0; Echisq = -1.0; } if(chisq >= 0.0) fprintf(fpout,"%.04f",chisq); else fprintf(fpout,"NA"); if(Echisq >= 0.0) fprintf(fpout," %.04f\n",Echisq); else fprintf(fpout," NA\n"); if(NSAMPLES*m > MAXSIZE) { fprintf(stderr,"OOPS genotype file has > %d genotypes\n",MAXSIZE); fprintf(fpout,"OOPS genotype file has > %d genotypes\n",MAXSIZE); exit(1); } } }
int main(int argc, char **argv) { int **snppos ; int *snpindx ; char **snpnamelist, **indnamelist ; char **eglist ; int lsnplist, lindlist, numeg ; int i,j; SNP *cupt, *cupt1, *cupt2, *cupt3 ; Indiv *indx ; double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ; double rthresh, zt ; int mpflag, ret, nvalid; int ch1, ch2 ; int fmnum , lmnum ; int num, n1, n2 ; int nkill = 0 ; int t, k ; int nindiv = 0, e, f, lag=1 ; double xc[9], xd[4], xc2[9] ; double ychi, zscore, zthresh = 20.0 ; double y1, y2 ; int nignore, numrisks = 1 ; char **genolist ; int numgenolist ; int maxmiss ; malexhet = YES ; // convertf default is don't change the data tersem = YES ; // no snp counts readcommands(argc, argv) ; setomode(&outputmode, omode) ; packmode = YES ; settersemode(tersem) ; numsnps = getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ; for (i=0; i<numsnps; i++) { if (xchrom == -1) break ; cupt = snpmarkers[i] ; if (cupt -> chrom != xchrom) cupt -> ignore = YES ; if (cupt -> ignore) continue ; t = nnint(cupt -> physpos) ; if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ; } nignore = 0 ; for (i=0; i<numsnps; i++) { cupt = snpmarkers[i] ; if (cupt -> chrom > maxchrom) cupt -> ignore = YES ; if (cupt -> chrom < minchrom) cupt -> ignore = YES ; if (cupt -> ignore) ++nignore ; } if (numsnps == nignore) fatalx("no valid snps\n") ; /** cupt = snpmarkers[0] ; printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ; */ numindivs = getindivs(indivname, &indivmarkers) ; if (genotypelist!= NULL) { getgenos_list(genotypelist, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } else { setgenotypename(&genotypename, indivname) ; getgenos(genotypename, snpmarkers, indivmarkers, numsnps, numindivs, nignore) ; } if (outputall) { outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run (outputall mode)\n") ; return 0 ; } if (killr2) { nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ; if (nkill>0) printf("killhir2. number of snps killed: %d\n", nkill) ; } setstatus(indivmarkers, numindivs, "Case") ; /******************************************************************/ /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, maxmissfracind, maxmissfracsnp); */ /******************************************************************/ if (fastdup) { if (fastdupnum > 0) setfastdupnum(fastdupnum) ; fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ; } if (decim>0) { snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ; } outfiles(snpoutfilename, indoutfilename, genooutfilename, snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ; printf("##end of convertf run\n") ; return 0 ; }