Пример #1
0
/*
 * smartrel driver.
 *
 * Reads the parameter file named on the command line, loads SNP, individual
 * and genotype data, filters SNPs and samples, builds a sample-by-sample
 * matrix (XTX) from the genotype columns in blocks, optionally iterates
 * outlier removal, removes the leading `numeigs` eigen-components from XTX
 * and finally prints every pair of samples whose normalised XTX entry is at
 * least `relthresh`, in sorted order.
 *
 * Returns 0 on normal completion (including "no parameter file" and
 * "nothing above relthresh"); fatal conditions go through fatalx().
 *
 * NOTE(review): the exact semantics of the project helpers (getcolxz,
 * domult, vvp, symit, eigvecs, ridoutlier, subouter, sortit, ...) are not
 * visible here; comments about them below are assumptions to confirm.
 */
int main(int argc, char **argv)
{

  char **eglist ;
  int numeg ;
  int i, j, k, pos; 
  int *vv ;
  SNP *cupt, *cupt2 ;
  Indiv *indx ;
  double y1, y2, y ;

  int n0, n1, nkill ;

  int nindiv = 0 ;
  int nignore, numrisks = 1 ;
  SNP **xsnplist  ;
  Indiv **xindlist ;
  int *xindex ;
  int nrows, ncols, m ;
  double *XTX, *cc, *evecs, *ww ;
  double *lambda ;
  double *tvecs ;
  int weightmode = NO ;
  int t ;
  double *xmean, *xfancy ;
  double *ldmat = NULL, *ldmat2 = NULL;
  double *ldvv = NULL, *ldvv2 = NULL, *vv2 = NULL ;
  int chrom,  numclear ;
  double gdis ;
  int outliter, numoutiter, *badlist, nbad ;
  int a, b, n ;
  FILE *outlfile ;
  

  // columns are accumulated blocksize at a time before being folded into XTX
  int xblock, blocksize=10000 ;   
  double *tblock ;  

  OUTLINFO *outpt ;
  int *idperm, *vecind ;   // for sort

  readcommands(argc, argv) ;
  printf("## smartrel version: %s\n", WVERSION) ;
  packmode = YES ;
  setomode(&outputmode, omode) ;

  // nothing to do without a parameter file
  if (parname == NULL) return 0 ;
  // an explicit request for chromosome numchrom+1 re-enables data that
  // noxdata would otherwise exclude (see the filter loop below)
  if (xchrom == (numchrom+1)) noxdata = NO ;

  // fstonly: no eigenvectors, no outlier iterations, no eigen output files
  if (fstonly) { 
   printf("fstonly\n") ;
   numeigs = 0 ; 
   numoutliter = 0 ;
   numoutiter = 0 ;
   outputname = NULL ;
   snpeigname = NULL ;
  }

  if (fancynorm) printf("norm used\n\n") ;
  else printf("no norm used\n\n") ;

  nostatslim = MAX(nostatslim, 3) ;

  // both output streams default to stdout and are redirected only on request
  outlfile = ofile = stdout; 

  if (outputname != NULL)  openit(outputname, &ofile, "w") ;
  if (outliername != NULL) openit(outliername, &outlfile, "w") ;
  if (fstdetailsname != NULL) openit(fstdetailsname, &fstdetails, "w") ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;

  numindivs = getindivs(indivname, &indivmarkers) ;
  k = getgenos(genotypename, snpmarkers, indivmarkers, 
    numsnps, numindivs, nignore) ;


  // population list: either read from poplistname or derived from the samples
  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
  }
  else
  {
    setstatus(indivmarkers, numindivs, NULL) ;
    ZALLOC(eglist, MAXPOPS, char *) ;
    numeg = makeeglist(eglist, MAXPOPS, indivmarkers, numindivs) ;
  }
  for (i=0; i<numeg; i++) 
  {  
    /* printf("%3d %s\n",i, eglist[i]) ; */
  }

  // count samples whose affstatus is YES (nindiv is not used further below)
  nindiv=0 ;
  for (i=0; i<numindivs; i++) 
  {
    indx = indivmarkers[i] ;
    if(indx -> affstatus == YES) ++nindiv  ;
  }

  // chromosome filter: drop chrom 0, chrom > numchrom+1, and chrom numchrom+1
  // when noxdata is set
  for (i=0; i<numsnps; i++)  
  {  
    cupt = snpmarkers[i] ; 
    chrom = cupt -> chrom ;
    if ((noxdata) && (chrom == (numchrom+1))) cupt-> ignore = YES ;
    if (chrom == 0) cupt -> ignore = YES ;
    if (chrom > (numchrom+1)) cupt -> ignore = YES ;
  }
  // when xchrom > 0 restrict to that chromosome and to positions in
  // [lopos, hipos]; then drop SNPs with at most one valid genotype
  for (i=0; i<numsnps; i++)  
  {
    cupt = snpmarkers[i] ; 
    pos = nnint(cupt -> physpos) ;
    if ((xchrom>0) && (cupt -> chrom != xchrom)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos < lopos)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos > hipos)) cupt -> ignore = YES ;
    if (cupt -> ignore) continue ;
    if (numvalidgtx(indivmarkers, cupt, YES) <= 1) 
    { 
      printf("nodata: %20s\n", cupt -> ID) ;
      cupt -> ignore = YES ;
    }
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  // ignore every individual for which numvalidgtallind reports a zero count
  ZALLOC(vv, numindivs, int) ;
  numvalidgtallind(vv, snpmarkers, numsnps,  numindivs) ; 
  for (i=0; i<numindivs; ++i)  { 
  if (vv[i] == 0) {
    indx = indivmarkers[i] ;
    indx -> ignore = YES ; 
   }
  }
  free(vv) ;

  numsnps = rmsnps(snpmarkers, numsnps, NULL) ;  //  rid ignorable snps

   
  // missingmode also switches fancynorm off
  if (missingmode) 
  {
    setmiss(snpmarkers, numsnps) ;
    fancynorm = NO ;
  }

  if  (weightname != NULL)   
  {  
    weightmode = YES ;
    getweights(weightname, snpmarkers, numsnps) ;
  }
  // work space for LD regression on the previous ldregress columns;
  // ldmat starts as 1.0e-6 times the identity
  if (ldregress>0) 
  {  
    ZALLOC(ldvv,  ldregress*numindivs, double) ;
    ZALLOC(ldvv2,  ldregress*numindivs, double) ;
    ZALLOC(vv2,  numindivs, double) ;
    ZALLOC(ldmat,  ldregress*ldregress, double) ;
    ZALLOC(ldmat2,  ldregress*ldregress, double) ;
    setidmat(ldmat, ldregress) ;         
    vst(ldmat, ldmat, 1.0e-6, ldregress*ldregress) ;
  }

  ZALLOC(xindex, numindivs, int) ;
  ZALLOC(xindlist, numindivs, Indiv *) ;
  ZALLOC(xsnplist, numsnps, SNP *) ;

  if (popsizelimit > 0) 
  {  
    setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ; 
  }

  // nrows = samples in use, ncols = SNPs in use
  nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
  ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
  printf("number of samples used: %d number of snps used: %d\n", nrows, ncols) ;

/**
  cupt = xsnplist[0] ;
  for (j=0; j<nrows; ++j) {  
   k = xindex[j] ;
   g = getgtypes(cupt, k) ;
   indx = indivmarkers[k] ;
   t = indxindex(eglist, numeg, indx -> egroup) ;
   printf("yy1 %20s %20s %20s %d %d %d\n", cupt ->ID, indx -> ID, indx -> egroup, j, k, g) ;
  }
  printf("yya: ") ; printimat(xindex, 1, nrows) ;
  printf("zzindxa:  %s\n", indivmarkers[230] -> egroup) ;
*/

  /* printf("## nrows: %d  ncols  %d\n", nrows, ncols) ; */
  ZALLOC(xmean, ncols, double) ;
  ZALLOC(xfancy, ncols, double) ;
  ZALLOC(XTX, nrows*nrows, double) ;
  ZALLOC(evecs, nrows*nrows, double) ;
  ZALLOC(tvecs, nrows*nrows, double) ;
  ZALLOC(lambda, nrows, double) ;
  ZALLOC(cc, nrows, double) ;
  ZALLOC(ww, nrows, double) ;
  ZALLOC(badlist, nrows, int) ;

  blocksize = MIN(blocksize, ncols) ; 
  ZALLOC(tblock, nrows*blocksize, double) ;

  // xfancy is multiplier for column xmean is mean to take off
  // badlist is list of rows to delete (outlier removal) 

  // default: a single pass; each requested outlier iteration adds one more
  numoutiter = 1 ;  

  if (numoutliter>=1) 
  {
    numoutiter = numoutliter+1 ;
    ZALLOC(outinfo, nrows,  OUTLINFO *) ;  
    for (k=0; k<nrows; k++) 
    {  
      ZALLOC(outinfo[k], 1, OUTLINFO) ;
    }
    /* fprintf(outlfile, "##%18s %4s %6s %9s\n", "ID", "iter","eigvec", "score") ; */
  }

  for (outliter = 1; outliter <= numoutiter ; ++outliter)  {
    // fstonly short-circuit: identity matrix, all eigenvalues set to 1
    if (fstonly) { 
     setidmat(XTX, nrows) ;
     vclear(lambda, 1.0, nrows) ;
     break ;
    }
    // later passes reload the SNP list (SNPs may have been flagged ignore)
    if (outliter>1) {
     ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
    }
    vzero(XTX, nrows*nrows) ;
    vzero(tblock, nrows*blocksize) ;
    xblock = 0 ; 

    vzero(xmean, ncols) ;
    vclear(xfancy, 1.0, ncols) ;

    for (i=0; i<ncols; i++) 
    { 
      cupt = xsnplist[i] ;
      chrom = cupt -> chrom ;
      // cc receives the column for SNP i; n0/n1 are presumably the two
      // allele counts -- TODO confirm against getcolxz
      getcolxz(cc, cupt, xindex, nrows, i, xmean, xfancy, &n0, &n1) ;
      t = MIN(n0, n1) ; 

      // too few copies of the rarer allele: flag the SNP and zero its column
      if (t <= minallelecnt)  {  
       cupt -> ignore = YES ;
       vzero(cc, nrows) ; 
      }

      if (weightmode) 
      {
        vst(cc, cc, xsnplist[i] -> weight, nrows) ;
      }
      if (ldregress>0) 
      {  
        // walk back over the previous ldregress SNPs; numclear becomes
        // non-zero at the first one that is missing, on another chromosome,
        // or at genetic distance >= ldlimit
        numclear = 0 ;
        for (k=1; k<= ldregress; ++k)  
        {  
          j = i-k ;  
          if (j<0) 
          { 
            numclear = ldregress-k+1 ; 
            break ;
          }
          cupt2 = xsnplist[j] ;  
          if (cupt2 -> chrom != chrom) gdis = ldlimit + 1.0 ; 
          else gdis = cupt -> genpos - cupt2 -> genpos ;
          if (gdis>=ldlimit) 
          {   
            numclear = ldregress-k+1 ; 
            break ;
          }
        }
        if (numclear>0) clearld(ldmat, ldvv, ldregress, nrows, numclear) ; 
        // ldreg writes its results to ldmat2 / vv2 / ldvv2, which are then
        // copied back over ldmat / cc / ldvv
        ldreg(ldmat, ldmat2, cc, vv2, ldvv, ldvv2, ldregress, nrows) ;
        copyarr(ldmat2, ldmat, ldregress*ldregress) ;
        copyarr(vv2, cc, nrows) ;
        copyarr(ldvv2, ldvv, ldregress*nrows) ;
      }
      // append the column to the current block
      copyarr(cc, tblock+xblock*nrows, nrows) ;
      ++xblock ; 

/** this is the key code to parallelize */
      // block full: fold it into XTX via tvecs and start a new block
      if (xblock==blocksize) 
      {  
        domult(tvecs, tblock, xblock, nrows) ;
        vvp(XTX, XTX, tvecs, nrows*nrows) ;
        xblock = 0 ;
        vzero(tblock, nrows*blocksize) ;
      }
    }

    // fold in the final, partially filled block
    if (xblock>0) 
    { 
     domult(tvecs, tblock, xblock, nrows) ;
     vvp(XTX, XTX, tvecs, nrows*nrows) ;
    }
    symit(XTX, nrows) ;

    /**
    a = 0; b=0 ;
    printf("zz1 %12.6f ", XTX[a*nrows+b]) ;
    a = nrows-1; b=nrows-1 ;
    printf(" %12.6f %15.9g\n", XTX[a*nrows+b], asum(XTX, nrows*nrows)) ;
    */

    if (verbose) 
    {
      printdiag(XTX, nrows) ;
    }

    // NOTE(review): divisor is nrows-1, so nrows == 1 divides by zero
    y = trace(XTX, nrows) / (double) (nrows-1) ;
    if (isnan(y)) fatalx("bad XTX matrix\n") ;
    /* printf("trace:  %9.3f\n", y) ; */
    if (y<=0.0) fatalx("XTX has zero trace (perhaps no data)\n") ;
    vst(XTX, XTX, 1.0/y, nrows * nrows) ;
/// mean eigenvalue is 1
    eigvecs(XTX, lambda, evecs, nrows) ;
// eigenvalues are in decreasing order 

    if (outliter > numoutliter) break ;  
    // last pass skips outliers 
    numoutleigs = MIN(numoutleigs, nrows-1) ;
    nbad = ridoutlier(evecs, nrows, numoutleigs, outlthresh, badlist, outinfo) ;
    if (nbad == 0) break ; 
    // report each outlier and mark it ignored, then rebuild the sample index
    for (i=0; i<nbad; i++) 
    {  
      j = badlist[i] ;
      indx = xindlist[j] ;
      outpt = outinfo[j] ;
      fprintf(outlfile, "REMOVED outlier %s iter %d evec %d sigmage %.3f\n", indx -> ID, outliter, outpt -> vecno, outpt -> score) ;
      indx -> ignore = YES ;
    }
    nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
    printf("number of samples after outlier removal: %d\n", nrows) ;
  }

  if (outliername != NULL) fclose(outlfile) ;

  m = numgtz(lambda, nrows)  ;
  /* printf("matrix rank: %d\n", m) ; */
  if (m==0) fatalx("no data\n") ;

/** smartrel code */
  // for each of the first numeigs eigenvectors, scale it by sqrt(lambda)
  // and hand it to subouter together with XTX (presumably subtracting the
  // outer product -- TODO confirm)
  for (i=0; i<numeigs; i++) {  
   y = sqrt(lambda[i]) ;
   vst(ww, evecs+i*nrows, y, nrows) ;               
   subouter(XTX, ww, nrows) ;
  }
  free(tvecs) ; 

  // collect every pair i<j whose normalised entry reaches relthresh;
  // evecs is reused as scratch for the negated scores, vecind keeps the
  // flattened matrix index i*nrows+j of each kept pair
  n = 0 ;
  ZALLOC(vecind, nrows*nrows/2, int) ; 
  for (i=0; i<nrows; i++) { 
   for (j=i+1; j<nrows; j++) { 
    k = i*nrows + j ; 
    y1 = XTX[i*nrows+i] ;
    y2 = XTX[j*nrows+j] ;
    y = XTX[k]/sqrt(y1*y2) ;
    y += 1/(double)(nrows-1);
    if (y<relthresh) continue ;
    vecind[n] = k ; 
    evecs[n] = -y ;
    ++n ;
   }
  }
  free(XTX) ; 
  if (n==0) { 
   printf("## nothing above relthresh!\n") ;
   printf("##end of smartrel run\n") ;
   return 0 ;
  }
  // NOTE(review): the print loop reads evecs[i] next to idperm[i], which
  // assumes sortit reorders evecs in place and returns the permutation in
  // idperm -- confirm against sortit
  ZALLOC(idperm, n, int) ; 
  sortit(evecs, idperm, n) ;
  for (i=0; i<n; i++) {  
   j = idperm[i] ;
   k = vecind[j] ;
   // recover the (row, column) pair from the flattened index
   a = k/nrows ; 
   b = k%nrows ;
   printf("rel: %20s ",  xindlist[a] ->ID) ;
   printf("%20s ",  xindlist[b] ->ID) ;
   printf(" %9.3f", -evecs[i]) ;
   printnl() ;
  }
  
  printf("##end of smartrel run\n") ;
  return 0 ;
}
Пример #2
0
int main(int argc, char **argv)
{
  SNP **snpmarkers ;
  Indiv **indivmarkers ;
  int numsnps, numindivs ;
  unsigned char *packg1, *packg2 ;

  int **snppos ;
  int *snpindx ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, x ;

  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  nums1 = 
    getsnps(snp1, &snpm1, 0.0, NULL, &nignore, numrisks) ;

  putped(1) ;
  freeped() ;

  nums2 = 
    getsnps(snp2, &snpm2, 0.0, NULL, &nignore, numrisks) ;

  putped(2) ;
  freeped() ;

  for (x=0; x<nums1; ++x)  {  
   cupt1 = snpm1[x] ;
   cupt1 -> tagnumber = -1 ;
  }
  for (x=0; x<nums2; ++x)  {  
   cupt2 = snpm2[x] ;
   t = x %1000 ;   
// if (t==0) printf("zz %d %d\n", x, nums2) ;

   k = snpindex(snpm1, nums1, cupt2 -> ID) ;  
   if (k<0) { 
    cupt2 -> ignore = YES ;
    continue ;
   }
   cupt1 = snpm1[k] ;
   cupt1 -> tagnumber = x ;
   t = checkmatch(cupt1, cupt2) ;
   if (t==1) continue ;
   if (t==2) {  
    cupt2 -> isrfake = YES ;
    continue ;
   }
   if (t<0) {  
    cupt1  -> ignore = cupt2 -> ignore = YES ;
    continue ;
   }
   printf("allele funny: %s", cupt1 -> ID) ;
   printalleles(cupt1, stdout) ;
   printalleles(cupt2, stdout) ;
   printnl() ;
   cupt1  -> ignore = cupt2 -> ignore = YES ;
   continue ;
  }
  freesnpindex() ;
  numi1 = getindivs(ind1, &indm1) ;
  numi2 = getindivs(ind2, &indm2) ;

  for (x=0; x<numi2; ++x) {  
   k = indindex(indm1, numi1, indm2[x] -> ID) ;
// this code could be modified to allow duplicate individuals
   if (k>=0) fatalx("dup ind: %s\n", indm2[x] -> ID) ;  // fix later?  
  }

  setgenotypename(&geno1, ind1) ;
  getped(1) ;
  getgenos(geno1, snpm1, indm1, 
     nums1, numi1, nignore) ;

  packg1 = (unsigned char *) getpackgenos() ;
  clearpackgenos() ;

  setgenotypename(&geno2, ind2) ;
  getped(2) ;
  getgenos(geno2, snpm2, indm2, 
     nums2, numi2, nignore) ;

  packg2 = (unsigned char *) getpackgenos() ;
  numindivs = mergeit(snpm1, snpm2, &indm1, indm2, nums1, nums2, numi1, numi2) ;

  snpmarkers = snpm1 ; 
  numsnps = nums1 ;
  indivmarkers = indm1 ; 

  free(packg1) ;
  free(packg2) ;

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of mergeit run\n") ;
  return 0 ;
}
Пример #3
0
/*
 * qpBound driver.
 *
 * Reads parameters, loads SNP / individual / genotype data, filters SNPs by
 * chromosome, reads a file of population triples, builds the list of
 * distinct population names, and calls dopop3out() once per triple.
 *
 * Returns 0 on completion (also when no parameter file is given or the
 * triple list is empty); a missing outpop aborts through fatalx().
 *
 * Many of the locals declared below are not referenced in this function.
 */
int main(int argc, char **argv)
{

  char sss[MAXSTR] ;
  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  int  lsnplist, lindlist ;
  int i, j, k, k1, k2, k3, k4, kk ; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double y1, y2, y, sig, tail, yy1, yy2 ;
  char ss[11] ;
  int *blstart, *blsize, nblocks ;
  int  xnblocks ; /* for xsnplist */
  int *bcols ;
  int **subsets ;
  double maxgendis ;
  char **eglist ;
  int numeg ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  int nignore, numrisks = 1 ;
  double  *xrow, *xpt ; 
  SNP **xsnplist  ;
  int *tagnums  ;
  Indiv **xindlist ;
  int *xindex, *xtypes ;
  int  nedge, m, nc ;
  double zn, zvar ;
  int weightmode = NO ;
  double chisq, ynrows ;
  int *numhits, t ;  
  double *xmean, *xfancy ;
  double *divans, *divsd ; 
  double *hettop, *hetbot ; 
  int chrom,  numclear ;
  double gdis ;
  int outliter, *badlist, nbad ;
  int ***counts ;

  char ***plists ; 
  int nplist, trun ;
  int nrows, ncols ;

  readcommands(argc, argv) ;
  printf("## qpBound version: %s\n", WVERSION) ;
  // nothing to do without a parameter file
  if (parname == NULL) return 0 ;
  // explicitly selecting chromosome 23 overrides noxdata
  if (xchrom == 23) noxdata = NO ;
  if (outpop == NULL) fatalx("no outpop\n") ;  
  setinbreed(inbreed) ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;

  numindivs = getindivs(indivname, &indivmarkers) ;
  setindm(indivmarkers) ;
  k = getgenos(genotypename, snpmarkers, indivmarkers, 
    numsnps, numindivs, nignore) ;

  // SNP filter: keep only xchrom when it is set; drop chromosome 23 when
  // noxdata is set, chromosome 0, anything above 23, and zchrom
  for (i=0; i<numsnps; i++)  
  {  
    cupt = snpmarkers[i] ; 
    chrom = cupt -> chrom ;
    if ((xchrom>0) && (chrom != xchrom)) cupt -> ignore = YES ;
    if ((noxdata) && (chrom == 23)) cupt-> ignore = YES ;
    if (chrom == 0) cupt -> ignore = YES ;
    if (chrom > 23) cupt -> ignore = YES ;
    if (chrom == zchrom) cupt -> ignore = YES ;
  }

  // read the population triples; the line count is only an upper bound,
  // the value returned by readpopx is the number actually used
  nplist = numlines(popfilename) ;
  ZALLOC(plists, nplist, char **) ;
  // NOTE(review): lines[] is allocated here and passed to dopop3out below,
  // but nothing visible in this function fills it -- confirm where it is set
  ZALLOC(lines, nplist, char *) ;
  num = readpopx(popfilename, plists, 3) ;
  nplist = num ;
  printf("nplist: %d\n", nplist) ;
  if (nplist == 0) return 0;


  // eglist: distinct population names over all triples, in first-seen order
  ZALLOC(eglist, nplist*3, char *)  ;  
  numeg = 0 ;
  for (trun=0; trun<nplist; ++trun) {  
   for (k=0; k<3; ++k) { 
    t = indxindex(eglist,  numeg, plists[trun][k]) ;
    if (t<0) {  
     eglist[numeg] = strdup(plists[trun][k]) ;
     ++numeg ;
    }
   }
  }

  if (popsizelimit > 0) {  
   setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ; 
  }
  
  if (outputname != NULL) openit (outputname, &ofile, "w") ;
  outnum = 0 ;
  ZALLOC(xindex, numindivs, int) ;
  ZALLOC(xindlist, numindivs, Indiv *) ;
  nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
  ZALLOC(xtypes, nrows, int) ;
  // NOTE(review): as written, every loaded sample whose egroup differs from
  // outpop triggers fatalx("outpop bug"), and the k+1 value is always
  // overwritten by outnum when the loop survives -- confirm this is intended
  for (i=0; i<nrows; i++) {
   indx = xindlist[i] ;
   k = indxindex(eglist, numeg, indx -> egroup) ;
   xtypes[i] = k+1 ; // dangerous bend
   t = strcmp(indx -> egroup, outpop) ;
   if (t==0) xtypes[i] = outnum ;
   else fatalx("outpop bug\n") ;
  }
  ZALLOC(xsnplist, numsnps, SNP *) ;
  ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;



/**
  ZALLOC(counts, ncols, int **) ;
  for (k=0; k<ncols; ++k) {
   counts[k] = initarray_2Dint( numeg, 2, 0) ;
  }
  countpops(counts, xsnplist, xindex, xtypes, nrows, ncols) ;
*/

  // one dopop3out call per population triple
  for (trun=0; trun<nplist; ++trun) {  
   dopop3out(plists[trun], xsnplist, ncols, lines[trun], outpop) ;
  }

  if (outputname != NULL) fclose(ofile) ;

  printf("##end of qpBound\n") ;
  return 0 ;
  
}
Пример #4
0
/*
 * qpDstat driver (excerpt).
 *
 * The visible text covers argument handling, data loading, the SNP
 * chromosome filter and construction of the population list (eglist) either
 * from poplistname or from the quadruples in popfilename.  The excerpt ends
 * after that setup; the remainder of the function, including its closing
 * brace, is not present here.
 */
int main(int argc, char **argv)
{

  char sss[MAXSTR] ;
  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int *nsamppops ;
  int *ztypes ;
  int  lsnplist, lindlist, numeg ;
  int i, j, k, k1, k2, k3, k4, kk; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double y1, y2, y, sig, tail, yy1, yy2 ;
  char ss[11] ;
  int *blstart, *blsize, nblocks ;
  int  xnblocks ; /* for xsnplist */
  int *bcols ;
  double maxgendis ;
  int xind[4] ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  int nignore, numrisks = 1 ;
  double  *xrow, *xpt ; 
  SNP **xsnplist  ;
  int *tagnums  ;
  Indiv **xindlist ;
  int *xindex, *xtypes ;
  int nrows, ncols, m, nc ;
  double zn, zvar ;
  int weightmode = NO ;
  double chisq, ynrows ;
  int *numhits, t ;  
  double *xmean, *xfancy ;
  double *divans, *divsd ; 
  double *hettop, *hetbot ; 
  int chrom,  numclear ;
  double gdis ;
  int outliter, *badlist, nbad ;
  double **zdata, *z1, *z2 ;
  int maxtag = -1 ;
  double **zz ; 
  double *pmean, *pnum, rscore[3], dstat[3], hscore[3], rrr[3], ww[3], serr[3] ;
  int ssize[3][3], *sz ;
  int tpat[3][4] , rpat[3][4], *rrtmp, *rp ;
  int  *rawcol ; ;
  int a, b, c, d, col  ;
  int aa, bb, cc, dd  ;
  double *qpscores ;
  double *hest, *hsig ;
  double mingenpos, maxgenpos ;
  int *qhit  ;  /* number of times pair is clade in quartet */
  int *qmiss ;  /* number of times pair migration event implied */
  int **qplist, numqp = 0, maxqp=10000 ;
  double *qpscore ;
  char ***qlist, *sx ;
  int nqlist = 0  ;
  int bbest[3] ;
  double absscore[3] ; 
  double ascore[4], astat[4] ;


  double **dsctop, **dscbot ;
  double **abx, **bax, **f2 ;
  int popx[4] ;
  double tn[4*5], td[4*4] ;
  double zzsig[5], zzest[5], zsc[5] ;
  double ymin ;

  double *f3, *f4, *f3sig, *f4sig ;
  int t1, t2, tt ;
  int ***counts, **ccc ; 

  double tlenz[5], tlen[5] ;
  int lenz[5] ;  


  readcommands(argc, argv) ;
  printf("## qpDstat version: %s\n", WVERSION) ;
  // nothing to do without a parameter file
  if (parname == NULL) return 0 ;
  // at least one source of population names is required
  if ((poplistname == NULL) && (popfilename == NULL)) fatalx("poplistname, popfilename both null\n") ;

  // forceclade is switched off unless bankermode is set
  if (!bankermode) forceclade = NO ;
//if (fancynorm) printf("fancynorm used\n") ;
//else printf("no fancynorm used\n") ;
  setjquart(NO, jackweight, jackquart) ;

  nostatslim = MAX(nostatslim, 3) ;

   setinbreed(inbreed) ;

  if (outputname != NULL)  openit(outputname, &ofile, "w") ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;

  numindivs = getindivs(indivname, &indivmarkers) ;
  if (id2pops != NULL) { 
   setid2pops(id2pops, indivmarkers, numindivs) ;
  }
  setindm(indivmarkers) ;

  k = getgenos(genotypename, snpmarkers, indivmarkers, 
    numsnps, numindivs, nignore) ;

  // drop SNPs on chromosome 23 and above, and on zchrom
  for (i=0; i<numsnps; i++)  {  
	  cupt = snpmarkers[i] ;
	  if (cupt -> chrom >= 23) cupt -> ignore = YES ;
	  if (cupt -> chrom == zchrom) cupt -> ignore = YES ;
  }

   ZALLOC(eglist, numindivs, char *) ; 
   ZALLOC(ztypes, numindivs, int) ;
   // no quadruple file: take the populations from poplistname; in
   // bankermode the list also carries a type per population and entries of
   // type 2 are counted in numbanker
   if (popfilename == NULL) {
   if (bankermode == NO)
     numeg = loadlist(eglist, poplistname) ;
   else { 
     numeg = loadlist_type(eglist, poplistname, ztypes, 0) ;
     numbanker = 0 ;
     for (k=0; k<numeg; ++k) { 
      if (ztypes[k] == 2) ++numbanker ;
     }
     printf("bankermode: ") ; 
     printimat(ztypes, 1, numeg) ;
   }
   }
   // quadruple file: read the 4 columns of names, then collect each
   // distinct name into eglist and call setstatus for it
   if (popfilename != NULL) { 
    bbestmode = NO ;
    nqlist = numlines(popfilename) ;
    ZALLOC(qlist, 4, char **) ;
    for (k=0; k<4; ++k) {  
     ZALLOC(qlist[k], nqlist, char *) ;
    }
    nqlist = getnamesstripcolon(&qlist, nqlist, 4, popfilename, locount, hicount) ;
    numeg = 0 ;
    printf("number of quadruples %d\n", nqlist) ;
    fflush(stdout) ;
    for (k=0; k<4; ++k) { 
     for (j=0; j<nqlist; ++j) { 
      sx = qlist[k][j] ;
      t1 = indxstring(eglist, numeg, sx) ;
      // already known population name
      if (t1 >=0) continue ;
      eglist[numeg] = strdup(sx) ;
      ++numeg ;
      setstatus(indivmarkers, numindivs, sx) ;
     }
    }
   }
Пример #5
0
/*
 * EIGENSTRAT association driver.
 *
 * For every SNP it builds a per-sample genotype vector (0 / 0.5 / 1, with
 * -100 marking missing or outlier samples), centres it on the mean of the
 * usable samples, computes an uncorrected statistic with compute_chisq(),
 * then removes the component along each of the L principal components and
 * computes the corrected statistic with compute_chisqE().  One line
 * "<chisq> <Echisq>" (or "NA") is written to fpout per SNP.
 */
int main(int argc, char **argv)
{
  double *V;
  double *xx;
  double *iscase;
  double *iscasecorr;
  int K;
  int pc, snp, s;
  int nignore;
  double rowsum, rowsum1;
  double chisq, Echisq, coef, denom;

  readcommands(argc, argv);

  /* Results go to the named file when given, otherwise to stdout. */
  if (outputname == NULL) {
    fpout = stdout;
  } else {
    openit(outputname, &fpout, "w");
  }
  fprintf(fpout, "Chisq EIGENSTRAT\n");

  setinmode(&inmode, imode);
  packmode = YES;

  numsnps = getsnps(snpname, &snpmarkers, 0.0, NULL, &nignore, 1);
  NSAMPLES = getindivs(indivname, &indivmarkers);

  setstatus(indivmarkers, NSAMPLES, "Case");
  setgenotypename(&genotypename, indivname);
  if (genotypename != NULL) {
    getgenos(genotypename, snpmarkers, indivmarkers,
             numsnps, NSAMPLES, nignore);
  }

  /* Principal components, outlier flags and phenotypes. */
  L = numpc;
  readpcafile(&V, &outlier, &K, L, NSAMPLES);
  getphenos(NSAMPLES, &iscase, outlier, &iscasecorr, L, V);

  xx = (double *)malloc(NSAMPLES*sizeof(*xx));
  if (xx == NULL) {
    fprintf(stderr,"CM\n");
    exit(1);
  }

  /* main eigenstrat loop */
  for (snp = 0; snp < numsnps; snp++) {
    SNP *marker = snpmarkers[snp];

    /* Decode genotypes; any other code leaves xx[s] as it was. */
    for (s = 0; s < NSAMPLES; s++) {
      switch (getgtypes(marker, s)) {
        case 0:  xx[s] = 0.0;    break;
        case 1:  xx[s] = 0.5;    break;
        case 2:  xx[s] = 1.0;    break;
        case -1: xx[s] = -100.0; break;
        default: break;
      }
      if (outlier[s] == 1) {
        xx[s] = -100.0;
      }
    }

    /* Mean over usable samples. */
    rowsum = 0.0;
    rowsum1 = 0.0;
    for (s = 0; s < NSAMPLES; s++) {
      int unusable =
          (qtmode == NO && ((outlier[s]) || (xx[s] < -99.0))) ||
          (qtmode == YES && ((outlier[s]) || (xx[s] == -100.0)));
      if (!unusable) {
        rowsum += xx[s];
        rowsum1 += 1.0;
      }
    }

    /* Centre usable samples; missing ones stay at the -100 marker. */
    for (s = 0; s < NSAMPLES; s++) {
      int missing;

      if (outlier[s]) {
        continue;
      }
      if (qtmode == NO) {
        missing = (xx[s] < -99.0);
      } else {
        missing = (xx[s] == -100.0);
      }
      if (missing) {
        xx[s] = -100.0;  /* still keep track */
      } else {
        xx[s] -= rowsum/rowsum1;
      }
    }

    /* Chisq */
    chisq = compute_chisq(xx,iscase);

    /* EIGENSTRAT: regress out each principal component in turn. */
    for (pc = 0; pc < L; pc++) {
      coef = 0.0;
      denom = 0.0;
      for (s = 0; s < NSAMPLES; s++) {
        int unusable =
            (qtmode == NO && (outlier[s] || xx[s] < -99.0)) ||
            (qtmode == YES && (outlier[s] || xx[s] == -100.0));
        if (unusable) {
          continue;
        }
        coef += xx[s]*V[NSAMPLES*s+pc];
        denom += V[NSAMPLES*s+pc]*V[NSAMPLES*s+pc];
      }
      coef /= denom;
      for (s = 0; s < NSAMPLES; s++) {
        int unusable =
            (qtmode == NO && (outlier[s] || xx[s] < -99.0)) ||
            (qtmode == YES && (outlier[s] || xx[s] == -100.0));
        if (unusable) {
          continue;
        }
        xx[s] -= coef*V[NSAMPLES*s+pc];
      }
    }
    Echisq = compute_chisqE(xx,iscasecorr);

    /* No usable sample at all: both statistics are reported as NA. */
    if (rowsum1 == 0.0) {
      chisq = -1.0;
      Echisq = -1.0;
    }

    if (chisq >= 0.0) {
      fprintf(fpout,"%.04f",chisq);
    } else {
      fprintf(fpout,"NA");
    }
    if (Echisq >= 0.0) {
      fprintf(fpout," %.04f\n",Echisq);
    } else {
      fprintf(fpout," NA\n");
    }

    /* Size guard, checked after each SNP has been processed. */
    if (NSAMPLES*snp > MAXSIZE) {
      fprintf(stderr,"OOPS genotype file has > %d genotypes\n",MAXSIZE);
      fprintf(fpout,"OOPS genotype file has > %d genotypes\n",MAXSIZE);
      exit(1);
    }
  }
}
Пример #6
0
/*
 * Convert a text file of per-sample intensity values into a netCDF file.
 *
 * Usage: <program> <base> <total_ids> [infile]
 *   base       - stored in the global `base`; the output name is built in
 *                binfile by strapp() with the ".nc" suffix
 *   total_ids  - number of samples, passed to create_nc()
 *   infile     - optional input path; standard input is used when omitted
 *
 * The first input line is handed to getsnps() to obtain the marker count.
 * Every following line holds one sample: an id followed by a fixed number of
 * values per marker.  The netCDF file is created when the first sample line
 * has been inspected, since only then is the number of values per marker
 * known.
 *
 * Exits with EXIT_SUCCESS on completion; usage errors exit with
 * EXIT_FAILURE, and I/O or format errors go through error().
 */
int main(int argc, char* argv[])
{
	program_name = argv[0];

	if (argc < 3) {
		print_help();
		exit(EXIT_FAILURE);
	}

	char binfile[50];
	base = argv[1];
	/* NOTE(review): binfile is not initialised before this call; presumably
	   strapp() builds the name from the global `base` -- confirm. */
	strapp(binfile, ".nc");

	int total_ids = atoi(argv[2]);

	FILE *fp;
	/* The input file is the optional 4th argument.  The previous test
	   (argc > 2) was always true here, so argv[3] == NULL reached fopen()
	   whenever the file name was omitted and stdin was never used. */
	if (argc > 3) {
		infile = argv[3];
		fp = fopen(infile, "r");
		if (!fp)
			error(1, errno, "%s", infile);
	} else {
		infile = "stdin";
		fp = stdin;
	}

	/* Get the first line with the SNP names */
	char *line = NULL;
	size_t n = 0;
	int read = getlin(&line, &n, fp);
	if (read == -1)
		error(1, errno, "%s", infile);
	int nsnps = getsnps(line);
	free(line);
	line = NULL;
	printf("%d markers\n", nsnps);

	printf("Converting intensity values to binary\n");
	int ncid = 0, stat, sampid = 0, varid = 0;
	int nc_created = 0;	/* set once create_nc() has been called */
	size_t nids = 0, perkey;
	size_t count[3] = { 1, 0, 0 };
	size_t start[3] = { 0, 0, 0 };
	char *id;
	float *vals = NULL;	/* allocated on the first sample line */

	while (getlin(&line, &n, fp) != -1) {
		if (nids == 0) {
			/* Count number of vals in first line to alloc indv_dat */
			read = getstrings(line, NULL, DELIM) - 1;
			if (read < 1)
				error(2, 0, "No values found");
			if (read % nsnps != 0)
				error(2, 0, "Not same number of values for each marker");
			perkey = read / nsnps;
			printf("%zu values per marker\n", perkey);
			vals = malloc(sizeof(float) * read);
			if (vals == NULL)
				error(1, errno, "dat.vals");

			/* Create netcdf file and write snp names */
			ncid = create_nc(binfile, total_ids, nsnps, perkey);
			nc_created = 1;
			stat = nc_inq_varid(ncid, "sample", &sampid);
			check_err(stat,__LINE__,__FILE__);
			stat = nc_inq_varid(ncid, "intensity", &varid);
			check_err(stat,__LINE__,__FILE__);
			writesnps(ncid, nsnps);

			/* One sample per write: nsnps x perkey values */
			count[1] = nsnps;
			count[2] = perkey;
		}

		/* Read values from line */
		id = linevals(line, vals);

		/* Write sample id */
		stat = nc_put_var1_string(ncid, sampid, &nids, (const char **) &id);
		check_err(stat,__LINE__,__FILE__);

		/* Write intensity values */
		start[0] = nids;
		stat = nc_put_vara_float(ncid, varid, start, count, vals);
		check_err(stat,__LINE__,__FILE__);

		free(line);
		line = NULL;
		nids++;
		printf("Read %zu\r", nids);
		fflush(stdout);
	}

	fclose(fp);
	/* Only close the netCDF handle if it was opened; with no sample lines
	   ncid was previously used uninitialised. */
	if (nc_created) {
		stat = nc_close(ncid);
		check_err(stat,__LINE__,__FILE__);
	}
	free(vals);	/* NULL when no sample line was read */
	free(line);

	printf("Wrote %zu samples to [ %s ]\n", nids, binfile);
	exit(EXIT_SUCCESS);
}
Пример #7
0
/*
 * convertf driver.
 *
 * Reads parameters, loads SNPs, individuals and genotypes, optionally remaps
 * them onto new SNP / individual lists, applies the requested filters and
 * transformations (chromosome and position window, diploid conversion,
 * de-duplication, strand / allele flips, polarisation against a reference
 * individual, population list, r2 pruning, HW filter, excluded regions,
 * missingness, fast duplicate check, decimation) and writes the result with
 * outfiles().
 *
 * Returns 0 on completion; fatal conditions go through fatalx().
 */
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, numvalidind, nvalid, numvalidsnps ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, g ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  char c1, c2 ;
  int t1, t2 ;

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  // a positive r2 threshold implies r2 pruning
  if (r2thresh > 0.0) killr2 = YES ;
  if (badpedignore) setbadpedignore() ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  // xchrom == -1 means "no chromosome restriction"; otherwise keep only
  // SNPs on xchrom whose rounded position lies in [lopos, hipos]
  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  // apply [minchrom, maxchrom] and recount the ignored SNPs
  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;


  numindivs = getindivs(indivname, &indivmarkers) ;
  if (polarid != NULL) {
   polarindex = indindex(indivmarkers, numindivs, polarid) ;
   // the message has a %s conversion, so the id must be passed; calling
   // with no matching argument is undefined behaviour
   if (polarindex<0) fatalx("polarid %s not found\n", polarid) ;
  }


  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  // optional remap onto a new SNP list, which then replaces the old one
  if (newsnpname != NULL) { 
    nums2 = 
     getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ;
     remap(snpmarkers, numsnps, snpm2, nums2) ;
     snpmarkers = snpm2 ; 
     numsnps = nums2 ;
  }

  // optional remap onto a new individual list; polarindex is looked up
  // again because the indices may have changed (a negative result simply
  // disables polarisation below)
  if (newindivname != NULL) { 
    numind2 = getindivs(newindivname, &indm2) ;
    remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;
    indivmarkers = indm2 ;
    numindivs = numind2 ;
    if (polarid != NULL) {
     polarindex = indindex(indivmarkers, numindivs, polarid) ;
    }
  }

  if (mkdiploid) { 

    numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;
    numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ;
    remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;

    indivmarkers = indm2 ;
    numindivs = numind2 ;

  }


  if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position

  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ;
   if (zerodistance) cupt -> genpos = 0.0 ;

   // flip the SNP when the first allele code is unusable but the second is
   c1 = cupt -> alleles[0] ;  
   c2 = cupt -> alleles[1] ;  
   t1 = pedval(&c1) % 5 ;
   t2 = pedval(&c2) % 5 ;  // 0 and 5 are no good
   if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ;  
  }

  flipstrand(flipstrandname, snpmarkers, numsnps) ;
  flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ;

  // polarise: flip SNPs where the reference individual has genotype 0 so it
  // becomes 2, and ignore every SNP where it does not end up as 2
  if (polarindex>=0) { 
    for (i=0; i<numsnps; i++)  {  
      cupt = snpmarkers[i] ;
      g = getgtypes(cupt, polarindex) ;
      if (g==0) { 
       printf("polarizing %s\n", cupt -> ID) ;
       flip1(cupt, NO, YES) ;
       g = getgtypes(cupt, polarindex) ;
       if (g!=2) fatalx("badbug\n") ;
      }
      if (g != 2) cupt -> ignore = YES ; 
    }
  }

  // outputall: write everything now and skip the remaining filters
  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  // with a population list, ignore individuals whose affstatus is NO after
  // seteglist; otherwise mark status with "Case"
  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
    for (i=0; i<numindivs; ++i)  {     
     indx = indivmarkers[i] ; 
     if (indx -> affstatus == NO) indx -> ignore = YES ;
    }
  }
  else 
  setstatus(indivmarkers, numindivs, "Case") ;

  numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ;
  numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }


  if ( nhwfilter > 0 )  {
    hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname);
  }

  if ( xregionname )  {
    excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname);
  }


  // ignore individuals without any valid genotype and count the rest
  numvalidind = 0 ;
  for (i=0; i<numindivs; ++i)  { 
   indx = indivmarkers[i] ;
   if (indx -> ignore) continue ; 
   if (numvalidgtind(snpmarkers, numsnps, i) ==0) { 
    indx -> ignore = YES ; 
    printf("no data for individual: %s\n", indx -> ID) ;
   }
   if (indx -> ignore == NO) ++numvalidind ;
  }

  // a negative maxmiss is derived from maxmissfrac
  if (maxmiss<0) maxmiss  = (int) (maxmissfrac * (double) numvalidind+1) ;
  printf("numvalidind:  %5d  maxmiss: %5d\n", numvalidind, maxmiss)  ;
  if (numvalidind  == 0) fatalx("no valid samples!\n") ;

  // ignore SNPs missing in more than maxmiss valid individuals; nothing to
  // do when maxmiss exceeds the number of valid individuals
  for (k=0; k<numsnps; ++k) {  
   if (maxmiss>numvalidind) break ;
   cupt = snpmarkers[k] ;
   t = numvalidind - numvalidgtypes(cupt) ;
// printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ;
   if (maxmiss < t) { 
    cupt -> ignore = YES ;
   }
/**
   if (numvalidind ==  t) { 
    printf("no data for snp: %s\n", cupt -> ID) ;
    cupt -> ignore = YES ;
   }
*/

  }

  if (fastdup)  {  

   printf("fastdup set %d\n", fastdupnum) ;
   if (fastdupnum > 0) {
     setfastdupnum(fastdupnum) ;
     setfastdupthresh(fastdupthresh, fastdupkill) ;
     fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
   }
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, nvalid;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;

/**
  cupt = snpmarkers[0] ;
  printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ;
*/

  numindivs = getindivs(indivname, &indivmarkers) ;

  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  setstatus(indivmarkers, numindivs, "Case") ;

  /******************************************************************/
  /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, 
    maxmissfracind, maxmissfracsnp); */
  /******************************************************************/

  if (fastdup)  {  
   if (fastdupnum > 0) setfastdupnum(fastdupnum) ;
   fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}