Exemple #1
0
void
writesnpeigs(char *snpeigname, SNP **xsnplist, double *ffvecs, int numeigs, int ncols) 
{
// this is called at end and ffvecs overwritten
//
// Purpose: rescale each of the numeigs rows of ffvecs (row j occupies
// ffvecs[j*ncols .. j*ncols+ncols-1]), print the SNPs with the largest
// absolute loading for every row to stdout, and, when snpeigname is not
// NULL, write one line per non-ignored SNP (ID, chrom, physpos, loadings)
// to that file.
//
//   snpeigname  output file name; NULL means "report to stdout only"
//   xsnplist    ncols SNP pointers, parallel to the columns of ffvecs
//   ffvecs      numeigs x ncols matrix, modified in place (rescaled)
//   numeigs     number of rows (eigenvectors) in ffvecs
//   ncols       number of SNP columns
//
// NOTE(review): asum2() is presumably the sum of squares, which would make
// each rescaled row have mean square 1 -- confirm against its definition.
  double *xpt, y, yscal, *snpsc ;
  int i, j, k, kmax, kmin ;
  SNP * cupt  ;
  FILE *fff ;
  
  for (j=0; j<numeigs; ++j) {  
   xpt = ffvecs+j*ncols ;  
   y = asum2(xpt, ncols) ;  
   // a zero (or negative) norm would give an infinite/NaN scale factor and
   // turn the whole row into NaN; leave such a row untouched instead
   if (y <= 0.0) continue ;
   yscal = (double) ncols / y ;
   yscal = sqrt(yscal) ;
   vst(xpt, xpt, yscal, ncols) ;
  }


  ZALLOC(snpsc, ncols, double) ;
  // ignored SNPs are never written below, so they keep this sentinel
  vclear(snpsc, -99999, ncols) ;
  for (j=0; j<numeigs; ++j) {  
   if (ncols <= 0) break ;          // nothing to rank; avoids scanning an empty array
   for (i=0; i<ncols; ++i) {  
    cupt = xsnplist[i] ;
    if (cupt -> ignore) continue ;
    y = ffvecs[j*ncols+i] ;
    snpsc[i] = fabs(y) ; 
   }
   // report up to 11 best SNPs (k = 0..10) for this eigenvector
   for (k=0; k<=10; ++k) { 
    vlmaxmin(snpsc, ncols, &kmax, &kmin) ;
    // every real score is >= 0; a negative maximum means only ignored
    // (-99999) or already reported (-1.0) entries are left
    if (snpsc[kmax] < 0.0) break ;
    cupt = xsnplist[kmax] ;
    printf("eigbestsnp %4d %20s %2d %12d %9.3f\n", j+1, cupt -> ID, cupt -> chrom, nnint(cupt -> physpos), snpsc[kmax]) ;
    snpsc[kmax] = -1.0 ;             // mark as reported so it is not picked again
   }
  }
  free(snpsc) ;


  if (snpeigname == NULL) return ;
  openit (snpeigname, &fff, "w") ;

  for (i=0; i<ncols; ++i) {  
   cupt = xsnplist[i] ;
   if (cupt -> ignore) continue ;

   fprintf(fff, "%20s", cupt -> ID) ;
   fprintf(fff,  " %2d", cupt -> chrom) ;
   fprintf(fff,  " %12d", nnint(cupt -> physpos)) ;

   // one column per eigenvector
   for (j=0; j<numeigs; ++j) {  
    fprintf(fff, " %9.3f", ffvecs[j*ncols+i]) ;  
   }
   fprintf(fff, "\n") ;
  }

  fclose(fff) ;

}
Exemple #2
0
int ranhprob(int n, int a, int m) 
// Hypergeometric sampling by rejection.
// Method: Devroye, Computing (1987), general method for log-concave
// densities where the mode is known.
/** 
 Urn with n balls, a of them black.  Pick m without replacement.
 Returns the number of black balls picked.
*/
{
 int peak, offset, draw ;
 double logpeak, ppeak, width ;
 double ubranch, uaccept, cand, ratio ;

 // probability at the mode drives both the proposal and the acceptance test
 peak = modehprob(n, a, m) ;
 logpeak = loghprob(n, a, m, peak) ;
 ppeak = exp(logpeak) ;
 width = 1 + ppeak ;

 while (1) {
  ubranch = DRAND() ;
  uaccept = DRAND() ;

  // proposal: flat part with probability width/(1+width), else exponential tail
  cand = (ubranch <= width/(1+width))
       ? DRAND()*width/ppeak
       : (width+ranexp())/ppeak ;

  // round to an integer offset and give it a random sign
  offset = nnint(cand) ;
  if (ranmod(2)==0) offset = -offset ;

  draw = peak+offset ;
  if ((draw<0) || (draw>a)) continue ;

  ratio = exp(loghprob(n, a, m, draw)-logpeak) ;
  uaccept *= MIN(1, exp(width-ppeak*cand)) ;
  if (uaccept <= ratio) return draw ;
 }
}
Exemple #3
0
void fixit(int  *a, double *b, int n)
{
    /* Convert the first n doubles of b to ints with nnint() and store them in a. */
    int idx = 0 ;

    while (idx < n) {
        a[idx] = nnint(b[idx]) ;
        ++idx ;
    }
}
Exemple #4
0
int isprime(long num)
// Primality test by trial division.  Returns YES when num is prime, NO
// otherwise (everything below 2 is reported as NO).
// naive algorithm.  Implement Pollard rho at some time
{
    long x ;

    if (num < 2) return NO ;
    if (num < 4) return YES ;            // 2 and 3
    if (num % 2 == 0) return NO ;

    // Odd divisors only.  The divisor is a long and the bound is written as
    // x <= num/x (equivalent to x*x <= num, but cannot overflow), so the
    // test stays valid for the whole range of long rather than only for
    // values whose square root fits in an int.
    for (x=3; x <= num/x; x += 2) {
        if (num % x == 0) return NO ;
    }

    return YES ;

}
Exemple #5
0
int ranhprob(int n, int a, int m) 
// Hypergeometric sampling by rejection (Devroye's method for log-concave
// discrete distributions with known mode).
// Urn with n balls, a of them black; m are picked without replacement.
// Returns the number of black balls picked.
{
 double v, y ;
 double pm, logpm, w, ru, rw, rat ;
 int mode, x, zans ;

 // Mode of the hypergeometric distribution: floor((a+1)(m+1)/(n+2)).
 // Dividing by n+1 instead can land outside the support (e.g. a = m = n
 // gives n+1), and the method needs the true mode.
 v = (double) (a+1)*(m+1) / (double) (n+2) ; 
 mode = (int) v ;

 logpm = loghprob(n, a, m, mode) ;
 pm = exp(logpm) ;              
 w = 1 + pm ; 
 for (;;) { 
  ru = DRAND() ;
  rw = DRAND() ;
  // proposal density is proportional to min(1, exp(w - pm*y)):
  // flat on [0, w/pm] with probability w/(1+w), exponential tail otherwise
  if (ru <= w/(1+w)) y = DRAND()*w/pm ;
  else y = (w+ranexp())/pm ;
  x = nnint(y) ; 
  if (ranmod(2)==0) x = -x ;     // random sign
  zans = mode+x ;
  if (zans<0) continue ;
  if (zans>a) continue ;
  rat = exp(loghprob(n, a, m, zans)-logpm) ; 
  // the acceptance test must use the same envelope the proposal was drawn
  // from, i.e. exp(w - pm*y); using exp(1 - pm*y) here biases the sample
  rw *= MIN(1, exp(w-pm*y)) ;
  if (rw <= rat) break ;
 }
 return zans ;
 
}
Exemple #6
0
int main(int argc, char **argv)
{
  /*
   * smartrel driver.  In order, the code below:
   *   1. reads the run parameters and opens the output streams;
   *   2. loads SNPs, individuals and genotypes and marks unusable SNPs and
   *      individuals as ignored;
   *   3. builds the nrows x nrows matrix XTX from per-SNP columns, block by
   *      block, rescales it so its mean diagonal is 1/(nrows-1)-normalised,
   *      and computes its eigen-decomposition, optionally repeating after
   *      outlier removal;
   *   4. subtracts the leading numeigs eigen-components from XTX;
   *   5. prints every pair of samples whose normalised off-diagonal entry
   *      is at least relthresh, sorted by decreasing value.
   * Many variables used here (numsnps, snpmarkers, numeigs, relthresh, ...)
   * are not declared in this function and must be file-level state.
   */

  char **eglist ;
  int numeg ;
  int i, j, k, pos; 
  int *vv ;
  SNP *cupt, *cupt2 ;
  Indiv *indx ;
  double y1, y2, y ;

  int n0, n1, nkill ;

  int nindiv = 0 ;
  int nignore, numrisks = 1 ;
  SNP **xsnplist  ;
  Indiv **xindlist ;
  int *xindex ;
  int nrows, ncols, m ;
  double *XTX, *cc, *evecs, *ww ;
  double *lambda ;
  double *tvecs ;
  int weightmode = NO ;
  int t ;
  double *xmean, *xfancy ;
  double *ldmat = NULL, *ldmat2 = NULL;
  double *ldvv = NULL, *ldvv2 = NULL, *vv2 = NULL ;
  int chrom,  numclear ;
  double gdis ;
  int outliter, numoutiter, *badlist, nbad ;
  int a, b, n ;
  FILE *outlfile ;
  

  int xblock, blocksize=10000 ;   
  double *tblock ;  

  OUTLINFO *outpt ;
  int *idperm, *vecind ;   // for sort

  readcommands(argc, argv) ;
  printf("## smartrel version: %s\n", WVERSION) ;
  packmode = YES ;
  setomode(&outputmode, omode) ;

  if (parname == NULL) return 0 ;
  if (xchrom == (numchrom+1)) noxdata = NO ;

  // fstonly: disable eigenvector output and outlier iterations
  if (fstonly) { 
   printf("fstonly\n") ;
   numeigs = 0 ; 
   numoutliter = 0 ;
   numoutiter = 0 ;
   outputname = NULL ;
   snpeigname = NULL ;
  }

  if (fancynorm) printf("norm used\n\n") ;
  else printf("no norm used\n\n") ;

  nostatslim = MAX(nostatslim, 3) ;

  // both streams default to stdout and are redirected only when a name is given
  outlfile = ofile = stdout; 

  if (outputname != NULL)  openit(outputname, &ofile, "w") ;
  if (outliername != NULL) openit(outliername, &outlfile, "w") ;
  if (fstdetailsname != NULL) openit(fstdetailsname, &fstdetails, "w") ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;

  numindivs = getindivs(indivname, &indivmarkers) ;
  k = getgenos(genotypename, snpmarkers, indivmarkers, 
    numsnps, numindivs, nignore) ;


  // population list: either read from poplistname or derived from the individuals
  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
  }
  else
  {
    setstatus(indivmarkers, numindivs, NULL) ;
    ZALLOC(eglist, MAXPOPS, char *) ;
    numeg = makeeglist(eglist, MAXPOPS, indivmarkers, numindivs) ;
  }
  // loop body is only a disabled debug print
  for (i=0; i<numeg; i++) 
  {  
    /* printf("%3d %s\n",i, eglist[i]) ; */
  }

  // count individuals with affstatus == YES (nindiv is not read again below)
  nindiv=0 ;
  for (i=0; i<numindivs; i++) 
  {
    indx = indivmarkers[i] ;
    if(indx -> affstatus == YES) ++nindiv  ;
  }

  // ignore SNPs on chromosome 0, beyond numchrom+1, or on numchrom+1 when noxdata is set
  for (i=0; i<numsnps; i++)  
  {  
    cupt = snpmarkers[i] ; 
    chrom = cupt -> chrom ;
    if ((noxdata) && (chrom == (numchrom+1))) cupt-> ignore = YES ;
    if (chrom == 0) cupt -> ignore = YES ;
    if (chrom > (numchrom+1)) cupt -> ignore = YES ;
  }
  // when xchrom > 0 keep only that chromosome within [lopos, hipos];
  // also drop SNPs for which numvalidgtx() returns <= 1
  for (i=0; i<numsnps; i++)  
  {
    cupt = snpmarkers[i] ; 
    pos = nnint(cupt -> physpos) ;
    if ((xchrom>0) && (cupt -> chrom != xchrom)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos < lopos)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos > hipos)) cupt -> ignore = YES ;
    if (cupt -> ignore) continue ;
    if (numvalidgtx(indivmarkers, cupt, YES) <= 1) 
    { 
      printf("nodata: %20s\n", cupt -> ID) ;
      cupt -> ignore = YES ;
    }
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  // ignore individuals whose per-individual count from numvalidgtallind() is zero
  ZALLOC(vv, numindivs, int) ;
  numvalidgtallind(vv, snpmarkers, numsnps,  numindivs) ; 
  for (i=0; i<numindivs; ++i)  { 
  if (vv[i] == 0) {
    indx = indivmarkers[i] ;
    indx -> ignore = YES ; 
   }
  }
  free(vv) ;

  numsnps = rmsnps(snpmarkers, numsnps, NULL) ;  //  rid ignorable snps

   
  // missingmode also switches fancynorm off
  if (missingmode) 
  {
    setmiss(snpmarkers, numsnps) ;
    fancynorm = NO ;
  }

  if  (weightname != NULL)   
  {  
    weightmode = YES ;
    getweights(weightname, snpmarkers, numsnps) ;
  }
  // LD-regression work arrays; ldmat starts as 1.0e-6 times the identity
  if (ldregress>0) 
  {  
    ZALLOC(ldvv,  ldregress*numindivs, double) ;
    ZALLOC(ldvv2,  ldregress*numindivs, double) ;
    ZALLOC(vv2,  numindivs, double) ;
    ZALLOC(ldmat,  ldregress*ldregress, double) ;
    ZALLOC(ldmat2,  ldregress*ldregress, double) ;
    setidmat(ldmat, ldregress) ;         
    vst(ldmat, ldmat, 1.0e-6, ldregress*ldregress) ;
  }

  ZALLOC(xindex, numindivs, int) ;
  ZALLOC(xindlist, numindivs, Indiv *) ;
  ZALLOC(xsnplist, numsnps, SNP *) ;

  if (popsizelimit > 0) 
  {  
    setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ; 
  }

  // nrows / ncols are the sample and SNP counts reported on the next line
  nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
  ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
  printf("number of samples used: %d number of snps used: %d\n", nrows, ncols) ;

/**
  cupt = xsnplist[0] ;
  for (j=0; j<nrows; ++j) {  
   k = xindex[j] ;
   g = getgtypes(cupt, k) ;
   indx = indivmarkers[k] ;
   t = indxindex(eglist, numeg, indx -> egroup) ;
   printf("yy1 %20s %20s %20s %d %d %d\n", cupt ->ID, indx -> ID, indx -> egroup, j, k, g) ;
  }
  printf("yya: ") ; printimat(xindex, 1, nrows) ;
  printf("zzindxa:  %s\n", indivmarkers[230] -> egroup) ;
*/

  /* printf("## nrows: %d  ncols  %d\n", nrows, ncols) ; */
  // NOTE(review): nrows*nrows is an int product and will overflow for
  // very large sample counts -- confirm the expected range of nrows
  ZALLOC(xmean, ncols, double) ;
  ZALLOC(xfancy, ncols, double) ;
  ZALLOC(XTX, nrows*nrows, double) ;
  ZALLOC(evecs, nrows*nrows, double) ;
  ZALLOC(tvecs, nrows*nrows, double) ;
  ZALLOC(lambda, nrows, double) ;
  ZALLOC(cc, nrows, double) ;
  ZALLOC(ww, nrows, double) ;
  ZALLOC(badlist, nrows, int) ;

  // tblock buffers up to blocksize SNP columns of length nrows
  blocksize = MIN(blocksize, ncols) ; 
  ZALLOC(tblock, nrows*blocksize, double) ;

  // xfancy is multiplier for column xmean is mean to take off
  // badlist is list of rows to delete (outlier removal) 

  numoutiter = 1 ;  

  // one extra pass beyond numoutliter so the final pass runs without outlier removal
  if (numoutliter>=1) 
  {
    numoutiter = numoutliter+1 ;
    ZALLOC(outinfo, nrows,  OUTLINFO *) ;  
    for (k=0; k<nrows; k++) 
    {  
      ZALLOC(outinfo[k], 1, OUTLINFO) ;
    }
    /* fprintf(outlfile, "##%18s %4s %6s %9s\n", "ID", "iter","eigvec", "score") ; */
  }

  // main iteration: build XTX, decompose it, remove outliers, repeat
  for (outliter = 1; outliter <= numoutiter ; ++outliter)  {
    // fstonly: skip the computation and use an identity matrix with unit eigenvalues
    if (fstonly) { 
     setidmat(XTX, nrows) ;
     vclear(lambda, 1.0, nrows) ;
     break ;
    }
    // the SNP list is reloaded on every pass after the first
    if (outliter>1) {
     ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
    }
    vzero(XTX, nrows*nrows) ;
    vzero(tblock, nrows*blocksize) ;
    xblock = 0 ; 

    vzero(xmean, ncols) ;
    vclear(xfancy, 1.0, ncols) ;

    for (i=0; i<ncols; i++) 
    { 
      cupt = xsnplist[i] ;
      chrom = cupt -> chrom ;
      // fills cc (length nrows) for this SNP and returns two counts n0, n1
      // NOTE(review): presumably the two allele counts -- confirm in getcolxz
      getcolxz(cc, cupt, xindex, nrows, i, xmean, xfancy, &n0, &n1) ;
      t = MIN(n0, n1) ; 

      // too few copies of the rarer count: ignore the SNP and contribute a zero column
      if (t <= minallelecnt)  {  
       cupt -> ignore = YES ;
       vzero(cc, nrows) ; 
      }

      if (weightmode) 
      {
        vst(cc, cc, xsnplist[i] -> weight, nrows) ;
      }
      if (ldregress>0) 
      {  
        // walk back over up to ldregress previous SNPs; the first one on a
        // different chromosome or at genetic distance >= ldlimit determines
        // how many trailing entries (numclear) are passed to clearld()
        numclear = 0 ;
        for (k=1; k<= ldregress; ++k)  
        {  
          j = i-k ;  
          if (j<0) 
          { 
            numclear = ldregress-k+1 ; 
            break ;
          }
          cupt2 = xsnplist[j] ;  
          if (cupt2 -> chrom != chrom) gdis = ldlimit + 1.0 ; 
          else gdis = cupt -> genpos - cupt2 -> genpos ;
          if (gdis>=ldlimit) 
          {   
            numclear = ldregress-k+1 ; 
            break ;
          }
        }
        if (numclear>0) clearld(ldmat, ldvv, ldregress, nrows, numclear) ; 
        // ldreg writes its results to ldmat2 / vv2 / ldvv2, which then
        // replace ldmat / cc / ldvv for the next SNP
        ldreg(ldmat, ldmat2, cc, vv2, ldvv, ldvv2, ldregress, nrows) ;
        copyarr(ldmat2, ldmat, ldregress*ldregress) ;
        copyarr(vv2, cc, nrows) ;
        copyarr(ldvv2, ldvv, ldregress*nrows) ;
      }
      // append the column to the current block
      copyarr(cc, tblock+xblock*nrows, nrows) ;
      ++xblock ; 

/** this is the key code to parallelize */
      // block full: add its product (via tvecs) into XTX and start a new block
      if (xblock==blocksize) 
      {  
        domult(tvecs, tblock, xblock, nrows) ;
        vvp(XTX, XTX, tvecs, nrows*nrows) ;
        xblock = 0 ;
        vzero(tblock, nrows*blocksize) ;
      }
    }

    // flush the final, partially filled block
    if (xblock>0) 
    { 
     domult(tvecs, tblock, xblock, nrows) ;
     vvp(XTX, XTX, tvecs, nrows*nrows) ;
    }
    symit(XTX, nrows) ;

    /**
    a = 0; b=0 ;
    printf("zz1 %12.6f ", XTX[a*nrows+b]) ;
    a = nrows-1; b=nrows-1 ;
    printf(" %12.6f %15.9g\n", XTX[a*nrows+b], asum(XTX, nrows*nrows)) ;
    */

    if (verbose) 
    {
      printdiag(XTX, nrows) ;
    }

    // NOTE(review): divides by nrows-1, which is zero when nrows == 1
    y = trace(XTX, nrows) / (double) (nrows-1) ;
    if (isnan(y)) fatalx("bad XTX matrix\n") ;
    /* printf("trace:  %9.3f\n", y) ; */
    if (y<=0.0) fatalx("XTX has zero trace (perhaps no data)\n") ;
    vst(XTX, XTX, 1.0/y, nrows * nrows) ;
/// mean eigenvalue is 1
    eigvecs(XTX, lambda, evecs, nrows) ;
// eigenvalues are in decreasing order 

    if (outliter > numoutliter) break ;  
    // last pass skips outliers 
    numoutleigs = MIN(numoutleigs, nrows-1) ;
    nbad = ridoutlier(evecs, nrows, numoutleigs, outlthresh, badlist, outinfo) ;
    if (nbad == 0) break ; 
    // log each outlier and mark it ignored so the next loadindx() drops it
    for (i=0; i<nbad; i++) 
    {  
      j = badlist[i] ;
      indx = xindlist[j] ;
      outpt = outinfo[j] ;
      fprintf(outlfile, "REMOVED outlier %s iter %d evec %d sigmage %.3f\n", indx -> ID, outliter, outpt -> vecno, outpt -> score) ;
      indx -> ignore = YES ;
    }
    nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
    printf("number of samples after outlier removal: %d\n", nrows) ;
  }

  if (outliername != NULL) fclose(outlfile) ;

  m = numgtz(lambda, nrows)  ;
  /* printf("matrix rank: %d\n", m) ; */
  if (m==0) fatalx("no data\n") ;

/** smartrel code */
  // remove the leading numeigs eigen-components: ww = sqrt(lambda[i]) * evec i
  // NOTE(review): nothing visible here bounds numeigs by nrows, and a
  // negative lambda[i] would give NaN from sqrt -- confirm upstream limits
  for (i=0; i<numeigs; i++) {  
   y = sqrt(lambda[i]) ;
   vst(ww, evecs+i*nrows, y, nrows) ;               
   subouter(XTX, ww, nrows) ;
  }
  free(tvecs) ; 

  // collect every pair i<j whose normalised entry, plus 1/(nrows-1), reaches relthresh.
  // vecind keeps the flat index i*nrows+j; evecs is reused as scratch and
  // holds the negated score so an ascending sort yields decreasing scores
  // NOTE(review): assumes sortit() sorts ascending -- confirm
  n = 0 ;
  ZALLOC(vecind, nrows*nrows/2, int) ; 
  for (i=0; i<nrows; i++) { 
   for (j=i+1; j<nrows; j++) { 
    k = i*nrows + j ; 
    y1 = XTX[i*nrows+i] ;
    y2 = XTX[j*nrows+j] ;
    y = XTX[k]/sqrt(y1*y2) ;
    y += 1/(double)(nrows-1);
    if (y<relthresh) continue ;
    vecind[n] = k ; 
    evecs[n] = -y ;
    ++n ;
   }
  }
  free(XTX) ; 
  if (n==0) { 
   printf("## nothing above relthresh!\n") ;
   printf("##end of smartrel run\n") ;
   return 0 ;
  }
  ZALLOC(idperm, n, int) ; 
  sortit(evecs, idperm, n) ;
  // evecs[i] is read as the i-th sorted value and idperm[i] as the index it
  // had before sorting; the flat index is decoded back into the pair (a, b)
  for (i=0; i<n; i++) {  
   j = idperm[i] ;
   k = vecind[j] ;
   a = k/nrows ; 
   b = k%nrows ;
   printf("rel: %20s ",  xindlist[a] ->ID) ;
   printf("%20s ",  xindlist[b] ->ID) ;
   printf(" %9.3f", -evecs[i]) ;
   printnl() ;
  }
  
  printf("##end of smartrel run\n") ;
  return 0 ;
}
int 
mergeit(SNP **snpm1, SNP **snpm2, Indiv ***pindm1, Indiv **indm2, 
 int nums1, int nums2, int numi1, int numi2)   
{
/*
 * Merge two genotype data sets over the SNPs of the first one.
 *
 *   snpm1, nums1   SNPs of the first data set; each tagnumber is used as
 *                  the index of the matching SNP in snpm2
 *   snpm2, nums2   SNPs of the second data set
 *   pindm1, numi1  in: individuals of the first data set;
 *                  out: *pindm1 is replaced by a newly allocated array
 *                  holding the first individuals followed by the second
 *   indm2, numi2   individuals of the second data set
 *
 * For every non-ignored SNP of snpm1 the genotypes of both data sets are
 * written into one freshly allocated packed row, and the SNP's pbuff and
 * ngtypes are redirected to it.  SNPs without a valid partner are marked
 * ignored.  Returns numi1 + numi2.
 *
 * Neither the packed buffer nor the combined individual array is freed
 * here, and the array previously held in *pindm1 is not freed either.
 */
   SNP *cupt1, *cupt2 ;
   int k, x, g, t  ;
   double y ;
   long rlen, packlen ; 
   static unsigned char *packg ;
   unsigned char *buff ;
   Indiv **indm1 ;
   static Indiv **indivmarkers ;
   int numindivs, numsnps ;

   indm1 = *pindm1 ;
   numindivs = numi1 + numi2 ;
   numsnps = nums1 ;
   ZALLOC(indivmarkers, numindivs, Indiv *) ;

   // combined individual list: first data set, then second
   t = 0 ;
   for (x=0; x<numi1; ++x)  {  
    indivmarkers[t] = indm1[x]  ;
    ++t ;
   }
   for (x=0; x<numi2; ++x)  {  
    indivmarkers[t] = indm2[x]  ;
    ++t ;
   }
// we don't bother with a destructor here.   Sloppy code

   // row length in bytes: 2 bits per individual, rounded up, at least 48
   y = (double) (numindivs * 2) / (8 * (double) sizeof (char)) ;
   rlen = nnint(ceil(y)) ;
   rlen = MAX(rlen, 48)  ;
   packlen = numsnps*rlen ;
   ZALLOC(packg, packlen, unsigned char) ;
   cclear((unsigned char *) packg, 0XFF, packlen) ;
// wipe to invalid

   buff = packg ;
   for (k=0; k<nums1; k++) {  
    cupt1 = snpm1[k] ;
    x = cupt1 -> tagnumber ;
    // a tag outside [0, nums2) has no partner in snpm2; indexing snpm2
    // with it would read out of bounds, so treat it like a missing tag
    if ((x < 0) || (x >= nums2)) cupt1 -> ignore = YES ;
    if (cupt1 -> ignore) continue ;
    cupt2 = snpm2[x] ; 
    if (cupt2 -> isrfake) { 
     if (phasedmode == NO)  flipalleles(cupt2) ;
     if (phasedmode == YES)  flipalleles_phased(cupt2) ;
    }
    // first data set occupies slots 0 .. numi1-1; negative genotypes are
    // skipped, leaving the 0xFF fill in place
    for (t=0; t<numi1; ++t) {  
      g = getgtypes(cupt1, t) ;
      if (g<0) continue ;
      wbuff((unsigned char *)buff, t, g) ;
    }
    // second data set follows at slots numi1 .. numi1+numi2-1
    for (t=0; t<numi2; ++t) {  
      g = getgtypes(cupt2, t) ;
      if (g<0) continue ;
      wbuff((unsigned char *)buff, numi1+t, g) ;
    }
    cupt1 -> ngtypes = numindivs ;
    cupt1 -> pbuff = (char *) buff ;
    buff += rlen ;                   // only non-ignored SNPs consume a row
   }
   *pindm1 = indivmarkers ;
   return numindivs ;
}
Exemple #8
0
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, numvalidind, nvalid, numvalidsnps ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, g ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  char c1, c2 ;
  int t1, t2 ;

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  if (r2thresh > 0.0) killr2 = YES ;
  if (badpedignore) setbadpedignore() ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;


  numindivs = getindivs(indivname, &indivmarkers) ;
  if (polarid != NULL) {
   polarindex = indindex(indivmarkers, numindivs, polarid) ;
   if (polarindex<0) fatalx("polarid %s not found\n") ;
  }


  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (newsnpname != NULL) { 
    nums2 = 
     getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ;
     remap(snpmarkers, numsnps, snpm2, nums2) ;
     snpmarkers = snpm2 ; 
     numsnps = nums2 ;
  }

  if (newindivname != NULL) { 
    numind2 = getindivs(newindivname, &indm2) ;
    remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;
    indivmarkers = indm2 ;
    numindivs = numind2 ;
    if (polarid != NULL) {
     polarindex = indindex(indivmarkers, numindivs, polarid) ;
    }
  }

  if (mkdiploid) { 

    numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;
    numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ;
    remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;

    indivmarkers = indm2 ;
    numindivs = numind2 ;

  }


  if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position

  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ;
   if (zerodistance) cupt -> genpos = 0.0 ;

   c1 = cupt -> alleles[0] ;  
   c2 = cupt -> alleles[1] ;  
   t1 = pedval(&c1) % 5 ;
   t2 = pedval(&c2) % 5 ;  // 0 and 5 are no good
   if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ;  
  }

  flipstrand(flipstrandname, snpmarkers, numsnps) ;
  flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ;

  if (polarindex>=0) { 
    for (i=0; i<numsnps; i++)  {  
      cupt = snpmarkers[i] ;
      g = getgtypes(cupt, polarindex) ;
      if (g==0) { 
       printf("polarizing %s\n", cupt -> ID) ;
       flip1(cupt, NO, YES) ;
       g = getgtypes(cupt, polarindex) ;
       if (g!=2) fatalx("badbug\n") ;
      }
      if (g != 2) cupt -> ignore = YES ; 
    }
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
    for (i=0; i<numindivs; ++i)  {     
     indx = indivmarkers[i] ; 
     if (indx -> affstatus == NO) indx -> ignore = YES ;
    }
  }
  else 
  setstatus(indivmarkers, numindivs, "Case") ;

  numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ;
  numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }


  if ( nhwfilter > 0 )  {
    hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname);
  }

  if ( xregionname )  {
    excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname);
  }


  numvalidind = 0 ;
  for (i=0; i<numindivs; ++i)  { 
   indx = indivmarkers[i] ;
   if (indx -> ignore) continue ; 
   if (numvalidgtind(snpmarkers, numsnps, i) ==0) { 
    indx -> ignore = YES ; 
    printf("no data for individual: %s\n", indx -> ID) ;
   }
   if (indx -> ignore == NO) ++numvalidind ;
  }

  if (maxmiss<0) maxmiss  = (int) (maxmissfrac * (double) numvalidind+1) ;
  printf("numvalidind:  %5d  maxmiss: %5d\n", numvalidind, maxmiss)  ;
  if (numvalidind  == 0) fatalx("no valid samples!\n") ;

  for (k=0; k<numsnps; ++k) {  
   if (maxmiss>numvalidind) break ;
   cupt = snpmarkers[k] ;
   t = numvalidind - numvalidgtypes(cupt) ;
// printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ;
   if (maxmiss < t) { 
    cupt -> ignore = YES ;
   }
/**
   if (numvalidind ==  t) { 
    printf("no data for snp: %s\n", cupt -> ID) ;
    cupt -> ignore = YES ;
   }
*/

  }

  if (fastdup)  {  

   printf("fastdup set %d\n", fastdupnum) ;
   if (fastdupnum > 0) {
     setfastdupnum(fastdupnum) ;
     setfastdupthresh(fastdupthresh, fastdupkill) ;
     fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
   }
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}
Exemple #9
0
int ipow2 (int l)
{
    /* 2 raised to the power l, computed in floating point and converted by nnint(). */
    double power = pow(2.0, l) ;

    return nnint(power) ;
}
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, nvalid;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;

/**
  cupt = snpmarkers[0] ;
  printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ;
*/

  numindivs = getindivs(indivname, &indivmarkers) ;

  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  setstatus(indivmarkers, numindivs, "Case") ;

  /******************************************************************/
  /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, 
    maxmissfracind, maxmissfracsnp); */
  /******************************************************************/

  if (fastdup)  {  
   if (fastdupnum > 0) setfastdupnum(fastdupnum) ;
   fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}