Exemple #1
0
/*
 * Entry point: reads a column of eigenvalues from the file named by
 * `iname`, orders them, and writes one table row per positive eigenvalue
 * to `oname` (or stdout when `oname` is NULL).  Each row carries the
 * index, the eigenvalue, its difference from the previous one, and the
 * statistic, p-value and effective n reported by dotwcalc().
 *
 * Returns 0 on completion; a missing `iname` is reported through fatalx().
 */
int
main (int argc, char **argv)
{
  FILE *ofile;
  int nlambda = 0;
  int i, m;
  double zn, zvar, tw, tail;
  /* getxx() is handed an array of column pointers and one column is read,
     so the array needs exactly one slot.  (A zero-length array here made
     the xx[0] store below an out-of-bounds write.)  */
  double *xx[1], *lambda;

  readcommands (argc, argv);
  settwxtable (twxtab);

  if (oname == NULL)
    ofile = stdout;
  else
    openit (oname, &ofile, "w");

  if (iname == NULL)
    fatalx ("i paraameter compulsory\n");
  nlambda = numlines (iname);
  ZALLOC(lambda, nlambda, double);
  xx[0] = lambda;
  nlambda = getxx (xx, nlambda, 1, iname);
  /* Negate, sort, negate again: presumably sortit() sorts ascending, so
     this leaves lambda in decreasing order -- TODO confirm.  */
  vst (lambda, lambda, -1.0, nlambda);
  sortit (lambda, NULL, nlambda);
  vst (lambda, lambda, -1.0, nlambda);
  /* Only the first m entries are reported; numgtz() presumably counts the
     entries greater than zero -- verify against its definition.  */
  m = numgtz (lambda, nlambda);

  fprintf (ofile, "%4s  %12s", "#N", "eigenvalue");
  fprintf (ofile, "%12s", "difference");
  fprintf (ofile, " %9s %12s", "twstat", "p-value");
  fprintf (ofile, " %9s", "effect. n");
  fprintf (ofile, "\n");

  for (i = 0; i < m; ++i)
    {

      /* zn is reset from nval on every row because dotwcalc() receives
         its address and may overwrite it.  */
      zn = nval;
      tail = dotwcalc (lambda + i, m - i, &tw, &zn, &zvar, minleneig);
      fprintf (ofile, "%4d  %12.6f", i + 1, lambda[i]);
      /* The first row has no predecessor to difference against.  */
      if (i == 0)
        fprintf (ofile, "%12s", "NA");
      else
        fprintf (ofile, "%12.6f", lambda[i] - lambda[i - 1]);
      /* A negative tail is printed as NA for both statistic and p-value.  */
      if (tail >= 0.0)
        fprintf (ofile, " %9.3f %12.6g", tw, tail);
      else
        fprintf (ofile, " %9s %12s", "NA", "NA");
      if (zn > 0.0)
        {
          fprintf (ofile, " %9.3f", zn);
        }
      else
        {
          fprintf (ofile, " %9s", "NA");
        }
      fprintf (ofile, "\n");
    }

  /* Close the output only when it was opened here; stdout is left alone.  */
  if (oname != NULL)
    fclose (ofile);
  return 0;
}
Exemple #2
0
/*
 * Entry point of the program that announces itself as "smartrel".
 *
 * Visible flow:
 *   1. Read the command line / parameters and open the output streams.
 *   2. Load SNPs, individuals and genotypes, then mark SNPs and
 *      individuals to be ignored (chromosome / position filters,
 *      SNPs with too little data, individuals with vv[i] == 0).
 *   3. Build the nrows x nrows matrix XTX block by block from the SNP
 *      columns, scale it by trace/(nrows-1), and take its
 *      eigen-decomposition; optionally repeat after removing outliers.
 *   4. For the first numeigs eigenvectors call subouter() on XTX, then
 *      print every pair (i, j), i < j, whose score
 *      XTX[i][j]/sqrt(XTX[i][i]*XTX[j][j]) + 1/(nrows-1) is at least
 *      relthresh, ordered via sortit() on the negated scores.
 *
 * Returns 0 on normal completion (including when parname is NULL or no
 * pair reaches relthresh); fatal conditions go through fatalx().
 */
int main(int argc, char **argv)
{

  char **eglist ;
  int numeg ;
  int i, j, k, pos; 
  int *vv ;
  SNP *cupt, *cupt2 ;
  Indiv *indx ;
  double y1, y2, y ;

  int n0, n1, nkill ;

  int nindiv = 0 ;
  int nignore, numrisks = 1 ;
  SNP **xsnplist  ;
  Indiv **xindlist ;
  int *xindex ;
  int nrows, ncols, m ;
  double *XTX, *cc, *evecs, *ww ;
  double *lambda ;
  double *tvecs ;
  int weightmode = NO ;
  int t ;
  double *xmean, *xfancy ;
  double *ldmat = NULL, *ldmat2 = NULL;
  double *ldvv = NULL, *ldvv2 = NULL, *vv2 = NULL ;
  int chrom,  numclear ;
  double gdis ;
  int outliter, numoutiter, *badlist, nbad ;
  int a, b, n ;
  FILE *outlfile ;
  

  /* SNP columns are accumulated in blocks of at most blocksize columns. */
  int xblock, blocksize=10000 ;   
  double *tblock ;  

  OUTLINFO *outpt ;
  int *idperm, *vecind ;   // for sort

  readcommands(argc, argv) ;
  printf("## smartrel version: %s\n", WVERSION) ;
  packmode = YES ;
  setomode(&outputmode, omode) ;

  /* Nothing to do when parname was not set (presumably the parameter
     file name -- confirm in readcommands). */
  if (parname == NULL) return 0 ;
  /* Explicitly selecting chromosome numchrom+1 turns noxdata off, so the
     filter on that chromosome below does not discard it. */
  if (xchrom == (numchrom+1)) noxdata = NO ;

  /* fstonly: no eigenvectors, no outlier iterations, no eigen output. */
  if (fstonly) { 
   printf("fstonly\n") ;
   numeigs = 0 ; 
   numoutliter = 0 ;
   numoutiter = 0 ;
   outputname = NULL ;
   snpeigname = NULL ;
  }

  if (fancynorm) printf("norm used\n\n") ;
  else printf("no norm used\n\n") ;

  /* Enforce a floor of 3 on nostatslim. */
  nostatslim = MAX(nostatslim, 3) ;

  /* Both streams default to stdout and are redirected only when a file
     name was supplied. */
  outlfile = ofile = stdout; 

  if (outputname != NULL)  openit(outputname, &ofile, "w") ;
  if (outliername != NULL) openit(outliername, &outlfile, "w") ;
  if (fstdetailsname != NULL) openit(fstdetailsname, &fstdetails, "w") ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;

  numindivs = getindivs(indivname, &indivmarkers) ;
  /* The value stored in k is not examined; k is reused as a loop index
     later on. */
  k = getgenos(genotypename, snpmarkers, indivmarkers, 
    numsnps, numindivs, nignore) ;


  /* Population list: taken from poplistname when given, otherwise built
     from the individuals themselves (at most MAXPOPS entries). */
  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
  }
  else
  {
    setstatus(indivmarkers, numindivs, NULL) ;
    ZALLOC(eglist, MAXPOPS, char *) ;
    numeg = makeeglist(eglist, MAXPOPS, indivmarkers, numindivs) ;
  }
  /* This loop has no effect: its body is only a disabled debug print. */
  for (i=0; i<numeg; i++) 
  {  
    /* printf("%3d %s\n",i, eglist[i]) ; */
  }

  /* Count individuals with affstatus == YES.  nindiv is not read again
     in this function. */
  nindiv=0 ;
  for (i=0; i<numindivs; i++) 
  {
    indx = indivmarkers[i] ;
    if(indx -> affstatus == YES) ++nindiv  ;
  }

  /* Chromosome filter: ignore chromosome 0, anything above numchrom+1,
     and chromosome numchrom+1 itself while noxdata is set. */
  for (i=0; i<numsnps; i++)  
  {  
    cupt = snpmarkers[i] ; 
    chrom = cupt -> chrom ;
    if ((noxdata) && (chrom == (numchrom+1))) cupt-> ignore = YES ;
    if (chrom == 0) cupt -> ignore = YES ;
    if (chrom > (numchrom+1)) cupt -> ignore = YES ;
  }
  /* Region filter (active only when xchrom > 0): keep SNPs on xchrom with
     lopos <= pos <= hipos.  Surviving SNPs for which numvalidgtx()
     returns at most 1 are reported as "nodata" and ignored. */
  for (i=0; i<numsnps; i++)  
  {
    cupt = snpmarkers[i] ; 
    pos = nnint(cupt -> physpos) ;
    if ((xchrom>0) && (cupt -> chrom != xchrom)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos < lopos)) cupt -> ignore = YES ;
    if ((xchrom > 0) && (pos > hipos)) cupt -> ignore = YES ;
    if (cupt -> ignore) continue ;
    if (numvalidgtx(indivmarkers, cupt, YES) <= 1) 
    { 
      printf("nodata: %20s\n", cupt -> ID) ;
      cupt -> ignore = YES ;
    }
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  /* Ignore every individual whose entry in vv is 0 (presumably a
     per-individual count of valid genotypes -- confirm in
     numvalidgtallind). */
  ZALLOC(vv, numindivs, int) ;
  numvalidgtallind(vv, snpmarkers, numsnps,  numindivs) ; 
  for (i=0; i<numindivs; ++i)  { 
  if (vv[i] == 0) {
    indx = indivmarkers[i] ;
    indx -> ignore = YES ; 
   }
  }
  free(vv) ;

  numsnps = rmsnps(snpmarkers, numsnps, NULL) ;  //  rid ignorable snps

   
  /* missingmode forces fancynorm off. */
  if (missingmode) 
  {
    setmiss(snpmarkers, numsnps) ;
    fancynorm = NO ;
  }

  if  (weightname != NULL)   
  {  
    weightmode = YES ;
    getweights(weightname, snpmarkers, numsnps) ;
  }
  /* Work buffers for the LD regression step; allocated only when
     ldregress > 0.  ldmat is set by setidmat() and then scaled by 1.0e-6
     (presumably a small multiple of the identity -- TODO confirm). */
  if (ldregress>0) 
  {  
    ZALLOC(ldvv,  ldregress*numindivs, double) ;
    ZALLOC(ldvv2,  ldregress*numindivs, double) ;
    ZALLOC(vv2,  numindivs, double) ;
    ZALLOC(ldmat,  ldregress*ldregress, double) ;
    ZALLOC(ldmat2,  ldregress*ldregress, double) ;
    setidmat(ldmat, ldregress) ;         
    vst(ldmat, ldmat, 1.0e-6, ldregress*ldregress) ;
  }

  ZALLOC(xindex, numindivs, int) ;
  ZALLOC(xindlist, numindivs, Indiv *) ;
  ZALLOC(xsnplist, numsnps, SNP *) ;

  if (popsizelimit > 0) 
  {  
    setplimit(indivmarkers, numindivs, eglist, numeg, popsizelimit) ; 
  }

  /* nrows / ncols: the sample and SNP counts reported just below and used
     as the matrix dimensions from here on. */
  nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
  ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
  printf("number of samples used: %d number of snps used: %d\n", nrows, ncols) ;

/**
  cupt = xsnplist[0] ;
  for (j=0; j<nrows; ++j) {  
   k = xindex[j] ;
   g = getgtypes(cupt, k) ;
   indx = indivmarkers[k] ;
   t = indxindex(eglist, numeg, indx -> egroup) ;
   printf("yy1 %20s %20s %20s %d %d %d\n", cupt ->ID, indx -> ID, indx -> egroup, j, k, g) ;
  }
  printf("yya: ") ; printimat(xindex, 1, nrows) ;
  printf("zzindxa:  %s\n", indivmarkers[230] -> egroup) ;
*/

  /* printf("## nrows: %d  ncols  %d\n", nrows, ncols) ; */
  /* All matrices are sized with the initial nrows; later passes may use a
     smaller nrows after outlier removal but keep these buffers. */
  ZALLOC(xmean, ncols, double) ;
  ZALLOC(xfancy, ncols, double) ;
  ZALLOC(XTX, nrows*nrows, double) ;
  ZALLOC(evecs, nrows*nrows, double) ;
  ZALLOC(tvecs, nrows*nrows, double) ;
  ZALLOC(lambda, nrows, double) ;
  ZALLOC(cc, nrows, double) ;
  ZALLOC(ww, nrows, double) ;
  ZALLOC(badlist, nrows, int) ;

  /* Never allocate a block wider than the number of SNP columns. */
  blocksize = MIN(blocksize, ncols) ; 
  ZALLOC(tblock, nrows*blocksize, double) ;

  // xfancy is multiplier for column xmean is mean to take off
  // badlist is list of rows to delete (outlier removal) 

  numoutiter = 1 ;  

  /* With outlier removal enabled there is one extra pass, and one
     OUTLINFO record is allocated per row. */
  if (numoutliter>=1) 
  {
    numoutiter = numoutliter+1 ;
    ZALLOC(outinfo, nrows,  OUTLINFO *) ;  
    for (k=0; k<nrows; k++) 
    {  
      ZALLOC(outinfo[k], 1, OUTLINFO) ;
    }
    /* fprintf(outlfile, "##%18s %4s %6s %9s\n", "ID", "iter","eigvec", "score") ; */
  }

  /* Main pass loop: build XTX, decompose it, and (except on the last
     pass) remove outliers and start over. */
  for (outliter = 1; outliter <= numoutiter ; ++outliter)  {
    /* fstonly skips the computation: XTX is set by setidmat() and every
       lambda entry by vclear(..., 1.0, ...). */
    if (fstonly) { 
     setidmat(XTX, nrows) ;
     vclear(lambda, 1.0, nrows) ;
     break ;
    }
    /* Later passes rebuild the SNP list before recomputing. */
    if (outliter>1) {
     ncols = loadsnpx(xsnplist, snpmarkers, numsnps, indivmarkers) ;
    }
    vzero(XTX, nrows*nrows) ;
    vzero(tblock, nrows*blocksize) ;
    xblock = 0 ; 

    vzero(xmean, ncols) ;
    vclear(xfancy, 1.0, ncols) ;

    for (i=0; i<ncols; i++) 
    { 
      cupt = xsnplist[i] ;
      chrom = cupt -> chrom ;
      getcolxz(cc, cupt, xindex, nrows, i, xmean, xfancy, &n0, &n1) ;
      t = MIN(n0, n1) ; 

      /* SNPs whose smaller count (of n0, n1) is at or below minallelecnt
         are ignored and contribute a zero column. */
      if (t <= minallelecnt)  {  
       cupt -> ignore = YES ;
       vzero(cc, nrows) ; 
      }

      if (weightmode) 
      {
        vst(cc, cc, xsnplist[i] -> weight, nrows) ;
      }
      if (ldregress>0) 
      {  
        /* Walk back over up to ldregress preceding SNPs.  The walk stops
           at the start of the list, at a SNP on another chromosome, or at
           one whose genetic distance is >= ldlimit; numclear is then the
           number of remaining look-back slots, passed to clearld(). */
        numclear = 0 ;
        for (k=1; k<= ldregress; ++k)  
        {  
          j = i-k ;  
          if (j<0) 
          { 
            numclear = ldregress-k+1 ; 
            break ;
          }
          cupt2 = xsnplist[j] ;  
          if (cupt2 -> chrom != chrom) gdis = ldlimit + 1.0 ; 
          else gdis = cupt -> genpos - cupt2 -> genpos ;
          if (gdis>=ldlimit) 
          {   
            numclear = ldregress-k+1 ; 
            break ;
          }
        }
        if (numclear>0) clearld(ldmat, ldvv, ldregress, nrows, numclear) ; 
        /* ldreg() writes into the "2" buffers, which are then copied back
           over the current state and over cc. */
        ldreg(ldmat, ldmat2, cc, vv2, ldvv, ldvv2, ldregress, nrows) ;
        copyarr(ldmat2, ldmat, ldregress*ldregress) ;
        copyarr(vv2, cc, nrows) ;
        copyarr(ldvv2, ldvv, ldregress*nrows) ;
      }
      /* Append this column to the current block. */
      copyarr(cc, tblock+xblock*nrows, nrows) ;
      ++xblock ; 

/** this is the key code to parallelize */
      /* A full block is multiplied out by domult() and combined into XTX
         by vvp() (presumably element-wise addition -- confirm). */
      if (xblock==blocksize) 
      {  
        domult(tvecs, tblock, xblock, nrows) ;
        vvp(XTX, XTX, tvecs, nrows*nrows) ;
        xblock = 0 ;
        vzero(tblock, nrows*blocksize) ;
      }
    }

    /* Flush the final, partially filled block. */
    if (xblock>0) 
    { 
     domult(tvecs, tblock, xblock, nrows) ;
     vvp(XTX, XTX, tvecs, nrows*nrows) ;
    }
    symit(XTX, nrows) ;

    /**
    a = 0; b=0 ;
    printf("zz1 %12.6f ", XTX[a*nrows+b]) ;
    a = nrows-1; b=nrows-1 ;
    printf(" %12.6f %15.9g\n", XTX[a*nrows+b], asum(XTX, nrows*nrows)) ;
    */

    if (verbose) 
    {
      printdiag(XTX, nrows) ;
    }

    /* NOTE(review): nrows == 1 makes the divisor zero here -- confirm
       whether an earlier check rules that out. */
    y = trace(XTX, nrows) / (double) (nrows-1) ;
    if (isnan(y)) fatalx("bad XTX matrix\n") ;
    /* printf("trace:  %9.3f\n", y) ; */
    if (y<=0.0) fatalx("XTX has zero trace (perhaps no data)\n") ;
    vst(XTX, XTX, 1.0/y, nrows * nrows) ;
/// mean eigenvalue is 1
    eigvecs(XTX, lambda, evecs, nrows) ;
// eigenvalues are in decreasing order 

    if (outliter > numoutliter) break ;  
    // last pass skips outliers 
    numoutleigs = MIN(numoutleigs, nrows-1) ;
    nbad = ridoutlier(evecs, nrows, numoutleigs, outlthresh, badlist, outinfo) ;
    /* No outliers found: the current decomposition is final. */
    if (nbad == 0) break ; 
    /* Report each outlier, mark it ignored, then rebuild the row index so
       the next pass runs without it. */
    for (i=0; i<nbad; i++) 
    {  
      j = badlist[i] ;
      indx = xindlist[j] ;
      outpt = outinfo[j] ;
      fprintf(outlfile, "REMOVED outlier %s iter %d evec %d sigmage %.3f\n", indx -> ID, outliter, outpt -> vecno, outpt -> score) ;
      indx -> ignore = YES ;
    }
    nrows = loadindx(xindlist, xindex, indivmarkers, numindivs) ;
    printf("number of samples after outlier removal: %d\n", nrows) ;
  }

  /* Only close the outlier stream when it is a real file, not stdout. */
  if (outliername != NULL) fclose(outlfile) ;

  m = numgtz(lambda, nrows)  ;
  /* printf("matrix rank: %d\n", m) ; */
  if (m==0) fatalx("no data\n") ;

/** smartrel code */
  /* For each of the first numeigs eigenvectors, scale it by
     sqrt(eigenvalue) into ww and hand it to subouter() (presumably
     subtracting the outer product ww*ww' from XTX -- confirm).
     NOTE(review): evecs holds nrows vectors, so this assumes
     numeigs <= nrows. */
  for (i=0; i<numeigs; i++) {  
   y = sqrt(lambda[i]) ;
   vst(ww, evecs+i*nrows, y, nrows) ;               
   subouter(XTX, ww, nrows) ;
  }
  free(tvecs) ; 

  /* Collect every pair i < j whose score reaches relthresh.  vecind keeps
     the flattened index i*nrows+j.  The evecs buffer is reused to hold
     the negated scores (presumably so that an ascending sortit() lists
     the largest scores first -- confirm). */
  n = 0 ;
  ZALLOC(vecind, nrows*nrows/2, int) ; 
  for (i=0; i<nrows; i++) { 
   for (j=i+1; j<nrows; j++) { 
    k = i*nrows + j ; 
    y1 = XTX[i*nrows+i] ;
    y2 = XTX[j*nrows+j] ;
    y = XTX[k]/sqrt(y1*y2) ;
    y += 1/(double)(nrows-1);
    if (y<relthresh) continue ;
    vecind[n] = k ; 
    evecs[n] = -y ;
    ++n ;
   }
  }
  free(XTX) ; 
  if (n==0) { 
   printf("## nothing above relthresh!\n") ;
   printf("##end of smartrel run\n") ;
   return 0 ;
  }
  /* Sort the scores; idperm maps each output position back to its slot in
     vecind, whose flattened index is decoded into the pair (a, b). */
  ZALLOC(idperm, n, int) ; 
  sortit(evecs, idperm, n) ;
  for (i=0; i<n; i++) {  
   j = idperm[i] ;
   k = vecind[j] ;
   a = k/nrows ; 
   b = k%nrows ;
   printf("rel: %20s ",  xindlist[a] ->ID) ;
   printf("%20s ",  xindlist[b] ->ID) ;
   printf(" %9.3f", -evecs[i]) ;
   printnl() ;
  }
  
  printf("##end of smartrel run\n") ;
  return 0 ;
}