int main(int argc, char **argv)
{
  SNP **snpmarkers ;
  Indiv **indivmarkers ;
  int numsnps, numindivs ;
  unsigned char *packg1, *packg2 ;

  int **snppos ;
  int *snpindx ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, x ;

  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  nums1 = 
    getsnps(snp1, &snpm1, 0.0, NULL, &nignore, numrisks) ;

  putped(1) ;
  freeped() ;

  nums2 = 
    getsnps(snp2, &snpm2, 0.0, NULL, &nignore, numrisks) ;

  putped(2) ;
  freeped() ;

  for (x=0; x<nums1; ++x)  {  
   cupt1 = snpm1[x] ;
   cupt1 -> tagnumber = -1 ;
  }
  for (x=0; x<nums2; ++x)  {  
   cupt2 = snpm2[x] ;
   t = x %1000 ;   
// if (t==0) printf("zz %d %d\n", x, nums2) ;

   k = snpindex(snpm1, nums1, cupt2 -> ID) ;  
   if (k<0) { 
    cupt2 -> ignore = YES ;
    continue ;
   }
   cupt1 = snpm1[k] ;
   cupt1 -> tagnumber = x ;
   t = checkmatch(cupt1, cupt2) ;
   if (t==1) continue ;
   if (t==2) {  
    cupt2 -> isrfake = YES ;
    continue ;
   }
   if (t<0) {  
    cupt1  -> ignore = cupt2 -> ignore = YES ;
    continue ;
   }
   printf("allele funny: %s", cupt1 -> ID) ;
   printalleles(cupt1, stdout) ;
   printalleles(cupt2, stdout) ;
   printnl() ;
   cupt1  -> ignore = cupt2 -> ignore = YES ;
   continue ;
  }
  freesnpindex() ;
  numi1 = getindivs(ind1, &indm1) ;
  numi2 = getindivs(ind2, &indm2) ;

  for (x=0; x<numi2; ++x) {  
   k = indindex(indm1, numi1, indm2[x] -> ID) ;
// this code could be modified to allow duplicate individuals
   if (k>=0) fatalx("dup ind: %s\n", indm2[x] -> ID) ;  // fix later?  
  }

  setgenotypename(&geno1, ind1) ;
  getped(1) ;
  getgenos(geno1, snpm1, indm1, 
     nums1, numi1, nignore) ;

  packg1 = (unsigned char *) getpackgenos() ;
  clearpackgenos() ;

  setgenotypename(&geno2, ind2) ;
  getped(2) ;
  getgenos(geno2, snpm2, indm2, 
     nums2, numi2, nignore) ;

  packg2 = (unsigned char *) getpackgenos() ;
  numindivs = mergeit(snpm1, snpm2, &indm1, indm2, nums1, nums2, numi1, numi2) ;

  snpmarkers = snpm1 ; 
  numsnps = nums1 ;
  indivmarkers = indm1 ; 

  free(packg1) ;
  free(packg2) ;

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of mergeit run\n") ;
  return 0 ;
}
Esempio n. 2
0
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, numvalidind, nvalid, numvalidsnps ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, g ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  char c1, c2 ;
  int t1, t2 ;

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  if (r2thresh > 0.0) killr2 = YES ;
  if (badpedignore) setbadpedignore() ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;


  numindivs = getindivs(indivname, &indivmarkers) ;
  if (polarid != NULL) {
   polarindex = indindex(indivmarkers, numindivs, polarid) ;
   if (polarindex<0) fatalx("polarid %s not found\n") ;
  }


  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (newsnpname != NULL) { 
    nums2 = 
     getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ;
     remap(snpmarkers, numsnps, snpm2, nums2) ;
     snpmarkers = snpm2 ; 
     numsnps = nums2 ;
  }

  if (newindivname != NULL) { 
    numind2 = getindivs(newindivname, &indm2) ;
    remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;
    indivmarkers = indm2 ;
    numindivs = numind2 ;
    if (polarid != NULL) {
     polarindex = indindex(indivmarkers, numindivs, polarid) ;
    }
  }

  if (mkdiploid) { 

    numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;
    numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ;
    remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;

    indivmarkers = indm2 ;
    numindivs = numind2 ;

  }


  if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position

  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ;
   if (zerodistance) cupt -> genpos = 0.0 ;

   c1 = cupt -> alleles[0] ;  
   c2 = cupt -> alleles[1] ;  
   t1 = pedval(&c1) % 5 ;
   t2 = pedval(&c2) % 5 ;  // 0 and 5 are no good
   if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ;  
  }

  flipstrand(flipstrandname, snpmarkers, numsnps) ;
  flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ;

  if (polarindex>=0) { 
    for (i=0; i<numsnps; i++)  {  
      cupt = snpmarkers[i] ;
      g = getgtypes(cupt, polarindex) ;
      if (g==0) { 
       printf("polarizing %s\n", cupt -> ID) ;
       flip1(cupt, NO, YES) ;
       g = getgtypes(cupt, polarindex) ;
       if (g!=2) fatalx("badbug\n") ;
      }
      if (g != 2) cupt -> ignore = YES ; 
    }
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
    for (i=0; i<numindivs; ++i)  {     
     indx = indivmarkers[i] ; 
     if (indx -> affstatus == NO) indx -> ignore = YES ;
    }
  }
  else 
  setstatus(indivmarkers, numindivs, "Case") ;

  numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ;
  numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }


  if ( nhwfilter > 0 )  {
    hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname);
  }

  if ( xregionname )  {
    excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname);
  }


  numvalidind = 0 ;
  for (i=0; i<numindivs; ++i)  { 
   indx = indivmarkers[i] ;
   if (indx -> ignore) continue ; 
   if (numvalidgtind(snpmarkers, numsnps, i) ==0) { 
    indx -> ignore = YES ; 
    printf("no data for individual: %s\n", indx -> ID) ;
   }
   if (indx -> ignore == NO) ++numvalidind ;
  }

  if (maxmiss<0) maxmiss  = (int) (maxmissfrac * (double) numvalidind+1) ;
  printf("numvalidind:  %5d  maxmiss: %5d\n", numvalidind, maxmiss)  ;
  if (numvalidind  == 0) fatalx("no valid samples!\n") ;

  for (k=0; k<numsnps; ++k) {  
   if (maxmiss>numvalidind) break ;
   cupt = snpmarkers[k] ;
   t = numvalidind - numvalidgtypes(cupt) ;
// printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ;
   if (maxmiss < t) { 
    cupt -> ignore = YES ;
   }
/**
   if (numvalidind ==  t) { 
    printf("no data for snp: %s\n", cupt -> ID) ;
    cupt -> ignore = YES ;
   }
*/

  }

  if (fastdup)  {  

   printf("fastdup set %d\n", fastdupnum) ;
   if (fastdupnum > 0) {
     setfastdupnum(fastdupnum) ;
     setfastdupthresh(fastdupthresh, fastdupkill) ;
     fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
   }
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}
Esempio n. 3
0
/*
 * eigenstrat driver.
 *
 * For every SNP this prints one line "chisq Echisq" to fpout: the
 * statistic from compute_chisq() on the mean-adjusted genotype vector,
 * and the statistic from compute_chisqE() after the L loaded axes in V
 * have been regressed out of that vector.  "NA" is printed for a
 * negative statistic, and for both columns when the SNP has no usable
 * sample.
 *
 * Genotypes are coded 0 / 0.5 / 1; -100.0 is the sentinel for
 * "no value", which every accumulation loop skips.
 *
 * Configuration and data arrays (snpmarkers, indivmarkers, outlier,
 * NSAMPLES, L, qtmode, ...) are file-level variables declared outside
 * this function.
 *
 * Returns 0 on success; exits with status 1 on allocation failure or
 * when the genotype count exceeds MAXSIZE.
 */
int main(int argc, char **argv)
{
  double *V;
  double *xx;
  double *iscase;
  double *iscasecorr;
  int K;
  int k,m,n;
  int nignore;
  double rowsum, rowsum1;
  double chisq, Echisq, gamma, denom;

  readcommands(argc, argv) ;
  if (outputname != NULL)
    openit(outputname, &fpout, "w") ;
  else
    fpout = stdout;
  fprintf(fpout, "Chisq EIGENSTRAT\n");

  setinmode(&inmode, imode);
  packmode = YES;

  numsnps =
    getsnps(snpname, &snpmarkers, 0.0,  NULL, &nignore, 1) ;

  NSAMPLES = getindivs(indivname, &indivmarkers) ;

  setstatus(indivmarkers, NSAMPLES, "Case") ;
  setgenotypename(&genotypename, indivname) ;
  if (genotypename != NULL)  {
   getgenos(genotypename, snpmarkers, indivmarkers,
    numsnps, NSAMPLES, nignore) ;
  }

  /* NOTE(review): the snpmarkers / indivmarkers arrays are never released
     in this function; the original author left the same open question. */

  L = numpc;
  readpcafile(&V, &outlier, &K, L, NSAMPLES);
  getphenos(NSAMPLES, &iscase, outlier, &iscasecorr, L, V);

  /* main eigenstrat loop here */

  if ((xx = (double *)malloc(NSAMPLES*sizeof(*xx))) == NULL)
  {  fprintf(stderr,"CM\n");  exit(1);  }

  for(m=0;m<numsnps;m++)  {

    SNP *cupt = snpmarkers[m];
    for(n=0; n<NSAMPLES; n++)
    {
      int j = getgtypes(cupt,n);

      /* Any code other than 0/1/2 (including -1) becomes the sentinel, so
         xx[n] is never left holding uninitialised or stale data. */
      switch (j)
      {
        case 0:  xx[n] = 0.0;    break;
        case 1:  xx[n] = 0.5;    break;
        case 2:  xx[n] = 1.0;    break;
        default: xx[n] = -100.0; break;
      }

      if(outlier[n] == 1) xx[n] = -100.0;

    }

    /* mean-adjust xx; rowsum1 counts the samples that contribute */
    rowsum = 0.0; rowsum1 = 0.0;
    for(n=0; n<NSAMPLES; n++)
    {
      if(qtmode == NO && ((outlier[n]) || (xx[n] < -99.0))) continue;
      if(qtmode == YES && ((outlier[n]) || (xx[n] == -100.0))) continue;
      rowsum += xx[n];
      rowsum1 += 1.0;
    }
    for(n=0; n<NSAMPLES; n++)
    {
      if(outlier[n]) continue;
      if(qtmode == NO)  {
        if (xx[n] < -99.0)
          xx[n] = -100.0; /* still keep track */
        else
          xx[n] -= rowsum/rowsum1;
      }
      else  {
        if (xx[n] == -100.0)
          xx[n] = -100.0; /* still keep track */
        else
          xx[n] -= rowsum/rowsum1;
      }
    }

    /* Chisq */
    chisq = compute_chisq(xx,iscase);

    /* EIGENSTRAT: remove the component of xx along each of the L axes */
    for(k=0; k<L; k++)
    {
      gamma = 0.0;
      denom = 0.0;
      for(n=0; n<NSAMPLES; n++)
      {
        if(qtmode == NO && (outlier[n] || xx[n] < -99.0)) continue;
        if(qtmode == YES && (outlier[n] || xx[n] == -100.0)) continue;
        gamma += xx[n]*V[NSAMPLES*n+k];
        denom += V[NSAMPLES*n+k]*V[NSAMPLES*n+k];
      }
      gamma /= denom;
      for(n=0; n<NSAMPLES; n++)
      {
        if(qtmode == NO && (outlier[n] || xx[n] < -99.0)) continue;
        if(qtmode == YES && (outlier[n] || xx[n] == -100.0)) continue;
        xx[n] -= gamma*V[NSAMPLES*n+k];
      }
    }
    Echisq = compute_chisqE(xx,iscasecorr);

    /* no usable sample for this SNP: report NA in both columns */
    if(rowsum1 == 0.0)
    {
      chisq = -1.0; Echisq = -1.0;
    }

    if(chisq >= 0.0) fprintf(fpout,"%.04f",chisq);
    else fprintf(fpout,"NA");
    if(Echisq >= 0.0) fprintf(fpout," %.04f\n",Echisq);
    else fprintf(fpout," NA\n");

    if(NSAMPLES*m > MAXSIZE)
    {
      fprintf(stderr,"OOPS genotype file has > %d genotypes\n",MAXSIZE);
      fprintf(fpout,"OOPS genotype file has > %d genotypes\n",MAXSIZE);
      exit(1);
    }
  }

  free(xx);
  if (fpout != stdout) fclose(fpout);  /* only close a file this function opened */
  return 0;
}
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, nvalid;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;

/**
  cupt = snpmarkers[0] ;
  printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ;
*/

  numindivs = getindivs(indivname, &indivmarkers) ;

  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  setstatus(indivmarkers, numindivs, "Case") ;

  /******************************************************************/
  /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, 
    maxmissfracind, maxmissfracsnp); */
  /******************************************************************/

  if (fastdup)  {  
   if (fastdupnum > 0) setfastdupnum(fastdupnum) ;
   fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}