Exemplo n.º 1
0
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, numvalidind, nvalid, numvalidsnps ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, g ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  char c1, c2 ;
  int t1, t2 ;

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  if (r2thresh > 0.0) killr2 = YES ;
  if (badpedignore) setbadpedignore() ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;


  numindivs = getindivs(indivname, &indivmarkers) ;
  if (polarid != NULL) {
   polarindex = indindex(indivmarkers, numindivs, polarid) ;
   if (polarindex<0) fatalx("polarid %s not found\n") ;
  }


  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (newsnpname != NULL) { 
    nums2 = 
     getsnps(newsnpname, &snpm2, 0.0, NULL, &nignore, numrisks) ;
     remap(snpmarkers, numsnps, snpm2, nums2) ;
     snpmarkers = snpm2 ; 
     numsnps = nums2 ;
  }

  if (newindivname != NULL) { 
    numind2 = getindivs(newindivname, &indm2) ;
    remapind(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;
    indivmarkers = indm2 ;
    numindivs = numind2 ;
    if (polarid != NULL) {
     polarindex = indindex(indivmarkers, numindivs, polarid) ;
    }
  }

  if (mkdiploid) { 

    numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;
    numind2 = mkindh2d(indivmarkers, &indm2, numindivs) ;
    remaph2d(snpmarkers, numsnps, indivmarkers, indm2, numindivs, numind2) ;

    indivmarkers = indm2 ;
    numindivs = numind2 ;

  }


  if (deletedup) dedupit(snpmarkers, numsnps) ; // only one marker per position

  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ;
   if (zerodistance) cupt -> genpos = 0.0 ;

   c1 = cupt -> alleles[0] ;  
   c2 = cupt -> alleles[1] ;  
   t1 = pedval(&c1) % 5 ;
   t2 = pedval(&c2) % 5 ;  // 0 and 5 are no good
   if ((t1==0) && (t2 >0)) flip1(cupt, phasedmode, YES) ;  
  }

  flipstrand(flipstrandname, snpmarkers, numsnps) ;
  flipsnps(flipsnpname, snpmarkers, numsnps, phasedmode) ;

  if (polarindex>=0) { 
    for (i=0; i<numsnps; i++)  {  
      cupt = snpmarkers[i] ;
      g = getgtypes(cupt, polarindex) ;
      if (g==0) { 
       printf("polarizing %s\n", cupt -> ID) ;
       flip1(cupt, NO, YES) ;
       g = getgtypes(cupt, polarindex) ;
       if (g!=2) fatalx("badbug\n") ;
      }
      if (g != 2) cupt -> ignore = YES ; 
    }
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (poplistname != NULL) 
  { 
    ZALLOC(eglist, numindivs, char *) ; 
    numeg = loadlist(eglist, poplistname) ;
    seteglist(indivmarkers, numindivs, poplistname);
    for (i=0; i<numindivs; ++i)  {     
     indx = indivmarkers[i] ; 
     if (indx -> affstatus == NO) indx -> ignore = YES ;
    }
  }
  else 
  setstatus(indivmarkers, numindivs, "Case") ;

  numsnps = rmsnps(snpmarkers, numsnps, deletesnpoutname) ;
  numindivs = rmindivs(snpmarkers, numsnps, indivmarkers, numindivs) ;

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }


  if ( nhwfilter > 0 )  {
    hwfilter(snpmarkers, numsnps, numindivs, nhwfilter, deletesnpoutname);
  }

  if ( xregionname )  {
    excluderegions(xregionname, snpmarkers, numsnps, deletesnpoutname);
  }


  numvalidind = 0 ;
  for (i=0; i<numindivs; ++i)  { 
   indx = indivmarkers[i] ;
   if (indx -> ignore) continue ; 
   if (numvalidgtind(snpmarkers, numsnps, i) ==0) { 
    indx -> ignore = YES ; 
    printf("no data for individual: %s\n", indx -> ID) ;
   }
   if (indx -> ignore == NO) ++numvalidind ;
  }

  if (maxmiss<0) maxmiss  = (int) (maxmissfrac * (double) numvalidind+1) ;
  printf("numvalidind:  %5d  maxmiss: %5d\n", numvalidind, maxmiss)  ;
  if (numvalidind  == 0) fatalx("no valid samples!\n") ;

  for (k=0; k<numsnps; ++k) {  
   if (maxmiss>numvalidind) break ;
   cupt = snpmarkers[k] ;
   t = numvalidind - numvalidgtypes(cupt) ;
// printf("zz %20s %4d %4d\n", cupt -> ID, t, numvalidind-t) ;
   if (maxmiss < t) { 
    cupt -> ignore = YES ;
   }
/**
   if (numvalidind ==  t) { 
    printf("no data for snp: %s\n", cupt -> ID) ;
    cupt -> ignore = YES ;
   }
*/

  }

  if (fastdup)  {  

   printf("fastdup set %d\n", fastdupnum) ;
   if (fastdupnum > 0) {
     setfastdupnum(fastdupnum) ;
     setfastdupthresh(fastdupthresh, fastdupkill) ;
     fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
   }
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}
int main(int argc, char **argv)
{

  int **snppos ;
  int *snpindx ;
  char **snpnamelist, **indnamelist ;
  char **eglist ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;
  double gpos1,gpos2,cpos1,cpos2,gd, cd, gd100 ;
  double rthresh, zt ;
  int mpflag, ret, nvalid;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k ;

  int nindiv = 0, e, f, lag=1  ;
  double xc[9], xd[4], xc2[9] ;
  double ychi, zscore, zthresh = 20.0 ;
  double y1, y2 ; 
  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  malexhet = YES ;    // convertf default is don't change the data
  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  numsnps = 
    getsnps(snpname, &snpmarkers, 0.0, badsnpname, &nignore, numrisks) ;


  for (i=0; i<numsnps; i++)  {  
   if (xchrom == -1) break ;  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom != xchrom) cupt -> ignore = YES ; 
   if (cupt -> ignore) continue ; 
   t = nnint(cupt -> physpos) ; 
   if ( (t< lopos) || (t >hipos)) cupt -> ignore = YES ;
  }

  nignore = 0 ;
  for (i=0; i<numsnps; i++)  {  
   cupt = snpmarkers[i] ; 
   if (cupt -> chrom > maxchrom) cupt -> ignore = YES ;  
   if (cupt -> chrom < minchrom) cupt -> ignore = YES ;  
   if (cupt -> ignore) ++nignore ;
  }

  if (numsnps == nignore) fatalx("no valid snps\n") ;

/**
  cupt = snpmarkers[0] ;
  printf("zz2: %d %d %d %20s: %d\n", numsnps, nignore, cupt -> chrom, cupt -> ID, cupt -> ignore) ;
*/

  numindivs = getindivs(indivname, &indivmarkers) ;

  if (genotypelist!= NULL) {  
    getgenos_list(genotypelist, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  else {
   setgenotypename(&genotypename, indivname) ;
   getgenos(genotypename, snpmarkers, indivmarkers, 
     numsnps, numindivs, nignore) ;
  }

  if (outputall) {
   outfiles(snpoutfilename, indoutfilename, genooutfilename, 
    snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

   printf("##end of convertf run (outputall mode)\n") ;
   return 0 ;
  }

  if (killr2) {
   nkill = killhir2(snpmarkers, numsnps, numindivs, r2physlim, r2genlim, r2thresh) ;
   if (nkill>0) printf("killhir2.  number of snps killed: %d\n", nkill) ;
  }

  setstatus(indivmarkers, numindivs, "Case") ;

  /******************************************************************/
  /* removesubthreshold(indivmarkers, snpmarkers, numindiv, numsnps, 
    maxmissfracind, maxmissfracsnp); */
  /******************************************************************/

  if (fastdup)  {  
   if (fastdupnum > 0) setfastdupnum(fastdupnum) ;
   fastdupcheck(snpmarkers, indivmarkers, numsnps, numindivs) ;  
  }

  if (decim>0) {  
   snpdecimate(snpmarkers, numsnps, decim, dmindis, dmaxdis) ;
  }

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of convertf run\n") ;
  return 0 ;
}