Example #1
0
/*
 * Phenotype model: P = G + E
 *
 * Adds the environmental contribution reported by env to the genotypic
 * value computed for ag and returns the sum.
 */
double DefaultTrait::phenotype( const AlleleGroupPtr ag, const environmental * env ) const {
    // Environment is queried first, then the genotype, as two separate terms.
    const double environmental_part = env->environment_factor();
    const double genetic_part = genotype( ag );

    return environmental_part + genetic_part;
}
Example #2
0
// Per-SNP QFAM linear-model test with adaptive permutation.
//
// For every SNP l in [0, nl_all): sets up the per-individual genotype
// scores (total / between / within), fits a linear model on the included
// individuals, reports the fit through displayResults(QOUT, ...), and then
// repeatedly permutes the between/within components and re-fits the same
// model until perm.updateSNP() returns true for this SNP.
//
// Parameters:
//   C         - per-individual index into `family` (also used to index
//               pbetween and pwithin); only read here.
//   QOUT      - output stream handed to lm->displayResults().
//   permuting - not referenced anywhere in this function body.
//   perm      - permutation controller: nextSNP() is called once per SNP,
//               updateSNP(r, original, l) after every permutation.
//   pbetween  - family permutation; reshuffled in place by permute() on each
//               iteration and then edited so that an excluded family is not
//               assigned to an included one.
//   pwithin   - one flag per family, redrawn on each iteration; selects the
//               sign applied to the W component.
//
// Returns: one entry per SNP holding the statistic of the unpermuted fit;
// SNPs on sex or haploid chromosomes are skipped and get -1.
vector_t Plink::calcQTDT(vector<int> & C,
			 ofstream & QOUT,
			 bool permuting, 
			 Perm & perm,
			 vector<int> & pbetween, 
			 vector<bool> & pwithin)
{
  

  /////////////////////////
  // Iterate over each SNP
  
  vector_t results(nl_all);
  
  for (int l=0; l<nl_all; l++)
    {     
      
      // Note: when using adaptive permutation in QFAM, we do not skip
      // a failed SNP here, as we permute on a per-SNP basis instead;
      // i.e. for this particular SNP we will perform enough
      // permutations to assess significance in this first instance of the 
      // call to calcQTDT().  
      
      // Skip X markers for now
      // (any SNP on a sex or haploid chromosome gets the value -1)
      
      if (par::chr_sex[locus[l]->chr] || 
	  par::chr_haploid[locus[l]->chr])
	{
 	  results[l] = -1;
 	  continue;
	}
      
      if (par::verbose)
	cout << "\n ******************************************\n"
	     << "  LOCUS " << locus[l]->name << "\n\n";
      
      
      ////////////////////////////////////////////////////////////////
      // Create X vector that encodes the genotype for each individual
      // as 1,0,-1 (or -9 for missing)
      
      // Use the per-person 'flag' variable to indicate a non-missing genotype
      // at this SNP (i.e. for gperson)
      
      // Use 'covar' to store the X= 1,0,-1 codes for this SNP
      
      setCovariatesForSNP(*this,l);
      
      
      ///////////////////////////////////////
      // Score between and within components
	
      scoreBetween(*this,l);
	
      // Now, for each individual, set B and W 

      // Everyone starts as included; scoreBandW() receives the vector and
      // the pruning loops below clear further entries.
      vector<bool> include(n,true);
      
      scoreBandW(*this,l,include);
     

      // Now we have created the family structure, B and W and flagged who is missing
      // in terms of genotype and phenotype
      
      // We can either proceed to return one value for this (in max(T) mode)
      // or to exhaust all permutations
      
     
      /////////////////////////
      // Prune out missing data (already done?)
      // For every family that is not included, clear the 'flag' of both
      // parents (when present) and of all its children.
     
      vector<Family*>::iterator f = family.begin();
      while ( f != family.end() ) 
        {
 	 if ( ! (*f)->include ) 
 	   {
 	     if ( (*f)->pat ) 
 	       (*f)->pat->flag = false;
	     
 	     if ( (*f)->mat ) 
 	       (*f)->mat->flag = false;
		 
 	     for ( int k = 0 ; k < (*f)->kid.size() ; k++) 
	       (*f)->kid[k]->flag = false;
	     
 	   }
 	 f++;
        }
     
     
      // Prune individuals
      // (anyone whose flag is cleared, or who is marked 'missing', is excluded)
      for (int i=0; i<n; i++)
        if ( (!sample[i]->flag) || sample[i]->missing ) 
	  include[i] = false;
                    

     /////////////////////////
     // Optional display
     // One line per individual: INC/EXC, family index, IDs, phenotype,
     // genotype and the T, B and W scores.
     
     if (par::verbose)
       {
	 
	 for (int i=0; i<n; i++)
	   {
	     if ( include[i] ) 
	       cout << "INC\t";
	     else
	       cout << "EXC\t";
	     
	     cout << C[i] << "\t"
		  << sample[i]->fid << " " << sample[i]->iid << "\t"
		  << sample[i]->phenotype << "\t"
		  << genotype(*this,i,l) << " "
		  << sample[i]->T << " "
		  << sample[i]->B << " "
		  << sample[i]->W ;
	     cout << "\n";
	   }
	 cout << "\n\n";
       }
     
     
     
     ///////////////////////////////////
     // Form linear model
     // The model is heap-allocated once per SNP, reused for every
     // permutation, and released by the 'delete lm' at the end of this
     // loop iteration.
     
     Model * lm;
     LinearModel * m = new LinearModel(this);
     lm = m;
     
     // Copy pattern of missing data over, with 
     // some additional exclusions based on family 
     // structure
     
     lm->setMissing(include);
     
     // Add independent variables: T, B and/or W
     // and set the test parameter
     // (intercept is 0)
     
     // Covariates  Model
     // 0 Total
     // 1 Between
     // 2 Within
     
     // Model
     // 0      Intercept      Intercept
     // 1      Total          Between
     // 2      n/a            Within
     
     // Exactly one term is added in each mode, so the tested parameter
     // is always index 1.
     if (par::QFAM_total) 
       {
	 lm->label.push_back("TOT");
	 lm->testParameter = 1;	     
       }
     else if (par::QFAM_between)
       {
	 lm->label.push_back("BET");
	 //	 lm->label.push_back("WITH");
	 lm->testParameter = 1;	     
       }
     else if (par::QFAM_within1 || par::QFAM_within2) 
       {
	 //	 lm->label.push_back("BET");
	 lm->label.push_back("WITH");
	 lm->testParameter = 1;	     
       }
     
     // Build design matrix
     lm->buildDesignMatrix();
     
     // Fit linear model
     // (the univariate fit is used only for the total test with par::qt set)
     if ( par::QFAM_total && par::qt )
       lm->fitUnivariateLM();
     else
       lm->fitLM();

     // Check for multi-collinearity
     lm->validParameters();
     
     // Calculate Original Test statistic
     results[l] = lm->getStatistic();

     // Store,return and display this value?
     
     lm->displayResults(QOUT,locus[l]);

     
     ///////////////////
     // Now, permutation
     
     // 1) We have the complete, non-missing data: permute only this
     //    i.e. we do not need to worry about missing data; we are
     //    no longer controlling the correlation between SNPs, as we
     //    are permuting genotype, so we do not need to worry about this
     //    in any case.
     
     // 2) Keep the same Model in each case: directly re-state the X 
     //    variables in the design matrix, then re-fit model. This 
     //    will avoid the cost of building the model, pruning for missing
     //    data, etc, each iteration
     
     
     // Store original, and set up permutations
     // (i.e. return pperson to original order)

     perm.nextSNP();
     double original = results[l];
     

     ////////////////////////
     // Adaptive permutation
     
     ///////////////////////////////////////////////////
     // Set up permutation indices, specific to this SNP
     
     // NOTE(review): tc is initialised but never read or updated in this
     // function.
     int tc = 0;

     // Loop until perm.updateSNP() says this SNP is finished (see the
     // break at the bottom of the loop body).
     while ( true ) 
       {
	 
	 // Permute between and within family components
	 
	 permute(pbetween);
	 
	 // Fresh coin flip per family: true/false with threshold 0.5
	 for (int i=0; i<family.size(); i++)
	   {
	     if (CRandom::rand() < 0.5) pwithin[i] = true;
	     else pwithin[i] = false;
	   }
	 
	 // Edit pbetween for this SNP, so that we keep missing 
	 // B components constant
	 
	 for (int f=0; f<pbetween.size(); f++)
	   {
	     
	     if ( 
		 // Permuted family is all missing
		 ( ! family[pbetween[f]]->include ) 
		 &&
		 // Recipient family is not...
		 family[f]->include )
	       {
		 // ... then swap 
		 
		 //   F  P(F)    -->
		 //   0  2       -->   0  2
		 //   1  0       -->   1  0
		 //   2  3*      -->   2  4
		 //   3* 4       -->   3* 3*
		 //   4  1       -->   4  1
		 //   ...
		 
		 // e.g. 3* is missing, so swap 3* and 4 in P(F), so 2
		 // and 4 end up together instead, 3* is invariant
		 
		 int missing_family = pbetween[f];
		 int swap_in_family = pbetween[pbetween[f]];
		 pbetween[missing_family] = missing_family;
		 pbetween[f] = swap_in_family;
		 
// 		 if (par::verbose)
// 		   {
// 		     cout << "FAM " << f << " (NOT MISS) has " << missing_family << " (MISS)\n";
// 		     cout << "FAM " << missing_family << " (MISS) has " << swap_in_family << " (?)\n";
// 		     cout << "SWAP MADE ..\n";
// 		     cout << "FAM " << f << "  has " << pbetween[f] << "\n";
// 		     cout << "FAM " << missing_family << " has " << pbetween[missing_family] << "\n\n";
// 		   }
		 
		 // And re-check this new pairing
		 // (the loop's f++ undoes this decrement, so the same index
		 // f is tested again with its newly assigned family)
		 f--;
		 
	       }		 
	   }
	 

// 	 if (par::verbose)
// 	   for (int f=0; f<pbetween.size(); f++)
// 	     {
// 	       if ( ! family[pbetween[f]]->include ) 
// 		 cout << " Permuted family is all missing " << f << "\t" << family[pbetween[f]]->kid[0]->fid << "\n";
// 	       if ( ! family[f]->include ) 
// 		 cout << " Recipient family is all missing " << f << "\t" << family[f]->kid[0]->fid << "\n";
// 	     }


//  	 if (true)
//  	   {
//  	     for (int i=0; i<n; i++)
//  	       {
//  		 cout << sample[i]->fid << "\t"
//  		      << include[i] << "\t"
//  		      << C[i] << "\t"
//  		      << pbetween[C[i]] << "\t"
//  		      << sample[i]->family->include << "\t"
//  		      << family[C[i]]->include << "\t"
//  		      << family[pbetween[C[i]]]->include << "\n";
//  	       }
//  	   }




	 //////////////////////////////////
	 // Reconstitute genotypes
	 // and fit back into LinearModel
	 
	 // c is the row index into lm->X; it advances only for included
	 // individuals. Column 1 is overwritten with the permuted score:
	 //   total   : B of the permuted family, plus or minus own W
	 //   between : B of the permuted family
	 //   within  : plus or minus own W
	 // where the sign of W comes from pwithin for the person's family.
 	 int c=0;
 	 for (int i=0; i<n; i++)
 	   {
 	     if (include[i])
 	       {
 		 Family * pfam = family[ pbetween[C[i]] ];
 		 Individual * person = sample[i];
		 
 		 if ( par::QFAM_total )
 		   lm->X[c++][1] = pwithin[C[i]] ? pfam->B + person->W : pfam->B - person->W;
 		 else if ( par::QFAM_between )
 		   {
 		     lm->X[c++][1] = pfam->B;
 		   }
 		 else
 		   {
 		     lm->X[c++][1] = pwithin[C[i]] ? person->W : - person->W;
 		   }
		 
// 		 cout << "added " << person->fid << " " 
// 		      << person->iid << " " 
// 		      << lm->X[c-1][1] << "\n";


 	       }
 	   }
// 	 cout << "\n\n";



		 
	 ////////////////////////////////////
	 // Re-fit model
	 
	 if ( par::QFAM_total && par::qt )
	   lm->fitUnivariateLM();
	 else
	   lm->fitLM();
	 
	 // Check for multi-collinearity
	 lm->validParameters();
	 

	 // Calculate the test statistic for this permutation

	 // Should not encounter this too much, but if not valid,
	 // count conservatively.
	 // An invalid fit is scored as original + 1, i.e. strictly above
	 // the observed statistic.

	 double r = lm->isValid() ? lm->getStatistic() : original + 1 ;
	 
// 	 cout << "Permutation ... \n";
// 	 if ( ! lm->isValid() )
// 	   cout << "NOT VALID>.. \n";

// 	 int c2 = 0;

// 	 for (int i=0; i<n; i++)
// 	   {
// 	     if ( include[i] ) 
// 	       cout << "INC\t";
// 	     else
// 	       cout << "EXC\t";
	     	     
// 	     cout << C[i] << "\t"
// 		  << sample[i]->fid << " " << sample[i]->iid << "\t"
// 		  << sample[i]->phenotype << "\t"
// 		  << genotype(*this,i,l) << " ";

// 	     if ( include[i] )
// 	       cout << lm->X[c2++][1] << " ";
// 	     else 
// 	       cout << "NA" << " ";
// 	     cout << "\n";
// 	   }
// 	 cout << "\n\n";






	 // Reset in case the previous model was not valid

	 lm->setValid();
	 
	 ////////////////////////////////////
	 // Test / update / are we finished ? 
	 // updateSNP() is given the permuted and observed statistics and
	 // the SNP index; a true return ends the permutations for this SNP.
	 
	 if ( perm.updateSNP( r , original , l ) )
	   {
	     if ( ! par::silent )
	       {
		 cout << "Adaptive permutation done for " 
		      << l+1 << " of " << nl_all << " SNPs            \r";
		 cout.flush(); 
	       }
	     break; // We are done for this SNP
	   }
	 
	 
       } // Next adaptive permutation

      
     // Clear up
     // (releases the LinearModel allocated for this SNP)
     delete lm;
          
   } // Next SNP
 
 return results;
 
}
/**
 * Program entry point: runs the sub-command named by argv[1].
 *
 * The sub-command receives the argument list shifted by one, so that its
 * own name appears as element 0. With no sub-command the help text is
 * shown and the process exits with status 0; an unrecognised sub-command
 * is reported on std::clog, help is shown, and the process exits with
 * status 1.
 *
 * After the sub-command returns, the elapsed processor time is passed to
 * print_time() unless the sub-command returned false. Only view, index,
 * merge, paste, concat and normalize have their return value consulted;
 * every other sub-command always gets the timing line.
 *
 * @param argc number of entries in argv
 * @param argv argument vector; argv[1] selects the sub-command
 * @return 0 on normal completion
 */
int main(int argc, char ** argv)
{
    const clock_t t0 = clock();
    bool print = true;

    // "argc < 2" rather than "argc == 1": a process started with an empty
    // argument vector has argc == 0, and reading argv[1] would then go
    // past the terminating null pointer.
    if (argc < 2)
    {
        help();
        exit(0);
    }

    const std::string cmd(argv[1]);

    // Argument list as seen by the sub-command (its own name first).
    const int sub_argc = argc - 1;
    char ** sub_argv = argv + 1;

    //primitive programs that do not require help pages and summary statistics by default
    if (cmd=="view")
    {
        print = view(sub_argc, sub_argv);
    }
    else if (cmd=="index")
    {
        print = index(sub_argc, sub_argv);
    }
    else if (cmd=="merge")
    {
        print = merge(sub_argc, sub_argv);
    }
    else if (cmd=="paste")
    {
        print = paste(sub_argc, sub_argv);
    }
    else if (cmd=="concat")
    {
        print = concat(sub_argc, sub_argv);
    }
    else if (cmd=="subset")
    {
        subset(sub_argc, sub_argv);
    }
    else if (cmd=="decompose")
    {
        decompose(sub_argc, sub_argv);
    }
    else if (cmd=="normalize")
    {
        print = normalize(sub_argc, sub_argv);
    }
    else if (cmd=="config")
    {
        config(sub_argc, sub_argv);
    }
    else if (cmd=="mergedups")
    {
        merge_duplicate_variants(sub_argc, sub_argv);
    }
    else if (cmd=="remove_overlap")
    {
        remove_overlap(sub_argc, sub_argv);
    }
    else if (cmd=="peek")
    {
        peek(sub_argc, sub_argv);
    }
    else if (cmd=="partition")
    {
        partition(sub_argc, sub_argv);
    }
    else if (cmd=="annotate_variants")
    {
        annotate_variants(sub_argc, sub_argv);
    }
    else if (cmd=="annotate_regions")
    {
        annotate_regions(sub_argc, sub_argv);
    }
    else if (cmd=="annotate_dbsnp_rsid")
    {
        annotate_dbsnp_rsid(sub_argc, sub_argv);
    }
    else if (cmd=="discover")
    {
        discover(sub_argc, sub_argv);
    }
    else if (cmd=="merge_candidate_variants")
    {
        merge_candidate_variants(sub_argc, sub_argv);
    }
    else if (cmd=="union_variants")
    {
        union_variants(sub_argc, sub_argv);
    }
    // NOTE(review): "genotype" runs genotype2() while "characterize" runs
    // genotype() -- looks deliberate, but confirm the mapping is intended.
    else if (cmd=="genotype")
    {
        genotype2(sub_argc, sub_argv);
    }
    else if (cmd=="characterize")
    {
        genotype(sub_argc, sub_argv);
    }
    else if (cmd=="construct_probes")
    {
        construct_probes(sub_argc, sub_argv);
    }
    else if (cmd=="profile_indels")
    {
        profile_indels(sub_argc, sub_argv);
    }
    else if (cmd=="profile_snps")
    {
        profile_snps(sub_argc, sub_argv);
    }
    else if (cmd=="profile_mendelian")
    {
        profile_mendelian(sub_argc, sub_argv);
    }
    else if (cmd=="profile_na12878")
    {
        profile_na12878(sub_argc, sub_argv);
    }
    else if (cmd=="profile_chrom")
    {
        profile_chrom(sub_argc, sub_argv);
    }
    else if (cmd=="align")
    {
        align(sub_argc, sub_argv);
    }
    else if (cmd=="compute_features")
    {
        compute_features(sub_argc, sub_argv);
    }
    else if (cmd=="profile_afs")
    {
        profile_afs(sub_argc, sub_argv);
    }
    else if (cmd=="profile_hwe")
    {
        profile_hwe(sub_argc, sub_argv);
    }
    else if (cmd=="profile_len")
    {
        profile_len(sub_argc, sub_argv);
    }
    else if (cmd=="annotate_str")
    {
        annotate_str(sub_argc, sub_argv);
    }
    else if (cmd=="consolidate_variants")
    {
        consolidate_variants(sub_argc, sub_argv);
    }
    else
    {
        std::clog << "Command not found: " << cmd << "\n\n";
        help();
        exit(1);
    }

    if (print)
    {
        // Elapsed processor time in seconds since program start.
        const clock_t t1 = clock();
        print_time(static_cast<float>(t1 - t0) / CLOCKS_PER_SEC);
    }

    return 0;
}