예제 #1
// P = G + E
// Phenotype for an allele group: the environmental factor reported by `env`
// plus the value returned by genotype( ag ).
double DefaultTrait::phenotype( const AlleleGroupPtr ag, const environmental * env ) const {
    // The environment is queried in its own statement so that it is always
    // evaluated before genotype( ag ).
    const double environmental_part = env->environment_factor();

    return environmental_part + genotype( ag );
예제 #2
// Per-SNP QFAM/QTDT test with adaptive permutation.
//
// For every locus l in [0, nl_all) this routine builds a LinearModel, stores
// its test statistic in results[l], and then repeatedly permutes the
// between-/within-family components, refits, and reports each permuted
// statistic to `perm` until perm.updateSNP() signals that the SNP is done.
//
//   C         per-individual index into `family` (used as family[C[i]],
//             pwithin[C[i]] and pbetween[C[i]])
//   QOUT      output stream; not referenced in the visible part of the body
//   permuting not referenced in the visible part of the body
//   perm      permutation bookkeeping; receives (permuted, original, locus)
//   pbetween  family permutation, indexed by family; edited in place
//   pwithin   per-family flag, re-drawn at random on every permutation
//
// Returns a vector with one entry per locus: the statistic of the original
// (unpermuted) model; -1 is written on the X-marker skip path.
//
// NOTE(review): this excerpt has lost its brace-only lines and a number of
// statements (see the notes below), so block nesting cannot be read off the
// text as it stands.
vector_t Plink::calcQTDT(vector<int> & C,
			 ofstream & QOUT,
			 bool permuting, 
			 Perm & perm,
			 vector<int> & pbetween, 
			 vector<bool> & pwithin)

  // Iterate over each SNP
  vector_t results(nl_all);
  for (int l=0; l<nl_all; l++)
      // Note: when using adaptive permutation in QFAM, we do not skip
      // a failed SNP here, as we permute on a per-SNP basis instead;
      // i.e. for this particular SNP we will perform enough
      // permutations to assess significance in this first instance of the 
      // call to calcQTDT().  
      // Skip X markers for now
      // NOTE(review): the condition below ends in `||` with no right-hand
      // operand visible; the rest of the test is missing from this excerpt.
      if (par::chr_sex[locus[l]->chr] || 
 	  results[l] = -1;
      if (par::verbose)
	cout << "\n ******************************************\n"
	     << "  LOCUS " << locus[l]->name << "\n\n";
      // Create X vector that encodes the genotype for each individual
      // as 1,0,-1 (or -9 for missing)
      // Use the per-person 'flag' variable to indicate a non-missing genotype
      // at this SNP (i.e. for gperson)
      // Use 'covar' to store the X= 1,0,-1 codes for this SNP
      // Score between and within components
      // Now, for each individual, set B and W 

      // include[i] starts true for everyone and is cleared below for
      // individuals that must be left out of the model.
      vector<bool> include(n,true);

      // Now we have created the family structure, B and W and flagged who is missing
      // in terms of genotype and phenotype
      // We can either proceed to return one value for this (in max(T) mode)
      // or to exhaust all permutations
      // Prune out missing data (already done?)
      // For a family that is not included, clear the flag on its father,
      // mother and every child.
      // NOTE(review): no statement advancing `f` is visible in this loop.
      vector<Family*>::iterator f = family.begin();
      while ( f != family.end() ) 
 	 if ( ! (*f)->include ) 
 	     if ( (*f)->pat ) 
 	       (*f)->pat->flag = false;
 	     if ( (*f)->mat ) 
 	       (*f)->mat->flag = false;
 	     for ( int k = 0 ; k < (*f)->kid.size() ; k++) 
	       (*f)->kid[k]->flag = false;
      // Prune individuals
      // Anyone whose flag is unset, or who is marked missing, is excluded.
      for (int i=0; i<n; i++)
        if ( (!sample[i]->flag) || sample[i]->missing ) 
	  include[i] = false;

     // Optional display
     // NOTE(review): no `else` is visible between the "INC" and "EXC" prints.
     if (par::verbose)
	 for (int i=0; i<n; i++)
	     if ( include[i] ) 
	       cout << "INC\t";
	       cout << "EXC\t";
	     cout << C[i] << "\t"
		  << sample[i]->fid << " " << sample[i]->iid << "\t"
		  << sample[i]->phenotype << "\t"
		  << genotype(*this,i,l) << " "
		  << sample[i]->T << " "
		  << sample[i]->B << " "
		  << sample[i]->W ;
	     cout << "\n";
	 cout << "\n\n";
     // Form linear model
     // The model is allocated here with `new` and released by the
     // `delete lm` at the end of this SNP's iteration.
     Model * lm;
     LinearModel * m = new LinearModel(this);
     lm = m;
     // Copy pattern of missing data over, with 
     // some additional exclusions based on family 
     // structure
     // Add independent variables: T, B and/or W
     // and set the test parameter
     // (intercept is 0)
     // Covariates  Model
     // 0 Total
     // 1 Between
     // 2 Within
     // Model
     // 0      Intercept      Intercept
     // 1      Total          Between
     // 2      n/a            Within
     // Every visible branch selects parameter 1 as the one under test.
     if (par::QFAM_total) 
	 lm->testParameter = 1;	     
     else if (par::QFAM_between)
	 //	 lm->label.push_back("WITH");
	 lm->testParameter = 1;	     
     else if (par::QFAM_within1 || par::QFAM_within2) 
	 //	 lm->label.push_back("BET");
	 lm->testParameter = 1;	     
     // Build design matrix
     // Fit linear model
     // NOTE(review): the statement controlled by this `if` is not visible.
     if ( par::QFAM_total && par::qt )

     // Check for multi-collinearity
     // Calculate Original Test statistic
     results[l] = lm->getStatistic();

     // Store,return and display this value?

     // Now, permutation
     // 1) We have the complete, non-missing data: permute only this
     //    i.e. we do not need to worry about missing data; we are
     //    no longer controlling the correlation between SNPs, as we
     //    are permuting genotype, so we do not need to worry about this
     //    in any case.
     // 2) Keep the same Model in each case: directly re-state the X 
     //    variables in the design matrix, then re-fit model. This 
     //    will avoid the cost of building the model, pruning for missing
     //    data, etc, each iteration
     // Store original, and set up permutations
     // (i.e. return pperson to original order)

     double original = results[l];

     // Adaptive permutation
     // Set up permutation indices, specific to this SNP
     int tc = 0;

     // Loop until perm.updateSNP() reports that this SNP is finished.
     while ( true ) 
	 // Permute between and within family components
	 // Each family's pwithin flag is an independent draw: true when
	 // CRandom::rand() < 0.5, false otherwise.
	 for (int i=0; i<family.size(); i++)
	     if (CRandom::rand() < 0.5) pwithin[i] = true;
	     else pwithin[i] = false;
	 // Edit pbetween for this SNP, so that we keep missing 
	 // B components constant
	 // NOTE(review): no operator is visible between the two operands of
	 // the condition below.
	 for (int f=0; f<pbetween.size(); f++)
	     if ( 
		 // Permuted family is all missing
		 ( ! family[pbetween[f]]->include ) 
		 // Recipient family is not...
		 family[f]->include )
		 // ... then swap 
		 //   F  P(F)    -->
		 //   0  2       -->   0  2
		 //   1  0       -->   1  0
		 //   2  3*      -->   2  4
		 //   3* 4       -->   3* 3*
		 //   4  1       -->   4  1
		 //   ...
		 // e.g. 3* is missing, so swap 3* and 4 in P(F), so 2
		 // and 4 end up together instead, 3* is invariant
		 int missing_family = pbetween[f];
		 int swap_in_family = pbetween[pbetween[f]];
		 pbetween[missing_family] = missing_family;
		 pbetween[f] = swap_in_family;
// 		 if (par::verbose)
// 		   {
// 		     cout << "FAM " << f << " (NOT MISS) has " << missing_family << " (MISS)\n";
// 		     cout << "FAM " << missing_family << " (MISS) has " << swap_in_family << " (?)\n";
// 		     cout << "SWAP MADE ..\n";
// 		     cout << "FAM " << f << "  has " << pbetween[f] << "\n";
// 		     cout << "FAM " << missing_family << " has " << pbetween[missing_family] << "\n\n";
// 		   }
		 // And re-check this new pairing

// 	 if (par::verbose)
// 	   for (int f=0; f<pbetween.size(); f++)
// 	     {
// 	       if ( ! family[pbetween[f]]->include ) 
// 		 cout << " Permuted family is all missing " << f << "\t" << family[pbetween[f]]->kid[0]->fid << "\n";
// 	       if ( ! family[f]->include ) 
// 		 cout << " Recipient family is all missing " << f << "\t" << family[f]->kid[0]->fid << "\n";
// 	     }

//  	 if (true)
//  	   {
//  	     for (int i=0; i<n; i++)
//  	       {
//  		 cout << sample[i]->fid << "\t"
//  		      << include[i] << "\t"
//  		      << C[i] << "\t"
//  		      << pbetween[C[i]] << "\t"
//  		      << sample[i]->family->include << "\t"
//  		      << family[C[i]]->include << "\t"
//  		      << family[pbetween[C[i]]]->include << "\n";
//  	       }
//  	   }

	 // Reconstitute genotypes
	 // and fit back into LinearModel
	 // `c` indexes rows of lm->X, advancing only for included individuals.
	 // B comes from the family this person's family was permuted to;
	 // the sign of W is taken from pwithin for the person's own family.
	 // NOTE(review): no `else` is visible before the final assignment.
 	 int c=0;
 	 for (int i=0; i<n; i++)
 	     if (include[i])
 		 Family * pfam = family[ pbetween[C[i]] ];
 		 Individual * person = sample[i];
 		 if ( par::QFAM_total )
 		   lm->X[c++][1] = pwithin[C[i]] ? pfam->B + person->W : pfam->B - person->W;
 		 else if ( par::QFAM_between )
 		     lm->X[c++][1] = pfam->B;
 		     lm->X[c++][1] = pwithin[C[i]] ? person->W : - person->W;
// 		 cout << "added " << person->fid << " " 
// 		      << person->iid << " " 
// 		      << lm->X[c-1][1] << "\n";

// 	 cout << "\n\n";

	 // Re-fit model
	 // NOTE(review): the statement controlled by this `if` is not visible.
	 if ( par::QFAM_total && par::qt )
	 // Check for multi-collinearity

	 // Calculate the test statistic for this permutation

	 // Should not encounter this too much, but if not valid,
	 // count conservatively.
	 // An invalid model is scored as original + 1, i.e. strictly greater
	 // than the observed statistic.

	 double r = lm->isValid() ? lm->getStatistic() : original + 1 ;
// 	 cout << "Permutation ... \n";
// 	 if ( ! lm->isValid() )
// 	   cout << "NOT VALID>.. \n";

// 	 int c2 = 0;

// 	 for (int i=0; i<n; i++)
// 	   {
// 	     if ( include[i] ) 
// 	       cout << "INC\t";
// 	     else
// 	       cout << "EXC\t";
// 	     cout << C[i] << "\t"
// 		  << sample[i]->fid << " " << sample[i]->iid << "\t"
// 		  << sample[i]->phenotype << "\t"
// 		  << genotype(*this,i,l) << " ";

// 	     if ( include[i] )
// 	       cout << lm->X[c2++][1] << " ";
// 	     else 
// 	       cout << "NA" << " ";
// 	     cout << "\n";
// 	   }
// 	 cout << "\n\n";

	 // Reset in case the previous model was not valid

	 // Test / update / are we finished ? 
	 // A true return from updateSNP() ends the permutation loop; the
	 // progress line is printed unless par::silent is set.
	 if ( perm.updateSNP( r , original , l ) )
	     if ( ! par::silent )
		 cout << "Adaptive permutation done for " 
		      << l+1 << " of " << nl_all << " SNPs            \r";
	     break; // We are done for this SNP
       } // Next adaptive permutation

     // Clear up
     delete lm;
   } // Next SNP
 return results;
// Program entry point: dispatches on the first command-line argument.
//
// argv[1] is compared against a fixed list of sub-command names and the
// matching handler is called with (argc-1, ++argv), so the handler sees the
// sub-command name as its own argv[0]. Handlers for view, index, merge,
// paste, concat and normalize return a value that is stored in `print`; the
// remaining handlers' return values are not used here. Always returns 0 on
// the visible path.
//
// NOTE(review): this excerpt has lost its brace-only lines and some
// statements; see the notes below.
int main(int argc, char ** argv)
    // Start-of-run timestamp.
    clock_t t0;
    t0 = clock();
    bool print = true;

    // NOTE(review): the statement controlled by this `if` is not visible.
    // The line after it reads argv[1], so the no-argument case has to be
    // handled here - confirm against the full file.
    if (argc==1)

    std::string cmd(argv[1]);

    //primitive programs that do not require help pages and summary statistics by default
    if (argc>1 && cmd=="view")
        print = view(argc-1, ++argv);
    else if (argc>1 && cmd=="index")
        print = index(argc-1, ++argv);
    else if (argc>1 && cmd=="merge")
        print = merge(argc-1, ++argv);
    else if (argc>1 && cmd=="paste")
        print = paste(argc-1, ++argv);
    else if (argc>1 && cmd=="concat")
        print = concat(argc-1, ++argv);
    else if (argc>1 && cmd=="subset")
        subset(argc-1, ++argv);
    else if (argc>1 && cmd=="decompose")
        decompose(argc-1, ++argv);
    else if (argc>1 && cmd=="normalize")
        print = normalize(argc-1, ++argv);
    else if (argc>1 && cmd=="config")
        config(argc-1, ++argv);
    else if (argc>1 && cmd=="mergedups")
        merge_duplicate_variants(argc-1, ++argv);
    else if (argc>1 && cmd=="remove_overlap")
        remove_overlap(argc-1, ++argv);
    else if (argc>1 && cmd=="peek")
        peek(argc-1, ++argv);
    else if (argc>1 && cmd=="partition")
        partition(argc-1, ++argv);
    else if (argc>1 && cmd=="annotate_variants")
        annotate_variants(argc-1, ++argv);
    else if (argc>1 && cmd=="annotate_regions")
        annotate_regions(argc-1, ++argv);
    else if (argc>1 && cmd=="annotate_dbsnp_rsid")
        annotate_dbsnp_rsid(argc-1, ++argv);
    else if (argc>1 && cmd=="discover")
        discover(argc-1, ++argv);
    else if (argc>1 && cmd=="merge_candidate_variants")
        merge_candidate_variants(argc-1, ++argv);
    else if (argc>1 && cmd=="union_variants")
        union_variants(argc-1, ++argv);
    // Note the naming: the "genotype" command runs genotype2(), while the
    // "characterize" command runs genotype().
    else if (argc>1 && cmd=="genotype")
        genotype2(argc-1, ++argv);
    else if (argc>1 && cmd=="characterize")
        genotype(argc-1, ++argv);
    else if (argc>1 && cmd=="construct_probes")
        construct_probes(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_indels")
        profile_indels(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_snps")
        profile_snps(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_mendelian")
        profile_mendelian(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_na12878")
        profile_na12878(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_chrom")
        profile_chrom(argc-1, ++argv);
    else if (argc>1 && cmd=="align")
        align(argc-1, ++argv);
    else if (argc>1 && cmd=="compute_features")
        compute_features(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_afs")
        profile_afs(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_hwe")
        profile_hwe(argc-1, ++argv);
    else if (argc>1 && cmd=="profile_len")
        profile_len(argc-1, ++argv);
    else if (argc>1 && cmd=="annotate_str")
        annotate_str(argc-1, ++argv);
    else if (argc>1 && cmd=="consolidate_variants")
        consolidate_variants(argc-1, ++argv);
        // NOTE(review): presumably the fall-through `else` arm for an
        // unrecognised command; the `else` itself is not visible here.
        std::clog << "Command not found: " << argv[1] << "\n\n";

    // End-of-run timestamp, taken only when `print` is still true.
    // NOTE(review): whatever consumes t0/t1 is not visible in this excerpt.
    if (print)
        clock_t t1;
        t1 = clock();

    return 0;