/*
 * Phenotype of an allele group under a given environment: P = G + E.
 *
 * E is read from env->environment_factor() and G from genotype( ag );
 * the returned value is their sum.
 */
double DefaultTrait::phenotype( const AlleleGroupPtr ag, const environmental * env ) const {
    // Read the environmental term in its own statement so that it is
    // evaluated before the genetic term.
    const double env_effect = env->environment_factor();

    return env_effect + genotype( ag );
}
// Per-SNP QFAM / QTDT association test with adaptive permutation.
//
// For every locus this routine scores the between- (B) and within-family (W)
// genotype components, fits a linear model on the component selected by the
// par::QFAM_* flags, writes the unpermuted result to QOUT, and then keeps
// permuting the components and re-fitting the same model until
// perm.updateSNP() reports that this SNP is finished.
//
// Parameters
//   C         per-individual index into 'family'; used as pbetween[C[i]] and
//             pwithin[C[i]]
//   QOUT      output stream handed to displayResults() for the original fit
//   permuting not referenced anywhere in this function
//   perm      permutation controller: nextSNP() is called once per locus,
//             updateSNP() once per permutation
//   pbetween  family-index permutation, shuffled and then repaired in place on
//             every iteration (NOTE(review): presumably sized family.size() --
//             confirm against the caller)
//   pwithin   per-family flags redrawn on every iteration; true keeps the sign
//             of W, false flips it
//
// Returns one statistic per locus; loci on chromosomes flagged as sex or
// haploid are skipped and reported as -1.
vector_t Plink::calcQTDT(vector<int> & C,
                         ofstream & QOUT,
                         bool permuting,
                         Perm & perm,
                         vector<int> & pbetween,
                         vector<bool> & pwithin)
{

  /////////////////////////
  // Iterate over each SNP

  vector_t results(nl_all);

  for (int l=0; l<nl_all; l++)
    {

      // Note: when using adaptive permutation in QFAM, we do not skip
      // a failed SNP here, as we permute on a per-SNP basis instead;
      // i.e. for this particular SNP we will perform enough
      // permutations to assess significance in this first instance of the
      // call to calcQTDT().

      // Skip markers on chromosomes flagged as sex or haploid (for now);
      // -1 is stored as the placeholder result for such loci
      if (par::chr_sex[locus[l]->chr] ||
          par::chr_haploid[locus[l]->chr])
        {
          results[l] = -1;
          continue;
        }

      if (par::verbose)
        cout << "\n ******************************************\n"
             << " LOCUS " << locus[l]->name << "\n\n";


      ////////////////////////////////////////////////////////////////
      // Create X vector that encodes the genotype for each individual
      // as 1,0,-1 (or -9 for missing)

      // Use the per-person 'flag' variable to indicate a non-missing genotype
      // at this SNP (i.e. for gperson)

      // Use 'covar' to store the X= 1,0,-1 codes for this SNP

      setCovariatesForSNP(*this,l);


      ///////////////////////////////////////
      // Score between and within components

      scoreBetween(*this,l);

      // Now, for each individual, set B and W
      // ('include' starts all-true and is narrowed below)

      vector<bool> include(n,true);
      scoreBandW(*this,l,include);


      // Now we have created the family structure, B and W and flagged who is missing
      // in terms of genotype and phenotype

      // We can either proceed to return one value for this (in max(T) mode)
      // or to exhaust all permutations


      /////////////////////////
      // Prune out missing data (already done?)
      // Every member (father, mother, all kids) of a family that is not
      // marked 'include' has its per-person flag cleared.

      vector<Family*>::iterator f = family.begin();
      while ( f != family.end() )
        {
          if ( ! (*f)->include )
            {
              if ( (*f)->pat )
                (*f)->pat->flag = false;
              if ( (*f)->mat )
                (*f)->mat->flag = false;
              for ( int k = 0 ; k < (*f)->kid.size() ; k++)
                (*f)->kid[k]->flag = false;
            }
          f++;
        }

      // Prune individuals: drop anyone whose flag is cleared or who is
      // marked 'missing'
      for (int i=0; i<n; i++)
        if ( (!sample[i]->flag) || sample[i]->missing )
          include[i] = false;


      /////////////////////////
      // Optional display
      // (one row per individual: inclusion, family index, IDs, phenotype,
      //  genotype and the T, B, W scores)

      if (par::verbose)
        {
          for (int i=0; i<n; i++)
            {
              if ( include[i] )
                cout << "INC\t";
              else
                cout << "EXC\t";

              cout << C[i] << "\t"
                   << sample[i]->fid << " " << sample[i]->iid << "\t"
                   << sample[i]->phenotype << "\t"
                   << genotype(*this,i,l) << " "
                   << sample[i]->T << " "
                   << sample[i]->B << " "
                   << sample[i]->W ;
              cout << "\n";
            }
          cout << "\n\n";
        }


      ///////////////////////////////////
      // Form linear model
      // (allocated here, released by the 'delete lm' at the end of this
      //  locus iteration)

      Model * lm;
      LinearModel * m = new LinearModel(this);
      lm = m;

      // Copy pattern of missing data over, with
      // some additional exclusions based on family
      // structure

      lm->setMissing(include);

      // Add independent variables: T, B and/or W
      // and set the test parameter
      // (intercept is 0)

      // Covariates  Model
      // 0           Total
      // 1           Between
      // 2           Within

      // Model
      // 0           Intercept   Intercept
      // 1           Total       Between
      // 2           n/a         Within

      // In every branch exactly one label is added and column 1 is tested.

      if (par::QFAM_total)
        {
          lm->label.push_back("TOT");
          lm->testParameter = 1;
        }
      else if (par::QFAM_between)
        {
          lm->label.push_back("BET");
          // lm->label.push_back("WITH");
          lm->testParameter = 1;
        }
      else if (par::QFAM_within1 || par::QFAM_within2)
        {
          // lm->label.push_back("BET");
          lm->label.push_back("WITH");
          lm->testParameter = 1;
        }

      // Build design matrix
      lm->buildDesignMatrix();

      // Fit linear model
      // (the univariate fit is used only for the total test on a
      //  quantitative trait)
      if ( par::QFAM_total && par::qt )
        lm->fitUnivariateLM();
      else
        lm->fitLM();

      // Check for multi-collinearity
      lm->validParameters();

      // Calculate Original Test statistic
      results[l] = lm->getStatistic();

      // Store,return and display this value?
      lm->displayResults(QOUT,locus[l]);


      ///////////////////
      // Now, permutation

      // 1) We have the complete, non-missing data: permute only this
      //    i.e. we do not need to worry about missing data; we are
      //    no longer controlling the correlation between SNPs, as we
      //    are permuting genotype, so we do not need to worry about this
      //    in any case.

      // 2) Keep the same Model in each case: directly re-state the X
      //    variables in the design matrix, then re-fit model.  This
      //    will avoid the cost of building the model, pruning for missing
      //    data, etc, each iteration


      // Store original, and set up permutations
      // (i.e. return pperson to original order)

      perm.nextSNP();

      double original = results[l];


      ////////////////////////
      // Adaptive permutation

      ///////////////////////////////////////////////////
      // Set up permutation indices, specific to this SNP

      // NOTE: 'tc' is initialised but never read or updated below
      int tc = 0;

      while ( true )
        {

          // Permute between and within family components:
          // shuffle the family assignment, then draw one sign flag per
          // family (true with probability 0.5)

          permute(pbetween);

          for (int i=0; i<family.size(); i++)
            {
              if (CRandom::rand() < 0.5)
                pwithin[i] = true;
              else
                pwithin[i] = false;
            }

          // Edit pbetween for this SNP, so that we keep missing
          // B components constant

          for (int f=0; f<pbetween.size(); f++)
            {
              if (
                  // Permuted family is all missing
                  ( ! family[pbetween[f]]->include )
                  &&
                  // Recipient family is not...
                  family[f]->include )
                {

                  // ... then swap
                  //   F   P(F)  -->
                  //   0   2     -->  0   2
                  //   1   0     -->  1   0
                  //   2   3*    -->  2   4
                  //   3*  4     -->  3*  3*
                  //   4   1     -->  4   1
                  //   ...

                  // e.g. 3* is missing, so swap 3* and 4 in P(F), so 2
                  // and 4 end up together instead, 3* is invariant

                  int missing_family = pbetween[f];
                  int swap_in_family = pbetween[pbetween[f]];

                  pbetween[missing_family] = missing_family;
                  pbetween[f] = swap_in_family;

                  // if (par::verbose)
                  //   {
                  //     cout << "FAM " << f << " (NOT MISS) has " << missing_family << " (MISS)\n";
                  //     cout << "FAM " << missing_family << " (MISS) has " << swap_in_family << " (?)\n";
                  //     cout << "SWAP MADE ..\n";
                  //     cout << "FAM " << f << " has " << pbetween[f] << "\n";
                  //     cout << "FAM " << missing_family << " has " << pbetween[missing_family] << "\n\n";
                  //   }

                  // And re-check this new pairing
                  // (the decrement cancels the loop's f++, so the same
                  //  index f is examined again with its new partner)
                  f--;
                }
            }

          // if (par::verbose)
          //   for (int f=0; f<pbetween.size(); f++)
          //     {
          //       if ( ! family[pbetween[f]]->include )
          //         cout << " Permuted family is all missing " << f << "\t" << family[pbetween[f]]->kid[0]->fid << "\n";
          //       if ( ! family[f]->include )
          //         cout << " Recipient family is all missing " << f << "\t" << family[f]->kid[0]->fid << "\n";
          //     }

          // if (true)
          //   {
          //     for (int i=0; i<n; i++)
          //       {
          //         cout << sample[i]->fid << "\t"
          //              << include[i] << "\t"
          //              << C[i] << "\t"
          //              << pbetween[C[i]] << "\t"
          //              << sample[i]->family->include << "\t"
          //              << family[C[i]]->include << "\t"
          //              << family[pbetween[C[i]]]->include << "\n";
          //       }
          //   }


          //////////////////////////////////
          // Reconstitute genotypes
          // and fit back into LinearModel

          // 'c' counts included individuals only; it is used as the row
          // index into lm->X, and column 1 is overwritten with the permuted
          // predictor:
          //   total   : permuted family's B  +/-  own W
          //   between : permuted family's B
          //   within  : +/- own W

          int c=0;
          for (int i=0; i<n; i++)
            {
              if (include[i])
                {
                  Family * pfam = family[ pbetween[C[i]] ];
                  Individual * person = sample[i];

                  if ( par::QFAM_total )
                    lm->X[c++][1] = pwithin[C[i]] ?
                      pfam->B + person->W :
                      pfam->B - person->W;
                  else if ( par::QFAM_between )
                    {
                      lm->X[c++][1] = pfam->B;
                    }
                  else
                    {
                      lm->X[c++][1] = pwithin[C[i]] ?
                        person->W :
                        - person->W;
                    }

                  // cout << "added " << person->fid << " "
                  //      << person->iid << " "
                  //      << lm->X[c-1][1] << "\n";
                }
            }

          // cout << "\n\n";


          ////////////////////////////////////
          // Re-fit model

          if ( par::QFAM_total && par::qt )
            lm->fitUnivariateLM();
          else
            lm->fitLM();

          // Check for multi-collinearity
          lm->validParameters();

          // Calculate the test statistic for this permutation;
          // Should not encounter this too much, but if not valid,
          // count conservatively (original + 1 is used as the permuted
          // statistic in that case).

          double r = lm->isValid() ? lm->getStatistic() : original + 1 ;

          // cout << "Permutation ... \n";
          // if ( ! lm->isValid() )
          //   cout << "NOT VALID>.. \n";

          // int c2 = 0;
          // for (int i=0; i<n; i++)
          //   {
          //     if ( include[i] )
          //       cout << "INC\t";
          //     else
          //       cout << "EXC\t";
          //     cout << C[i] << "\t"
          //          << sample[i]->fid << " " << sample[i]->iid << "\t"
          //          << sample[i]->phenotype << "\t"
          //          << genotype(*this,i,l) << " ";
          //     if ( include[i] )
          //       cout << lm->X[c2++][1] << " ";
          //     else
          //       cout << "NA" << " ";
          //     cout << "\n";
          //   }
          // cout << "\n\n";

          // Reset in case the previous model was not valid
          lm->setValid();


          ////////////////////////////////////
          // Test / update / are we finished ?

          if ( perm.updateSNP( r , original , l ) )
            {
              if ( ! par::silent )
                {
                  cout << "Adaptive permutation done for "
                       << l+1 << " of " << nl_all << " SNPs \r";
                  cout.flush();
                }
              break; // We are done for this SNP
            }

        } // Next adaptive permutation


      // Clear up
      delete lm;

    } // Next SNP

  return results;
}
int main(int argc, char ** argv) { clock_t t0; t0 = clock(); bool print = true; if (argc==1) { help(); exit(0); } std::string cmd(argv[1]); //primitive programs that do not require help pages and summary statistics by default if (argc>1 && cmd=="view") { print = view(argc-1, ++argv); } else if (argc>1 && cmd=="index") { print = index(argc-1, ++argv); } else if (argc>1 && cmd=="merge") { print = merge(argc-1, ++argv); } else if (argc>1 && cmd=="paste") { print = paste(argc-1, ++argv); } else if (argc>1 && cmd=="concat") { print = concat(argc-1, ++argv); } else if (argc>1 && cmd=="subset") { subset(argc-1, ++argv); } else if (argc>1 && cmd=="decompose") { decompose(argc-1, ++argv); } else if (argc>1 && cmd=="normalize") { print = normalize(argc-1, ++argv); } else if (argc>1 && cmd=="config") { config(argc-1, ++argv); } else if (argc>1 && cmd=="mergedups") { merge_duplicate_variants(argc-1, ++argv); } else if (argc>1 && cmd=="remove_overlap") { remove_overlap(argc-1, ++argv); } else if (argc>1 && cmd=="peek") { peek(argc-1, ++argv); } else if (argc>1 && cmd=="partition") { partition(argc-1, ++argv); } else if (argc>1 && cmd=="annotate_variants") { annotate_variants(argc-1, ++argv); } else if (argc>1 && cmd=="annotate_regions") { annotate_regions(argc-1, ++argv); } else if (argc>1 && cmd=="annotate_dbsnp_rsid") { annotate_dbsnp_rsid(argc-1, ++argv); } else if (argc>1 && cmd=="discover") { discover(argc-1, ++argv); } else if (argc>1 && cmd=="merge_candidate_variants") { merge_candidate_variants(argc-1, ++argv); } else if (argc>1 && cmd=="union_variants") { union_variants(argc-1, ++argv); } else if (argc>1 && cmd=="genotype") { genotype2(argc-1, ++argv); } else if (argc>1 && cmd=="characterize") { genotype(argc-1, ++argv); } else if (argc>1 && cmd=="construct_probes") { construct_probes(argc-1, ++argv); } else if (argc>1 && cmd=="profile_indels") { profile_indels(argc-1, ++argv); } else if (argc>1 && cmd=="profile_snps") { profile_snps(argc-1, ++argv); } else if (argc>1 
&& cmd=="profile_mendelian") { profile_mendelian(argc-1, ++argv); } else if (argc>1 && cmd=="profile_na12878") { profile_na12878(argc-1, ++argv); } else if (argc>1 && cmd=="profile_chrom") { profile_chrom(argc-1, ++argv); } else if (argc>1 && cmd=="align") { align(argc-1, ++argv); } else if (argc>1 && cmd=="compute_features") { compute_features(argc-1, ++argv); } else if (argc>1 && cmd=="profile_afs") { profile_afs(argc-1, ++argv); } else if (argc>1 && cmd=="profile_hwe") { profile_hwe(argc-1, ++argv); } else if (argc>1 && cmd=="profile_len") { profile_len(argc-1, ++argv); } else if (argc>1 && cmd=="annotate_str") { annotate_str(argc-1, ++argv); } else if (argc>1 && cmd=="consolidate_variants") { consolidate_variants(argc-1, ++argv); } else { std::clog << "Command not found: " << argv[1] << "\n\n"; help(); exit(1); } if (print) { clock_t t1; t1 = clock(); print_time((float)(t1-t0)/CLOCKS_PER_SEC); } return 0; }