/*
 * Write the bit rows held in obj->datai to f.
 *
 * obj->datai is treated as an array of map->map_height row pointers, each
 * row (when present) holding realnum(map->map_width) bytes. Rows whose
 * bytes are all zero are freed and their slot set to NULL before anything
 * is written. If no non-zero byte remains anywhere, nothing is written.
 *
 * Output layout: one byte (TYPE_BITS + 1), then the raw row-pointer table,
 * then the bytes of every row that is still present.
 */
void map_save_bits(FILE * f, MAP * map, mapobj * obj)
{
	int tmp;		/* presumably scratch for the CHESA macro -- confirm against its definition */
	int row, col;
	int row_used, total_used = 0;
	int width = map->map_width;
	int height = map->map_height;
	unsigned char **rows = (unsigned char **) ((void *) obj->datai);
	unsigned char tmpb;

#define outbyte(a) tmpb=(a);fwrite(&tmpb, 1, 1, f);

	/* Pass 1: drop rows that carry no set bytes, count the rest. */
	for(row = 0; row < height; row++) {
		if(!rows[row])
			continue;
		row_used = 0;
		for(col = 0; col < realnum(width); col++) {
			if(rows[row][col])
				row_used++;
		}
		if(row_used) {
			total_used += row_used;
		} else {
			free((void *) rows[row]);
			rows[row] = NULL;
		}
	}

	/*
	 * Nothing worth saving. The (now all-NULL) pointer table itself is
	 * deliberately left allocated here.
	 */
	if(total_used == 0)
		return;

	/* Pass 2: type tag, pointer table, then each surviving row. */
	outbyte(TYPE_BITS + 1);
	CHESA(rows, sizeof(unsigned char *), height, f);
	for(row = 0; row < height; row++) {
		if(rows[row]) {
			CHESA(rows[row], sizeof(unsigned char), realnum(width), f);
		}
	}
}
// Return the p-value for the coefficient selected by testParameter.
//
// The value is pT(Z, Y.size()-np) with Z = coef / sqrt(variance).
// 1 is returned when the fit is flagged invalid (all_valid is false) or
// when the variance of the tested coefficient is below 1e-20 or fails
// realnum() -- the same rule displayResults() uses to decide whether a
// row is printed as "NA".
double LinearModel::getPValue()
{
  // An invalid fit always reports 1, so the variances are not needed.
  if ( !all_valid )
    return 1;

  vector_t var = getVar();

  // This check used to be computed into a flag that was never read (the
  // branch tested all_valid instead), so a near-zero or non-real variance
  // still went through sqrt() and the division below.
  bool okay = !( var[testParameter] < 1e-20 || !realnum(var[testParameter]) );
  if ( !okay )
    return 1;

  double se = sqrt(var[testParameter]);
  double Z = coef[testParameter] / se;
  return pT(Z,Y.size()-np);
}
/*
 * Return the number of bytes occupied by the allocated bit rows of map:
 * realnum(map->map_width) for every non-NULL row in the array returned by
 * grab_us_an_array(). Returns 0 when the map has no TYPE_BITS object.
 */
int bit_size(MAP * map)
{
	int width = map->map_width;
	int height = map->map_height;
	int row = 0;
	int total = 0;
	unsigned char **rows;

	if(!map->mapobj[TYPE_BITS])
		return 0;

	rows = grab_us_an_array(map);
	while(row < height) {
		if(rows[row])
			total += realnum(width);
		row++;
	}
	return total;
}
/*
 * Read back the bit rows of a map from f.
 *
 * First the row-pointer table (map->map_height entries) is read; the
 * pointer values in it only serve as "row present" flags. Every row whose
 * entry is non-zero is then allocated and filled with
 * realnum(map->map_width) bytes from f.
 *
 * NOTE(review): within this function the freshly built table is never
 * stored on map nor released -- confirm whether attaching it is expected
 * to happen here.
 */
void map_load_bits(FILE * f, MAP * map)
{
	int width = map->map_width;
	int height = map->map_height;
	unsigned char **rows;
	int tmp, row;		/* tmp: presumably scratch for the CHELO macro -- confirm */

	Create(rows, unsigned char *, height);
	CHELO(rows, sizeof(unsigned char *), height, f);

	for(row = 0; row < height; row++) {
		if(!rows[row])
			continue;	/* row was absent when the map was saved */
		Create(rows[row], unsigned char, realnum(width));
		CHELO(rows[row], sizeof(unsigned char), realnum(width), f);
	}
}
// Fit one linear (LinearModel) or logistic (LogisticModel) model per test,
// the choice being made by par::bt, and return the per-test statistic.
//
// print_results  when true, an output file (".assoc.logistic" or
//                ".assoc.linear" appended to par::output_file_name) is
//                opened and per-parameter rows are written through
//                lm->displayResults().
// perm           only perm.snp_test[l] is read, to skip loci when
//                par::adaptive_perm is set.
//
// Returns a vector with one entry per test (nl_all entries, or a single
// entry when par::assoc_glm_without_main_snp is set). Entry l receives
// lm->getStatistic() unless par::assoc_glm_without_main_snp is set;
// entries of skipped loci keep the value the vector was constructed with.
//
// The order in which terms are added to the model below matters: the
// interaction code later refers to terms by parameter position.
vector_t Plink::glmAssoc(bool print_results, Perm & perm) {

  // The model.cpp functions require a SNP-major structure, if SNP
  // data are being used. There are some exceptions to this however,
  // listed below
  if ( par::SNP_major &&
       ! ( par::epi_genebased
           || par::set_score
           || par::set_step
           || par::proxy_glm
           || par::dosage_assoc
           || par::cnv_enrichment_test
           || par::cnv_glm
           || par::score_test
           || par::rare_test
           || par::gvar ) )
    SNP2Ind();

  // Test all SNPs 1 at a time automatically, or is this
  // a tailored single test?
  int ntests = par::assoc_glm_without_main_snp ? 1 : nl_all;

  vector<double> results(ntests);

  if ( print_results && par::qt && par::multtest )
    tcnt.resize(ntests);

  ofstream ASC;

  // Open the results file and write the column header
  if (print_results) {
    string f = par::output_file_name;
    if ( par::bt) {
      f += ".assoc.logistic";
      printLOG("Writing logistic model association results to [ " + f + " ] \n");
    } else {
      f += ".assoc.linear";
      printLOG("Writing linear model association results to [ " + f + " ] \n");
    }

    ASC.open(f.c_str(),ios::out);
    ASC << setw(4) << "CHR" << " "
        << setw(par::pp_maxsnp) << "SNP" << " "
        << setw(10) << "BP" << " "
        << setw(4) << "A1" << " "
        << setw(10) << "TEST" << " "
        << setw(8) << "NMISS" << " ";

    if ( par::bt && ! par::return_beta )
      ASC << setw(10) << "OR" << " ";
    else
      ASC << setw(10) << "BETA" << " ";

    if (par::display_ci)
      ASC << setw(8) << "SE" << " "
          << setw(8) << string("L"+dbl2str(par::ci_level*100)) << " "
          << setw(8) << string("U"+dbl2str(par::ci_level*100)) << " ";

    ASC << setw(12) << "STAT" << " "
        << setw(12) << "P" << " "
        << "\n";

    ASC.precision(4);
  }

  /////////////////////////////
  // Determine sex distribution
  // (a sex covariate is only added below if both sexes are present
  // among non-missing individuals)

  int nmales = 0, nfemales = 0;
  for (int i=0; i<n; i++)
    if ( ! sample[i]->missing ) {
      if ( sample[i]->sex )
        nmales++;
      else
        nfemales++;
    }

  bool variationInSex = nmales > 0 && nfemales > 0;

  //////////////////////////////////////////
  // Iterate over each locus, or just once

  for (int l=0; l<ntests; l++) {

    // Skip possibly (in all-locus mode)
    if ( par::adaptive_perm &&
         ( ! par::assoc_glm_without_main_snp ) &&
         ( ! perm.snp_test[l]) )
      continue;

    //////////////////////////////////////////////////////////
    // X-chromosome, haploid?
    // xchr_model 0: skip non-autosomal SNPs

    bool X=false;
    bool automaticSex=false;

    if ( ! par::assoc_glm_without_main_snp ) {
      if ( par::xchr_model == 0 ) {
        if ( par::chr_sex[locus[l]->chr] ||
             par::chr_haploid[locus[l]->chr] )
          continue;
      }
      else if (par::chr_sex[locus[l]->chr])
        X=true;
    }

    //////////////////////////////////////////////////////////
    // A new GLM
    // (allocated after every possible 'continue' above, so a skipped
    // locus never allocates a model)

    Model * lm;

    //////////////////////////////////////////////////////////
    // Linear or logistic?

    if (par::bt) {
      LogisticModel * m = new LogisticModel(this);
      lm = m;
    } else {
      LinearModel * m = new LinearModel(this);
      lm = m;
    }

    //////////////////////////////////////////////////////////
    // A temporary fix

    if ( par::dosage_assoc ||
         par::cnv_enrichment_test ||
         par::cnv_glm ||
         par::score_test ||
         par::set_score ||
         par::proxy_glm ||
         par::gvar ||
         par::rare_test )
      lm->hasSNPs(false);

    //////////////////////////////////////////////////////////
    // Set missing data

    lm->setMissing();

    //////////////////////////////////////////////////////////
    // Set genetic model

    if ( par::glm_dominant )
      lm->setDominant();
    else if ( par::glm_recessive || par::twoDFmodel_hethom )
      lm->setRecessive();

    string mainEffect = "";
    bool genotypic = false;

    /////////////////////////////////////////////////
    // Main SNP

    if ( ! par::assoc_glm_without_main_snp ) {

      // Haploid chromosomes never get the 2-df coding
      genotypic = par::chr_haploid[locus[l]->chr] ? false : par::twoDFmodel ;

      // Models
      //               AA    AB    BB
      // Additive       0     1     2
      // Dominant       0     1     1
      // Recessive      0     0     1
      // Genotypic(1)
      //  Additive      0     1     2
      //  Dom Dev.      0     1     0
      // Genotypic(2)
      //  Homozygote    0     0     1
      //  Heterozygote  0     1     0

      ////////////////////////////////////////////////////////////
      // An additive effect? (or single coded effect) of main SNP

      if ( par::glm_recessive )
        mainEffect = "REC";
      else if ( par::glm_dominant )
        mainEffect = "DOM";
      else if ( par::twoDFmodel_hethom )
        mainEffect = "HOM";
      else
        mainEffect = "ADD";

      lm->addAdditiveSNP(l);
      lm->label.push_back(mainEffect);

      //////////////////////////////////////////////////////////
      // Or a 2-df additive + dominance model?

      if ( genotypic ) {
        lm->addDominanceSNP(l);

        if ( par::twoDFmodel_hethom )
          lm->label.push_back("HET");
        else
          lm->label.push_back("DOMDEV");
      }
    }

    //////////////////////////////////////////////////////////
    // Haplotypes: WHAP test (grouped?)

    if ( par::chap_test ) {
      // Use whap->group (a list of sets) to specify these, from
      // the current model (either alternate or null)

      // Start from second category (i.e. first is reference)
      for (int h=1; h < whap->current->group.size(); h++) {
        lm->addHaplotypeDosage( whap->current->group[h] );
        lm->label.push_back( "WHAP"+int2str(h+1) );
      }
    }

    //////////////////////////////////////////////////////////
    // Haplotypes: proxy test

    if ( par::proxy_glm ) {
      // Unlike WHAP tests, we now will only ever have two
      // categories; and a single tested coefficient

      set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
      lm->addHaplotypeDosage( t1 );
      lm->label.push_back( "PROXY" );
    }

    if ( par::test_hap_GLM ) {
      // Assume model specified in haplotype sets
      // Either 1 versus all others, or H-1 versus
      // terms for omnibus

      set<int>::iterator i = haplo->sets.begin();
      while ( i != haplo->sets.end() ) {
        set<int> t;
        t.insert(*i);
        lm->addHaplotypeDosage( t );
        lm->label.push_back( haplo->haplotypeName( *i ) );
        ++i;
      }
    }

    //////////////////////////////////////////////////////////
    // Conditioning SNPs?
    // (might be X or autosomal, dealt with automatically)

    if (par::conditioning_snps) {
      if ( par::chap_test ) {
        // Only the conditioning SNPs flagged in the current WHAP model
        for (int c=0; c<conditioner.size(); c++) {
          if ( whap->current->masked_conditioning_snps[c] ) {
            lm->addAdditiveSNP(conditioner[c]);
            lm->label.push_back(locus[conditioner[c]]->name);
          }
        }
      } else {
        for (int c=0; c<conditioner.size(); c++) {
          lm->addAdditiveSNP(conditioner[c]);
          lm->label.push_back(locus[conditioner[c]]->name);
        }
      }
    }

    //////////////////////////////////////////////////////////
    // Sex-covariate (necessary for X chromosome models, unless
    // explicitly told otherwise)

    if ( ( par::glm_sex_effect || ( X && !par::glm_no_auto_sex_effect ) )
         && variationInSex ) {
      automaticSex = true;
      lm->addSexEffect();
      lm->label.push_back("SEX");
    }

    //////////////////////////////////////////////////////////
    // Covariates?

    if (par::clist) {
      for (int c=0; c<par::clist_number; c++) {
        lm->addCovariate(c);
        lm->label.push_back(clistname[c]);
      }
    }

    //////////////////////////////////////////////////////////
    // Interactions

    // addInteraction() takes parameter numbers
    // i.e. not covariate codes

    // 0 intercept
    // 1 {A}
    //   {D}
    //   {conditioning SNPs}
    //   {sex effect}
    //   {covariates}

    // Allow for interactions between conditioning SNPs, sex, covariates, etc

    ////////////////////////////////////////
    // Basic SNP x covariate interaction?

    // Currently -- do not allow interactions if no main effect
    // SNP -- i.e. we need a recoding of things here.

    if ( par::simple_interaction && ! par::assoc_glm_without_main_snp ) {

      // A, D and haplotypes by conditioning SNPs, sex, covariates

      // cindex walks the parameter positions after the main-SNP term(s):
      // it starts at 2 (or 3 with the extra genotypic term) and advances
      // by one per conditioning SNP, sex term and covariate.
      int cindex = 2;
      if ( genotypic )
        cindex = 3;

      for (int c=0; c<conditioner.size(); c++) {
        lm->addInteraction(1,cindex);
        lm->label.push_back(mainEffect+"xCSNP"+int2str(c+1));

        if ( genotypic ) {
          lm->addInteraction(2,cindex);
          if ( par::twoDFmodel_hethom )
            lm->label.push_back("HETxCSNP"+int2str(c+1));
          else
            lm->label.push_back("DOMDEVxCSNP"+int2str(c+1));
        }

        cindex++;
      }

      if ( automaticSex ) {
        lm->addInteraction(1,cindex);
        lm->label.push_back(mainEffect+"xSEX");

        if ( genotypic ) {
          lm->addInteraction(2,cindex);
          if ( par::twoDFmodel_hethom )
            lm->label.push_back("HETxSEX");
          else
            lm->label.push_back("DOMDEVxSEX");
        }

        cindex++;
      }

      for (int c=0; c<par::clist_number; c++) {
        lm->addInteraction(1,cindex);
        lm->label.push_back(mainEffect+"x"+clistname[c]);

        if ( genotypic ) {
          lm->addInteraction(2,cindex);

          if ( par::twoDFmodel_hethom )
            lm->label.push_back("HETx"+clistname[c]);
          else
            lm->label.push_back("DOMDEVx"+clistname[c]);
        }

        cindex++;
      }
    }

    //////////////////////////////
    // Fancy X chromosome models

    if ( X && automaticSex && par::xchr_model > 2 ) {

      // Interaction between allelic term and sex (i.e.
      // allow scale of male effect to vary)

      // sindex: position after the main-SNP term(s) and all
      // conditioning SNPs
      int sindex = 2;
      if ( genotypic )
        sindex++;
      sindex += conditioner.size();

      lm->addInteraction(2,sindex);
      lm->label.push_back("XxSEX");

      // xchr model 3 : test ADD + XxSEX
      // xchr model 4 : test ADD + DOM + XxSEX
    }

    //////////////////////////////
    // Build design matrix

    lm->buildDesignMatrix();

    //////////////////////////////
    // Clusters specified?

    if ( par::include_cluster ) {
      lm->setCluster();
    }

    //////////////////////////////////////////////////
    // Fit linear or logistic model (Newton-Raphson)

    lm->fitLM();

    ////////////////////////////////////////
    // Check for multi-collinearity

    lm->validParameters();

    ////////////////////////////////////////
    // Obtain estimates and statistic

    if (print_results)
      lm->displayResults(ASC,locus[l]);

    //cout << setw(25) << lm->getVar()[1] << " " << lm->isValid() << " " << realnum(lm->getVar()[1]) << endl; //for test purpose only

    ////////////////////////////////////////////////
    // Test linear hypothesis (multiple parameters)

    // Perform if:
    //   automatic 2df genotypic test ( --genotypic )
    //     OR
    //   sex-tests ( --xchr-model )
    //     OR
    //   test of everything ( --test-all )
    //     OR
    //   user has specified user-defined test ( --tests )

    // NOTE(review): the rows written in this section go to ASC whether or
    // not print_results is set; ASC is only opened when it is.

    if ( ( genotypic && ! par::glm_user_parameters )
         || par::glm_user_test
         || par::test_full_model ) {

      vector_t h; // dim = number of fixes (to =0)
      matrix_t H; // row = number of fixes; cols = np
      int df;
      string testname;

      ////////////////////////////////////////////////
      // Joint test of all parameters

      if (par::test_full_model) {
        df = lm->getNP() - 1;
        h.resize(df,0);
        testname = "FULL_"+int2str(df)+"DF";
        sizeMatrix(H,df,lm->getNP());
        // Row i selects parameter i+1, i.e. everything but the intercept
        for (int i=0; i<df; i++)
          H[i][i+1] = 1;
      }

      ////////////////////////////////////////////////
      // Joint test of user-specified parameters

      else if (par::glm_user_test) {
        df = par::test_list.size();
        h.resize(df,0);
        testname = "USER_"+int2str(df)+"DF";
        sizeMatrix(H,df,lm->getNP());
        // Out-of-range parameter numbers leave their row all zero
        for (int i=0; i<df; i++)
          if ( par::test_list[i]<lm->getNP() )
            H[i][par::test_list[i]] = 1;
      }

      ////////////////////////////////////////////////
      // Joint test of additive and dominant models

      else if ( genotypic ) {
        testname = "GENO_2DF";
        df = 2;
        h.resize(2,0);
        sizeMatrix(H,2,lm->getNP());
        H[0][1] = H[1][2] = 1;
      }
      // NOTE(review): this branch sets neither df nor H/h. Given the
      // enclosing condition it looks unreachable (one of the three
      // branches above should always be taken) -- confirm.
      else if ( X && par::xchr_model == 3 ) {
        testname = "XMOD_2DF";
      }

      ////////////////////////////////////////////////
      // Joint test of all parameters

      double chisq = lm->isValid() ? lm->linearHypothesis(H,h) : 0;
      double pvalue = chiprobP(chisq,df);

      // If filtering p-values
      if ( (!par::pfilter) || pvalue <= par::pfvalue ) {

        ASC << setw(4) << locus[l]->chr << " "
            << setw(par::pp_maxsnp) << locus[l]->name << " "
            << setw(10) << locus[l]->bp << " "
            << setw(4) << locus[l]->allele1 << " "
            << setw(10) << testname << " "
            << setw(8) << lm->Ysize() << " "
            << setw(10) << "NA" << " ";

        if (par::display_ci)
          ASC << setw(8) << "NA" << " "
              << setw(8) << "NA" << " "
              << setw(8) << "NA" << " ";

        if (lm->isValid() && realnum(chisq) )
          ASC << setw(12) << chisq << " "
              << setw(12) << pvalue << "\n";
        else
          ASC << setw(12) << "NA" << " "
              << setw(12) << "NA" << "\n";
      }
    }

    ////////////////////////////////////////
    // Store statistic (1 df chisq), and p-value
    // if need be ( based on value of testParameter )

    if ( ! par::assoc_glm_without_main_snp )
      results[l] = lm->getStatistic();

    if ( par::qt && print_results && par::multtest )
      tcnt[l] = lm->Ysize() - lm->getNP();

    //////////////////////////////////////////////
    // Clear up linear model, if no longer needed

    if ( par::chap_test ||
         par::test_hap_GLM ||
         par::set_step ||
         par::set_score ||
         par::proxy_glm ||
         par::dosage_assoc ||
         par::cnv_enrichment_test ||
         par::cnv_glm ||
         par::score_test ||
         par::gvar ||
         par::rare_test ) {
      // Responsibility to clear up in parent routine
      // NOTE(review): if this runs for more than one l, the previous
      // pointer held in 'model' is overwritten here without being
      // deleted -- confirm these modes always run with a single test.
      model = lm;
    } else {
      delete lm;
    }

    // Flush output buffer
    ASC.flush();

    // Next SNP
  }

  if (print_results)
    ASC.close();

  return results;
}
// Per-SNP test of homogeneity of the allelic odds ratio across the nk
// clusters, written to par::output_file_name + ".homog".
//
// For every SNP a 2x2 table (affected/unaffected by first/second allele)
// is counted in each cluster, 0.5 is added to every cell, and ln(OR) and
// its squared standard error are computed per cluster. From these the
// function writes:
//   TOTAL  sum of the per-cluster chi-squares          (nk df)
//   ASSOC  strata-adjusted association chi-square      (1 df)
//   HOMOG  TOTAL - ASSOC                               (nk-1 df)
//   one row per cluster (named by kname[k])            (1 df)
//
// Side effect: every individual whose cluster index (sol) is negative is
// flagged missing.
//
// NOTE(review): the output file is opened before the nk checks, so the
// early return for nk<2 leaves an empty file behind.
void Plink::calcHomog() {

  if (!par::SNP_major)
    Ind2SNP();

  string f = par::output_file_name + ".homog";
  ofstream MHOUT;
  MHOUT.open(f.c_str(),ios::out);

  MHOUT.precision(4);

  if (nk==0)
    error("No clusters (K=0)... cannot perform CMH tests");

  printLOG("Homogeneity of odds ratio test, K = " + int2str(nk) + "\n");

  if (nk<2) {
    printLOG("** Warning ** less then 2 clusters specified... \n");
    printLOG(" cannot compute between-cluster effects ** \n");
    return;
  }

  if (nk>10)
    printLOG("** Warning ** statistics can be unreliable if strata have small N ** \n");

  printLOG("Writing results to [ " + f + " ]\n");

  // Header row
  // NOTE(review): the TEST column is 8 wide here but 6 wide in every data
  // row below, so header and data columns do not line up from there on.
  MHOUT << setw(4) << "CHR" << " "
        << setw(par::pp_maxsnp) << "SNP" << " "
        << setw(4) << "A1" << " "
        << setw(4) << "A2" << " "
        << setw(8) << "F_A" << " "
        << setw(8) << "F_U" << " "
        << setw(8) << "N_A" << " "
        << setw(8) << "N_U" << " "
        << setw(8) << "TEST" << " "
        << setw(10) << "CHISQ" << " "
        << setw(4) << "DF" << " "
        << setw(10) << "P" << " "
        << setw(10) << "OR" << "\n";

  ///////////////////////////////////
  // Create boolean affection coding

  affCoding(*this);

  //////////////////////////////////
  // Any individual not assigned to a cluster,
  // making missing phenotype

  vector<Individual*>::iterator person = sample.begin();
  while ( person != sample.end() ) {
    if ( (*person)->sol < 0 )
      (*person)->missing = true;
    person++;
  }

  ///////////////////////////////
  // Iterate over SNPs
  // (s walks the SNP vector, l is the matching index into locus[])

  vector<CSNP*>::iterator s = SNP.begin();
  int l=0;

  while ( s != SNP.end() ) {

    // Uncomment this if we allow permutation for the CMH
    // tests

    // In adaptive mode, possibly skip this test
    // if (par::adaptive_perm && (!perm.snp_test[l]))
    // {
    //   s++;
    //   l++;
    //   continue;
    // }

    // Calculate statistic
    // Per-cluster cells: first index 1 = affected, 2 = unaffected;
    // second index 1 = first allele (bit false), 2 = second allele
    vector<double> n_11(nk,0);
    vector<double> n_12(nk,0);
    vector<double> n_21(nk,0);
    vector<double> n_22(nk,0);

    vector<double> lnOR(nk,0);
    vector<double> SEsq(nk,0);

    /////////////////
    // Autosomal or haploid?

    bool X=false, haploid=false;
    if (par::chr_sex[locus[l]->chr])
      X=true;
    else if (par::chr_haploid[locus[l]->chr])
      haploid=true;

    /////////////////////////////
    // Iterate over individuals
    // (i1, i2 and gperson are advanced together, including on the
    // early 'continue' paths below)

    vector<bool>::iterator i1 = (*s)->one.begin();
    vector<bool>::iterator i2 = (*s)->two.begin();
    vector<Individual*>::iterator gperson = sample.begin();

    while ( gperson != sample.end() ) {

      // Phenotype for this person (i.e. might be permuted)
      Individual * pperson = (*gperson)->pperson;

      // SNP alleles
      bool s1 = *i1;
      bool s2 = *i2;

      // A homozygote contributes 2 alleles, or 1 on a haploid
      // chromosome / for an individual with sex set on the X
      int hom = 2;
      if ( haploid || ( X && (*gperson)->sex ) )
        hom = 1;

      // Affected individuals
      if ( pperson->aff && !pperson->missing ) {

        // Allelic marginal
        if ( !s1 ) {
          if ( !s2 ) // FF hom
            {
              n_11[ pperson->sol ] += hom ;
            }
          else
            {
              n_11[ pperson->sol ]++ ; // FT het
              n_12[ pperson->sol ]++ ;
            }
        } else {
          if ( !s2 ) // TF
            {
              gperson++;
              i1++;
              i2++;
              continue; // skip missing genotypes
            }
          else // TT
            {
              n_12[ pperson->sol ] += hom ;
            }
        }
      }
      else if ( ! pperson->missing ) // Unaffecteds
        {
          // Allelic marginal
          if ( ! s1 ) {
            if ( ! s2 ) // FF
              {
                n_21[ pperson->sol ] += hom ;
              }
            else
              {
                n_21[ pperson->sol ] ++ ;
                n_22[ pperson->sol ] ++ ;
              }
          } else {
            if ( ! s2 ) // TF
              {
                gperson++;
                i1++;
                i2++;
                continue; // skip missing genotypes
              }
            else // TT
              {
                n_22[ pperson->sol ] += hom ;
              }
          }
        }

      // Next individual
      gperson++;
      i1++;
      i2++;
    }

    // Calculate log(OR) and SE(ln(OR)) for each strata

    double X_total = 0;
    double X_assoc1 = 0;
    double X_assoc2 = 0;

    vector<double> X_indiv(nk,0);

    // NOTE(review): clusters with no counted alleles are not excluded
    // here; after the 0.5 correction they give lnOR = 0 but still add
    // 1/SEsq to X_assoc2 and count towards the df printed below.
    for (int k=0; k<nk; k++) {

      // Add 0.5 to each cell to reduce bias
      n_11[k] += 0.5;
      n_12[k] += 0.5;
      n_21[k] += 0.5;
      n_22[k] += 0.5;

      // ln(OR)
      lnOR[k] = log ( ( n_11[k] * n_22[k] ) / ( n_12[k] * n_21[k] ) );
      SEsq[k] = 1/n_11[k] + 1/n_12[k] + 1/n_21[k] + 1/n_22[k] ;

      X_indiv[k] = (lnOR[k] * lnOR[k]) / SEsq[k];
      X_total += X_indiv[k];

      // For the common, strata-adjusted test
      X_assoc1 += lnOR[k] / SEsq[k];
      X_assoc2 += 1/ SEsq[k];
    }

    // X_total is total chi-square on nk df
    // X_indiv are individual chi-squares, each on 1 df
    // X_homog is test for homogeneity of OR, with nk-1 df
    // X_assoc is strata-adjusted test, with 1 df

    double X_assoc = (X_assoc1*X_assoc1)/X_assoc2;
    double X_homog = X_total - X_assoc;

    MHOUT << setw(4) << locus[l]->chr << " "
          << setw(par::pp_maxsnp) << locus[l]->name << " "
          << setw(4) << locus[l]->allele1 << " "
          << setw(4) << locus[l]->allele2 << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(6) << "TOTAL" << " "
          << setw(10) << X_total << " "
          << setw(4) << nk << " "
          << setw(10) << chiprobP(X_total,nk) << " "
          << setw(10) << "NA" << "\n";

    MHOUT << setw(4) << locus[l]->chr << " "
          << setw(par::pp_maxsnp) << locus[l]->name << " "
          << setw(4) << locus[l]->allele1 << " "
          << setw(4) << locus[l]->allele2 << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(6) << "ASSOC" << " "
          << setw(10) << X_assoc << " "
          << setw(4) << 1 << " "
          << setw(10) << chiprobP(X_assoc,1) << " "
          << setw(10) << "NA" << "\n";

    MHOUT << setw(4) << locus[l]->chr << " "
          << setw(par::pp_maxsnp) << locus[l]->name << " "
          << setw(4) << locus[l]->allele1 << " "
          << setw(4) << locus[l]->allele2 << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(8) << "NA" << " "
          << setw(6) << "HOMOG" << " "
          << setw(10) << X_homog << " "
          << setw(4) << nk-1 << " "
          << setw(10) << chiprobP(X_homog,nk-1) << " "
          << setw(10) << "NA" << "\n";

    // Per-cluster rows. Each cell already includes the +0.5 added above,
    // so a row sum of 1 means no alleles were counted in that phenotype
    // group, and "- 1" recovers the raw allele count for printing.
    for (int k=0; k<nk; k++) {

      if ( n_11[k] + n_12[k] <= 1.0001 ||
           n_21[k] + n_22[k] <= 1.0001 ) {

        MHOUT << setw(4) << locus[l]->chr << " "
              << setw(par::pp_maxsnp) << locus[l]->name << " "
              << setw(4) << locus[l]->allele1 << " "
              << setw(4) << locus[l]->allele2 << " "
              << setw(8) << "NA" << " "
              << setw(8) << "NA" << " "
              << setw(8) << n_11[k] + n_12[k] - 1 << " "
              << setw(8) << n_21[k] + n_22[k] - 1 << " "
              << setw(6) << kname[k] << " "
              << setw(10) << "NA" << " "
              << setw(4) << "NA" << " "
              << setw(10) << "NA" << " "
              << setw(10) << "NA" << "\n";

      } else {

        // Frequencies and OR here use the 0.5-corrected cells
        MHOUT << setw(4) << locus[l]->chr << " "
              << setw(par::pp_maxsnp) << locus[l]->name << " "
              << setw(4) << locus[l]->allele1 << " "
              << setw(4) << locus[l]->allele2 << " "
              << setw(8) << n_11[k]/double(n_11[k]+n_12[k]) << " "
              << setw(8) << n_21[k]/double(n_21[k]+n_22[k]) << " "
              << setw(8) << n_11[k] + n_12[k] - 1 << " "
              << setw(8) << n_21[k] + n_22[k] - 1 << " "
              << setw(6) << kname[k] << " "
              << setw(10) << X_indiv[k] << " "
              << setw(4) << 1 << " "
              << setw(10) << chiprobP(X_indiv[k],1) << " ";

        double odr = ( n_11[k] * n_22[k] ) / ( n_12[k] * n_21[k] );

        if ( realnum(odr) )
          MHOUT << setw(10) << odr << "\n";
        else
          MHOUT << setw(10) << "NA" << "\n";
      }
    }

    // Next locus
    s++;
    l++;
  }

  MHOUT.close();
}
// Cochran-Mantel-Haenszel 2x2xK allelic association test, one statistic
// per SNP, stratified by the nk clusters.
//
// perm      only perm.snp_test[l] is read, to skip SNPs when
//           par::adaptive_perm is set.
// original  when true, individuals with a negative cluster index (sol)
//           are flagged missing, and a results file
//           (par::output_file_name + ".cmh") is written with the CMH
//           chi-square, its p-value, the Mantel-Haenszel odds ratio with
//           SE and confidence bounds and, if par::breslowday is set, the
//           Breslow-Day statistic. When false nothing is written.
//
// Returns a vector of nl_all values; entry l holds the CMH chi-square of
// SNP l (entries of skipped SNPs keep their initial value).
//
// Side effect: par::breslowday is switched off when nk<2.
vector<double> Plink::calcMantelHaenszel_2x2xK(Perm & perm, bool original) {

  // Should we perform BD test (K>1)
  if (nk<2)
    par::breslowday = false;

  ofstream MHOUT;

  if ( original ) {

    //////////////////////////////////
    // Any individual not assigned to a cluster, making missing
    // phenotype (only need to do this once, for original)

    vector<Individual*>::iterator person = sample.begin();
    while ( person != sample.end() ) {
      if ( (*person)->sol < 0 )
        (*person)->missing = true;
      person++;
    }

    string f = par::output_file_name + ".cmh";
    MHOUT.open(f.c_str(),ios::out);

    MHOUT << setw(4) << "CHR" << " "
          << setw(par::pp_maxsnp) << "SNP" << " "
          << setw(10) << "BP" << " "
          << setw(4) << "A1" << " "
          << setw(8) << "MAF" << " "
          << setw(4) << "A2" << " "
          << setw(10) << "CHISQ" << " "
          << setw(10) << "P" << " "
          << setw(10) << "OR" << " "
          << setw(10) << "SE" << " "
          << setw(10) << string("L"+dbl2str(par::ci_level*100)) << " "
          << setw(10) << string("U"+dbl2str(par::ci_level*100)) << " ";

    if (par::breslowday)
      MHOUT << setw(10) << "CHISQ_BD" << " "
            << setw(10) << "P_BD" << " ";

    MHOUT << "\n";

    MHOUT.precision(4);

    printLOG("Cochran-Mantel-Haenszel 2x2xK test, K = " + int2str( nk) + "\n");

    if (par::breslowday)
      printLOG("Performing Breslow-Day test of homogeneous odds ratios\n");

    printLOG("Writing results to [ " + f + " ]\n");

    // Warnings,
    if (par::breslowday && nk>10)
      printLOG("** Warning ** Breslow-Day statistics require large N per cluster ** \n");
  }

  // Multiplier for the confidence bounds on ln(OR), taken from
  // ltqnorm() at 1 - (1 - par::ci_level)/2
  double zt = ltqnorm( 1 - (1 - par::ci_level) / 2 ) ;

  // Cochran-Mantel-Haenszel 2x2xK test
  vector<double> results(nl_all);

  // s walks the SNP vector, l is the matching index into locus[]
  vector<CSNP*>::iterator s = SNP.begin();
  int l=0;

  while ( s != SNP.end() ) {

    // Skip possibly
    if (par::adaptive_perm && !perm.snp_test[l]) {
      s++;
      l++;
      continue;
    }

    // Disease X allele X strata

    // Calculate mean of 11 cell for each strata
    vector<double> mean_11(nk,0);
    vector<double> var_11(nk,0);

    // Calculate statistic
    vector<double> n_11(nk,0);
    vector<double> n_12(nk,0);
    vector<double> n_21(nk,0);
    vector<double> n_22(nk,0);

    // Disease marginals
    vector<double> n_1X(nk,0); // disease
    vector<double> n_2X(nk,0); // no disease

    vector<double> n_X1(nk,0); // F allele
    vector<double> n_X2(nk,0); // T allele

    vector<double> n_TT(nk,0); // Total allele count

    /////////////////////////
    // Autosomal or haploid?

    bool X=false, haploid=false;
    if (par::chr_sex[locus[l]->chr])
      X=true;
    else if (par::chr_haploid[locus[l]->chr])
      haploid=true;

    ////////////////////////
    // Consider each person
    // (i1, i2 and gperson are advanced together, including on the
    // early 'continue' paths below)

    vector<bool>::iterator i1 = (*s)->one.begin();
    vector<bool>::iterator i2 = (*s)->two.begin();
    vector<Individual*>::iterator gperson = sample.begin();

    while ( gperson != sample.end() ) {

      // Phenotype is read through pperson; genotype and sex through gperson
      Individual * pperson = (*gperson)->pperson;

      bool s1 = *i1;
      bool s2 = *i2;

      // Affected individuals
      if ( pperson->aff && !pperson->missing ) {

        // Haploid?
        if ( haploid || ( X && (*gperson)->sex ) ) {

          // Allelic marginal
          if ( ! s1 ) {
            // FF hom
            n_11[ pperson->sol ] ++ ;
            n_X1[ pperson->sol ] ++ ;
          } else {
            if ( ! s2 ) // TF
              {
                gperson++;
                i1++;
                i2++;
                continue; // skip missing genotypes
              }
            else // TT
              {
                n_12[ pperson->sol ] ++ ;
                n_X2[ pperson->sol ] ++ ;
              }
          }

          // Disease marginal
          n_1X[ pperson->sol ] ++;
          n_TT[ pperson->sol ] ++;
        }
        else // autosomal
          {
            // Allelic marginal
            if ( ! s1 ) {
              if ( ! s2 ) // FF hom
                {
                  n_11[ pperson->sol ] +=2 ;
                  n_X1[ pperson->sol ] +=2 ;
                }
              else
                {
                  n_11[ pperson->sol ]++ ; // FT het
                  n_12[ pperson->sol ]++ ;
                  n_X1[ pperson->sol ]++ ;
                  n_X2[ pperson->sol ]++ ;
                }
            } else {
              if ( ! s2 ) // TF
                {
                  gperson++;
                  i1++;
                  i2++;
                  continue; // skip missing genotypes
                }
              else // TT
                {
                  n_12[ pperson->sol ] +=2 ;
                  n_X2[ pperson->sol ] +=2 ;
                }
            }

            // Disease marginal
            n_1X[ pperson->sol ] += 2;
            n_TT[ pperson->sol ] += 2;

          } // end autosomal
      }
      else if ( ! pperson->missing ) // Unaffecteds
        {
          // Haploid?
          if ( haploid || ( X && (*gperson)->sex ) ) {

            // Allelic marginal
            if ( ! s1 ) {
              // FF hom
              n_21[ pperson->sol ] ++ ;
              n_X1[ pperson->sol ] ++ ;
            } else {
              if ( ! s2 ) // TF
                {
                  gperson++;
                  i1++;
                  i2++;
                  continue; // skip missing genotypes
                }
              else // TT
                {
                  n_22[ pperson->sol ] ++ ;
                  n_X2[ pperson->sol ] ++ ;
                }
            }

            // Disease marginal
            n_2X[ pperson->sol ] ++;
            n_TT[ pperson->sol ] ++;
          }
          else // autosomal
            {
              // Allelic marginal
              if ( ! s1 ) {
                if ( ! s2 ) // FF
                  {
                    n_X1[ pperson->sol ] +=2 ;
                    n_21[ pperson->sol ] +=2 ;
                  }
                else
                  {
                    n_X1[ pperson->sol ] ++ ;
                    n_X2[ pperson->sol ] ++ ;
                    n_21[ pperson->sol ] ++ ;
                    n_22[ pperson->sol ] ++ ;
                  }
              } else {
                if ( ! s2 ) // TF
                  {
                    gperson++;
                    i1++;
                    i2++;
                    continue; // skip missing genotypes
                  }
                else // TT
                  {
                    n_X2[ pperson->sol ] +=2 ;
                    n_22[ pperson->sol ] +=2 ;
                  }
              }

              // disease marginal
              n_2X[ pperson->sol ] += 2;
              n_TT[ pperson->sol ] += 2;

            } // end autosomal
        } // end unaffected

      gperson++;
      i1++;
      i2++;

    } // count next individual

    // Finished iterating over individuals: cluster needs at least 2
    // nonmissing individuals
    // NOTE(review): n_TT counts alleles (+2 per diploid person above), so
    // this threshold is already met by one diploid individual -- confirm
    // which is intended.

    vector<bool> validK(nk,false);
    for (int k=0; k<nk; k++)
      if (n_TT[k]>=2)
        validK[k]=true;

    // Expected value and variance of the (1,1) cell per valid cluster
    for (int k=0; k<nk; k++) {
      if (validK[k]) {
        mean_11[k] = ( n_X1[k] * n_1X[k] ) / n_TT[k] ;
        var_11[k] = ( n_X1[k] * n_X2[k] * n_1X[k] * n_2X[k] )
          / ( n_TT[k]*n_TT[k]*(n_TT[k]-1) );

        // cout << k << " "
        //      << n_11[k] << " "
        //      << n_12[k] << " "
        //      << n_21[k] << " "
        //      << n_22[k] << "\n";
      }
    }

    // CMH = ( sum over clusters of (observed - expected) )^2 / sum of variances
    double CMH = 0;
    double denom = 0;
    for (int k=0; k<nk; k++) {
      if (validK[k]) {
        CMH += n_11[k] - mean_11[k];
        denom += var_11[k];
      }
    }

    CMH *= CMH;
    CMH /= denom;

    // MH Odds ratio & CI

    double R = 0;
    double S = 0;
    vector<double> r2(nk);
    vector<double> s2(nk);

    for (int k=0; k<nk; k++) {
      if (validK[k]) {
        r2[k] = (n_11[k]*n_22[k]) / n_TT[k];
        s2[k] = (n_12[k]*n_21[k]) / n_TT[k];
        R += r2[k];
        S += s2[k];
      }
    }

    double OR = R / S ;

    // Variance of ln(OR) is accumulated in three parts (v1, v2, v3)
    double v1 = 0, v2 = 0, v3 = 0;
    for (int k=0; k<nk; k++) {
      if (validK[k]) {
        v1 += (1/n_TT[k]) * ( n_11[k] + n_22[k] ) * r2[k] ;
        v2 += (1/n_TT[k]) * ( n_12[k] + n_21[k] ) * s2[k] ;
        v3 += (1/n_TT[k]) * ( ( n_11[k] + n_22[k] ) * s2[k]
                              + ( n_12[k] + n_21[k] ) * r2[k] );
      }
    }

    double SE = ( 1/(2*R*R) ) * v1
      + (1/(2*S*S)) * v2
      + (1/(2*R*S)) * v3 ;

    SE = sqrt(SE);

    double OR_lower = exp( log(OR) - zt * SE );
    double OR_upper = exp( log(OR) + zt * SE );

    if ( original ) {

      double pvalue = chiprobP(CMH,1);

      // Skip?, if filtering p-values
      // (the statistic is still stored in results[] after the label)
      if ( par::pfilter && ( pvalue > par::pfvalue || pvalue < 0 ) )
        goto skip_p_cmh;

      MHOUT << setw(4) << locus[l]->chr << " "
            << setw(par::pp_maxsnp) << locus[l]->name << " "
            << setw(10) << locus[l]->bp << " "
            << setw(4) << locus[l]->allele1 << " "
            << setw(8) << locus[l]->freq << " "
            << setw(4) << locus[l]->allele2 << " ";

      // Each value is printed only if realnum() accepts it, else "NA"
      if (realnum(CMH))
        MHOUT << setw(10) << CMH << " "
              << setw(10) << chiprobP(CMH,1) << " ";
      else
        MHOUT << setw(10) << "NA" << " "
              << setw(10) << "NA" << " ";

      if (realnum(OR))
        MHOUT << setw(10) << OR << " ";
      else
        MHOUT << setw(10) << "NA" << " ";

      if (realnum(SE))
        MHOUT << setw(10) << SE << " ";
      else
        MHOUT << setw(10) << "NA" << " ";

      if (realnum(OR_lower))
        MHOUT << setw(10) << OR_lower << " ";
      else
        MHOUT << setw(10) << "NA" << " ";

      if (realnum(OR_upper))
        MHOUT << setw(10) << OR_upper << " ";
      else
        MHOUT << setw(10) << "NA" << " ";

      // Optional Breslow-Day test of homogeneity of odds ratios
      if (par::breslowday) {

        double amax;
        double bb;
        double determ;
        double as_plus;
        double as_minus;
        double Astar;
        double Bstar;
        double Cstar;
        double Dstar;
        double Var;
        double BDX2 = 0;
        int df = 0;

        for (int k=0; k<nk; k++) {
          if (validK[k]) {
            df++;

            // Astar is the (1,1) cell expected under the common OR with
            // the observed margins: one root of a quadratic, choosing the
            // "minus" root when it lies in [0, amax].
            // NOTE(review): both roots divide by (2 - 2*OR), which is 0
            // when OR equals 1 exactly.
            amax = (n_1X[k] < n_X1[k]) ? n_1X[k] : n_X1[k];
            bb = n_2X[k] + n_1X[k] * OR - n_X1[k] * (1-OR);
            determ = sqrt(bb*bb + 4*(1-OR) * OR * n_1X[k] * n_X1[k]);
            as_plus = ( -bb + determ ) / ( 2 - 2 * OR );
            as_minus = ( -bb - determ ) / ( 2 - 2 * OR );
            Astar = as_minus <= amax && as_minus >= 0 ? as_minus : as_plus ;
            Bstar = n_1X[k] - Astar;
            Cstar = n_X1[k] - Astar;
            Dstar = n_2X[k] - n_X1[k] + Astar;
            Var = 1/(1/Astar + 1/Bstar + 1/Cstar + 1/Dstar);
            BDX2 += ( (n_11[k] - Astar) * ( n_11[k] - Astar ) ) / Var ;
          }
        }

        // df-1 = number of valid clusters minus one
        double BDp = chiprobP( BDX2 , df-1 );

        if ( BDp > -1 )
          MHOUT << setw(10) << BDX2 << " "
                << setw(10) << BDp << " ";
        else
          MHOUT << setw(10) << "NA" << " "
                << setw(10) << "NA" << " ";
      }

      MHOUT << "\n";
    }

  skip_p_cmh:

    // Store for permutation procedure, based 2x2xK CMH result
    results[l] = CMH;

    // Next SNP
    s++;
    l++;
  }

  if (original)
    MHOUT.close();

  return results;
}
// Run the E-M iterations for this window, updating the haplotype
// frequencies f[] in place.
//
// Each iteration:
//   E-step  for every ambiguous genotype group, set the phase
//           probabilities pp[i][z] from the current f[] and rescale them
//           by their sum;
//   M-step  rebuild f[] from the unambiguous counts uc[] plus the
//           pp-weighted counts of the ambiguous groups, divided by
//           haplo->validN.
// On selected iterations haplotypes with frequency at or below
// par::haplo_plem_zero_threshold are flagged in zero[], the negative
// log-likelihood is recomputed, and the loop stops once the change from
// the previously stored value is below par::haplo_plem_window_tol.
//
// Members written: pp, f, zero, the skip flags of each genotype group,
// sampleLogLikelihood, and (on convergence) iter and converged.
// Diagnostics go to haplo->VPHASE when par::haplo_plem_verbose is set.
void HaploWindow::performEM() {

  //////////////////
  // Begin E-M

  if ( par::haplo_plem_verbose )
    haplo->VPHASE << "\nWINDOW spanning "
                  << start << " to " << stop << "\n"
                  << "\nINNER EM LOOP FOR "
                  << par::haplo_plem_iter
                  << " ITERATIONS ";

  // Note the inclusive upper bound on j
  for (int j=0; j<=par::haplo_plem_iter; j++) {

    //////////////////////////
    // E-step for genoGroups

    set<MultiLocusGenotype*>::iterator im = genotypes.begin();

    while (im != genotypes.end() ) {

      // Index used for the per-group arrays (ambig, hap1, hap2, pp)
      int i = (*im)->reference;

      if (ambig[i]) {

        // Sum of the unscaled phase probabilities
        double s=0;

        // Haploid phases...
        if (haplo->haploid || (haplo->X && P->sample[i]->sex)) {
          for (int z=0; z<hap1[i].size(); z++) {
            pp[i][z] = f[hap1[i][z]];
            s += pp[i][z];
          }
        }
        else // ... or diploid
          {
            for (int z=0; z<hap1[i].size(); z++) {

              if ((*im)->skip[z])
                continue;

              int h1 = hap1[i][z];
              int h2 = hap2[i][z];

              // A phase using a zeroed-out haplotype is dropped for
              // this and all later iterations
              if (zero[h1] || zero[h2]) {
                (*im)->skip[z] = true;
                continue;
              }

              pp[i][z] = f[h1] * f[h2];
              // Two distinct haplotypes: the product is doubled
              if (h1 != h2)
                pp[i][z] *= 2;

              s += pp[i][z];
            }
          }

        /////////////////////////////////////////
        // Check for single phase with 0 probability

        // NOTE(review): with more than one phase and verbose output off,
        // nothing happens here and the rescale below divides by s == 0.
        if ( s == 0 ) {
          if ( pp[i].size()==1 ) {
            pp[i][0] = s = 1;

            if ( par::haplo_plem_verbose )
              haplo->VPHASE << "\n*** WARNING *** FIXED INDIVIDUAL "
                            << P->sample[i]->fid << " "
                            << P->sample[i]->iid
                            << " TO PP=1 FOR SINGLE IMPOSS PHASE\n";
          } else {
            if ( par::haplo_plem_verbose ) {
              haplo->VPHASE << "\n*** ERROR *** INDIVIDUAL "
                            << P->sample[i]->fid << " "
                            << P->sample[i]->iid
                            << " HAS >1 PHASE BUT PP SUMS TO 0\n";
              verboseDisplayWindows2(haplo,i,true);
              haplo->VPHASE.close();
              error("See phased.verbose (--em-verbose) file");
            }
          }
        }

        /////////////////////////////////////////
        // Rescale haplotype phase probabilities

        for (int z=0; z<hap1[i].size(); z++) {
          if ( !(*im)->skip[z] ) {
            pp[i][z] /= s;

            if ( par::haplo_plem_verbose ) {
              if ( (!realnum(pp[i][z])) ||
                   pp[i][z] < 0 ||
                   pp[i][z] > 1 )
                haplo->VPHASE << "\n*** WARNING *** PROBLEM PP FOR INDIVIDUAL "
                              << P->sample[i]->fid << " "
                              << P->sample[i]->iid << "\n";
            }
          }
        }
      }

      im++;
    }

    /////////////////////////////////////
    // M-step for pre-counted haplotypes

    // unambiguous counts
    for (int h=0; h<nh; h++)
      f[h] = uc[h];

    ////////////////////////////////////
    // M step for ambiguous genoGroups

    im = genotypes.begin();

    while (im != genotypes.end() ) {

      int i = (*im)->reference;

      if (ambig[i]) {
        // Each phase adds its probability times the group's count
        if (haplo->haploid || (haplo->X && P->sample[i]->sex)) {
          for (int z=0; z<hap1[i].size(); z++) {
            f[hap1[i][z]] += pp[i][z] * (*im)->count;
          }
        } else {
          for (int z=0; z<hap1[i].size(); z++) {

            if ((*im)->skip[z]) {
              continue;
            }

            // haplo->VPHASE << "considering " << haplotypeName( hap1[i][z] )
            //               << " and " << haplotypeName( hap2[i][z] ) << " for "
            //               << P->sample[i]->fid << " " << P->sample[i]->iid << "\t"
            //               << " times " << (*im)->count << " " << pp[i][z]
            //               << " and hap codes are " << hap1[i][z] << " " << hap2[i][z] << "\n" ;

            f[hap1[i][z]] += pp[i][z] * (*im)->count;
            f[hap2[i][z]] += pp[i][z] * (*im)->count;
          }
        }
      }

      ++im;
    }

    // validN is the total number of *chromosomes*
    for (int h=0; h<nh; h++)
      f[h] /= (double)haplo->validN;

    //////////////////////////////////////////
    // Update likelihood (not every iteration)

    // NOTE(review): the modulo assumes par::haplo_plem_likelihood_iter
    // is non-zero.
    if ( j == par::haplo_plem_iter - 1 ||
         j % par::haplo_plem_likelihood_iter == 0) {

      // Zero out unlikely haplotypes?
      for (int h=0; h<nh; h++)
        if ( !zero[h])
          if (f[h] <= par::haplo_plem_zero_threshold )
            zero[h] = true;

      // Optionally rescale the remaining frequencies to sum to 1
      if ( par::haplo_plem_nonzero_threshold ) {
        double psum = 0;

        for (int h=0; h<nh; h++)
          if ( ! zero[h] )
            psum += f[h];

        for (int h=0; h<nh; h++)
          if ( ! zero[h] )
            f[h] /= psum;
      }

      // lnl accumulates MINUS the log-likelihood (note the -= below)
      double lnl = 0;

      // genoGroups
      im = genotypes.begin();

      while (im != genotypes.end() ) {

        int i = (*im)->reference;

        double lk = 0;

        if (haplo->haploid || (haplo->X && P->sample[i]->sex)) {
          for (int z=0; z<hap1[i].size(); z++)
            lk += f[hap1[i][z]];
        }
        else
          for (int z=0; z<hap1[i].size(); z++) {

            if ((*im)->skip[z] ||
                zero[hap1[i][z]] ||
                zero[hap2[i][z]])
              continue;

            lk += f[hap1[i][z]] * f[hap2[i][z]];

            // Distinct haplotypes are counted twice
            if (hap1[i][z] != hap2[i][z])
              lk += f[hap1[i][z]] * f[hap2[i][z]];
          }

        // Groups with zero likelihood are left out of the sum
        if (lk > 0)
          lnl -= log(lk) * (*im)->count;

        ++im;
      }

      if ( par::haplo_plem_verbose ) {
        haplo->VPHASE << "INNER_LNL " << lnl << "\n";
      }

      // Converged when the stored value exceeds the new one by less than
      // the tolerance; never checked on the first iteration
      if (j > 0 &&
          sampleLogLikelihood - lnl < par::haplo_plem_window_tol ) {

        if ( par::haplo_plem_verbose )
          haplo->VPHASE << "INNER_CONVERGED AT " << j << " ITERATIONS\n";

        iter = j;
        converged = true;
        break;
      }

      sampleLogLikelihood = lnl;

    } // End of likelihood calculation

  } // Next EM iteration

  if ( par::haplo_plem_verbose )
    haplo->VPHASE << "INNER_EM HAS FINISHED/CONVERGED\n\n";

  // Verbose dump of every haplotype with frequency above 0.001
  if ( par::haplo_plem_verbose ) {
    haplo->VPHASE << "INNER_FREQS ";
    for (int h=0; h<nh; h++)
      if ( f[h] > 0.001 )
        haplo->VPHASE << h << " "
                      << haplotypeName(h) << "\t"
                      << f[h] << "\n";

    haplo->VPHASE << "\n--------------------\n";
  }

  // EM has converged/finished
}
// Write one results row per model parameter (the intercept, parameter 0,
// is skipped) to OUT, using loc for the chromosome, name, position and
// first-allele columns.
//
// A parameter's estimate, optional SE / confidence bounds, statistic and
// p-value are printed only when the fit is valid and its variance is at
// least 1e-20 and passes realnum(); otherwise those columns read "NA" and
// the p-value used for filtering is 1.
//
// Rows are dropped when p-value filtering is on (par::pfilter) and the
// p-value is not <= par::pfvalue, or when par::no_show_covar is set and
// the parameter is not testParameter.
void LinearModel::displayResults(ofstream & OUT, Locus * loc)
{
  // Variances are only fetched from a valid fit; otherwise use zeros,
  // which fail the variance check below.
  vector_t variances;
  if ( all_valid )
    variances = getVar();
  else
    variances.resize(np,0);

  for (int p = 1; p < np; ++p)
    {
      const bool okay =
        all_valid && !( variances[p] < 1e-20 || !realnum(variances[p]) );

      double se = 0;
      double Z = 0;
      double pvalue = 1;

      if (okay)
        {
          se = sqrt(variances[p]);
          Z = coef[p] / se;
          pvalue = pT(Z,Y.size()-np);
        }

      // Filtering on p-value?
      if ( par::pfilter && !( pvalue <= par::pfvalue ) )
        continue;

      // Hiding covariate rows?
      if ( par::no_show_covar && p != testParameter )
        continue;

      // Columns common to every row
      OUT << setw(4) << loc->chr << " ";
      OUT << setw(par::pp_maxsnp) << loc->name << " ";
      OUT << setw(10) << loc->bp << " ";
      OUT << setw(4) << loc->allele1 << " ";
      OUT << setw(10) << label[p] << " ";
      OUT << setw(8) << Y.size() << " ";

      if (!okay)
        {
          OUT << setw(10) << "NA" << " ";

          if (par::display_ci)
            {
              OUT << setw(8) << "NA" << " ";
              OUT << setw(8) << "NA" << " ";
              OUT << setw(8) << "NA" << " ";
            }

          OUT << setw(12) << "NA" << " ";
          OUT << setw(12) << "NA";
        }
      else
        {
          OUT << setw(10) << coef[p] << " ";

          if (par::display_ci)
            {
              OUT << setw(8) << se << " ";
              OUT << setw(8) << coef[p] - par::ci_zt * se << " ";
              OUT << setw(8) << coef[p] + par::ci_zt * se << " ";
            }

          OUT << setw(12) << Z << " ";
          OUT << setw(12) << pvalue;
        }

      OUT << "\n";
    }
}