/** * Main routine. The main logic of ProbABEL can be found here * * \param argc Number of command line arguments * \param argv Vector containing the command line arguments * * \return 0 if all went well. Other integer numbers if an error * occurred */ int main(int argc, char * argv[]) { cmdvars input_var; input_var.set_variables(argc, argv); input_var.printinfo(); cout << "Reading info data...\n" << flush; mlinfo mli(input_var.getMlinfofilename(), input_var.getMapfilename()); int nsnps = mli.nsnps; phedata phd; cout << "Reading phenotype data...\n" << flush; int interaction_cox = create_phenotype(phd, input_var); masked_matrix invvarmatrix; if (input_var.getInverseFilename() != NULL) { loadInvSigma(input_var, phd, invvarmatrix); } gendata gtd; cout << "Reading genotype data... " << flush; if (!input_var.getIsFvf()) { // TODO(maartenk): remove timing code // make clock to time loading of the non filevector file std::clock_t start; start = std::clock(); // use the non-filevector input format gtd.re_gendata(input_var.getGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, input_var.getSkipd(), phd.idnames); // TODO(maartenk): remove timing code double millisec=((std::clock() - start) / (double)(CLOCKS_PER_SEC / 1000))/1000; cout << "done in "<< millisec<< " seconds.\n" << flush; } else { // use the filevector input format (missing second last skipd // parameter) gtd.re_gendata(input_var.getStrGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, phd.idnames); cout << "done.\n" << flush; } // estimate null model #if COXPH coxph_data nrgd = coxph_data(phd, gtd, -1); #else regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded()); #endif std::cout << " loaded null data..." << std::flush; #if LOGISTIC logistic_reg nrd = logistic_reg(nrgd); nrd.estimate(0, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif LINEAR linear_reg nrd = linear_reg(nrgd); #if DEBUG std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE."; #endif nrd.estimate(0, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif COXPH coxph_reg nrd = coxph_reg(nrgd); nrd.estimate(nrgd, 0, input_var.getInteraction(), input_var.getNgpreds(), true, 1, mli, 0); #endif double null_loglik = nrd.loglik; std::cout << " estimated null model..."; // end null #if COXPH coxph_data rgd(phd, gtd, 0); #else regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded()); #endif std::cout << " formed regression object...\n"; // Open a vector of files that will be used for output. Depending // on the number of genomic predictors we either open 5 files (one // for each model if we have prob data) or one (if we have dosage // data). std::string outfilename_str(input_var.getOutfilename()); std::vector<std::ofstream*> outfile; // Prob data: All models output. One file per model if (input_var.getNgpreds() == 2) { open_files_for_output(outfile, outfilename_str); if (input_var.getNohead() != 1) { create_header(outfile, input_var, phd, interaction_cox); } } else // Dosage data: Only additive model => only one output file { outfile.push_back( new std::ofstream((outfilename_str + "_add.out.txt").c_str())); if (!outfile[0]->is_open()) { std::cerr << "Cannot open file for writing: " << outfilename_str << "\n"; exit(1); } if (input_var.getNohead() != 1) { create_header(outfile, input_var, phd, interaction_cox); } } // END else: we have dosage data => only one file int maxmod = 5; // Total number of models (in random // order: additive, recessive, // dominant, over_dominant, 2df). Only // with dosage data can we run all of // them. For dosage data we can only // run the additive model. int start_pos, end_pos; std::vector<std::ostringstream *> beta_sebeta; // Han Chen std::vector<std::ostringstream *> covvalue; // Oct 26, 2009 std::vector<std::ostringstream *> chi2; // Create string streams for betas, SEs, etc. These are used to // later store the various output values that will be written to // files. for (int i = 0; i < maxmod; i++) { beta_sebeta.push_back(new std::ostringstream()); beta_sebeta[i]->precision(6); // *beta_sebeta[i] << scientific; // Han Chen covvalue.push_back(new std::ostringstream()); covvalue[i]->precision(6); // *covvalue[i] << scientific; // Oct 26, 2009 chi2.push_back(new std::ostringstream()); chi2[i]->precision(6); // *chi2[i] << scientific; } // Here we start the analysis for each SNP. for (int csnp = 0; csnp < nsnps; csnp++) { rgd.update_snp(>d, csnp); int poly = 1; if (fabs(rgd.freq) < 1.e-16 || fabs(1. - rgd.freq) < 1.e-16) { poly = 0; } if (fabs(mli.Rsq[csnp]) < 1.e-16) { poly = 0; } // Write mlinfo information to the output file(s) // Prob data: All models output. One file per model if (input_var.getNgpreds() == 2) { for (unsigned int file = 0; file < outfile.size(); file++) { write_mlinfo(outfile, file, mli, csnp, input_var, rgd.gcount, rgd.freq); } } else{ // Dosage data: only additive model int file = 0; write_mlinfo(outfile, file, mli, csnp, input_var, rgd.gcount, rgd.freq); maxmod = 1; // We can only calculate the additive // model with dosage data } // Run regression for each model for the current SNP for (int model = 0; model < maxmod; model++) { if (poly) // Allele freq is not too rare { #if LOGISTIC logistic_reg rd(rgd); #elif LINEAR linear_reg rd(rgd); #elif COXPH coxph_reg rd(rgd); #endif #if !COXPH if (input_var.getScore()) { rd.score(nrd.residuals, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); } else { rd.estimate(0, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); } #else rd.estimate(rgd, model, input_var.getInteraction(), input_var.getNgpreds(), true, 0, mli, csnp); #endif int number_of_rows_or_columns = rd.beta.nrow; start_pos = get_start_position(input_var, model, number_of_rows_or_columns); // The regression coefficients for the SNPs are in the // last rows of beta[] and sebeta[]. for (int pos = start_pos; pos < rd.beta.nrow; pos++) { *beta_sebeta[model] << input_var.getSep() << rd.beta[pos] << input_var.getSep() << rd.sebeta[pos]; // Han Chen #if !COXPH if (input_var.getInverseFilename() == NULL && !input_var.getAllcov() && input_var.getInteraction() != 0) { if (pos > start_pos) { if (model == 0) { if (input_var.getNgpreds() == 2) { if (pos > start_pos + 2) { *covvalue[model] << rd.covariance[pos - 3] << input_var.getSep() << rd.covariance[pos - 2]; } } // END ngpreds=2 else { *covvalue[model] << rd.covariance[pos - 1]; } } // END model == 0 else { *covvalue[model] << rd.covariance[pos - 1]; } // END model != 0 } // END if pos > start_pos } #endif // Oct 26, 2009 } // END for(pos = start_pos; pos < rd.beta.nrow; pos++) // calculate chi^2 // ________________________________ // cout << rd.loglik<<" "<<input_var.getNgpreds() << "\n"; if (input_var.getInverseFilename() == NULL) { // Only if we don't have an inv.sigma file can we use LRT if (input_var.getScore() == 0) { double loglik = rd.loglik; if (rgd.gcount != gtd.nids) { // If SNP data is missing we didn't // correctly compute the null likelihood // Recalculate null likelihood by // stripping the SNP data column(s) from // the X matrix in the regression object // and run the null model estimation again // for this SNP. #if !COXPH regdata new_rgd = rgd; #else coxph_data new_rgd = rgd; #endif new_rgd.remove_snp_from_X(); #ifdef LINEAR linear_reg new_null_rd(new_rgd); #elif LOGISTIC logistic_reg new_null_rd(new_rgd); #endif #if !COXPH new_null_rd.estimate(0, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #else coxph_reg new_null_rd(new_rgd); new_null_rd.estimate(new_rgd, model, input_var.getInteraction(), input_var.getNgpreds(), true, 1, mli, csnp); #endif *chi2[model] << 2. * (loglik - new_null_rd.loglik); } else { // No missing SNP data, we can compute the LRT *chi2[model] << 2. * (loglik - null_loglik); } } else{ // We want score test output *chi2[model] << rd.chi2_score; } } // END if( inv.sigma == NULL ) else if (input_var.getInverseFilename() != NULL) { // We can't use the LRT here, because mmscore is a // REML method. Therefore go for the Wald test if (input_var.getNgpreds() == 2 && model == 0) { /* For the 2df model we can't simply use the * Wald statistic. This can be fixed using the * equation just below Eq.(4) in the ProbABEL * paper. TODO LCK */ *chi2[model] << "NaN"; } else { double Z = rd.beta[start_pos] / rd.sebeta[start_pos]; *chi2[model] << Z * Z; } } } // END first part of if(poly); allele not too rare else { // SNP is rare: beta, sebeta, chi2 = NaN int number_of_rows_or_columns = rgd.X.ncol; start_pos = get_start_position(input_var, model, number_of_rows_or_columns); if (input_var.getInteraction() != 0 && !input_var.getAllcov() && input_var.getNgpreds() != 2) { start_pos++; } if (input_var.getNgpreds() == 0) { end_pos = rgd.X.ncol; } else{ end_pos = rgd.X.ncol - 1; } if (input_var.getInteraction() != 0) { end_pos++; } for (int pos = start_pos; pos <= end_pos; pos++) { *beta_sebeta[model] << input_var.getSep() << "NaN" << input_var.getSep() << "NaN"; } if (input_var.getNgpreds() == 2) { // Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { if (model == 0) { *covvalue[model] << "NaN" << input_var.getSep() << "NaN"; } else{ *covvalue[model] << "NaN"; } } #endif // Oct 26, 2009 *chi2[model] << "NaN"; } else{ // ngpreds==1 (and SNP is rare) if (input_var.getInverseFilename() == NULL) { // Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *covvalue[model] << "NaN"; } #endif // Oct 26, 2009 } // END if getInverseFilename == NULL *chi2[model] << "NaN"; } // END ngpreds == 1 (and SNP is rare) } // END else: SNP is rare } // END of model cycle // Start writing beta's, se_beta's etc. to file if (input_var.getNgpreds() == 2) { for (int model = 0; model < maxmod; model++) { *outfile[model] << beta_sebeta[model]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[model] << covvalue[model]->str() << input_var.getSep(); } #endif *outfile[model] << chi2[model]->str() << "\n"; } // END for loop over all models } else // Dose data: only additive model. Only one output file { *outfile[0] << beta_sebeta[0]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[0] << covvalue[0]->str() << input_var.getSep(); } #endif *outfile[0] << chi2[0]->str() << "\n"; } // End ngpreds == 1 when writing output files // Clean chi2 and other streams for (int model = 0; model < maxmod; model++) { beta_sebeta[model]->str(""); // Han Chen covvalue[model]->str(""); // Oct 26, 2009 chi2[model]->str(""); } update_progress_to_cmd_line(csnp, nsnps); } // END for loop over all SNPs // We're almost done. All computations have finished, time to // clean up. std::cout << setprecision(2) << fixed; std::cout << "\b\b\b\b\b\b\b\b\b" << 100.; std::cout << "%... done\n"; // Close output files for (unsigned int i = 0; i < outfile.size(); i++) { outfile[i]->close(); delete outfile[i]; } // delete gtd; // Clean up a couple of vectors std::vector<std::ostringstream *>::iterator it = beta_sebeta.begin(); while (it != beta_sebeta.end()) { delete *it; ++it; } it = covvalue.begin(); while (it != covvalue.end()) { delete *it; ++it; } it = chi2.begin(); while (it != chi2.end()) { delete *it; ++it; } return (0); }
int main(int argc, char * argv[]) { cmdvars input_var; input_var.set_variables(argc, argv); input_var.printinfo(); // if (allcov && ngpreds>1) // { // cout << "\n\n" // << "WARNING: --allcov allowed only for 1 predictor (MLDOSE)\n"; // allcov = 0; // } mlinfo mli(input_var.getMlinfofilename(), input_var.getMapfilename()); int nsnps = mli.nsnps; phedata phd; int interaction_cox = create_phenotype(phd, input_var); //interaction--; // if (input_var.getInverseFilename()!= NULL && phd.ncov > 1) // { // std::cerr << "Error: In mmscore you can not use any covariates." // << " You phenotype file must conatin id column and " // << "trait (residuals) only\n"; // exit(1); // } // if (input_var.getInverseFilename()!= NULL && // (allcov == 1 || score == 1 // || input_var.getInteraction()!= 0 // || ngpreds==2)) // { // std::cerr << "Error: In mmscore you can use additive model " // << "without any inetractions only\n"; // exit(1); // } masked_matrix invvarmatrix; /* * now should be possible... delete this part later when everything works #if LOGISTIC if (input_var.getInverseFilename()!= NULL) { std::cerr << "ERROR: mmscore is forbidden for logistic regression\n"; exit(1); } #endif */ std::cout << "Reading data ..." << std::flush; if (input_var.getInverseFilename() != NULL) { loadInvSigma(input_var, phd, invvarmatrix); } gendata gtd; if (!input_var.getIsFvf()) // use the non-filevector input format gtd.re_gendata(input_var.getGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, input_var.getSkipd(), phd.idnames); else // use the filevector input format (missing second last skipd // parameter) gtd.re_gendata(input_var.getStrGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, phd.idnames); std::cout << " loaded genotypic data ..." << std::flush; /** if (input_var.getIsFvf()) gendata gtd(str_genfilename, nsnps, input_var.getNgpreds(), phd.nids_all, phd.allmeasured, phd.idnames); else gendata gtd(input_var.getGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, skipd, phd.idnames); **/ // estimate null model #if COXPH coxph_data nrgd = coxph_data(phd, gtd, -1); #else regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded()); #endif std::cout << " loaded null data ..." << std::flush; #if LOGISTIC logistic_reg nrd = logistic_reg(nrgd); nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif LINEAR linear_reg nrd = linear_reg(nrgd); #if DEBUG std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE."; #endif nrd.estimate(nrgd, 0, CHOLTOL, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif COXPH coxph_reg nrd(nrgd); nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0, input_var.getInteraction(), input_var.getNgpreds(), 1); #endif std::cout << " estimated null model ..."; // end null #if COXPH coxph_data rgd(phd, gtd, 0); #else regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded()); #endif std::cout << " formed regression object ..."; std::cout << " done\n" << std::flush; //________________________________________________________________ //Maksim, 9 Jan, 2009 std::string outfilename_str(input_var.getOutfilename()); std::vector<std::ofstream*> outfile; //All models output.One file per each model if (input_var.getNgpreds() == 2) { open_files_for_output(outfile, outfilename_str); if (input_var.getNohead() != 1) { create_header_1(outfile, input_var, phd, interaction_cox); } } else //Only additive model. Only one output file { outfile.push_back( new std::ofstream((outfilename_str + "_add.out.txt").c_str())); if (!outfile[0]->is_open()) { std::cerr << "Cannot open file for writing: " << outfilename_str << "\n"; exit(1); } if (input_var.getNohead() != 1) { create_header2(outfile, input_var, phd, interaction_cox); } } //________________________________________________________________ /* if (input_var.getAllcov()) { if (score) { outfile << input_var.getSep() << "beta_mu"; // << input_var.getSep() << "beta_SNP_A1"; outfile << input_var.getSep() << "sebeta_mu"; // << input_var.getSep() << "sebeta_SNP_A1"; } else { for (int i =0; i<phd.n_model_terms-1;i++) outfile << input_var.getSep() << "beta_" << phd.model_terms[i] << input_var.getSep() << "sebeta_" << phd.model_terms[i]; } if(interactio != 0) outfile << input_var.getSep() << "beta_SNP_" << phd.model_terms[interaction]; } if (input_var.getNgpreds()==2) { outfile << input_var.getSep() << "beta_SNP_A1A2" << input_var.getSep() << "beta_SNP_A1A1" << input_var.getSep() << "sebeta_SNP_A1A2" << input_var.getSep() << "sebeta_SNP_a1A1" << input_var.getSep() << "chi2_SNP_2df" << input_var.getSep() << "beta_SNP_addA1" << input_var.getSep() << "sebeta_SNP_addA1" << input_var.getSep() << "chi2_SNP_addA1" << input_var.getSep() << "beta_SNP_domA1" << input_var.getSep() << "sebeta_SNP_domA1" << input_var.getSep() << "chi2_SNP_domA1" << input_var.getSep() << "beta_SNP_recA1" << input_var.getSep() << "sebeta_SNP_recA1" << input_var.getSep() << "chi2_SNP_recA1" << input_var.getSep() << "beta_SNP_odom" << input_var.getSep() << "sebeta_SNP_odom" << input_var.getSep() << "chi2_SNP_odom\n"; } else { outfile << input_var.getSep() << "beta_SNP_add" << input_var.getSep() << "sebeta_SNP_add" << input_var.getSep() << "chi2_SNP_add\n"; } */ // exit(1); //________________________________________________________________ //Maksim, 9 Jan, 2009 int maxmod = 5; int start_pos, end_pos; std::vector<std::ostringstream *> beta_sebeta; //Han Chen std::vector<std::ostringstream *> covvalue; //Oct 26, 2009 std::vector<std::ostringstream *> chi2; for (int i = 0; i < maxmod; i++) { beta_sebeta.push_back(new std::ostringstream()); //Han Chen covvalue.push_back(new std::ostringstream()); //Oct 26, 2009 chi2.push_back(new std::ostringstream()); } for (int csnp = 0; csnp < nsnps; csnp++) { rgd.update_snp(gtd, csnp); double freq = 0.; int gcount = 0; float snpdata1[gtd.nids]; float snpdata2[gtd.nids]; if (input_var.getNgpreds() == 2) { //freq = ((gtd.G).column_mean(csnp*2)*2. + // (gtd.G).column_mean(csnp*2+1))/2.; gtd.get_var(csnp * 2, snpdata1); gtd.get_var(csnp * 2 + 1, snpdata2); for (unsigned int ii = 0; ii < gtd.nids; ii++) if (!isnan(snpdata1[ii]) && !isnan(snpdata2[ii])) { gcount++; freq += snpdata1[ii] + snpdata2[ii] * 0.5; } } else { // freq = (gtd.G).column_mean(csnp)/2.; gtd.get_var(csnp, snpdata1); for (unsigned int ii = 0; ii < gtd.nids; ii++) if (!isnan(snpdata1[ii])) { gcount++; freq += snpdata1[ii] * 0.5; } } freq /= static_cast<double>(gcount); int poly = 1; if (fabs(freq) < 1.e-16 || fabs(1. - freq) < 1.e-16) poly = 0; if (fabs(mli.Rsq[csnp]) < 1.e-16) poly = 0; //All models output. One file per each model if (input_var.getNgpreds() == 2) { //Write mlinfo to output: for (unsigned int file = 0; file < outfile.size(); file++) { *outfile[file] << mli.name[csnp] << input_var.getSep() << mli.A1[csnp] << input_var.getSep() << mli.A2[csnp] << input_var.getSep() << mli.Freq1[csnp] << input_var.getSep() << mli.MAF[csnp] << input_var.getSep() << mli.Quality[csnp] << input_var.getSep() << mli.Rsq[csnp] << input_var.getSep() << gcount << input_var.getSep() << freq; if (input_var.getChrom() != "-1") *outfile[file] << input_var.getSep() << input_var.getChrom(); if (input_var.getMapfilename() != NULL) *outfile[file] << input_var.getSep() << mli.map[csnp]; } for (int model = 0; model < maxmod; model++) { if (poly) //allel freq is not to rare { #if LOGISTIC logistic_reg rd(rgd); if (input_var.getScore()) rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); else rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); #elif LINEAR linear_reg rd(rgd); if (input_var.getScore()) rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); else { // rd.mmscore(rgd,0,CHOLTOL,model,input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); rd.estimate(rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); } #elif COXPH coxph_reg rd(rgd); rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), true, input_var.getNgpreds()); #endif if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() == 0) start_pos = rd.beta.nrow - 2; else if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() != 0) start_pos = rd.beta.nrow - 4; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() == 0) start_pos = rd.beta.nrow - 1; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() != 0) start_pos = rd.beta.nrow - 2; else start_pos = 0; for (int pos = start_pos; pos < rd.beta.nrow; pos++) { *beta_sebeta[model] << input_var.getSep() << rd.beta[pos] << input_var.getSep() << rd.sebeta[pos]; //Han Chen #if !COXPH if (input_var.getInverseFilename() == NULL && !input_var.getAllcov() && input_var.getInteraction() != 0) { if (pos > start_pos) { if (model == 0) { if (pos > start_pos + 2) { *covvalue[model] << rd.covariance[pos - 3] << input_var.getSep() << rd.covariance[pos - 2]; } } else { *covvalue[model] << rd.covariance[pos - 1]; } } } #endif //Oct 26, 2009 } //calculate chi2 //________________________________ if (input_var.getScore() == 0) { //*chi2[model] << 2.*(rd.loglik-null_loglik); *chi2[model] << rd.loglik; } else { //*chi2[model] << rd.chi2_score; *chi2[model] << "nan"; } //________________________________ } else //beta, sebeta = nan { if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() == 0) start_pos = rgd.X.ncol - 2; else if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() != 0) start_pos = rgd.X.ncol - 4; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() == 0) start_pos = rgd.X.ncol - 1; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() != 0) start_pos = rgd.X.ncol - 2; else start_pos = 0; if (model == 0) { end_pos = rgd.X.ncol; } else { end_pos = rgd.X.ncol - 1; } if (input_var.getInteraction() != 0) end_pos++; for (int pos = start_pos; pos < end_pos; pos++) { *beta_sebeta[model] << input_var.getSep() << "nan" << input_var.getSep() << "nan"; } //Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { if (model == 0) { *covvalue[model] << "nan" << input_var.getSep() << "nan"; } else { *covvalue[model] << "nan"; } } #endif //Oct 26, 2009 *chi2[model] << "nan"; } } //end of model cycle //Han Chen *outfile[0] << beta_sebeta[0]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[0] << covvalue[0]->str() << input_var.getSep(); } #endif *outfile[0] << chi2[0]->str() << "\n"; *outfile[1] << beta_sebeta[1]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[1] << covvalue[1]->str() << input_var.getSep(); } #endif *outfile[1] << chi2[1]->str() << "\n"; *outfile[2] << beta_sebeta[2]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[2] << covvalue[2]->str() << input_var.getSep(); } #endif *outfile[2] << chi2[2]->str() << "\n"; *outfile[3] << beta_sebeta[3]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[3] << covvalue[3]->str() << input_var.getSep(); } #endif *outfile[3] << chi2[3]->str() << "\n"; *outfile[4] << beta_sebeta[4]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[4] << covvalue[4]->str() << input_var.getSep(); } #endif *outfile[4] << chi2[4]->str() << "\n"; //Oct 26, 2009 } else //Only additive model. Only one output file { //Write mlinfo to output: *outfile[0] << mli.name[csnp] << input_var.getSep() << mli.A1[csnp] << input_var.getSep() << mli.A2[csnp] << input_var.getSep(); *outfile[0] << mli.Freq1[csnp] << input_var.getSep() << mli.MAF[csnp] << input_var.getSep() << mli.Quality[csnp] << input_var.getSep() << mli.Rsq[csnp] << input_var.getSep(); *outfile[0] << gcount << input_var.getSep() << freq; if (input_var.getChrom() != "-1") *outfile[0] << input_var.getSep() << input_var.getChrom(); if (input_var.getMapfilename() != NULL) *outfile[0] << input_var.getSep() << mli.map[csnp]; int model = 0; if (poly) //allel freq is not to rare { #if LOGISTIC logistic_reg rd(rgd); if (input_var.getScore()) rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); else rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); #elif LINEAR //cout << (rgd.get_unmasked_data()).nids << " 1\n"; #if DEBUG rgd.X.print(); rgd.Y.print(); #endif linear_reg rd(rgd); #if DEBUG rgd.X.print(); rgd.Y.print(); #endif //cout << (rgd.get_unmasked_data()).nids << " 2\n"; if (input_var.getScore()) { #if DEBUG cout << "input_var.getScore/n"; nrd.residuals.print(); cout << CHOLTOL << " <-CHOLTOL\n"; cout << model << " <-model\n"; cout << input_var.getInteraction() << " <-input_var.getInteraction()\n"; cout << input_var.getNgpreds() << " <-input_var.getNgpreds()\n"; invvarmatrix.print(); #endif rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); #if DEBUG rd.beta.print(); cout << rd.chi2_score << " <-chi2_scoren\n"; rd.covariance.print(); rd.residuals.print(); rd.sebeta.print(); cout << rd.loglik << " <-logliken\n"; cout << rd.sigma2 << " <-sigma2\n"; #endif } else { // if(input_var.getInverseFilename()== NULL) // { // cout << (rgd.get_unmasked_data()).nids << " 3\n"; #if DEBUG cout << "rd.estimate\n"; cout << CHOLTOL << " <-CHOLTOL\n"; cout << model << " <-model\n"; cout << input_var.getInteraction() << " <-input_var.getInteraction()\n"; cout << input_var.getNgpreds() << " <-input_var.getNgpreds()\n"; cout << input_var.getRobust() << " <-input_var.getRobust()\n"; cout << "start invarmatrix\n"; invvarmatrix.print(); cout << "end invarmatrix\n"; cout << rgd.is_interaction_excluded << " <-rgd.is_interaction_excluded\n"; #endif rd.estimate(rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); #if DEBUG cout << "rd.beta\n"; rd.beta.print(); cout << rd.chi2_score << " <-chi2_scoren\n"; cout << "rd.covariance\n"; rd.covariance.print(); cout << "rd.residuals\n"; rd.residuals.print(); cout << "rd.sebeta\n"; rd.sebeta.print(); cout << rd.loglik << " <-logliken\n"; cout << rd.sigma2 << " <-sigma2\n"; #endif //cout << (rgd.get_unmasked_data()).nids << " 4\n"; //} //else //{ // rd.mmscore(rgd, 0, CHOLTOL, model, // input_var.getInteraction(), // input_var.getNgpreds(), invvarmatrix); //} } #elif COXPH coxph_reg rd(rgd); rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), true, input_var.getNgpreds()); #endif if (!input_var.getAllcov() && input_var.getInteraction() == 0) { start_pos = rd.beta.nrow - 1; } else if (!input_var.getAllcov() && input_var.getInteraction() != 0) { start_pos = rd.beta.nrow - 2; } else { start_pos = 0; } #if DEBUG cout << " start_pos;" << start_pos << "\n"; #endif for (int pos = start_pos; pos < rd.beta.nrow; pos++) { *beta_sebeta[0] << input_var.getSep() << rd.beta[pos] << input_var.getSep() << rd.sebeta[pos]; //Han Chen #if !COXPH if (input_var.getInverseFilename() == NULL && !input_var.getAllcov() && input_var.getInteraction() != 0) { if (pos > start_pos) { *covvalue[0] << rd.covariance[pos - 1]; } } #endif //Oct 26, 2009 } //calculate chi2 //________________________________ if (input_var.getInverseFilename() == NULL) { #if DEBUG cout << " inverse_filename == NULL" << "\n"; #endif if (input_var.getScore() == 0) { *chi2[0] << rd.loglik; //2.*(rd.loglik-null_loglik); } else { *chi2[0] << "nan"; //rd.chi2_score; } } //________________________________ } else //beta, sebeta = nan { if (!input_var.getAllcov() && input_var.getInteraction() == 0) start_pos = rgd.X.ncol - 1; else if (!input_var.getAllcov() && input_var.getInteraction() != 0) start_pos = rgd.X.ncol - 2; else start_pos = 0; end_pos = rgd.X.ncol; if (input_var.getInteraction() != 0) { end_pos++; } if (input_var.getInteraction() != 0 && !input_var.getAllcov()) { start_pos++; } for (int pos = start_pos; pos < end_pos; pos++) { *beta_sebeta[0] << input_var.getSep() << "nan" << input_var.getSep() << "nan"; } if (input_var.getInverseFilename() == NULL) { //Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *covvalue[0] << "nan"; } #endif //Oct 26, 2009 *chi2[0] << "nan"; } } if (input_var.getInverseFilename() == NULL) { //Han Chen *outfile[0] << beta_sebeta[0]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[0] << covvalue[0]->str() << input_var.getSep(); } #endif *outfile[0] << chi2[model]->str() << "\n"; //Oct 26, 2009 } else { *outfile[0] << beta_sebeta[0]->str() << "\n"; #if DEBUG cout << "Se beta" << beta_sebeta[0] << "\n"; #endif } } //clean chi2 for (int i = 0; i < 5; i++) { beta_sebeta[i]->str(""); //Han Chen covvalue[i]->str(""); //Oct 26, 2009 chi2[i]->str(""); } update_progress_to_cmd_line(csnp, nsnps); } std::cout << "\b\b\b\b\b\b\b\b\b" << 100.; std::cout << "%... done\n"; //________________________________________________________________ //Maksim, 9 Jan, 2009 for (unsigned int i = 0; i < outfile.size(); i++) { outfile[i]->close(); delete outfile[i]; } //delete gtd; // Clean up a couple of vectors std::vector<std::ostringstream *>::iterator it = beta_sebeta.begin(); while (it != beta_sebeta.end()) { delete *it; ++it; } it = covvalue.begin(); while (it != covvalue.end()) { delete *it; ++it; } it = chi2.begin(); while (it != chi2.end()) { delete *it; ++it; } return (0); }