int main(int argc, char * argv[]) { cmdvars input_var; input_var.set_variables(argc, argv); input_var.printinfo(); // if (allcov && ngpreds>1) // { // cout << "\n\n" // << "WARNING: --allcov allowed only for 1 predictor (MLDOSE)\n"; // allcov = 0; // } mlinfo mli(input_var.getMlinfofilename(), input_var.getMapfilename()); int nsnps = mli.nsnps; phedata phd; int interaction_cox = create_phenotype(phd, input_var); //interaction--; // if (input_var.getInverseFilename()!= NULL && phd.ncov > 1) // { // std::cerr << "Error: In mmscore you can not use any covariates." // << " You phenotype file must conatin id column and " // << "trait (residuals) only\n"; // exit(1); // } // if (input_var.getInverseFilename()!= NULL && // (allcov == 1 || score == 1 // || input_var.getInteraction()!= 0 // || ngpreds==2)) // { // std::cerr << "Error: In mmscore you can use additive model " // << "without any inetractions only\n"; // exit(1); // } masked_matrix invvarmatrix; /* * now should be possible... delete this part later when everything works #if LOGISTIC if (input_var.getInverseFilename()!= NULL) { std::cerr << "ERROR: mmscore is forbidden for logistic regression\n"; exit(1); } #endif */ std::cout << "Reading data ..." << std::flush; if (input_var.getInverseFilename() != NULL) { loadInvSigma(input_var, phd, invvarmatrix); } gendata gtd; if (!input_var.getIsFvf()) // use the non-filevector input format gtd.re_gendata(input_var.getGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, input_var.getSkipd(), phd.idnames); else // use the filevector input format (missing second last skipd // parameter) gtd.re_gendata(input_var.getStrGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, phd.idnames); std::cout << " loaded genotypic data ..." << std::flush; /** if (input_var.getIsFvf()) gendata gtd(str_genfilename, nsnps, input_var.getNgpreds(), phd.nids_all, phd.allmeasured, phd.idnames); else gendata gtd(input_var.getGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, skipd, phd.idnames); **/ // estimate null model #if COXPH coxph_data nrgd = coxph_data(phd, gtd, -1); #else regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded()); #endif std::cout << " loaded null data ..." << std::flush; #if LOGISTIC logistic_reg nrd = logistic_reg(nrgd); nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif LINEAR linear_reg nrd = linear_reg(nrgd); #if DEBUG std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE."; #endif nrd.estimate(nrgd, 0, CHOLTOL, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif COXPH coxph_reg nrd(nrgd); nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0, input_var.getInteraction(), input_var.getNgpreds(), 1); #endif std::cout << " estimated null model ..."; // end null #if COXPH coxph_data rgd(phd, gtd, 0); #else regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded()); #endif std::cout << " formed regression object ..."; std::cout << " done\n" << std::flush; //________________________________________________________________ //Maksim, 9 Jan, 2009 std::string outfilename_str(input_var.getOutfilename()); std::vector<std::ofstream*> outfile; //All models output.One file per each model if (input_var.getNgpreds() == 2) { open_files_for_output(outfile, outfilename_str); if (input_var.getNohead() != 1) { create_header_1(outfile, input_var, phd, interaction_cox); } } else //Only additive model. Only one output file { outfile.push_back( new std::ofstream((outfilename_str + "_add.out.txt").c_str())); if (!outfile[0]->is_open()) { std::cerr << "Cannot open file for writing: " << outfilename_str << "\n"; exit(1); } if (input_var.getNohead() != 1) { create_header2(outfile, input_var, phd, interaction_cox); } } //________________________________________________________________ /* if (input_var.getAllcov()) { if (score) { outfile << input_var.getSep() << "beta_mu"; // << input_var.getSep() << "beta_SNP_A1"; outfile << input_var.getSep() << "sebeta_mu"; // << input_var.getSep() << "sebeta_SNP_A1"; } else { for (int i =0; i<phd.n_model_terms-1;i++) outfile << input_var.getSep() << "beta_" << phd.model_terms[i] << input_var.getSep() << "sebeta_" << phd.model_terms[i]; } if(interactio != 0) outfile << input_var.getSep() << "beta_SNP_" << phd.model_terms[interaction]; } if (input_var.getNgpreds()==2) { outfile << input_var.getSep() << "beta_SNP_A1A2" << input_var.getSep() << "beta_SNP_A1A1" << input_var.getSep() << "sebeta_SNP_A1A2" << input_var.getSep() << "sebeta_SNP_a1A1" << input_var.getSep() << "chi2_SNP_2df" << input_var.getSep() << "beta_SNP_addA1" << input_var.getSep() << "sebeta_SNP_addA1" << input_var.getSep() << "chi2_SNP_addA1" << input_var.getSep() << "beta_SNP_domA1" << input_var.getSep() << "sebeta_SNP_domA1" << input_var.getSep() << "chi2_SNP_domA1" << input_var.getSep() << "beta_SNP_recA1" << input_var.getSep() << "sebeta_SNP_recA1" << input_var.getSep() << "chi2_SNP_recA1" << input_var.getSep() << "beta_SNP_odom" << input_var.getSep() << "sebeta_SNP_odom" << input_var.getSep() << "chi2_SNP_odom\n"; } else { outfile << input_var.getSep() << "beta_SNP_add" << input_var.getSep() << "sebeta_SNP_add" << input_var.getSep() << "chi2_SNP_add\n"; } */ // exit(1); //________________________________________________________________ //Maksim, 9 Jan, 2009 int maxmod = 5; int start_pos, end_pos; std::vector<std::ostringstream *> beta_sebeta; //Han Chen std::vector<std::ostringstream *> covvalue; //Oct 26, 2009 std::vector<std::ostringstream *> chi2; for (int i = 0; i < maxmod; i++) { beta_sebeta.push_back(new std::ostringstream()); //Han Chen covvalue.push_back(new std::ostringstream()); //Oct 26, 2009 chi2.push_back(new std::ostringstream()); } for (int csnp = 0; csnp < nsnps; csnp++) { rgd.update_snp(gtd, csnp); double freq = 0.; int gcount = 0; float snpdata1[gtd.nids]; float snpdata2[gtd.nids]; if (input_var.getNgpreds() == 2) { //freq = ((gtd.G).column_mean(csnp*2)*2. + // (gtd.G).column_mean(csnp*2+1))/2.; gtd.get_var(csnp * 2, snpdata1); gtd.get_var(csnp * 2 + 1, snpdata2); for (unsigned int ii = 0; ii < gtd.nids; ii++) if (!isnan(snpdata1[ii]) && !isnan(snpdata2[ii])) { gcount++; freq += snpdata1[ii] + snpdata2[ii] * 0.5; } } else { // freq = (gtd.G).column_mean(csnp)/2.; gtd.get_var(csnp, snpdata1); for (unsigned int ii = 0; ii < gtd.nids; ii++) if (!isnan(snpdata1[ii])) { gcount++; freq += snpdata1[ii] * 0.5; } } freq /= static_cast<double>(gcount); int poly = 1; if (fabs(freq) < 1.e-16 || fabs(1. - freq) < 1.e-16) poly = 0; if (fabs(mli.Rsq[csnp]) < 1.e-16) poly = 0; //All models output. One file per each model if (input_var.getNgpreds() == 2) { //Write mlinfo to output: for (unsigned int file = 0; file < outfile.size(); file++) { *outfile[file] << mli.name[csnp] << input_var.getSep() << mli.A1[csnp] << input_var.getSep() << mli.A2[csnp] << input_var.getSep() << mli.Freq1[csnp] << input_var.getSep() << mli.MAF[csnp] << input_var.getSep() << mli.Quality[csnp] << input_var.getSep() << mli.Rsq[csnp] << input_var.getSep() << gcount << input_var.getSep() << freq; if (input_var.getChrom() != "-1") *outfile[file] << input_var.getSep() << input_var.getChrom(); if (input_var.getMapfilename() != NULL) *outfile[file] << input_var.getSep() << mli.map[csnp]; } for (int model = 0; model < maxmod; model++) { if (poly) //allel freq is not to rare { #if LOGISTIC logistic_reg rd(rgd); if (input_var.getScore()) rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); else rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); #elif LINEAR linear_reg rd(rgd); if (input_var.getScore()) rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); else { // rd.mmscore(rgd,0,CHOLTOL,model,input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); rd.estimate(rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); } #elif COXPH coxph_reg rd(rgd); rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), true, input_var.getNgpreds()); #endif if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() == 0) start_pos = rd.beta.nrow - 2; else if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() != 0) start_pos = rd.beta.nrow - 4; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() == 0) start_pos = rd.beta.nrow - 1; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() != 0) start_pos = rd.beta.nrow - 2; else start_pos = 0; for (int pos = start_pos; pos < rd.beta.nrow; pos++) { *beta_sebeta[model] << input_var.getSep() << rd.beta[pos] << input_var.getSep() << rd.sebeta[pos]; //Han Chen #if !COXPH if (input_var.getInverseFilename() == NULL && !input_var.getAllcov() && input_var.getInteraction() != 0) { if (pos > start_pos) { if (model == 0) { if (pos > start_pos + 2) { *covvalue[model] << rd.covariance[pos - 3] << input_var.getSep() << rd.covariance[pos - 2]; } } else { *covvalue[model] << rd.covariance[pos - 1]; } } } #endif //Oct 26, 2009 } //calculate chi2 //________________________________ if (input_var.getScore() == 0) { //*chi2[model] << 2.*(rd.loglik-null_loglik); *chi2[model] << rd.loglik; } else { //*chi2[model] << rd.chi2_score; *chi2[model] << "nan"; } //________________________________ } else //beta, sebeta = nan { if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() == 0) start_pos = rgd.X.ncol - 2; else if (!input_var.getAllcov() && model == 0 && input_var.getInteraction() != 0) start_pos = rgd.X.ncol - 4; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() == 0) start_pos = rgd.X.ncol - 1; else if (!input_var.getAllcov() && model != 0 && input_var.getInteraction() != 0) start_pos = rgd.X.ncol - 2; else start_pos = 0; if (model == 0) { end_pos = rgd.X.ncol; } else { end_pos = rgd.X.ncol - 1; } if (input_var.getInteraction() != 0) end_pos++; for (int pos = start_pos; pos < end_pos; pos++) { *beta_sebeta[model] << input_var.getSep() << "nan" << input_var.getSep() << "nan"; } //Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { if (model == 0) { *covvalue[model] << "nan" << input_var.getSep() << "nan"; } else { *covvalue[model] << "nan"; } } #endif //Oct 26, 2009 *chi2[model] << "nan"; } } //end of model cycle //Han Chen *outfile[0] << beta_sebeta[0]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[0] << covvalue[0]->str() << input_var.getSep(); } #endif *outfile[0] << chi2[0]->str() << "\n"; *outfile[1] << beta_sebeta[1]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[1] << covvalue[1]->str() << input_var.getSep(); } #endif *outfile[1] << chi2[1]->str() << "\n"; *outfile[2] << beta_sebeta[2]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[2] << covvalue[2]->str() << input_var.getSep(); } #endif *outfile[2] << chi2[2]->str() << "\n"; *outfile[3] << beta_sebeta[3]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[3] << covvalue[3]->str() << input_var.getSep(); } #endif *outfile[3] << chi2[3]->str() << "\n"; *outfile[4] << beta_sebeta[4]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[4] << covvalue[4]->str() << input_var.getSep(); } #endif *outfile[4] << chi2[4]->str() << "\n"; //Oct 26, 2009 } else //Only additive model. Only one output file { //Write mlinfo to output: *outfile[0] << mli.name[csnp] << input_var.getSep() << mli.A1[csnp] << input_var.getSep() << mli.A2[csnp] << input_var.getSep(); *outfile[0] << mli.Freq1[csnp] << input_var.getSep() << mli.MAF[csnp] << input_var.getSep() << mli.Quality[csnp] << input_var.getSep() << mli.Rsq[csnp] << input_var.getSep(); *outfile[0] << gcount << input_var.getSep() << freq; if (input_var.getChrom() != "-1") *outfile[0] << input_var.getSep() << input_var.getChrom(); if (input_var.getMapfilename() != NULL) *outfile[0] << input_var.getSep() << mli.map[csnp]; int model = 0; if (poly) //allel freq is not to rare { #if LOGISTIC logistic_reg rd(rgd); if (input_var.getScore()) rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); else rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); #elif LINEAR //cout << (rgd.get_unmasked_data()).nids << " 1\n"; #if DEBUG rgd.X.print(); rgd.Y.print(); #endif linear_reg rd(rgd); #if DEBUG rgd.X.print(); rgd.Y.print(); #endif //cout << (rgd.get_unmasked_data()).nids << " 2\n"; if (input_var.getScore()) { #if DEBUG cout << "input_var.getScore/n"; nrd.residuals.print(); cout << CHOLTOL << " <-CHOLTOL\n"; cout << model << " <-model\n"; cout << input_var.getInteraction() << " <-input_var.getInteraction()\n"; cout << input_var.getNgpreds() << " <-input_var.getNgpreds()\n"; invvarmatrix.print(); #endif rd.score(nrd.residuals, rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); #if DEBUG rd.beta.print(); cout << rd.chi2_score << " <-chi2_scoren\n"; rd.covariance.print(); rd.residuals.print(); rd.sebeta.print(); cout << rd.loglik << " <-logliken\n"; cout << rd.sigma2 << " <-sigma2\n"; #endif } else { // if(input_var.getInverseFilename()== NULL) // { // cout << (rgd.get_unmasked_data()).nids << " 3\n"; #if DEBUG cout << "rd.estimate\n"; cout << CHOLTOL << " <-CHOLTOL\n"; cout << model << " <-model\n"; cout << input_var.getInteraction() << " <-input_var.getInteraction()\n"; cout << input_var.getNgpreds() << " <-input_var.getNgpreds()\n"; cout << input_var.getRobust() << " <-input_var.getRobust()\n"; cout << "start invarmatrix\n"; invvarmatrix.print(); cout << "end invarmatrix\n"; cout << rgd.is_interaction_excluded << " <-rgd.is_interaction_excluded\n"; #endif rd.estimate(rgd, 0, CHOLTOL, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); #if DEBUG cout << "rd.beta\n"; rd.beta.print(); cout << rd.chi2_score << " <-chi2_scoren\n"; cout << "rd.covariance\n"; rd.covariance.print(); cout << "rd.residuals\n"; rd.residuals.print(); cout << "rd.sebeta\n"; rd.sebeta.print(); cout << rd.loglik << " <-logliken\n"; cout << rd.sigma2 << " <-sigma2\n"; #endif //cout << (rgd.get_unmasked_data()).nids << " 4\n"; //} //else //{ // rd.mmscore(rgd, 0, CHOLTOL, model, // input_var.getInteraction(), // input_var.getNgpreds(), invvarmatrix); //} } #elif COXPH coxph_reg rd(rgd); rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model, input_var.getInteraction(), true, input_var.getNgpreds()); #endif if (!input_var.getAllcov() && input_var.getInteraction() == 0) { start_pos = rd.beta.nrow - 1; } else if (!input_var.getAllcov() && input_var.getInteraction() != 0) { start_pos = rd.beta.nrow - 2; } else { start_pos = 0; } #if DEBUG cout << " start_pos;" << start_pos << "\n"; #endif for (int pos = start_pos; pos < rd.beta.nrow; pos++) { *beta_sebeta[0] << input_var.getSep() << rd.beta[pos] << input_var.getSep() << rd.sebeta[pos]; //Han Chen #if !COXPH if (input_var.getInverseFilename() == NULL && !input_var.getAllcov() && input_var.getInteraction() != 0) { if (pos > start_pos) { *covvalue[0] << rd.covariance[pos - 1]; } } #endif //Oct 26, 2009 } //calculate chi2 //________________________________ if (input_var.getInverseFilename() == NULL) { #if DEBUG cout << " inverse_filename == NULL" << "\n"; #endif if (input_var.getScore() == 0) { *chi2[0] << rd.loglik; //2.*(rd.loglik-null_loglik); } else { *chi2[0] << "nan"; //rd.chi2_score; } } //________________________________ } else //beta, sebeta = nan { if (!input_var.getAllcov() && input_var.getInteraction() == 0) start_pos = rgd.X.ncol - 1; else if (!input_var.getAllcov() && input_var.getInteraction() != 0) start_pos = rgd.X.ncol - 2; else start_pos = 0; end_pos = rgd.X.ncol; if (input_var.getInteraction() != 0) { end_pos++; } if (input_var.getInteraction() != 0 && !input_var.getAllcov()) { start_pos++; } for (int pos = start_pos; pos < end_pos; pos++) { *beta_sebeta[0] << input_var.getSep() << "nan" << input_var.getSep() << "nan"; } if (input_var.getInverseFilename() == NULL) { //Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *covvalue[0] << "nan"; } #endif //Oct 26, 2009 *chi2[0] << "nan"; } } if (input_var.getInverseFilename() == NULL) { //Han Chen *outfile[0] << beta_sebeta[0]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[0] << covvalue[0]->str() << input_var.getSep(); } #endif *outfile[0] << chi2[model]->str() << "\n"; //Oct 26, 2009 } else { *outfile[0] << beta_sebeta[0]->str() << "\n"; #if DEBUG cout << "Se beta" << beta_sebeta[0] << "\n"; #endif } } //clean chi2 for (int i = 0; i < 5; i++) { beta_sebeta[i]->str(""); //Han Chen covvalue[i]->str(""); //Oct 26, 2009 chi2[i]->str(""); } update_progress_to_cmd_line(csnp, nsnps); } std::cout << "\b\b\b\b\b\b\b\b\b" << 100.; std::cout << "%... done\n"; //________________________________________________________________ //Maksim, 9 Jan, 2009 for (unsigned int i = 0; i < outfile.size(); i++) { outfile[i]->close(); delete outfile[i]; } //delete gtd; // Clean up a couple of vectors std::vector<std::ostringstream *>::iterator it = beta_sebeta.begin(); while (it != beta_sebeta.end()) { delete *it; ++it; } it = covvalue.begin(); while (it != covvalue.end()) { delete *it; ++it; } it = chi2.begin(); while (it != chi2.end()) { delete *it; ++it; } return (0); }
/** * Main routine. The main logic of ProbABEL can be found here * * \param argc Number of command line arguments * \param argv Vector containing the command line arguments * * \return 0 if all went well. Other integer numbers if an error * occurred */ int main(int argc, char * argv[]) { cmdvars input_var; input_var.set_variables(argc, argv); input_var.printinfo(); cout << "Reading info data...\n" << flush; mlinfo mli(input_var.getMlinfofilename(), input_var.getMapfilename()); int nsnps = mli.nsnps; phedata phd; cout << "Reading phenotype data...\n" << flush; int interaction_cox = create_phenotype(phd, input_var); masked_matrix invvarmatrix; if (input_var.getInverseFilename() != NULL) { loadInvSigma(input_var, phd, invvarmatrix); } gendata gtd; cout << "Reading genotype data... " << flush; if (!input_var.getIsFvf()) { // TODO(maartenk): remove timing code // make clock to time loading of the non filevector file std::clock_t start; start = std::clock(); // use the non-filevector input format gtd.re_gendata(input_var.getGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, input_var.getSkipd(), phd.idnames); // TODO(maartenk): remove timing code double millisec=((std::clock() - start) / (double)(CLOCKS_PER_SEC / 1000))/1000; cout << "done in "<< millisec<< " seconds.\n" << flush; } else { // use the filevector input format (missing second last skipd // parameter) gtd.re_gendata(input_var.getStrGenfilename(), nsnps, input_var.getNgpreds(), phd.nids_all, phd.nids, phd.allmeasured, phd.idnames); cout << "done.\n" << flush; } // estimate null model #if COXPH coxph_data nrgd = coxph_data(phd, gtd, -1); #else regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded()); #endif std::cout << " loaded null data..." << std::flush; #if LOGISTIC logistic_reg nrd = logistic_reg(nrgd); nrd.estimate(0, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif LINEAR linear_reg nrd = linear_reg(nrgd); #if DEBUG std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE."; #endif nrd.estimate(0, 0, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #elif COXPH coxph_reg nrd = coxph_reg(nrgd); nrd.estimate(nrgd, 0, input_var.getInteraction(), input_var.getNgpreds(), true, 1, mli, 0); #endif double null_loglik = nrd.loglik; std::cout << " estimated null model..."; // end null #if COXPH coxph_data rgd(phd, gtd, 0); #else regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded()); #endif std::cout << " formed regression object...\n"; // Open a vector of files that will be used for output. Depending // on the number of genomic predictors we either open 5 files (one // for each model if we have prob data) or one (if we have dosage // data). std::string outfilename_str(input_var.getOutfilename()); std::vector<std::ofstream*> outfile; // Prob data: All models output. One file per model if (input_var.getNgpreds() == 2) { open_files_for_output(outfile, outfilename_str); if (input_var.getNohead() != 1) { create_header(outfile, input_var, phd, interaction_cox); } } else // Dosage data: Only additive model => only one output file { outfile.push_back( new std::ofstream((outfilename_str + "_add.out.txt").c_str())); if (!outfile[0]->is_open()) { std::cerr << "Cannot open file for writing: " << outfilename_str << "\n"; exit(1); } if (input_var.getNohead() != 1) { create_header(outfile, input_var, phd, interaction_cox); } } // END else: we have dosage data => only one file int maxmod = 5; // Total number of models (in random // order: additive, recessive, // dominant, over_dominant, 2df). Only // with dosage data can we run all of // them. For dosage data we can only // run the additive model. int start_pos, end_pos; std::vector<std::ostringstream *> beta_sebeta; // Han Chen std::vector<std::ostringstream *> covvalue; // Oct 26, 2009 std::vector<std::ostringstream *> chi2; // Create string streams for betas, SEs, etc. These are used to // later store the various output values that will be written to // files. for (int i = 0; i < maxmod; i++) { beta_sebeta.push_back(new std::ostringstream()); beta_sebeta[i]->precision(6); // *beta_sebeta[i] << scientific; // Han Chen covvalue.push_back(new std::ostringstream()); covvalue[i]->precision(6); // *covvalue[i] << scientific; // Oct 26, 2009 chi2.push_back(new std::ostringstream()); chi2[i]->precision(6); // *chi2[i] << scientific; } // Here we start the analysis for each SNP. for (int csnp = 0; csnp < nsnps; csnp++) { rgd.update_snp(>d, csnp); int poly = 1; if (fabs(rgd.freq) < 1.e-16 || fabs(1. - rgd.freq) < 1.e-16) { poly = 0; } if (fabs(mli.Rsq[csnp]) < 1.e-16) { poly = 0; } // Write mlinfo information to the output file(s) // Prob data: All models output. One file per model if (input_var.getNgpreds() == 2) { for (unsigned int file = 0; file < outfile.size(); file++) { write_mlinfo(outfile, file, mli, csnp, input_var, rgd.gcount, rgd.freq); } } else{ // Dosage data: only additive model int file = 0; write_mlinfo(outfile, file, mli, csnp, input_var, rgd.gcount, rgd.freq); maxmod = 1; // We can only calculate the additive // model with dosage data } // Run regression for each model for the current SNP for (int model = 0; model < maxmod; model++) { if (poly) // Allele freq is not too rare { #if LOGISTIC logistic_reg rd(rgd); #elif LINEAR linear_reg rd(rgd); #elif COXPH coxph_reg rd(rgd); #endif #if !COXPH if (input_var.getScore()) { rd.score(nrd.residuals, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix); } else { rd.estimate(0, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust()); } #else rd.estimate(rgd, model, input_var.getInteraction(), input_var.getNgpreds(), true, 0, mli, csnp); #endif int number_of_rows_or_columns = rd.beta.nrow; start_pos = get_start_position(input_var, model, number_of_rows_or_columns); // The regression coefficients for the SNPs are in the // last rows of beta[] and sebeta[]. for (int pos = start_pos; pos < rd.beta.nrow; pos++) { *beta_sebeta[model] << input_var.getSep() << rd.beta[pos] << input_var.getSep() << rd.sebeta[pos]; // Han Chen #if !COXPH if (input_var.getInverseFilename() == NULL && !input_var.getAllcov() && input_var.getInteraction() != 0) { if (pos > start_pos) { if (model == 0) { if (input_var.getNgpreds() == 2) { if (pos > start_pos + 2) { *covvalue[model] << rd.covariance[pos - 3] << input_var.getSep() << rd.covariance[pos - 2]; } } // END ngpreds=2 else { *covvalue[model] << rd.covariance[pos - 1]; } } // END model == 0 else { *covvalue[model] << rd.covariance[pos - 1]; } // END model != 0 } // END if pos > start_pos } #endif // Oct 26, 2009 } // END for(pos = start_pos; pos < rd.beta.nrow; pos++) // calculate chi^2 // ________________________________ // cout << rd.loglik<<" "<<input_var.getNgpreds() << "\n"; if (input_var.getInverseFilename() == NULL) { // Only if we don't have an inv.sigma file can we use LRT if (input_var.getScore() == 0) { double loglik = rd.loglik; if (rgd.gcount != gtd.nids) { // If SNP data is missing we didn't // correctly compute the null likelihood // Recalculate null likelihood by // stripping the SNP data column(s) from // the X matrix in the regression object // and run the null model estimation again // for this SNP. #if !COXPH regdata new_rgd = rgd; #else coxph_data new_rgd = rgd; #endif new_rgd.remove_snp_from_X(); #ifdef LINEAR linear_reg new_null_rd(new_rgd); #elif LOGISTIC logistic_reg new_null_rd(new_rgd); #endif #if !COXPH new_null_rd.estimate(0, model, input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1); #else coxph_reg new_null_rd(new_rgd); new_null_rd.estimate(new_rgd, model, input_var.getInteraction(), input_var.getNgpreds(), true, 1, mli, csnp); #endif *chi2[model] << 2. * (loglik - new_null_rd.loglik); } else { // No missing SNP data, we can compute the LRT *chi2[model] << 2. * (loglik - null_loglik); } } else{ // We want score test output *chi2[model] << rd.chi2_score; } } // END if( inv.sigma == NULL ) else if (input_var.getInverseFilename() != NULL) { // We can't use the LRT here, because mmscore is a // REML method. Therefore go for the Wald test if (input_var.getNgpreds() == 2 && model == 0) { /* For the 2df model we can't simply use the * Wald statistic. This can be fixed using the * equation just below Eq.(4) in the ProbABEL * paper. TODO LCK */ *chi2[model] << "NaN"; } else { double Z = rd.beta[start_pos] / rd.sebeta[start_pos]; *chi2[model] << Z * Z; } } } // END first part of if(poly); allele not too rare else { // SNP is rare: beta, sebeta, chi2 = NaN int number_of_rows_or_columns = rgd.X.ncol; start_pos = get_start_position(input_var, model, number_of_rows_or_columns); if (input_var.getInteraction() != 0 && !input_var.getAllcov() && input_var.getNgpreds() != 2) { start_pos++; } if (input_var.getNgpreds() == 0) { end_pos = rgd.X.ncol; } else{ end_pos = rgd.X.ncol - 1; } if (input_var.getInteraction() != 0) { end_pos++; } for (int pos = start_pos; pos <= end_pos; pos++) { *beta_sebeta[model] << input_var.getSep() << "NaN" << input_var.getSep() << "NaN"; } if (input_var.getNgpreds() == 2) { // Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { if (model == 0) { *covvalue[model] << "NaN" << input_var.getSep() << "NaN"; } else{ *covvalue[model] << "NaN"; } } #endif // Oct 26, 2009 *chi2[model] << "NaN"; } else{ // ngpreds==1 (and SNP is rare) if (input_var.getInverseFilename() == NULL) { // Han Chen #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *covvalue[model] << "NaN"; } #endif // Oct 26, 2009 } // END if getInverseFilename == NULL *chi2[model] << "NaN"; } // END ngpreds == 1 (and SNP is rare) } // END else: SNP is rare } // END of model cycle // Start writing beta's, se_beta's etc. to file if (input_var.getNgpreds() == 2) { for (int model = 0; model < maxmod; model++) { *outfile[model] << beta_sebeta[model]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[model] << covvalue[model]->str() << input_var.getSep(); } #endif *outfile[model] << chi2[model]->str() << "\n"; } // END for loop over all models } else // Dose data: only additive model. Only one output file { *outfile[0] << beta_sebeta[0]->str() << input_var.getSep(); #if !COXPH if (!input_var.getAllcov() && input_var.getInteraction() != 0) { *outfile[0] << covvalue[0]->str() << input_var.getSep(); } #endif *outfile[0] << chi2[0]->str() << "\n"; } // End ngpreds == 1 when writing output files // Clean chi2 and other streams for (int model = 0; model < maxmod; model++) { beta_sebeta[model]->str(""); // Han Chen covvalue[model]->str(""); // Oct 26, 2009 chi2[model]->str(""); } update_progress_to_cmd_line(csnp, nsnps); } // END for loop over all SNPs // We're almost done. All computations have finished, time to // clean up. std::cout << setprecision(2) << fixed; std::cout << "\b\b\b\b\b\b\b\b\b" << 100.; std::cout << "%... done\n"; // Close output files for (unsigned int i = 0; i < outfile.size(); i++) { outfile[i]->close(); delete outfile[i]; } // delete gtd; // Clean up a couple of vectors std::vector<std::ostringstream *>::iterator it = beta_sebeta.begin(); while (it != beta_sebeta.end()) { delete *it; ++it; } it = covvalue.begin(); while (it != covvalue.end()) { delete *it; ++it; } it = chi2.begin(); while (it != chi2.end()) { delete *it; ++it; } return (0); }
void colorengine::threadFunc() { std::unique_ptr<float> regdata(new float[width_*height_]); std::vector<float> scalar_constant(3); scalar_constant.push_back(0); scalar_constant.push_back(0); scalar_constant.push_back(0); _XTIFFInitialize(); rawdata_tiff = TIFFOpen(raw_tiff_path.c_str(), "w"); TIFFSetField(rawdata_tiff, TIFFTAG_NFILTERS, filter_->nfilters()); TIFFSetField(rawdata_tiff, TIFFTAG_NLIGHTS, nlights_); for (auto filter_index = 0; filter_index < filter_->nfilters(); ++filter_index) { std::vector<float> cmf = filter_->cmfValues(filter_->wavelengthAtPos(filter_index)); float illuminant = filter_->illuminantValue(filter_->wavelengthAtPos(filter_index)); for (auto xyz_index = 0; xyz_index < 3; ++xyz_index) { scalar_constant[xyz_index] += cmf[xyz_index] * illuminant; } } int page_index = 0; for (int filter_index = 0; filter_index < filter_->nfilters(); ++filter_index) { for (int light_index = 0; light_index < nlights_; ++light_index) { std::shared_ptr<unsigned short> data = data_queue_.pop(); if (cancel_) { return; } std::unique_ptr<float> floatdata(new float[width_*height_]); for (auto y = 0; y < height_; ++y) { for (auto x = 0; x < width_; ++x) { if (data.get()[y*width_+x] - bias_data.get()[y*width_+x] < 0) { data.get()[y*width_+x] = 0; } else { data.get()[y*width_+x] -= bias_data.get()[y*width_+x]; } } } for (auto y = 0; y < height_; ++y) { for (auto x = 0; x <width_; ++x) { if (flat_data[light_index]->filterData(filter_index)[y*width_+x] == 0) { floatdata.get()[y*width_+x] = 0; } else { floatdata.get()[y*width_+x] = (float)data.get()[y*width_+x] / (float)flat_data[light_index]->filterData(filter_index)[y*width_+x]; } } } // subtract bias, divide flat field cv::Mat floatdatamat(height_, width_, CV_32F, floatdata.get()); // Normalize data to a white reference std::vector<float> values; for (auto y = wtpt_rect_.y(); y < wtpt_rect_.y() + wtpt_rect_.size().height(); ++y) { for (auto x = wtpt_rect_.x(); x < wtpt_rect_.x() + wtpt_rect_.size().width(); ++x) { values.push_back(floatdata.get()[y*width_+x]); } } std::nth_element(values.begin(), values.begin()+(values.size()/2), values.end()); // median sort in constant time float measured_wtpt = values[values.size()/2]; for (auto y = 0; y < height_; ++y) { for (auto x = 0; x < width_; ++x) { floatdata.get()[y*width_+x] /= (measured_wtpt / absolute_wtpt_values_[filter_index]); } } if (filter_index == 0) { // use first filter as registration target for (auto y = 0; y < height_; ++y) { for (auto x = 0; x < width_; ++x) { regdata.get()[y*width_+x] = floatdata.get()[y*width_+x]; } } cv::Mat regdatamat(height_, width_, CV_32F, regdata.get()); } else { // Register image to regtarget_data // Phase-correlate based registration algorithm to align image planes based on two concentric circle targets // Concentric targets are used because of their non-repeating nature; phase correlate gets tripped up by repeating patterns as the peaks can be matched at errant points //cv::mat construction is efficient and does not copy data. Initialize registration target from our regdata cv::Mat registerTo(height_, width_, CV_32F, regdata.get()); cv::Mat reg0_source, reg0_target, reg1_source, reg1_target; cv::Mat reg0_sourcef, reg0_targetf, reg1_sourcef, reg1_targetf; // These cv::Mats' are our registration targets; selected by the user in the main application. //reg0_source = registration target 0 on the image that is going to be registered //reg0_target = registration target 1 on the image that is going to be registered to reg0_sourcef = floatdatamat(cv::Range(regtargets[0].y(), regtargets[0].y() + regtargets[0].size().height()), cv::Range(regtargets[0].x(), regtargets[0].x() + regtargets[0].size().width())); reg0_targetf = registerTo(cv::Range(regtargets[0].y(), regtargets[0].y() + regtargets[0].size().height()), cv::Range(regtargets[0].x(), regtargets[0].x() + regtargets[0].size().width()));// - (reg_size/2), regtargets[0].y + (reg_size/2)), cv::Range(regtargets[0].x - (reg_size/2), regtargets[0].x + (reg_size/2))); reg1_sourcef = floatdatamat(cv::Range(regtargets[1].y(), regtargets[1].y() + regtargets[1].size().height()), cv::Range(regtargets[1].x(), regtargets[1].x() + regtargets[1].size().width()));// - (reg_size/2), regtargets[1].y + (reg_size/2)), cv::Range(regtargets[1].x - (reg_size/2), regtargets[1].x + (reg_size/2))); reg1_targetf = registerTo(cv::Range(regtargets[1].y(), regtargets[1].y() + regtargets[1].size().height()), cv::Range(regtargets[1].x(), regtargets[1].x() + regtargets[1].size().width()));// - (reg_size/2), regtargets[1].y + (reg_size/2)), cv::Range(regtargets[1].x - (reg_size/2), regtargets[1].x + (reg_size/2))); double min, max; cv::Point minLoc, maxLoc; cv::minMaxLoc(reg0_sourcef, &min, &max, &minLoc, &maxLoc); reg0_sourcef *= 255/max; cv::minMaxLoc(reg1_sourcef, &min, &max, &minLoc, &maxLoc); reg1_sourcef *= 255/max; cv::minMaxLoc(reg0_targetf, &min, &max, &minLoc, &maxLoc); reg0_targetf *= 255/max; cv::minMaxLoc(reg1_targetf, &min, &max, &minLoc, &maxLoc); reg1_targetf *= 255/max; reg0_sourcef.convertTo(reg0_source, CV_8U); reg0_targetf.convertTo(reg0_target, CV_8U); reg1_sourcef.convertTo(reg1_source, CV_8U); reg1_targetf.convertTo(reg1_target, CV_8U); cv::adaptiveThreshold(reg0_source, reg0_source, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 11, 2); cv::adaptiveThreshold(reg0_target, reg0_target, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 11, 2); cv::adaptiveThreshold(reg1_source, reg1_source, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 11, 2); cv::adaptiveThreshold(reg1_target, reg1_target, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 11, 2); reg0_source.convertTo(reg0_source, CV_32F); reg1_source.convertTo(reg1_source, CV_32F); reg0_target.convertTo(reg0_target, CV_32F); reg1_target.convertTo(reg1_target, CV_32F); reg0_sourcef /= 255; reg0_targetf /= 255; reg1_sourcef /= 255; reg1_targetf /= 255; cv::Point2d offset_center = cv::phaseCorrelate(reg0_source, reg0_target); cv::Point2d offset_corner = cv::phaseCorrelate(reg1_source, reg1_target); float r, r_prime, deltaX, deltaY; deltaX = offset_corner.x - offset_center.x; deltaY = offset_corner.y - offset_center.y; r = pow((regtargets[1].center().x() - regtargets[0].center().x()), 2) + pow((regtargets[1].center().y() - regtargets[0].center().y()), 2); r = sqrt(r); r_prime = pow((regtargets[1].center().x() - regtargets[0].center().x() + deltaX), 2) + pow((regtargets[1].center().y() - regtargets[0].center().y() + deltaY), 2); r_prime = sqrt(r_prime); float scale = r_prime / r; float trans_x = (floatdatamat.size().width / 2) * (1-scale); float trans_y = (floatdatamat.size().height / 2) * (1-scale); std::vector<float> matrix_data(6); matrix_data[0] = scale; matrix_data[1] = 0; matrix_data[2] = trans_x; matrix_data[3] = 0; matrix_data[4] = scale; matrix_data[5] = trans_y; cv::Mat affine(2, 3, CV_32F, matrix_data.data()); cv::Mat scaled; cv::warpAffine(floatdatamat, scaled, affine, floatdatamat.size()); cv::Mat scaled_target = scaled(cv::Range(regtargets[0].y(), regtargets[0].y() + regtargets[0].size().height()), cv::Range(regtargets[0].x(), regtargets[0].x() + regtargets[0].size().width())); cv::Point2d translation_offset = cv::phaseCorrelate(scaled_target, reg0_target); matrix_data[2] += translation_offset.x; matrix_data[5] += translation_offset.y; cv::warpAffine(floatdatamat, floatdatamat, affine, floatdatamat.size()); } if (raw_tiff_path.size() > 0) { TIFFSetField(rawdata_tiff, TIFFTAG_IMAGEWIDTH, width_); TIFFSetField(rawdata_tiff, TIFFTAG_IMAGELENGTH, height_); TIFFSetField(rawdata_tiff, TIFFTAG_BITSPERSAMPLE, sizeof(float) * 8); TIFFSetField(rawdata_tiff, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP); TIFFSetField(rawdata_tiff, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); TIFFSetField(rawdata_tiff, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK); TIFFSetField(rawdata_tiff, TIFFTAG_SAMPLESPERPIXEL, 1); TIFFSetField(rawdata_tiff, TIFFTAG_SUBFILETYPE, FILETYPE_PAGE); TIFFSetField(rawdata_tiff, TIFFTAG_WTPTVAL, absolute_wtpt_values_[filter_index]); TIFFSetField(rawdata_tiff, TIFFTAG_WTPTMEASURED, measured_wtpt); TIFFSetField(rawdata_tiff, TIFFTAG_PAGENUMBER, page_index); for (auto row = 0; row < height_; ++row) { TIFFWriteScanline(rawdata_tiff, &floatdata.get()[row*width_], row); } TIFFWriteDirectory(rawdata_tiff); ++page_index; } int wavelength = filter_->wavelengthAtPos(filter_index); std::vector<float> cmf = filter_->cmfValues(wavelength); float illuminant = filter_->illuminantValue(wavelength); for (auto y = 0; y < height_; ++y) { for (auto x = 0; x < width_; ++x) { for (auto xyz_index = 0; xyz_index < 3; ++xyz_index) { xyz_data[light_index].get()->filterData(xyz_index)[y*width_+x] += floatdata.get()[y*width_+x] * cmf[xyz_index] * illuminant; } } } } } if (raw_tiff_path.size() > 0) { TIFFClose(rawdata_tiff); } // Scale xyz data by scalar constant for (auto light = 0; light < nlights_; ++light) { for (auto y = 0; y < height_; ++y) { for (auto x = 0; x < width_; ++x) { for (auto xyz_index = 0; xyz_index < 3; ++xyz_index) { xyz_data[light].get()->filterData(xyz_index)[y*width_+x] /= scalar_constant[xyz_index]; } } } } std::vector<XYZImage*> xyz_ptr; for (auto i = 0; i < xyz_data.size(); ++i) { xyz_ptr.push_back(xyz_data[i].get()); } std::vector<float> weights(nlights_); for (auto weight = 0; weight < weights.size(); ++weight) { weights[weight] = 1.0 / float(nlights_); } master_xyz = std::shared_ptr<XYZImage>(new XYZImage(xyz_ptr, nlights_, weights)); }