Example #1
0
/**
 * Main routine. The main logic of ProbABEL can be found here.
 *
 * The routine performs the following steps, in this order:
 *  - parse the command line into a cmdvars object and print the
 *    resulting settings;
 *  - read the SNP info data, the phenotype data and, if an inverse
 *    sigma file name was given, the inverse sigma matrix;
 *  - read the genotype data (filevector or non-filevector format);
 *  - estimate the null model (model without SNP) and store its
 *    log-likelihood;
 *  - open the output file(s) and, unless suppressed, write the headers;
 *  - for each SNP: update the regression data, run the regression for
 *    each model, collect beta/se/covariance/chi^2 values in string
 *    streams and write one line per output file;
 *  - close the output files and free the heap-allocated streams.
 *
 * Which regression type is used (linear, logistic or Cox PH) is
 * selected at compile time through the LINEAR, LOGISTIC and COXPH
 * preprocessor macros.
 *
 * \param argc Number of command line arguments
 * \param argv Vector containing the command line arguments
 *
 * \return 0 if all went well. Other integer numbers if an error
 * occurred (this routine itself calls exit(1) when the dosage output
 * file cannot be opened)
 */
int main(int argc, char * argv[])
{
    cmdvars input_var;
    input_var.set_variables(argc, argv);

    input_var.printinfo();

    cout << "Reading info data...\n" << flush;
    mlinfo mli(input_var.getMlinfofilename(), input_var.getMapfilename());
    int nsnps = mli.nsnps;
    phedata phd;
    cout << "Reading phenotype data...\n" << flush;
    int interaction_cox = create_phenotype(phd, input_var);

    masked_matrix invvarmatrix;

    // The inverse sigma matrix is only loaded when a file name for it
    // was supplied; otherwise invvarmatrix stays default-constructed.
    if (input_var.getInverseFilename() != NULL)
    {
        loadInvSigma(input_var, phd, invvarmatrix);
    }

    gendata gtd;
    cout << "Reading genotype data... " << flush;
    if (!input_var.getIsFvf())
    {
        // TODO(maartenk): remove timing code
        // Start a clock to time loading of the non-filevector file
        const std::clock_t start = std::clock();

        // use the non-filevector input format
        gtd.re_gendata(input_var.getGenfilename(), nsnps,
                       input_var.getNgpreds(), phd.nids_all, phd.nids,
                       phd.allmeasured, input_var.getSkipd(), phd.idnames);

        // TODO(maartenk): remove timing code
        // Convert clock ticks directly to seconds. Dividing by
        // CLOCKS_PER_SEC as a double avoids the truncating integer
        // division CLOCKS_PER_SEC / 1000, which is zero on platforms
        // where CLOCKS_PER_SEC < 1000.
        const double seconds =
            static_cast<double>(std::clock() - start) / CLOCKS_PER_SEC;
        cout << "done in "<< seconds << " seconds.\n" << flush;
    }
    else
    {
        // use the filevector input format (missing second last skipd
        // parameter)
        gtd.re_gendata(input_var.getStrGenfilename(), nsnps,
                       input_var.getNgpreds(), phd.nids_all, phd.nids,
                       phd.allmeasured, phd.idnames);
        cout << "done.\n" << flush;
    }


    // estimate null model
    // The regression data object for the null model is built with -1
    // as SNP index (the per-SNP object further down uses 0).
#if COXPH
    coxph_data nrgd = coxph_data(phd, gtd, -1);
#else
    regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded());
#endif

    std::cout << " loaded null data..." << std::flush;
#if LOGISTIC
    logistic_reg nrd = logistic_reg(nrgd);

    nrd.estimate(0, 0,
                 input_var.getInteraction(),
                 input_var.getNgpreds(),
                 invvarmatrix,
                 input_var.getRobust(),
                 1);
#elif LINEAR

    linear_reg nrd = linear_reg(nrgd);
#if DEBUG
    std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE.";
#endif
    nrd.estimate(0, 0, input_var.getInteraction(),
                 input_var.getNgpreds(), invvarmatrix,
                 input_var.getRobust(), 1);
#elif COXPH
    coxph_reg nrd = coxph_reg(nrgd);
    nrd.estimate(nrgd, 0,
                 input_var.getInteraction(), input_var.getNgpreds(),
                 true, 1, mli, 0);
#endif
    // Kept for the likelihood ratio test of SNPs without missing data
    double null_loglik = nrd.loglik;

    std::cout << " estimated null model...";
    // end null
#if COXPH
    coxph_data rgd(phd, gtd, 0);
#else
    regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded());
#endif
    std::cout << " formed regression object...\n";


    // Open a vector of files that will be used for output. Depending
    // on the number of genomic predictors we either open 5 files (one
    // for each model if we have prob data) or one (if we have dosage
    // data).
    std::string outfilename_str(input_var.getOutfilename());
    std::vector<std::ofstream*> outfile;

    // Prob data: All models output. One file per model
    if (input_var.getNgpreds() == 2)
    {
        open_files_for_output(outfile, outfilename_str);
        if (input_var.getNohead() != 1)
        {
            create_header(outfile, input_var, phd, interaction_cox);
        }
    }
    else  // Dosage data: Only additive model => only one output file
    {
        // Keep the full file name so that the error message below
        // reports the file that actually failed to open.
        const std::string add_filename = outfilename_str + "_add.out.txt";
        outfile.push_back(new std::ofstream(add_filename.c_str()));

        if (!outfile[0]->is_open())
        {
            std::cerr << "Cannot open file for writing: "
                      << add_filename
                      << "\n";
            exit(1);
        }
        if (input_var.getNohead() != 1)
        {
            create_header(outfile, input_var, phd, interaction_cox);
        }
    }  // END else: we have dosage data => only one file


    int maxmod = 5;             // Total number of models (in random
                                // order: additive, recessive,
                                // dominant, over_dominant, 2df). Only
                                // with probability data (ngpreds == 2)
                                // can we run all of them. For dosage
                                // data we can only run the additive
                                // model (maxmod is lowered to 1 in
                                // the SNP loop below).

    int start_pos, end_pos;

    // One output buffer per model; each is filled while the models
    // are run for a SNP, written to file and then emptied again.
    std::vector<std::ostringstream *> beta_sebeta;
    // Han Chen
    std::vector<std::ostringstream *> covvalue;
    // Oct 26, 2009
    std::vector<std::ostringstream *> chi2;

    // Create string streams for betas, SEs, etc. These are used to
    // later store the various output values that will be written to
    // files.
    for (int i = 0; i < maxmod; i++)
    {
        beta_sebeta.push_back(new std::ostringstream());
        beta_sebeta[i]->precision(6);
        // *beta_sebeta[i] << scientific;
        // Han Chen
        covvalue.push_back(new std::ostringstream());
        covvalue[i]->precision(6);
        // *covvalue[i] << scientific;
        // Oct 26, 2009
        chi2.push_back(new std::ostringstream());
        chi2[i]->precision(6);
        // *chi2[i] << scientific;
    }


    // Here we start the analysis for each SNP.
    for (int csnp = 0; csnp < nsnps; csnp++)
    {
        rgd.update_snp(&gtd, csnp);


        // poly == 0 means no regression is run for this SNP and NaN
        // is written instead. This happens when rgd.freq is
        // (numerically) 0 or 1, or when mli.Rsq[csnp] is
        // (numerically) 0.
        int poly = 1;
        if (fabs(rgd.freq) < 1.e-16 || fabs(1. - rgd.freq) < 1.e-16)
        {
            poly = 0;
        }

        if (fabs(mli.Rsq[csnp]) < 1.e-16)
        {
            poly = 0;
        }

        // Write mlinfo information to the output file(s)
        // Prob data: All models output. One file per model
        if (input_var.getNgpreds() == 2)
        {
            for (unsigned int file = 0; file < outfile.size(); file++)
            {
                write_mlinfo(outfile, file, mli, csnp, input_var,
                             rgd.gcount, rgd.freq);
            }
        } else{
            // Dosage data: only additive model
            int file = 0;
            write_mlinfo(outfile, file, mli, csnp, input_var,
                         rgd.gcount, rgd.freq);
            maxmod = 1;         // We can only calculate the additive
                                // model with dosage data
        }

        // Run regression for each model for the current SNP
        for (int model = 0; model < maxmod; model++)
        {
            if (poly) // Allele freq is not too rare
            {
#if LOGISTIC
                logistic_reg rd(rgd);
#elif LINEAR
                linear_reg rd(rgd);
#elif COXPH
                coxph_reg rd(rgd);
#endif
#if !COXPH
                if (input_var.getScore())
                {
                    // Score test, based on the null model residuals
                    rd.score(nrd.residuals, model,
                             input_var.getInteraction(),
                             input_var.getNgpreds(),
                             invvarmatrix);
                }
                else
                {
                    rd.estimate(0, model,
                                input_var.getInteraction(),
                                input_var.getNgpreds(),
                                invvarmatrix,
                                input_var.getRobust());
                }
#else
                rd.estimate(rgd, model,
                            input_var.getInteraction(),
                            input_var.getNgpreds(), true, 0, mli, csnp);
#endif

                int number_of_rows_or_columns = rd.beta.nrow;
                start_pos = get_start_position(input_var, model,
                                               number_of_rows_or_columns);

                // The regression coefficients for the SNPs are in the
                // last rows of beta[] and sebeta[].
                for (int pos = start_pos; pos < rd.beta.nrow; pos++)
                {
                    *beta_sebeta[model] << input_var.getSep()
                                        << rd.beta[pos]
                                        << input_var.getSep()
                                        << rd.sebeta[pos];
                    // Han Chen
#if !COXPH
                    // Covariance values are only collected for an
                    // interaction analysis without inv.sigma file and
                    // without the allcov option.
                    if (input_var.getInverseFilename() == NULL
                            && !input_var.getAllcov()
                            && input_var.getInteraction() != 0)
                    {
                        if (pos > start_pos)
                        {
                            if (model == 0)
                            {
                                if (input_var.getNgpreds() == 2)
                                {
                                    if (pos > start_pos + 2)
                                    {
                                        *covvalue[model]
                                            << rd.covariance[pos - 3]
                                            << input_var.getSep()
                                            << rd.covariance[pos - 2];
                                    }
                                }  // END ngpreds=2
                                else
                                {
                                    *covvalue[model] << rd.covariance[pos - 1];
                                }
                            }  // END model == 0
                            else
                            {
                                *covvalue[model] << rd.covariance[pos - 1];
                            }  // END model != 0
                        }  // END if pos > start_pos
                    }
#endif
                    // Oct 26, 2009
                }  // END for(pos = start_pos; pos < rd.beta.nrow; pos++)


                // calculate chi^2
                // ________________________________
                // cout <<  rd.loglik<<" "<<input_var.getNgpreds() << "\n";

                if (input_var.getInverseFilename() == NULL)
                { // Only if we don't have an inv.sigma file can we use LRT
                    if (input_var.getScore() == 0)
                    {
                        double loglik = rd.loglik;
                        if (rgd.gcount != gtd.nids)
                        {
                            // If SNP data is missing we didn't
                            // correctly compute the null likelihood

                            // Recalculate null likelihood by
                            // stripping the SNP data column(s) from
                            // the X matrix in the regression object
                            // and run the null model estimation again
                            // for this SNP.
#if !COXPH
                            regdata new_rgd = rgd;
#else
                            coxph_data new_rgd = rgd;
#endif

                            new_rgd.remove_snp_from_X();

                            // Test the macro values with #if, like
                            // every other model switch in this
                            // function, so that a macro defined as 0
                            // does not select the wrong model here.
#if LINEAR
                            linear_reg new_null_rd(new_rgd);
#elif LOGISTIC
                            logistic_reg new_null_rd(new_rgd);
#endif
#if !COXPH
                            new_null_rd.estimate(0,
                                                 model,
                                                 input_var.getInteraction(),
                                                 input_var.getNgpreds(),
                                                 invvarmatrix,
                                                 input_var.getRobust(), 1);
#else
                            coxph_reg new_null_rd(new_rgd);
                            new_null_rd.estimate(new_rgd,
                                                 model,
                                                 input_var.getInteraction(),
                                                 input_var.getNgpreds(),
                                                 true, 1, mli, csnp);
#endif
                            *chi2[model] << 2. * (loglik - new_null_rd.loglik);
                        }
                        else
                        {
                            // No missing SNP data, we can compute the LRT
                            *chi2[model] << 2. * (loglik - null_loglik);
                        }
                    } else{
                        // We want score test output
                        *chi2[model] << rd.chi2_score;
                    }
                }  // END if( inv.sigma == NULL )
                else
                {
                    // An inv.sigma file was given (mmscore).
                    // We can't use the LRT here, because mmscore is a
                    // REML method. Therefore go for the Wald test
                    if (input_var.getNgpreds() == 2 && model == 0)
                    {
                        /* For the 2df model we can't simply use the
                         * Wald statistic. This can be fixed using the
                         * equation just below Eq.(4) in the ProbABEL
                         * paper. TODO LCK
                         */
                        *chi2[model] << "NaN";
                    }
                    else
                    {
                        double Z = rd.beta[start_pos] / rd.sebeta[start_pos];
                        *chi2[model] << Z * Z;
                    }
                }
            }  // END first part of if(poly); allele not too rare
            else
            {   // SNP is rare: beta, sebeta, chi2 = NaN
                // No regression was run, so the number of NaN columns
                // to write is derived from the number of columns of
                // the design matrix X.
                int number_of_rows_or_columns = rgd.X.ncol;
                start_pos = get_start_position(input_var, model,
                        number_of_rows_or_columns);

                if (input_var.getInteraction() != 0 && !input_var.getAllcov()
                    && input_var.getNgpreds() != 2)
                {
                    start_pos++;
                }

                if (input_var.getNgpreds() == 0)
                {
                    end_pos = rgd.X.ncol;
                } else{
                    end_pos = rgd.X.ncol - 1;
                }

                if (input_var.getInteraction() != 0)
                {
                    end_pos++;
                }

                for (int pos = start_pos; pos <= end_pos; pos++)
                {
                    *beta_sebeta[model] << input_var.getSep()
                            << "NaN"
                            << input_var.getSep()
                            << "NaN";
                }

                if (input_var.getNgpreds() == 2)
                {
                    // Han Chen
#if !COXPH
                    if (!input_var.getAllcov()
                            && input_var.getInteraction() != 0)
                    {
                        if (model == 0)
                        {
                            *covvalue[model] << "NaN"
                                             << input_var.getSep()
                                             << "NaN";
                        } else{
                            *covvalue[model] << "NaN";
                        }
                    }
#endif
                    // Oct 26, 2009
                    *chi2[model] << "NaN";
                } else{
                    // ngpreds==1 (and SNP is rare)
                    if (input_var.getInverseFilename() == NULL)
                    {
                        //                     Han Chen
#if !COXPH
                        if (!input_var.getAllcov()
                                && input_var.getInteraction() != 0)
                        {
                            *covvalue[model] << "NaN";
                        }
#endif
                        // Oct 26, 2009
                    }  // END if getInverseFilename == NULL
                    *chi2[model] << "NaN";
                }  // END ngpreds == 1 (and SNP is rare)
            }  // END else: SNP is rare
        }  // END of model cycle


        // Start writing beta's, se_beta's etc. to file
        if (input_var.getNgpreds() == 2)
        {
            for (int model = 0; model < maxmod; model++)
            {
                *outfile[model] << beta_sebeta[model]->str()
                                << input_var.getSep();
#if !COXPH
                if (!input_var.getAllcov() && input_var.getInteraction() != 0)
                {
                    *outfile[model] << covvalue[model]->str()
                                    << input_var.getSep();
                }
#endif
                *outfile[model] << chi2[model]->str()
                                << "\n";
            }  // END for loop over all models
        }
        else  // Dose data: only additive model. Only one output file
        {
            *outfile[0] << beta_sebeta[0]->str() << input_var.getSep();
#if !COXPH
            if (!input_var.getAllcov() && input_var.getInteraction() != 0)
            {
                *outfile[0] << covvalue[0]->str() << input_var.getSep();
            }
#endif
            *outfile[0] << chi2[0]->str() << "\n";
        }  // End ngpreds == 1 when writing output files


        // Clean chi2 and other streams
        for (int model = 0; model < maxmod; model++)
        {
            beta_sebeta[model]->str("");
            // Han Chen
            covvalue[model]->str("");
            // Oct 26, 2009
            chi2[model]->str("");
        }

        update_progress_to_cmd_line(csnp, nsnps);
    }  // END for loop over all SNPs


    // We're almost done. All computations have finished, time to
    // clean up.

    // Back up over the progress indicator with backspaces and print
    // the final 100%.
    std::cout << setprecision(2) << fixed;
    std::cout << "\b\b\b\b\b\b\b\b\b" << 100.;
    std::cout << "%... done\n";

    // Close output files
    for (unsigned int i = 0; i < outfile.size(); i++)
    {
        outfile[i]->close();
        delete outfile[i];
    }

    // delete gtd;

    // Clean up a couple of vectors: the string streams were allocated
    // with new and have to be deleted by hand.
    std::vector<std::ostringstream *>::iterator it = beta_sebeta.begin();
    while (it != beta_sebeta.end())
    {
        delete *it;
        ++it;
    }
    it = covvalue.begin();
    while (it != covvalue.end())
    {
        delete *it;
        ++it;
    }
    it = chi2.begin();
    while (it != chi2.end())
    {
        delete *it;
        ++it;
    }

    return (0);
}
Example #2
0
int main(int argc, char * argv [])
{

	int next_option;
	const char * const short_options = "p:i:d:m:n:c:o:s:t:g:a:erlh:b:vu";
//b - interaction parameter

	const struct option long_options [] =
	{
		{"pheno",  1, NULL, 'p'},
		{"info",   1, NULL, 'i'},
		{"dose",   1, NULL, 'd'},
		{"map",	   1, NULL, 'm'},
		{"nids",   1, NULL, 'n'},
		{"chrom",  1, NULL, 'c'},
		{"out",    1, NULL, 'o'},
		{"skipd",  1, NULL, 's'},
		{"ntraits",1, NULL, 't'},
		{"ngpreds",1, NULL, 'g'},
		{"separat",1, NULL, 'a'},
		{"score",  0, NULL, 'r'},
		{"no-head",0, NULL, 'e'},
		{"allcov", 0, NULL, 'l'},
		{"help",   0, NULL, 'h'},
		{"interaction",   1, NULL, 'b'},
		{"interaction_only",   1, NULL, 'k'},
		{"mmscore",   1, NULL, 'v'},
		{"robust",   0, NULL, 'u'},
		{NULL  ,   0, NULL, 0  }
	};
	char * program_name = argv[0];

	char *phefilename = NULL;
	char *mlinfofilename = NULL;
	char *genfilename = NULL;
	char *mapfilename = NULL;
	char *outfilename = NULL;
	char *inverse_filename = NULL;
	string sep = " ";
	int nohead=0;
	int score=0;
	int npeople=-1;
	int ngpreds=1;
	int interaction=0;
	int interaction_excluded=0;
	int robust = 0;
	string chrom = "-1";
	int neco[] = {0,0,0};
	bool iscox=false;
#if COXPH
	int noutcomes = 2;
	iscox=true;
#else
	int noutcomes = 1;
#endif
	int skipd = 2;
	int allcov = 0;
	do
	{
		next_option = getopt_long(argc,argv,short_options,long_options,NULL);
		switch (next_option)
		{
			case 'h': print_help(program_name,0);
			case 'p': 
				  phefilename = optarg; 
				  neco[0]=1; 
				  break;
			case 'i': 
				  mlinfofilename = optarg; 
				  neco[1]=1; 
				  break;
			case 'd': 
				  genfilename = optarg; 
				  neco[2]=1; 
				  break;
			case 'm': 
				  mapfilename = optarg; 
				  break;
			case 'n': 
				  npeople = atoi(optarg); 
				  break;
			case 'c': 
				  chrom = optarg; 
				  break;
			case 'o': 
				  outfilename = optarg; 
				  break;
			case 's':
				  skipd = atoi(optarg); 
				  break;
			case 't':
				  noutcomes = atoi(optarg); 
				  break;
			case 'g':
				  ngpreds = atoi(optarg); 
				  break;
			case 'a':
				  sep = optarg; 
				  break;
			case 'e':
				 nohead=1; 
				 break;
			case 'r':
				 score=1; 
				 break;
			case 'l':
				 allcov=1; 
				 break;
			case 'b':
				 interaction=atoi(optarg); 
				 break;
			case 'k':
				 interaction_excluded=atoi(optarg); 
				 break;
			case 'v':
				 inverse_filename=optarg; 
				 break;
			case 'u':
				 robust=1;
				 break;


			case '?': print_usage(program_name,1);
			case -1 : break;
			default: abort();
		}
	}
	while (next_option != -1);


	fprintf(stdout, 
		"ProbABEL v. %s (%s) (C) Yurii Aulchenko, Maksim Struchalin, EMCR, and others\n\n", 
		VERSION, DATE);
	if (neco[0]!=1 || neco[1]!=1 || neco[2]!=1)
	{
		print_usage(program_name,1);
	}
	
	fprintf(stdout,"Options in effect:\n");
	fprintf(stdout,"\t --pheno   = %s\n",phefilename);
	fprintf(stdout,"\t --info    = %s\n",mlinfofilename);
	fprintf(stdout,"\t --dose    = %s\n",genfilename);
	fprintf(stdout,"\t --ntraits = %d\n",noutcomes);
	fprintf(stdout,"\t --ngpreds = %d\n",ngpreds);
	fprintf(stdout,"\t --interaction = %d\n",interaction);
	fprintf(stdout,"\t --interaction_only = %d\n",interaction_excluded);
	
	if (inverse_filename != NULL) fprintf(stdout,"\t --mmscore = %s\n",inverse_filename);
	else fprintf(stdout,"\t --mmscore     = not in output\n");
//	fprintf(stdout,"\t --mmscore = %s\n",inverse_filename);
	
	if (mapfilename != NULL) fprintf(stdout,"\t --map     = %s\n",mapfilename);
	else fprintf(stdout,"\t --map     = not in output\n");
	if (npeople>0) fprintf(stdout,"\t --nids    = %d\n",npeople);
	else fprintf(stdout,"\t --nids    = estimated from data\n");
	if (chrom != "-1") cout << "\t --chrom   = " << chrom << "\n";
	else cout << "\t --chrom   = not in output\n";
	if (outfilename != NULL ) fprintf(stdout,"\t --out     = %s\n",outfilename);
	else fprintf(stdout,"\t --out     = regression.out.txt\n");
	fprintf(stdout,"\t --skipd   = %d\n",skipd);
	cout << "\t --separat = \"" << sep << "\"\n";
	if (score)
		fprintf(stdout,"\t --score   = ON\n");
	else
		fprintf(stdout,"\t --score   = OFF\n");
	if (nohead)
		fprintf(stdout,"\t --nohead  = ON\n");
	else
		fprintf(stdout,"\t --nohead  = OFF\n");
	if (allcov)
		fprintf(stdout,"\t --allcov  = ON\n");
	else
		fprintf(stdout,"\t --allcov  = OFF\n");
	if (robust)
		fprintf(stdout,"\t --robust  = ON\n");
	else
		fprintf(stdout,"\t --robust  = OFF\n");

	if (ngpreds!=1 && ngpreds!=2) 
	{
		fprintf(stderr,"\n\n--ngpreds should be 1 for MLDOSE or 2 for MLPROB\n");
		exit(1);
	}
	
	if(interaction_excluded != 0)
		{
		interaction = interaction_excluded; //ups
		is_interaction_excluded = true;
		}

#if COXPH
	if (score) 
	{
		fprintf(stderr,"\n\nOption --score is implemented for linear and logistic models only\n");
		exit(1);
	}
#endif
//	if (allcov && ngpreds>1)
//	{
//		fprintf(stdout,"\n\nWARNING: --allcov allowed only for 1 predictor (MLDOSE)\n");
//		allcov = 0;
//	}

	mlinfo mli(mlinfofilename,mapfilename);
	int nsnps = mli.nsnps;
	phedata phd(phefilename,noutcomes,npeople, interaction, iscox);
	
  int interaction_cox = interaction;
#if COXPH
	interaction_cox--;
#endif
	if(interaction < 0 || interaction > phd.ncov || interaction_cox > phd.ncov)
		{
		std::cerr << "error: Interaction parameter is out of range (ineraction="<<interaction<<") \n";
		exit(1);
		}

	//interaction--;

//	if(inverse_filename != NULL && phd.ncov > 1)
//		{
//		std::cerr<<"Error: In mmscore you can not use any covariates. You phenotype file must conatin id column and trait (residuals) only\n";
//		exit(1);
//		}

//	if(inverse_filename != NULL && (allcov == 1 || score == 1 || interaction != 0 || ngpreds==2))
//		{
//		std::cerr<<"Error: In mmscore you can use additive model without any inetractions only\n";		
//		exit(1);
//		}




	 mematrix<double> invvarmatrix;

#if LOGISTIC		
	if(inverse_filename != NULL) {std::cerr<<"ERROR: mmscore is forbidden for logistic regression\n";exit(1);}
#endif

#if COXPH
	if(inverse_filename != NULL) {std::cerr<<"ERROR: mmscore is forbidden for cox regression\n";exit(1);}
	if (robust) {
	  //std::cerr<<"ERROR: robust standard errors not implemented for Cox regression (drop us e-mail if you really need that)\n";exit(1);
	  cout << "Robust standard errors implemented for Cox regression\n";
	}
#endif


	if(inverse_filename != NULL)
		{
		std::cout<<"you are runing mmscore...\n";
		}

	std::cout << "Reading data ...";



	if(inverse_filename != NULL)
		{
		InvSigma inv(inverse_filename, &phd);
		invvarmatrix = inv.get_matrix();
		double par=1.; //var(phd.Y)*phd.nids/(phd.nids-phd.ncov-1);
		invvarmatrix = invvarmatrix*par;
	//	matrix.print();
		}

		
	

	std::cout.flush();
	gendata gtd(genfilename,nsnps,ngpreds,phd.nids_all,phd.nids,phd.allmeasured,skipd,phd.idnames);

// estimate null model
	double null_loglik=0.;
#if COXPH
	coxph_data nrgd(phd,gtd,-1);
#else 
	regdata nrgd(phd,gtd,-1);
#endif
#if LOGISTIC
	logistic_reg nrd(nrgd);
	nrd.estimate(nrgd,0,MAXITER,EPS,CHOLTOL,0, interaction, ngpreds, invvarmatrix, robust, 1);
#elif LINEAR
	linear_reg nrd(nrgd);
	nrd.estimate(nrgd,0,CHOLTOL,0, interaction, ngpreds, invvarmatrix, robust, 1);
#elif COXPH
	coxph_reg nrd(nrgd);
	nrd.estimate(nrgd,0,MAXITER,EPS,CHOLTOL,0, interaction, ngpreds, 1, robust);
#endif
	null_loglik = nrd.loglik;

// end null 
#if COXPH
	coxph_data rgd(phd,gtd,0);
#else 
	regdata rgd(phd,gtd,0);
#endif
	std::cout << " done\n";
	std::cout.flush();







//________________________________________________________________
//Maksim, 9 Jan, 2009

if (outfilename==NULL) 
	{
	outfilename="regression";
	}

std::string outfilename_str(outfilename);
std::vector<std::ofstream*> outfile;


if (nohead!=1)
	{

	if(ngpreds==2) //All models output. One file per each model
		{
		// open a file for output
		//_____________________



		for(int i=0 ; i<5 ; i++)
			{
			outfile.push_back(new std::ofstream());
			}
	
		outfile[0]->open((outfilename_str+"_2df.out.txt").c_str());
		outfile[1]->open((outfilename_str+"_add.out.txt").c_str());
		outfile[2]->open((outfilename_str+"_domin.out.txt").c_str());
		outfile[3]->open((outfilename_str+"_recess.out.txt").c_str());
		outfile[4]->open((outfilename_str+"_over_domin.out.txt").c_str());



		if (!outfile[0]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_2df.out.txt" << "\n"; exit(1);}
		if (!outfile[1]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_add.out.txt" << "\n"; exit(1);}
		if (!outfile[2]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_domin.out.txt" << "\n"; exit(1);}
		if (!outfile[3]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_recess.out.txt" << "\n"; exit(1);}
		if (!outfile[4]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_over_domin.out.txt" << "\n"; exit(1);}
		//_____________________


		//Header
		//_____________________
		for(int i=0 ; i<outfile.size() ; i++)
			{
			(*outfile[i]) << "name" << sep << "A1" << sep << "A2" << sep << "Freq1" << sep << "MAF" << sep << "Quality" << sep << "Rsq" 
							<< sep << "n" << sep << "Mean_predictor_allele";
			if (chrom != "-1") (*outfile[i]) << sep << "chrom";
			if (mapfilename != NULL) (*outfile[i]) << sep << "position";
			}
		//_____________________
	
		if(allcov) //All covariates in output
			{
			for (int file=0; file<outfile.size() ; file++)
				for (int i =0; i<phd.n_model_terms-1;i++)
					*outfile[file] << sep << "beta_" << phd.model_terms[i] << sep << "sebeta_" << phd.model_terms[i];
			}
			*outfile[0] << sep << "beta_SNP_A1A2" << sep << "beta_SNP_A1A1" << sep << "sebeta_SNP_A1A2" << sep << "sebeta_SNP_A1A1";
			*outfile[1] << sep << "beta_SNP_addA1" << sep << "sebeta_SNP_addA1";
			*outfile[2] << sep << "beta_SNP_domA1" << sep << "sebeta_SNP_domA1";
			*outfile[3] << sep << "beta_SNP_recA1" << sep << "sebeta_SNP_recA1";
			*outfile[4] << sep << "beta_SNP_odom" << sep << "sebeta_SNP_odom";
	
		if(interaction != 0)
			{
//Han Chen
			*outfile[0] << sep << "beta_SNP_A1A2_" << phd.model_terms[interaction_cox] << sep << "sebeta_SNP_A1A2_" << phd.model_terms[interaction_cox]
								 << sep << "beta_SNP_A1A1_" << phd.model_terms[interaction_cox] << sep << "sebeta_SNP_A1A1_" << phd.model_terms[interaction_cox];
			//   #if !COXPH
	    	if(inverse_filename == NULL && !allcov) *outfile[0] << sep << "cov_SNP_A1A2_int_SNP_" << phd.model_terms[interaction_cox]
            << sep << "cov_SNP_A1A1_int_SNP_" << phd.model_terms[interaction_cox];
		// #endif
//Oct 26, 2009
			for (int file=1; file<outfile.size() ; file++)
				{		
			  *outfile[file] << sep << "beta_SNP_" << phd.model_terms[interaction_cox]  << sep << "sebeta_SNP_" << phd.model_terms[interaction_cox];
//Han Chen
    #if !COXPH
	    	if(inverse_filename == NULL && !allcov) *outfile[file] << sep << "cov_SNP_int_SNP_" << phd.model_terms[interaction_cox];
 	  #endif
//Oct 26, 2009
				}
			}
		*outfile[0] << sep << "chi2_SNP_2df\n";
		*outfile[1] << sep << "chi2_SNP_A1\n";
		*outfile[2] << sep << "chi2_SNP_domA1\n";
		*outfile[3] << sep << "chi2_SNP_recA1\n";
		*outfile[4] << sep << "chi2_SNP_odom\n";
	

		}
	else //Only additive model. Only one output file
		{
			
		// open a file for output
		//_____________________
//		if (outfilename != NULL)
//	 		{
			outfile.push_back(new std::ofstream((outfilename_str+"_add.out.txt").c_str()));
//			}
//		else
//	 		{
//			outfilename_str="regression_add.out.txt"; outfile.push_back(new std::ofstream((outfilename_str+"_add.out.txt").c_str()));
//			}

		if (!outfile[0]->is_open())
			{
			std::cerr << "Can not open file for writing: " << outfilename_str << "\n";
			exit(1);
			}
		//_____________________

		//Header
		//_____________________
			*outfile[0] << "name" << sep << "A1" << sep << "A2" << sep << "Freq1" << sep << "MAF" << sep << "Quality" << sep << "Rsq" 
							<< sep << "n" << sep << "Mean_predictor_allele";
			if (chrom != "-1") *outfile[0] << sep << "chrom";
			if (mapfilename != NULL) *outfile[0] << sep << "position";
		//_____________________


		if(allcov) //All covariates in output
			{
			for (int i =0; i<phd.n_model_terms-1;i++)
				*outfile[0] << sep << "beta_" << phd.model_terms[i] << sep << "sebeta_" << phd.model_terms[i];
			
			*outfile[0] << sep << "beta_SNP_add" << sep << "sebeta_SNP_add";
			}
		else //Only beta, sebeta for additive model go to output file
			{
			*outfile[0] << sep << "beta_SNP_add" << sep << "sebeta_SNP_add";
			}
		if(interaction != 0) *outfile[0] << sep << "beta_SNP_" << phd.model_terms[interaction_cox] << sep << "sebeta_SNP_" << phd.model_terms[interaction_cox];

		if(inverse_filename == NULL)
//Han Chen
            {
	      //            #if !COXPH
	    	if(interaction != 0 && !allcov) *outfile[0] << sep << "cov_SNP_int_SNP_" << phd.model_terms[interaction_cox];
		// 	        #endif
			*outfile[0] << sep << "chi2_SNP";
            }
//Oct 26, 2009
		*outfile[0] << "\n";


		}
	}
else
	{
	if(ngpreds==2) //All models output. One file per each model
		{
		// open a file for output
		//_____________________
//		if (outfilename==NULL) 
//			{
//			outfilename_str="regression";
//			}



		for(int i=0 ; i<5 ; i++)
			{
			outfile.push_back(new std::ofstream());
			}
	
		outfile[0]->open((outfilename_str+"_2df.out.txt").c_str());
		outfile[1]->open((outfilename_str+"_add.out.txt").c_str());
		outfile[2]->open((outfilename_str+"_domin.out.txt").c_str());
		outfile[3]->open((outfilename_str+"_recess.out.txt").c_str());
		outfile[4]->open((outfilename_str+"_over_domin.out.txt").c_str());



		if (!outfile[0]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_2df.out.txt" << "\n"; exit(1);}
		if (!outfile[1]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_add.out.txt" << "\n"; exit(1);}
		if (!outfile[2]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_domin.out.txt" << "\n"; exit(1);}
		if (!outfile[3]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_recess.out.txt" << "\n"; exit(1);}
		if (!outfile[4]->is_open()) {std::cerr << "Can not open file for writing: " << outfilename_str+"_over_domin.out.txt" << "\n"; exit(1);}
		}
	else
		{
		// open a file for output
		//_____________________
//		if (outfilename != NULL)
//	 		{
			outfile.push_back(new std::ofstream((outfilename_str+"_add.out.txt").c_str()));
//			}
//		else
//	 		{
//			outfilename_str="regression_add.out.txt"; outfile.push_back(new std::ofstream((outfilename_str+"_add.out.txt").c_str()));
//			}

		if (!outfile[0]->is_open())
			{
			std::cerr << "Can not open file for writing: " << outfilename_str << "\n";
			exit(1);
			}
		
		}
	
	}

//________________________________________________________________

/*
	if (allcov)
		{
		if (score)
			{
			outfile << sep << "beta_mu"; // << sep << "beta_SNP_A1";
			outfile << sep << "sebeta_mu"; // << sep << "sebeta_SNP_A1";
			}
		else 
			{
			for (int i =0; i<phd.n_model_terms-1;i++) 
			outfile << sep << "beta_" << phd.model_terms[i] << sep << "sebeta_" << phd.model_terms[i];
			}
		if(interactio != 0) outfile << sep << "beta_SNP_" << phd.model_terms[interaction];
		}
	if (ngpreds==2) 
		{
		outfile << sep << "beta_SNP_A1A2" << sep << "beta_SNP_A1A1" << sep 
		<< "sebeta_SNP_A1A2" << sep << "sebeta_SNP_a1A1" << sep << "chi2_SNP_2df"
		<< sep << "beta_SNP_addA1" << sep << "sebeta_SNP_addA1" << sep << "chi2_SNP_addA1"
		<< sep << "beta_SNP_domA1" << sep << "sebeta_SNP_domA1" << sep << "chi2_SNP_domA1"
		<< sep << "beta_SNP_recA1" << sep << "sebeta_SNP_recA1" << sep << "chi2_SNP_recA1"
		<< sep << "beta_SNP_odom" << sep << "sebeta_SNP_odom" << sep << "chi2_SNP_odom\n";
		}	
	else 
		{
		outfile << sep << "beta_SNP_add" << sep << "sebeta_SNP_add" << sep << "chi2_SNP_add\n";
		}
	}
*/
//	exit(1);

	
	
//________________________________________________________________
//Maksim, 9 Jan, 2009



int maxmod=5;
int start_pos, end_pos;

std::vector<std::ostringstream *> beta_sebeta;
//Han Chen
std::vector<std::ostringstream *> covvalue;
//Oct 26, 2009
std::vector<std::ostringstream *> chi2;

for(int i=0 ; i<maxmod ; i++)
	{
	beta_sebeta.push_back(new std::ostringstream());
//Han Chen
	covvalue.push_back(new std::ostringstream());
//Oct 26, 2009
	chi2.push_back(new std::ostringstream());
	}



for (int csnp=0;csnp<nsnps;csnp++) 
	{

	rgd.update_snp(gtd,csnp);
	double freq;
	if (ngpreds==2)
			freq = ((gtd.G).column_mean(csnp*2)*2.+(gtd.G).column_mean(csnp*2+1))/2.;
	else
			freq = (gtd.G).column_mean(csnp)/2.;
	int poly = 1;
	if (fabs(freq)<1.e-16 || fabs(1.-freq)<1.e-16) poly=0;
	if (fabs(mli.Rsq[csnp])<1.e-16) poly=0;



	if(ngpreds==2) //All models output. One file per each model
		{
		//Write mlinfo to output:
		for(int file=0 ; file<outfile.size() ; file++)
			{
			*outfile[file] << mli.name[csnp] << sep << mli.A1[csnp] << sep << mli.A2[csnp] << sep
										 << mli.Freq1[csnp] << sep << mli.MAF[csnp] << sep << mli.Quality[csnp] << sep << mli.Rsq[csnp] << sep
										 << phd.nids << sep << freq;
			if (chrom != "-1") *outfile[file] << sep << chrom;
			if (mapfilename != NULL) *outfile[file] << sep << mli.map[csnp];
			}


		for(int model=0 ; model<maxmod ; model++)
			{		
			if(poly)//allele freq is not too rare
				{
				#if LOGISTIC
				logistic_reg rd(rgd);
				if (score)
					rd.score(nrd.residuals,rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix);
				else
					rd.estimate(rgd,0,MAXITER,EPS,CHOLTOL,model, interaction, ngpreds, invvarmatrix, robust);
				#elif LINEAR
				linear_reg rd(rgd);
				if(score)
					rd.score(nrd.residuals,rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix);
				else
				{
				//	rd.mmscore(rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix);
					rd.estimate(rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix, robust);
				}
				#elif COXPH
				coxph_reg rd(rgd);
				rd.estimate(rgd,0,MAXITER,EPS,CHOLTOL,model, interaction, true, ngpreds,0,robust);
				#endif

				if(!allcov && model==0 && interaction==0) start_pos=rd.beta.nrow-2;
				else if(!allcov && model==0 && interaction!=0) start_pos=rd.beta.nrow-4;
				else if(!allcov && model!=0 && interaction==0) start_pos=rd.beta.nrow-1;
				else if(!allcov && model!=0 && interaction!=0) start_pos=rd.beta.nrow-2;
				else start_pos=0;

		
				for(int pos=start_pos ; pos<rd.beta.nrow ; pos++)
					{
					*beta_sebeta[model] << sep << rd.beta[pos] << sep << rd.sebeta[pos];
//Han Chen
//				#if !COXPH
				if (inverse_filename == NULL && !allcov && interaction != 0)
				   {
				   if (pos>start_pos)
				      {if (model==0)
				          {if (pos>start_pos+2)
				              {*covvalue[model] << rd.covariance[pos-3] << sep << rd.covariance[pos-2];}
                          }
                      else
               	          {*covvalue[model] << rd.covariance[pos-1];}
                      }
                   }
//                #endif	
//Oct 26, 2009
					}

				//calculate chi2
				//________________________________
				if (score==0)
					{
					*chi2[model] << 2.*(rd.loglik-null_loglik);
					}
				else
					{
					*chi2[model] << rd.chi2_score;
					}
				//________________________________
				
				
				}
			else //beta, sebeta = nan
				{
				if(!allcov && model==0 && interaction==0) start_pos=rgd.X.ncol-2;
				else if(!allcov && model==0 && interaction!=0) start_pos=rgd.X.ncol-4;
				else if(!allcov && model!=0 && interaction==0) start_pos=rgd.X.ncol-1;
				else if(!allcov && model!=0 && interaction!=0) start_pos=rgd.X.ncol-2;
				else start_pos=0;
					
				if(model==0) {end_pos=rgd.X.ncol;}
				else {end_pos=rgd.X.ncol-1;}

				if(interaction!=0) end_pos++;

				for(int pos=start_pos ; pos<end_pos ; pos++)
					{
					*beta_sebeta[model] << sep << "nan" << sep << "nan";
					}
//Han Chen
//                #if !COXPH
                if (!allcov && interaction !=0)
                   {if (model==0)
                       {*covvalue[model] << "nan" << sep << "nan";}
                   else
                       {*covvalue[model] << "nan";}
                   }
//                #endif
//Oct 26, 2009
				*chi2[model] << "nan";
				}
			}//end of model cycle
	



//Han Chen
			*outfile[0] << beta_sebeta[0]->str() << sep;
//				#if !COXPH
				if (!allcov && interaction !=0)
                {
                *outfile[0] << covvalue[0]->str() << sep;
                }
//                #endif
            *outfile[0] << chi2[0]->str() << "\n";
			*outfile[1] << beta_sebeta[1]->str() << sep;
//				#if !COXPH
				if (!allcov && interaction !=0)
                {
                *outfile[1] << covvalue[1]->str() << sep;
                }
//                #endif
            *outfile[1] << chi2[1]->str() << "\n";
			*outfile[2] << beta_sebeta[2]->str() << sep;
//				#if !COXPH
				if (!allcov && interaction !=0)
                {
                *outfile[2] << covvalue[2]->str() << sep;
                }
//                #endif
            *outfile[2] << chi2[2]->str() << "\n";
			*outfile[3] << beta_sebeta[3]->str() << sep;
//				#if !COXPH
				if (!allcov && interaction !=0)
                {
                *outfile[3] << covvalue[3]->str() << sep;
                }
//                #endif
            *outfile[3] << chi2[3]->str() << "\n";
			*outfile[4] << beta_sebeta[4]->str() << sep;
//				#if !COXPH
				if (!allcov && interaction !=0)
                {
                *outfile[4] << covvalue[4]->str() << sep;
                }
//                #endif
            *outfile[4] << chi2[4]->str() << "\n";		
//Oct 26, 2009





		}
	else //Only additive model. Only one output file
		{
		//Write mlinfo to output:
		*outfile[0] << mli.name[csnp] << sep << mli.A1[csnp] << sep << mli.A2[csnp] << sep;
		*outfile[0] << mli.Freq1[csnp] << sep << mli.MAF[csnp] << sep << mli.Quality[csnp] << sep << mli.Rsq[csnp] << sep;
		*outfile[0] << phd.nids << sep << freq;
		if (chrom != "-1") *outfile[0] << sep << chrom;
		if (mapfilename != NULL) *outfile[0] << sep << mli.map[csnp];
		int model=0;
		if(poly)//allele freq is not too rare
			{
			#if LOGISTIC
			logistic_reg rd(rgd);
			if (score)
				rd.score(nrd.residuals,rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix);
			else
				rd.estimate(rgd,0,MAXITER,EPS,CHOLTOL,model, interaction, ngpreds, invvarmatrix, robust);
			#elif LINEAR
			linear_reg rd(rgd);
			if (score)
				rd.score(nrd.residuals,rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix);
			else
				{
//					if(inverse_filename == NULL)
//						{
						rd.estimate(rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix, robust);
//						}
//					else
//						{
//						rd.mmscore(rgd,0,CHOLTOL,model, interaction, ngpreds, invvarmatrix);
//						}
				}
			#elif COXPH
			coxph_reg rd(rgd);
			rd.estimate(rgd,0,MAXITER,EPS,CHOLTOL,model, interaction, true, ngpreds,0,robust);
			#endif

			if(!allcov && interaction==0) start_pos=rd.beta.nrow-1;
			else if(!allcov && interaction!=0) start_pos=rd.beta.nrow-2;
			else start_pos=0;
			

				
			for(int pos=start_pos ; pos<rd.beta.nrow ; pos++)
				{
				*beta_sebeta[0] << sep << rd.beta[pos] << sep << rd.sebeta[pos];
//Han Chen
//				#if !COXPH
				if (inverse_filename == NULL && !allcov && interaction != 0)
				   {if (pos>start_pos)
               	       {*covvalue[0] << rd.covariance[pos-1];}
                   }
//                #endif	
//Oct 26, 2009
				}





			//calculate chi2
			//________________________________
			if(inverse_filename == NULL)
				{
				if(score==0)
					{
					*chi2[0] << 2.*(rd.loglik-null_loglik);
					}
					else
					{
					*chi2[0] << rd.chi2_score;
					}
				}
			//________________________________
			}
		else //beta, sebeta = nan
			{
			if(!allcov && interaction==0) start_pos=rgd.X.ncol-1;
			else  if(!allcov && interaction!=0) start_pos=rgd.X.ncol-2;
			else start_pos=0;


	    end_pos=rgd.X.ncol;
			if(interaction!=0) {end_pos++;}
			if(interaction!=0 && !allcov) {start_pos++;} 
	


			for(int pos=start_pos ; pos<end_pos ; pos++)
				{
				*beta_sebeta[0] << sep << "nan" << sep << "nan";
				}
			if(inverse_filename == NULL)
				{
//Han Chen
//                #if !COXPH
                if (!allcov && interaction !=0)
                   {*covvalue[0] << "nan";}
//                #endif
//Oct 26, 2009
				*chi2[0] << "nan";
				}
			}
		
			if(inverse_filename == NULL)
				{
//Han Chen
				*outfile[0] << beta_sebeta[0]->str() << sep;
//				#if !COXPH
				if (!allcov && interaction !=0)
                   {*outfile[0] << covvalue[0]->str() << sep;}
//                #endif
                *outfile[0] << chi2[model]->str() << "\n";
//Oct 26, 2009
				}
			else
				{
				*outfile[0] << beta_sebeta[0]->str() << "\n";
				}
		}

	//reset the per-SNP string buffers (beta_sebeta, covvalue, chi2)
	for(int i=0 ; i<5 ; i++)
		{
		beta_sebeta[i]->str("");
//Han Chen
		covvalue[i]->str("");
//Oct 26, 2009
		chi2[i]->str("");
		}


	if (csnp % 1000 == 0)
		{
		if (csnp==0)
			{
			fprintf(stdout,"Analysis: %6.2f ...",100.*double(csnp)/double(nsnps));
			}
		else
			{
			fprintf(stdout,"\b\b\b\b\b\b\b\b\b\b%6.2f ...",100.*double(csnp)/double(nsnps));
			}
		std::cout.flush();
		}

	}

fprintf(stdout,"\b\b\b\b\b\b\b\b\b\b%6.2f",100.);

fprintf(stdout," ... done\n");
	
//________________________________________________________________
//Maksim, 9 Jan, 2009



	for(int i=0 ; i<outfile.size() ; i++)
		{	
		outfile[i]->close();
		delete outfile[i];	
		}

	return(0);
}
Example #3
0
int main(int argc, char * argv[])
{
    cmdvars input_var;
    input_var.set_variables(argc, argv);

    input_var.printinfo();
    //	if (allcov && ngpreds>1)
    //	{
    //      cout << "\n\n"
    //           << "WARNING: --allcov allowed only for 1 predictor (MLDOSE)\n";
    //		allcov = 0;
    //	}
    mlinfo mli(input_var.getMlinfofilename(), input_var.getMapfilename());
    int nsnps = mli.nsnps;
    phedata phd;
    int interaction_cox = create_phenotype(phd, input_var);

    //interaction--;
    //	if (input_var.getInverseFilename()!= NULL && phd.ncov > 1)
    //     {
    //         std::cerr << "Error: In mmscore you can not use any covariates."
    //                   << " You phenotype file must conatin id column and "
    //                   << "trait (residuals) only\n";
    //         exit(1);
    //      }
    //	if (input_var.getInverseFilename()!= NULL &&
    //      (allcov == 1 || score == 1
    //                   || input_var.getInteraction()!= 0
    //                   || ngpreds==2))
    //      {
    //          std::cerr << "Error: In mmscore you can use additive model "
    //                    << "without any inetractions only\n";
    //          exit(1);
    //      }
    masked_matrix invvarmatrix;

    /*
     * now should be possible... delete this part later when everything works
     #if LOGISTIC
     if (input_var.getInverseFilename()!= NULL)
     {
         std::cerr << "ERROR: mmscore is forbidden for logistic regression\n";
         exit(1);
     }
     #endif
     */

    std::cout << "Reading data ..." << std::flush;
    if (input_var.getInverseFilename() != NULL)
    {
        loadInvSigma(input_var, phd, invvarmatrix);
    }

    gendata gtd;
    if (!input_var.getIsFvf())
        // use the non-filevector input format
        gtd.re_gendata(input_var.getGenfilename(), nsnps,
                       input_var.getNgpreds(), phd.nids_all, phd.nids,
                       phd.allmeasured, input_var.getSkipd(), phd.idnames);
    else
        // use the filevector input format (missing second last skipd
        // parameter)
        gtd.re_gendata(input_var.getStrGenfilename(), nsnps,
                       input_var.getNgpreds(), phd.nids_all, phd.nids,
                       phd.allmeasured, phd.idnames);

    std::cout << " loaded genotypic data ..." << std::flush;
    /**
       if (input_var.getIsFvf())
          gendata gtd(str_genfilename, nsnps, input_var.getNgpreds(),
                      phd.nids_all, phd.allmeasured, phd.idnames);
       else
           gendata gtd(input_var.getGenfilename(), nsnps,
                       input_var.getNgpreds(), phd.nids_all, phd.nids,
                       phd.allmeasured, skipd, phd.idnames);
     **/

    // estimate null model
#if COXPH
    coxph_data nrgd = coxph_data(phd, gtd, -1);
#else
    regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded());
#endif

    std::cout << " loaded null data ..." << std::flush;
#if LOGISTIC
    logistic_reg nrd = logistic_reg(nrgd);
    nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0,
                 input_var.getInteraction(), input_var.getNgpreds(),
                 invvarmatrix, input_var.getRobust(), 1);
#elif LINEAR

    linear_reg nrd = linear_reg(nrgd);
#if DEBUG
    std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE.";
#endif
    nrd.estimate(nrgd, 0, CHOLTOL, 0, input_var.getInteraction(),
                 input_var.getNgpreds(), invvarmatrix, input_var.getRobust(), 1);
#elif COXPH
    coxph_reg nrd(nrgd);

    nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0,
                 input_var.getInteraction(), input_var.getNgpreds(), 1);
#endif

    std::cout << " estimated null model ...";
    // end null
#if COXPH
    coxph_data rgd(phd, gtd, 0);
#else
    regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded());
#endif

    std::cout << " formed regression object ...";
    std::cout << " done\n" << std::flush;

    //________________________________________________________________
    //Maksim, 9 Jan, 2009
    std::string outfilename_str(input_var.getOutfilename());
    std::vector<std::ofstream*> outfile;

    //All models output. One file per model
    if (input_var.getNgpreds() == 2)
    {
        open_files_for_output(outfile, outfilename_str);
        if (input_var.getNohead() != 1)
        {
            create_header_1(outfile, input_var, phd, interaction_cox);
        }
    }
    else //Only additive model. Only one output file
    {
        outfile.push_back(
            new std::ofstream((outfilename_str + "_add.out.txt").c_str()));

        if (!outfile[0]->is_open())
        {
            std::cerr << "Cannot open file for writing: " << outfilename_str
                      << "\n";
            exit(1);
        }
        if (input_var.getNohead() != 1)
        {
            create_header2(outfile, input_var, phd, interaction_cox);
        }
    }

    //________________________________________________________________

    /*
     if (input_var.getAllcov())
     {
     if (score)
     {
     outfile << input_var.getSep() << "beta_mu"; // << input_var.getSep() << "beta_SNP_A1";
     outfile << input_var.getSep() << "sebeta_mu"; // << input_var.getSep() << "sebeta_SNP_A1";
     }
     else
     {
     for (int i =0; i<phd.n_model_terms-1;i++)
     outfile << input_var.getSep() << "beta_" << phd.model_terms[i] << input_var.getSep() << "sebeta_" << phd.model_terms[i];
     }
     if(interactio != 0) outfile << input_var.getSep() << "beta_SNP_" << phd.model_terms[interaction];
     }
     if (input_var.getNgpreds()==2)
     {
        outfile << input_var.getSep() << "beta_SNP_A1A2"
                << input_var.getSep() << "beta_SNP_A1A1"
                << input_var.getSep() << "sebeta_SNP_A1A2"
                << input_var.getSep() << "sebeta_SNP_a1A1"
                << input_var.getSep() << "chi2_SNP_2df"
                << input_var.getSep() << "beta_SNP_addA1"
                << input_var.getSep() << "sebeta_SNP_addA1"
                << input_var.getSep() << "chi2_SNP_addA1"
                << input_var.getSep() << "beta_SNP_domA1"
                << input_var.getSep() << "sebeta_SNP_domA1"
                << input_var.getSep() << "chi2_SNP_domA1"
                << input_var.getSep() << "beta_SNP_recA1"
                << input_var.getSep() << "sebeta_SNP_recA1"
                << input_var.getSep() << "chi2_SNP_recA1"
                << input_var.getSep() << "beta_SNP_odom"
                << input_var.getSep() << "sebeta_SNP_odom"
                << input_var.getSep() << "chi2_SNP_odom\n";
     }
     else
     {
         outfile << input_var.getSep() << "beta_SNP_add"
                 << input_var.getSep() << "sebeta_SNP_add"
                 << input_var.getSep() << "chi2_SNP_add\n";
     }

    */
    //	exit(1);
    //________________________________________________________________
    //Maksim, 9 Jan, 2009
    int maxmod = 5;
    int start_pos, end_pos;

    std::vector<std::ostringstream *> beta_sebeta;
    //Han Chen
    std::vector<std::ostringstream *> covvalue;
    //Oct 26, 2009
    std::vector<std::ostringstream *> chi2;

    for (int i = 0; i < maxmod; i++)
    {
        beta_sebeta.push_back(new std::ostringstream());
        //Han Chen
        covvalue.push_back(new std::ostringstream());
        //Oct 26, 2009
        chi2.push_back(new std::ostringstream());
    }

    for (int csnp = 0; csnp < nsnps; csnp++)
    {
        rgd.update_snp(gtd, csnp);
        double freq = 0.;
        int gcount = 0;
        float snpdata1[gtd.nids];
        float snpdata2[gtd.nids];
        if (input_var.getNgpreds() == 2)
        {
            //freq = ((gtd.G).column_mean(csnp*2)*2. +
            //        (gtd.G).column_mean(csnp*2+1))/2.;
            gtd.get_var(csnp * 2, snpdata1);
            gtd.get_var(csnp * 2 + 1, snpdata2);
            for (unsigned int ii = 0; ii < gtd.nids; ii++)
                if (!isnan(snpdata1[ii]) && !isnan(snpdata2[ii]))
                {
                    gcount++;
                    freq += snpdata1[ii] + snpdata2[ii] * 0.5;
                }
        }
        else
        {
            // freq = (gtd.G).column_mean(csnp)/2.;
            gtd.get_var(csnp, snpdata1);
            for (unsigned int ii = 0; ii < gtd.nids; ii++)
                if (!isnan(snpdata1[ii]))
                {
                    gcount++;
                    freq += snpdata1[ii] * 0.5;
                }
        }
        freq /= static_cast<double>(gcount);
        int poly = 1;
        if (fabs(freq) < 1.e-16 || fabs(1. - freq) < 1.e-16)
            poly = 0;

        if (fabs(mli.Rsq[csnp]) < 1.e-16)
            poly = 0;
        //All models output. One file per each model
        if (input_var.getNgpreds() == 2)
        {
            //Write mlinfo to output:
            for (unsigned int file = 0; file < outfile.size(); file++)
            {
                *outfile[file] << mli.name[csnp]
                               << input_var.getSep() << mli.A1[csnp]
                               << input_var.getSep() << mli.A2[csnp]
                               << input_var.getSep() << mli.Freq1[csnp]
                               << input_var.getSep() << mli.MAF[csnp]
                               << input_var.getSep() << mli.Quality[csnp]
                               << input_var.getSep() << mli.Rsq[csnp]
                               << input_var.getSep() << gcount
                               << input_var.getSep() << freq;
                if (input_var.getChrom() != "-1")
                    *outfile[file] << input_var.getSep()
                                   << input_var.getChrom();
                if (input_var.getMapfilename() != NULL)
                    *outfile[file] << input_var.getSep() << mli.map[csnp];
            }

            for (int model = 0; model < maxmod; model++)
            {
                if (poly) //allele freq is not too rare
                {
#if LOGISTIC
                    logistic_reg rd(rgd);
                    if (input_var.getScore())
                        rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
                                 input_var.getInteraction(),
                                 input_var.getNgpreds(),
                                 invvarmatrix);
                    else
                    rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model,
                                input_var.getInteraction(),
                                input_var.getNgpreds(),
                                invvarmatrix,
                                input_var.getRobust());
#elif LINEAR
                    linear_reg rd(rgd);
                    if (input_var.getScore())
                        rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
                                 input_var.getInteraction(),
                                 input_var.getNgpreds(),
                                 invvarmatrix);
                    else
                    {
                        //	rd.mmscore(rgd,0,CHOLTOL,model,input_var.getInteraction(), input_var.getNgpreds(), invvarmatrix);
                        rd.estimate(rgd, 0, CHOLTOL, model,
                                input_var.getInteraction(),
                                input_var.getNgpreds(), invvarmatrix,
                                input_var.getRobust());
                    }
#elif COXPH
                    coxph_reg rd(rgd);
                    rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model,
                                input_var.getInteraction(), true,
                                input_var.getNgpreds());
#endif

                    if (!input_var.getAllcov() && model == 0
                        && input_var.getInteraction() == 0)
                        start_pos = rd.beta.nrow - 2;
                    else if (!input_var.getAllcov() && model == 0
                             && input_var.getInteraction() != 0)
                        start_pos = rd.beta.nrow - 4;
                    else if (!input_var.getAllcov() && model != 0
                             && input_var.getInteraction() == 0)
                        start_pos = rd.beta.nrow - 1;
                    else if (!input_var.getAllcov() && model != 0
                             && input_var.getInteraction() != 0)
                        start_pos = rd.beta.nrow - 2;
                    else
                        start_pos = 0;

                    for (int pos = start_pos; pos < rd.beta.nrow; pos++)
                    {
                        *beta_sebeta[model] << input_var.getSep()
                                            << rd.beta[pos]
                                            << input_var.getSep()
                                            << rd.sebeta[pos];
                        //Han Chen
#if !COXPH
                        if (input_var.getInverseFilename() == NULL
                            && !input_var.getAllcov()
                            && input_var.getInteraction() != 0)
                        {
                            if (pos > start_pos)
                            {
                                if (model == 0)
                                {
                                    if (pos > start_pos + 2)
                                    {
                                        *covvalue[model]
                                            << rd.covariance[pos - 3]
                                            << input_var.getSep()
                                            << rd.covariance[pos - 2];
                                    }
                                }
                                else
                                {
                                    *covvalue[model] << rd.covariance[pos - 1];
                                }
                            }
                        }
#endif
                        //Oct 26, 2009
                    }

                    //calculate chi2
                    //________________________________
                    if (input_var.getScore() == 0)
                    {
                        //*chi2[model] << 2.*(rd.loglik-null_loglik);
                        *chi2[model] << rd.loglik;
                    }
                    else
                    {
                        //*chi2[model] << rd.chi2_score;
                        *chi2[model] << "nan";
                    }
                    //________________________________
                }
                else //beta, sebeta = nan
                {
                    if (!input_var.getAllcov() && model == 0
                        && input_var.getInteraction() == 0)
                        start_pos = rgd.X.ncol - 2;
                    else if (!input_var.getAllcov() && model == 0
                             && input_var.getInteraction() != 0)
                        start_pos = rgd.X.ncol - 4;
                    else if (!input_var.getAllcov() && model != 0
                             && input_var.getInteraction() == 0)
                        start_pos = rgd.X.ncol - 1;
                    else if (!input_var.getAllcov() && model != 0
                             && input_var.getInteraction() != 0)
                        start_pos = rgd.X.ncol - 2;
                    else
                        start_pos = 0;

                    if (model == 0)
                    {
                        end_pos = rgd.X.ncol;
                    }
                    else
                    {
                        end_pos = rgd.X.ncol - 1;
                    }

                    if (input_var.getInteraction() != 0)
                        end_pos++;

                    for (int pos = start_pos; pos < end_pos; pos++)
                    {
                        *beta_sebeta[model] << input_var.getSep() << "nan"
                                            << input_var.getSep() << "nan";
                    }
                    //Han Chen
#if !COXPH
                    if (!input_var.getAllcov()
                        && input_var.getInteraction() != 0)
                    {
                        if (model == 0)
                        {
                            *covvalue[model] << "nan"
                                             << input_var.getSep() << "nan";
                        }
                        else
                        {
                            *covvalue[model] << "nan";
                        }
                    }
#endif
                    //Oct 26, 2009
                    *chi2[model] << "nan";
                }
            } //end of model cycle

            //Han Chen
            *outfile[0] << beta_sebeta[0]->str() << input_var.getSep();
#if !COXPH
            if (!input_var.getAllcov() && input_var.getInteraction() != 0)
            {
                *outfile[0] << covvalue[0]->str() << input_var.getSep();
            }
#endif
            *outfile[0] << chi2[0]->str() << "\n";

            *outfile[1] << beta_sebeta[1]->str() << input_var.getSep();
#if !COXPH
            if (!input_var.getAllcov() && input_var.getInteraction() != 0)
            {
                *outfile[1] << covvalue[1]->str() << input_var.getSep();
            }
#endif
            *outfile[1] << chi2[1]->str() << "\n";

            *outfile[2] << beta_sebeta[2]->str() << input_var.getSep();
#if !COXPH
            if (!input_var.getAllcov() && input_var.getInteraction() != 0)
            {
                *outfile[2] << covvalue[2]->str() << input_var.getSep();
            }
#endif
            *outfile[2] << chi2[2]->str() << "\n";

            *outfile[3] << beta_sebeta[3]->str() << input_var.getSep();
#if !COXPH
            if (!input_var.getAllcov() && input_var.getInteraction() != 0)
            {
                *outfile[3] << covvalue[3]->str() << input_var.getSep();
            }
#endif
            *outfile[3] << chi2[3]->str() << "\n";

            *outfile[4] << beta_sebeta[4]->str() << input_var.getSep();
#if !COXPH
            if (!input_var.getAllcov() && input_var.getInteraction() != 0)
            {
                *outfile[4] << covvalue[4]->str() << input_var.getSep();
            }
#endif
            *outfile[4] << chi2[4]->str() << "\n";
            //Oct 26, 2009
        }
        else //Only additive model. Only one output file
        {
            //Write mlinfo to output:
            *outfile[0] << mli.name[csnp]
                        << input_var.getSep() << mli.A1[csnp]
                        << input_var.getSep() << mli.A2[csnp]
                        << input_var.getSep();
            *outfile[0] << mli.Freq1[csnp]
                        << input_var.getSep() << mli.MAF[csnp]
                        << input_var.getSep() << mli.Quality[csnp]
                        << input_var.getSep() << mli.Rsq[csnp]
                        << input_var.getSep();
            *outfile[0] << gcount << input_var.getSep() << freq;
            if (input_var.getChrom() != "-1")
                *outfile[0] << input_var.getSep() << input_var.getChrom();
            if (input_var.getMapfilename() != NULL)
                *outfile[0] << input_var.getSep() << mli.map[csnp];
            int model = 0;
            if (poly) //allele freq is not too rare
            {
#if LOGISTIC
                logistic_reg rd(rgd);
                if (input_var.getScore())
                    rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
                             input_var.getInteraction(),
                             input_var.getNgpreds(),
                             invvarmatrix);
                else
                    rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model,
                                input_var.getInteraction(),
                                input_var.getNgpreds(),
                                invvarmatrix,
                                input_var.getRobust());
#elif LINEAR
                //cout << (rgd.get_unmasked_data()).nids << " 1\n";
#if DEBUG
                rgd.X.print();
                rgd.Y.print();
#endif
                linear_reg rd(rgd);
#if DEBUG
                rgd.X.print();
                rgd.Y.print();
#endif
                //cout << (rgd.get_unmasked_data()).nids << " 2\n";
                if (input_var.getScore())
                {
#if DEBUG
                    cout << "input_var.getScore/n";
                    nrd.residuals.print();
                    cout << CHOLTOL << " <-CHOLTOL\n";
                    cout << model << " <-model\n";
                    cout << input_var.getInteraction()
                         << " <-input_var.getInteraction()\n";
                    cout << input_var.getNgpreds()
                         << " <-input_var.getNgpreds()\n";
                    invvarmatrix.print();
#endif
                    rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
                             input_var.getInteraction(),
                             input_var.getNgpreds(),
                             invvarmatrix);
#if DEBUG
                    rd.beta.print();
                    cout << rd.chi2_score << " <-chi2_scoren\n";
                    rd.covariance.print();
                    rd.residuals.print();
                    rd.sebeta.print();
                    cout << rd.loglik << " <-logliken\n";
                    cout << rd.sigma2 << " <-sigma2\n";
#endif
                }
                else
                {
                    // if(input_var.getInverseFilename()== NULL)
                    // {
                    // cout << (rgd.get_unmasked_data()).nids << " 3\n";
#if DEBUG
                    cout << "rd.estimate\n";
                    cout << CHOLTOL << " <-CHOLTOL\n";
                    cout << model << " <-model\n";
                    cout << input_var.getInteraction()
                         << " <-input_var.getInteraction()\n";
                    cout << input_var.getNgpreds()
                         << " <-input_var.getNgpreds()\n";
                    cout << input_var.getRobust()
                         << " <-input_var.getRobust()\n";
                    cout << "start invarmatrix\n";
                    invvarmatrix.print();
                    cout << "end invarmatrix\n";
                    cout << rgd.is_interaction_excluded
                         << " <-rgd.is_interaction_excluded\n";
#endif
                    rd.estimate(rgd, 0, CHOLTOL, model,
                                input_var.getInteraction(),
                                input_var.getNgpreds(),
                                invvarmatrix,
                                input_var.getRobust());

#if DEBUG
                    cout << "rd.beta\n";
                    rd.beta.print();
                    cout << rd.chi2_score << " <-chi2_scoren\n";
                    cout << "rd.covariance\n";
                    rd.covariance.print();
                    cout << "rd.residuals\n";
                    rd.residuals.print();
                    cout << "rd.sebeta\n";
                    rd.sebeta.print();
                    cout << rd.loglik << " <-logliken\n";
                    cout << rd.sigma2 << " <-sigma2\n";
#endif
                    //cout << (rgd.get_unmasked_data()).nids << " 4\n";
                    //}
                    //else
                    //{
                    //   rd.mmscore(rgd, 0, CHOLTOL, model,
                    //              input_var.getInteraction(),
                    //              input_var.getNgpreds(), invvarmatrix);
                    //}
                }
#elif COXPH
                coxph_reg rd(rgd);
                rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model,
                            input_var.getInteraction(), true,
                            input_var.getNgpreds());
#endif

                if (!input_var.getAllcov() && input_var.getInteraction() == 0)
                {
                    start_pos = rd.beta.nrow - 1;
                }
                else if (!input_var.getAllcov()
                         && input_var.getInteraction() != 0)
                {
                    start_pos = rd.beta.nrow - 2;
                }
                else
                {
                    start_pos = 0;
                }
#if DEBUG
                cout << " start_pos;" << start_pos << "\n";
#endif
                for (int pos = start_pos; pos < rd.beta.nrow; pos++)
                {
                    *beta_sebeta[0] << input_var.getSep() << rd.beta[pos]
                                    << input_var.getSep() << rd.sebeta[pos];
                    //Han Chen
#if !COXPH
                    if (input_var.getInverseFilename() == NULL
                        && !input_var.getAllcov()
                        && input_var.getInteraction() != 0)
                    {
                        if (pos > start_pos)
                        {
                            *covvalue[0] << rd.covariance[pos - 1];
                        }
                    }
#endif
                    //Oct 26, 2009
                }

                //calculate chi2
                //________________________________
                if (input_var.getInverseFilename() == NULL)
                {
#if DEBUG
                    cout << " inverse_filename == NULL" << "\n";
#endif
                    if (input_var.getScore() == 0)
                    {
                        *chi2[0] << rd.loglik; //2.*(rd.loglik-null_loglik);
                    }
                    else
                    {
                        *chi2[0] << "nan"; //rd.chi2_score;
                    }
                }
                //________________________________
            }
            else //beta, sebeta = nan
            {
                if (!input_var.getAllcov() && input_var.getInteraction() == 0)
                    start_pos = rgd.X.ncol - 1;
                else if (!input_var.getAllcov()
                         && input_var.getInteraction() != 0)
                    start_pos = rgd.X.ncol - 2;
                else
                    start_pos = 0;

                end_pos = rgd.X.ncol;
                if (input_var.getInteraction() != 0)
                {
                    end_pos++;
                }
                if (input_var.getInteraction() != 0 && !input_var.getAllcov())
                {
                    start_pos++;
                }

                for (int pos = start_pos; pos < end_pos; pos++)
                {
                    *beta_sebeta[0] << input_var.getSep() << "nan"
                                    << input_var.getSep() << "nan";
                }
                if (input_var.getInverseFilename() == NULL)
                {
                    //Han Chen
#if !COXPH
                    if (!input_var.getAllcov()
                        && input_var.getInteraction() != 0)
                    {
                        *covvalue[0] << "nan";
                    }
#endif
                    //Oct 26, 2009
                    *chi2[0] << "nan";
                }
            }

            if (input_var.getInverseFilename() == NULL)
            {
                //Han Chen
                *outfile[0] << beta_sebeta[0]->str() << input_var.getSep();
#if !COXPH
                if (!input_var.getAllcov() && input_var.getInteraction() != 0)
                {
                    *outfile[0] << covvalue[0]->str() << input_var.getSep();
                }
#endif
                *outfile[0] << chi2[model]->str() << "\n";
                //Oct 26, 2009
            }
            else
            {
                *outfile[0] << beta_sebeta[0]->str() << "\n";
#if DEBUG
                cout << "Se beta" << beta_sebeta[0] << "\n";
#endif
            }
        }
        // reset the beta_sebeta, covvalue and chi2 string buffers
        for (int i = 0; i < 5; i++)
        {
            beta_sebeta[i]->str("");
            //Han Chen
            covvalue[i]->str("");
            //Oct 26, 2009
            chi2[i]->str("");
        }
        update_progress_to_cmd_line(csnp, nsnps);
    }

    std::cout << "\b\b\b\b\b\b\b\b\b" << 100.;
    std::cout << "%... done\n";

    //________________________________________________________________
    //Maksim, 9 Jan, 2009

    for (unsigned int i = 0; i < outfile.size(); i++)
    {
        outfile[i]->close();
        delete outfile[i];
    }

    //delete gtd;

    // Free the ostringstream objects held in beta_sebeta, covvalue and chi2
    std::vector<std::ostringstream *>::iterator it = beta_sebeta.begin();
    while (it != beta_sebeta.end())
    {
        delete *it;
        ++it;
    }
    it = covvalue.begin();
    while (it != covvalue.end())
    {
        delete *it;
        ++it;
    }
    it = chi2.begin();
    while (it != chi2.end())
    {
        delete *it;
        ++it;
    }

    return (0);
}