int main(int argc, char* argv[]) {
  LogisticRegression lr;
  LogisticRegressionScoreTest lrst;
  LogisticRegressionPermutationTest lrpt;

  Vector y;
  Matrix x;
  Matrix cov;
  LoadVector("input.y", y);
  LoadMatrix("input.x", x);
  LoadMatrix("input.cov", cov);

  Matrix xall;
  xall = x;
  xall.StackRight(cov);  // 1 + x + cov

  if (lr.FitLogisticModel(xall, y, 100) == false) {
    fprintf(stderr, "Fitting failed!\n");
    return -1;
  }
  Vector& beta = lr.GetCovEst();
  Matrix& v = lr.GetCovB();
  Vector& pWald = lr.GetAsyPvalue();
  fprintf(stdout, "wald_beta\t");
  Print(beta);
  fputc('\n', stdout);
  fprintf(stdout, "wald_vcov\t");
  Print(v);
  fputc('\n', stdout);
  fprintf(stdout, "wald_p\t");
  Print(pWald[1]);
  fputc('\n', stdout);

  if (lrpt.FitLogisticModelCov(xall, 1, y, 2000, -1) == false) {
    fprintf(stderr, "Fitting failed!\n");
    return -1;
  }
  fprintf(stdout, "permutation_p\t");
  double permu_p = lrpt.getPvalue();
  Print(permu_p);
  fputc('\n', stdout);

  if (lrst.FitLogisticModel(xall, y, 1, 100) == false) {
    fprintf(stderr, "Fitting failed!\n");
    return -1;
  }
  fprintf(stdout, "score_p\t");
  double score_p = lrst.GetPvalue();
  Print(score_p);
  fputc('\n', stdout);

  return 0;
}
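// ---------------------------------------------------------------------------
// Sketch (an assumption, not this library's code): how an asymptotic Wald
// p-value such as pWald[1] is conventionally obtained from a coefficient and
// its variance. chidist(x, df) is assumed to return the upper-tail chi-square
// probability, as it appears to do in the timing example further below.
double chidist(double x, double df);  // assumed provided by the stats library
inline double waldPvalue(double beta, double variance) {
  const double z2 = (beta * beta) / variance;  // squared Wald statistic, 1 df
  return chidist(z2, 1.0);                     // P(Chi2_1 >= z2)
}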
TEST_F(LR_Separable_Test, test_separable) {
  LogisticRegression lr;
  int separableColumn = lr.dataIsSeparable(cov, response);
  ASSERT_EQ(separableColumn, 0);
}
int main(void) {
  cout << "Hello world for Logistic Regression" << endl;
  LogisticRegression toDo;
  toDo.Test();
  return 0;
}
int main(int argc, char *argv[]) {
  time_t now = time(0);
  printf("chisq p = 0.05 cutoff = %f\n", chidist(3.84, 1.0));
  printf("Start analysis at %s \n", ctime(&now));

  /*
  Matrix a(2, 2);
  a[0][0] = 1.0;
  a[1][1] = 1.0;
  a[0][1] = a[1][0] = 0.5;
  SVD svd;
  svd.InvertInPlace(a);
  for (int i = 0; i < a.rows; i++) {
    for (int j = 0; j < a.cols; j++) {
      std::cout << "a[" << i << "]" << "[" << j << "]" << a[i][j] << "\t";
    }
    std::cout << "\n";
  }
  return 0;
  */

  Matrix X;
  String Xinput = "ExampleX.test";
  Vector Y;
  String Yinput = "ExampleY.test";
  if (loadMatrix(X, Xinput) || loadVector(Y, Yinput)) {
    fprintf(stderr, "Data loading problem!\n");
    exit(1);
  }

  LogisticRegression lr;
  if (lr.FitLogisticModel(X, Y, 30)) {
    printf("fit all right!\n");
  } else {
    printf("fit failed\n");
  }
  now = time(0);
  printf("Finished analysis at %s \n", ctime(&now));

  LogisticRegressionScoreTest lrst;
  int Xcol = 1;
  lrst.FitLogisticModel(X, Y, Xcol, 30);
  printf("score p-value is: %lf \n", lrst.getPvalue());

  Vector& pvalue = lr.GetAsyPvalue();
  printf("wald p-value is: %lf \n", pvalue[Xcol]);
  return 0;
}
int main() {
  double alpha = 0.001;  // learning rate
  int iters = 10000;     // number of gradient-descent iterations

  LogisticRegression lr;
  lr.readData("input.txt");
  lr.gradientDescent(alpha, iters);

  _getch();  // wait for a keypress so the console window stays open (requires <conio.h>)
  return 0;
}
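// ---------------------------------------------------------------------------
// Sketch (an assumption, not this project's implementation): one full-batch
// gradient-descent step for logistic regression, of the kind
// lr.gradientDescent(alpha, iters) presumably repeats `iters` times:
//   w_j <- w_j - alpha * sum_i (sigmoid(w . x_i) - y_i) * x_ij
#include <cmath>
#include <vector>
void gradientDescentStep(std::vector<double>& w,
                         const std::vector<std::vector<double> >& X,
                         const std::vector<double>& y,
                         double alpha) {
  std::vector<double> grad(w.size(), 0.0);
  for (size_t i = 0; i < X.size(); ++i) {
    double z = 0.0;
    for (size_t j = 0; j < w.size(); ++j) z += w[j] * X[i][j];
    const double residual = 1.0 / (1.0 + std::exp(-z)) - y[i];  // sigmoid(z) - y_i
    for (size_t j = 0; j < w.size(); ++j) grad[j] += residual * X[i][j];
  }
  for (size_t j = 0; j < w.size(); ++j) w[j] -= alpha * grad[j];
}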
void PosteriorEstimator::estimate(vector<pair<double, bool> >& combined,
                                  LogisticRegression& lr) {
  // switch sorting order
  if (!reversed) {
    reverse(combined.begin(), combined.end());
  }

  vector<double> medians;
  vector<unsigned int> negatives, sizes;
  binData(combined, medians, negatives, sizes);

  lr.setData(medians, negatives, sizes);
  lr.roughnessPenaltyIRLS();

  // restore sorting order
  if (!reversed) {
    reverse(combined.begin(), combined.end());
  }
}
void PosteriorEstimator::estimatePEP(vector<pair<double, bool> >& combined,
                                     double pi0, vector<double>& peps,
                                     bool include_negative) {
  // Logistic regression on the data
  size_t nTargets = 0, nDecoys = 0;
  LogisticRegression lr;
  estimate(combined, lr);

  vector<double> xvals(0);
  vector<pair<double, bool> >::const_iterator elem = combined.begin();
  for (; elem != combined.end(); ++elem) {
    if (elem->second) {
      xvals.push_back(elem->first);
      ++nTargets;
    } else {
      if (include_negative) {
        xvals.push_back(elem->first);
      }
      ++nDecoys;
    }
  }
  lr.predict(xvals, peps);

#define OUTPUT_DEBUG_FILES
#undef OUTPUT_DEBUG_FILES
#ifdef OUTPUT_DEBUG_FILES
  ofstream drFile("decoyRate.all", ios::out), xvalFile("xvals.all", ios::out);
  ostream_iterator<double> drIt(drFile, "\n"), xvalIt(xvalFile, "\n");
  copy(peps.begin(), peps.end(), drIt);
  copy(xvals.begin(), xvals.end(), xvalIt);
#endif

  double factor = pi0 * ((double)nTargets / (double)nDecoys);
  double top = min(1.0, factor * exp(*max_element(peps.begin(), peps.end())));
  vector<double>::iterator pep = peps.begin();
  bool crap = false;
  for (; pep != peps.end(); ++pep) {
    if (crap) {
      *pep = top;
      continue;
    }
    *pep = factor * exp(*pep);
    if (*pep >= top) {
      *pep = top;
      crap = true;
    }
  }
  partial_sum(peps.rbegin(), peps.rend(), peps.rbegin(), mymin);
}
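// ---------------------------------------------------------------------------
// Sketch (not part of the original file): the per-element transformation the
// loop above applies to a raw prediction p returned by lr.predict(), with
// factor = pi0 * nTargets / nDecoys and `top` acting as a cap:
//   PEP = min(factor * exp(p), top)
// The trailing partial_sum with mymin over reverse iterators then replaces
// each PEP by the minimum of itself and all later values, enforcing
// monotonicity of the estimates.
#include <algorithm>
#include <cmath>
inline double scaleRawPep(double rawPrediction, double factor, double top) {
  return std::min(factor * std::exp(rawPrediction), top);
}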
int main(int argc, char **argv) {
  if (argc != 6) {
    cerr << "usage: " << argv[0]
         << " train_feature_data test_feature_data regularization_parameter"
            " bias_parameter feature_config" << endl;
    return -1;
  }

  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // string train_data = string(argv[1]) + string(".") + CommonTool::to_string(rank);
  // string test_data = string(argv[2]) + string(".") + CommonTool::to_string(rank);
  bool split = true;
  string train_data = string(argv[1]);
  string test_data = string(argv[2]);

  LogisticRegression LR;
  LR.set_mpirank(rank);
  LR.set_mpisize(size);

  if (rank == 0) {
    cerr << " mpi rank=" << rank << " mpi size=" << size << endl;
    time_t t = time(0);
    cerr << "start process:" << asctime(localtime(&t)) << endl;
  }

  // LR.read_dataset("data/train.feature.filter", "conf/overall.conf");
  // LR.init_data_buffer("data/train.feature.filter");
  LR.init_data_buffer(train_data, test_data, split);

  string parameters;
  float lambda = atof(argv[3]);
  float bias = atof(argv[4]);
  string feature_conf(argv[5]);
  int status = LR.optimize(feature_conf, string("model/para"), parameters, lambda, bias);

  if (rank == 0) {
    cerr << "LBFGS status:" << status << endl;
    time_t t = time(0);
    cerr << "end process:" << asctime(localtime(&t)) << endl;
  }

  MPI_Finalize();
  return 0;
}
void PosteriorEstimator::estimatePEPGeneralized(
    vector<pair<double, bool> >& combined, vector<double>& peps,
    bool include_negative) {
  // Logistic regression on the data
  size_t nTargets = 0, nDecoys = 0;
  LogisticRegression lr;
  estimate(combined, lr);

  vector<double> xvals(0);
  vector<pair<double, bool> >::const_iterator elem = combined.begin();
  for (; elem != combined.end(); ++elem) {
    xvals.push_back(elem->first);
    if (elem->second) {
      ++nTargets;
    } else {
      if (include_negative) {
        xvals.push_back(elem->first);
      }
      ++nDecoys;
    }
  }
  lr.predict(xvals, peps);

#ifdef OUTPUT_DEBUG_FILES
  ofstream drFile("decoyRate.all", ios::out), xvalFile("xvals.all", ios::out);
  ostream_iterator<double> drIt(drFile, "\n"), xvalIt(xvalFile, "\n");
  copy(peps.begin(), peps.end(), drIt);
  copy(xvals.begin(), xvals.end(), xvalIt);
#endif

  double top = exp(*max_element(peps.begin(), peps.end()));
  top = top / (1 + top);
  bool crap = false;
  vector<double>::iterator pep = peps.begin();
  for (; pep != peps.end(); ++pep) {
    if (crap) {
      *pep = top;
      continue;
    }
    // eg = p/(1-p)
    // eg - egp = p
    // p = eg/(1+eg)
    double eg = exp(*pep);
    *pep = eg / (1 + eg);
    if (*pep >= top) {
      *pep = top;
      crap = true;
    }
  }
  partial_sum(peps.rbegin(), peps.rend(), peps.rbegin(), mymin);

  double high = *max_element(peps.begin(), peps.end());
  double low = *min_element(peps.begin(), peps.end());
  assert(high > low);
  if (VERB > 2) {
    cerr << "Highest generalized decoy rate =" << high
         << ", low rate = " << low << endl;
  }
  pep = peps.begin();
  for (; pep != peps.end(); ++pep) {
    *pep = (*pep - low) / (high - low);
  }
}
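// ---------------------------------------------------------------------------
// Sketch (not part of the original file): the log-odds-to-probability
// conversion derived in the loop comment above. With g = log(p / (1 - p)):
//   p = exp(g) / (1 + exp(g))
#include <cmath>
inline double logOddsToProbability(double logOdds) {
  const double odds = std::exp(logOdds);  // odds = p / (1 - p)
  return odds / (1.0 + odds);
}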
bool train( CommandLineParser &parser ){

    infoLog << "Training regression model..." << endl;

    string trainDatasetFilename = "";
    string modelFilename = "";
    string defaultFilename = "linear-regression-model.grt";
    bool removeFeatures = false;
    bool defaultRemoveFeatures = false;

    //Get the filename
    if( !parser.get("filename",trainDatasetFilename) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f option." << endl;
        printUsage();
        return false;
    }

    //Get the model filename
    parser.get("model-filename",modelFilename,defaultFilename);

    //Load the training data to train the model
    RegressionData trainingData;

    infoLog << "- Loading Training Data..." << endl;
    if( !trainingData.load( trainDatasetFilename ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int T = trainingData.getNumTargetDimensions();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num input dimensions: " << N << endl;
    infoLog << "- Num target dimensions: " << T << endl;

    //Create a new regression instance
    LogisticRegression regression;
    regression.setMaxNumEpochs( 500 );
    regression.setMinChange( 1.0e-5 );
    regression.setUseValidationSet( true );
    regression.setValidationSetSize( 20 );
    regression.setRandomiseTrainingOrder( true );
    regression.enableScaling( true );

    //Create a new pipeline that will hold the regression algorithm
    GestureRecognitionPipeline pipeline;

    //Add a multidimensional regression instance and set the regression algorithm to Logistic Regression
    pipeline.setRegressifier( MultidimensionalRegression( regression, true ) );

    infoLog << "- Training model...\n";

    //Train the regression pipeline
    if( !pipeline.train( trainingData ) ){
        errorLog << "Failed to train model!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;
    infoLog << "- Saving model to: " << modelFilename << endl;

    //Save the pipeline
    if( pipeline.save( modelFilename ) ){
        infoLog << "- Model saved." << endl;
    }else warningLog << "Failed to save model to file: " << modelFilename << endl;

    infoLog << "- TrainingTime: " << pipeline.getTrainingTime() << endl;

    return true;
}
/**
 * Run a single likelihood ratio test. Assumes covariates and phenotype are set as global variables.
 * The test is performed on the haps vector.
 *
 * @param haps Haplotype variable. H_0 is that this vector is independent.
 * @param ones Set of ones. Precomputed for speed.
 * @return ZaykinStatsInfo containing all information for this likelihood ratio test.
 */
ZaykinStatsInfo Zaykin::runLikelihoodRatio(const vector<double> &haps, const vector<double> &ones){

  ZaykinStatsInfo stats;

  vector<vector<double> > testVecWithout = cov;
  vector<vector<double> > testVecWith; // wait to fill this one.
  testVecWithout.push_back(ones);

  LogisticRegression lrWith, lrWithout;
  vector<vector<double> > inv_infmatrixWithOut, inv_infmatrixWith;
  vector<double> betasWith, betasWithOut;

  inv_infmatrixWithOut = vecops::getDblVec(testVecWithout.size(), testVecWithout.size());

  int retry = 0;
  double startVal = 0; // value to start betas with.
  while (retry < 3){
    try{
      betasWithOut = lrWithout.newtonRaphson(testVecWithout, phenotype, inv_infmatrixWithOut, startVal);
      break;
    }catch(NewtonRaphsonFailureEx){
      handleException(stats, startVal, retry, "Unable to compute reduced model in single haplotype test: Newton-Raphson setup failure.");
    }catch(NewtonRaphsonIterationEx){
      handleException(stats, startVal, retry, "Unable to compute reduced model in single haplotype test: max iterations hit.");
    }catch(SingularMatrixEx){
      handleException(stats, startVal, retry, "Unable to compute reduced model in single haplotype test: information matrix was singular.");
    }catch(ConditionNumberEx){
      LogisticRegression lr;
      int separableVariable = lr.dataIsSeparable(testVecWithout, phenotype);
      string message;
      if (separableVariable < 0){
        // Error: poor conditioning.
        message = "Unable to compute reduced model in single haplotype test: Poor conditioning in information matrix.";
      }else{
        message = "Unable to compute reduced model in single haplotype test: Separable data matrix.";
      }
      handleException(stats, startVal, retry, message);
      if (retry >= 3) return stats;
    }catch(alglib::ap_error err){
      stringstream ss;
      ss << "Unable to compute reduced model in single haplotype test due to linalg exception: " << err.msg;
      handleException(stats, startVal, retry, ss.str());
      if (retry >= 3) return stats;
    }
  }

  retry = 0;
  startVal = 0; // value to start betas with.
  testVecWith = testVecWithout;
  testVecWith.push_back(haps);
  inv_infmatrixWith = vecops::getDblVec(testVecWith.size(), testVecWith.size());

  while (retry < 3){
    try{
      betasWith = lrWith.newtonRaphson(testVecWith, phenotype, inv_infmatrixWith, startVal);
      break;
    }catch(NewtonRaphsonFailureEx){
      handleException(stats, startVal, retry, "Unable to compute full model in single haplotype test: Newton-Raphson setup failure.");
    }catch(NewtonRaphsonIterationEx){
      handleException(stats, startVal, retry, "Unable to compute full model in single haplotype test: max iterations hit.");
    }catch(SingularMatrixEx){
      handleException(stats, startVal, retry, "Unable to compute full model in single haplotype test: information matrix was singular.");
    }catch(ConditionNumberEx){
      LogisticRegression lr;
      int separableVariable = lr.dataIsSeparable(testVecWith, phenotype);
      string message;
      if (separableVariable < 0){
        // Error: poor conditioning.
        message = "Unable to compute full model in single haplotype test: Poor conditioning in information matrix.";
      }else{
        message = "Unable to compute full model in single haplotype test: Separable data matrix.";
      }
      handleException(stats, startVal, retry, message);
      if (retry >= 3) return stats;
    }catch(alglib::ap_error err){
      stringstream ss;
      ss << "Unable to compute full model in single haplotype test due to linalg exception: " << err.msg;
      handleException(stats, startVal, retry, ss.str());
      if (retry >= 3) return stats;
    }
  }

  stats.chiSqStat = lrWith.likelihoodRatio(betasWithOut, testVecWithout, betasWith, testVecWith, phenotype);
  stats.degFree = betasWith.size() - betasWithOut.size();

  double beta = betasWith.at(betasWith.size() - 1);
  // Renamed from "stderr", which collides with the <cstdio> macro of the same name.
  double stdErr = sqrt(inv_infmatrixWith.at(inv_infmatrixWith.size() - 1).at(inv_infmatrixWith.size() - 1));
  stats.OR = exp(beta);
  stats.LCI = exp(beta - 1.96 * stdErr);
  stats.UCI = exp(beta + 1.96 * stdErr);
  return stats;
}
/**
 * Run the global analysis. Put all data and covariates in a logistic regression model.
 *
 * Compute the likelihood ratio statistic.
 * This uses a likelihood ratio statistic where n-1 haplotypes are tested.
 *
 * @return pvalue.
 */
ZaykinGlobalStatsResults Zaykin::runGlobal(){

  ZaykinGlobalStatsResults stats;

  vector<vector<double> > haps;
  vector<double> ones;
  prepHaplotypes(ones, haps);

#if DEBUG_ZAY_PROGRESS
  cout << "Zaykin start LR portion" << endl;
#endif

  LogisticRegression with(params->getRegressionConditionNumberThreshold()),
                     without(params->getRegressionConditionNumberThreshold());

  /*
   * Run without the haplotypes:
   */
  vector<vector<double> > inv_infmatrixWithOut;
  vector<double> betasWithOut;
  vector<vector<double> > inWithout;
  inWithout = cov;
  inWithout.push_back(ones);

  inv_infmatrixWithOut = vecops::getDblVec(inWithout.size(), inWithout.size());

  int retry = 0;
  double startVal = 0; // value to start betas with.
  while (retry < 3){
    try{
      betasWithOut = without.newtonRaphson(inWithout, phenotype, inv_infmatrixWithOut, startVal);
      break;
    }catch(NewtonRaphsonFailureEx){
      handleException(stats, startVal, retry, "Unable to compute reduced model: Newton-Raphson setup failure.");
    }catch(NewtonRaphsonIterationEx){
      handleException(stats, startVal, retry, "Unable to compute reduced model: max iterations hit.");
    }catch(SingularMatrixEx){
      handleException(stats, startVal, retry, "Unable to compute reduced model: information matrix was singular.");
    }catch(ConditionNumberEx err){
      LogisticRegression lr;
      int separableVariable = lr.dataIsSeparable(inWithout, phenotype);
      string message;
      if (separableVariable < 0){
        // Error: poor conditioning.
        stringstream ss;
        ss << "Unable to compute reduced model: Poor conditioning in information matrix. ";
        ss << "Condition number (1-norm) is " << err.conditionNumber;
        message = ss.str();
      }else{
        stringstream ss;
        ss << "Unable to compute reduced model: Separable data matrix. ";
        ss << "Condition number (1-norm) is " << err.conditionNumber;
        message = ss.str();
      }
      handleException(stats, startVal, retry, message);
      if (retry >= 3) return stats;
    }catch(ADTException e){
      // This one is generic.
      string message = "Unable to compute reduced model: Newton-Raphson error.";
      handleException(stats, startVal, retry, message);
      if (retry >= 3) return stats;
    }catch(alglib::ap_error err){
      stringstream ss;
      ss << "Unable to compute reduced model due to linalg exception: " << err.msg;
      handleException(stats, startVal, retry, ss.str());
      if (retry >= 3) return stats;
    }
  }

  /*
   * Run with the haplotypes:
   */
  vector<vector<double> > inv_infmatrixWith, inWith;
  vector<double> betasWith;
  inWith = inWithout;
  for (unsigned int i = 0; i < haps.size() - 1; i++){
    // NOTE: Don't push the very last haplotype.
    inWith.push_back(haps.at(i));
  }

  inv_infmatrixWith = vecops::getDblVec(inWith.size(), inWith.size());

  retry = 0;
  startVal = 0; // value to start betas with.
  while (retry < 3){
    try{
      betasWith = with.newtonRaphson(inWith, phenotype, inv_infmatrixWith, startVal);
      break;
    }catch(NewtonRaphsonFailureEx){
      handleException(stats, startVal, retry, "Unable to compute full model: Newton-Raphson setup failure.");
    }catch(NewtonRaphsonIterationEx){
      handleException(stats, startVal, retry, "Unable to compute full model: max iterations hit.");
    }catch(SingularMatrixEx){
      handleException(stats, startVal, retry, "Unable to compute full model: information matrix was singular.");
    }catch(ConditionNumberEx err){
      LogisticRegression lr;
      int separableVariable = lr.dataIsSeparable(inWith, phenotype);
      stringstream ss;
      if (separableVariable < 0){
        // Error: poor conditioning.
        ss << "Unable to compute full model: Poor conditioning in information matrix. ";
        ss << "Condition number (1-norm) is " << err.conditionNumber;
      }else{
        ss << "Unable to compute full model: Separable data matrix. ";
        ss << "Condition number (1-norm) is " << err.conditionNumber;
      }
      string message = ss.str();
      handleException(stats, startVal, retry, message);
      if (retry >= 3) return stats;
    }catch(ADTException e){
      string message = "Unable to compute full model: Newton-Raphson error.";
      handleException(stats, startVal, retry, message);
      if (retry >= 3) return stats;
    }catch(alglib::ap_error err){
      stringstream ss;
      ss << "Unable to compute full model due to linalg exception: " << err.msg;
      handleException(stats, startVal, retry, ss.str());
      if (retry >= 3) return stats;
    }
  }

  double likeRatio = with.likelihoodRatio(betasWithOut, inWithout, betasWith, inWith, phenotype);

  try{
    stats.pvalue = Statistics::chi2prob(likeRatio, betasWith.size() - betasWithOut.size());
    stats.testStat = likeRatio;
    stats.degFreedom = betasWith.size() - betasWithOut.size();
  }catch(...){
    stringstream ss;
    ss << "Zaykin's method: unable to compute chi square: " << likeRatio << " "
       << betasWith.size() - betasWithOut.size() << endl;
    Logger::Instance()->writeLine(ss.str());
    stats.fillDefault();
    return stats;
  }

  return stats;
}
int main() {
  LogisticRegression lr;
  lr.Test();
  return 0;
}
bool train( CommandLineParser &parser ){

    infoLog << "Training regression model..." << endl;

    string trainDatasetFilename = "";
    string modelFilename = "";
    float learningRate = 0;
    float minChange = 0;
    unsigned int maxEpoch = 0;
    unsigned int batchSize = 0;

    //Get the filename
    if( !parser.get("filename",trainDatasetFilename) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f option." << endl;
        printHelp();
        return false;
    }

    //Get the parameters from the parser
    parser.get("model-filename",modelFilename);
    parser.get( "learning-rate", learningRate );
    parser.get( "min-change", minChange );
    parser.get( "max-epoch", maxEpoch );
    parser.get( "batch-size", batchSize );

    infoLog << "settings: learning-rate: " << learningRate << " min-change: " << minChange << " max-epoch: " << maxEpoch << " batch-size: " << batchSize << endl;

    //Load the training data to train the model
    RegressionData trainingData;

    //Try and parse the input and target dimensions
    unsigned int numInputDimensions = 0;
    unsigned int numTargetDimensions = 0;
    if( parser.get("num-inputs",numInputDimensions) && parser.get("num-targets",numTargetDimensions) ){
        infoLog << "num input dimensions: " << numInputDimensions << " num target dimensions: " << numTargetDimensions << endl;
        trainingData.setInputAndTargetDimensions( numInputDimensions, numTargetDimensions );
    }

    if( (numInputDimensions == 0 || numTargetDimensions == 0) && Util::stringEndsWith( trainDatasetFilename, ".csv" ) ){
        errorLog << "Failed to parse num input dimensions and num target dimensions from input arguments. You must supply the input and target dimensions if the data format is CSV!" << endl;
        printHelp();
        return false;
    }

    infoLog << "- Loading Training Data..." << endl;
    if( !trainingData.load( trainDatasetFilename ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int T = trainingData.getNumTargetDimensions();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num input dimensions: " << N << endl;
    infoLog << "- Num target dimensions: " << T << endl;

    //Create a new regression instance
    LogisticRegression regression;
    regression.setMaxNumEpochs( maxEpoch );
    regression.setMinChange( minChange );
    regression.setUseValidationSet( true );
    regression.setValidationSetSize( 20 );
    regression.setRandomiseTrainingOrder( true );
    regression.enableScaling( true );

    //Create a new pipeline that will hold the regression algorithm
    GestureRecognitionPipeline pipeline;

    //Add a multidimensional regression instance and set the regression algorithm to Logistic Regression
    pipeline.setRegressifier( MultidimensionalRegression( regression, true ) );

    infoLog << "- Training model...\n";

    //Train the regression pipeline
    if( !pipeline.train( trainingData ) ){
        errorLog << "Failed to train model!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;
    infoLog << "- Saving model to: " << modelFilename << endl;

    //Save the pipeline
    if( pipeline.save( modelFilename ) ){
        infoLog << "- Model saved." << endl;
    }else warningLog << "Failed to save model to file: " << modelFilename << endl;

    infoLog << "- TrainingTime: " << pipeline.getTrainingTime() << endl;

    return true;
}
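// ---------------------------------------------------------------------------
// Follow-up sketch (not part of the tool above): loading the saved pipeline
// back in and running one regression prediction. The accessors used here
// (load, predict, getRegressionData) are assumed to mirror the save()/train()
// calls in the snippet above; treat this as an illustrative usage example
// rather than the tool's own code.
bool predictExample( const string &modelFilename, const VectorFloat &inputVector ){

    GestureRecognitionPipeline pipeline;

    //Load the previously trained and saved pipeline
    if( !pipeline.load( modelFilename ) ){
        errorLog << "Failed to load model from file: " << modelFilename << endl;
        return false;
    }

    //Run the prediction for a single input vector
    if( !pipeline.predict( inputVector ) ){
        errorLog << "Failed to run prediction!" << endl;
        return false;
    }

    //The regression output has one value per target dimension
    VectorFloat regressionOutput = pipeline.getRegressionData();
    infoLog << "- Predicted output: ";
    for(unsigned int i=0; i<regressionOutput.size(); i++) infoLog << regressionOutput[i] << " ";
    infoLog << endl;

    return true;
}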