Example #1
File: lr.cpp Project: guyrt/WFUBMC
// Perform the exact (and slower) Newton-Raphson fit using the exact Fisher information matrix.
vector<double> LogisticRegression::newtonRaphson(const vector<vector<double> > &data, const vector<double> &response, vector<vector<double> > &invInfMatrix, double startVal)
{

    vector<double> betas;
    
    // Variables used in the computation:
    alglib::real_1d_array tempBetas;
    alglib::real_2d_array tempData;
    alglib::real_2d_array tempDataTrans;
    alglib::real_1d_array oldExpY;
    alglib::real_2d_array hessian; // holds data' * w * data.  Returned in last input param.
    alglib::real_1d_array expY;
    alglib::real_1d_array W; // holds diagonal of the w matrix above.
    alglib::real_1d_array adjy;
    
    alglib::real_1d_array work;
    
    double stop_var = 1e-10;

    int iter = 0;
    int maxIter = 200;
    
    int numVars = data.size();
    if(numVars < 1){
        throw NewtonRaphsonFailureEx();
    }
    
    int numSamples = data.at(0).size();
    if(numSamples < 1){
        throw NewtonRaphsonFailureEx();
    }
    
    tempBetas.setlength(numVars);
    tempData.setlength(numSamples,numVars);
    tempDataTrans.setlength(numVars, numSamples);
    oldExpY.setlength(numSamples);
    expY.setlength(numSamples);
    hessian.setlength(numVars, numVars);
    adjy.setlength(numSamples);
    W.setlength(numSamples);
    
    work.setlength(numVars);
    
    for(int i=0;i < numVars; i++){
        for(int j=0;j < numSamples; j++){
            tempData(j,i) = data.at(i).at(j);
        }
        tempBetas(i) = startVal;
    }
    for(int i=0;i < numSamples;i++){
        oldExpY(i) = -1;
        adjy(i) = 0; // makes valgrind happier.
    }

    // End initial setup.
    // In each iteration, create a hessian and a first derivative.
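    // What follows is iteratively reweighted least squares (IRLS), the
    // standard Newton-Raphson iteration for logistic regression:
    //   p     = logistic(X * beta)            (expY below)
    //   W     = diag(p_i * (1 - p_i))         (the W array below)
    //   z     = X * beta + W^{-1} * (y - p)   (adjy below)
    //   beta' = (X' W X)^{-1} X' W z          (hessian and work below)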
    while(iter < maxIter){
        
        //adjy <- data * tempBetas  (the linear predictor)
        matrixvectormultiply(tempData, 0, numSamples-1,0,numVars-1,false, 
                            tempBetas, 0, numVars-1, 1.0, 
                            adjy, 0, numSamples-1, 0.0);
        
        // expY = 1 / (1 + exp(-adjy))   (fitted probabilities)
        for(int i=0;i < numSamples; i++){
            expY(i) = 1 / (1 + exp(-adjy(i)));
        }
        
        // Build the IRLS weights: W(i) = p_i * (1 - p_i), the Bernoulli variance.
        for(int i=0;i < numSamples; i++){
            W(i) = expY(i) * (1 - expY(i));
        }
        
        // adjy = adjy + (y - expY) ./ W   (the working response z)
        // Note: W(i) underflows to zero when expY saturates at 0 or 1,
        // which would make this division blow up.
        for(int i=0;i<numSamples;i++){
            adjy(i) = adjy(i) + (response.at(i) - expY(i)) / W(i);
        }
        
        // build data' * w * data
        // set to hessian.  
        // Also doing secondary computation (see inside)
        for(int i=0; i < numVars; i++){
            for(int j=0; j < numVars; j++)
                hessian(i,j) = 0.0;
        }
        
        for(int indiv=0; indiv < numSamples; ++indiv){
            for(int i = 0; i < numVars; i++){
                for(int j = 0; j < numVars; j++){
                    hessian(i,j) += W(indiv) * tempData(indiv, j) * tempData(indiv, i);
                }
                
                // NOTE: as a speedup, I'm also computing X' * W
                tempDataTrans(i, indiv) = W(indiv) * tempData(indiv, i);
                
            }
        }
        
        alglib::matinvreport report;
        alglib::ae_int_t reportInfo;
        rmatrixinverse(hessian, reportInfo, report);
        if(reportInfo != 1 ){
            throw SingularMatrixEx();
        }
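        // rmatrixinverse inverts in place: from here on, `hessian` holds
        // inv(X' W X), the inverse Fisher information that is copied into
        // invInfMatrix before returning.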
        
        // Check condition number.
        if (report.r1 < condition_number_limit){
            throw ConditionNumberEx(1.0/report.r1);
        }
        
        // work <- (X' W) * adjy
        matrixvectormultiply(tempDataTrans,0,numVars-1,0,numSamples-1,false,
                            adjy,0,numSamples-1,1.0,
                            work,0,numVars-1,0.0);
        // tempBetas <- inv(X' W X) * work   (the Newton update)
        matrixvectormultiply(hessian,0,numVars-1,0,numVars-1,false,
                            work,0,numVars-1,1.0,
                            tempBetas,0,numVars-1,0.0);
        
        
        #if DEBUG_NR
            cout << "Betas ";
            for(int i=0;i < numVars;i++) cout << tempBetas(i) << "  " ;
            cout << endl;
        #endif
        
        // Convergence test: 1-norm of the change in fitted probabilities.
        double stop = 0.0;
        for(int i=0;i < numSamples;i++){
            stop += fabs(expY(i) - oldExpY(i));
        }

        if (stop < numSamples*stop_var){
            break;
        }
        
        oldExpY = expY;
        
        iter++;
    }
    
    if(iter == maxIter){
        throw NewtonRaphsonIterationEx();
    }
    
    betas.clear();
    for(int i=0;i<numVars;i++){
        betas.push_back(tempBetas(i));
    }

    for(int i=0;i<numVars;i++){
        for(int j=0;j<numVars;j++){
            invInfMatrix.at(i).at(j) = hessian(i,j);
        }
    }
    
    //dumpMatrix(invInfMatrix);
    
    return betas;
    
}
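
A minimal driver for the function above may be useful. This is a hypothetical sketch, not code from the project: the header name, the default constructor, and the toy data are all assumptions; the variable-major data layout and the caller-sized invInfMatrix follow the doc comment in Example #3.

#include <vector>
#include <iostream>
#include "lr.h"   // assumed header for LogisticRegression and its exceptions

int main(){
    using std::vector;
    // Two explanatory variables (each inner vector is one variable), five samples.
    double ones[] = {1, 1, 1, 1, 1};                 // intercept column
    double x[]    = {0.5, 1.2, 2.3, 3.1, 4.0};
    vector<vector<double> > data;
    data.push_back(vector<double>(ones, ones + 5));
    data.push_back(vector<double>(x, x + 5));

    double y[] = {0, 1, 0, 1, 1};                    // deliberately not separable
    vector<double> response(y, y + 5);

    // The caller must pre-size the returned inverse information matrix.
    vector<vector<double> > invInf(2, vector<double>(2, 0.0));

    LogisticRegression lr;                           // default ctor assumed
    try {
        vector<double> betas = lr.newtonRaphson(data, response, invInf, 0.0);
        for(size_t i = 0; i < betas.size(); ++i) std::cout << betas[i] << " ";
        std::cout << std::endl;
    } catch (...) {  // SingularMatrixEx, ConditionNumberEx, NewtonRaphson*Ex
        std::cerr << "Newton-Raphson failed" << std::endl;
        return 1;
    }
    return 0;
}

Note the sizing contract: the function writes the result through invInfMatrix.at(i).at(j), so an undersized matrix throws std::out_of_range rather than being resized.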
Example #2
/* If we're about to run off the matrix, adjust accordingly (if possible) */
void
adjust(void)
{
        adjx();         /* pull x back within the matrix bounds */
        adjy();         /* pull y back within the matrix bounds */
}
Example #3
File: lr.cpp Project: guyrt/WFUBMC
/*
 * @deprecated
 * 
 * NR implementation by RTG.
 *
 * Note on matrix-matrix multiplication (a usage sketch follows this function):
 *  void matrixmatrixmultiply(const alglib::real_2d_array& a,
 *    int ai1,
 *    int ai2,
 *    int aj1,
 *    int aj2,
 *    bool transa,
 *    const alglib::real_2d_array& b,
 *    int bi1,
 *    int bi2,
 *    int bj1,
 *    int bj2,
 *    bool transb,
 *    double alpha,
 *    alglib::real_2d_array& c,
 *    int ci1,
 *    int ci2,
 *    int cj1,
 *    int cj2,
 *    double beta,
 *    alglib::real_1d_array& work);
 * 
 * transa (and likewise transb) is true if that operand is transposed.
 * Operation is: C = alpha * op(A) * op(B) + beta * C
 * 
 * @input const vector<vector<double>>  data holds the explanatory variables.  Each inner vector is a variable.
 * @input const vector<double>          response holds the response variable.
 * @input       vector<vector<double>>  invInfMatrix is where the inverse of the information matrix is returned.
 *                                      Column major order.  Expects correct size.
 * @input       double                  startVal is the initial value assigned to every beta.
 */
vector<double> LogisticRegression::newtonRaphsonFast(const vector<vector<double> > &data, const vector<double> &response
                                                    , vector<vector<double> > &invInfMatrix, double startVal)
{
    
    
    vector<double> betas;
    
    
    // Variables used in the computation:
    alglib::real_1d_array tempBetas;
    alglib::real_2d_array tempData;
    alglib::real_1d_array oldExpY;
    alglib::real_2d_array tempDeriv; // holds data' * w * data.  Returned in last input param.
    alglib::real_1d_array expY;
    
    alglib::real_1d_array adjy;
    
    
    double stop_var = 1e-10;

    int iter = 0;
    int maxIter = 200;
    
    int numVars = data.size();
    if(numVars < 1){
        throw NewtonRaphsonFailureEx();
    }
    
    int numSamples = data.at(0).size();
    if(numSamples < 1){
        throw NewtonRaphsonFailureEx();
    }
    
    tempBetas.setlength(numVars);
    tempData.setlength(numSamples,numVars);
    oldExpY.setlength(numSamples);
    expY.setlength(numSamples);
    tempDeriv.setlength(numVars,numVars);
    adjy.setlength(numSamples);
    
    for(int i=0;i < numVars; i++){
        for(int j=0;j < numSamples; j++){
            tempData(j,i) = data.at(i).at(j);
        }
        tempBetas(i) = startVal;
    }
    for(int i=0;i < numSamples;i++){
        oldExpY(i) = -1;
        adjy(i) = 0; // makes valgrind happier.
    }

    while(iter < maxIter){
    
        //adjy <- data * tempBetas  (the linear predictor)
        matrixvectormultiply(tempData, 0, numSamples-1,0,numVars-1,false, 
                            tempBetas, 0, numVars-1, 1.0, 
                            adjy, 0, numSamples-1, 0.0);
        
        for(int i=0;i < numSamples; i++){
            expY(i) = 1 / (1 + exp(-adjy(i)));
        }
        
        // Build the weight.  Unlike the exact version, this "fast" variant
        // collapses the diagonal weight matrix W down to a single scalar:
        // the largest p_i * (1 - p_i) over all samples, floored away from zero.
        double deriv = -100000;
        for(int i=0;i < numSamples; i++){
            if(expY(i) * (1 - expY(i)) > deriv)
                deriv = expY(i) * (1 - expY(i));
        }
        if(stop_var * 0.001 > deriv) deriv = stop_var * 0.001; // avoid dividing by ~0 below
        
        // adjy = adjy + (y-expy) ./ deriv
        for(int i=0;i<numSamples;i++){
            adjy(i) = adjy(i) + (response.at(i) - expY(i)) / deriv;
        }
        
        // build data' * w * data
        alglib::real_1d_array work;
        work.setlength(numSamples + 1); // This temporary workspace must be one larger than the
                                        // longest row or column that will be seen.  Otherwise,
                                        // the program crashes or - worse! - corrupts data.
        matrixmatrixmultiply(tempData,0,numSamples-1,0,numVars-1,true,
                            tempData,0,numSamples-1,0,numVars-1,false,deriv,
                            tempDeriv,0,numVars-1,0,numVars-1,0.0,work);

        #if DEBUG_NR
            // Debug dump (hard-coded for numVars == 2).
            cout << "A' * w * A " << endl;
            cout << tempDeriv(0,0) << " " << tempDeriv(0,1) << endl;
            cout << tempDeriv(1,0) << " " << tempDeriv(1,1) << endl;
            cout << endl;
        #endif
        
        alglib::matinvreport report;
        alglib::ae_int_t reportInfo;
        rmatrixinverse(tempDeriv, reportInfo, report);
        
        if( reportInfo != 1 ){
            throw SingularMatrixEx();
        }
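        // rmatrixinverse inverts in place: tempDeriv now holds
        // inv(deriv * X' X), which is copied into invInfMatrix below.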

        #if DEBUG_NR
            // Debug dump (hard-coded for numVars == 2).
            cout << "inv(A' * w * A) " << endl;
            cout << tempDeriv(0,0) << " " << tempDeriv(0,1) << endl;
            cout << tempDeriv(1,0) << " " << tempDeriv(1,1) << endl;
            cout << endl;
        #endif

        // work <- deriv * (X' * adjy)
        matrixvectormultiply(tempData,0,numSamples-1,0,numVars-1,true,
                            adjy,0,numSamples-1,deriv,
                            work,0,numVars-1,0.0);
        // tempBetas <- inv(deriv * X' X) * work
        matrixvectormultiply(tempDeriv,0,numVars-1,0,numVars-1,false,
                            work,0,numVars-1,1.0,
                            tempBetas,0,numVars-1,0.0);
        
        #if DEBUG_NR
            cout << "Betas ";
            for(int i=0;i < numVars;i++) cout << tempBetas(i) << "  " ;
            cout << endl;
        #endif
        
        // Convergence test: 1-norm of the change in fitted probabilities.
        double stop = 0.0;
        for(int i=0;i < numSamples;i++){
            stop += fabs(expY(i) - oldExpY(i));
        }
    
        if (stop < numSamples*stop_var){
            break;
        }
        
        oldExpY = expY;
        
        iter++;
    }
    
    if(iter == maxIter){
        throw NewtonRaphsonIterationEx();
    }
    
    betas.clear();
    for(int i=0;i<numVars;i++){
        betas.push_back(tempBetas(i));
    }

    for(int i=0;i<numVars;i++){
        for(int j=0;j<numVars;j++){
            invInfMatrix.at(i).at(j) = tempDeriv(i,j);
        }
    }
    
    return betas;
}
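
The comment block above is the only documentation of matrixmatrixmultiply in these examples, so a standalone sketch may help. Everything here beyond the quoted signature is an assumption: the header names come from legacy ALGLIB distributions and may differ in this project's build. The sketch computes C = A' * A for a 3x2 matrix, sizing the workspace one element larger than the longest dimension, as the note in newtonRaphsonFast warns.

#include "ap.h"     // assumed legacy ALGLIB headers
#include "blas.h"   // declares matrixmatrixmultiply in old builds
#include <iostream>

int main(){
    const int m = 3, n = 2;            // A is m x n
    alglib::real_2d_array A, C;
    alglib::real_1d_array work;
    A.setlength(m, n);
    C.setlength(n, n);
    work.setlength(m + 1);             // one larger than the longest dimension

    double vals[3][2] = { {1,2}, {3,4}, {5,6} };
    for(int i = 0; i < m; i++)
        for(int j = 0; j < n; j++)
            A(i,j) = vals[i][j];

    // C = 1.0 * A' * A + 0.0 * C   (transa = true, transb = false)
    matrixmatrixmultiply(A,0,m-1,0,n-1,true,
                         A,0,m-1,0,n-1,false,1.0,
                         C,0,n-1,0,n-1,0.0,work);

    for(int i = 0; i < n; i++){        // expected output: 35 44 / 44 56
        for(int j = 0; j < n; j++) std::cout << C(i,j) << " ";
        std::cout << std::endl;
    }
    return 0;
}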