Exemplo n.º 1
/**
 * Standardize the columns of X in place and scale its rows by b.
 *
 * The column averages and standard deviations are obtained from
 * dmat_colavg() and dmat_colstd(). Any standard deviation below 1.0e-20 is
 * replaced by 1 so that the later division by std cannot blow up.
 *
 * Two paths are taken depending on X->nz:
 *  - nz >= 0 (presumably the sparse storage format -- confirm against the
 *    dmatrix definition): X is only scaled, X := diag(b)*X*inv(diag(std)).
 *    The column average is NOT subtracted from X; instead the two vectors
 *    ac = b (all ones when b is NULL) and ar = avg ./ std are produced.
 *  - nz < 0: X->val is addressed as a dense row-major m x n array, the
 *    column average is subtracted element by element and X is then scaled.
 *    No ac/ar vectors are produced (both are NULL).
 *
 * @param X        matrix to standardize; modified in place.
 * @param b        row scaling vector of length m; the nz >= 0 path accepts
 *                 NULL and then uses all ones for ac.
 * @param average  if non-NULL, receives the malloc'ed vector of n averages.
 * @param stddev   if non-NULL, receives the malloc'ed vector of n standard
 *                 deviations (after the 1.0e-20 clamp).
 * @param acol     if non-NULL, receives ac (length m) or NULL.
 * @param arow     if non-NULL, receives ar (length n) or NULL.
 *
 * Every buffer whose output pointer is NULL is freed before returning;
 * buffers handed out through an output pointer are not freed here.
 *
 * NOTE(review): the malloc() results are not checked; the function returns
 * void, so an allocation failure cannot be reported to the caller.
 */
void standardize_data(dmatrix *X, const double *b, double **average,
                      double **stddev, double **acol, double **arow)
{
    int i, n, m, nz;
    double *avg, *std, *ac, *ar;

    n   = X->n;
    m   = X->m;
    nz  = X->nz;

    avg = malloc(n * sizeof *avg);
    std = malloc(n * sizeof *std);
    dmat_colavg(X, avg);
    dmat_colstd(X, avg, std);

    /* guard against division by a (near-)zero standard deviation */
    for (i = 0; i < n; i++)
    {
        if (std[i] < 1.0e-20) std[i] = 1;
    }

    if (nz >= 0)
    {
        ar  = malloc(n * sizeof *ar);
        ac  = malloc(m * sizeof *ac);

        /* X := diag(b)*X*inv(diag(std)) */
        dmat_diagscale(X, b, FALSE, std, TRUE);

        /* ac = diag(b)*1 */
        if (b != NULL) dmat_vcopy(m,   b, ac);
        else           dmat_vset (m, 1.0, ac);

        /* ar = avg^T*inv(diag(std)) */
        dmat_elemdivi(n, avg, std, ar);
    }
    else
    {
        int j;

        /* the average is removed from X itself, so no correction vectors */
        ar  = NULL;
        ac  = NULL;

        for (i = 0; i < m; i++)
        {
            for (j = 0; j < n; j++)
            {
                X->val[i*n+j] -= avg[j];
            }
        }
        /* X = diag(b)*X*inv(diag(std)) */
        dmat_diagscale(X, b, FALSE, std, TRUE);
    }

    /* hand each buffer to the caller, or release it when it is not wanted;
       free(NULL) is a no-op, which covers ac/ar on the nz < 0 path */
    if (average != NULL) *average = avg; else free(avg);
    if (stddev  != NULL) *stddev  = std; else free(std);
    if (acol    != NULL) *acol    = ac;  else free(ac);
    if (arow    != NULL) *arow    = ar;  else free(ar);
}
Exemplo n.º 2
/** \brief  Compute search direction using pcg method.
void compute_searchdir_pcg(problem_data_t * pdat, variables_t * vars,
                           double t, double s, double gap, pcg_status_t * pcgstat,
                           adata_t * adata, mdata_t * mdata, double *precond,
                           double *tmp_m1, double *A2h, double *tmp_x1)
    int i, m, n, nz;
    double *p0, *p1, *p2, *p3;
    double normg, pcgtol, pcgmaxi, multfact;

    dmatrix *matX1, *matX2;
    double lambda, tinv;
    double *g, *h, *z, *expz, *expmz, *ac, *ar, *b, *d1, *d2, *Aw;
    double *x, *v, *w, *u, *dx, *dv, *dw, *du, *gv, *gw, *gu, *gx;

    static double pcgtol_factor = 1.0;

    get_problem_data(pdat, &matX1, &matX2, &ac, &ar, &b, &lambda);
    get_variables(vars, &x, &v, &w, &u, &dx, &dv, &dw, &du, &gx, &gv,
                  &gw, &gu, &g, &h, &z, &expz, &expmz, &d1, &d2, &Aw);
    m  = matX1->m;
    n  = matX1->n;
    nz = matX1->nz;
    tinv = 1.0 / t;

    p0 = &precond[0];
    p1 = &precond[1];
    p2 = &precond[1+n];
    p3 = &precond[1+n+n];

    /* dmat_vset(n+n+1, 0, dx); */

    dmat_yATx(matX2, h, A2h);        /* A2h = A2'*h */

    multfact = 0.0;
    if (ac != NULL)
        /* h.*ac */
        dmat_elemprod(m, h, ac, tmp_m1);

        dmat_vset(n, 0, tmp_x1);
        dmat_yAmpqTx(matX1, NULL, NULL, tmp_m1, tmp_x1);
        dmat_elemprod(n, ar, tmp_x1, tmp_x1);

        for (i = 0; i < m; i++)
            multfact += h[i] * ac[i] * ac[i];

    p0[0] = 0;
    for (i = 0; i < m; i++)
        p0[0] += b[i] * b[i] * h[i];

    /* complete forming gradient and d1, d2, precond */
    for (i = 0; i < n; i++)
        double q1, q2, d3, div;

        q1 = 1.0 / (u[i] + w[i]);
        q2 = 1.0 / (u[i] - w[i]);

        gw[i] -= (q1 - q2) * tinv;        /* A'*g   - (q1-q2) */
        gu[i] = lambda - (q1 + q2) * tinv;        /* lambda - (q1+q2) */

        d1[i] = (q1 * q1 + q2 * q2) * tinv;
        d2[i] = (q1 * q1 - q2 * q2) * tinv;

        if (ac != NULL)
            d3 = A2h[i] + d1[i] + multfact*ar[i]*ar[i] - 2*tmp_x1[i];
            d3 = A2h[i] + d1[i];
        div = 1 / (d3 * d1[i] - d2[i] * d2[i]);

        p1[i] = d1[i] * div;
        p2[i] = d2[i] * div;
        p3[i] = d3 * div;
    normg = dmat_norm2(n+n+1, gx);

    pcgtol = min(1e-1, 0.3*gap/min(1.0,normg));
    pcgtol = min(1e-1, 0.3*gap/min(1.0,sqrt(normg)));
    pcgmaxi = MAX_PCG_ITER;
    if (s < 1e-5)
        pcgtol_factor *= 0.5;
        pcgtol_factor = 1.0;
     pcgtol = pcgtol*pcgtol_factor;

    dmat_waxpby(n+n+1, -1, gx, 0, NULL, tmp_x1);

    pcg(dx, pcgstat, afun, adata, mfun, mdata, tmp_x1, pcgtol, pcgmaxi, n+n+1);
Exemplo n.º 3
/*
 * l1_logreg_train: iterative solver driver.
 *
 * Works on a private copy of X (matX1), optionally standardized, and runs at
 * most MAX_NT_ITER iterations. Each iteration re-optimizes the intercept,
 * builds a dual point, evaluates the duality gap, updates t, computes a
 * search direction (pcg when matX1->nz >= 0, otherwise one of the two
 * Cholesky-based direct routines) and performs a backtracking line search.
 *
 * Parameters, as used by the visible code:
 *   X              input matrix; it is duplicated and copied into matX1
 *                  before anything is modified.
 *   b              vector used as diag(b) row scaling and in b'*g
 *                  (presumably the class labels -- confirm).
 *   lambda         weight on dmat_norm1(n, w) in the primal objective.
 *   to             options; the fields read here are sflag, cflag,
 *                  verbose_level, tolerance and ktolerance.
 *   initial_x      optional; n+n+1 values copied into x at start and
 *                  overwritten with the final x on exit.
 *   initial_t      read and written together with initial_x; it is
 *                  dereferenced whenever initial_x is used.
 *   sol            optional output of n+1 values: sol[0] = x[0], and
 *                  sol[i+1] = x[i+1] where |gw[i]| exceeds
 *                  to.ktolerance*lambda, 0.0 otherwise.
 *   total_ntiter   optional output, iteration count.
 *   total_pcgiter  optional output, accumulated pcgstat.iter.
 *
 * Returns STATUS_SOLUTION_FOUND when gap < to.tolerance; otherwise returns
 * `status` after the loop ends.
 *
 * NOTE(review): this listing has lost the lines that held only '{', '}' or
 * 'else', and several comment delimiters; the indentation still shows the
 * intended nesting. The code below is left exactly as found.
 * NOTE(review): no assignment to `status` is visible anywhere in this block,
 * yet it is returned at the end -- the two branches under "Abnormal
 * termination" are empty here. Restore them from the project source.
 * NOTE(review): no release of internal_plot, matX1, matX2, avg_x, std_x, ac,
 * ar or of the storage behind `vars` is visible in this block; confirm where
 * they are freed.
 */
int l1_logreg_train(dmatrix *X, double *b, double lambda, train_opts to,
                    double *initial_x, double *initial_t, double *sol,
                    int *total_ntiter, int *total_pcgiter)
    /* problem data */
    problem_data_t  prob;
    variables_t     vars;

    dmatrix *matX1;     /* matX1 = diag(b)*X_std */
    dmatrix *matX2;     /* matX2 = X_std.^2 (only for pcg) */
    double *ac, *ar;
    double *avg_x, *std_x;

    int m, n, ntiter, pcgiter, status;
    double pobj, dobj, gap;
    double t, s, maxAnu;

    double *g,  *h,  *z,  *expz, *expmz;
    double *x,  *v,  *w,  *u;
    double *dx, *dv, *dw, *du;
    double *gv, *gw, *gu, *gx;
    double *d1, *d2, *Aw;

    /* pcg variables */
    pcg_status_t pcgstat;
    adata_t adata;
    mdata_t mdata;
    double *precond;

    /* temporary variables */
    double *tm1, *tn1, *tn2, *tn3, *tn4, *tx1;

    /* temporary variables for dense case (cholesky) */
    dmatrix *B;     /* m x n (or m x n) */
    dmatrix *BB;    /* n x n (or m x m) */

    char format_buf[PRINT_BUF_SIZE];

    /* per-iteration trace: 3 rows of MAX_NT_ITER entries, filled in the
       main loop */
    dmatrix *internal_plot;
    dmat_new_dense(&internal_plot, 3, MAX_NT_ITER);
    /* row 1: cum_nt_iter, row 2: cum_pcg_iter, row 3: duality gap */

    p2func_progress print_progress = NULL;

    s       =  1.0;
    pobj    =  DBL_MAX;
    dobj    = -DBL_MAX;
    pcgiter =  0;
    matX1   =  NULL;
    matX2   =  NULL;


    /* work on a private copy so that the caller's X is left untouched by
       the scaling below */
    dmat_duplicate(X, &matX1);
    dmat_copy(X, matX1);

    m = matX1->m;
    n = matX1->n;

    /* NOTE(review): the three statements after standardize_data() are
       presumably the else branch (no standardization); as listed they
       would rescale matX1 and discard the vectors just returned. */
    if (to.sflag == TRUE)
        /* standardize_data not only standardizes the data,
           but also multiplies diag(b). */
        standardize_data(matX1, b, &avg_x, &std_x, &ac, &ar);
        /* matX1 = diag(b)*X */
        dmat_diagscale(matX1, b, FALSE, NULL, TRUE);
        avg_x = std_x = ac = ar = NULL;

    /* NOTE(review): `matX2 = NULL` is presumably the else branch. */
    if (matX1->nz >= 0)                /* only for pcg */
        dmat_elemAA(matX1, &matX2);
        matX2 = NULL;

    set_problem_data(&prob, matX1, matX2, ac, ar, b, lambda, avg_x, std_x);

    create_variables(&vars, m, n);
    get_variables(&vars, &x, &v, &w, &u, &dx, &dv, &dw, &du, &gx, &gv,
                  &gw, &gu, &g, &h, &z, &expz, &expmz, &d1, &d2, &Aw);

    allocate_temporaries(m, n, (matX1->nz >= 0),
                         &tm1, &tn1, &tn2, &tn3, &tn4, &tx1, &precond, &B, &BB);

    /* cold start (v = 0, w = 0, u = 1) versus warm start from initial_x.
       NOTE(review): the last three statements are presumably the else
       branch; they read initial_x and initial_t, which the cold-start case
       has just established to be NULL for initial_x. */
    if (initial_x == NULL)
        dmat_vset(1, 0.0, v);
        dmat_vset(n, 0.0, w);
        dmat_vset(n, 1.0, u);
        dmat_vset(n+n+1, 0, dx);
        t = min(max(1.0, 1.0 / lambda), 2.0 * n / ABSTOL);
        dmat_vcopy(n+n+1, initial_x, x);
        dmat_vset(n+n+1, 0, dx);
        t = *initial_t;

    set_adata(&adata, matX1, ac, ar, b, h, d1, d2);
    set_mdata(&mdata, m, n, precond);

    /* select printing function and format according to
           verbose level and method type (pcg/direct) */

    if (to.verbose_level>=2) init_progress((matX1->nz >= 0), to.verbose_level,
                              format_buf, &print_progress);

    /*** MAIN LOOP ************************************************************/

    /* NOTE(review): the lines below that start with " *  " are the bodies of
       block comments whose opening and closing delimiter lines are missing
       from this listing. */
    for (ntiter = 0; ntiter < MAX_NT_ITER; ntiter++)
         *  Sets v as the optimal value of the intercept.
        dmat_yAmpqx(matX1, ac, ar, w, Aw);
        optimize_intercept(v, z, expz, expmz, tm1, b, Aw, m);

         *  Constructs dual feasible point nu.
        fprimes(m, expz, expmz, g, h);

        /* partially computes the gradient of phi.
           the rest part of the gradient will be completed while computing 
           the search direction. */

        gv[0] = dmat_dot(m, b, g);              /* gv = b'*g */
        dmat_yAmpqTx(matX1, ac, ar, g, gw);     /* gw = A'*g */

        dmat_waxpby(m, -1, g, 0, NULL, tm1);    /* nu = -g   */
        maxAnu = dmat_norminf(n, gw);           /* max(A'*nu) */

        /* shrink nu by lambda/maxAnu whenever max(A'*nu) exceeds lambda */
        if (maxAnu > lambda)
            dmat_waxpby(m, lambda / maxAnu, tm1, 0.0, NULL, tm1);

         *  Evaluates duality gap.
        pobj = logistic_loss2(m,z,expz,expmz)/m + lambda*dmat_norm1(n,w);
        /* dobj keeps the largest dual value seen so far */
        dobj = max(nentropy(m, tm1) / m, dobj);
        gap  = pobj - dobj;

        internal_plot->val[0*MAX_NT_ITER+ntiter] = (double)ntiter;
        internal_plot->val[1*MAX_NT_ITER+ntiter] = (double)pcgiter;
        internal_plot->val[2*MAX_NT_ITER+ntiter] = gap;
        /* NOTE(review): on the first iteration pcgstat has not been written
           by any visible code before its fields are read here. */
        if (to.verbose_level>=2)
            (*print_progress)(format_buf, ntiter, gap, pobj, dobj, s, t,
                              pcgstat.flag, pcgstat.relres, pcgstat.iter);

         *  Quits if gap < tolerance.
        if (gap < to.tolerance ) /***********************************************/
            if (sol != NULL)
                /* trim solution */
                int i;
                double lambda_threshold;

                lambda_threshold = to.ktolerance*lambda;
                sol[0] = x[0];

                /* keep a coefficient only where |gw[i]| exceeds the
                   threshold */
                for (i = 0; i < n; i++)
                    sol[i+1] = (fabs(gw[i])>lambda_threshold)? x[i+1] : 0.0;
                /* if standardized, sol = coeff/std */
                if (to.sflag == TRUE && to.cflag == FALSE)
                    dmat_elemdivi(n, sol+1, std_x, sol+1);
                    sol[0] -= dmat_dot(n, avg_x, sol+1);

            /* hand the final iterate back for a later warm start */
            if (initial_x != NULL)
                dmat_vcopy(n+n+1, x, initial_x);
                *initial_t = t;

            if (total_pcgiter) *total_pcgiter = pcgiter;
            if (total_ntiter ) *total_ntiter  = ntiter;

            /* free memory */
            free_temporaries(tm1, tn1, tn2, tn3, tn4, tx1, precond, B, BB);

            return STATUS_SOLUTION_FOUND;

        } /********************************************************************/

         *  Updates t
        /* t never decreases: it is raised after a step of at least 0.5 and
           nudged up by 10% after a near-zero step */
        if (s >= 0.5)
            t = max(min(2.0 * n * MU / gap, MU * t), t);
        else if (s < 1e-5)
            t = 1.1*t;

         *  Computes search direction.
        /* NOTE(review): the else keywords separating pcg / direct and
           fat / thin are missing from this listing. */
        if (matX1->nz >= 0)
            /* pcg */
            compute_searchdir_pcg(&prob, &vars, t, s, gap, &pcgstat, &adata, 
                                  &mdata, precond, tm1, tn1, tx1);
            pcgiter += pcgstat.iter;
            /* direct */
            if (n > m)
                /* direct method for n > m, SMW */
                compute_searchdir_chol_fat(&prob, &vars, t, B, BB,
                                           tm1, tn1, tn2, tn3, tn4);
                /* direct method for n <= m */
                compute_searchdir_chol_thin(&prob, &vars, t, B, BB,
                                            tm1, tn1, tn2);

         *  Backtracking linesearch & update x = (v,w,u) and z.
        s = backtracking_linesearch(&prob, &vars, t, tm1, tx1);
        if (s < 0) break; /* BLS error */
    /*** END OF MAIN LOOP *****************************************************/

    /*  Abnormal termination */
    /* NOTE(review): both branches are empty in this listing; this is where
       `status` would have to be set for the two failure cases (line-search
       error versus iteration limit reached). */
    if (s < 0)
    else /* if (ntiter == MAX_NT_ITER) */

    /* same result reporting as on the successful exit inside the loop */
    if (sol != NULL)
        /* trim solution */
        int i;
        double lambda_threshold;

        lambda_threshold = to.ktolerance*lambda;
        sol[0] = x[0];

        for (i = 0; i < n; i++)
            sol[i+1] = (fabs(gw[i])>lambda_threshold)? x[i+1] : 0.0;
        /* if standardized, sol = coeff/std */
        if (to.sflag == TRUE && to.cflag == FALSE)
            dmat_elemdivi(n, sol+1, std_x, sol+1);
            sol[0] -= dmat_dot(n, avg_x, sol+1);

    if (initial_x != NULL)
        dmat_vcopy(n+n+1, x, initial_x);
        *initial_t = t;
    if (total_pcgiter) *total_pcgiter = pcgiter;
    if (total_ntiter ) *total_ntiter  = ntiter;

    /* free memory */
    free_temporaries(tm1, tn1, tn2, tn3, tn4, tx1, precond, B, BB);

    return status;