예제 #1
0
/**
 * Coordinate-descent solver for L1-regularized logistic regression.
 *
 * Minimizes the objective that is reported at the end of the routine:
 *   sum_j |w_j| + sum_i C_i * log(1 + exp(-y_i * w^T x_i))
 * by repeatedly taking a one-dimensional Newton step on a single weight
 * followed by a backtracking line search. Coordinates whose optimality
 * condition is comfortably satisfied at w_j == 0 are temporarily removed
 * from the active set (shrinking).
 *
 * The result is written to the member `w.vector`; the members `use_bias`,
 * `max_iterations` and `m_max_train_time` are read.
 *
 * @param prob_col problem description; `l` examples, `n` weights (the last
 *                 one is the bias when `use_bias` is set), labels `y` and
 *                 features `x` that are traversed one feature index at a time
 * @param eps      relative stopping tolerance: iteration stops once the
 *                 maximum violation drops to `eps` times its value in the
 *                 first pass
 * @param Cp       cost stored in C[2]; C is indexed through GETI, presumably
 *                 selecting Cp for positive labels - verify against the macro
 * @param Cn       cost stored in C[0]; presumably used for negative labels
 */
void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);     // exp(w^T x_i) per example
    double *exp_wTx_new = SG_MALLOC(double, l); // trial values during line search
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn,0,Cp};

    // n is the index of the bias weight when a bias is used
    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    // w starts at zero, hence exp(w^T x_i) == 1 for every example
    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    // per-coordinate statistics used by the line search conditions
    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            // the bias behaves like a feature that is 1 for every example
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        // random permutation of the active coordinates
        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            // accumulate the pieces of the gradient (sum2) and the second
            // derivative (H) of the loss with respect to w_j
            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            // keep the Newton step finite when the curvature vanishes
            // (same lower bound as used in solve_l1r_l2_svc)
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    // shrink: drop this coordinate from the active set and
                    // re-examine the one swapped into position s
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            // limit the step length
            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                // cheap sufficient-decrease test, only attempted when no
                // negative feature value was seen
                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            // exp(d) does not depend on the example
                            const double exp_d = exp(d);
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp_d;
                        }

                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                // exact test: evaluate the loss change for the trial step
                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    // exp(d) does not depend on the example
                    const double exp_dx = exp(d);
                    for (ind=0; ind<l; ind++)
                    {
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {

                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    // accept the step: commit the trial values, which were
                    // stored in iteration order
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    // backtrack
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w.vector[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w.vector[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w.vector[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                // converged on the shrunken set only: restore all
                // coordinates and run another pass
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers were obtained with SG_MALLOC, so they must be released with
    // SG_FREE rather than delete[]
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}
예제 #2
0
/**
 * Coordinate-descent solver for L1-regularized, squared-hinge-loss SVC.
 *
 * Minimizes the objective that is reported at the end of the routine:
 *   sum_j |w_j| + sum_i C_i * max(0, 1 - y_i * w^T x_i)^2
 * by repeatedly taking a one-dimensional Newton step on a single weight
 * followed by a backtracking line search. Coordinates whose optimality
 * condition is comfortably satisfied at w_j == 0 are temporarily removed
 * from the active set (shrinking).
 *
 * The result is written to the member `w.vector`; the members `use_bias`,
 * `max_iterations` and `m_max_train_time` are read.
 *
 * @param prob_col problem description; `l` examples, `n` weights (the last
 *                 one is the bias when `use_bias` is set), labels `y` and
 *                 features `x` that are traversed one feature index at a time
 * @param eps      relative stopping tolerance: iteration stops once the
 *                 maximum violation drops to `eps` times its value in the
 *                 first pass
 * @param Cp       cost stored in C[2]; C is indexed through GETI, presumably
 *                 selecting Cp for positive labels - verify against the macro
 * @param Cn       cost stored in C[0]; presumably used for negative labels
 */
void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    // n is the index of the bias weight when a bias is used
    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    // w starts at zero, hence b_i = 1 - y_i * w^T x_i == 1
    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    // xj_sq[j] = sum_i C_i * x_ij^2, used by the cheap line search test
    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            // the bias behaves like a feature that is 1 for every example
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }


    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new  = 0;

        // random permutation of the active coordinates
        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            // gradient and second derivative of the loss with respect to
            // w_j; only examples with b > 0 contribute
            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            // keep the Newton step finite when the curvature vanishes
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    // shrink: drop this coordinate from the active set and
                    // re-examine the one swapped into position s
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                // b[] always reflects the previously tried step d_old, so
                // moving to the new trial step only needs the difference
                d_diff = d_old - d;
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                // cheap sufficient-decrease test
                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    // first trial: the loss before the step has to be
                    // computed as well
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    // later trials reuse loss_old from the first one
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    // backtrack
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w.vector[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w.vector[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w.vector[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w.vector[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                // converged on the shrunken set only: restore all
                // coordinates and run another pass
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers were obtained with SG_MALLOC, so they must be released with
    // SG_FREE rather than delete[]
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}