void CLibLinear::solve_l1r_lr( const problem *prob_col, double eps, double Cp, double Cn) { int l = prob_col->l; int w_size = prob_col->n; int j, s, iter = 0; int active_size = w_size; int max_num_linesearch = 20; double x_min = 0; double sigma = 0.01; double d, G, H; double Gmax_old = CMath::INFTY; double Gmax_new; double Gmax_init=0; double sum1, appxcond1; double sum2, appxcond2; double cond; int *index = SG_MALLOC(int, w_size); int32_t *y = SG_MALLOC(int32_t, l); double *exp_wTx = SG_MALLOC(double, l); double *exp_wTx_new = SG_MALLOC(double, l); double *xj_max = SG_MALLOC(double, w_size); double *C_sum = SG_MALLOC(double, w_size); double *xjneg_sum = SG_MALLOC(double, w_size); double *xjpos_sum = SG_MALLOC(double, w_size); CDotFeatures* x = prob_col->x; void* iterator; int ind; double val; double C[3] = {Cn,0,Cp}; int n = prob_col->n; if (prob_col->use_bias) n--; for(j=0; j<l; j++) { exp_wTx[j] = 1; if(prob_col->y[j] > 0) y[j] = 1; else y[j] = -1; } for(j=0; j<w_size; j++) { w.vector[j] = 0; index[j] = j; xj_max[j] = 0; C_sum[j] = 0; xjneg_sum[j] = 0; xjpos_sum[j] = 0; if (use_bias && j==n) { for (ind=0; ind<l; ind++) { x_min = CMath::min(x_min, 1.0); xj_max[j] = CMath::max(xj_max[j], 1.0); C_sum[j] += C[GETI(ind)]; if(y[ind] == -1) xjneg_sum[j] += C[GETI(ind)]; else xjpos_sum[j] += C[GETI(ind)]; } } else { iterator=x->get_feature_iterator(j); while (x->get_next_feature(ind, val, iterator)) { x_min = CMath::min(x_min, val); xj_max[j] = CMath::max(xj_max[j], val); C_sum[j] += C[GETI(ind)]; if(y[ind] == -1) xjneg_sum[j] += C[GETI(ind)]*val; else xjpos_sum[j] += C[GETI(ind)]*val; } x->free_feature_iterator(iterator); } } CTime start_time; while (iter < max_iterations && !CSignal::cancel_computations()) { if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) break; Gmax_new = 0; for(j=0; j<active_size; j++) { int i = j+rand()%(active_size-j); CMath::swap(index[i], index[j]); } for(s=0; s<active_size; s++) { j = index[s]; sum1 = 0; sum2 = 0; H = 
0; if (use_bias && j==n) { for (ind=0; ind<l; ind++) { double exp_wTxind = exp_wTx[ind]; double tmp1 = 1.0/(1+exp_wTxind); double tmp2 = C[GETI(ind)]*tmp1; double tmp3 = tmp2*exp_wTxind; sum2 += tmp2; sum1 += tmp3; H += tmp1*tmp3; } } else { iterator=x->get_feature_iterator(j); while (x->get_next_feature(ind, val, iterator)) { double exp_wTxind = exp_wTx[ind]; double tmp1 = val/(1+exp_wTxind); double tmp2 = C[GETI(ind)]*tmp1; double tmp3 = tmp2*exp_wTxind; sum2 += tmp2; sum1 += tmp3; H += tmp1*tmp3; } x->free_feature_iterator(iterator); } G = -sum2 + xjneg_sum[j]; double Gp = G+1; double Gn = G-1; double violation = 0; if(w.vector[j] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; CMath::swap(index[s], index[active_size]); s--; continue; } } else if(w.vector[j] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = CMath::max(Gmax_new, violation); // obtain Newton direction d if(Gp <= H*w.vector[j]) d = -Gp/H; else if(Gn >= H*w.vector[j]) d = -Gn/H; else d = -w.vector[j]; if(fabs(d) < 1.0e-12) continue; d = CMath::min(CMath::max(d,-10.0),10.0); double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta; if(x_min >= 0) { double tmp = exp(d*xj_max[j]); appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j]; appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; if(CMath::min(appxcond1,appxcond2) <= 0) { if (use_bias && j==n) { for (ind=0; ind<l; ind++) exp_wTx[ind] *= exp(d); } else { iterator=x->get_feature_iterator(j); while (x->get_next_feature(ind, val, iterator)) exp_wTx[ind] *= exp(d*val); x->free_feature_iterator(iterator); } break; } } cond += d*xjneg_sum[j]; int i = 0; if (use_bias && j==n) { for (ind=0; ind<l; ind++) { double exp_dx = exp(d); 
exp_wTx_new[i] = exp_wTx[ind]*exp_dx; cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); i++; } } else { iterator=x->get_feature_iterator(j); while (x->get_next_feature(ind, val, iterator)) { double exp_dx = exp(d*val); exp_wTx_new[i] = exp_wTx[ind]*exp_dx; cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); i++; } x->free_feature_iterator(iterator); } if(cond <= 0) { i = 0; if (use_bias && j==n) { for (ind=0; ind<l; ind++) { exp_wTx[ind] = exp_wTx_new[i]; i++; } } else { iterator=x->get_feature_iterator(j); while (x->get_next_feature(ind, val, iterator)) { exp_wTx[ind] = exp_wTx_new[i]; i++; } x->free_feature_iterator(iterator); } break; } else { d *= 0.5; delta *= 0.5; } } w.vector[j] += d; // recompute exp_wTx[] if line search takes too many steps if(num_linesearch >= max_num_linesearch) { SG_INFO("#"); for(int i=0; i<l; i++) exp_wTx[i] = 0; for(int i=0; i<w_size; i++) { if(w.vector[i]==0) continue; if (use_bias && i==n) { for (ind=0; ind<l; ind++) exp_wTx[ind] += w.vector[i]; } else { iterator=x->get_feature_iterator(i); while (x->get_next_feature(ind, val, iterator)) exp_wTx[ind] += w.vector[i]*val; x->free_feature_iterator(iterator); } } for(int i=0; i<l; i++) exp_wTx[i] = exp(exp_wTx[i]); } } if(iter == 0) Gmax_init = Gmax_new; iter++; SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6); if(Gmax_new <= eps*Gmax_init) { if(active_size == w_size) break; else { active_size = w_size; Gmax_old = CMath::INFTY; continue; } } Gmax_old = Gmax_new; } SG_DONE(); SG_INFO("optimization finished, #iter = %d\n", iter); if(iter >= max_iterations) SG_WARNING("\nWARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; int nnz = 0; for(j=0; j<w_size; j++) if(w.vector[j] != 0) { v += fabs(w.vector[j]); nnz++; } for(j=0; j<l; j++) if(y[j] == 1) v += C[GETI(j)]*log(1+1/exp_wTx[j]); else v += C[GETI(j)]*log(1+exp_wTx[j]); SG_INFO("Objective value = 
%lf\n", v); SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size); delete [] index; delete [] y; delete [] exp_wTx; delete [] exp_wTx_new; delete [] xj_max; delete [] C_sum; delete [] xjneg_sum; delete [] xjpos_sum; }
/**
 * Coordinate-descent solver for L1-regularized L2-loss support vector
 * classification.
 *
 * Minimizes  sum_j |w_j| + sum_i C_i * max(0, 1 - y_i * w^T x_i)^2
 * (this is the objective value reported at the end of the function).
 * The solution is written to the member vector w.vector.
 *
 * The features are accessed feature-by-feature through the CDotFeatures
 * iterator interface: get_feature_iterator(j) yields (ind, val) pairs for
 * feature j, where ind is used as an example index. When use_bias is set,
 * coordinate n (the last one) is treated as a constant feature of value 1
 * for every example and is handled without touching the feature object.
 *
 * @param prob_col problem description; l (number of examples), n (number of
 *                 weights incl. bias), y (labels), x (features) and use_bias
 *                 are read
 * @param eps      relative stopping tolerance: the solver stops once the
 *                 maximal violation of a pass over the full coordinate set
 *                 drops to eps times the violation of the first pass
 * @param Cp       cost applied to examples with label > 0
 * @param Cn       cost applied to all other examples
 */
void CLibLinear::solve_l1r_l2_svc(
	problem *prob_col, double eps, double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new;
	double Gmax_init=0;
	double d_old, d_diff;
	double loss_old=0, loss_new;
	double appxcond, cond;

	int *index = SG_MALLOC(int, w_size);
	int32_t *y = SG_MALLOC(int32_t, l);
	double *b = SG_MALLOC(double, l); // b = 1-ywTx
	double *xj_sq = SG_MALLOC(double, w_size);

	CDotFeatures* x = (CDotFeatures*) prob_col->x;
	void* iterator;
	int32_t ind;
	float64_t val;

	// Indexed through GETI (defined outside this function); the layout
	// {Cn, 0, Cp} suggests GETI maps labels -1/+1 to slots 0/2.
	double C[3] = {Cn,0,Cp};

	// n is the index of the bias coordinate when a bias is used,
	// otherwise it equals w_size and never matches a coordinate.
	int n = prob_col->n;
	if (prob_col->use_bias)
		n--;

	// w starts at 0, hence b_i = 1; labels are mapped to +1/-1.
	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}

	// xj_sq[j] = sum_i C_i * x_ij^2, used by the approximate line-search test
	for(j=0; j<w_size; j++)
	{
		w.vector[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;

		if (use_bias && j==n)
		{
			// bias coordinate: every example has feature value 1
			for (ind=0; ind<l; ind++)
				xj_sq[n] += C[GETI(ind)];
		}
		else
		{
			iterator=x->get_feature_iterator(j);
			while (x->get_next_feature(ind, val, iterator))
				xj_sq[j] += C[GETI(ind)]*val*val;
			x->free_feature_iterator(iterator);
		}
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
			break;

		Gmax_new = 0;

		// random permutation of the active coordinates
		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			// only examples with b > 0 contribute to gradient and curvature
			if (use_bias && j==n)
			{
				for (ind=0; ind<l; ind++)
				{
					if(b[ind] > 0)
					{
						double tmp = C[GETI(ind)]*y[ind];
						G_loss -= tmp*b[ind];
						H += tmp*y[ind];
					}
				}
			}
			else
			{
				iterator=x->get_feature_iterator(j);
				while (x->get_next_feature(ind, val, iterator))
				{
					if(b[ind] > 0)
					{
						double tmp = C[GETI(ind)]*val*y[ind];
						G_loss -= tmp*b[ind];
						H += tmp*val*y[ind];
					}
				}
				x->free_feature_iterator(iterator);
			}

			G_loss *= 2;

			G = G_loss;
			H *= 2;
			// keep H strictly positive so the Newton step below is defined
			H = CMath::max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w.vector[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					// shrink: drop coordinate j from the active set and
					// revisit slot s, which now holds a different coordinate
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w.vector[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w.vector[j])
				d = -Gp/H;
			else if(Gn >= H*w.vector[j])
				d = -Gn/H;
			else
				d = -w.vector[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				// b[] already reflects the previous trial step d_old, so it
				// only has to be moved by the difference to the new step
				d_diff = d_old - d;
				cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

				// cheap acceptance test
				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
							b[ind] += d_diff*y[ind];
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
							b[ind] += d_diff*val*y[ind];
						x->free_feature_iterator(iterator);
					}
					break;
				}

				if(num_linesearch == 0)
				{
					// first trial: the loss before the step is needed as well
					loss_old = 0;
					loss_new = 0;

					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							if(b[ind] > 0)
								loss_old += C[GETI(ind)]*b[ind]*b[ind];
							double b_new = b[ind] + d_diff*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							if(b[ind] > 0)
								loss_old += C[GETI(ind)]*b[ind]*b[ind];
							double b_new = b[ind] + d_diff*val*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
						x->free_feature_iterator(iterator);
					}
				}
				else
				{
					loss_new = 0;
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							double b_new = b[ind] + d_diff*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							double b_new = b[ind] + d_diff*val*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
						x->free_feature_iterator(iterator);
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					// step rejected: halve it and try again
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w.vector[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				SG_INFO("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				// regular features occupy coordinates [0, n)
				for(int i=0; i<n; i++)
				{
					if(w.vector[i]==0)
						continue;

					iterator=x->get_feature_iterator(i);
					while (x->get_next_feature(ind, val, iterator))
						b[ind] -= w.vector[i]*val*y[ind];
					x->free_feature_iterator(iterator);
				}

				// bias coordinate, if present and non-zero
				if (use_bias && w.vector[n])
				{
					for (ind=0; ind<l; ind++)
						b[ind] -= w.vector[n]*y[ind];
				}
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;

		SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				// converged on the shrunken set only: re-activate all
				// coordinates and do another pass before stopping
				active_size = w_size;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE();
	SG_INFO("optimization finished, #iter = %d\n", iter);
	if(iter >= max_iterations)
		SG_WARNING("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		if(w.vector[j] != 0)
		{
			v += fabs(w.vector[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
	{
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];
	}

	SG_INFO("Objective value = %lf\n", v);
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

	// The buffers come from SG_MALLOC, so they are released with SG_FREE
	// rather than delete[] to keep allocation and release paired.
	SG_FREE(index);
	SG_FREE(y);
	SG_FREE(b);
	SG_FREE(xj_sq);
}