void Solver_MCSVM_CS::Solve(double *w) { int i, m, s; int iter = 0; double *alpha = new double[l*nr_class]; double *alpha_new = new double[nr_class]; int *index = new int[l]; double *QD = new double[l]; int *d_ind = new int[nr_class]; double *d_val = new double[nr_class]; int *alpha_index = new int[nr_class*l]; int *y_index = new int[l]; int active_size = l; int *active_size_i = new int[l]; double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking bool start_from_all = true; // initial for(i=0;i<l*nr_class;i++) alpha[i] = 0; for(i=0;i<n*nr_class;i++) w[i] = 0; for(i=0;i<l;i++) { for(m=0;m<nr_class;m++) alpha_index[i*nr_class+m] = m; feature_node *xi = prob->x[i]; QD[i] = 0; while(xi->index != -1) { QD[i] += (xi->value)*(xi->value); xi++; } active_size_i[i] = nr_class; y_index[i] = prob->y[i]; index[i] = i; } while(iter < max_iter) { double stopping = -INF; for(i=0;i<active_size;i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for(s=0;s<active_size;s++) { i = index[s]; double Ai = QD[i]; double *alpha_i = &alpha[i*nr_class]; int *alpha_index_i = &alpha_index[i*nr_class]; if(Ai > 0) { for(m=0;m<active_size_i[i];m++) G[m] = 1; if(y_index[i] < active_size_i[i]) G[y_index[i]] = 0; feature_node *xi = prob->x[i]; while(xi->index!= -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;m<active_size_i[i];m++) G[m] += w_i[alpha_index_i[m]]*(xi->value); xi++; } double minG = INF; double maxG = -INF; for(m=0;m<active_size_i[i];m++) { if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG) minG = G[m]; if(G[m] > maxG) maxG = G[m]; } if(y_index[i] < active_size_i[i]) if(alpha_i[prob->y[i]] < C[prob->y[i]] && G[y_index[i]] < minG) minG = G[y_index[i]]; for(m=0;m<active_size_i[i];m++) { if(be_shrunken(m, y_index[i], alpha_i[alpha_index_i[m]], minG)) { active_size_i[i]--; while(active_size_i[i]>m) { if(!be_shrunken(active_size_i[i], y_index[i], alpha_i[alpha_index_i[active_size_i[i]]], minG)) { swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); swap(G[m], G[active_size_i[i]]); if(y_index[i] == active_size_i[i]) y_index[i] = m; else if(y_index[i] == m) y_index[i] = active_size_i[i]; break; } active_size_i[i]--; } } } if(active_size_i[i] <= 1) { active_size--; swap(index[s], index[active_size]); s--; continue; } if(maxG-minG <= 1e-12) continue; else stopping = max(maxG - minG, stopping); for(m=0;m<active_size_i[i];m++) B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ; solve_sub_problem(Ai, y_index[i], C[prob->y[i]], active_size_i[i], alpha_new); int nz_d = 0; for(m=0;m<active_size_i[i];m++) { double d = alpha_new[m] - alpha_i[alpha_index_i[m]]; alpha_i[alpha_index_i[m]] = alpha_new[m]; if(fabs(d) >= 1e-12) { d_ind[nz_d] = alpha_index_i[m]; d_val[nz_d] = d; nz_d++; } } xi = prob->x[i]; while(xi->index != -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;m<nz_d;m++) w_i[d_ind[m]] += d_val[m]*xi->value; xi++; } } } iter++; if(iter % 10 == 0) { info("."); info_flush(); } if(stopping < eps_shrink) { if(stopping < eps && start_from_all == true) break; else { active_size = l; for(i=0;i<l;i++) active_size_i[i] = nr_class; info("*"); info_flush(); eps_shrink = max(eps_shrink/2, eps); start_from_all = true; } } else start_from_all = false; } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("Warning: reaching max number of iterations\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0;i<n*nr_class;i++) v += w[i]*w[i]; v = 0.5*v; for(i=0;i<l*nr_class;i++) { v += alpha[i]; if(fabs(alpha[i]) > 0) nSV++; } for(i=0;i<l;i++) v -= alpha[i*nr_class+prob->y[i]]; info("Objective value = %lf\n",v); info("nSV = %d\n",nSV); delete [] alpha; delete [] alpha_new; delete [] index; delete [] QD; delete [] d_ind; delete [] d_val; delete [] alpha_index; delete [] y_index; delete [] active_size_i; }
static void solve_linear_c_svc( const problem *prob, double *w, double eps, double Cp, double Cn, int solver_type) { int l = prob->l; int n = prob->n; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; int max_iter = 20000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; int active_size = l; // PG: projected gradient, for shrinking and stopping double PG; double PGmax_old = INF; double PGmin_old = -INF; double PGmax_new, PGmin_new; // default solver_type: L2LOSS_SVM_DUAL double diag_p = 0.5/Cp, diag_n = 0.5/Cn; double upper_bound_p = INF, upper_bound_n = INF; if(solver_type == L1LOSS_SVM_DUAL) { diag_p = 0; diag_n = 0; upper_bound_p = Cp; upper_bound_n = Cn; } for(i=0; i<n; i++) w[i] = 0; for(i=0; i<l; i++) { alpha[i] = 0; if(prob->y[i] > 0) { y[i] = +1; QD[i] = diag_p; } else { y[i] = -1; QD[i] = diag_n; } feature_node *xi = prob->x[i]; while (xi->index != -1) { QD[i] += (xi->value)*(xi->value); xi++; } index[i] = i; } while (iter < max_iter) { PGmax_new = -INF; PGmin_new = INF; for (i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for (s=0;s<active_size;s++) { i = index[s]; G = 0; schar yi = y[i]; feature_node *xi = prob->x[i]; while(xi->index!= -1) { G += w[xi->index-1]*(xi->value); xi++; } G = G*yi-1; if(yi == 1) { C = upper_bound_p; G += alpha[i]*diag_p; } else { C = upper_bound_n; G += alpha[i]*diag_n; } PG = 0; if (alpha[i] == 0) { if (G > PGmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) PG = G; } else if (alpha[i] == C) { if (G < PGmin_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) PG = G; } else PG = G; PGmax_new = max(PGmax_new, PG); PGmin_new = min(PGmin_new, PG); if(fabs(PG) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); d = (alpha[i] - alpha_old)*yi; xi = prob->x[i]; while (xi->index != -1) { w[xi->index-1] += d*xi->value; xi++; } } } iter++; if(iter % 10 == 0) { info("."); info_flush(); } if(PGmax_new - PGmin_new <= eps) { if(active_size == l) break; else { active_size = l; info("*"); info_flush(); PGmax_old = INF; PGmin_old = -INF; continue; } } PGmax_old = PGmax_new; PGmin_old = PGmin_new; if (PGmax_old <= 0) PGmax_old = INF; if (PGmin_old >= 0) PGmin_old = -INF; } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("Warning: reaching max number of iterations\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<n; i++) v += w[i]*w[i]; for(i=0; i<l; i++) { if (y[i] == 1) v += alpha[i]*(alpha[i]*diag_p - 2); else v += alpha[i]*(alpha[i]*diag_n - 2); if(alpha[i] > 0) ++nSV; } info("Objective value = %lf\n",v/2); info("nSV = %d\n",nSV); delete [] QD; delete [] alpha; delete [] y; delete [] index; }
void Solver::Solve(int l, const Kernel& Q, const double *b_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking) { this->l = l; this->Q = &Q; clone(b, b_,l); clone(y, y_,l); clone(alpha,alpha_,l); this->Cp = Cp; this->Cn = Cn; this->eps = eps; unshrinked = false; // initialize alpha_status { alpha_status = new char[l]; for(int i=0;i<l;i++) update_alpha_status(i); } // initialize active set (for shrinking) { active_set = new int[l]; for(int i=0;i<l;i++) active_set[i] = i; active_size = l; } // initialize gradient { G = new double[l]; G_bar = new double[l]; int i; for(i=0;i<l;i++) { G[i] = b[i]; G_bar[i] = 0; } for(i=0;i<l;i++) if(!is_lower_bound(i)) { Qfloat *Q_i = Q.get_Q(i,l); double alpha_i = alpha[i]; int j; for(j=0;j<l;j++) G[j] += alpha_i*Q_i[j]; if(is_upper_bound(i)) for(j=0;j<l;j++) G_bar[j] += get_C(i) * Q_i[j]; } } // optimization step int iter = 0; int counter = min(l,1000)+1; while(1) { // show progress and do shrinking if(--counter == 0) { counter = min(l,1000); if(shrinking) do_shrinking(); info("."); info_flush(); } int i,j; if(select_working_set(i,j)!=0) { // reconstruct the whole gradient reconstruct_gradient(); // reset active set size and check active_size = l; info("*"); info_flush(); if(select_working_set(i,j)!=0) break; else counter = 1; // do shrinking next iteration } ++iter; // update alpha[i] and alpha[j], handle bounds carefully const Qfloat *Q_i = Q.get_Q(i,active_size); const Qfloat *Q_j = Q.get_Q(j,active_size); double C_i = get_C(i); double C_j = get_C(j); double old_alpha_i = alpha[i]; double old_alpha_j = alpha[j]; if(y[i]!=y[j]) { double delta = (-G[i]-G[j])/(Q_i[i]+Q_j[j]+2*Q_i[j]); double diff = alpha[i] - alpha[j]; alpha[i] += delta; alpha[j] += delta; if(diff > 0) { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = diff; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = -diff; } } if(diff > C_i - C_j) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = C_i - diff; } } else { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = C_j + diff; } } } else { double delta = (G[i]-G[j])/(Q_i[i]+Q_j[j]-2*Q_i[j]); double sum = alpha[i] + alpha[j]; alpha[i] -= delta; alpha[j] += delta; if(sum > C_i) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = sum - C_i; } } else { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = sum; } } if(sum > C_j) { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = sum - C_j; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = sum; } } } // update G double delta_alpha_i = alpha[i] - old_alpha_i; double delta_alpha_j = alpha[j] - old_alpha_j; for(int k=0;k<active_size;k++) { G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; } // update alpha_status and G_bar { bool ui = is_upper_bound(i); bool uj = is_upper_bound(j); update_alpha_status(i); update_alpha_status(j); int k; if(ui != is_upper_bound(i)) { Q_i = Q.get_Q(i,l); if(ui) for(k=0;k<l;k++) G_bar[k] -= C_i * Q_i[k]; else for(k=0;k<l;k++) G_bar[k] += C_i * Q_i[k]; } if(uj != is_upper_bound(j)) { Q_j = Q.get_Q(j,l); if(uj) for(k=0;k<l;k++) G_bar[k] -= C_j * Q_j[k]; else for(k=0;k<l;k++) G_bar[k] += C_j * Q_j[k]; } } } // calculate rho si->rho = calculate_rho(); // calculate objective value { double v = 0; int i; for(i=0;i<l;i++) v += alpha[i] * (G[i] + b[i]); si->obj = v/2; } // put back the solution { for(int i=0;i<l;i++) alpha_[active_set[i]] = alpha[i]; } // juggle everything back /*{ for(int i=0;i<l;i++) while(active_set[i] != i) swap_index(i,active_set[i]); // or Q.swap_index(i,active_set[i]); }*/ si->upper_bound_p = Cp; si->upper_bound_n = Cn; info("\noptimization finished, #iter = %d\n",iter); delete[] b; delete[] y; delete[] alpha; delete[] alpha_status; delete[] active_set; delete[] G; delete[] G_bar; }