void Solver::do_shrinking() { int i,j,k; if(select_working_set(i,j)!=0) return; double Gm1 = -y[j]*G[j]; double Gm2 = y[i]*G[i]; // shrink for(k=0;k<active_size;k++) { if(is_lower_bound(k)) { if(y[k]==+1) { if(-G[k] >= Gm1) continue; } else if(-G[k] >= Gm2) continue; } else if(is_upper_bound(k)) { if(y[k]==+1) { if(G[k] >= Gm2) continue; } else if(G[k] >= Gm1) continue; } else continue; --active_size; swap_index(k,active_size); --k; // look at the newcomer } // unshrink, check all variables again before final iterations if(unshrinked || -(Gm1 + Gm2) > eps*10) return; unshrinked = true; reconstruct_gradient(); for(k=l-1;k>=active_size;k--) { if(is_lower_bound(k)) { if(y[k]==+1) { if(-G[k] < Gm1) continue; } else if(-G[k] < Gm2) continue; } else if(is_upper_bound(k)) { if(y[k]==+1) { if(G[k] < Gm2) continue; } else if(G[k] < Gm1) continue; } else continue; swap_index(k,active_size); active_size++; ++k; // look at the newcomer } }
int Solver_Lag::Solve(int *CoreIdx, int numCore, const double* newD) { assert( numCore >= this->l ); int prevSize = this->l; this->l = numCore; // update the patterns this->_CoreIdx = CoreIdx; // The problem is now larger (allocate space if necessary) if ( numCore >= allocatedSize ) { allocatedSize = (int)(1.5*allocatedSize); CacheQ_i = (Qfloat*)realloc(CacheQ_i, allocatedSize*sizeof(Qfloat)); alpha = (double*)realloc(alpha, allocatedSize*sizeof(double)); vec_d = (double*)realloc(vec_d, allocatedSize*sizeof(double)); alpha_status = (char*) realloc(alpha_status, allocatedSize*sizeof(char)); G = (double*)realloc(G, allocatedSize*sizeof(double)); } // Initialize the newly-added part: for (int i=prevSize; i<numCore; i++) { alpha[i] = 0.0; // adding zero at the end update_alpha_status2(i); vec_d[i] = newD[i-prevSize]*0.5; // gradient G[i] = -vec_d[i]; // linear -> constant const Qfloat *Q_i = Q->get_Q(CoreIdx[i],numCore,CoreIdx); for (int j=0; j<prevSize; j++) G[i] += Q_i[j] * alpha[j]; // quadratic -> linear } QD = Q->get_QD(); //////////////////////////////////////////////////////////////////////////////////// // optimization step int iter = 0; while(1) { int i,j, notSelected; if( (notSelected = select_working_set(i,j)) != 0 ) { if ( iter == 0 ) { // internal epsilon is too large while ( notSelected && (eps > minEps) ) { eps *= EPS_SCALING; eps = max(eps,minEps); #ifndef RELEASE_VER printf("Dynamic Scheme: EPS now is %.10g\n", eps); #else printf("o"); #endif notSelected = select_working_set(i,j); } if (notSelected) { printf("not selected\n"); break; } } else break; } ++iter; if (iter % 100 == 0) { double currentTime = pnow (); if ((saveExponential >= 0.0) && (currentTime - algorithmStartTime >= nextSaveTime + timeusedForModelSaving )) { break; } } // update alpha[i] and alpha[j], handle bounds carefully const double old_alpha_i = alpha[i]; const double old_alpha_j = alpha[j]; const double sum = alpha[i] + alpha[j]; // original sum (must maintain during update) const 
Qfloat *Q_i = CacheQ_i; const Qfloat *Q_j = Q->get_Q(CoreIdx[j],numCore, CoreIdx); const double P_val = max((double)(Q_i[i]+Q_j[j]-2.0*Q_i[j]), 0.0); // it should be non-negative const double Q_val = (G[i]-G[j]) - old_alpha_i * P_val; if ( P_val < EPSILON ) // non quadratic problem { if ( Q_val >= 0.0 ) alpha[i] = 0.0; else alpha[i] = sum; // unbounded linear problem } else { alpha[i] = (-Q_val/P_val); if ( alpha[i] < 0.0 ) alpha[i] = 0.0; else if ( alpha[i] > sum ) alpha[i] = sum; } alpha[j] = (sum - alpha[i]); // update the alpha's status update_alpha_status2(i); update_alpha_status2(j); // update G double delta_alpha_i = alpha[i] - old_alpha_i; double delta_alpha_j = alpha[j] - old_alpha_j; for(int k=0;k<numCore;k++) G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; // stop if little improvement (eps^2 is a very very small number) if ( fabs(delta_alpha_i) + fabs(delta_alpha_j) < SQUARE(eps) ) break; } #ifndef RELEASE_VER info("#SMO=%d ",iter); #endif return iter; }
void Solver::Solve(int l, const Kernel& Q, const double *b_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking) { this->l = l; this->Q = &Q; clone(b, b_,l); clone(y, y_,l); clone(alpha,alpha_,l); this->Cp = Cp; this->Cn = Cn; this->eps = eps; unshrinked = false; // initialize alpha_status { alpha_status = new char[l]; for(int i=0;i<l;i++) update_alpha_status(i); } // initialize active set (for shrinking) { active_set = new int[l]; for(int i=0;i<l;i++) active_set[i] = i; active_size = l; } // initialize gradient { G = new double[l]; G_bar = new double[l]; int i; for(i=0;i<l;i++) { G[i] = b[i]; G_bar[i] = 0; } for(i=0;i<l;i++) if(!is_lower_bound(i)) { Qfloat *Q_i = Q.get_Q(i,l); double alpha_i = alpha[i]; int j; for(j=0;j<l;j++) G[j] += alpha_i*Q_i[j]; if(is_upper_bound(i)) for(j=0;j<l;j++) G_bar[j] += get_C(i) * Q_i[j]; } } // optimization step int iter = 0; int counter = min(l,1000)+1; while(1) { // show progress and do shrinking if(--counter == 0) { counter = min(l,1000); if(shrinking) do_shrinking(); info("."); info_flush(); } int i,j; if(select_working_set(i,j)!=0) { // reconstruct the whole gradient reconstruct_gradient(); // reset active set size and check active_size = l; info("*"); info_flush(); if(select_working_set(i,j)!=0) break; else counter = 1; // do shrinking next iteration } ++iter; // update alpha[i] and alpha[j], handle bounds carefully const Qfloat *Q_i = Q.get_Q(i,active_size); const Qfloat *Q_j = Q.get_Q(j,active_size); double C_i = get_C(i); double C_j = get_C(j); double old_alpha_i = alpha[i]; double old_alpha_j = alpha[j]; if(y[i]!=y[j]) { double delta = (-G[i]-G[j])/(Q_i[i]+Q_j[j]+2*Q_i[j]); double diff = alpha[i] - alpha[j]; alpha[i] += delta; alpha[j] += delta; if(diff > 0) { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = diff; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = -diff; } } if(diff > C_i - C_j) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = C_i - diff; } } else { 
if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = C_j + diff; } } } else { double delta = (G[i]-G[j])/(Q_i[i]+Q_j[j]-2*Q_i[j]); double sum = alpha[i] + alpha[j]; alpha[i] -= delta; alpha[j] += delta; if(sum > C_i) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = sum - C_i; } } else { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = sum; } } if(sum > C_j) { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = sum - C_j; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = sum; } } } // update G double delta_alpha_i = alpha[i] - old_alpha_i; double delta_alpha_j = alpha[j] - old_alpha_j; for(int k=0;k<active_size;k++) { G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; } // update alpha_status and G_bar { bool ui = is_upper_bound(i); bool uj = is_upper_bound(j); update_alpha_status(i); update_alpha_status(j); int k; if(ui != is_upper_bound(i)) { Q_i = Q.get_Q(i,l); if(ui) for(k=0;k<l;k++) G_bar[k] -= C_i * Q_i[k]; else for(k=0;k<l;k++) G_bar[k] += C_i * Q_i[k]; } if(uj != is_upper_bound(j)) { Q_j = Q.get_Q(j,l); if(uj) for(k=0;k<l;k++) G_bar[k] -= C_j * Q_j[k]; else for(k=0;k<l;k++) G_bar[k] += C_j * Q_j[k]; } } } // calculate rho si->rho = calculate_rho(); // calculate objective value { double v = 0; int i; for(i=0;i<l;i++) v += alpha[i] * (G[i] + b[i]); si->obj = v/2; } // put back the solution { for(int i=0;i<l;i++) alpha_[active_set[i]] = alpha[i]; } // juggle everything back /*{ for(int i=0;i<l;i++) while(active_set[i] != i) swap_index(i,active_set[i]); // or Q.swap_index(i,active_set[i]); }*/ si->upper_bound_p = Cp; si->upper_bound_n = Cn; info("\noptimization finished, #iter = %d\n",iter); delete[] b; delete[] y; delete[] alpha; delete[] alpha_status; delete[] active_set; delete[] G; delete[] G_bar; }
// Runs the pairwise (SMO-style) optimisation with a fixed linear term of -1
// and returns the value computed by calculate_rho().
//
// Parameters:
//   l         - number of variables.
//   Q         - kernel matrix accessor (get_Q / get_QD).
//   y_        - labels (length l); copied.
//   alpha_    - in/out (length l): starting point on entry, solution on exit
//               (written back through active_set).
//   C         - bound stored on the solver.
//   eps       - stopping tolerance stored on the solver.
//   shrinking - non-zero enables periodic calls to do_shrinking().
//
// All working arrays are allocated here and released before returning.
float Solver<TQ>::solve(int l, TQ& Q, const signed char *y_, float *alpha_, float C, float eps, int shrinking)
{
  // Record the problem description on the solver object.
  this->l = l;
  this->Q = &Q;
  this->QD = Q.get_QD();
  this->C = C;
  this->eps = eps;
  unshrinked = false;

  // Working copies: constant linear term plus the caller's labels and alphas.
  p = new float [l];
  std::fill(p, p + l, float(-1.0));
  y = new signed char [l];
  std::copy(y_, y_ + l, y);
  alpha = new float [l];
  std::copy(alpha_, alpha_ + l, alpha);

  // Bound status of every variable.
  alpha_status = new int[l];
  for (int idx = 0; idx < l; ++idx)
    update_alpha_status(idx);

  // Active set starts as the identity permutation (used by shrinking).
  active_set = new int[l];
  for (int idx = 0; idx < l; ++idx)
    active_set[idx] = idx;
  active_size = l;

  // Gradient: G starts from p and accumulates alpha-weighted rows of Q;
  // G_bar accumulates the rows of variables sitting at their upper bound.
  G = new float[l];
  G_bar = new float[l];
  std::copy(p, p + l, G);
  std::fill(G_bar, G_bar + l, float(0));
  for (int row = 0; row < l; ++row) {
    if (is_lower_bound(row))
      continue;

    const float *q_row = Q.get_Q(row, l);
    const float weight = alpha[row];
    for (int col = 0; col < l; ++col)
      G[col] += weight * q_row[col];

    if (is_upper_bound(row)) {
      for (int col = 0; col < l; ++col)
        G_bar[col] += get_C(row) * q_row[col];
    }
  }

  // Optimisation loop.
  int iter = 0;
  int counter = std::min(l,1000)+1;

  for (;;) {
    // Periodic shrinking.
    if (--counter == 0) {
      counter = std::min(l,1000);
      if (shrinking)
        do_shrinking();
    }

    int i, j;
    if (select_working_set(i, j) != 0) {
      // Nothing selectable in the active set: rebuild the full gradient,
      // re-activate everything and try once more before giving up.
      reconstruct_gradient();
      active_size = l;
      if (select_working_set(i, j) != 0)
        break;
      counter = 1;  // do shrinking next iteration
    }

    ++iter;

    // Two-variable update; bounds are handled carefully below.
    const float *Q_i = Q.get_Q(i, active_size);
    const float *Q_j = Q.get_Q(j, active_size);

    NTA_ASSERT(Q_i != nullptr);
    NTA_ASSERT(Q_j != nullptr);

    float cap_i = get_C(i);
    float cap_j = get_C(j);

    float prev_i = alpha[i];
    float prev_j = alpha[j];

    if (y[i] == y[j]) {
      // Same label: alpha[i] + alpha[j] is conserved.
      float quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
      if (quad_coef <= 0)
        quad_coef = TAU;

      NTA_ASSERT(quad_coef > 0);

      float step = (G[i]-G[j])/quad_coef;
      float total = alpha[i] + alpha[j];
      alpha[i] -= step;
      alpha[j] += step;

      if (total > cap_i) {
        if (alpha[i] > cap_i) {
          alpha[i] = cap_i;
          alpha[j] = total - cap_i;
        }
      } else if (alpha[j] < 0) {
        alpha[j] = 0;
        alpha[i] = total;
      }

      if (total > cap_j) {
        if (alpha[j] > cap_j) {
          alpha[j] = cap_j;
          alpha[i] = total - cap_j;
        }
      } else if (alpha[i] < 0) {
        alpha[i] = 0;
        alpha[j] = total;
      }
    } else {
      // Opposite labels: alpha[i] - alpha[j] is conserved.
      float quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
      if (quad_coef <= 0)
        quad_coef = TAU;

      NTA_ASSERT(quad_coef > 0);

      float step = (-G[i]-G[j])/quad_coef;
      float gap = alpha[i] - alpha[j];
      alpha[i] += step;
      alpha[j] += step;

      if (gap > 0) {
        if (alpha[j] < 0) {
          alpha[j] = 0;
          alpha[i] = gap;
        }
      } else if (alpha[i] < 0) {
        alpha[i] = 0;
        alpha[j] = -gap;
      }

      if (gap > cap_i - cap_j) {
        if (alpha[i] > cap_i) {
          alpha[i] = cap_i;
          alpha[j] = cap_i - gap;
        }
      } else if (alpha[j] > cap_j) {
        alpha[j] = cap_j;
        alpha[i] = cap_j + gap;
      }
    }

    // Propagate the change into the gradient of the active variables.
    float delta_alpha_i = alpha[i] - prev_i;
    float delta_alpha_j = alpha[j] - prev_j;

    for (int k = 0; k < active_size; ++k) {
      G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
      NTA_ASSERT(-HUGE_VAL <= G[k] && G[k] <= HUGE_VAL);
    }

    // Refresh bound status; G_bar only changes when a variable enters or
    // leaves its upper bound.
    const bool was_upper_i = is_upper_bound(i);
    const bool was_upper_j = is_upper_bound(j);
    update_alpha_status(i);
    update_alpha_status(j);

    if (was_upper_i != is_upper_bound(i)) {
      Q_i = Q.get_Q(i,l);
      if (was_upper_i) {
        for (int k = 0; k < l; ++k)
          G_bar[k] -= cap_i * Q_i[k];
      } else {
        for (int k = 0; k < l; ++k)
          G_bar[k] += cap_i * Q_i[k];
      }
    }

    if (was_upper_j != is_upper_bound(j)) {
      Q_j = Q.get_Q(j,l);
      if (was_upper_j) {
        for (int k = 0; k < l; ++k)
          G_bar[k] -= cap_j * Q_j[k];
      } else {
        for (int k = 0; k < l; ++k)
          G_bar[k] += cap_j * Q_j[k];
      }
    }
  }

  float rho = calculate_rho();

  // Hand the solution back in the caller's ordering.
  for (int idx = 0; idx < l; ++idx)
    alpha_[active_set[idx]] = alpha[idx];

  delete[] p;
  delete[] y;
  delete[] alpha;
  delete[] alpha_status;
  delete[] active_set;
  delete[] G;
  delete[] G_bar;

  return rho;
}