bool CSubGradientLPM::train(CFeatures* data) { lpmtim=0; SG_INFO("C=%f epsilon=%f\n", C1, epsilon); ASSERT(labels); if (data) { if (!data->has_property(FP_DOT)) SG_ERROR("Specified features are not of type CDotFeatures\n"); set_features((CDotFeatures*) data); } ASSERT(features); int32_t num_iterations=0; int32_t num_train_labels=labels->get_num_labels(); int32_t num_feat=features->get_dim_feature_space(); int32_t num_vec=features->get_num_vectors(); ASSERT(num_vec==num_train_labels); init(num_vec, num_feat); int32_t num_active=0; int32_t num_bound=0; float64_t alpha=0; float64_t dir_deriv=0; float64_t obj=0; delta_active=num_vec; last_it_noimprovement=-1; work_epsilon=0.99; autoselected_epsilon=work_epsilon; compute_projection(num_feat, num_vec); CTime time; float64_t loop_time=0; while (!(CSignal::cancel_computations())) { CTime t; delta_active=find_active(num_feat, num_vec, num_active, num_bound); update_active(num_feat, num_vec); #ifdef DEBUG_SUBGRADIENTLPM SG_PRINT("==================================================\niteration: %d ", num_iterations); obj=compute_objective(num_feat, num_vec); SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n", obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time); #else SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6); #endif //CMath::display_vector(w, w_dim, "w"); //SG_PRINT("bias: %f\n", bias); //CMath::display_vector(proj, num_vec, "proj"); //CMath::display_vector(idx_active, num_active, "idx_active"); //SG_PRINT("num_active: %d\n", num_active); //CMath::display_vector(idx_bound, num_bound, "idx_bound"); //SG_PRINT("num_bound: %d\n", num_bound); //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active"); //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active); //CMath::display_vector(grad_w, num_feat, "grad_w"); 
//SG_PRINT("grad_b:%f\n", grad_b); dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound); alpha=line_search(num_feat, num_vec); if (num_it_noimprovement==10 || num_bound<qpsize_max) { float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) + grad_b*grad_b; SG_PRINT("CHECKING OPTIMALITY CONDITIONS: " "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n", work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad)); if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6) break; else num_it_noimprovement=0; } //if (work_epsilon<=epsilon && delta_active==0 && num_it_noimprovement) if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0)) { if (last_it_noimprovement==num_iterations-1) { SG_PRINT("no improvement...\n"); num_it_noimprovement++; } else num_it_noimprovement=0; last_it_noimprovement=num_iterations; } CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat); bias-=alpha*grad_b; update_projection(alpha, num_vec); t.stop(); loop_time=t.time_diff_sec(); num_iterations++; if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time()) break; } SG_INFO("converged after %d iterations\n", num_iterations); obj=compute_objective(num_feat, num_vec); SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n", obj, alpha, dir_deriv, num_bound, num_active); #ifdef DEBUG_SUBGRADIENTLPM CMath::display_vector(w, w_dim, "w"); SG_PRINT("bias: %f\n", bias); #endif SG_PRINT("solver time:%f s\n", lpmtim); cleanup(); return true; }
/**
 * Fill grad_w / grad_b with a subgradient and return a directional
 * derivative value for it.
 *
 * With no examples on the bound the gradient is w - sum_CXy_active (and
 * -sum_Cy_active for the bias) and the squared gradient norm is returned.
 * Otherwise one coefficient beta[i] per bound example is chosen: at random
 * when the bound set has at least qpsize_max entries (unless
 * num_it_noimprovement is exactly 10), else by solving a QP built from the
 * bound examples with CQPBSVMLib. The beta-weighted bound examples are then
 * folded into the gradient.
 *
 * @param num_feat   dimensionality of w / grad_w
 * @param num_vec    number of training vectors (not used in this function)
 * @param num_active number of active examples (not used in this function)
 * @param num_bound  number of entries of idx_bound to consider
 * @return directional derivative associated with the computed subgradient
 */
float64_t CSubGradientSVM::compute_min_subgradient(
	int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
{
	float64_t directional=0;

	// Nothing on the bound: the gradient is unique.
	if (num_bound<=0)
	{
		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		directional = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
		return directional;
	}

	CTime setup_timer;

	// v = w - sum_CXy_active (assuming CMath::add forms a*x + b*y).
	CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);

	const bool qp_too_large=(num_bound>=qpsize_max && num_it_noimprovement!=10);

	if (qp_too_large)
	{
		// If the QP gets too large, just choose a random beta.
		//SG_PRINT("qpsize too large (%d>=%d) choosing random subgradient/beta\n", num_bound, qpsize_max);
		for (int32_t k=0; k<num_bound; k++)
			beta[k]=CMath::random(0.0,1.0);
	}
	else
	{
		memset(beta, 0, sizeof(float64_t)*num_bound);

		// Adds a constant 1 to every pairwise product when a bias is used.
		const float64_t bias_const = use_bias ? 1 : 0;

		// Assemble the symmetric matrix Z (upper triangle computed, then
		// mirrored) and the linear term Zv.
		for (int32_t row=0; row<num_bound; row++)
		{
			const int32_t vec_row=idx_bound[row];

			for (int32_t col=row; col<num_bound; col++)
			{
				const int32_t vec_col=idx_bound[col];

				Z[row*num_bound+col]= 2.0*C1*C1*get_label(vec_row)*get_label(vec_col)*
					(features->dot(vec_row, features, vec_col) + bias_const);
				Z[col*num_bound+row]=Z[row*num_bound+col];
			}

			Zv[row]=-2.0*C1*get_label(vec_row)*
				(features->dense_dot(vec_row, v, num_feat)-sum_Cy_active);
		}

		//CMath::display_matrix(Z, num_bound, num_bound, "Z");
		//CMath::display_vector(Zv, num_bound, "Zv");

		setup_timer.stop();
#ifdef DEBUG_SUBGRADIENTSVM
		setup_timer.time_diff_sec(true);
#endif

		CTime qp_timer;
		CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
		//solver.set_solver(QPB_SOLVER_GRADDESC);
		//solver.set_solver(QPB_SOLVER_GS);
#ifdef USE_CPLEX
		solver.set_solver(QPB_SOLVER_CPLEX);
#else
		solver.set_solver(QPB_SOLVER_SCAS);
#endif
		solver.solve_qp(beta, num_bound);
		qp_timer.stop();

		// Accumulate the QP time; the flag passed to time_diff_sec() is true
		// only in debug builds.
#ifdef DEBUG_SUBGRADIENTSVM
		const bool verbose_timing=true;
#else
		const bool verbose_timing=false;
#endif
		tim+=qp_timer.time_diff_sec(verbose_timing);

		//CMath::display_vector(beta, num_bound, "beta gs");
		//solver.set_solver(QPB_SOLVER_CPLEX);
		//solver.solve_qp(beta, num_bound);
		//CMath::display_vector(beta, num_bound, "beta cplex");
		//CMath::display_vector(grad_w, num_feat, "grad_w");
		//SG_PRINT("grad_b:%f\n", grad_b);
	}

	// Start from the gradient of the active part ...
	CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
	grad_b = -sum_Cy_active;

	// ... and fold in each bound example, weighted by its beta.
	for (int32_t k=0; k<num_bound; k++)
	{
		const int32_t vec_idx=idx_bound[k];

		features->add_to_dense_vec(-C1*beta[k]*get_label(vec_idx), vec_idx, grad_w, num_feat);

		if (use_bias)
			grad_b -= C1 * get_label(vec_idx)*beta[k];
	}

	// This second pass needs the finished grad_w, so it cannot be merged
	// with the loop above.
	directional = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
	for (int32_t k=0; k<num_bound; k++)
	{
		const int32_t vec_idx=idx_bound[k];

		float64_t val= C1*get_label(vec_idx)*(features->dense_dot(vec_idx, grad_w, num_feat)+grad_b);
		directional += CMath::max(0.0, val);
	}

	return directional;
}