bool QualityMetric::evaluate_with_gradient( PatchData& pd, size_t handle, double& value, std::vector<size_t>& indices, std::vector<Vector3D>& gradient, MsqError& err ) { indices.clear(); bool valid = evaluate_with_indices( pd, handle, value, indices, err); if (MSQ_CHKERR(err) || !valid) return false; if (indices.empty()) return true; // get initial pertubation amount double delta_C = finiteDiffEps; if (!haveFiniteDiffEps) { delta_C = get_delta_C( pd, indices, err ); MSQ_ERRZERO(err); if (keepFiniteDiffEps) { finiteDiffEps = delta_C; haveFiniteDiffEps = true; } } const double delta_inv_C = 1.0/delta_C; const int reduction_limit = 15; gradient.resize( indices.size() ); for (size_t v=0; v<indices.size(); ++v) { const Vector3D pos = pd.vertex_by_index(indices[v]); /* gradient in the x, y, z direction */ for (int j=0;j<3;++j) { double delta = delta_C; double delta_inv = delta_inv_C; double metric_value; Vector3D delta_v( 0, 0, 0 ); //perturb the node and calculate gradient. The while loop is a //safety net to make sure the epsilon perturbation does not take //the element out of the feasible region. int counter = 0; for (;;) { // perturb the coordinates of the free vertex in the j direction // by delta delta_v[j] = delta; pd.set_vertex_coordinates( pos+delta_v, indices[v], err ); MSQ_ERRZERO(err); //compute the function at the perturbed point location valid = evaluate( pd, handle, metric_value, err); if (!MSQ_CHKERR(err) && valid) break; if (++counter >= reduction_limit) { MSQ_SETERR(err)("Perturbing vertex by delta caused an inverted element.", MsqError::INTERNAL_ERROR); return false; } delta*=0.1; delta_inv*=10.; } // put the coordinates back where they belong pd.set_vertex_coordinates( pos, indices[v], err ); // compute the numerical gradient gradient[v][j] = (metric_value - value) * delta_inv; } // for(j) } // for(indices) return true; }
bool QualityMetric::evaluate_with_Hessian( PatchData& pd, size_t handle, double& value, std::vector<size_t>& indices, std::vector<Vector3D>& gradient, std::vector<Matrix3D>& Hessian, MsqError& err ) { indices.clear(); gradient.clear(); keepFiniteDiffEps = true; bool valid = evaluate_with_gradient( pd, handle, value, indices, gradient, err ); keepFiniteDiffEps = false; if (MSQ_CHKERR(err) || !valid) { haveFiniteDiffEps = false; return false; } if (indices.empty()){ haveFiniteDiffEps = false; return true; } // get initial pertubation amount double delta_C; if (haveFiniteDiffEps) { delta_C = finiteDiffEps; } else { delta_C = get_delta_C( pd, indices, err ); MSQ_ERRZERO(err); } assert(delta_C < 1e30); const double delta_inv_C = 1.0/delta_C; const int reduction_limit = 15; std::vector<Vector3D> temp_gradient( indices.size() ); const int num_hess = indices.size() * (indices.size() + 1) / 2; Hessian.resize( num_hess ); for (unsigned v = 0; v < indices.size(); ++v) { const Vector3D pos = pd.vertex_by_index(indices[v]); for (int j = 0; j < 3; ++j ) { // x, y, and z double delta = delta_C; double delta_inv = delta_inv_C; double metric_value; Vector3D delta_v(0,0,0); // find finite difference for gradient int counter = 0; for (;;) { delta_v[j] = delta; pd.set_vertex_coordinates( pos+delta_v, indices[v], err ); MSQ_ERRZERO(err); valid = evaluate_with_gradient( pd, handle, metric_value, indices, temp_gradient, err ); if (!MSQ_CHKERR(err) && valid) break; if (++counter >= reduction_limit) { MSQ_SETERR(err)("Algorithm did not successfully compute element's " "Hessian.\n",MsqError::INTERNAL_ERROR); haveFiniteDiffEps = false; return false; } delta *= 0.1; delta_inv *= 10.0; } pd.set_vertex_coordinates( pos, indices[v], err ); MSQ_ERRZERO(err); //compute the numerical Hessian for (unsigned w = 0; w <= v; ++w) { //finite difference to get some entries of the Hessian Vector3D fd( temp_gradient[w] ); fd -= gradient[w]; fd *= delta_inv; // For the block at position w,v in a matrix, we 
need the corresponding index // (mat_index) in a 1D array containing only upper triangular blocks. unsigned sum_w = w*(w+1)/2; // 1+2+3+...+w unsigned mat_index = w*indices.size() + v - sum_w; Hessian[mat_index][0][j] = fd[0]; Hessian[mat_index][1][j] = fd[1]; Hessian[mat_index][2][j] = fd[2]; } } // for(j) } // for(indices) haveFiniteDiffEps = false; return true; }
/**
 * Run the stochastic primal-dual coordinate loop and return the primal
 * weight vector.
 *
 * Each outer iteration performs num_ins_ single-coordinate steps on an
 * entry of active_index_ chosen uniformly at random: the dual variable
 * alpha_[i] is updated and clipped to [0, C], then w_ and the extrapolated
 * iterate spdc_w_ are updated coordinate-wise from za_ and the change in
 * alpha_[i].  After every outer iteration the duality gap is recomputed and
 * printed; the loop stops once the gap is <= 1e-6 or the iteration counter
 * reaches max_iteration.
 *
 * The random engine is default-constructed, so the sampling sequence is the
 * same on every call.
 *
 * @param alp  Dual vector handed to set_alpha().
 * @return     The member w_ after optimization.
 */
Eigen::VectorXd l2r_l1hinge_spdc::train_warm_start(const Eigen::VectorXd &alp) {
  set_alpha(alp);
  // NOTE(review): alpha_ is overwritten with the constant C right after
  // set_alpha(alp); if set_alpha() stores alp in alpha_, the warm start is
  // discarded here -- confirm this is intended.
  alpha_ = Eigen::VectorXd::Constant(num_ins_, C);

  std::default_random_engine g;
  std::uniform_int_distribution<> uni_dist(0, num_ins_ - 1);
  const auto ins_is_begin_it = std::begin(active_index_);

  w_ = Eigen::VectorXd::Zero(num_fea_);
  spdc_w_ = w_;

  // This initial draw is never dereferenced, but it advances the generator;
  // it is kept so the sampling sequence below stays unchanged.
  auto random_it = std::next(ins_is_begin_it, uni_dist(g));

  // Step sizes and extrapolation weight derived from gamma_, C and R_.
  gamma_ = 0.1;
  const double tau = std::sqrt(gamma_ / (1.0 * (1.0 / C))) / R_;
  const double sigma = std::sqrt(((1.0 / C) * 1.0) / gamma_) / R_;
  const double theta =
      1.0 - 1.0 / ((1.0 / C) + R_ * std::sqrt((1.0 / C) / (1.0 * gamma_)));
  const double sig_gam = -sigma * gamma_ - 1.0;
  // Loop-invariant factor of the primal update.
  const double primal_scale = 1.0 / (1.0 + tau);

  Eigen::VectorXd delta_v(num_fea_);

  calculate_duality_gap(true, true);
  std::cout << tau << " " << sigma << " " << theta << " " << R_ << std::endl;
  std::cout << " start optimization " << max_iteration << std::endl;

  int itr = 0;
  for (itr = 1; itr < max_iteration && duality_gap_ > 1e-6; ++itr) {
    for (int ir = 0; ir < num_ins_; ++ir) {
      random_it = std::next(ins_is_begin_it, uni_dist(g));
      const int i = *random_it;
      const double yi = y_[i];

      // Dual step for coordinate i, clipped to the box [0, C].
      const double beta_i =
          (sigma * (yi * (x_.row(i) * spdc_w_)(0) - 1.0) - alpha_[i]) /
          (sig_gam);
      const double alpha_i_new = std::min(C, std::max(0.0, beta_i));
      const double delta_alpha_i = alpha_i_new - alpha_[i];
      alpha_[i] = alpha_i_new;

      delta_v = (yi * delta_alpha_i) * x_.row(i);

      // Primal step plus extrapolation; spdc_w_[j] must be computed from the
      // old w_[j] before w_[j] is overwritten.
      for (int j = 0; j < num_fea_; ++j) {
        const double eta = primal_scale * (w_[j] + tau * (za_[j] + delta_v[j]));
        spdc_w_[j] = eta + theta * (eta - w_[j]);
        w_[j] = eta;
      }

      za_ += delta_v;
    }

    calculate_duality_gap(true, true);
    std::cout << itr << " optimization end gap : " << duality_gap_ << " "
              << primal_obj_value_ << " " << dual_obj_value_ << std::endl;
  }

  std::cout << itr << " optimization end gap : " << duality_gap_ << " "
            << primal_obj_value_ << " " << dual_obj_value_ << std::endl;
  std::cout << w_.transpose() << std::endl;
  return w_;
}