// Entry point: builds the bounded optimization problem on the device, runs
// the L-BFGS-B minimizer on it, prints the termination code, the final
// objective value and the solution vector, then releases all resources and
// reports the wall-clock runtime in seconds.
//
// Returns 0 on completion, 1 if the cuBLAS handle could not be created.
int main() {
    time_t start = time(NULL);

    // Number of optimization variables.
    int dim = L * (nmax + 1);

    // Stopping tolerances and iteration cap handed to the minimizer.
    const real epsg = EPSG;
    const real epsf = EPSF;
    const real epsx = EPSX;
    const int maxits = MAXITS;
    stpscal = 0.5;
    int info = 0;

    // Device flags have to be set before anything initializes the device
    // (allocations, cuBLAS handle creation); setting them afterwards can fail
    // with cudaErrorSetOnActiveProcess. memAllocHost hands back both a host
    // and a device pointer, so it presumably relies on mapped host memory --
    // confirm against its implementation.
    cudaError_t flagStatus = cudaSetDeviceFlags(cudaDeviceMapHost);
    if (flagStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDeviceFlags failed: %s\n",
                cudaGetErrorString(flagStatus));
    }

    // Created before any allocation so that a failure here leaks nothing.
    if (cublasCreate_v2(&cublasHd) != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "cublasCreate_v2 failed\n");
        return 1;
    }

    // Solution vector, bound types, lower bounds and upper bounds.
    real* x;
    int* nbd;
    real* l;
    real* u;
    memAlloc<real>(&x, dim);
    memAlloc<int>(&nbd, dim);
    memAlloc<real>(&l, dim);
    memAlloc<real>(&u, dim);
    memAllocHost<real>(&f_tb_host, &f_tb_dev, 1);

    // Problem parameters.
    U = 1;
    J = 0.1;
    mu = 0.5;

    initProb(x, nbd, l, u, dim);
    lbfgsbminimize(dim, 4, x, epsg, epsf, epsx, maxits, nbd, l, u, info);

    printf("info: %d\n", info);
    printf("f: %e\n", *f_tb_host);

    // Bring the solution back to the host for printing.
    real* x_host = new real[dim];
    memCopy(x_host, x, dim * sizeof(real), cudaMemcpyDeviceToHost);
    printf("x: ");
    for (int i = 0; i < dim; i++) {
        printf("%f, ", x_host[i]);
    }
    printf("\n");
    delete[] x_host;  // was leaked before

    memFreeHost(f_tb_host);
    memFree(x);
    memFree(nbd);
    memFree(l);
    memFree(u);

    cublasDestroy_v2(cublasHd);
    cudaDeviceReset();

    time_t end = time(NULL);
    // time_t is not guaranteed to be long; cast so the argument matches %ld.
    printf("Runtime: %ld", (long)(end - start));
    return 0;
}
// Optimizes the site positions with L-BFGS-B, redraws the Voronoi diagram
// from the optimized sites and writes every site that lies inside the screen
// area to "Result.txt" as "x, y" pixel coordinates.
//
// Reads and updates file-level state (timing counters, bReCompute,
// f_tb_host/f_tb_dev, stpscal).
//
// Returns the value produced by DrawVoronoi() for the optimized sites.
real BFGSOptimization() {
    printf("Before init sites(point_num=%d).\n", point_num);

    // Use L-BFGS method to compute new sites
    const real epsg = EPSG;
    const real epsf = EPSF;
    const real epsx = EPSX;
    const int maxits = MAXITS;
    stpscal = 2.0f;  // Set for different problems!
    int info;

    total_time = 0;
    total_time_func = 0;

    // Two coordinates (x, y) per site.
    const int dim = point_num * 2;

    real* x;
    int* nbd;
    real* l;
    real* u;
    memAlloc<real>(&x, dim);
    memAlloc<int>(&nbd, dim);
    memAlloc<real>(&l, dim);
    memAlloc<real>(&u, dim);
    memAllocHost<real>(&f_tb_host, &f_tb_dev, 1);

    // Pass site_list_dev, the device-side mapping of site_list, so the kernel
    // can work on it. site_list itself is set up in InitializeSites().
    // site_list is copied into x, which is allocated on the device.
    InitSites(x, (float*)site_list_dev, sizeof(SiteType) / sizeof(float),
              nbd, l, u, dim, screenwidth, screenheight);

    printf("Start optimization...");
    get_timestamp(start_time);

    // Correction count passed to the minimizer, capped at the problem size.
    int m = 8;
    if (dim < m) m = dim;

    // Internally calls funcgrad()
    lbfgsbminimize(dim, m, x, epsg, epsf, epsx, maxits, nbd, l, u, info);
    //printf("Ending code:%d\n", info);

    get_timestamp(end_time);
    elapsed_time = (end_time - start_time);
    total_time += elapsed_time;
    printf("Done.\n JFA Time: %lf\tBFGS Time: %lf\tTotal time: %lf\t",
           total_time_func, elapsed_time - total_time_func, elapsed_time);

    bReCompute = false;
    real f = DrawVoronoi(x);

    // Original author's note: the x stored on the device appears to hold the
    // moved site positions. They are copied to the host here and written to
    // the result file.
    FILE* fp = fopen("Result.txt", "w");
    if (fp == NULL) {
        // Previously a failed open passed NULL to fprintf/fclose.
        fprintf(stderr, "Could not open Result.txt for writing; "
                        "skipping site export.\n");
    } else {
        real* x_host = new real[dim];
        memCopy(x_host, x, dim * sizeof(real), cudaMemcpyDeviceToHost);

        for (int i = 0; i < point_num; i++) {
            real ix = x_host[i * 2];
            real iy = x_host[i * 2 + 1];

            // Map from the [-1, 1] range to pixel coordinates.
            real ox = (ix + 1) * (screenwidth - 1) / 2.0 + 1.0;
            real oy = (iy + 1) * (screenheight - 1) / 2.0 + 1.0;

            // Skip sites that fall outside the screen area.
            if (1.0f > ox || ox > screenwidth - 1) continue;
            if (1.0f > oy || oy > screenheight - 1) continue;

            fprintf(fp, "%f, %f\n", ox, oy);
        }

        fclose(fp);
        delete[] x_host;
    }

    memFreeHost(f_tb_host);
    memFree(x);
    memFree(nbd);
    memFree(l);
    memFree(u);

    return f;
}
int LbfgsbOptimizer::solve() { // Solution vector ap::real_1d_array x0; x0.setbounds(1, x.dim); // copy current x into x0 for(size_t i = 0; i < x.dim; i++) { x0(i+1) = x.val[i]; } ap::integer_1d_array nbd; ap::real_1d_array l; ap::real_1d_array u; nbd.setbounds(1, x.dim); l.setbounds(1, x.dim); u.setbounds(1, x.dim); // Set bounds if(binary) { bounds_binary(nbd, l, u); } else { bounds(nbd, l, u); } // Lancelot: gamma_bar = 0.1 double gamma_bar = 0.1; // < 1 : // Lancelot: tau = 0.1 double tau = 0.1; // # < 1 // Lancelot: alpha_eta = 0.1 double alpha_eta = 0.01; // # min(1, self._alpha_omega) //Lancelot: beta_eta = 0.9 double beta_eta = 0.9; // # min(1, self._beta_omega) //Lancelot: alpha_omega = 1.0 double alpha_omega = 4.0; //Lancelot: beta_omega = 1.0 double beta_omega = 4.0; // Lancelot: omega_bar = firtsg/pow(std::min(mu, gamma_bar), alpha_omega); double omega_bar = 0.5; // Lancelot: eta_bar = firtsc/pow(std::min(mu, gamma_bar), alpha_eta); double eta_bar = 1.0; double mu_bar = 0.9; // must be <= 1 mu = mu_bar; // Lancelot: alpha = min(mu, gamma_bar) double alpha = std::min(mu, gamma_bar); // Lancelot: eta = max(etamin, eta_bar*pow(alpha, alpha_eta)) // svnvish: BUGBUG what is etamin? // etamin is the minimum norm of the constraint violation double eta = eta_bar*pow(alpha, alpha_eta); // Lancelot: omega = max(omemin, omega_bar*pow(alpha, alpha_omega)) // svnvish: BUGBUG what is omemin? 
// omemin is the tolerance for kkt gap double omega = omega_bar*pow(alpha, alpha_omega); DenseVector W; W.val = x.val; W.dim = num_weak_learners; for(size_t iteration = 0; iteration < LBFGSB::max_iterations; iteration++) { double epsg = omega; double epsf = 0; double epsx = 0; int info; lbfgsbminimize(x.dim, std::min(x.dim, LBFGSB::lbfgsb_m), x0, epsg, epsf, epsx, LBFGSB::lbfgsb_max_iterations, nbd, l, u, (void*) this, info); // copy current solution into x for(size_t i = 0; i < x.dim; i++) { x.val[i] = x0(i+1); } const double w_gap = sum(W) - 1.0; // std::cout << "info: " << info << std::endl; // assert(info > 0); assert(info == 4); if(std::abs(w_gap) < eta) { if(std::abs(w_gap) < Optimizer::wt_sum_tol) { if(duality_gap_met()) { // // svnvish: BUGBUG // // Extra gradient computation happening here // // Better to use norm gaps // dvec gradk = grad(); // if(converged(grad())){ report_statistics(); break; } } lambda = lambda - (w_gap/mu); //mu = mu; alpha = mu; eta = eta*pow(alpha, beta_eta); omega = omega*pow(alpha, beta_omega); } else { //lambda = lambda; mu *= tau; alpha = mu*gamma_bar; eta = eta_bar*pow(alpha, beta_eta); omega = omega_bar*pow(alpha, beta_omega); } } // end of "for each iteration" // This is not a memory leak! W.val = NULL; W.dim = 0; return 0; } // end of LbfgsbOptimizer::solve()