/*
 * Entry point of the L-BFGS-B driver.
 *
 * Allocates the optimisation variable x and its bound arrays (nbd, l, u)
 * through memAlloc, lets initProb fill them, runs lbfgsbminimize with a
 * history size of 4, prints the termination code, the objective value read
 * from f_tb_host and the final x, then releases everything and prints the
 * elapsed wall-clock time in seconds.
 *
 * Returns 0 on success and 1 if the device flags or the cuBLAS handle cannot
 * be set up.
 */
int main() {

	time_t start = time(NULL);

	// The map-host flag has to be configured before anything initialises the
	// device; the allocations below do that, so it must come first (older
	// runtimes reject a late call with cudaErrorSetOnActiveProcess).
	// NOTE(review): memAllocHost takes both a host and a device pointer, so it
	// presumably relies on mapped host memory -- confirm against its definition.
	cudaError_t flagStatus = cudaSetDeviceFlags(cudaDeviceMapHost);
	if (flagStatus != cudaSuccess) {
		fprintf(stderr, "cudaSetDeviceFlags failed: %s\n", cudaGetErrorString(flagStatus));
		return 1;
	}

	// Create the cuBLAS handle before allocating so that a failure here does
	// not leave buffers behind.
	if (cublasCreate_v2(&cublasHd) != CUBLAS_STATUS_SUCCESS) {
		fprintf(stderr, "cublasCreate_v2 failed\n");
		return 1;
	}

	int dim = L * (nmax + 1);

	const real epsg = EPSG;
	const real epsf = EPSF;
	const real epsx = EPSX;
	const int maxits = MAXITS;
	stpscal = 0.5;
	int info = 0;	// defined value even if the solver never writes it

	real* x;
	int* nbd;
	real* l;
	real* u;
	memAlloc<real>(&x, dim);
	memAlloc<int>(&nbd, dim);
	memAlloc<real>(&l, dim);
	memAlloc<real>(&u, dim);
	memAllocHost<real>(&f_tb_host, &f_tb_dev, 1);

	// Problem parameters consumed elsewhere in the project.
	U = 1;
	J = 0.1;
	mu = 0.5;

	initProb(x, nbd, l, u, dim);
	lbfgsbminimize(dim, 4, x, epsg, epsf, epsx, maxits, nbd, l, u, info);
	printf("info: %d\n", info);

	printf("f: %e\n", *f_tb_host);

	// x is copied with cudaMemcpyDeviceToHost, so stage it in a host buffer
	// before printing.
	real* x_host = new real[dim];
	memCopy(x_host, x, dim * sizeof(real), cudaMemcpyDeviceToHost);
	printf("x: ");
	for (int i = 0; i < dim; i++) {
		printf("%f, ", x_host[i]);
	}
	printf("\n");
	delete[] x_host;	// was leaked in the original

	memFreeHost(f_tb_host);
	memFree(x);
	memFree(nbd);
	memFree(l);
	memFree(u);

	cublasDestroy_v2(cublasHd);

	cudaDeviceReset();

	time_t end = time(NULL);

	// time_t is not guaranteed to be long; cast so the argument matches %ld.
	printf("Runtime: %ld", (long)(end - start));

	return 0;
}
// Example no. 2
// 0
/*
 * Runs one L-BFGS-B optimisation over the 2 * point_num site coordinates.
 *
 * Steps:
 *   1. allocate x / nbd / l / u and the f_tb_host / f_tb_dev pair,
 *   2. let InitSites fill them from site_list_dev,
 *   3. call lbfgsbminimize with a history size of min(8, 2 * point_num),
 *   4. print timing figures and call DrawVoronoi(x),
 *   5. copy x to the host and write every site whose pixel position lies in
 *      [1, screenwidth - 1] x [1, screenheight - 1] to "Result.txt".
 *
 * Returns the value produced by DrawVoronoi(x). If "Result.txt" cannot be
 * opened, an error is reported on stderr and the file output is skipped; the
 * return value is unaffected.
 */
real BFGSOptimization()
{
	printf("Before init sites(point_num=%d).\n", point_num);

	// Use L-BFGS method to compute new sites
	const real epsg = EPSG;
	const real epsf = EPSF;
	const real epsx = EPSX;
	const int maxits = MAXITS;
	stpscal = 2.0f; //Set for different problems!
	int info;

	total_time = 0;
	total_time_func = 0;

	// Two coordinates (x, y) per site.
	const int dim = point_num * 2;

	real* x;
	int* nbd;
	real* l;
	real* u;
	memAlloc<real>(&x, dim);
	memAlloc<int>(&nbd, dim);
	memAlloc<real>(&l, dim);
	memAlloc<real>(&u, dim);
	memAllocHost<real>(&f_tb_host, &f_tb_dev, 1);

	// Pass site_list_dev, which maps site_list, so that the kernel can process it.
	// site_list is set up in InitializeSites().
	// site_list is copied into x, which is allocated on the device.
	InitSites(x, (float*)site_list_dev, sizeof(SiteType) / sizeof(float), nbd, l, u, dim, screenwidth, screenheight);

	printf("Start optimization...");
	get_timestamp(start_time);

	// The history size may not exceed the problem dimension.
	int	m = 8;
	if (dim < m)
		m = dim;

	// Calls funcgrad() internally.
	lbfgsbminimize(dim, m, x, epsg, epsf, epsx, maxits, nbd, l, u, info);
	//printf("Ending code:%d\n", info);

	get_timestamp(end_time);
	elapsed_time = (end_time-start_time);
	total_time += elapsed_time;
	printf("Done.\n JFA Time: %lf\tBFGS Time: %lf\tTotal time: %lf\t", total_time_func, elapsed_time - total_time_func, elapsed_time);
	bReCompute = false;

	real f = DrawVoronoi(x);

	// The x stored on the device appears to hold the moved site positions.
	// Copy it to the host so the positions can be written out.
	real* x_host = new real[dim];
	memCopy(x_host, x, dim * sizeof(real), cudaMemcpyDeviceToHost);

	FILE* fp = fopen("Result.txt", "w");
	if (fp == NULL) {
		// The original dereferenced a NULL stream here; report and skip instead.
		perror("Result.txt");
	} else {
		for(int i = 0; i < point_num; i++) {
			real ix = x_host[i * 2];
			real iy = x_host[i * 2 + 1];

			// Map a coordinate in [-1, 1] to [1, screenwidth] / [1, screenheight].
			real ox = (ix + 1) * (screenwidth - 1) / 2.0 + 1.0;
			real oy = (iy + 1) * (screenheight - 1) / 2.0 + 1.0;

			// Drop sites that fall outside the accepted pixel range.
			if(1.0f > ox || ox > screenwidth - 1)
				continue;

			if(1.0f > oy || oy > screenheight - 1)
				continue;

			fprintf(fp, "%f, %f\n", ox, oy);
		}
		fclose(fp);
	}

	delete[] x_host;

	memFreeHost(f_tb_host);
	memFree(x);
	memFree(nbd);
	memFree(l);
	memFree(u);

	return f;
}
/*
 * Outer loop of a LANCELOT-style augmented-Lagrangian scheme.
 *
 * Each outer iteration runs lbfgsbminimize on the bound-constrained
 * sub-problem with gradient tolerance omega, copies the result back into x,
 * and measures w_gap = sum(W) - 1, where W aliases the first
 * num_weak_learners entries of x. Depending on whether |w_gap| is below eta
 * the multiplier lambda is updated (tolerances tightened) or the penalty
 * parameter mu is shrunk (tolerances reset).
 *
 * The loop stops after LBFGSB::max_iterations outer iterations, or earlier
 * once |w_gap| < Optimizer::wt_sum_tol and duality_gap_met() holds, in which
 * case report_statistics() is called. Always returns 0.
 */
int LbfgsbOptimizer::solve()
{
    // Starting point for the inner solver; indexed 1..x.dim.
    ap::real_1d_array x0;
    x0.setbounds(1, x.dim);
    for(size_t k = 0; k < x.dim; k++)
    {
        x0(k+1) = x.val[k];
    }

    // Bound type / lower bound / upper bound per variable, same indexing.
    ap::integer_1d_array nbd;
    ap::real_1d_array l;
    ap::real_1d_array u;
    nbd.setbounds(1, x.dim);
    l.setbounds(1, x.dim);
    u.setbounds(1, x.dim);

    if(!binary)
    {
        bounds(nbd, l, u);
    }
    else
    {
        bounds_binary(nbd, l, u);
    }

    // Algorithm constants. The value LANCELOT uses is given in brackets.
    const double gamma_bar   = 0.1;   // [0.1], must be < 1
    const double tau         = 0.1;   // [0.1], must be < 1
    // NOTE(review): the original comment quotes alpha_eta = 0.1 but the value
    // in use is 0.01 -- confirm which one is intended.
    const double alpha_eta   = 0.01;  // min(1, alpha_omega)
    const double beta_eta    = 0.9;   // [0.9], min(1, beta_omega)
    const double alpha_omega = 4.0;   // [1.0]
    const double beta_omega  = 4.0;   // [1.0]
    // LANCELOT: firtsg / pow(min(mu, gamma_bar), alpha_omega)
    const double omega_bar   = 0.5;
    // LANCELOT: firtsc / pow(min(mu, gamma_bar), alpha_eta)
    const double eta_bar     = 1.0;
    const double mu_bar      = 0.9;   // must be <= 1

    mu = mu_bar;

    // LANCELOT: alpha = min(mu, gamma_bar)
    double alpha = std::min(mu, gamma_bar);

    // LANCELOT: eta = max(etamin, eta_bar * alpha^alpha_eta), where etamin is
    // the minimum norm of the constraint violation. No etamin floor is
    // applied here (open question from svnvish: what should etamin be?).
    double eta = eta_bar*pow(alpha, alpha_eta);

    // LANCELOT: omega = max(omemin, omega_bar * alpha^alpha_omega), where
    // omemin is the tolerance for the KKT gap. No omemin floor is applied
    // here (open question from svnvish: what should omemin be?).
    double omega = omega_bar*pow(alpha, alpha_omega);

    // Non-owning view onto the leading num_weak_learners entries of x.
    DenseVector W;
    W.val = x.val;
    W.dim = num_weak_learners;

    for(size_t outer = 0; outer < LBFGSB::max_iterations; outer++)
    {
        // Only the gradient-norm criterion is enabled for the inner solve.
        double epsg = omega;
        double epsf = 0;
        double epsx = 0;
        int info;

        lbfgsbminimize(x.dim,
                       std::min(x.dim, LBFGSB::lbfgsb_m),
                       x0,
                       epsg,
                       epsf,
                       epsx,
                       LBFGSB::lbfgsb_max_iterations,
                       nbd,
                       l,
                       u,
                       (void*) this,
                       info);

        // Publish the inner solution (this also refreshes W, which aliases x).
        for(size_t k = 0; k < x.dim; k++)
        {
            x.val[k] = x0(k+1);
        }

        const double w_gap = sum(W) - 1.0;

        // The inner solver is required to finish with code 4; a weaker
        // "info > 0" check was considered and left disabled.
        assert(info == 4);

        const double abs_gap = std::abs(w_gap);

        if(!(abs_gap < eta))
        {
            // Constraint violation too large: keep lambda, shrink mu and
            // reset both tolerances from their *_bar values.
            // NOTE(review): the published LANCELOT scheme resets with the
            // alpha_* exponents; the beta_* exponents are used here -- confirm
            // this is intentional.
            mu *= tau;
            alpha = mu*gamma_bar;
            eta = eta_bar*pow(alpha, beta_eta);
            omega = omega_bar*pow(alpha, beta_omega);
            continue;
        }

        if(abs_gap < Optimizer::wt_sum_tol && duality_gap_met())
        {
            // svnvish: a gradient-norm based convergence test was considered
            // here but would cost an extra gradient evaluation.
            report_statistics();
            break;
        }

        // Constraint violation acceptable: update the multiplier, keep mu
        // and tighten both tolerances.
        lambda = lambda - (w_gap/mu);
        alpha = mu;
        eta *= pow(alpha, beta_eta);
        omega *= pow(alpha, beta_omega);

    } // end of outer iterations

    // W never owned the buffer, so detach it instead of freeing it.
    W.val = NULL;
    W.dim = 0;

    return 0;
} // end of LbfgsbOptimizer::solve()