/**
 * Stand-alone driver: builds a bounded problem of size L * (nmax + 1),
 * runs lbfgsbminimize() on it, and prints the termination code, the value
 * held in *f_tb_host, the resulting x vector and the wall-clock runtime.
 *
 * Returns 0 (implicit return of main).
 */
int main() {
    const time_t start = time(NULL);

    // Request mapped host memory before anything allocates on the device.
    // NOTE(review): memAlloc/memAllocHost are defined elsewhere; presumably
    // they initialize the device, after which this flag may be rejected or
    // arrive too late for the mapped f_tb_host/f_tb_dev pair -- confirm.
    cudaSetDeviceFlags(cudaDeviceMapHost);
    cublasCreate_v2(&cublasHd);

    const int dim = L * (nmax + 1);

    // Stopping tolerances and iteration cap forwarded to the minimizer.
    const real epsg = EPSG;
    const real epsf = EPSF;
    const real epsx = EPSX;
    const int maxits = MAXITS;
    stpscal = 0.5;

    // Initialized so the printf below never reads an indeterminate value.
    int info = 0;

    // Unknowns (x), bound-type flags (nbd) and lower/upper bounds (l, u).
    real* x;
    int* nbd;
    real* l;
    real* u;
    memAlloc<real>(&x, dim);
    memAlloc<int>(&nbd, dim);
    memAlloc<real>(&l, dim);
    memAlloc<real>(&u, dim);
    memAllocHost<real>(&f_tb_host, &f_tb_dev, 1);

    // Problem parameters read by code outside this function.
    U = 1;
    J = 0.1;
    mu = 0.5;

    initProb(x, nbd, l, u, dim);
    lbfgsbminimize(dim, 4, x, epsg, epsf, epsx, maxits, nbd, l, u, info);

    printf("info: %d\n", info);
    printf("f: %e\n", *f_tb_host);

    // Bring the solution back to the host for printing.
    real* x_host = new real[dim];
    memCopy(x_host, x, dim * sizeof(real), cudaMemcpyDeviceToHost);
    printf("x: ");
    for (int i = 0; i < dim; i++) {
        printf("%f, ", x_host[i]);
    }
    printf("\n");
    delete[] x_host;  // was leaked in the original

    memFreeHost(f_tb_host);
    memFree(x);
    memFree(nbd);
    memFree(l);
    memFree(u);
    cublasDestroy_v2(cublasHd);
    cudaDeviceReset();

    const time_t end = time(NULL);
    // time_t is not guaranteed to be long; cast so the argument matches %ld.
    printf("Runtime: %ld", static_cast<long>(end - start));
}
/**
 * Optimizes the 2 * point_num site coordinates with lbfgsbminimize(),
 * redraws via DrawVoronoi(), and writes the resulting sites that fall inside
 * the screen bounds to "Result.txt" (one "x, y" pair per line).
 *
 * Timing is accumulated into the file-level total_time / elapsed_time
 * variables and reported on stdout.
 *
 * Returns the value produced by DrawVoronoi(x) for the optimized sites.
 * If "Result.txt" cannot be opened, an error is reported on stderr and the
 * file output is skipped; the return value is unaffected.
 */
real BFGSOptimization() {
    printf("Before init sites(point_num=%d).\n", point_num);

    // Use L-BFGS method to compute new sites
    const real epsg = EPSG;
    const real epsf = EPSF;
    const real epsx = EPSX;
    const int maxits = MAXITS;
    stpscal = 2.0f;  // Set for different problems!
    int info;

    total_time = 0;
    total_time_func = 0;

    const int dim = point_num * 2;  // one (x, y) pair per site

    real* x;
    int* nbd;
    real* l;
    real* u;
    memAlloc<real>(&x, dim);
    memAlloc<int>(&nbd, dim);
    memAlloc<real>(&l, dim);
    memAlloc<real>(&u, dim);
    memAllocHost<real>(&f_tb_host, &f_tb_dev, 1);

    // Pass site_list_dev, which maps site_list, so the kernel can process it.
    // site_list is set up in InitializeSites().
    // site_list is copied into x, which is allocated on the device.
    InitSites(x, (float*)site_list_dev, sizeof(SiteType) / sizeof(float),
              nbd, l, u, dim, screenwidth, screenheight);

    printf("Start optimization...");
    get_timestamp(start_time);

    // Size argument passed to the minimizer, clamped to the problem dimension.
    int m = 8;
    if (dim < m) m = dim;

    // Internally calls funcgrad().
    lbfgsbminimize(dim, m, x, epsg, epsf, epsx, maxits, nbd, l, u, info);
    //printf("Ending code:%d\n", info);

    get_timestamp(end_time);
    elapsed_time = (end_time - start_time);
    total_time += elapsed_time;
    printf("Done.\n JFA Time: %lf\tBFGS Time: %lf\tTotal time: %lf\t",
           total_time_func, elapsed_time - total_time_func, elapsed_time);

    bReCompute = false;
    real f = DrawVoronoi(x);

    // The x stored on the device appears to hold the moved site positions.
    // Copy it to the host and dump the sites to a text file.
    real* x_host = new real[dim];
    memCopy(x_host, x, dim * sizeof(real), cudaMemcpyDeviceToHost);

    FILE* fp = fopen("Result.txt", "w");
    if (fp != NULL) {
        for (int i = 0; i < point_num; i++) {
            real ix = x_host[i * 2];
            real iy = x_host[i * 2 + 1];

            // Affine map from the optimizer's coordinates to screen
            // coordinates: -1 maps to 1 and +1 maps to the screen extent.
            real ox = (ix + 1) * (screenwidth - 1) / 2.0 + 1.0;
            real oy = (iy + 1) * (screenheight - 1) / 2.0 + 1.0;

            // Skip sites that ended up outside the drawable area.
            if (1.0f > ox || ox > screenwidth - 1) continue;
            if (1.0f > oy || oy > screenheight - 1) continue;

            fprintf(fp, "%f, %f\n", ox, oy);
        }
        fclose(fp);
    } else {
        // Previously a failed open led to fprintf/fclose on a null FILE*.
        fprintf(stderr, "Could not open Result.txt for writing.\n");
    }

    delete[] x_host;
    memFreeHost(f_tb_host);
    memFree(x);
    memFree(nbd);
    memFree(l);
    memFree(u);

    return f;
}