コード例 #1
0
ファイル: main.c プロジェクト: meyr/clglPlayer
int main(int argc, char *argv[])
{
	int width, height;
	width = height = 0;
	/* read image file */
	if (argc >= 2)
		decode_init(argv[1], &width, &height);

	/* openGL */
	init_gl(argc, argv);
	createGLBufTex(width, height, GL_BUFFER_SORC);
	createGLBufTex(width/2, height, GL_BUFFER_DEST);

	/* openCL */
	init_cl();
	clloadProgram("./algorithm.cl");
	createCLBufferFromGL();
	setImageWidth(width);
	setKernelRange(width/2, height);
	transferParam();

	glutMainLoop();
exit:
	decode_close();
	exit_cl();
	exit_gl();
err:
	return 0;
}
コード例 #2
0
int main (int argc, char* argv[]) {

   /* Start GL processing */
   init_gl(argc, argv);

   /* Initialize CL data structures */
   init_cl();

   /* Create CL and GL data objects */
   configure_shared_data();

   /* Execute kernel */
   execute_kernel();

   /* Set callback functions */
   glutDisplayFunc(display);
   glutReshapeFunc(reshape);   

   /* Start processing loop */
   glutMainLoop();

   /* Deallocate OpenCL resources */
   clReleaseMemObject(in_texture);
   clReleaseMemObject(out_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);

   /* Deallocate OpenGL resources */
   glDeleteBuffers(2, vbo);

   return 0;
}
コード例 #3
0
void ConcurrentMarkThread::run() {
  initialize_in_thread();
  _vtime_start = os::elapsedVTime();
  wait_for_universe_init();

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1_policy = g1h->g1_policy();
  G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
  Thread *current_thread = Thread::current();

  while (!_should_terminate) {
    // wait until started is set.
    sleepBeforeNextCycle();
    {
      ResourceMark rm;
      HandleMark   hm;
      double cycle_start = os::elapsedVTime();
      double mark_start_sec = os::elapsedTime();
      char verbose_str[128];

      if (PrintGC) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-start]");
      }

      if (!g1_policy->in_young_gc_mode()) {
        // this ensures the flag is not set if we bail out of the marking
        // cycle; normally the flag is cleared immediately after cleanup
        g1h->set_marking_complete();

        if (g1_policy->adaptive_young_list_length()) {
          double now = os::elapsedTime();
          double init_prediction_ms = g1_policy->predict_init_time_ms();
          jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms);
          os::sleep(current_thread, sleep_time_ms, false);
        }

        // We don't have to skip here if we've been asked to restart, because
        // in the worst case we just enqueue a new VM operation to start a
        // marking.  Note that the init operation resets has_aborted()
        CMCheckpointRootsInitialClosure init_cl(_cm);
        strcpy(verbose_str, "GC initial-mark");
        VM_CGC_Operation op(&init_cl, verbose_str);
        VMThread::execute(&op);
      }

      int iter = 0;
      do {
        iter++;
        if (!cm()->has_aborted()) {
          _cm->markFromRoots();
        }

        double mark_end_time = os::elapsedVTime();
        double mark_end_sec = os::elapsedTime();
        _vtime_mark_accum += (mark_end_time - cycle_start);
        if (!cm()->has_aborted()) {
          if (g1_policy->adaptive_young_list_length()) {
            double now = os::elapsedTime();
            double remark_prediction_ms = g1_policy->predict_remark_time_ms();
            jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
            os::sleep(current_thread, sleep_time_ms, false);
          }

          if (PrintGC) {
            gclog_or_tty->date_stamp(PrintGCDateStamps);
            gclog_or_tty->stamp(PrintGCTimeStamps);
            gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
                                      mark_end_sec - mark_start_sec);
          }

          CMCheckpointRootsFinalClosure final_cl(_cm);
          sprintf(verbose_str, "GC remark");
          VM_CGC_Operation op(&final_cl, verbose_str);
          VMThread::execute(&op);
        }
        if (cm()->restart_for_overflow() &&
            G1TraceMarkStackOverflow) {
          gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
                                 "in remark (restart #%d).", iter);
        }

        if (cm()->restart_for_overflow()) {
          if (PrintGC) {
            gclog_or_tty->date_stamp(PrintGCDateStamps);
            gclog_or_tty->stamp(PrintGCTimeStamps);
            gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
          }
        }
      } while (cm()->restart_for_overflow());
      double counting_start_time = os::elapsedVTime();

      // YSR: These look dubious (i.e. redundant) !!! FIX ME
      slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
      slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);

      if (!cm()->has_aborted()) {
        double count_start_sec = os::elapsedTime();
        if (PrintGC) {
          gclog_or_tty->date_stamp(PrintGCDateStamps);
          gclog_or_tty->stamp(PrintGCTimeStamps);
          gclog_or_tty->print_cr("[GC concurrent-count-start]");
        }

        _sts.join();
        _cm->calcDesiredRegions();
        _sts.leave();

        if (!cm()->has_aborted()) {
          double count_end_sec = os::elapsedTime();
          if (PrintGC) {
            gclog_or_tty->date_stamp(PrintGCDateStamps);
            gclog_or_tty->stamp(PrintGCTimeStamps);
            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
                                   count_end_sec - count_start_sec);
          }
        }
      }
      double end_time = os::elapsedVTime();
      _vtime_count_accum += (end_time - counting_start_time);
      // Update the total virtual time before doing this, since it will try
      // to measure it to get the vtime for this marking.  We purposely
      // neglect the presumably-short "completeCleanup" phase here.
      _vtime_accum = (end_time - _vtime_start);
      if (!cm()->has_aborted()) {
        if (g1_policy->adaptive_young_list_length()) {
          double now = os::elapsedTime();
          double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
          jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
          os::sleep(current_thread, sleep_time_ms, false);
        }

        CMCleanUp cl_cl(_cm);
        sprintf(verbose_str, "GC cleanup");
        VM_CGC_Operation op(&cl_cl, verbose_str);
        VMThread::execute(&op);
      } else {
        g1h->set_marking_complete();
      }

      // Check if cleanup set the free_regions_coming flag. If it
      // hasn't, we can just skip the next step.
      if (g1h->free_regions_coming()) {
        // The following will finish freeing up any regions that we
        // found to be empty during cleanup. We'll do this part
        // without joining the suspendible set. If an evacuation pause
        // takes places, then we would carry on freeing regions in
        // case they are needed by the pause. If a Full GC takes
        // places, it would wait for us to process the regions
        // reclaimed by cleanup.

        double cleanup_start_sec = os::elapsedTime();
        if (PrintGC) {
          gclog_or_tty->date_stamp(PrintGCDateStamps);
          gclog_or_tty->stamp(PrintGCTimeStamps);
          gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
        }

        // Now do the remainder of the cleanup operation.
        _cm->completeCleanup();
        // Notify anyone who's waiting that there are no more free
        // regions coming. We have to do this before we join the STS,
        // otherwise we might deadlock: a GC worker could be blocked
        // waiting for the notification whereas this thread will be
        // blocked for the pause to finish while it's trying to join
        // the STS, which is conditional on the GC workers finishing.
        g1h->reset_free_regions_coming();

        _sts.join();
        g1_policy->record_concurrent_mark_cleanup_completed();
        _sts.leave();

        double cleanup_end_sec = os::elapsedTime();
        if (PrintGC) {
          gclog_or_tty->date_stamp(PrintGCDateStamps);
          gclog_or_tty->stamp(PrintGCTimeStamps);
          gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
                                 cleanup_end_sec - cleanup_start_sec);
        }
      }
      guarantee(cm()->cleanup_list_is_empty(),
                "at this point there should be no regions on the cleanup list");

      if (cm()->has_aborted()) {
        if (PrintGC) {
          gclog_or_tty->date_stamp(PrintGCDateStamps);
          gclog_or_tty->stamp(PrintGCTimeStamps);
          gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
        }
      }

      // we now want to allow clearing of the marking bitmap to be
      // suspended by a collection pause.
      _sts.join();
      _cm->clearNextBitmap();
      _sts.leave();
    }

    // Update the number of full collections that have been
    // completed. This will also notify the FullGCCount_lock in case a
    // Java thread is waiting for a full GC to happen (e.g., it
    // called System.gc() with +ExplicitGCInvokesConcurrent).
    _sts.join();
    g1h->increment_full_collections_completed(true /* concurrent */);
    _sts.leave();
  }
  assert(_should_terminate, "just checking");

  terminate();
}
コード例 #4
0
void go_cl(config c)
{
    cl_int error;
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue commands;
    cl_program program;
    cl_kernel kernel;
    cl_event ev;
    cl_mem mem;
    float *result = NULL;
    char buf[256];
    int work_size[2] = { 128, 128 };
    int offset[2];
    int x, y;

    cl_float4 camera_pos = { c.camera_pos.x, c.camera_pos.y, c.camera_pos.z, 0 };
    cl_float4 camera_dir = { 
        c.camera_target.x - c.camera_pos.x, 
        c.camera_target.y - c.camera_pos.y, 
        c.camera_target.z - c.camera_pos.z, 
        0 };
    cl_float4 light_pos = { c.light_pos.x, c.light_pos.y, c.light_pos.z, 0 };
    cl_int2 image_size = { c.width, c.height };

    sprintf(buf, "-DBAILOUT=%d -DSCALE=%f -DFOV=%f", c.bailout, c.scale, c.fov);

    printf("Starting\n");

    init_cl(&platform, &device, &context, &commands);

    dump_info(platform, device);

    printf("Creating kernel\n");

    program = load_program_from_file("kernel.cl", context, device, buf);
    kernel = create_kernel(program, "test");

    printf("Setting memory\n");

    mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * c.width * c.height, NULL, &error);
    check_error(error, "Could not allocate buffer");
    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
    error = clSetKernelArg(kernel, 1, sizeof(cl_float4), &camera_pos);
    error = clSetKernelArg(kernel, 2, sizeof(cl_float4), &camera_dir);
    error = clSetKernelArg(kernel, 3, sizeof(cl_float4), &light_pos);
    error = clSetKernelArg(kernel, 4, sizeof(cl_int2), &image_size);

    clFinish(commands);

    printf("Running\n");

    for (x = 0; x < c.width; x += work_size[0])
    {
        for (y = 0; y < c.height; y += work_size[1])
        {
            offset[0] = x;
            offset[1] = y;
            error = clEnqueueNDRangeKernel(commands, kernel, 2, offset, work_size, NULL, 0, NULL, &ev);
            printf(".");
        }
    }

    clFinish(commands);

    printf("\nWriting image\n");

    result = malloc(sizeof(float) * c.width * c.height);
    error = clEnqueueReadBuffer(commands, mem, CL_TRUE, 0, sizeof(float) * c.width * c.height, result, 0, NULL, &ev);

    clFinish(commands);

    save(result, c, 0, 0);

    free(result);
    clReleaseMemObject(mem);
    release_cl(context, program, commands, kernel);
}
コード例 #5
0
ファイル: rt-seq.cpp プロジェクト: lscandolo/Tesina-RT
int main (int argc, char** argv)
{

	/*---------------------- Initialize OpenGL and OpenCL ----------------------*/

	if (init_gl(argc,argv,&glinfo, window_size, "RT") != 0){
		std::cerr << "Failed to initialize GL" << std::endl;
		exit(1);
	} else { 
		std::cout << "Initialized GL succesfully" << std::endl;
	}

	if (init_cl(glinfo,&clinfo) != CL_SUCCESS){
		std::cerr << "Failed to initialize CL" << std::endl;
		exit(1);
	} else { 
		std::cout << "Initialized CL succesfully" << std::endl;
	}
	print_cl_info(clinfo);

        /* Initialize device interface */
        if (device.initialize(clinfo)) {
                std::cerr << "Failed to initialize device interface" << std::endl;
                exit(1);
        }
        /* Initialize generic gpu library */
        DeviceFunctionLibrary::initialize(clinfo);

	/*---------------------- Create shared GL-CL texture ----------------------*/
	gl_tex = create_tex_gl(window_size[0],window_size[1]);
        tex_id = device.new_memory();
        DeviceMemory& tex_mem = device.memory(tex_id);
        if (tex_mem.initialize_from_gl_texture(gl_tex)) {
                std::cerr << "Failed to create memory object from gl texture" << std::endl;
                exit(1);
        }

	/*---------------------- Set up scene ---------------------------*/
	frames = 15;
	const std::string pack = "pack1OBJ";
	// const std::string pack = "pack2OBJ";
	double wave_attenuation = 1.f;

        scenes.resize(frames);
	for (uint32_t i = 0; i < frames ; ++i) {

                scenes[i].initialize();
		std::stringstream grid_path,visual_path;
		grid_path << "models/obj/" << pack << "/gridFluid" << i+1 << ".obj";
		mesh_id grid_mid = scenes[i].load_obj_file_as_aggregate(grid_path.str());
		object_id grid_oid = scenes[i].add_object(grid_mid);
		Object& grid = scenes[i].object(grid_oid);
		grid.mat.diffuse = White;
		grid.mat.reflectiveness = 0.95f;
		grid.mat.refractive_index = 1.5f;
		grid.geom.setScale(makeVector(1.f,wave_attenuation,1.f));

		/* ---- Solids ------ */
		// visual_path << "models/obj/" << pack << "/visual" << visual_frame << ".obj";
		// mesh_id visual_mid = scenes[i].load_obj_file_as_aggregate(visual_path.str());
		// object_id visual_oid = scenes[i].geometry.add_object(visual_mid);
		// Object& visual = scenes[i].geometry.object(visual_oid);
		// visual.mat.diffuse = Red;

		// for (uint32_t j = 0; j < bridge_parts ; ++j) {
		// 	std::stringstream bridge_path;
		// 	bridge_path << "models/obj/" << pack << "/bridge" << j << i+1 << ".obj";
		// 	mesh_id bridge_mid = scenes[i].load_obj_file_as_aggregate(bridge_path.str());
		// 	object_id bridge_oid = scenes[i].geometry.add_object(bridge_mid);
		// 	Object& bridge = scenes[i].geometry.object(bridge_oid);
		// 	bridge.mat.diffuse = Green;
		// }

		// mesh_id teapot_mesh_id = scenes[i].load_obj_file_as_aggregate("models/obj/teapot2.obj");
		// mesh_id teapot_mesh_id = scenes[i].load_obj_file_as_aggregate("models/obj/teapot-low_res.obj");
		// object_id teapot_obj_id = scenes[i].geometry.add_object(teapot_mesh_id);
		// Object& teapot_obj = scenes[i].geometry.object(teapot_obj_id);
		// teapot_obj.geom.setPos(makeVector(-1.f,0.f,0.f));
		// teapot_obj.geom.setScale(makeVector(3.f,3.f,3.f));
		// teapot_obj.mat.diffuse = Green;
		// teapot_obj.mat.shininess = 1.f;
		// teapot_obj.mat.reflectiveness = 0.3f;

		/* ------------------*/

                // scenes[i].set_accelerator_type(KDTREE_ACCELERATOR);
                scenes[i].set_accelerator_type(BVH_ACCELERATOR);
		scenes[i].create_aggregate_mesh();
		scenes[i].create_aggregate_accelerator();
		Mesh& scene_mesh = scenes[i].get_aggregate_mesh();
                if (scenes[i].transfer_aggregate_mesh_to_device() 
                    || scenes[i].transfer_aggregate_accelerator_to_device())
                        std::cerr << "Failed to transfer scene info to device"<< std::endl;


		/*---------------------- Print scene data ----------------------*/
		std::cerr << "Scene " << i << " stats: " << std::endl;
		std::cerr << "\tTriangle count: " << scene_mesh.triangleCount() << std::endl;
		std::cerr << "\tVertex count: " << scene_mesh.vertexCount() << std::endl;
		std::cerr << std::endl;
	}


	/*---------------------- Set initial Camera paramaters -----------------------*/
	camera.set(makeVector(0,3,-30), makeVector(0,0,1), makeVector(0,1,0), M_PI/4.,
		   window_size[0] / (float)window_size[1]);

	/*---------------------------- Set tile size ------------------------------*/
	best_tile_size = clinfo.max_compute_units * clinfo.max_work_item_sizes[0];
	best_tile_size *= 64;
	best_tile_size = std::min(pixel_count, best_tile_size);

	/*---------------------- Initialize ray bundles -----------------------------*/
	int32_t ray_bundle_size = best_tile_size * 4;

	if (ray_bundle_1.initialize(ray_bundle_size)) {
		std::cerr << "Error initializing ray bundle 1" << std::endl;
		std::cerr.flush();
		exit(1);
	}

	if (ray_bundle_2.initialize(ray_bundle_size)) {
		std::cerr << "Error initializing ray bundle 2" << std::endl;
		std::cerr.flush();
		exit(1);
	}
	std::cout << "Initialized ray bundles succesfully" << std::endl;

	/*---------------------- Initialize hit bundle -----------------------------*/
	int32_t hit_bundle_size = ray_bundle_size;

	if (hit_bundle.initialize(hit_bundle_size)) {
		std::cerr << "Error initializing hit bundle" << std::endl;
		std::cerr.flush();
		exit(1);
	}
	std::cout << "Initialized hit bundle succesfully" << std::endl;

	/*----------------------- Initialize cubemap ---------------------------*/
	if (cubemap.initialize("textures/cubemap/Path/posx.jpg",
                               "textures/cubemap/Path/negx.jpg",
                               "textures/cubemap/Path/posy.jpg",
                               "textures/cubemap/Path/negy.jpg",
                               "textures/cubemap/Path/posz.jpg",
                               "textures/cubemap/Path/negz.jpg")) {  
		std::cerr << "Failed to initialize cubemap." << std::endl;
		exit(1);
	}
	std::cerr << "Initialized cubemap succesfully." << std::endl;

	/*------------------------ Initialize FrameBuffer ---------------------------*/
	if (framebuffer.initialize(window_size)) {
		std::cerr << "Error initializing framebuffer." << std::endl;
		exit(1);
	}
	std::cout << "Initialized framebuffer succesfully." << std::endl;

	/* ------------------ Initialize ray tracer kernel ----------------------*/
	if (tracer.initialize()){
		std::cerr << "Failed to initialize tracer." << std::endl;
		return 0;
	}
	std::cerr << "Initialized tracer succesfully." << std::endl;


	/* ------------------ Initialize Primary Ray Generator ----------------------*/
	if (prim_ray_gen.initialize()) {
		std::cerr << "Error initializing primary ray generator." << std::endl;
		exit(1);
	}
	std::cout << "Initialized primary ray generator succesfully." << std::endl;


	/* ------------------ Initialize Secondary Ray Generator ----------------------*/
	if (sec_ray_gen.initialize()) {
		std::cerr << "Error initializing secondary ray generator." << std::endl;
		exit(1);
	}
	sec_ray_gen.set_max_rays(ray_bundle_1.count());
	std::cout << "Initialized secondary ray generator succesfully." << std::endl;

	/*------------------------ Initialize RayShader ---------------------------*/
	if (ray_shader.initialize()) {
		std::cerr << "Error initializing ray shader." << std::endl;
		exit(1);
	}
	std::cout << "Initialized ray shader succesfully." << std::endl;

	/*----------------------- Enable timing in all clases -------------------*/
	framebuffer.timing(true);
	prim_ray_gen.timing(true);
	sec_ray_gen.timing(true);
	tracer.timing(true);
	ray_shader.timing(true);

	/*------------------------- Count mem usage -----------------------------------*/
	int32_t total_cl_mem = 0;
	total_cl_mem += pixel_count * 4; /* 4bpp texture */
	// for (uint32_t i = 0; i < frames; ++i)  
	// 	total_cl_mem += scene_info[i].size();
	total_cl_mem += ray_bundle_1.mem().size() + ray_bundle_2.mem().size();
	total_cl_mem += hit_bundle.mem().size();
	total_cl_mem += cubemap.positive_x_mem().size() * 6;
	total_cl_mem += framebuffer.image_mem().size();

	std::cout << "\nMemory stats: " << std::endl;
	std::cout << "\tTotal opencl mem usage: " 
		  << total_cl_mem/1e6 << " MB." << std::endl;
	// for (uint32_t i = 0; i < frames; ++i)  
	// 	std::cout << "\tScene " << i << " mem usage: " << scene_info[i].size()/1e6 << " MB." << std::endl;

	std::cout << "\tFramebuffer+Tex mem usage: " 
		  << (framebuffer.image_mem().size() + pixel_count * 4)/1e6
		  << " MB."<< std::endl;
	std::cout << "\tCubemap mem usage: " 
		  << (cubemap.positive_x_mem().size()*6)/1e6 
		  << " MB."<< std::endl;
	std::cout << "\tRay mem usage: " 
		  << (ray_bundle_1.mem().size()*2)/1e6
		  << " MB."<< std::endl;
	std::cout << "\tRay hit info mem usage: " 
		  << hit_bundle.mem().size()/1e6
		  << " MB."<< std::endl;

        /* !! ---------------------- Test area ---------------- */
	std::cerr << std::endl;
	std::cerr << "Misc info: " << std::endl;

	std::cerr << "Tile size: " << best_tile_size << std::endl;
	std::cerr << "Tiles: " << pixel_count / (float)best_tile_size << std::endl;
	std::cerr << "color_cl size: "
		  << sizeof(color_cl)
		  << std::endl;
	std::cerr << "directional_light_cl size: "
		  << sizeof(directional_light_cl)
		  << std::endl;

	std::cerr << "ray_cl size: "
		  << sizeof(ray_cl)
		  << std::endl;
	std::cerr << "sample_cl size: "
		  << sizeof(sample_cl)
		  << std::endl;
	std::cerr << "sample_trace_info_cl size: "
		  << sizeof(sample_trace_info_cl)
		  << std::endl;

	/*------------------------ Set GLUT and misc functions -----------------------*/
	rt_time.snap_time();
	seq_time.snap_time();

	glutKeyboardFunc(gl_key);
	glutMotionFunc(gl_mouse);
	glutDisplayFunc(gl_loop);
	glutIdleFunc(gl_loop);
	glutMainLoop();	

	clinfo.release_resources();

	return 0;
}
コード例 #6
0
ファイル: engine.c プロジェクト: Envielox/engine
int main(int argc, char **argv)
{
    glfwSetErrorCallback(error_callback);

    /* Initialize the library */
    if (!glfwInit()){
        fprintf(stderr, "Initialization failed.\n");
        return 1;
    }

    /* Create a windowed mode window and its OpenGL context */
    window = glfwCreateWindow(width, height, "Hello World", NULL, NULL);
    if (!window) {
        glfwTerminate();
        fprintf(stderr, "Error creating window.\n");
        return 1;
    }

    /* Make the window's context current */
    glfwMakeContextCurrent(window);
    glfwSetInputMode(window, GLFW_STICKY_MOUSE_BUTTONS, 1);
    glfwSetKeyCallback(window, key_callback);
    glfwSetMouseButtonCallback(window, mouse_button_callback);
    glfwSetCursorPosCallback(window, cursor_pos_callback);

    //**************************** generowanie przykładowych piksli
    hs_init(&argc, &argv);
    initOctTree();
    hs_exit();
    float *piksele = malloc(height*width*3*sizeof(*piksele));

    printf("sizeof(OctTreeNode)=%d\n", (int)sizeof(OctTreeNode));

    //****************************

    init_cl();
    turnCamera(0.f,0.f,0.f); // Calculates initial camera direction
    fflush(stderr);

    /* Loop until the user closes the window */
    while (!glfwWindowShouldClose(window))
    {
        /* Render here */
        for (int i = 0; i < height * width * 3; i++)
            piksele[i] = 0.0;

        clock_t start = clock();
        captureOctTree(camera_pos, camera_target, up, width, height, piksele);
        clock_t end = clock();

        // show render time in window title
        char title[16];
        snprintf(title, 16, "%d ms", (int)((end - start) / (CLOCKS_PER_SEC / 1000)));
        glfwSetWindowTitle(window, title);

        /* Swap front and back buffers */
        glfwSwapBuffers(window);

        /* Poll for and process events */
        glfwPollEvents();
    }

    if (num_platforms > 0) {
        clReleaseMemObject(mainOctCL);
        clReleaseMemObject(image);
        clReleaseKernel(kernel);
        clReleaseCommandQueue(queue);
    }
    glfwDestroyWindow(window);

    glfwTerminate();
    return 0;
}
コード例 #7
0
ファイル: cl_runner.cpp プロジェクト: shines77/Dolphin
cl_int cl_runner::execute(const char *filename)
{
    if (filename == NULL || m_clContext == NULL)
        return -1;

    if (!m_bInitCL)
        init_cl();

    // Error code
    cl_int err_num = CL_SUCCESS;

    char *source_content = NULL;
    size_t src_length = 0;
    bool source_need_free = false;

#if 1
    std::ifstream ifs;
    source_need_free = true;
    err_num = clLoadProgramSource(filename, (const char **)&source_content, (size_t *)&src_length);
    if (err_num != CL_SUCCESS || src_length == 0 || source_content == NULL) {
        DOL_TRACE1("cl_runner: Error load program source. ErrCode = %d \n", err_num);
        return err_num;
    }
#else
    std::ifstream ifs(filename, std::ios_base::binary);
    if (!ifs.good()) {
        DOL_TRACE("cl_runner: Error load program source, open source file failed.\n");
        return -1;
    }

    // get file length
    ifs.seekg(0, std::ios_base::end);
    size_t length = ifs.tellg();
    ifs.seekg(0, std::ios_base::beg);

    // read program source
    std::vector<char> data(length + 1);
    ifs.read(&data[0], length);
    data[length] = 0;

    // create and build program
    source_content = &data[0];
    src_length = length;
#endif

    // Create the program
    err_num = CL_SUCCESS;
    m_clProgram = clCreateProgramWithSource(m_clContext, 1, (const char **)&source_content, &src_length, &err_num);  // 加载文件内容
    if (source_need_free) {
        if (source_content != NULL) {
            free(source_content);
            source_content = NULL;
        }
    }
    ifs.close();
    DOL_ASSERT(err_num == CL_SUCCESS);
    if (err_num != CL_SUCCESS || m_clProgram == NULL) {
        DOL_TRACE1("cl_runner: Error create program with source. ErrCode = %d \n", err_num);
        return err_num;
    }

    // Build the program
    err_num = clBuildProgram(m_clProgram, 1, &m_clDeviceId, NULL, NULL, NULL);   // 编译cl程序
    DOL_ASSERT(err_num == CL_SUCCESS);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE1("cl_runner: Error build program. ErrCode = %d \n", err_num);
        return err_num;
    }

    // Show the log
    char *build_log;
    size_t log_size;

    // First call to know the proper size
    err_num = clGetProgramBuildInfo(m_clProgram, m_clDeviceId, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE1("cl_runner: Error get program build info step 1. ErrCode = %d \n", err_num);
        return err_num;
    }

    build_log = new char[log_size + 1]; // 编译CL的出错记录
    if (build_log == NULL)
        return (cl_int)-1;

    // Second call to get the log
    err_num = clGetProgramBuildInfo(m_clProgram, m_clDeviceId, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
    if (err_num != CL_SUCCESS) {
        if (build_log) {
            delete build_log;
            build_log = NULL;
        }
        DOL_TRACE1("cl_runner: Error get program build info step 2. ErrCode = %d \n", err_num);
        return err_num;
    }

    build_log[log_size] = '\0';
    std::string strLog(build_log);
    strLog += "\n";
    DOL_TRACE(strLog.c_str());      // 因为cl程序是在运行时编译的,在运行过程中如果出错,显示编译CL文件的错误,以便查找问题
    if (build_log) {
        delete build_log;
        build_log = NULL;
    }

    // set seed for rand()
    ::srand(FIXED_SRAND_SEED);

    const unsigned int DATA_SIZE = 1048576;
    std::vector<CL_FLOAT_T> a(DATA_SIZE), b(DATA_SIZE), ret(DATA_SIZE);
    for (unsigned int i = 0; i < DATA_SIZE; ++i) {
        a[i]    = std::rand() / (CL_FLOAT_T)RAND_MAX;
        b[i]    = std::rand() / (CL_FLOAT_T)RAND_MAX;
        ret[i]  = 0.0;
    }

    // Allocate the buffer memory objects
    cl_mem cl_a   = clCreateBuffer(m_clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(CL_FLOAT_T) * DATA_SIZE, (void *)&a[0], NULL);
    cl_mem cl_b   = clCreateBuffer(m_clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(CL_FLOAT_T) * DATA_SIZE, (void *)&b[0], NULL);
    cl_mem cl_ret = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE,                       sizeof(CL_FLOAT_T) * DATA_SIZE, (void *)NULL, NULL);
    cl_mem cl_num = clCreateBuffer(m_clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint),                (void *)&DATA_SIZE, NULL);

    // 创建Kernel对应的函数

    // Extracting the kernel
#if defined(USE_CL_DOUBLE) && (USE_CL_DOUBLE != 0)
    m_clKernel = clCreateKernel(m_clProgram, "vector_add_double", &err_num);    // 这个引号中的字符串要对应cl文件中的kernel函数
#else
    m_clKernel = clCreateKernel(m_clProgram, "vector_add_float", &err_num);     // 这个引号中的字符串要对应cl文件中的kernel函数
#endif
    DOL_ASSERT(err_num == CL_SUCCESS);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE1("cl_runner: Error create kernel. ErrCode = %d \n", err_num);
        return err_num;
    }

    if (m_clKernel != NULL) {
        // Set the args values
        err_num  = clSetKernelArg(m_clKernel, 0, sizeof(cl_mem), &cl_a);
        err_num |= clSetKernelArg(m_clKernel, 1, sizeof(cl_mem), &cl_b);
        err_num |= clSetKernelArg(m_clKernel, 2, sizeof(cl_mem), &cl_ret);
        //err_num |= clSetKernelArg(m_clKernel, 3, sizeof(cl_mem), &cl_num);
        err_num |= clSetKernelArg(m_clKernel, 3, sizeof(cl_uint), &DATA_SIZE);
        if (err_num != CL_SUCCESS)
            return err_num;

        // Set work-item dimensions
        size_t globalWorkSize[2];
        size_t localWorkSize[2] = { 0 };
        globalWorkSize[0] = DATA_SIZE;
        globalWorkSize[1] = DATA_SIZE;

        sw_kernel.start();

        // Execute kernel
        err_num = clEnqueueNDRangeKernel(m_clCmdQueue, m_clKernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);

        sw_kernel.stop();
        if (err_num != CL_SUCCESS) {
            if (err_num == CL_INVALID_KERNEL_ARGS)
                DOL_TRACE("Invalid kernel args \n");
            else
                DOL_TRACE1("cl_runner: Error enqueue NDRange kernel. ErrCode = %d \n", err_num);
            //return err_num;
        }

        // Read output array
        if (err_num == CL_SUCCESS) {
            sw_kernel_readBuffer.start();
            err_num = clEnqueueReadBuffer(m_clCmdQueue, cl_ret, CL_TRUE, 0, sizeof(CL_FLOAT_T) * DATA_SIZE, &ret[0], 0, NULL, NULL);
            sw_kernel_readBuffer.stop();
            if (err_num != CL_SUCCESS) {
                DOL_TRACE1("cl_runner: Error enqueue read buffer. ErrCode = %d \n", err_num);
                //return err_num;
            }

            if (err_num == CL_SUCCESS) {
                bool correct = true;
                CL_FLOAT_T diff;
                for (unsigned int i = 0; i < DATA_SIZE; ++i) {
                    diff = a[i] + b[i] - ret[i];
                    if (fabs(diff) > 0.0001) {
                        correct = false;
                        break;
                    }
                }

                if (correct)
                    std::cout << "cl_runner: Data is correct" << endl;
                else
                    std::cout << "cl_runner: Data is incorrect" << endl;
            }
            else {
                std::cerr << "cl_runner: Can't run kernel or read back data" << endl;
            }
        }
    }

    if (cl_a)
        clReleaseMemObject(cl_a);
    if (cl_b)
        clReleaseMemObject(cl_b);
    if (cl_ret)
        clReleaseMemObject(cl_ret);
    if (cl_num)
        clReleaseMemObject(cl_num);

    if (m_clKernel) {
        clReleaseKernel(m_clKernel);
        m_clKernel = NULL;
    }

    if (m_clProgram) {
        clReleaseProgram(m_clProgram);
        m_clProgram = NULL;
    }

    return err_num;
}
コード例 #8
0
ファイル: transforms.cpp プロジェクト: henry931/HPCE-CW5
void transform(int deviceNumber, int levels, unsigned w, unsigned h, unsigned bits)
{
    uint64_t cbinput=uint64_t(w)*bits/8;
    
    
    std::vector<uint64_t> input(2*(cbinput/8));
    std::vector<uint64_t> output(2*(cbinput/8));
    
    std::vector<uint32_t> unpackedInput(2*(cbinput/4));
    std::vector<uint32_t> unpackedOutput(2*(cbinput/4));
    
    uint64_t* inputWriteptr = &input[0];
    uint64_t* inputReadptr = &input[cbinput/8];
    
    uint32_t* unpackedInputReadptr = &unpackedInput[cbinput/4];
    uint32_t* unpackedInputWriteptr = &unpackedInput[0];

    uint32_t* unpackedOutputReadptr = &unpackedOutput[cbinput/4];
    uint32_t* unpackedOutputWriteptr = &unpackedOutput[0];
    
    uint64_t* outputWriteptr = &output[0];
    uint64_t* outputReadptr = &output[cbinput/8];
    
    std::vector<uint32_t> gpuReadOffsets(2*levels+1,0);
    std::vector<uint32_t> gpuWriteOffsets(2*levels+1,0);
    
    std::vector<uint32_t> aboveOverrides(2*levels,0);
    std::vector<uint32_t> belowOverrides(2*levels,0);
    
    int j=0;
    
    for (int i=0; i<2*levels+1; i++)
    {
        gpuWriteOffsets[i] = j;
        
        j+= 2;
        j = j - (4 & -(j >= 4));
        j = j + (4 & -(j < 0));
        
        gpuReadOffsets[i] = j;
    }
    
    auto cl_instance = init_cl(levels,w,h,bits,"pipeline_kernels.cl",deviceNumber);
    
    tbb::task_group group;
    
    bool finished = false;
    
    int fullness = 0;
    
    bool full = false;
    
    int tailEnd = 4*levels+5;
    
    int image_line = 0;
    
    while(1){
        
        for (int i=0; i<levels; i++) {
            if (image_line == 4 + 2*i) aboveOverrides[i] = 0x0 ^ -(levels < 0);
            else aboveOverrides[i] = 0xFFFFFFFF ^ -(levels < 0);
            
            if (image_line == 3 + 2*i) belowOverrides[i] = 0x0 ^ -(levels < 0);
            else belowOverrides[i] = 0xFFFFFFFF ^ -(levels < 0);
        }
        for (int i=levels; i<2*levels; i++) {
            if (image_line == 4 + 2*i) aboveOverrides[i] = 0xFFFFFFFF ^ -(levels < 0);
            else aboveOverrides[i] = 0x0 ^ -(levels < 0);
            
            if (image_line == 3 + 2*i) belowOverrides[i] = 0xFFFFFFFF ^ -(levels < 0);
            else belowOverrides[i] = 0x0 ^ -(levels < 0);
        }
        
        group.run([&](){
            if(!finished && !read_blob(STDIN_FILENO, cbinput, inputWriteptr)) finished = true;
            
            unpack_blob_32(cbinput, inputReadptr, unpackedInputWriteptr);
            
            pack_blob_32(cbinput, unpackedOutputReadptr, outputWriteptr);
            
            if (fullness >= 4*levels+6 || full)
            {
                full = true;
                write_blob(STDOUT_FILENO, cbinput, outputReadptr);
            }
            else
            {
                fullness++;
            }
        });
        
        group.run([&](){
            process_opencl_packed_line(levels, w, bits, gpuReadOffsets, gpuWriteOffsets, unpackedInputReadptr, unpackedOutputWriteptr, aboveOverrides, belowOverrides, cl_instance);
            
            for (int i=0; i<2*levels+1; i++)
            {
                int j = gpuWriteOffsets[i];
                
                j++;
                j = j - (4 & -(j >= 4));
                
                gpuWriteOffsets[i] = j;
                
                j = gpuReadOffsets[i];
                
                j++;
                j = j - (4 & -(j >= 4));
                
                gpuReadOffsets[i] = j;
            }
        });
        
        group.wait();
        
        if (tailEnd == 0) {
            break;
        }
        if (finished) tailEnd--;
        
        std::swap(inputReadptr, inputWriteptr);
        std::swap(unpackedInputReadptr, unpackedInputWriteptr);
        std::swap(unpackedOutputReadptr, unpackedOutputWriteptr);
        std::swap(outputReadptr, outputWriteptr);
        
        image_line++;
        if (image_line == h) image_line = 0;
    }
}
コード例 #9
0
ファイル: transforms.cpp プロジェクト: henry931/HPCE-CW5
int test_cl_devices(int levels, unsigned w, unsigned h, unsigned bits, std::string source)
{
    std::vector<cl::Platform> platforms;
    
	cl::Platform::get(&platforms);
	
    if(platforms.size()==0) throw std::runtime_error("No OpenCL platforms found.");
    
    int selectedPlatform=0;
	if(getenv("HPCE_SELECT_PLATFORM")){
		selectedPlatform=atoi(getenv("HPCE_SELECT_PLATFORM"));
	}

	cl::Platform platform=platforms.at(selectedPlatform);
    
    std::vector<cl::Device> devices;
	platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
	if(devices.size()==0){
		throw std::runtime_error("No opencl devices found.\n");
	}
    
    uint64_t best = 0xFFFFFFFFFFFFFFFFul;
    int best_device = -1;
    
	for(unsigned i=0;i<devices.size();i++){
        uint64_t microseconds = 0xFFFFFFFFFFFFFFFFul;
        bool failedAttempt = false;
        
        try {
            uint64_t cbinput=uint64_t(w)*bits/8;
            
            std::vector<uint32_t> gpuReadOffsets(2*levels+1,0);
            std::vector<uint32_t> gpuWriteOffsets(2*levels+1,0);
            
            std::vector<uint32_t> aboveOverrides(2*levels,0);
            std::vector<uint32_t> belowOverrides(2*levels,0);
            
            std::vector<uint32_t> unpackedInput(2*(cbinput/4));
            std::vector<uint32_t> unpackedOutput(2*(cbinput/4));
            uint32_t* unpackedInputReadptr = &unpackedInput[cbinput/4];
            uint32_t* unpackedOutputWriteptr = &unpackedOutput[0];
            
            auto cl_instance = init_cl(levels,w,h,bits,"pipeline_kernels.cl",i);
            
            timeval before, after;
            
            gettimeofday(&before, NULL);
            
            for(int i=0; i<100; i++)
            {
                process_opencl_packed_line(levels, w, bits, gpuReadOffsets, gpuWriteOffsets, unpackedInputReadptr, unpackedOutputWriteptr, aboveOverrides, belowOverrides, cl_instance);
            }
            
            gettimeofday(&after, NULL);
            
            microseconds =(after.tv_sec - before.tv_sec)*1000000L + after.tv_usec - before.tv_usec;
            
        } catch (std::exception) {
            failedAttempt = true;
        }
        if (failedAttempt) continue;
        if (microseconds < best) {
            best_device = i;
            best = microseconds;
        }
	}
    
    return best_device;
}