Example #1
void runBenchmark(int iterations)
{
    // once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));

    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }

    cutilSafeCall(cudaEventRecord(stopEvent, 0));
    cutilSafeCall(cudaEventSynchronize(stopEvent));

    float milliseconds = 0;
    cutilSafeCall( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);
    
    printf("%d bodies, total time for %d iterations: %0.3f ms\n", 
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, 20);   
}
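computePerfStats is not listed with this example. A minimal sketch, assuming the usual all-pairs n-body convention of numBodies * numBodies interactions per iteration and the 20 flops per interaction quoted in the printf above (numBodies is the same global the example prints):

void computePerfStats(double &interactionsPerSecond, double &gflops,
                      float milliseconds, int iterations)
{
    // Assumed all-pairs metric: numBodies^2 interactions per step, reported in billions.
    const int flopsPerInteraction = 20;
    interactionsPerSecond = 1e-9 * (double)numBodies * (double)numBodies
                          * iterations / (milliseconds / 1000.0);
    gflops = interactionsPerSecond * flopsPerInteraction;
}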
bool test(int (*setup)(float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr)) {
	float * dev;
	unsigned int width;
	unsigned int height;
	setup(&dev, &width, &height);
	float * val = sat_scan_gold<float>(dev,width, height);
	cutilSafeCall(cudaFree(dev));
	setup(&dev, &width, &height);
	float * yours = sat_scan<float, kind>(dev,width, height);
	cutilSafeCall(cudaFree(dev));
	float EPSILON = 0.1f;

	bool isItGood = true;
	/*
	for (unsigned int i = 0; i < width * height; i++) {
		printf("%f, ", val[i]);
	}
	printf("\n");
	for (unsigned int i = 0; i < width * height; i++) {
		printf("%f, ", yours[i]);
	}
	printf("\n");
	*/
	for (unsigned int i = 0; i < width * height; i++) {
		if (!(fabsf(val[i] - yours[i]) < EPSILON)) {
			printf("V: %f Y: %f at %d\n", val[i], yours[i], i);
			isItGood = false;
			break;
		}
	}

	free(val);
	free(yours);
	return isItGood;
}
Example #3
const unsigned long CUDARunner::RunStep()
{
	unsigned int best=0;
	unsigned int bestg=~0;

	if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0)
	{
		AllocateResources(m_numb,m_numt);
	}

	cutilSafeCall(cudaMemcpy(m_devin,m_in,sizeof(cuda_in),cudaMemcpyHostToDevice));

	cuda_process_helper(m_devin,m_devout,GetStepIterations(),GetStepBitShift(),m_numb,m_numt);

	cutilSafeCall(cudaMemcpy(m_out,m_devout,m_numb*m_numt*sizeof(cuda_out),cudaMemcpyDeviceToHost));

	for(int i=0; i<m_numb*m_numt; i++)
	{
		if(m_out[i].m_bestnonce!=0 && m_out[i].m_bestg<bestg)
		{
			best=m_out[i].m_bestnonce;
			bestg=m_out[i].m_bestg;
		}
	}

	return CryptoPP::ByteReverse(best);

}
Example #4
void CUDARayCastSDF::render(const HashData& hashData, const HashParams& hashParams, const DepthCameraData& cameraData, const mat4f& lastRigidTransform)
{
	rayIntervalSplatting(hashData, hashParams, cameraData, lastRigidTransform);
	m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda();
	m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda();

	// Start query for timing
	if(GlobalAppState::getInstance().s_timingsDetailledEnabled)
	{
		cutilSafeCall(cudaDeviceSynchronize()); 
		m_timer.start();
	}

	renderCS(hashData, m_data, cameraData, m_params);

	//convertToCameraSpace(cameraData);
	if (!m_params.m_useGradients)
	{
		computeNormals(m_data.d_normals, m_data.d_depth4, m_params.m_width, m_params.m_height);
	}

	m_rayIntervalSplatting.unmapCuda();

	// Wait for query
	if(GlobalAppState::getInstance().s_timingsDetailledEnabled)
	{
		cutilSafeCall(cudaDeviceSynchronize()); 
		m_timer.stop();
		TimingLog::totalTimeRayCast+=m_timer.getElapsedTimeMS();
		TimingLog::countTimeRayCast++;
	}
}
void
benchmark(int iterations) 
{
    // allocate memory for result
    unsigned int *d_result;
    unsigned int size = width * height * sizeof(unsigned int);
    cutilSafeCall( cudaMalloc( (void**) &d_result, size));

    // warm-up
    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);

    cutilSafeCall( cudaThreadSynchronize() );
    cutilCheckError( cutStartTimer( timer));

    // execute the kernel
    for(int i=0; i<iterations; i++) {
        gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    }

    cutilSafeCall( cudaThreadSynchronize() );
    cutilCheckError( cutStopTimer( timer));

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    printf("Processing time: %f (ms)\n", cutGetTimerValue( timer));
    printf("%.2f Mpixels/sec\n", (width*height*iterations / (cutGetTimerValue( timer) / 1000.0f)) / 1e6);

    cutilSafeCall(cudaFree(d_result));
}
__HOST__
int linearbackwardeuler_init(solver_props *props){
  solver_mem *mem;
  linearbackwardeuler_opts *opts = (linearbackwardeuler_opts*)&props->opts;
  unsigned int bandwidth = opts->upperhalfbw + opts->lowerhalfbw + 1;

#if defined TARGET_GPU
  // Allocates GPU global memory for solver's persistent data
  switch(opts->lsolver){
  case LSOLVER_DENSE:
    cutilSafeCall(cudaMalloc((void **)&mem, PARALLEL_MODELS * props->statesize * props->statesize * sizeof(CDATAFORMAT)));
    break;
  case LSOLVER_BANDED:
    cutilSafeCall(cudaMalloc((void **)&mem, PARALLEL_MODELS * props->statesize * bandwidth * sizeof(CDATAFORMAT)));
    break;
  default:
    return 1;
  }
#else // CPU and OPENMP targets
  switch(opts->lsolver){
  case LSOLVER_DENSE:
    mem = (solver_mem *)malloc(PARALLEL_MODELS * props->statesize * props->statesize * sizeof(CDATAFORMAT));
    break;
  case LSOLVER_BANDED:
    mem = (solver_mem *)malloc(PARALLEL_MODELS * props->statesize * bandwidth * sizeof(CDATAFORMAT));
    break;
  default:
    return 1;
  }
#endif

  props->mem = mem; /* The matrix */

  return 0;
}
Example #7
void SOLVER(bogacki_shampine, free, TARGET, SIMENGINE_STORAGE, bogacki_shampine_mem *mem) {
#if defined TARGET_GPU
  bogacki_shampine_mem tmem;

  cutilSafeCall(cudaMemcpy(&tmem, mem, sizeof(bogacki_shampine_mem), cudaMemcpyDeviceToHost));

  GPU_ENTRY(free_props, SIMENGINE_STORAGE, tmem.props);

  cutilSafeCall(cudaFree(tmem.k1));
  cutilSafeCall(cudaFree(tmem.k2));
  cutilSafeCall(cudaFree(tmem.k3));
  cutilSafeCall(cudaFree(tmem.k4));
  cutilSafeCall(cudaFree(tmem.temp));
  cutilSafeCall(cudaFree(tmem.next_states));
  cutilSafeCall(cudaFree(tmem.z_next_states));
  cutilSafeCall(cudaFree(tmem.cur_timestep));
  cutilSafeCall(cudaFree(mem));

  GPU_ENTRY(exit, SIMENGINE_STORAGE);

#else // Used for CPU and OPENMP targets

  free(mem->k1);
  free(mem->k2);
  free(mem->k3);
  free(mem->k4);
  free(mem->temp);
  free(mem->next_states);
  free(mem->z_next_states);
  free(mem->cur_timestep);
  free(mem);
#endif
}
Example #8
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char** argv, const GLuint& vbo)
{
    cutilSafeCall(cudaGLUnregisterBufferObject(vbo));

    // map buffer object
    glBindBuffer(GL_ARRAY_BUFFER_ARB, vbo );
    float* data = (float*) glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);

    // check result
    if(cutCheckCmdLineFlag(argc, (const char**) argv, "regression")) {
        // write file for regression test
        cutilCheckError(cutWriteFilef("./data/regression.dat",
            data, mesh_width * mesh_height * 3, 0.0));
    }

    // unmap GL buffer object
    if(! glUnmapBuffer(GL_ARRAY_BUFFER)) {
        fprintf(stderr, "Unmap buffer failed.\n");
        fflush(stderr);
    }

    cutilSafeCall(cudaGLRegisterBufferObject(vbo));

    CUT_CHECK_ERROR_GL();
}
Example #9
forwardeuler_mem *SOLVER(forwardeuler, init, TARGET, SIMENGINE_STORAGE, solver_props *props) {
#if defined TARGET_GPU
  GPU_ENTRY(init, SIMENGINE_STORAGE);

  // Temporary CPU copies of GPU datastructures
  forwardeuler_mem tmem;
  // GPU datastructures
  forwardeuler_mem *dmem;
  
  // Allocate GPU space for mem and pointer fields of mem (other than props)
  cutilSafeCall(cudaMalloc((void**)&dmem, sizeof(forwardeuler_mem)));
  tmem.props = GPU_ENTRY(init_props, SIMENGINE_STORAGE, props);
  cutilSafeCall(cudaMalloc((void**)&tmem.k1, props->statesize*props->num_models*sizeof(CDATAFORMAT)));

  // Copy mem structure to GPU
  cutilSafeCall(cudaMemcpy(dmem, &tmem, sizeof(forwardeuler_mem), cudaMemcpyHostToDevice));

  return dmem;
  
#else // Used for CPU and OPENMP targets

  forwardeuler_mem *mem = (forwardeuler_mem*)malloc(sizeof(forwardeuler_mem));

  mem->props = props;
  mem->k1 = (CDATAFORMAT*)malloc(props->statesize*props->num_models*sizeof(CDATAFORMAT));

  return mem;

#endif // defined TARGET_GPU
}
Example #10
void CUDAMarcher::PrepareTerrain()
{
	mCudaEdgeTable = 0;
	mCudaTriTable = 0;
	mCudaVertTable = 0;
	mCudaPerlinDst1 = 0;
	mCudaPerlinDst2 = 0;
	mCudaPerlinDst3 = 0;

	//-- Load tables
	load_tables_as_textures( &mCudaEdgeTable, &mCudaTriTable, &mCudaVertTable );

	//-- Create and load perlin data
	host_PerlinInitialize(0);
	host_InitPerlinData(PERLIN_DATA_RANK, PERLIN_DATA_SIZE);

	unsigned int bufsize = sizeof(uint) * MARCHING_BLOCK_SIZE*MARCHING_BLOCK_SIZE*MARCHING_BLOCK_SIZE;
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelVerts, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelVertsScan, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelOccupied, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelOccupiedScan, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_compVoxelArray, bufsize));

	//Init blocks
	Init(4, 3, 4);
}
Example #11
__HOST__
int heun_init(solver_props *props){
#if defined TARGET_GPU
  // Temporary CPU copies of GPU datastructures
  heun_mem tmem;
  // GPU datastructures
  heun_mem *dmem;
  
  // Allocate GPU space for mem and pointer fields of mem (other than props)
  cutilSafeCall(cudaMalloc((void**)&dmem, sizeof(heun_mem)));
  props->mem = dmem;
  cutilSafeCall(cudaMalloc((void**)&tmem.base, props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT)));
  cutilSafeCall(cudaMalloc((void**)&tmem.temp, props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT)));
  cutilSafeCall(cudaMalloc((void**)&tmem.predictor, props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT)));

  // Copy mem structure to GPU
  cutilSafeCall(cudaMemcpy(dmem, &tmem, sizeof(heun_mem), cudaMemcpyHostToDevice));

#else // Used for CPU and OPENMP targets

  heun_mem *mem = (heun_mem*)malloc(sizeof(heun_mem));

  props->mem = mem;
  mem->temp = (CDATAFORMAT*)malloc(props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT));
  mem->base = (CDATAFORMAT*)malloc(props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT));
  mem->predictor = (CDATAFORMAT*)malloc(props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT));
#endif

  return 0;
}
Example #12
/**
 * Allocates and copies sequence data in GPU.
 *
 * @param data the sequence data string.
 * @param len the length of the sequence.
 * @param padding_len extra padding length.
 * @param padding_char character to be used as padding.
 *
 * @return the pointer to the GPU memory allocated for the sequence.
 */
unsigned char* allocCudaSeq(const char* data, const int len, const int padding_len, const char padding_char) {
	unsigned char* out = (unsigned char*)allocCuda0(len+padding_len);
	if (DEBUG) printf("allocCudaSeq(%p, %d, %d, %d): %p\n", data, len, padding_len, padding_char, out);
	cutilSafeCall( cudaMemcpy(out, data, len, cudaMemcpyHostToDevice));
	cutilSafeCall( cudaMemset(out+len, padding_char, padding_len) );
	return out;
}
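A hypothetical call site, padding a host sequence up to a multiple of 8 bytes (hostSeq and seqLen are assumed inputs, not part of the example):

	int pad = (8 - seqLen % 8) % 8;
	unsigned char* d_seq = allocCudaSeq(hostSeq, seqLen, pad, '\0');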
void initPixelBuffer()
{
    if (pbo) {
		// unregister this buffer object from CUDA C
		cutilSafeCall(cudaGraphicsUnregisterResource(cuda_pbo_resource));

		// delete old buffer
        glDeleteBuffersARB(1, &pbo);
        glDeleteTextures(1, &tex);
    }

    // create pixel buffer object for display
    glGenBuffersARB(1, &pbo);
	glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
	glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width*height*sizeof(GLubyte)*4, 0, GL_STREAM_DRAW_ARB);
	glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

    // register this buffer object with CUDA
	cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard));	

    // create texture for display
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glBindTexture(GL_TEXTURE_2D, 0);
}
Example #14
/**
 * Allocates a vector in GPU and initializes it to zero.
 *
 * @param size length of the vector to be allocated.
 * @return the pointer to the GPU allocated memory.
 */
void* allocCuda0(int size) {
    void* out;
    cutilSafeCall( cudaMalloc((void**) &out, size));
    cutilSafeCall( cudaMemset(out, 0, size));
    if (DEBUG) printf("allocCuda(%d): %p\n", size, out);
    return out;
}
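For example, a zero-initialized device vector of n floats (n is a hypothetical element count):

    float* d_vec = (float*)allocCuda0(n * sizeof(float));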
Example #15
void cleanup()
{
    cutilCheckError( cutDeleteTimer( timer));
    if(h_img)cutFree(h_img);
    cutilSafeCall(cudaFree(d_img));
    cutilSafeCall(cudaFree(d_temp));

    // Refer to boxFilter_kernel.cu for implementation
    freeTextures();

    //DEPRECATED: cutilSafeCall(cudaGLUnregisterBufferObject(pbo));
    cudaGraphicsUnregisterResource(cuda_pbo_resource);

    glDeleteBuffersARB(1, &pbo);
    glDeleteTextures(1, &texid);
    glDeleteProgramsARB(1, &shader);

    if (g_CheckRender) {
        delete g_CheckRender;
        g_CheckRender = NULL;
    }
    if (g_FrameBufferObject) {
        delete g_FrameBufferObject;
        g_FrameBufferObject = NULL;
    }
}
Example #16
__HOST__
int heun_free(solver_props *props){
#if defined TARGET_GPU
  heun_mem *dmem = (heun_mem*)props->mem;
  heun_mem tmem;

  cutilSafeCall(cudaMemcpy(&tmem, dmem, sizeof(heun_mem), cudaMemcpyDeviceToHost));

  cutilSafeCall(cudaFree(tmem.base));
  cutilSafeCall(cudaFree(tmem.temp));
  cutilSafeCall(cudaFree(tmem.predictor));
  cutilSafeCall(cudaFree(dmem));

#else // Used for CPU and OPENMP targets

  heun_mem *mem =(heun_mem*)props->mem;

  free(mem->temp);
  free(mem->base);
  free(mem->predictor);
  free(mem);
#endif // defined TARGET_GPU

  return 0;
}
Example #17
void raytrace()
{
	uint* imagedata;
	cutilSafeCall(cudaGLMapBufferObject((void**)&imagedata, pbo));

	float3 A, B, C;
	camera.getImagePlane(A, B, C);
	dev_camera d_cam(camera.getPosition(), A, B, C, aperture, focal);
	dev_light d_light(light.getPosition(), light.getColor(), 4096);
	//need to change here.
	float3 minAABB, maxAABB;
	world.getAABB(minAABB, maxAABB);
	sceneInfo scene(world.getNumTriangles(), world.getNumSpheres(), world.getNumBoxes(), minAABB, maxAABB);
	//TODO: add control for clear buffer here.
	//change here for the many object case
	raytraceImage(imagedata, dev_lastframe_ptr, dev_num_layers, r_width, r_height, moved, d_cam, d_light, scene);
	//for showing the real frame rate
	cudaMemcpy(&frame_num, dev_num_layers, sizeof(int), cudaMemcpyDeviceToHost);
	frame_num++;
	cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);
	cutilSafeCall(cudaGLUnmapBufferObject(pbo));

	//download texture from pbo
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
	glBindTexture(GL_TEXTURE_2D, framebuffer);
	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, r_width, r_height, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

	CUT_CHECK_ERROR_GL();
}
Example #18
void uploadToGPU()
{//upload scene to GPU
	std::vector<float4> triangles;
	std::vector<float4> spheres;
	std::vector<float4> boxes;
	world.updateToArray(triangles, spheres, boxes);

	size_t triangle_size = triangles.size() * sizeof(float4);
	size_t sphere_size = spheres.size() * sizeof(float4);
	size_t boxes_size = boxes.size() * sizeof(float4);

	//merge into one scene
	std::vector<float4> sceneObj;
	sceneObj.insert(sceneObj.end(), triangles.begin(), triangles.end());
	sceneObj.insert(sceneObj.end(), spheres.begin(), spheres.end());
	sceneObj.insert(sceneObj.end(), boxes.begin(), boxes.end());

	//If the scene is dynamic, this function might have to be updated every frame;
	//in that case, avoid reallocating device memory each frame.
	cutilSafeCall(cudaMalloc((void**)&dev_scene_pointer, triangle_size + sphere_size + boxes_size));
	cudaMemcpy(dev_scene_pointer, &sceneObj[0], triangle_size + sphere_size + boxes_size, cudaMemcpyHostToDevice);//&triangles[0]
	bindTexture(dev_scene_pointer, triangles.size()/4, spheres.size()/3, boxes.size()/4);//change the denominator for more information to bind 

	//add a device framebuffer for last frame.
	std::vector<float3> clean(r_width * r_height, make_float3(0.0));
	//float zero = 0;
	cutilSafeCall(cudaMalloc((void**)&dev_lastframe_ptr, r_width * r_height * sizeof(float3)));
	cudaMemcpy(dev_lastframe_ptr, &clean[0], r_width * r_height * sizeof(float3), cudaMemcpyHostToDevice);
	cutilSafeCall(cudaMalloc((void**)&dev_num_layers, sizeof(int)));
	cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);
}
void RemoteCUDARunner::DeallocateResources()
{
	if(m_in)
	{
		free(m_in);
		m_in=0;
	}
	if(m_devin)
	{
		cutilSafeCall(cudaFree(m_devin));
		m_devin=0;
	}
	if(m_out)
	{
		free(m_out);
		m_out=0;
	}
	if(m_devout)
	{
		cutilSafeCall(cudaFree(m_devout));
		m_devout=0;
	}
	if(m_metahash)
	{
		free(m_metahash);
		m_metahash=0;
	}
	if(m_devmetahash)
	{
		cutilSafeCall(cudaFree(m_devmetahash));
		m_devmetahash=0;
	}
}
Example #20
void
mvReductArraysToHost ( int reduct_bytes )
{
  cutilSafeCall ( cudaMemcpy ( OP_reduct_h, OP_reduct_d, reduct_bytes,
                               cudaMemcpyDeviceToHost ) );
  cutilSafeCall ( cudaThreadSynchronize (  ) );
}
Example #21
void
mvConstArraysToDevice ( int consts_bytes )
{
  cutilSafeCall ( cudaMemcpy ( OP_consts_d, OP_consts_h, consts_bytes,
                               cudaMemcpyHostToDevice ) );
  cutilSafeCall ( cudaThreadSynchronize (  ) );
}
Example #22
    void VBO::map()
    {
      size_t num_bytes; 

      cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource));
      cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&dev_vbo, &num_bytes,  
                                                         cuda_vbo_resource));
    }
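Only the map side is shown above. A matching unmap, sketched under the assumption that the class keeps the same cuda_vbo_resource member, would hand the buffer back to OpenGL:

    void VBO::unmap()
    {
      // Release the CUDA mapping so OpenGL can use the buffer again.
      cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_vbo_resource));
    }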
Example #23
void
op_cpHostToDevice ( void ** data_d, void ** data_h, int size )
{
  cutilSafeCall ( cudaMalloc ( data_d, size ) );
  cutilSafeCall ( cudaMemcpy ( *data_d, *data_h, size,
                               cudaMemcpyHostToDevice ) );
  cutilSafeCall ( cudaThreadSynchronize (  ) );
}
Example #24
void
op_fetch_data ( op_dat dat )
{
  cutilSafeCall ( cudaMemcpy ( dat->data, dat->data_d,
                               dat->size * dat->set->size,
                               cudaMemcpyDeviceToHost ) );
  cutilSafeCall ( cudaThreadSynchronize (  ) );
}
// This is the normal display path
void display(void) 
{  
    cutilCheckError(cutStartTimer(timer));  

    // Sobel operation
    Pixel *data = NULL;

    // map PBO to get CUDA device pointer
	cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    size_t num_bytes; 
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&data, &num_bytes,  
						       cuda_pbo_resource));
    //printf("CUDA mapped PBO: May access %ld bytes\n", num_bytes);
	
	sobelFilter(data, imWidth, imHeight, g_SobelDisplayMode, imageScale, blockOp, pointOp );
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

    glClear(GL_COLOR_BUFFER_BIT);

    glBindTexture(GL_TEXTURE_2D, texid);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imWidth, imHeight, 
                   GL_LUMINANCE, GL_UNSIGNED_BYTE, OFFSET(0));
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    glDisable(GL_DEPTH_TEST);
    glEnable(GL_TEXTURE_2D);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
        
    glBegin(GL_QUADS);
    glVertex2f(0, 0); glTexCoord2f(0, 0);
    glVertex2f(0, 1); glTexCoord2f(1, 0);
    glVertex2f(1, 1); glTexCoord2f(1, 1);
    glVertex2f(1, 0); glTexCoord2f(0, 1);
    glEnd();
    glBindTexture(GL_TEXTURE_2D, 0);

    if (g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify) {
        printf("> (Frame %d) readback BackBuffer\n", frameCount);
        g_CheckRender->readback( imWidth, imHeight );
        g_CheckRender->savePPM ( sOriginal_ppm[g_Index], true, NULL );
        if (!g_CheckRender->PPMvsPPM(sOriginal_ppm[g_Index], sReference_ppm[g_Index], MAX_EPSILON_ERROR, 0.15f)) {
            g_TotalErrors++;
        }
        g_Verify = false;
    }
    glutSwapBuffers();

    cutilCheckError(cutStopTimer(timer));  

    computeFPS();

    glutPostRedisplay();
}
Example #26
void cleanup()
{
	//clean up pbo
	cutilSafeCall(cudaFree(dev_num_layers));
	cutilSafeCall(cudaFree(dev_lastframe_ptr));
	cutilSafeCall(cudaFree(dev_scene_pointer));
	glDeleteBuffers(1, &pbo);
	end();
}
float* CloudConstructor::getGPUPoints() {
	if(d_resultPoints == NULL) {
		size_t resultSize = pointCnt * 3 * sizeof(float);
		cutilSafeCall( cudaMalloc((void**)&d_resultPoints, resultSize));
		cutilSafeCall(cudaMemcpy(d_resultPoints, points, resultSize, cudaMemcpyHostToDevice));
	}
	return d_resultPoints;
}
Example #28
/////////////////////////////////////////////////////////
//////////// TEST HARNESS
/////////////////////////////////////////////////////////
int uploadToDevice(float * hst, const unsigned int width, const unsigned int height, 
				   float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr) {
	float * dev = NULL;
	cutilSafeCall(cudaMalloc((void**)&dev, sizeof(float)*width * height));
	cutilSafeCall(cudaMemcpy(dev, hst, width * height * sizeof(float), cudaMemcpyHostToDevice));
	*w_ptr = width;
	*h_ptr = height;
	*dev_ptr_ptr = dev;
	return 0;
}
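To plug uploadToDevice into the test() harness shown earlier, which expects a setup callback taking only the three output pointers, a small adapter can bind fixed host data. This is a hypothetical sketch; TEST_W, TEST_H and g_hostData are assumed fixtures, not part of the original harness:

// Hypothetical fixture dimensions and host buffer.
#define TEST_W 512
#define TEST_H 512
static float g_hostData[TEST_W * TEST_H];

// Matches the setup() signature expected by test().
static int setupFixture(float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr) {
	return uploadToDevice(g_hostData, TEST_W, TEST_H, dev_ptr_ptr, w_ptr, h_ptr);
}

// Usage: bool ok = test(setupFixture);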
Example #29
WaterPlaneCUDA::~WaterPlaneCUDA(){
	cudaFree(gpu_normals);
	cudaFree(gpu_newVertices);
	cudaFree(gpu_oldVertices);
	disturbances.clear();
	cutilSafeCall(cudaGraphicsUnregisterResource(cuda_newVertex_resource));
	cutilSafeCall(cudaGraphicsUnregisterResource(cuda_oldVertex_resource));
	cutilSafeCall(cudaGraphicsUnregisterResource(cuda_normalsVB_resource));
	//cudaFree(DIM);
}
void cleanup()
{
    if (g_bQAReadback) {
        cudaFree(d_pos);
        cudaFree(d_normal);
    } else {
        cutilCheckError( cutDeleteTimer( timer ));

        deleteVBO(&posVbo,    &cuda_posvbo_resource);
        deleteVBO(&normalVbo, &cuda_normalvbo_resource);
    }

    cudppDestroyPlan(scanplan);

    cutilSafeCall(cudaFree(d_edgeTable));
    cutilSafeCall(cudaFree(d_triTable));
    cutilSafeCall(cudaFree(d_numVertsTable));

    cutilSafeCall(cudaFree(d_voxelVerts));
    cutilSafeCall(cudaFree(d_voxelVertsScan));
    cutilSafeCall(cudaFree(d_voxelOccupied));
    cutilSafeCall(cudaFree(d_voxelOccupiedScan));
    cutilSafeCall(cudaFree(d_compVoxelArray));

    if (d_volume) cutilSafeCall(cudaFree(d_volume));

    if (g_CheckRender) {
        delete g_CheckRender; g_CheckRender = NULL;
    }
    if (g_FrameBufferObject) {
        delete g_FrameBufferObject; g_FrameBufferObject = NULL;
    }
}