void runBenchmark(int iterations)
{
    // run once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));
    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }
    cutilSafeCall(cudaEventRecord(stopEvent, 0));
    cudaEventSynchronize(stopEvent);

    float milliseconds = 0;
    cutilSafeCall(cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));

    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);

    printf("%d bodies, total time for %d iterations: %0.3f ms\n",
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, 20);
}
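// For reference, a minimal sketch of computePerfStats consistent with the
// printout above: numBodies^2 interactions per iteration at 20 flops each.
// This is an assumption-based illustration, not necessarily the original
// implementation.
void computePerfStats(double &interactionsPerSecond, double &gflops,
                      float milliseconds, int iterations)
{
    const int flopsPerInteraction = 20;
    // total pairwise interactions across all timed iterations
    interactionsPerSecond = (double)numBodies * (double)numBodies * iterations;
    // convert to billions of interactions per second
    interactionsPerSecond *= 1e-9 * 1000.0 / milliseconds;
    gflops = interactionsPerSecond * flopsPerInteraction;
}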
// NOTE: `kind` must come from an enclosing template parameter selecting the
// scan variant; the parameter type used here (int) is an assumption.
template <int kind>
bool test(int (*setup)(float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr))
{
    float * dev;
    unsigned int width;
    unsigned int height;

    setup(&dev, &width, &height);
    float * val = sat_scan_gold<float>(dev, width, height);
    cutilSafeCall(cudaFree(dev));

    setup(&dev, &width, &height);
    float * yours = sat_scan<float, kind>(dev, width, height);
    cutilSafeCall(cudaFree(dev));

    const float EPSILON = 0.1f;
    bool isItGood = true;

    /*
    for (unsigned int i = 0; i < width * height; i++) { printf("%f, ", val[i]); }
    printf("\n");
    for (unsigned int i = 0; i < width * height; i++) { printf("%f, ", yours[i]); }
    printf("\n");
    */

    for (unsigned int i = 0; i < width * height; i++)
    {
        // fabsf, not abs: abs would truncate the float difference to an int
        if (!(fabsf(val[i] - yours[i]) < EPSILON))
        {
            printf("V: %f Y: %f at %d\n", val[i], yours[i], i);
            isItGood = false;
            break;
        }
    }

    free(val);
    free(yours);
    return isItGood;
}
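// Hedged usage sketch: test() takes a setup callback with no extra arguments,
// so the uploadToDevice() harness defined later in this file needs a small
// wrapper. g_hostData and the 16x16 shape are illustrative only.
int uploadToDevice(float * hst, const unsigned int width, const unsigned int height,
                   float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr); // defined in the test harness below

static float g_hostData[16 * 16];

static int setupFromHost(float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr)
{
    return uploadToDevice(g_hostData, 16, 16, dev_ptr_ptr, w_ptr, h_ptr);
}

// e.g.: bool ok = test<0>(setupFromHost);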
const unsigned long CUDARunner::RunStep()
{
    unsigned int best = 0;
    unsigned int bestg = ~0;

    if (m_in == 0 || m_out == 0 || m_devin == 0 || m_devout == 0)
    {
        AllocateResources(m_numb, m_numt);
    }

    cutilSafeCall(cudaMemcpy(m_devin, m_in, sizeof(cuda_in), cudaMemcpyHostToDevice));

    cuda_process_helper(m_devin, m_devout, GetStepIterations(), GetStepBitShift(), m_numb, m_numt);

    cutilSafeCall(cudaMemcpy(m_out, m_devout, m_numb * m_numt * sizeof(cuda_out), cudaMemcpyDeviceToHost));

    for (int i = 0; i < m_numb * m_numt; i++)
    {
        if (m_out[i].m_bestnonce != 0 && m_out[i].m_bestg < bestg)
        {
            best = m_out[i].m_bestnonce;
            bestg = m_out[i].m_bestg;
        }
    }

    return CryptoPP::ByteReverse(best);
}
void CUDARayCastSDF::render(const HashData& hashData, const HashParams& hashParams,
                            const DepthCameraData& cameraData, const mat4f& lastRigidTransform)
{
    rayIntervalSplatting(hashData, hashParams, cameraData, lastRigidTransform);
    m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda();
    m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda();

    // start query for timing
    if (GlobalAppState::getInstance().s_timingsDetailledEnabled)
    {
        cutilSafeCall(cudaDeviceSynchronize());
        m_timer.start();
    }

    renderCS(hashData, m_data, cameraData, m_params);

    //convertToCameraSpace(cameraData);
    if (!m_params.m_useGradients)
    {
        computeNormals(m_data.d_normals, m_data.d_depth4, m_params.m_width, m_params.m_height);
    }

    m_rayIntervalSplatting.unmapCuda();

    // wait for query
    if (GlobalAppState::getInstance().s_timingsDetailledEnabled)
    {
        cutilSafeCall(cudaDeviceSynchronize());
        m_timer.stop();
        TimingLog::totalTimeRayCast += m_timer.getElapsedTimeMS();
        TimingLog::countTimeRayCast++;
    }
}
void benchmark(int iterations)
{
    // allocate memory for result
    unsigned int *d_result;
    unsigned int size = width * height * sizeof(unsigned int);
    cutilSafeCall(cudaMalloc((void**)&d_result, size));

    // warm-up
    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);

    cutilSafeCall(cudaThreadSynchronize());
    cutilCheckError(cutStartTimer(timer));

    // execute the kernel
    for (int i = 0; i < iterations; i++)
    {
        gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    }

    cutilSafeCall(cudaThreadSynchronize());
    cutilCheckError(cutStopTimer(timer));

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    printf("Processing time: %f (ms)\n", cutGetTimerValue(timer));
    printf("%.2f Mpixels/sec\n",
           (width * height * iterations / (cutGetTimerValue(timer) / 1000.0f)) / 1e6);

    cutilSafeCall(cudaFree(d_result));
}
__HOST__ int linearbackwardeuler_init(solver_props *props)
{
    solver_mem *mem;
    linearbackwardeuler_opts *opts = (linearbackwardeuler_opts*)&props->opts;
    unsigned int bandwidth = opts->upperhalfbw + opts->lowerhalfbw + 1;

#if defined TARGET_GPU
    // Allocates GPU global memory for solver's persistent data
    switch (opts->lsolver)
    {
    case LSOLVER_DENSE:
        cutilSafeCall(cudaMalloc((void **)&mem, PARALLEL_MODELS * props->statesize * props->statesize * sizeof(CDATAFORMAT)));
        break;
    case LSOLVER_BANDED:
        cutilSafeCall(cudaMalloc((void **)&mem, PARALLEL_MODELS * props->statesize * bandwidth * sizeof(CDATAFORMAT)));
        break;
    default:
        return 1;
    }
#else // CPU and OPENMP targets
    switch (opts->lsolver)
    {
    case LSOLVER_DENSE:
        mem = (solver_mem *)malloc(PARALLEL_MODELS * props->statesize * props->statesize * sizeof(CDATAFORMAT));
        break;
    case LSOLVER_BANDED:
        mem = (solver_mem *)malloc(PARALLEL_MODELS * props->statesize * bandwidth * sizeof(CDATAFORMAT));
        break;
    default:
        return 1;
    }
#endif

    props->mem = mem; /* The matrix */

    return 0;
}
void SOLVER(bogacki_shampine, free, TARGET, SIMENGINE_STORAGE, bogacki_shampine_mem *mem)
{
#if defined TARGET_GPU
    bogacki_shampine_mem tmem;

    cutilSafeCall(cudaMemcpy(&tmem, mem, sizeof(bogacki_shampine_mem), cudaMemcpyDeviceToHost));

    GPU_ENTRY(free_props, SIMENGINE_STORAGE, tmem.props);

    cutilSafeCall(cudaFree(tmem.k1));
    cutilSafeCall(cudaFree(tmem.k2));
    cutilSafeCall(cudaFree(tmem.k3));
    cutilSafeCall(cudaFree(tmem.k4));
    cutilSafeCall(cudaFree(tmem.temp));
    cutilSafeCall(cudaFree(tmem.next_states));
    cutilSafeCall(cudaFree(tmem.z_next_states));
    cutilSafeCall(cudaFree(tmem.cur_timestep));
    cutilSafeCall(cudaFree(mem));

    GPU_ENTRY(exit, SIMENGINE_STORAGE);
#else // Used for CPU and OPENMP targets
    free(mem->k1);
    free(mem->k2);
    free(mem->k3);
    free(mem->k4);
    free(mem->temp);
    free(mem->next_states);
    free(mem->z_next_states);
    free(mem->cur_timestep);
    free(mem);
#endif
}
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char** argv, const GLuint& vbo)
{
    cutilSafeCall(cudaGLUnregisterBufferObject(vbo));

    // map buffer object
    glBindBuffer(GL_ARRAY_BUFFER_ARB, vbo);
    float* data = (float*) glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);

    // check result
    if (cutCheckCmdLineFlag(argc, (const char**) argv, "regression"))
    {
        // write file for regression test
        cutilCheckError(cutWriteFilef("./data/regression.dat",
                                      data, mesh_width * mesh_height * 3, 0.0));
    }

    // unmap GL buffer object
    if (!glUnmapBuffer(GL_ARRAY_BUFFER))
    {
        fprintf(stderr, "Unmap buffer failed.\n");
        fflush(stderr);
    }

    cutilSafeCall(cudaGLRegisterBufferObject(vbo));

    CUT_CHECK_ERROR_GL();
}
forwardeuler_mem *SOLVER(forwardeuler, init, TARGET, SIMENGINE_STORAGE, solver_props *props)
{
#if defined TARGET_GPU
    GPU_ENTRY(init, SIMENGINE_STORAGE);

    // Temporary CPU copies of GPU datastructures
    forwardeuler_mem tmem;
    // GPU datastructures
    forwardeuler_mem *dmem;

    // Allocate GPU space for mem and pointer fields of mem (other than props)
    cutilSafeCall(cudaMalloc((void**)&dmem, sizeof(forwardeuler_mem)));
    tmem.props = GPU_ENTRY(init_props, SIMENGINE_STORAGE, props);
    cutilSafeCall(cudaMalloc((void**)&tmem.k1, props->statesize*props->num_models*sizeof(CDATAFORMAT)));

    // Copy mem structure to GPU
    cutilSafeCall(cudaMemcpy(dmem, &tmem, sizeof(forwardeuler_mem), cudaMemcpyHostToDevice));

    return dmem;
#else // Used for CPU and OPENMP targets
    forwardeuler_mem *mem = (forwardeuler_mem*)malloc(sizeof(forwardeuler_mem));

    mem->props = props;
    mem->k1 = (CDATAFORMAT*)malloc(props->statesize*props->num_models*sizeof(CDATAFORMAT));

    return mem;
#endif // defined TARGET_GPU
}
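// A hedged sketch of the matching free routine, mirroring the bogacki_shampine
// free above: copy the struct back to the host to recover the device pointers,
// then release each field. The real macro-generated signature may differ.
void SOLVER(forwardeuler, free, TARGET, SIMENGINE_STORAGE, forwardeuler_mem *mem)
{
#if defined TARGET_GPU
    forwardeuler_mem tmem;
    cutilSafeCall(cudaMemcpy(&tmem, mem, sizeof(forwardeuler_mem), cudaMemcpyDeviceToHost));
    GPU_ENTRY(free_props, SIMENGINE_STORAGE, tmem.props);
    cutilSafeCall(cudaFree(tmem.k1));
    cutilSafeCall(cudaFree(mem));
    GPU_ENTRY(exit, SIMENGINE_STORAGE);
#else // Used for CPU and OPENMP targets
    free(mem->k1);
    free(mem);
#endif
}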
void CUDAMarcher::PrepareTerrain()
{
    mCudaEdgeTable = 0;
    mCudaTriTable = 0;
    mCudaVertTable = 0;
    mCudaPerlinDst1 = 0;
    mCudaPerlinDst2 = 0;
    mCudaPerlinDst3 = 0;

    //-- Load tables
    load_tables_as_textures(&mCudaEdgeTable, &mCudaTriTable, &mCudaVertTable);

    //-- Create and load perlin data
    host_PerlinInitialize(0);
    host_InitPerlinData(PERLIN_DATA_RANK, PERLIN_DATA_SIZE);

    unsigned int bufsize = sizeof(uint) * MARCHING_BLOCK_SIZE * MARCHING_BLOCK_SIZE * MARCHING_BLOCK_SIZE;
    cutilSafeCall(cudaMalloc((void**)&CUDABlock::cuda_voxelVerts, bufsize));
    cutilSafeCall(cudaMalloc((void**)&CUDABlock::cuda_voxelVertsScan, bufsize));
    cutilSafeCall(cudaMalloc((void**)&CUDABlock::cuda_voxelOccupied, bufsize));
    cutilSafeCall(cudaMalloc((void**)&CUDABlock::cuda_voxelOccupiedScan, bufsize));
    cutilSafeCall(cudaMalloc((void**)&CUDABlock::cuda_compVoxelArray, bufsize));

    //-- Init blocks
    Init(4, 3, 4);
}
__HOST__ int heun_init(solver_props *props)
{
#if defined TARGET_GPU
    // Temporary CPU copies of GPU datastructures
    heun_mem tmem;
    // GPU datastructures
    heun_mem *dmem;

    // Allocate GPU space for mem and pointer fields of mem (other than props)
    cutilSafeCall(cudaMalloc((void**)&dmem, sizeof(heun_mem)));
    props->mem = dmem;
    cutilSafeCall(cudaMalloc((void**)&tmem.base, props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT)));
    cutilSafeCall(cudaMalloc((void**)&tmem.temp, props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT)));
    cutilSafeCall(cudaMalloc((void**)&tmem.predictor, props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT)));

    // Copy mem structure to GPU
    cutilSafeCall(cudaMemcpy(dmem, &tmem, sizeof(heun_mem), cudaMemcpyHostToDevice));
#else // Used for CPU and OPENMP targets
    heun_mem *mem = (heun_mem*)malloc(sizeof(heun_mem));

    props->mem = mem;
    mem->temp = (CDATAFORMAT*)malloc(props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT));
    mem->base = (CDATAFORMAT*)malloc(props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT));
    mem->predictor = (CDATAFORMAT*)malloc(props->statesize*PARALLEL_MODELS*sizeof(CDATAFORMAT));
#endif

    return 0;
}
/**
 * Allocates and copies sequence data in GPU.
 *
 * @param data the sequence data string.
 * @param len the length of the sequence.
 * @param padding_len extra padding length.
 * @param padding_char character to be used as padding.
 *
 * @return the pointer to the GPU memory allocated for the sequence.
 */
unsigned char* allocCudaSeq(const char* data, const int len, const int padding_len, const char padding_char)
{
    unsigned char* out = (unsigned char*)allocCuda0(len + padding_len);
    if (DEBUG) printf("allocCudaSeq(%p, %d, %d, %d): %p\n", data, len, padding_len, padding_char, out);
    cutilSafeCall(cudaMemcpy(out, data, len, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemset(out + len, padding_char, padding_len));
    return out;
}
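// Hedged usage sketch: padding a sequence so the device copy is a multiple of
// a block size. The 64-byte block and '\0' pad character are illustrative.
static void exampleUpload()
{
    const char* seq = "ACGTACGT";
    const int len = 8;
    const int padded_len = ((len + 63) / 64) * 64; // round up to 64-byte blocks
    unsigned char* d_seq = allocCudaSeq(seq, len, padded_len - len, '\0');
    cutilSafeCall(cudaFree(d_seq));
}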
void initPixelBuffer()
{
    if (pbo)
    {
        // unregister this buffer object from CUDA C
        cutilSafeCall(cudaGraphicsUnregisterResource(cuda_pbo_resource));

        // delete old buffer
        glDeleteBuffersARB(1, &pbo);
        glDeleteTextures(1, &tex);
    }

    // create pixel buffer object for display
    glGenBuffersARB(1, &pbo);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
    glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width*height*sizeof(GLubyte)*4, 0, GL_STREAM_DRAW_ARB);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

    // register this buffer object with CUDA
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard));

    // create texture for display
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glBindTexture(GL_TEXTURE_2D, 0);
}
/**
 * Allocates a vector in GPU and initializes it to zero.
 *
 * @param size length of the vector to be allocated.
 * @return the pointer to the GPU allocated memory.
 */
void* allocCuda0(int size)
{
    void* out;
    cutilSafeCall(cudaMalloc((void**)&out, size));
    cutilSafeCall(cudaMemset(out, 0, size));
    if (DEBUG) printf("allocCuda0(%d): %p\n", size, out);
    return out;
}
void cleanup()
{
    cutilCheckError(cutDeleteTimer(timer));

    if (h_img) cutFree(h_img);
    cutilSafeCall(cudaFree(d_img));
    cutilSafeCall(cudaFree(d_temp));

    // Refer to boxFilter_kernel.cu for implementation
    freeTextures();

    //DEPRECATED: cutilSafeCall(cudaGLUnregisterBufferObject(pbo));
    cudaGraphicsUnregisterResource(cuda_pbo_resource);

    glDeleteBuffersARB(1, &pbo);
    glDeleteTextures(1, &texid);
    glDeleteProgramsARB(1, &shader);

    if (g_CheckRender)
    {
        delete g_CheckRender;
        g_CheckRender = NULL;
    }
    if (g_FrameBufferObject)
    {
        delete g_FrameBufferObject;
        g_FrameBufferObject = NULL;
    }
}
__HOST__ int heun_free(solver_props *props)
{
#if defined TARGET_GPU
    heun_mem *dmem = (heun_mem*)props->mem;
    heun_mem tmem;

    cutilSafeCall(cudaMemcpy(&tmem, dmem, sizeof(heun_mem), cudaMemcpyDeviceToHost));

    cutilSafeCall(cudaFree(tmem.base));
    cutilSafeCall(cudaFree(tmem.temp));
    cutilSafeCall(cudaFree(tmem.predictor));
    cutilSafeCall(cudaFree(dmem));
#else // Used for CPU and OPENMP targets
    heun_mem *mem = (heun_mem*)props->mem;

    free(mem->temp);
    free(mem->base);
    free(mem->predictor);
    free(mem);
#endif // defined TARGET_GPU

    return 0;
}
void raytrace()
{
    uint* imagedata;
    cutilSafeCall(cudaGLMapBufferObject((void**)&imagedata, pbo));

    float3 A, B, C;
    camera.getImagePlane(A, B, C);
    dev_camera d_cam(camera.getPosition(), A, B, C, aperture, focal);
    dev_light d_light(light.getPosition(), light.getColor(), 4096); // need to change here

    float3 minAABB, maxAABB;
    world.getAABB(minAABB, maxAABB);
    sceneInfo scene(world.getNumTriangles(), world.getNumSpheres(), world.getNumBoxes(), minAABB, maxAABB);

    //TODO: add control for clear buffer here.
    //change here for the many-object case
    raytraceImage(imagedata, dev_lastframe_ptr, dev_num_layers, r_width, r_height, moved, d_cam, d_light, scene);

    // for showing the real frame rate (dev_num_layers holds an int, so copy sizeof(int))
    cudaMemcpy(&frame_num, dev_num_layers, sizeof(int), cudaMemcpyDeviceToHost);
    frame_num++;
    cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);

    cutilSafeCall(cudaGLUnmapBufferObject(pbo));

    // download texture from pbo
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
    glBindTexture(GL_TEXTURE_2D, framebuffer);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, r_width, r_height, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

    CUT_CHECK_ERROR_GL();
}
void uploadToGPU()
{
    // upload scene to GPU
    std::vector<float4> triangles;
    std::vector<float4> spheres;
    std::vector<float4> boxes;
    world.updateToArray(triangles, spheres, boxes);

    size_t triangle_size = triangles.size() * sizeof(float4);
    size_t sphere_size = spheres.size() * sizeof(float4);
    size_t boxes_size = boxes.size() * sizeof(float4);

    // merge into one scene
    std::vector<float4> sceneObj;
    sceneObj.insert(sceneObj.end(), triangles.begin(), triangles.end());
    sceneObj.insert(sceneObj.end(), spheres.begin(), spheres.end());
    sceneObj.insert(sceneObj.end(), boxes.begin(), boxes.end());

    // If the scene is dynamic, this function may have to run every frame;
    // in that case, avoid mallocing on each call.
    cutilSafeCall(cudaMalloc((void**)&dev_scene_pointer, triangle_size + sphere_size + boxes_size));
    cudaMemcpy(dev_scene_pointer, &sceneObj[0], triangle_size + sphere_size + boxes_size, cudaMemcpyHostToDevice); //&triangles[0]
    bindTexture(dev_scene_pointer, triangles.size()/4, spheres.size()/3, boxes.size()/4); // change the denominators to bind more information

    // add a device framebuffer for the last frame
    std::vector<float3> clean(r_width * r_height, make_float3(0.0f));
    cutilSafeCall(cudaMalloc((void**)&dev_lastframe_ptr, r_width * r_height * sizeof(float3)));
    cudaMemcpy(dev_lastframe_ptr, &clean[0], r_width * r_height * sizeof(float3), cudaMemcpyHostToDevice);

    cutilSafeCall(cudaMalloc((void**)&dev_num_layers, sizeof(int)));
    cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);
}
void RemoteCUDARunner::DeallocateResources()
{
    if (m_in)
    {
        free(m_in);
        m_in = 0;
    }
    if (m_devin)
    {
        cutilSafeCall(cudaFree(m_devin));
        m_devin = 0;
    }
    if (m_out)
    {
        free(m_out);
        m_out = 0;
    }
    if (m_devout)
    {
        cutilSafeCall(cudaFree(m_devout));
        m_devout = 0;
    }
    if (m_metahash)
    {
        free(m_metahash);
        m_metahash = 0;
    }
    if (m_devmetahash)
    {
        cutilSafeCall(cudaFree(m_devmetahash));
        m_devmetahash = 0;
    }
}
void mvReductArraysToHost(int reduct_bytes)
{
    cutilSafeCall(cudaMemcpy(OP_reduct_h, OP_reduct_d, reduct_bytes, cudaMemcpyDeviceToHost));
    cutilSafeCall(cudaThreadSynchronize());
}
void mvConstArraysToDevice(int consts_bytes)
{
    cutilSafeCall(cudaMemcpy(OP_consts_d, OP_consts_h, consts_bytes, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaThreadSynchronize());
}
void VBO::map()
{
    size_t num_bytes;
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&dev_vbo, &num_bytes, cuda_vbo_resource));
}
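// A hedged sketch of the matching unmap (assuming the class declares one):
// the mapped device pointer is only valid between map and unmap, so the
// buffer must be unmapped before OpenGL uses it again.
void VBO::unmap()
{
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_vbo_resource));
    dev_vbo = NULL; // the pointer is stale once unmapped
}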
void op_cpHostToDevice(void **data_d, void **data_h, int size)
{
    cutilSafeCall(cudaMalloc(data_d, size));
    cutilSafeCall(cudaMemcpy(*data_d, *data_h, size, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaThreadSynchronize());
}
void op_fetch_data(op_dat dat)
{
    cutilSafeCall(cudaMemcpy(dat->data, dat->data_d, dat->size * dat->set->size, cudaMemcpyDeviceToHost));
    cutilSafeCall(cudaThreadSynchronize());
}
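// Hedged usage sketch (name illustrative): pushing an op_dat's host array to
// the device via op_cpHostToDevice above. Field names (data, data_d, size,
// set->size) follow op_fetch_data; note this allocates a fresh device buffer,
// so it suits initialization rather than per-step updates.
static void op_push_data(op_dat dat)
{
    op_cpHostToDevice((void **)&dat->data_d, (void **)&dat->data,
                      dat->size * dat->set->size);
}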
// This is the normal display path
void display(void)
{
    cutilCheckError(cutStartTimer(timer));

    // Sobel operation
    Pixel *data = NULL;

    // map PBO to get CUDA device pointer
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    size_t num_bytes;
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&data, &num_bytes, cuda_pbo_resource));
    //printf("CUDA mapped PBO: May access %ld bytes\n", num_bytes);

    sobelFilter(data, imWidth, imHeight, g_SobelDisplayMode, imageScale, blockOp, pointOp);
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

    glClear(GL_COLOR_BUFFER_BIT);

    glBindTexture(GL_TEXTURE_2D, texid);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imWidth, imHeight,
                    GL_LUMINANCE, GL_UNSIGNED_BYTE, OFFSET(0));
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    glDisable(GL_DEPTH_TEST);
    glEnable(GL_TEXTURE_2D);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

    glBegin(GL_QUADS);
    glVertex2f(0, 0); glTexCoord2f(0, 0);
    glVertex2f(0, 1); glTexCoord2f(1, 0);
    glVertex2f(1, 1); glTexCoord2f(1, 1);
    glVertex2f(1, 0); glTexCoord2f(0, 1);
    glEnd();

    glBindTexture(GL_TEXTURE_2D, 0);

    if (g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify)
    {
        printf("> (Frame %d) readback BackBuffer\n", frameCount);
        g_CheckRender->readback(imWidth, imHeight);
        g_CheckRender->savePPM(sOriginal_ppm[g_Index], true, NULL);
        if (!g_CheckRender->PPMvsPPM(sOriginal_ppm[g_Index], sReference_ppm[g_Index], MAX_EPSILON_ERROR, 0.15f))
        {
            g_TotalErrors++;
        }
        g_Verify = false;
    }

    glutSwapBuffers();

    cutilCheckError(cutStopTimer(timer));

    computeFPS();
    glutPostRedisplay();
}
void cleanup()
{
    // clean up pbo
    cutilSafeCall(cudaFree(dev_num_layers));
    cutilSafeCall(cudaFree(dev_lastframe_ptr));
    cutilSafeCall(cudaFree(dev_scene_pointer));
    glDeleteBuffers(1, &pbo);
    end();
}
float* CloudConstructor::getGPUPoints()
{
    if (d_resultPoints == NULL)
    {
        size_t resultSize = pointCnt * 3 * sizeof(float);
        cutilSafeCall(cudaMalloc((void**)&d_resultPoints, resultSize));
        cutilSafeCall(cudaMemcpy(d_resultPoints, points, resultSize, cudaMemcpyHostToDevice));
    }
    return d_resultPoints;
}
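// A hedged counterpart (name illustrative, assuming the class declares it):
// releasing the lazily uploaded device copy so the next getGPUPoints() call
// re-uploads fresh host data.
void CloudConstructor::freeGPUPoints()
{
    if (d_resultPoints != NULL)
    {
        cutilSafeCall(cudaFree(d_resultPoints));
        d_resultPoints = NULL;
    }
}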
/////////////////////////////////////////////////////////
//////////// TEST HARNESS
/////////////////////////////////////////////////////////
int uploadToDevice(float * hst, const unsigned int width, const unsigned int height,
                   float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr)
{
    float * dev = NULL;
    cutilSafeCall(cudaMalloc((void**)&dev, sizeof(float) * width * height));
    cutilSafeCall(cudaMemcpy(dev, hst, width * height * sizeof(float), cudaMemcpyHostToDevice));
    *w_ptr = width;
    *h_ptr = height;
    *dev_ptr_ptr = dev;
    return 0;
}
WaterPlaneCUDA::~WaterPlaneCUDA()
{
    cudaFree(gpu_normals);
    cudaFree(gpu_newVertices);
    cudaFree(gpu_oldVertices);
    disturbances.clear();
    cutilSafeCall(cudaGraphicsUnregisterResource(cuda_newVertex_resource));
    cutilSafeCall(cudaGraphicsUnregisterResource(cuda_oldVertex_resource));
    cutilSafeCall(cudaGraphicsUnregisterResource(cuda_normalsVB_resource));
    //cudaFree(DIM);
}
void cleanup()
{
    if (g_bQAReadback)
    {
        cudaFree(d_pos);
        cudaFree(d_normal);
    }
    else
    {
        cutilCheckError(cutDeleteTimer(timer));
        deleteVBO(&posVbo, &cuda_posvbo_resource);
        deleteVBO(&normalVbo, &cuda_normalvbo_resource);
    }

    cudppDestroyPlan(scanplan);

    cutilSafeCall(cudaFree(d_edgeTable));
    cutilSafeCall(cudaFree(d_triTable));
    cutilSafeCall(cudaFree(d_numVertsTable));

    cutilSafeCall(cudaFree(d_voxelVerts));
    cutilSafeCall(cudaFree(d_voxelVertsScan));
    cutilSafeCall(cudaFree(d_voxelOccupied));
    cutilSafeCall(cudaFree(d_voxelOccupiedScan));
    cutilSafeCall(cudaFree(d_compVoxelArray));

    if (d_volume) cutilSafeCall(cudaFree(d_volume));

    if (g_CheckRender)
    {
        delete g_CheckRender;
        g_CheckRender = NULL;
    }
    if (g_FrameBufferObject)
    {
        delete g_FrameBufferObject;
        g_FrameBufferObject = NULL;
    }
}