/**
 * Copies device memory into the OpenGL buffer object returned by out.vbo().
 *
 * The buffer is registered with CUDA, mapped, filled with a device-to-device
 * copy, unmapped and unregistered again on every call.
 *
 * @param out        Renderable whose vbo() identifies the destination buffer.
 * @param devicePtr  Source pointer in device memory. The number of bytes
 *                   copied is the size reported for the mapped buffer, so it
 *                   must reference at least that many bytes.
 *
 * NOTE(review): what happens after a failing call depends on CUDA_ERROR_CHECK,
 * which is defined elsewhere; if it returns early the resource may stay
 * mapped/registered - confirm.
 */
void copy(Renderable& out, const T * devicePtr) {
    cudaGraphicsResource *cudaVBOResource = NULL;

    // Registration expects a cudaGraphicsRegisterFlags* constant. The
    // cudaGraphicsMapFlags* family is meant for cudaGraphicsResourceSetMapFlags.
    // WriteDiscard: CUDA overwrites the whole buffer and never reads it.
    CUDA_ERROR_CHECK(cudaGraphicsGLRegisterBuffer(&cudaVBOResource,
                                                  out.vbo(),
                                                  cudaGraphicsRegisterFlagsWriteDiscard));

    // Map the buffer to obtain a device pointer and its size in bytes.
    size_t num_bytes = 0;
    T* vboDevicePtr = NULL;
    CUDA_ERROR_CHECK(cudaGraphicsMapResources(1, &cudaVBOResource, 0));
    CUDA_ERROR_CHECK(cudaGraphicsResourceGetMappedPointer((void **)&vboDevicePtr,
                                                          &num_bytes,
                                                          cudaVBOResource));

    // Fill the entire mapped buffer from the caller's device memory.
    CUDA_ERROR_CHECK(cudaMemcpy(vboDevicePtr, devicePtr, num_bytes,
                                cudaMemcpyDeviceToDevice));

    // Release the mapping and the registration.
    CUDA_ERROR_CHECK(cudaGraphicsUnmapResources(1, &cudaVBOResource, 0));
    CUDA_ERROR_CHECK(cudaGraphicsUnregisterResource(cudaVBOResource));
}
/*-------------------------------------------------------------------------*/
#ifdef GPUSUPPORT
/* Streams one dataset from the HDF5 file into device memory, one host buffer
 * at a time, alternating between two host buffers and two CUDA streams.
 *
 * The caller owns `dataset` and `dataspace` and closes them; this helper only
 * releases what it allocates itself. Returns FTI_SCES or FTI_NSCS.
 *
 * NOTE(review): CUDA_ERROR_CHECK is defined elsewhere. If it returns from the
 * enclosing function on failure, count/offset and the streams are not released
 * on those paths (same as before this change) - confirm the macro's behaviour.
 */
static int FTI_ReadHDF5VarToDevice(FTIT_dataset *FTI_DataVar, hid_t dataset, hid_t dataspace)
{
    enum { FTI_HDF5_MAX_RANK = 32 };
    char str[FTI_BUFS];
    int res;
    int status = FTI_SCES;
    int j;
    hsize_t dimLength[FTI_HDF5_MAX_RANK];

    /* dimLength is a fixed-size array: refuse ranks that would overflow it. */
    if (FTI_DataVar->rank < 0 || FTI_DataVar->rank > FTI_HDF5_MAX_RANK) {
        snprintf(str, sizeof(str), "Dataset #%d could not be read", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }
    for (j = 0; j < FTI_DataVar->rank; j++) {
        dimLength[j] = FTI_DataVar->dimLength[j];
    }

    hsize_t *count = (hsize_t*) malloc(sizeof(hsize_t) * FTI_DataVar->rank);
    hsize_t *offset = (hsize_t*) calloc(FTI_DataVar->rank, sizeof(hsize_t));
    if (!count || !offset) {
        /* One of the two may have succeeded; free(NULL) is a no-op. */
        free(offset);
        free(count);
        snprintf(str, sizeof(str), "Could Not allocate count and offset regions");
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }

    /* Work out how many elements fit in one host buffer and how many bytes
     * each host-to-device transfer therefore carries. */
    hsize_t seperator;
    size_t hostBufSize = FTI_getHostBuffSize();
    size_t fetchBytes = FTI_calculateCountDim(FTI_DataVar->eleSize, hostBufSize, count,
            FTI_DataVar->rank, dimLength, &seperator);

    /* The current host buffers are too small for one transfer: re-create
     * them with the required size. */
    if (hostBufSize < fetchBytes) {
        if (FTI_Try(FTI_DestroyDevices(), "Deleting host buffers") != FTI_SCES ||
            FTI_Try(FTI_InitDevices(fetchBytes), "Allocating host buffers") != FTI_SCES) {
            free(offset);
            free(count);
            snprintf(str, sizeof(str), "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }
    }

    unsigned char *basePtr = NULL;
    unsigned char *dPtr = FTI_DataVar->devicePtr;
    hsize_t totalBytes = FTI_DataVar->size;
    int id = 0;
    int prevId = 1;

    /* A zero-sized transfer would never reduce totalBytes (endless loop). */
    if (totalBytes && fetchBytes == 0) {
        free(offset);
        free(count);
        snprintf(str, sizeof(str), "Dataset #%d could not be read", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }

    /* Create the streams for the asynchronous data movement. */
    cudaStream_t streams[2];
    CUDA_ERROR_CHECK(cudaStreamCreate(&(streams[0])));
    CUDA_ERROR_CHECK(cudaStreamCreate(&(streams[1])));

    /* Loop until all bytes of the dataset have been transferred. */
    while (totalBytes) {
        /* Never copy more than what is left, so the unsigned counter cannot
         * wrap around and the device buffer is not overrun. */
        size_t chunkBytes = (totalBytes < fetchBytes) ? (size_t) totalBytes : fetchBytes;

        basePtr = FTI_getHostBuffer(id);

        /* Read the next piece of the file into the current host buffer. */
        res = FTI_ReadElements(dataspace, FTI_DataVar->type->h5datatype, dataset,
                count, offset, FTI_DataVar->rank, basePtr);
        if (res != FTI_SCES) {
            /* Do not upload a buffer whose read failed. */
            snprintf(str, sizeof(str), "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            status = FTI_NSCS;
            break;
        }

        CUDA_ERROR_CHECK(cudaMemcpyAsync(dPtr, basePtr, chunkBytes,
                    cudaMemcpyHostToDevice, streams[id]));

        /* Advance the file offset to the next piece. */
        FTI_AdvanceOffset(seperator, offset, count, dimLength, FTI_DataVar->rank);

        /* Wait for the previous transfer: its host buffer is the one that
         * gets overwritten by the next read. */
        CUDA_ERROR_CHECK(cudaStreamSynchronize(streams[prevId]));
        prevId = id;
        id = (id + 1) % 2;
        dPtr = dPtr + chunkBytes;
        totalBytes -= chunkBytes;
    }

    /* Wait for the last queued transfer (also on the failure path), then
     * release the streams and the selection arrays. */
    CUDA_ERROR_CHECK(cudaStreamSynchronize(streams[prevId]));
    CUDA_ERROR_CHECK(cudaStreamDestroy(streams[0]));
    CUDA_ERROR_CHECK(cudaStreamDestroy(streams[1]));

    free(offset);
    free(count);
    return status;
}
#endif

/*-------------------------------------------------------------------------*/
/**
  @brief      Reads one dataset from its HDF5 group into the variable's memory.
  @param      FTI_DataVar     Dataset descriptor (group, name, type, target).
  @return     integer         FTI_SCES if successful, FTI_NSCS otherwise.

  The dataset named FTI_DataVar->name is opened in FTI_DataVar->h5group. Without
  GPUSUPPORT, or when isDevicePtr is not set, the whole dataset is read directly
  into FTI_DataVar->ptr. With GPUSUPPORT and isDevicePtr set, the data is read
  piecewise into host buffers and copied to FTI_DataVar->devicePtr.
  The dataset and dataspace handles are closed on every path that opened them.
 **/
/*-------------------------------------------------------------------------*/
int FTI_ReadHDF5Var(FTIT_dataset *FTI_DataVar)
{
    char str[FTI_BUFS];
    int status = FTI_SCES;

    hid_t dataset = H5Dopen(FTI_DataVar->h5group->h5groupID, FTI_DataVar->name, H5P_DEFAULT);
    if (dataset < 0) {
        snprintf(str, sizeof(str), "Dataset #%d could not be opened", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }

    hid_t dataspace = H5Dget_space(dataset);
    if (dataspace < 0) {
        snprintf(str, sizeof(str), "Dataset #%d could not be read", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        (void) H5Dclose(dataset);
        return FTI_NSCS;
    }

#ifdef GPUSUPPORT
    if (FTI_DataVar->isDevicePtr) {
        /* Target lives on the device: stage through the host buffers. */
        status = FTI_ReadHDF5VarToDevice(FTI_DataVar, dataset, dataspace);
    } else
#endif
    {
        /* Target lives on the host: read straight into it. */
        if (H5Dread(dataset, FTI_DataVar->type->h5datatype, H5S_ALL, H5S_ALL,
                    H5P_DEFAULT, FTI_DataVar->ptr) < 0) {
            snprintf(str, sizeof(str), "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            status = FTI_NSCS;
        }
    }

    /* Always release both handles; a failing close turns the result into an
     * error but does not prevent the other handle from being closed. */
    if (H5Dclose(dataset) < 0) {
        snprintf(str, sizeof(str), "Dataset #%d could not be closed", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        status = FTI_NSCS;
    }
    if (H5Sclose(dataspace) < 0) {
        snprintf(str, sizeof(str), "Dataspace of dataset #%d could not be closed", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        status = FTI_NSCS;
    }

    return status;
}