Example #1
// NOTE: T is not declared in the original snippet; assuming the function is a template.
template <typename T>
void copy(Renderable& out, const T* devicePtr)
{
    // Register the OpenGL VBO with CUDA so it can be mapped into the device address space.
    cudaGraphicsResource* cudaVBOResource;
    CUDA_ERROR_CHECK(cudaGraphicsGLRegisterBuffer(&cudaVBOResource, out.vbo(), cudaGraphicsMapFlagsWriteDiscard));

    size_t num_bytes;
    T* vboDevicePtr = NULL;

    // Map the VBO, copy the device buffer into it, then unmap and unregister.
    CUDA_ERROR_CHECK(cudaGraphicsMapResources(1, &cudaVBOResource, 0));
    CUDA_ERROR_CHECK(cudaGraphicsResourceGetMappedPointer((void **)&vboDevicePtr, &num_bytes, cudaVBOResource));
    CUDA_ERROR_CHECK(cudaMemcpy(vboDevicePtr, devicePtr, num_bytes, cudaMemcpyDeviceToDevice));
    CUDA_ERROR_CHECK(cudaGraphicsUnmapResources(1, &cudaVBOResource, 0));
    CUDA_ERROR_CHECK(cudaGraphicsUnregisterResource(cudaVBOResource));
}
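
The snippet depends on two things it does not define: a CUDA_ERROR_CHECK macro and a Renderable type whose vbo() returns the OpenGL buffer id (cudaGraphicsGLRegisterBuffer also requires cuda_gl_interop.h). A minimal sketch of such a macro, assuming it should simply print the error and abort (the original project's definition may differ), could look like:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical error-checking macro; not taken from the original project.
#define CUDA_ERROR_CHECK(call)                                              \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            std::fprintf(stderr, "CUDA error \"%s\" at %s:%d\n",            \
                         cudaGetErrorString(err_), __FILE__, __LINE__);     \
            std::exit(EXIT_FAILURE);                                        \
        }                                                                   \
    } while (0)

With that in place, a call such as copy(renderable, dDataPtr) maps the registered VBO, copies num_bytes (the mapped buffer size reported by cudaGraphicsResourceGetMappedPointer) device-to-device, and then unmaps and unregisters the resource.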
Example #2
File: hdf5.c Project: leobago/fti
/*-------------------------------------------------------------------------*/
int FTI_ReadHDF5Var(FTIT_dataset *FTI_DataVar)
{
    char str[FTI_BUFS];
    int res;

    hid_t dataset = H5Dopen(FTI_DataVar->h5group->h5groupID, FTI_DataVar->name, H5P_DEFAULT);
    hid_t dataspace = H5Dget_space(dataset);

    // If the data resides on the host (CPU) side,
    // just read it from the file and return.
#ifdef GPUSUPPORT    
    if ( !FTI_DataVar->isDevicePtr ){
#endif
        res = H5Dread(dataset, FTI_DataVar->type->h5datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, FTI_DataVar->ptr);
        if (res < 0) {
            sprintf(str, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }

        res = H5Dclose(dataset);
        if (res < 0) {
            sprintf(str, "Dataset #%d could not be closed", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }
        res = H5Sclose(dataspace);
        if (res < 0) {
            sprintf(str, "Dataspace of dataset #%d could not be closed", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }
        return FTI_SCES;
#ifdef GPUSUPPORT        
    }

    hsize_t dimLength[32];
    int j;
    for (j = 0; j < FTI_DataVar->rank; j++) {
        dimLength[j] = FTI_DataVar->dimLength[j];
    }

    // This code is only executed in the GPU case.


    hsize_t *count  = (hsize_t*) malloc(sizeof(hsize_t) * FTI_DataVar->rank);
    hsize_t *offset = (hsize_t*) calloc(FTI_DataVar->rank, sizeof(hsize_t));

    if (!count || !offset) {
        free(count);
        free(offset);
        sprintf(str, "Could not allocate count and offset regions");
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }


    hsize_t separator;
    size_t fetchBytes;
    size_t hostBufSize = FTI_getHostBuffSize();
    // Calculate how many dimensions can be fetched per transfer
    // and how big the host-GPU communication buffer should be.
    fetchBytes = FTI_calculateCountDim(FTI_DataVar->eleSize, hostBufSize, count, FTI_DataVar->rank, dimLength, &separator);

    // If the current host buffer is smaller than the required fetch size,
    // the host buffers have to be reallocated with a bigger size.
    if (hostBufSize < fetchBytes) {
        if (FTI_Try(FTI_DestroyDevices(), "Deleting host buffers") != FTI_SCES) {
            free(offset);
            free(count);
            sprintf(str, "Could not free host buffers for dataset #%d", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }

        if (FTI_Try(FTI_InitDevices(fetchBytes), "Allocating host buffers") != FTI_SCES) {
            free(offset);
            free(count);
            sprintf(str, "Could not allocate host buffers for dataset #%d", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }
    }

    unsigned char *basePtr = NULL;
    int id = 0;
    int prevId = 1;
    hsize_t totalBytes = FTI_DataVar->size;
    cudaStream_t streams[2]; 
    //Create the streams for the asynchronous data movement.
    CUDA_ERROR_CHECK(cudaStreamCreate(&(streams[0])));
    CUDA_ERROR_CHECK(cudaStreamCreate(&(streams[1])));
    unsigned char *dPtr = FTI_DataVar->devicePtr;
    // Double-buffered loop: read one chunk from the file into a host buffer
    // while the previously read chunk is still being copied to the GPU.
    while (totalBytes) {
        basePtr = FTI_getHostBuffer(id);
        // Read the next chunk from the file into the current host buffer.
        res = FTI_ReadElements(dataspace, FTI_DataVar->type->h5datatype, dataset, count, offset, FTI_DataVar->rank, basePtr);
        if (res != FTI_SCES) {
            free(offset);
            free(count);
            sprintf(str, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }
        // Copy the chunk to the device asynchronously on this iteration's stream.
        CUDA_ERROR_CHECK(cudaMemcpyAsync(dPtr, basePtr, fetchBytes, cudaMemcpyHostToDevice, streams[id]));
        // Advance the file offset for the next chunk.
        FTI_AdvanceOffset(separator, offset, count, dimLength, FTI_DataVar->rank);
        // Wait for the previous chunk's copy to finish before its host buffer is reused.
        CUDA_ERROR_CHECK(cudaStreamSynchronize(streams[prevId]));
        prevId = id;
        id = (id + 1)%2;
        dPtr = dPtr + fetchBytes;
        totalBytes -= fetchBytes;
    }
    // Drain the last outstanding copy and release the streams.
    CUDA_ERROR_CHECK(cudaStreamSynchronize(streams[prevId]));
    CUDA_ERROR_CHECK(cudaStreamDestroy(streams[0]));
    CUDA_ERROR_CHECK(cudaStreamDestroy(streams[1]));

    res = H5Dclose(dataset);
    if (res < 0) {
        free(offset);
        free(count);
        sprintf(str, "Dataset #%d could not be closed", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }
    res = H5Sclose(dataspace);
    if (res < 0) {
        free(offset);
        free(count);
        sprintf(str, "Dataspace of dataset #%d could not be closed", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }
    free(offset);
    free(count);
    return FTI_SCES;
#endif
}
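
The GPU branch above overlaps the HDF5 reads with host-to-device transfers by alternating between the two host staging buffers returned by FTI_getHostBuffer and two CUDA streams: while the chunk just read is being copied asynchronously on one stream, the other stream is synchronized so its buffer can be refilled. The standalone sketch below illustrates only that transfer pattern, outside of FTI and HDF5; fill_chunk(), the chunk size, and all other names are assumptions for illustration, and error checking is omitted for brevity.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda_runtime.h>

/* Stand-in for the file read that produces the next chunk on the host;
 * it is not an FTI or HDF5 function. */
static void fill_chunk(unsigned char *dst, size_t bytes, size_t chunkIndex)
{
    memset(dst, (int)(chunkIndex & 0xFF), bytes);
}

int main(void)
{
    const size_t chunkBytes = 1 << 20;   /* 1 MiB per transfer (arbitrary) */
    const size_t numChunks  = 8;

    unsigned char *hostBuf[2];
    cudaStream_t streams[2];
    unsigned char *dBuf;

    cudaMalloc((void**)&dBuf, chunkBytes * numChunks);
    for (int i = 0; i < 2; i++) {
        /* Pinned host memory so cudaMemcpyAsync can actually overlap. */
        cudaMallocHost((void**)&hostBuf[i], chunkBytes);
        cudaStreamCreate(&streams[i]);
    }

    int id = 0, prevId = 1;
    for (size_t c = 0; c < numChunks; c++) {
        fill_chunk(hostBuf[id], chunkBytes, c);              /* "read" the next chunk   */
        cudaMemcpyAsync(dBuf + c * chunkBytes, hostBuf[id], chunkBytes,
                        cudaMemcpyHostToDevice, streams[id]);
        cudaStreamSynchronize(streams[prevId]);              /* other buffer is free now */
        prevId = id;
        id = (id + 1) % 2;
    }
    cudaStreamSynchronize(streams[prevId]);                  /* drain the last copy      */

    for (int i = 0; i < 2; i++) {
        cudaStreamDestroy(streams[i]);
        cudaFreeHost(hostBuf[i]);
    }
    cudaFree(dBuf);
    printf("uploaded %zu chunks\n", numChunks);
    return 0;
}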