/* Make pdp the current platform device (CUDA backend wrapper around
 * cudaSetDevice).  Warns and returns without side effects if pdp is
 * already current or is not a CUDA device; on success records pdp in
 * the file-level curr_pdp.
 */
void PF_FUNC_NAME(set_device)( QSP_ARG_DECL  Platform_Device *pdp )
{
#ifdef HAVE_CUDA
	cudaError_t e;
#endif // HAVE_CUDA

	if( curr_pdp == pdp ){
		/* Use ERROR_STRING/WARN for consistency with the platform-type
		 * check below (was DEFAULT_ERROR_STRING/NWARN, even though
		 * QSP_ARG_DECL makes the qsp-aware versions available here).
		 */
		sprintf(ERROR_STRING,"%s: current device is already %s!?",
			STRINGIFY(HOST_CALL_NAME(set_device)),PFDEV_NAME(pdp));
		WARN(ERROR_STRING);
		return;
	}

	if( PFDEV_PLATFORM_TYPE(pdp) != PLATFORM_CUDA ){
		sprintf(ERROR_STRING,"%s: device %s is not a CUDA device!?",
			STRINGIFY(HOST_CALL_NAME(set_device)),PFDEV_NAME(pdp));
		WARN(ERROR_STRING);
		return;
	}

#ifdef HAVE_CUDA
	e = cudaSetDevice( PFDEV_CUDA_DEV_INDEX(pdp) );
	if( e != cudaSuccess )
		describe_cuda_driver_error2(STRINGIFY(HOST_CALL_NAME(set_device)),
			"cudaSetDevice",e);
	else
		curr_pdp = pdp;		// only remember the device on success
#else // ! HAVE_CUDA
	NO_CUDA_MSG(set_device)
#endif // ! HAVE_CUDA
}
/* Allocate and initialize the table of n CUDA checkpoints.
 * Each table entry gets a CUDA event created for it; tags start NULL.
 * A second initialization attempt is rejected with a warning.
 * Fatal (error1) if allocation or event creation fails.
 */
static void _init_cuda_checkpoints(QSP_ARG_DECL  int n)
{
	//CUresult e;
	cudaError_t drv_err;
	int i;

	if( max_cuda_checkpoints > 0 ){
		/* typo fixed in message: "checpoints" -> "checkpoints" */
		sprintf(ERROR_STRING,
	"init_cuda_checkpoints(%d): already initialized with %d checkpoints",
			n,max_cuda_checkpoints);
		warn(ERROR_STRING);
		return;
	}

	ckpt_tbl = (Cuda_Checkpoint *) getbuf( n * sizeof(*ckpt_tbl) );
	if( ckpt_tbl == NULL )
		error1("failed to allocate checkpoint table");

	max_cuda_checkpoints = n;

	for(i=0;i<max_cuda_checkpoints;i++){
		drv_err=cudaEventCreate(&ckpt_tbl[i].ckpt_event);
		if( drv_err != cudaSuccess ){
			describe_cuda_driver_error2("init_cuda_checkpoints",
				"cudaEventCreate",drv_err);
			error1("failed to initialize checkpoint table");
		}
		ckpt_tbl[i].ckpt_tag=NULL;
	}
}
/* Free the device (global) memory owned by dp, previously obtained
 * with cudaMalloc.  Reports a CUDA error but does not abort.
 */
static void cu2_mem_free(QSP_ARG_DECL  Data_Obj *dp)
{
	cudaError_t e;

	// GLOBAL
	e = cudaFree(OBJ_DATA_PTR(dp));
	if( e != cudaSuccess ){
		/* report under this function's own name (was "release_data",
		 * which made the diagnostic point at the wrong routine) */
		describe_cuda_driver_error2("cu2_mem_free","cudaFree",e);
	}
}
/* Get dp's image ready for display with OpenGL:  unmap the CUDA/GL
 * buffer object if it is currently mapped to CUDA, then bind the
 * texture and pixel-unpack buffer and upload the pixels with
 * glTexSubImage2D.  No-op when built without OpenGL.
 */
static void prepare_image_for_mapping(Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	int t;
	cudaError_t e;

	// unmap buffer before using w/ GL
	if( BUF_IS_MAPPED(dp) ){
		e = cudaGLUnmapBufferObject( OBJ_BUF_ID(dp) );
		if( e != cudaSuccess ){
			describe_cuda_driver_error2("update_cuda_viewer",
				"cudaGLUnmapBufferObject",e);
			NERROR1("failed to unmap buffer object");
		}
		CLEAR_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED);
		// propagate change to children and parents
		propagate_flag(dp,DT_BUF_MAPPED);
	}

	//bind_texture(OBJ_DATA_PTR(dp));

	glClear(GL_COLOR_BUFFER_BIT);
/*
sprintf(ERROR_STRING,"update_cuda_viewer: tex_id = %d, buf_id = %d",
OBJ_TEX_ID(dp),OBJ_BUF_ID(dp));
advise(ERROR_STRING);
*/
	glBindTexture(GL_TEXTURE_2D, OBJ_TEX_ID(dp));

#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, OBJ_BUF_ID(dp));
#endif // HAVE_LIBGLEW

#ifdef FOOBAR
	switch(OBJ_COMPS(dp)){
		/* what used to be here??? */
	}
#endif /* FOOBAR */

	t=gl_pixel_type(dp);
	glTexSubImage2D(GL_TEXTURE_2D, 0,	// target, level
		0, 0,				// x0, y0
		OBJ_COLS(dp), OBJ_ROWS(dp),	// dx, dy
		t, GL_UNSIGNED_BYTE,		// type
		OFFSET(0));			// offset into PIXEL_UNPACK_BUFFER

#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
#endif // HAVE_LIBGLEW
#endif // HAVE_OPENGL	/* was missing: #ifdef HAVE_OPENGL was unbalanced */
}
/* Map dp's GL buffer object into the CUDA address space, storing the
 * resulting device pointer in the object.
 * Returns 0 on success, -1 on failure (error is described first).
 */
int cu2_map_buf(QSP_ARG_DECL  Data_Obj *dp)
{
	cudaError_t status;

	status = cudaGLMapBufferObject( &OBJ_DATA_PTR(dp), OBJ_BUF_ID(dp) );
	if( status == cudaSuccess )
		return 0;

	describe_cuda_driver_error2("cu2_map_buf",
		"cudaGLMapBufferObject",status);
	return -1;
}
/* Register dp's GL buffer object with CUDA so it can later be mapped.
 * Returns 0 on success, -1 on failure (error is described first).
 */
int cu2_register_buf(QSP_ARG_DECL  Data_Obj *dp)
{
	cudaError_t status;

	/* how do we check for an error? */
	status = cudaGLRegisterBufferObject( OBJ_BUF_ID(dp) );
	if( status == cudaSuccess )
		return 0;

	describe_cuda_driver_error2("cu2_register_buf",
		"cudaGLRegisterBufferObject",status);
	return -1;
}
/* Release a temporary device buffer (see tmpvec).  whence names the
 * caller for diagnostics.  Compiles to a no-op without CUDA.
 */
void freetmp(void *ptr,const char *whence)
{
#ifdef HAVE_CUDA
	cudaError_t status;

	//sprintf(ERROR_STRING,"freetmp %s: 0x%"PRIxPTR,whence,(uintptr_t)ptr);
	//advise(ERROR_STRING);

	status = cudaFree(ptr);
	if( status != cudaSuccess ){
		sprintf(DEFAULT_MSG_STR,"freetmp (%s)",whence);
		describe_cuda_driver_error2(DEFAULT_MSG_STR,"cudaFree",status);
	}
#endif // HAVE_CUDA
}
/* Map dp's GL buffer object into CUDA space (platform hook version).
 * Returns 0 on success, -1 on failure or when built without OpenGL.
 */
static int cu2_map_buf(QSP_ARG_DECL  Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	cudaError_t status;

	status = cudaGLMapBufferObject( &OBJ_DATA_PTR(dp), OBJ_BUF_ID(dp) );
	if( status == cudaSuccess )
		return 0;

	describe_cuda_driver_error2("cu2_map_buf",
		"cudaGLMapBufferObject",status);
	return -1;
#else // ! HAVE_OPENGL
	WARN("cu2_map_buf: Sorry, no OpenGL support in this build!?");
	return -1;
#endif // ! HAVE_OPENGL
}
/* Switch the current CUDA device to cdp via cudaSetDevice.
 * Warns (and does nothing) if cdp is already current; the file-level
 * curr_cdp is updated only when cudaSetDevice succeeds.
 * Compiles to a no-op without CUDA.
 */
void _set_cuda_device(QSP_ARG_DECL  Cuda_Device *cdp )
{
#ifdef HAVE_CUDA
	cudaError_t status;

	if( curr_cdp == cdp ){
		sprintf(ERROR_STRING,"set_cuda_device: current device is already %s!?",cdp->cudev_name);
		warn(ERROR_STRING);
		return;
	}

	status = cudaSetDevice( cdp->cudev_index );
	if( status == cudaSuccess )
		curr_cdp = cdp;
	else
		describe_cuda_driver_error2("set_cuda_device","cudaSetDevice",status);
#endif // HAVE_CUDA
}
/* Register dp's GL buffer object with CUDA (platform hook version).
 * Returns 0 on success, -1 on failure or when built without OpenGL.
 */
static int cu2_register_buf(QSP_ARG_DECL  Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	cudaError_t status;

	/* how do we check for an error? */
	status = cudaGLRegisterBufferObject( OBJ_BUF_ID(dp) );
	if( status == cudaSuccess )
		return 0;

	describe_cuda_driver_error2("cu2_register_buf",
		"cudaGLRegisterBufferObject",status);
	return -1;
#else // ! HAVE_OPENGL
	WARN("cu2_register_buf: Sorry, no OpenGL support in this build!?");
	return -1;
#endif // ! HAVE_OPENGL
}
/* Allocate size bytes of CUDA global memory for dp's data pointer.
 * Returns 0 on success, -1 on failure.
 * BUG?  the align argument is ignored here (as the original noted).
 */
static int cu2_mem_alloc(QSP_ARG_DECL  Data_Obj *dp, dimension_t size, int align)
{
	cudaError_t status;

	// GLOBAL
	status = cudaMalloc( &OBJ_DATA_PTR(dp), size);
	if( status == cudaSuccess )
		return 0;

	/* "devices unavailable" is treated as fatal */
	if( status == cudaErrorDevicesUnavailable )
		ERROR1("Cuda devices unavailable!?");
	describe_cuda_driver_error2("cu2_mem_alloc","cudaMalloc",status);
	// NOTE(review): %d assumes dimension_t is int-sized/signed - confirm
	sprintf(ERROR_STRING,"Attempting to allocate %d bytes.",size);
	advise(ERROR_STRING);
	return(-1);
}
/* Allocate a temporary device vector of len elements of size bytes each.
 * whence names the caller for diagnostics.  Allocation failure is fatal
 * (error1).  Returns the device pointer, or NULL when built without CUDA.
 */
void *_tmpvec(QSP_ARG_DECL  int size,int len,const char *whence)
{
#ifdef HAVE_CUDA
	void *cuda_mem;
	cudaError_t drv_err;

	/* Widen the operands before multiplying:  size*len as int*int can
	 * overflow for large temporaries even though cudaMalloc takes size_t.
	 */
	drv_err = cudaMalloc(&cuda_mem, (size_t)size * (size_t)len );
	if( drv_err != cudaSuccess ){
		sprintf(MSG_STR,"tmpvec (%s)",whence);
		describe_cuda_driver_error2(MSG_STR,"cudaMalloc",drv_err);
		error1("CUDA memory allocation error");
	}

	//sprintf(ERROR_STRING,"tmpvec: %d bytes allocated at 0x%"PRIxPTR,len,(uintptr_t)cuda_mem);
	//advise(ERROR_STRING);
	//sprintf(ERROR_STRING,"tmpvec %s: 0x%"PRIxPTR,whence,(uintptr_t)cuda_mem);
	//advise(ERROR_STRING);

	return(cuda_mem);
#else // ! HAVE_CUDA
	return NULL;
#endif // ! HAVE_CUDA
}
// This is the normal display path
/* Redraw the viewer from dp's pixel data via OpenGL.
 * Sequence:  (1) if dp's buffer is mapped to the compute device,
 * unmap it through the platform's unmap hook so GL owns it again;
 * (2) upload pixels from the GL buffer object into the texture;
 * (3) draw a full-window textured quad;  (4) re-map the buffer for
 * the compute platform and restore the mapped flag.
 * NOTE(review): pvp is not referenced in this body - presumably kept
 * for the viewer-update hook signature; confirm against callers.
 */
static void update_pf_viewer(QSP_ARG_DECL  Platform_Viewer *pvp, Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	int t;
	//cudaError_t e;

	// unmap buffer before using w/ GL
	if( BUF_IS_MAPPED(dp) ){
		/* platform-dispatched unmap (CUDA, OpenCL, ...) */
		if( (*PF_UNMAPBUF_FN(PFDEV_PLATFORM(OBJ_PFDEV(dp)))) (QSP_ARG  dp) < 0 ) {
			WARN("update_pf_viewer: buffer unmap error!?");
		}
#ifdef FOOBAR
		/* old direct-CUDA path, kept for reference */
		e = cudaGLUnmapBufferObject( OBJ_BUF_ID(dp) );
		if( e != cudaSuccess ){
			describe_cuda_driver_error2("update_pf_viewer",
				"cudaGLUnmapBufferObject",e);
			NERROR1("failed to unmap buffer object");
		}
#endif // FOOBAR
		CLEAR_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED);
		// propagate change to children and parents
		propagate_flag(dp,DT_BUF_MAPPED);
	}

	//bind_texture(OBJ_DATA_PTR(dp));

	glClear(GL_COLOR_BUFFER_BIT);
/*
sprintf(ERROR_STRING,"update_pf_viewer: tex_id = %d, buf_id = %d",
OBJ_TEX_ID(dp),OBJ_BUF_ID(dp));
advise(ERROR_STRING);
*/
	glBindTexture(GL_TEXTURE_2D, OBJ_TEX_ID(dp));

	// is glBindBuffer REALLY part of libGLEW???
//#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, OBJ_BUF_ID(dp));
//#endif // HAVE_LIBGLEW

#ifdef FOOBAR
	switch(OBJ_COMPS(dp)){
		/* what used to be here??? */
	}
#endif /* FOOBAR */

	/* upload the pixels from the bound PIXEL_UNPACK_BUFFER */
	t=gl_pixel_type(dp);
	glTexSubImage2D(GL_TEXTURE_2D, 0,	// target, level
		0, 0,				// x0, y0
		OBJ_COLS(dp), OBJ_ROWS(dp),	// dx, dy
		t, GL_UNSIGNED_BYTE,		// type
		OFFSET(0));			// offset into PIXEL_UNPACK_BUFFER

//#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
//#endif // HAVE_LIBGLEW

	/* draw a full-window quad with the image texture;
	 * texture coords are flipped vertically (0,1 at bottom-left) */
	glBegin(GL_QUADS);
	glTexCoord2f(0, 1);
	glVertex2f(-1.0, -1.0);
	glTexCoord2f(0, 0);
	glVertex2f(-1.0, 1.0);
	glTexCoord2f(1, 0);
	glVertex2f(1.0, 1.0);
	glTexCoord2f(1, 1);
	glVertex2f(1.0, -1.0);
	glEnd();

	glBindTexture(GL_TEXTURE_2D, 0);

#ifdef FOOBAR
	e = cudaGLMapBufferObject( &OBJ_DATA_PTR(dp),  OBJ_BUF_ID(dp) );
	if( e != cudaSuccess ){
		WARN("Error mapping buffer object!?");
		// should we return now, with possibly other cleanup???
	}
#endif // FOOBAR

	/* hand the buffer back to the compute platform */
	if( (*PF_MAPBUF_FN(PFDEV_PLATFORM(OBJ_PFDEV(dp))))(QSP_ARG  dp) < 0 ){
		WARN("update_pf_viewer: Error mapping buffer!?");
	}
	SET_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED);
	// propagate change to children and parents
	propagate_flag(dp,DT_BUF_MAPPED);
#else // ! HAVE_OPENGL
	NO_OGL_MSG
#endif // ! HAVE_OPENGL
}
/* Initialize CUDA device #index for platform cpp:
 * query its properties, create the Platform_Device struct and its
 * Cuda_Dev_Info, make it the current device, and create its three
 * data areas (global, host, host-mapped).
 * Fatal (ERROR1) on an out-of-range index or failure to create the
 * host/host-mapped areas; other failures just warn and return.
 */
static void init_cu2_device(QSP_ARG_DECL  int index, Compute_Platform *cpp)
{
	struct cudaDeviceProp deviceProp;
	cudaError_t e;
	Platform_Device *pdp;
	char name[LLEN];
	char dev_name[LLEN];
	char area_name[LLEN];
	const char *name_p;
	char *s;
	Data_Area *ap;
	float comp_cap;

	if( index >= MAX_CUDA_DEVICES ){
		sprintf(ERROR_STRING,"Program is compiled for a maximum of %d CUDA devices, can't inititialize device %d.",
			MAX_CUDA_DEVICES,index);
		ERROR1(ERROR_STRING);
	}

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d BEGIN",index);
		advise(ERROR_STRING);
	}

	if( (e=cudaGetDeviceProperties(&deviceProp, index)) != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device","cudaGetDeviceProperties",e);
		return;
	}

	/* 9999.9999 is the runtime's sentinel for "no device" */
	if (deviceProp.major == 9999 && deviceProp.minor == 9999){
		sprintf(ERROR_STRING,"There is no CUDA device with dev = %d!?.\n",index);
		WARN(ERROR_STRING);
		/* What should we do here??? */
		return;
	}

	/* Put the compute capability into a script variable so that we can use it */
	/* encoded as major*10+minor, e.g. 3.5 -> 35 */
	comp_cap = deviceProp.major * 10 + deviceProp.minor;
	if( comp_cap > CUDA_COMP_CAP ){
		sprintf(ERROR_STRING,"init_cu2_device: CUDA device %s has compute capability %d.%d, but program was configured for %d.%d!?",
			deviceProp.name,deviceProp.major,deviceProp.minor,
			CUDA_COMP_CAP/10,CUDA_COMP_CAP%10);
		WARN(ERROR_STRING);
	}
	/* BUG if there are multiple devices, we need to make sure that this is set
	 * correctly for the current context!?
	 */
	sprintf(ERROR_STRING,"%d.%d",deviceProp.major,deviceProp.minor);
	assign_var(QSP_ARG  "cuda_comp_cap",ERROR_STRING);

	/* What does this do??? */
	/* cudaDeviceMapHost enables mapping pinned host memory into the
	 * device address space (needed for the host-mapped area below) */
	e = cudaSetDeviceFlags( cudaDeviceMapHost );
	if( e != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device",
			"cudaSetDeviceFlags",e);
	}

	strcpy(name,deviceProp.name);

	/* change spaces to underscores */
	s=name;
	while(*s){
		if( *s==' ' ) *s='_';
		s++;
	}

	/* We might have two of the same devices installed in a single system.
	 * In this case, we can't use the device name twice, because there will
	 * be a conflict.  The first one gets the name, then we have to check and
	 * make sure that the name is not in use already.  If it is, then we append
	 * a number to the string...
	 */
	name_p = available_pfdev_name(QSP_ARG  name,dev_name,cpp,MAX_CUDA_DEVICES);	// reuse name as scratch string

	pdp = new_pfdev(QSP_ARG  name_p);

#ifdef CAUTIOUS
	if( pdp == NO_PFDEV ){
		sprintf(ERROR_STRING,"CAUTIOUS: init_cu2_device: Error creating cuda device struct for %s!?",name_p);
		WARN(ERROR_STRING);
		return;
	}
#endif /* CAUTIOUS */

	/* Remember this name in case the default is not found */
	if( first_cuda_dev_name == NULL )
		first_cuda_dev_name = PFDEV_NAME(pdp);

	/* Compare this name against the default name set in
	 * the environment, if it exists...
	 */
	if( default_cuda_dev_name != NULL && ! default_cuda_dev_found ){
		if( !strcmp(PFDEV_NAME(pdp),default_cuda_dev_name) )
			default_cuda_dev_found=1;
	}

	/* fill in the per-device bookkeeping */
	SET_PFDEV_PLATFORM(pdp,cpp);
	SET_PFDEV_CUDA_INFO( pdp, getbuf(sizeof(Cuda_Dev_Info)) );
	SET_PFDEV_CUDA_DEV_INDEX(pdp,index);
	SET_PFDEV_CUDA_DEV_PROP(pdp,deviceProp);
	SET_PFDEV_CUDA_RNGEN(pdp,NULL);

	/* compute capability >= 2.0 supports 3D grids */
	if( comp_cap >= 20 ){
		SET_PFDEV_MAX_DIMS(pdp,3);
	} else {
		SET_PFDEV_MAX_DIMS(pdp,2);
	}

	//set_cuda_device(pdp);	// is this call just so we can call cudaMalloc?
	PF_FUNC_NAME(set_device)(QSP_ARG  pdp);	// is this call just so we can call cudaMalloc?

	// address set to NULL says use custom allocator - see dobj/makedobj.c
	// BUG?? with pdp we may not need the DA_ flag???
	sprintf(area_name,"%s.%s",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG  area_name,NULL,0,
		MAX_CUDA_GLOBAL_OBJECTS,DA_CUDA_GLOBAL,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
	"init_cu2_device: error creating global data area %s",area_name);
		WARN(ERROR_STRING);
	}
	// g++ won't take this line!?
	SET_AREA_CUDA_DEV(ap,pdp);
	//set_device_for_area(ap,pdp);

	SET_PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX,ap);

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */

	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */
	//strcpy(area_name,name_p);
	//strcat(area_name,"_host");
	sprintf(area_name,"%s.%s_host",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG  area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
		DA_CUDA_HOST,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
	"init_cu2_device: error creating host data area %s",area_name);
		ERROR1(ERROR_STRING);
	}
	SET_AREA_CUDA_DEV(ap, pdp);
	//cuda_data_area[index][CUDA_HOST_AREA_INDEX] = ap;
	SET_PFDEV_AREA(pdp,PFDEV_HOST_AREA_INDEX,ap);

	/* Make up another pseudo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this pseudo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */
	//strcpy(area_name,name_p);
	//strcat(area_name,"_host_mapped");
	sprintf(area_name,"%s.%s_host_mapped",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG  area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
		DA_CUDA_HOST_MAPPED,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
	"init_cu2_device: error creating host-mapped data area %s",area_name);
		ERROR1(ERROR_STRING);
	}
	SET_AREA_CUDA_DEV(ap,pdp);
	//cuda_data_area[index][CUDA_HOST_MAPPED_AREA_INDEX] = ap;
	SET_PFDEV_AREA(pdp,PFDEV_HOST_MAPPED_AREA_INDEX,ap);

	// We don't change the data area by default any more when initializing...
	/* Restore the normal area */
	//set_data_area(PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX));

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d DONE",index);
		advise(ERROR_STRING);
	}
}