/*
 * insure_cu2_device - make sure the device owning a data object is current.
 *
 * dp:	the data object whose data area identifies the wanted device.
 *
 * If the object's area carries the DA_RAM flag, a warning is issued and the
 * function returns without touching the device selection.  Otherwise the
 * area's device is compared with curr_pdp; when they differ, two advisory
 * messages are printed and the platform set_device function is called.
 */
void insure_cu2_device( QSP_ARG_DECL Data_Obj *dp )
{
	Platform_Device *pdp;

	if( AREA_FLAGS(OBJ_AREA(dp)) & DA_RAM ){
		sprintf(DEFAULT_ERROR_STRING,
			"insure_cu2_device: Object %s is a host RAM object!?",OBJ_NAME(dp));
		NWARN(DEFAULT_ERROR_STRING);
		return;
	}

	pdp = AREA_PFDEV(OBJ_AREA(dp));

#ifdef CAUTIOUS
	/* NOTE(review): NERROR1 is presumably fatal - confirm; in non-CAUTIOUS
	 * builds a NULL device pointer is not detected here.
	 */
	if( pdp == NULL )
		NERROR1("CAUTIOUS: null cuda device ptr in data area!?");
#endif /* CAUTIOUS */

	if( curr_pdp != pdp ){
		const char *curr_name;

		sprintf(DEFAULT_ERROR_STRING,"insure_cu2_device: curr_pdp = 0x%lx pdp = 0x%lx",
			(int_for_addr)curr_pdp,(int_for_addr)pdp);
		NADVISE(DEFAULT_ERROR_STRING);

		/* curr_pdp may still be NULL when no device has been selected
		 * successfully; do not dereference it just to print a name.
		 */
		curr_name = (curr_pdp == NULL) ? "(none)" : PFDEV_NAME(curr_pdp);
		sprintf(DEFAULT_ERROR_STRING,"insure_cu2_device: current device is %s, want %s",
			curr_name,PFDEV_NAME(pdp));
		NADVISE(DEFAULT_ERROR_STRING);

		PF_FUNC_NAME(set_device)(QSP_ARG pdp);
	}
}
/*
 * set_device - select pdp as the current CUDA device.
 *
 * pdp:	the device to make current.
 *
 * Warns and does nothing when pdp is already the current device, or when
 * its platform type is not PLATFORM_CUDA.  Otherwise calls cudaSetDevice()
 * with the device's CUDA index; curr_pdp is updated only when that call
 * reports cudaSuccess.  Without HAVE_CUDA the NO_CUDA_MSG macro is expanded
 * instead.
 */
void PF_FUNC_NAME(set_device)( QSP_ARG_DECL Platform_Device *pdp )
{
#ifdef HAVE_CUDA
	cudaError_t cuda_status;
#endif // HAVE_CUDA

	if( pdp == curr_pdp ){
		/* the requested device is already selected */
		sprintf(DEFAULT_ERROR_STRING,"%s: current device is already %s!?",
			STRINGIFY(HOST_CALL_NAME(set_device)),PFDEV_NAME(pdp));
		NWARN(DEFAULT_ERROR_STRING);
	} else if( PFDEV_PLATFORM_TYPE(pdp) != PLATFORM_CUDA ){
		/* this entry point only knows how to select CUDA devices */
		sprintf(ERROR_STRING,"%s: device %s is not a CUDA device!?",
			STRINGIFY(HOST_CALL_NAME(set_device)),PFDEV_NAME(pdp));
		WARN(ERROR_STRING);
	} else {
#ifdef HAVE_CUDA
		cuda_status = cudaSetDevice( PFDEV_CUDA_DEV_INDEX(pdp) );
		if( cuda_status == cudaSuccess ){
			curr_pdp = pdp;
		} else {
			describe_cuda_driver_error2(STRINGIFY(HOST_CALL_NAME(set_device)),
				"cudaSetDevice",cuda_status);
		}
#else // ! HAVE_CUDA
		NO_CUDA_MSG(set_device)
#endif // ! HAVE_CUDA
	}
}
/*
 * init_ocl_device - set up one OpenCL device: device record, context,
 * command queue and data areas.
 *
 * dev_id:	the OpenCL device id.
 * cpp:		the compute platform the device belongs to.
 *
 * Returns silently if create_ocl_device() yields NULL.  A static counter
 * hands out the device serial numbers; error1() is called when the counter
 * has reached MAX_OPENCL_DEVICES.
 *
 * If context creation fails, the device's context and queue are both set
 * to NULL and no command queue is attempted.  The data areas are still
 * initialised and curr_pdp is still set to this device, as before.
 */
static void init_ocl_device(QSP_ARG_DECL cl_device_id dev_id, Compute_Platform *cpp)
{
	cl_int status;
	static int n_ocl_devs=0;
	Platform_Device *pdp;
	CGLContextObj cgl_ctx=NULL;
	cl_context context;
	cl_command_queue command_queue; //"stream" in CUDA
	cl_context_properties props[3]={
		CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
		0,	// need to put cgl_ctx here
		0
	};

	pdp = create_ocl_device(QSP_ARG dev_id, cpp);
	if( pdp == NULL ) return;

	/* Remember this name in case the default is not found */
	if( first_ocl_dev_name == NULL )
		first_ocl_dev_name = PFDEV_NAME(pdp);

	/* Compare this name against the default name set in
	 * the environment, if it exists...
	 */
	if( default_ocl_dev_name != NULL && ! default_ocl_dev_found ){
		if( !strcmp(PFDEV_NAME(pdp),default_ocl_dev_name) )
			default_ocl_dev_found=1;
	}

	get_extensions(QSP_ARG pdp);
	SET_OCLDEV_DEV_ID(pdp,dev_id);
	SET_PFDEV_PLATFORM(pdp,cpp);

	if( n_ocl_devs >= MAX_OPENCL_DEVICES ){
		sprintf(ERROR_STRING,"More than %d OpenCL devices found;"
			"need to increase MAX_OPENCL_DEVICES and recompile",
			MAX_OPENCL_DEVICES);
		error1(ERROR_STRING);
	}

	fprintf(stderr,"Setting %s device index to %d\n",PFDEV_NAME(pdp),n_ocl_devs);
	SET_PFDEV_SERIAL(pdp,n_ocl_devs++);
	SET_PFDEV_MAX_DIMS(pdp,DEFAULT_PFDEV_MAX_DIMS);

	// On the new MacBook Pro, with two devices, the Iris_Pro
	// throws an error at clCreateCommandQueue *iff* we set
	// the share group property here...  Presumably because
	// that device doesn't handle the display?
	// We insert a hack below by excluding that device name,
	// but maybe there is another model where that would be
	// inappropriate?

	if( extension_supported(pdp,"cl_APPLE_gl_sharing") &&
			strcmp(PFDEV_NAME(pdp),"Iris_Pro")){
		CGLShareGroupObj share_group;

		cgl_ctx = CGLGetCurrentContext();
		if( cgl_ctx != NULL){
			// This means that we have an OpenGL window available...
			share_group = CGLGetShareGroup(cgl_ctx);
			assert( share_group != NULL );
			props[1] = (cl_context_properties) share_group;
		} else {
			// If we let this go, it sometimes causes a seg fault
			// when we try to set the GL window afterwards!?
			//
			// But it should not be an error, because we don't know
			// for sure that we will ever attempt it.
			// We need to set a flag to prohibit it later...
			advise("init_ocl_device: OpenCL initialized without an OpenGL context;");
			advise("init_ocl_device: Prohibiting OpenGL operations.");
			prohibit_opengl();
		}
	}

	// Check for OpenGL capabilities
	//opengl_check(pdp);

#ifdef TAKEN_FROM_DEMO_PROG
#if (USE_GL_ATTACHMENTS)

	printf(SEPARATOR);
	printf("Using active OpenGL context...\n");

	CGLContextObj kCGLContext = CGLGetCurrentContext();
	CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);

	cl_context_properties properties[] = {
		CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
		(cl_context_properties)kCGLShareGroup, 0
	};

	// Create a context from a CGL share group
	//
	ComputeContext = clCreateContext(properties, 0, 0, clLogMessagesToStdoutAPPLE, 0, 0);
	if(!ComputeContext)
		return -2;

#else // ! USE_GL_ATTACHMENTS

	// Connect to a compute device
	//
	err = clGetDeviceIDs(NULL, ComputeDeviceType, 1, &ComputeDeviceId, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to locate compute device!\n");
		return EXIT_FAILURE;
	}

	// Create a compute context
	//
	ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, clLogMessagesToStdoutAPPLE, NULL, &err);
	if (!ComputeContext)
	{
		printf("Error: Failed to create a compute context!\n");
		return EXIT_FAILURE;
	}

#endif // ! USE_GL_ATTACHMENTS
#endif // TAKEN_FROM_DEMO_PROG

	// create context on the specified device;
	// the share-group properties are used only when a CGL context was found
	if( cgl_ctx == NULL ){
		context = clCreateContext(
			NULL,		// cl_context_properties *properties
			1,		// num_devices
			&dev_id,	// devices
			NULL,		// void *pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data )
			NULL,		// void *user_data
			&status		// cl_int *errcode_ret
			);
	} else {
		context = clCreateContext(
			props,		// cl_context_properties *properties
			0,		// num_devices
			NULL,		// devices
			clLogMessagesToStdoutAPPLE,	// void *pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data )
			NULL,		// void *user_data
			&status		// cl_int *errcode_ret
			);
	}

	if( status != CL_SUCCESS ){
		report_ocl_error(status, "clCreateContext");
		// Without a context there is nothing to build a queue on;
		// record both as absent instead of calling
		// clCreateCommandQueue with a failed context.
		SET_OCLDEV_CTX(pdp,NULL);
		SET_OCLDEV_QUEUE(pdp,NULL);
	} else {
		SET_OCLDEV_CTX(pdp,context);

		// create the command_queue (stream)
		//
		// At least once we have gotten an invalid value error here,
		// after receiving the advisory "OpenCL initialized without an
		// OpenGL context" (which may or may not be relevant).  This
		// behavior was not repeatable, perhaps because of different
		// stack contents???
		//
		// The third arg is a properties bit field, with valid values being:
		// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
		// CL_QUEUE_PROFILING_ENABLE
		command_queue = clCreateCommandQueue(context, dev_id, 0, &status);
		if( status != CL_SUCCESS ){
			report_ocl_error(status, "clCreateCommandQueue");
			SET_OCLDEV_QUEUE(pdp,NULL);
		} else {
			SET_OCLDEV_QUEUE(pdp,command_queue);
		}
	}
	// set a ready flag?

	init_ocl_dev_memory(QSP_ARG pdp);

	curr_pdp = pdp;
}
/*
 * ocl_dev_info - print information about an OpenCL device.
 *
 * pdp:	the device to describe.
 *
 * Currently prints only the device name followed by a colon, and then a
 * fixed notice that no OpenCL-specific details are available.
 */
static void ocl_dev_info(QSP_ARG_DECL Platform_Device *pdp)
{
	/* heading: "<device name>:" */
	sprintf(MSG_STR, "%s:", PFDEV_NAME(pdp));
	prt_msg(MSG_STR);

	/* placeholder body */
	prt_msg("Sorry, no OpenCL-specific device info yet.");
}
/*
 * init_ocl_dev_memory - create the three data areas belonging to an
 * OpenCL device and record them in the device's pd_ap table.
 *
 * pdp:	the device being initialised.
 *
 * Areas created, each named from the platform name and the device name:
 *	"<platform>.<device>"			(DA_OCL_GLOBAL)
 *	"<platform>.<device>_host"		(DA_OCL_HOST)
 *	"<platform>.<device>_host_mapped"	(DA_OCL_HOST_MAPPED)
 *
 * Failure to create the global area only produces a warning; failure of
 * either host area is reported through error1().  In every case the area
 * is tagged with the device only when it was actually created, so a
 * failed pf_area_init() never leads to a NULL dereference here; the
 * (possibly NULL) pointer is still stored in pd_ap.
 */
static void init_ocl_dev_memory(QSP_ARG_DECL Platform_Device *pdp)
{
	char area_name[MAX_AREA_NAME_LEN+1];
	Data_Area *ap;

	// make sure names will fit - longest name is %s.%s_host_mapped
	if( strlen(PLATFORM_NAME(PFDEV_PLATFORM(pdp)))+strlen(PFDEV_NAME(pdp))+strlen("._host_mapped") > MAX_AREA_NAME_LEN )
		error1("init_ocl_dev_memory: area name too large for buffer, increase MAX_AREA_NAME_LEN!?");

	sprintf(area_name,"%s.%s",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	// what should the name for the memory area be???

	// address set to NULL says use custom allocator - see dobj/makedobj.c

	ap = pf_area_init(area_name,NULL,0,
			MAX_OCL_GLOBAL_OBJECTS,DA_OCL_GLOBAL,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
			"init_ocl_dev_memory: error creating global data area %s",area_name);
		warn(ERROR_STRING);
	} else {
		// only tag an area that exists; this failure is just a warning,
		// so execution continues past this point
		SET_AREA_PFDEV(ap,pdp);
	}

	// BUG should be per-device, not global table...
	pdp->pd_ap[PF_GLOBAL_AREA_INDEX] = ap;

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */

	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */

	sprintf(area_name,"%s.%s_host",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	ap = pf_area_init(area_name,(u_char *)NULL,0,MAX_OCL_MAPPED_OBJECTS,
			DA_OCL_HOST,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
			"init_ocl_dev_memory: error creating host data area %s",area_name);
		// NOTE(review): error1 presumably does not return - confirm
		error1(ERROR_STRING);
	} else {
		SET_AREA_PFDEV(ap, pdp);
	}
	pdp->pd_ap[PF_HOST_AREA_INDEX] = ap;

	/* Make up another psuedo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this psuedo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */

	sprintf(area_name,"%s.%s_host_mapped",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	ap = pf_area_init(area_name,(u_char *)NULL,0,MAX_OCL_MAPPED_OBJECTS,
			DA_OCL_HOST_MAPPED,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
			"init_ocl_dev_memory: error creating host-mapped data area %s",area_name);
		error1(ERROR_STRING);
	} else {
		SET_AREA_PFDEV(ap,pdp);
	}
	pdp->pd_ap[PF_HOST_MAPPED_AREA_INDEX] = ap;

	if( verbose ){
		sprintf(ERROR_STRING,"init_ocl_dev_memory DONE");
		advise(ERROR_STRING);
	}
}
/*
 * init_cu2_device - set up one CUDA device: query its properties, create
 * the device record, select the device, and create its three data areas.
 *
 * index:	CUDA device index, passed to cudaGetDeviceProperties().
 * cpp:		the compute platform the device belongs to.
 *
 * Returns early (after reporting) when the properties cannot be read or
 * when the reported compute capability is 9999.9999.  The compute
 * capability is also stored in the script variable "cuda_comp_cap".
 *
 * The device name is the CUDA-reported name with spaces replaced by
 * underscores, passed through available_pfdev_name().
 *
 * Failure to create the global area is only a warning; each area is
 * tagged with the device only when it was actually created.  Name
 * buffers are filled with bounded writes.
 */
static void init_cu2_device(QSP_ARG_DECL int index, Compute_Platform *cpp)
{
	struct cudaDeviceProp deviceProp;
	cudaError_t e;
	Platform_Device *pdp;
	char name[LLEN];
	char dev_name[LLEN];
	char area_name[LLEN];
	const char *name_p;
	char *s;
	Data_Area *ap;
	float comp_cap;

	if( index >= MAX_CUDA_DEVICES ){
		sprintf(ERROR_STRING,"Program is compiled for a maximum of %d CUDA devices, can't inititialize device %d.",
			MAX_CUDA_DEVICES,index);
		ERROR1(ERROR_STRING);
	}

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d BEGIN",index);
		advise(ERROR_STRING);
	}

	if( (e=cudaGetDeviceProperties(&deviceProp, index)) != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device","cudaGetDeviceProperties",e);
		return;
	}

	if (deviceProp.major == 9999 && deviceProp.minor == 9999){
		sprintf(ERROR_STRING,"There is no CUDA device with dev = %d!?.\n",index);
		WARN(ERROR_STRING);
		/* What should we do here??? */
		return;
	}

	/* Put the compute capability into a script variable so that we can use it */
	comp_cap = deviceProp.major * 10 + deviceProp.minor;
	if( comp_cap > CUDA_COMP_CAP ){
		sprintf(ERROR_STRING,"init_cu2_device: CUDA device %s has compute capability %d.%d, but program was configured for %d.%d!?",
			deviceProp.name,deviceProp.major,deviceProp.minor,
			CUDA_COMP_CAP/10,CUDA_COMP_CAP%10);
		WARN(ERROR_STRING);
	}

	/* BUG if there are multiple devices, we need to make sure that this is set
	 * correctly for the current context!?
	 */
	sprintf(ERROR_STRING,"%d.%d",deviceProp.major,deviceProp.minor);
	assign_var(QSP_ARG "cuda_comp_cap",ERROR_STRING);

	/* What does this do??? */
	/* NOTE(review): this runs before set_device() below, so it presumably
	 * applies to whichever device is current at this point - confirm.
	 */
	e = cudaSetDeviceFlags( cudaDeviceMapHost );
	if( e != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device",
			"cudaSetDeviceFlags",e);
	}

	/* bounded copy: never write past the end of name[] */
	snprintf(name,sizeof(name),"%s",deviceProp.name);

	/* change spaces to underscores */
	s=name;
	while(*s){
		if( *s==' ' ) *s='_';
		s++;
	}

	/* We might have two of the same devices installed in a single system.
	 * In this case, we can't use the device name twice, because there will
	 * be a conflict.  The first one gets the name, then we have to check and
	 * make sure that the name is not in use already.  If it is, then we append
	 * a number to the string...
	 */
	name_p = available_pfdev_name(QSP_ARG name,dev_name,cpp,MAX_CUDA_DEVICES);	// reuse name as scratch string
	pdp = new_pfdev(QSP_ARG name_p);

#ifdef CAUTIOUS
	if( pdp == NO_PFDEV ){
		sprintf(ERROR_STRING,"CAUTIOUS: init_cu2_device: Error creating cuda device struct for %s!?",name_p);
		WARN(ERROR_STRING);
		return;
	}
#endif /* CAUTIOUS */

	/* Remember this name in case the default is not found */
	if( first_cuda_dev_name == NULL )
		first_cuda_dev_name = PFDEV_NAME(pdp);

	/* Compare this name against the default name set in
	 * the environment, if it exists...
	 */
	if( default_cuda_dev_name != NULL && ! default_cuda_dev_found ){
		if( !strcmp(PFDEV_NAME(pdp),default_cuda_dev_name) )
			default_cuda_dev_found=1;
	}

	SET_PFDEV_PLATFORM(pdp,cpp);
	SET_PFDEV_CUDA_INFO( pdp, getbuf(sizeof(Cuda_Dev_Info)) );
	SET_PFDEV_CUDA_DEV_INDEX(pdp,index);
	SET_PFDEV_CUDA_DEV_PROP(pdp,deviceProp);
	SET_PFDEV_CUDA_RNGEN(pdp,NULL);

	if( comp_cap >= 20 ){
		SET_PFDEV_MAX_DIMS(pdp,3);
	} else {
		SET_PFDEV_MAX_DIMS(pdp,2);
	}

	PF_FUNC_NAME(set_device)(QSP_ARG pdp);	// is this call just so we can call cudaMalloc?

	// address set to NULL says use custom allocator - see dobj/makedobj.c

	// BUG??  with pdp we may not need the DA_ flag???
	snprintf(area_name,sizeof(area_name),"%s.%s",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG area_name,NULL,0,
			MAX_CUDA_GLOBAL_OBJECTS,DA_CUDA_GLOBAL,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
			"init_cu2_device: error creating global data area %s",area_name);
		WARN(ERROR_STRING);
	} else {
		// only tag an area that exists; this failure is just a warning,
		// so execution continues past this point
		SET_AREA_CUDA_DEV(ap,pdp);
	}

	SET_PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX,ap);

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */

	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */

	snprintf(area_name,sizeof(area_name),"%s.%s_host",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
			DA_CUDA_HOST,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
			"init_cu2_device: error creating host data area %s",area_name);
		// NOTE(review): ERROR1 presumably does not return - confirm
		ERROR1(ERROR_STRING);
	} else {
		SET_AREA_CUDA_DEV(ap, pdp);
	}
	SET_PFDEV_AREA(pdp,PFDEV_HOST_AREA_INDEX,ap);

	/* Make up another psuedo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this psuedo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */

	snprintf(area_name,sizeof(area_name),"%s.%s_host_mapped",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
			DA_CUDA_HOST_MAPPED,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
			"init_cu2_device: error creating host-mapped data area %s",area_name);
		ERROR1(ERROR_STRING);
	} else {
		SET_AREA_CUDA_DEV(ap,pdp);
	}
	SET_PFDEV_AREA(pdp,PFDEV_HOST_MAPPED_AREA_INDEX,ap);

	// We don't change the data area by default any more when initializing...

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d DONE",index);
		advise(ERROR_STRING);
	}
}
/*
 * cu2_dev_info - print information about a CUDA device.
 *
 * pdp:	the device to describe.
 *
 * Currently prints only the device name followed by a colon, and then a
 * fixed notice that CUDA-specific details are not implemented.
 */
static void cu2_dev_info(QSP_ARG_DECL Platform_Device *pdp)
{
	/* heading: "<device name>:" */
	sprintf(MSG_STR, "%s:", PFDEV_NAME(pdp));
	prt_msg(MSG_STR);

	/* placeholder body */
	prt_msg("Sorry, Cuda-specific device info not implemented yet!?");
}