Пример #1
0
Файл: cu2.c Проект: E-LLP/QuIP
void insure_cu2_device( QSP_ARG_DECL  Data_Obj *dp )
{
	Platform_Device *pdp;

	if( AREA_FLAGS(OBJ_AREA(dp)) & DA_RAM ){
		sprintf(DEFAULT_ERROR_STRING,
	"insure_cu2_device:  Object %s is a host RAM object!?",OBJ_NAME(dp));
		NWARN(DEFAULT_ERROR_STRING);
		return;
	}

	pdp = AREA_PFDEV(OBJ_AREA(dp));

#ifdef CAUTIOUS
	if( pdp == NULL )
		NERROR1("CAUTIOUS:  null cuda device ptr in data area!?");
#endif /* CAUTIOUS */

	if( curr_pdp != pdp ){
sprintf(DEFAULT_ERROR_STRING,"insure_cu2_device:  curr_pdp = 0x%lx  pdp = 0x%lx",
(int_for_addr)curr_pdp,(int_for_addr)pdp);
NADVISE(DEFAULT_ERROR_STRING);

sprintf(DEFAULT_ERROR_STRING,"insure_cu2_device:  current device is %s, want %s",
PFDEV_NAME(curr_pdp),PFDEV_NAME(pdp));
NADVISE(DEFAULT_ERROR_STRING);
		PF_FUNC_NAME(set_device)(QSP_ARG  pdp);
	}

}
Пример #2
0
Файл: cu2.c Проект: E-LLP/QuIP
void PF_FUNC_NAME(set_device)( QSP_ARG_DECL  Platform_Device *pdp )
{
#ifdef HAVE_CUDA
	cudaError_t e;
#endif // HAVE_CUDA

	if( curr_pdp == pdp ){
		sprintf(DEFAULT_ERROR_STRING,"%s:  current device is already %s!?",
			STRINGIFY(HOST_CALL_NAME(set_device)),PFDEV_NAME(pdp));
		NWARN(DEFAULT_ERROR_STRING);
		return;
	}
	if( PFDEV_PLATFORM_TYPE(pdp) != PLATFORM_CUDA ){
		sprintf(ERROR_STRING,"%s:  device %s is not a CUDA device!?",
			STRINGIFY(HOST_CALL_NAME(set_device)),PFDEV_NAME(pdp));
		WARN(ERROR_STRING);
		return;
	}

#ifdef HAVE_CUDA
	e = cudaSetDevice( PFDEV_CUDA_DEV_INDEX(pdp) );
	if( e != cudaSuccess )
		describe_cuda_driver_error2(STRINGIFY(HOST_CALL_NAME(set_device)),"cudaSetDevice",e);
	else
		curr_pdp = pdp;
#else // ! HAVE_CUDA
	NO_CUDA_MSG(set_device)
#endif // ! HAVE_CUDA
}
Пример #3
0
Файл: ocl.c Проект: nasa/QuIP
static void init_ocl_device(QSP_ARG_DECL  cl_device_id dev_id,
							Compute_Platform *cpp)
{
	cl_int status;
	//long param_data[MAX_PARAM_SIZE/sizeof(long)];	// force alignment
	//char name[LLEN];
	static int n_ocl_devs=0;
	Platform_Device *pdp;
	CGLContextObj cgl_ctx=NULL;
	cl_context context;
	cl_command_queue command_queue; //"stream" in CUDA
	cl_context_properties props[3]={
		CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
		0,	// need to put cgl_ctx here
		0
		};

	pdp = create_ocl_device(QSP_ARG  dev_id, cpp);
	if( pdp == NULL ) return;

	/* Remember this name in case the default is not found */
	if( first_ocl_dev_name == NULL )
		first_ocl_dev_name = PFDEV_NAME(pdp);

	/* Compare this name against the default name set in
	 * the environment, if it exists...
	 */
	if( default_ocl_dev_name != NULL && ! default_ocl_dev_found ){
		if( !strcmp(PFDEV_NAME(pdp),default_ocl_dev_name) )
			default_ocl_dev_found=1;
	}

	get_extensions(QSP_ARG  pdp);
	SET_OCLDEV_DEV_ID(pdp,dev_id);
	SET_PFDEV_PLATFORM(pdp,cpp);
	if( n_ocl_devs >= MAX_OPENCL_DEVICES ){
		sprintf(ERROR_STRING,"More than %d OpenCL devices found;"
			"need to increase MAX_OPENCL_DEVICES and recompile",
			MAX_OPENCL_DEVICES);
		error1(ERROR_STRING);
	}
fprintf(stderr,"Setting %s device index to %d\n",PFDEV_NAME(pdp),n_ocl_devs);
	SET_PFDEV_SERIAL(pdp,n_ocl_devs++);

	SET_PFDEV_MAX_DIMS(pdp,DEFAULT_PFDEV_MAX_DIMS);

	// On the new MacBook Pro, with two devices, the Iris_Pro
	// throws an error at clCreateCommandQueue *iff* we set
	// the share group property here...  Presumably because
	// that device doesn't handle the display?
	// We insert a hack below by excluding that device name,
	// but maybe there is another model where that would be
	// inappropriate?

	if( extension_supported(pdp,"cl_APPLE_gl_sharing") &&
			strcmp(PFDEV_NAME(pdp),"Iris_Pro")){

		CGLShareGroupObj share_group;
	
		cgl_ctx = CGLGetCurrentContext();
		if( cgl_ctx != NULL){
			// This means that we have an OpenGL window available...
			share_group = CGLGetShareGroup(cgl_ctx);
			assert( share_group != NULL );
			props[1] = (cl_context_properties) share_group;
		} else {
			// If we let this go, it sometimes causes a seg fault
			// when we try to set the GL window afterwards!?
			//
			// But it should not be an error, because we don't know
			// for sure that we will ever attempt it.
			// We need to set a flag to prohibit it later...
			advise("init_ocl_device:  OpenCL initialized without an OpenGL context;");
			advise("init_ocl_device:  Prohibiting OpenGL operations.");
			prohibit_opengl();
		}
	}


	// Check for OpenGL capabilities
	//opengl_check(pdp);

#ifdef TAKEN_FROM_DEMO_PROG
#if (USE_GL_ATTACHMENTS)

	printf(SEPARATOR);
	printf("Using active OpenGL context...\n");

	CGLContextObj kCGLContext = CGLGetCurrentContext();			  
	CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
	
	cl_context_properties properties[] = { 
		CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, 
		(cl_context_properties)kCGLShareGroup, 0 
	};
		
	// Create a context from a CGL share group
	//
	ComputeContext = clCreateContext(properties, 0, 0, clLogMessagesToStdoutAPPLE, 0, 0);
	if(!ComputeContext)
		return -2;

#else	// ! USE_GL_ATTACHMENTS	

	// Connect to a compute device
	//
	err = clGetDeviceIDs(NULL, ComputeDeviceType, 1, &ComputeDeviceId, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to locate compute device!\n");
		return EXIT_FAILURE;
	}
  
	// Create a compute context 
	//
	ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, clLogMessagesToStdoutAPPLE, NULL, &err);
	if (!ComputeContext)
	{
		printf("Error: Failed to create a compute context!\n");
		return EXIT_FAILURE;
	}
#endif	// ! USE_GL_ATTACHMENTS	

#endif // TAKEN_FROM_DEMO_PROG

	//create context on the specified device
//if( cgl_ctx != NULL )
//fprintf(stderr,"creating clContext with share properties for %s...\n",PFDEV_NAME(pdp));
	if( cgl_ctx == NULL ){
		context = clCreateContext(
			NULL,		// cl_context_properties *properties
			1,		// num_devices
			&dev_id,	// devices
			NULL,		// void *pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data )
			NULL,		// void *user_data
			&status		// cl_int *errcode_ret
		);
	} else {
		context = clCreateContext(
			props,		// cl_context_properties *properties
			0,		// num_devices
			NULL,	// devices
			clLogMessagesToStdoutAPPLE,	// void *pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data )
			NULL,		// void *user_data
			&status		// cl_int *errcode_ret
		);
	}
	if( status != CL_SUCCESS ){
		report_ocl_error(status, "clCreateContext");
		SET_OCLDEV_CTX(pdp,NULL);
		//return;
	}
	// BUG check return value for error

	SET_OCLDEV_CTX(pdp,context);

	//create the command_queue (stream)
//fprintf(stderr,"clContext = 0x%lx...\n",(long)context);
//fprintf(stderr,"init_ocl_device:  dev_id = 0x%lx\n",(long)dev_id);

	// At least once we have gotten an invalid value error here,
	// after receiving the advisory "OpenCL initialized without an OpenGL context
	// (which may or may not be relevant).  This behavior was not repeatable,
	// perhaps because of different stack contents???

	// The third arg is a properties bit field, with valid values being:
	// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
	// CL_QUEUE_PROFILING_ENABLE
	command_queue = clCreateCommandQueue(context, dev_id, 0, &status);
	if( status != CL_SUCCESS ){
		report_ocl_error(status, "clCreateCommandQueue");
		SET_OCLDEV_QUEUE(pdp,NULL);
		//return;
	} else {
		SET_OCLDEV_QUEUE(pdp,command_queue);
	}
	// set a ready flag?

	init_ocl_dev_memory(QSP_ARG  pdp);

	curr_pdp = pdp;
}
Пример #4
0
Файл: ocl.c Проект: nasa/QuIP
static void ocl_dev_info(QSP_ARG_DECL  Platform_Device *pdp)
{
	sprintf(MSG_STR,"%s:",PFDEV_NAME(pdp));
	prt_msg(MSG_STR);
	prt_msg("Sorry, no OpenCL-specific device info yet.");
}
Пример #5
0
Файл: ocl.c Проект: nasa/QuIP
static void init_ocl_dev_memory(QSP_ARG_DECL  Platform_Device *pdp)
{
	char area_name[MAX_AREA_NAME_LEN+1];
	Data_Area *ap;

	//strcpy(area_name,PFDEV_NAME(pdp));
	// make sure names will fit - longest name is %s.%s_host_mapped
	if( strlen(PLATFORM_NAME(PFDEV_PLATFORM(pdp)))+strlen(PFDEV_NAME(pdp))+strlen("._host_mapped") > MAX_AREA_NAME_LEN )
		error1("init_ocl_dev_memory:  area name too large for buffer, increase MAX_AREA_NAME_LEN!?");

	sprintf(area_name,"%s.%s",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	// what should the name for the memory area be???

	// address set to NULL says use custom allocator - see dobj/makedobj.c

	ap = pf_area_init(area_name,NULL,0, MAX_OCL_GLOBAL_OBJECTS,DA_OCL_GLOBAL,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
	"init_ocl_dev_memory:  error creating global data area %s",area_name);
		warn(ERROR_STRING);
	}
	// g++ won't take this line!?
	SET_AREA_PFDEV(ap,pdp);

	// BUG should be per-device, not global table...
	pdp->pd_ap[PF_GLOBAL_AREA_INDEX] = ap;

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */


	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */

	//strcat(cname,"_host");
	sprintf(area_name,"%s.%s_host",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	ap = pf_area_init(area_name,(u_char *)NULL,0,MAX_OCL_MAPPED_OBJECTS,
							DA_OCL_HOST,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
	"init_ocl_dev_memory:  error creating host data area %s",area_name);
		error1(ERROR_STRING);
	}
	SET_AREA_PFDEV(ap, pdp);
	pdp->pd_ap[PF_HOST_AREA_INDEX] = ap;

	/* Make up another psuedo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this psuedo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */

	//strcpy(cname,dname);
	//strcat(cname,"_host_mapped");
	sprintf(area_name,"%s.%s_host_mapped",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	ap = pf_area_init(area_name,(u_char *)NULL,0,MAX_OCL_MAPPED_OBJECTS,
						DA_OCL_HOST_MAPPED,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
	"init_ocl_dev_memory:  error creating host-mapped data area %s",area_name);
		error1(ERROR_STRING);
	}
	SET_AREA_PFDEV(ap,pdp);
	pdp->pd_ap[PF_HOST_MAPPED_AREA_INDEX] = ap;

	if( verbose ){
		sprintf(ERROR_STRING,"init_ocl_dev_memory DONE");
		advise(ERROR_STRING);
	}
}
Пример #6
0
Файл: cu2.c Проект: E-LLP/QuIP
static void init_cu2_device(QSP_ARG_DECL  int index, Compute_Platform *cpp)
{
	struct cudaDeviceProp deviceProp;
	cudaError_t e;
	Platform_Device *pdp;
	char name[LLEN];
	char dev_name[LLEN];
	char area_name[LLEN];
	const char *name_p;
	char *s;
	Data_Area *ap;
	float comp_cap;

	if( index >= MAX_CUDA_DEVICES ){
		sprintf(ERROR_STRING,"Program is compiled for a maximum of %d CUDA devices, can't inititialize device %d.",
			MAX_CUDA_DEVICES,index);
		ERROR1(ERROR_STRING);
	}

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d BEGIN",index);
		advise(ERROR_STRING);
	}

	if( (e=cudaGetDeviceProperties(&deviceProp, index)) != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device","cudaGetDeviceProperties",e);
		return;
	}

	if (deviceProp.major == 9999 && deviceProp.minor == 9999){
		sprintf(ERROR_STRING,"There is no CUDA device with dev = %d!?.\n",index);
		WARN(ERROR_STRING);

		/* What should we do here??? */
		return;
	}

	/* Put the compute capability into a script variable so that we can use it */
	comp_cap = deviceProp.major * 10 + deviceProp.minor;
	if( comp_cap > CUDA_COMP_CAP ){
		sprintf(ERROR_STRING,"init_cu2_device:  CUDA device %s has compute capability %d.%d, but program was configured for %d.%d!?",
			deviceProp.name,deviceProp.major,deviceProp.minor,
			CUDA_COMP_CAP/10,CUDA_COMP_CAP%10);
		WARN(ERROR_STRING);
	}

	/* BUG if there are multiple devices, we need to make sure that this is set
	 * correctly for the current context!?
	 */
	sprintf(ERROR_STRING,"%d.%d",deviceProp.major,deviceProp.minor);
	assign_var(QSP_ARG  "cuda_comp_cap",ERROR_STRING);


	/* What does this do??? */
	e = cudaSetDeviceFlags( cudaDeviceMapHost );
	if( e != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device",
			"cudaSetDeviceFlags",e);
	}

	strcpy(name,deviceProp.name);

	/* change spaces to underscores */
	s=name;
	while(*s){
		if( *s==' ' ) *s='_';
		s++;
	}

	/* We might have two of the same devices installed in a single system.
	 * In this case, we can't use the device name twice, because there will
	 * be a conflict.  The first one gets the name, then we have to check and
	 * make sure that the name is not in use already.  If it is, then we append
	 * a number to the string...
	 */
	name_p = available_pfdev_name(QSP_ARG  name,dev_name,cpp,MAX_CUDA_DEVICES);	// reuse name as scratch string
	pdp = new_pfdev(QSP_ARG  name_p);

#ifdef CAUTIOUS
	if( pdp == NO_PFDEV ){
		sprintf(ERROR_STRING,"CAUTIOUS:  init_cu2_device:  Error creating cuda device struct for %s!?",name_p);
		WARN(ERROR_STRING);
		return;
	}
#endif /* CAUTIOUS */

	/* Remember this name in case the default is not found */
	if( first_cuda_dev_name == NULL )
		first_cuda_dev_name = PFDEV_NAME(pdp);

	/* Compare this name against the default name set in
	 * the environment, if it exists...
	 */
	if( default_cuda_dev_name != NULL && ! default_cuda_dev_found ){
		if( !strcmp(PFDEV_NAME(pdp),default_cuda_dev_name) )
			default_cuda_dev_found=1;
	}

	SET_PFDEV_PLATFORM(pdp,cpp);
	SET_PFDEV_CUDA_INFO( pdp, getbuf(sizeof(Cuda_Dev_Info)) );

	SET_PFDEV_CUDA_DEV_INDEX(pdp,index);
	SET_PFDEV_CUDA_DEV_PROP(pdp,deviceProp);
	SET_PFDEV_CUDA_RNGEN(pdp,NULL);

	if( comp_cap >= 20 ){
		SET_PFDEV_MAX_DIMS(pdp,3);
	} else {
		SET_PFDEV_MAX_DIMS(pdp,2);
	}

	//set_cuda_device(pdp);	// is this call just so we can call cudaMalloc?
	PF_FUNC_NAME(set_device)(QSP_ARG  pdp);	// is this call just so we can call cudaMalloc?

	// address set to NULL says use custom allocator - see dobj/makedobj.c

	// BUG??  with pdp we may not need the DA_ flag???
	sprintf(area_name,"%s.%s",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG  area_name,NULL,0,
			MAX_CUDA_GLOBAL_OBJECTS,DA_CUDA_GLOBAL,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
	"init_cu2_device:  error creating global data area %s",area_name);
		WARN(ERROR_STRING);
	}
	// g++ won't take this line!?
	SET_AREA_CUDA_DEV(ap,pdp);
	//set_device_for_area(ap,pdp);

	SET_PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX,ap);

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */


	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */

	//strcpy(area_name,name_p);
	//strcat(area_name,"_host");
	sprintf(area_name,"%s.%s_host",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG  area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
								DA_CUDA_HOST,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
	"init_cu2_device:  error creating host data area %s",area_name);
		ERROR1(ERROR_STRING);
	}
	SET_AREA_CUDA_DEV(ap, pdp);
	//cuda_data_area[index][CUDA_HOST_AREA_INDEX] = ap;
	SET_PFDEV_AREA(pdp,PFDEV_HOST_AREA_INDEX,ap);

	/* Make up another psuedo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this psuedo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */

	//strcpy(area_name,name_p);
	//strcat(area_name,"_host_mapped");
	sprintf(area_name,"%s.%s_host_mapped",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG  area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
							DA_CUDA_HOST_MAPPED,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
	"init_cu2_device:  error creating host-mapped data area %s",area_name);
		ERROR1(ERROR_STRING);
	}
	SET_AREA_CUDA_DEV(ap,pdp);
	//cuda_data_area[index][CUDA_HOST_MAPPED_AREA_INDEX] = ap;
	SET_PFDEV_AREA(pdp,PFDEV_HOST_MAPPED_AREA_INDEX,ap);

	// We don't change the data area by default any more when initializing...
	/* Restore the normal area */
	//set_data_area(PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX));

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d DONE",index);
		advise(ERROR_STRING);
	}
}
Пример #7
0
Файл: cu2.c Проект: E-LLP/QuIP
static void cu2_dev_info(QSP_ARG_DECL  Platform_Device *pdp)
{
	sprintf(MSG_STR,"%s:",PFDEV_NAME(pdp));
	prt_msg(MSG_STR);
	prt_msg("Sorry, Cuda-specific device info not implemented yet!?");
}