/*
 * Create the platform device named "CPU1" for the given platform and
 * attach the shared RAM data area to it.
 *
 * The RAM area (ram_area_p) is created the first time it is found to be
 * NO_AREA; afterwards the existing area is reused and simply re-pointed
 * at the newly created device.
 *
 * cpp - platform that the new device is registered under.
 */
static void init_vl2_pfdevs(QSP_ARG_DECL Compute_Platform *cpp)
{
	Platform_Device *pdp;

	pdp = new_pfdev(QSP_ARG "CPU1");

	SET_PFDEV_PLATFORM(pdp,cpp);
	SET_PFDEV_MAX_DIMS(pdp,DEFAULT_PFDEV_MAX_DIMS);

	// set the data area for the device?
	if( ram_area_p == NO_AREA ){
		ram_area_p = pf_area_init(QSP_ARG "ram",NULL,0L,MAX_RAM_CHUNKS,DA_RAM,pdp);
	}

	// Record the area in the device's table even if creation failed, so
	// that the slot holds a well-defined value.
	SET_PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX,ram_area_p);

	// Only set the area's back-pointer when the area actually exists;
	// pf_area_init may have failed and left ram_area_p as NO_AREA.
	if( ram_area_p != NO_AREA ){
		SET_AREA_PFDEV( ram_area_p, pdp );
	}
}
/*
 * Create the three data areas associated with an OpenCL device and
 * record them in the device's pd_ap[] table:
 *
 *   "<platform>.<device>"              global device memory
 *   "<platform>.<device>_host"         host memory area
 *   "<platform>.<device>_host_mapped"  pseudo-area for the same host
 *                                      memory as seen from the device
 *
 * Failure to create the global area is only warned about; failure to
 * create either host area is reported through error1().
 *
 * pdp - device whose areas are being created.
 */
static void init_ocl_dev_memory(QSP_ARG_DECL Platform_Device *pdp)
{
	char area_name[MAX_AREA_NAME_LEN+1];
	Data_Area *ap;

	//strcpy(area_name,PFDEV_NAME(pdp));

	// make sure names will fit - longest name is %s.%s_host_mapped
	if( strlen(PLATFORM_NAME(PFDEV_PLATFORM(pdp)))+strlen(PFDEV_NAME(pdp))+strlen("._host_mapped") > MAX_AREA_NAME_LEN ){
		error1("init_ocl_dev_memory: area name too large for buffer, increase MAX_AREA_NAME_LEN!?");
	}

	// snprintf keeps the writes inside area_name even if error1()
	// above were ever to return.
	snprintf(area_name,sizeof(area_name),"%s.%s",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));

	// what should the name for the memory area be???

	// address set to NULL says use custom allocator - see dobj/makedobj.c
	ap = pf_area_init(area_name,NULL,0,
			MAX_OCL_GLOBAL_OBJECTS,DA_OCL_GLOBAL,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
			"init_ocl_dev_memory: error creating global data area %s",area_name);
		warn(ERROR_STRING);
	} else {
		// warn() is not fatal, so the back-pointer may only be set
		// when the area was really created.
		// g++ won't take this line!?
		SET_AREA_PFDEV(ap,pdp);
	}

	// BUG should be per-device, not global table...
	pdp->pd_ap[PF_GLOBAL_AREA_INDEX] = ap;

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */

	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */

	//strcat(cname,"_host");
	snprintf(area_name,sizeof(area_name),"%s.%s_host",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));
	ap = pf_area_init(area_name,(u_char *)NULL,0,MAX_OCL_MAPPED_OBJECTS,
			DA_OCL_HOST,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
			"init_ocl_dev_memory: error creating host data area %s",area_name);
		// NOTE(review): error1 is presumably fatal (does not return) - confirm
		error1(ERROR_STRING);
	}
	SET_AREA_PFDEV(ap, pdp);
	pdp->pd_ap[PF_HOST_AREA_INDEX] = ap;

	/* Make up another psuedo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this psuedo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */

	//strcpy(cname,dname);
	//strcat(cname,"_host_mapped");
	snprintf(area_name,sizeof(area_name),"%s.%s_host_mapped",
		PLATFORM_NAME(PFDEV_PLATFORM(pdp)),PFDEV_NAME(pdp));
	ap = pf_area_init(area_name,(u_char *)NULL,0,MAX_OCL_MAPPED_OBJECTS,
			DA_OCL_HOST_MAPPED,pdp);
	if( ap == NULL ){
		sprintf(ERROR_STRING,
			"init_ocl_dev_memory: error creating host-mapped data area %s",area_name);
		// NOTE(review): error1 is presumably fatal (does not return) - confirm
		error1(ERROR_STRING);
	}
	SET_AREA_PFDEV(ap,pdp);
	pdp->pd_ap[PF_HOST_MAPPED_AREA_INDEX] = ap;

	if( verbose ){
		sprintf(ERROR_STRING,"init_ocl_dev_memory DONE");
		advise(ERROR_STRING);
	}
}
/*
 * Initialize one CUDA device and register it with a platform.
 *
 * Steps, in order:
 *   - query the device properties and warn if its compute capability
 *     is higher than the one the program was configured for;
 *   - publish the compute capability in the script variable
 *     "cuda_comp_cap";
 *   - derive a unique device name from the CUDA device name (spaces
 *     replaced by underscores) and create the Platform_Device;
 *   - create the global, host and host-mapped data areas, named
 *     "<platform>.<device>", "<platform>.<device>_host" and
 *     "<platform>.<device>_host_mapped", and store them on the device.
 *
 * The function returns early, without creating a device, when the
 * properties cannot be read or the device reports version 9999.9999.
 *
 * index - CUDA device ordinal; must be less than MAX_CUDA_DEVICES.
 * cpp   - platform that the new device is registered under.
 */
static void init_cu2_device(QSP_ARG_DECL int index, Compute_Platform *cpp)
{
	struct cudaDeviceProp deviceProp;
	cudaError_t e;
	Platform_Device *pdp;
	char name[LLEN];
	char dev_name[LLEN];
	char area_name[LLEN];
	const char *name_p;
	char *s;
	Data_Area *ap;
	int comp_cap;	// major*10+minor; only ever used as an integer

	if( index >= MAX_CUDA_DEVICES ){
		sprintf(ERROR_STRING,"Program is compiled for a maximum of %d CUDA devices, can't inititialize device %d.",
			MAX_CUDA_DEVICES,index);
		ERROR1(ERROR_STRING);
	}

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d BEGIN",index);
		advise(ERROR_STRING);
	}

	if( (e=cudaGetDeviceProperties(&deviceProp, index)) != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device","cudaGetDeviceProperties",e);
		return;
	}

	if (deviceProp.major == 9999 && deviceProp.minor == 9999){
		sprintf(ERROR_STRING,"There is no CUDA device with dev = %d!?.\n",index);
		WARN(ERROR_STRING);
		/* What should we do here??? */
		return;
	}

	/* Put the compute capability into a script variable so that we can use it */
	comp_cap = deviceProp.major * 10 + deviceProp.minor;
	if( comp_cap > CUDA_COMP_CAP ){
		sprintf(ERROR_STRING,"init_cu2_device: CUDA device %s has compute capability %d.%d, but program was configured for %d.%d!?",
			deviceProp.name,deviceProp.major,deviceProp.minor,
			CUDA_COMP_CAP/10,CUDA_COMP_CAP%10);
		WARN(ERROR_STRING);
	}

	/* BUG if there are multiple devices, we need to make sure that this is set
	 * correctly for the current context!?
	 */
	sprintf(ERROR_STRING,"%d.%d",deviceProp.major,deviceProp.minor);
	assign_var(QSP_ARG "cuda_comp_cap",ERROR_STRING);

	/* What does this do??? */
	e = cudaSetDeviceFlags( cudaDeviceMapHost );
	if( e != cudaSuccess ){
		describe_cuda_driver_error2("init_cu2_device",
			"cudaSetDeviceFlags",e);
	}

	// Bounded copy: the device name is truncated rather than allowed to
	// overrun the local buffer.
	snprintf(name,sizeof(name),"%s",deviceProp.name);

	/* change spaces to underscores */
	for( s=name; *s; s++ ){
		if( *s==' ' ) *s='_';
	}

	/* We might have two of the same devices installed in a single system.
	 * In this case, we can't use the device name twice, because there will
	 * be a conflict.  The first one gets the name, then we have to check and
	 * make sure that the name is not in use already.  If it is, then we append
	 * a number to the string...
	 */
	// dev_name is handed over as a scratch buffer (presumably for
	// building the numbered variant - confirm in available_pfdev_name)
	name_p = available_pfdev_name(QSP_ARG name,dev_name,cpp,MAX_CUDA_DEVICES);
	pdp = new_pfdev(QSP_ARG name_p);

#ifdef CAUTIOUS
	if( pdp == NO_PFDEV ){
		sprintf(ERROR_STRING,"CAUTIOUS: init_cu2_device: Error creating cuda device struct for %s!?",name_p);
		WARN(ERROR_STRING);
		return;
	}
#endif /* CAUTIOUS */

	/* Remember this name in case the default is not found */
	if( first_cuda_dev_name == NULL )
		first_cuda_dev_name = PFDEV_NAME(pdp);

	/* Compare this name against the default name set in
	 * the environment, if it exists...
	 */
	if( default_cuda_dev_name != NULL && ! default_cuda_dev_found ){
		if( !strcmp(PFDEV_NAME(pdp),default_cuda_dev_name) )
			default_cuda_dev_found=1;
	}

	SET_PFDEV_PLATFORM(pdp,cpp);
	SET_PFDEV_CUDA_INFO( pdp, getbuf(sizeof(Cuda_Dev_Info)) );
	SET_PFDEV_CUDA_DEV_INDEX(pdp,index);
	SET_PFDEV_CUDA_DEV_PROP(pdp,deviceProp);
	SET_PFDEV_CUDA_RNGEN(pdp,NULL);

	if( comp_cap >= 20 ){
		SET_PFDEV_MAX_DIMS(pdp,3);
	} else {
		SET_PFDEV_MAX_DIMS(pdp,2);
	}

	//set_cuda_device(pdp);		// is this call just so we can call cudaMalloc?
	PF_FUNC_NAME(set_device)(QSP_ARG pdp);	// is this call just so we can call cudaMalloc?

	// address set to NULL says use custom allocator - see dobj/makedobj.c

	// BUG??  with pdp we may not need the DA_ flag???
	snprintf(area_name,sizeof(area_name),"%s.%s",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG area_name,NULL,0,
			MAX_CUDA_GLOBAL_OBJECTS,DA_CUDA_GLOBAL,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
			"init_cu2_device: error creating global data area %s",area_name);
		WARN(ERROR_STRING);
	} else {
		// WARN is not fatal, so the area's device may only be set
		// when the area was really created.
		// g++ won't take this line!?
		SET_AREA_CUDA_DEV(ap,pdp);
	}
	//set_device_for_area(ap,pdp);

	SET_PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX,ap);

	/* We used to declare a heap for constant memory here,
	 * but there wasn't much of a point because:
	 * Constant memory can't be allocated, rather it is declared
	 * in the .cu code, and placed by the compiler as it sees fit.
	 * To have objects use this, we would have to declare a heap and
	 * manage it ourselves...
	 * There's only 64k, so we should be sparing...
	 * We'll try this later...
	 */

	/* Make up another area for the host memory
	 * which is locked and mappable to the device.
	 * We don't allocate a pool here, but do it as needed...
	 */

	//strcpy(area_name,name_p);
	//strcat(area_name,"_host");
	snprintf(area_name,sizeof(area_name),"%s.%s_host",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
			DA_CUDA_HOST,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
			"init_cu2_device: error creating host data area %s",area_name);
		// NOTE(review): ERROR1 is presumably fatal (does not return) - confirm
		ERROR1(ERROR_STRING);
	}
	SET_AREA_CUDA_DEV(ap, pdp);
	//cuda_data_area[index][CUDA_HOST_AREA_INDEX] = ap;
	SET_PFDEV_AREA(pdp,PFDEV_HOST_AREA_INDEX,ap);

	/* Make up another psuedo-area for the mapped host memory;
	 * This is the same memory as above, but mapped to the device.
	 * In the current implementation, we create objects in the host
	 * area, and then automatically create an alias on the device side.
	 * There is a BUG in that by having this psuedo area in the data
	 * area name space, a user could select it as the data area and
	 * then try to create an object.  We will detect this in make_dobj,
	 * and complain.
	 */

	//strcpy(area_name,name_p);
	//strcat(area_name,"_host_mapped");
	snprintf(area_name,sizeof(area_name),"%s.%s_host_mapped",PLATFORM_NAME(cpp),name_p);
	ap = pf_area_init(QSP_ARG area_name,(u_char *)NULL,0,MAX_CUDA_MAPPED_OBJECTS,
			DA_CUDA_HOST_MAPPED,pdp);
	if( ap == NO_AREA ){
		sprintf(ERROR_STRING,
			"init_cu2_device: error creating host-mapped data area %s",area_name);
		// NOTE(review): ERROR1 is presumably fatal (does not return) - confirm
		ERROR1(ERROR_STRING);
	}
	SET_AREA_CUDA_DEV(ap,pdp);
	//cuda_data_area[index][CUDA_HOST_MAPPED_AREA_INDEX] = ap;
	SET_PFDEV_AREA(pdp,PFDEV_HOST_MAPPED_AREA_INDEX,ap);

	// We don't change the data area by default any more when initializing...
	/* Restore the normal area */
	//set_data_area(PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX));

	if( verbose ){
		sprintf(ERROR_STRING,"init_cu2_device %d DONE",index);
		advise(ERROR_STRING);
	}
}