operator cl_event() const { cl_int err = CL_SUCCESS; if(e_.size() == 1) { return e_[0]; } if(!e_.empty()) { cl_context ctx = get_info<command_queue::context_info_type>(command_queue()); cl_event e = clCreateUserEvent(ctx, &err); OCLM_THROW_IF_EXCEPTION(err, "clCreateUserEvents"); #ifdef CL_VERSION_1_2 err = clEnqueueMarkerWithWaitList(command_queue(), static_cast<cl_uint>(e_.size()), &e_[0], &e); OCLM_THROW_IF_EXCEPTION(err, "clEnqueueMarkerWithWaitList"); #else get(); err = clSetUserEventStatus(e, CL_COMPLETE); #endif return e; } else return cl_event(); }
void pclu_call_kernel(pclu_program* pgm, cl_kernel kernel, pclu_range range) { int errcode; #define NO_CL_EVENTS 1 #ifdef NO_CL_EVENTS cl_event kernel_done = 0; #else cl_event kernel_done = clCreateUserEvent(pgm->pclu->context, &errcode); pclu_check_call("clCreateUserEvent", errcode); #endif size_t* local_size = 0; if (range.local[0] > 0) local_size = &(range.local[0]); errcode = clEnqueueNDRangeKernel(pgm->pclu->queue, kernel, range.nd, 0, range.global, local_size, 0, 0, &kernel_done); pclu_check_call("clEnqueueNDRangeKernel", errcode); #ifndef NO_CL_EVENTS pclu_check_call("clWaitForEvents", clWaitForEvents(1, &kernel_done)); #endif pclu_check_call("clFinish", clFinish(pgm->pclu->queue)); //pclu_check_call("clReleaseKernel", clReleaseKernel(kern)); }
int acc_event_create (void** event_p){ // debug info if (verbose_print){ fprintf(stdout, "\n ... EVENT CREATION ... \n"); fprintf(stdout, " ---> Entering: acc_event_create.\n"); } // local event object pointer *event_p = malloc(sizeof(cl_event)); cl_event *clevent = (cl_event *) *event_p; // get a device event object *clevent = clCreateUserEvent((*acc_opencl_my_device).ctx, &cl_error); if (acc_opencl_error_check(cl_error, __LINE__)) return -1; // debug info if (verbose_print){ fprintf(stdout, " ---> Leaving: acc_event_create.\n"); } cl_error = clSetUserEventStatus(*clevent, CL_COMPLETE); // assign return value return 0; }
/*! \brief Instantiates a user Event. * * You can add this event to track a command within a command Queue by adding this * Event into the EventList of the command. This * Event is created using the provided Context. */ ocl::Event::Event(ocl::Context& ctxt) : _id(0), _ctxt(&ctxt) { TRUE_ASSERT(this->_ctxt != 0, "Context not valid"); cl_int err; _id = clCreateUserEvent (_ctxt->id(), &err); OPENCL_SAFE_CALL(err); TRUE_ASSERT(_id != 0, "Could not create user event"); }
/*! \brief Instantiates a user Event. * * You can add this event to track a command within a command Queue by adding this * Event into the EventList of the command. This * Event is created using the provided Context. */ ocl::Event::Event(ocl::Context& ctxt) : _id(0), _ctxt(&ctxt) { if(this->_ctxt == nullptr) throw std::runtime_error("no active context"); cl_int err; _id = clCreateUserEvent (_ctxt->id(), &err); OPENCL_SAFE_CALL(err); if(this->_id == nullptr) throw std::runtime_error("could not create user event"); }
Event::Event(Context const& context) : Object{} { static const auto error_map = error::ErrorMap{ {ErrorCode::invalid_context, "the given context is invalid."} }; auto error = cl_int{CL_INVALID_VALUE}; auto new_id = clCreateUserEvent(context.id(), &error); if (error::handle<EventException>(error, error_map)) m_id = new_id; }
void CL::UserEvent::begin(const CL::Event& dependencies) { cl_int status; _event = clCreateUserEvent(_device.get_context(), &status); OPENCL_ASSERT(status); _id = _device.insert_user_event(_name, _event, dependencies); _active = true; }
int main() { cl_int err; cl_event user_evt = NULL; int i; // An user event can be set to either complete or a negative value, indicating error; // additionally, no objects involved in a command that waits on the user event should // be released before the event status is set; however, it should be possible to release // everything even if the status is set to something which is NOT CL_COMPLETE. So // try both CL_COMPLETE and a negative value cl_int status[] = {CL_INVALID_EVENT, CL_COMPLETE }; // We also query for profiling info of the event, which according to the standard // should return CL_PROFILING_INFO_NOT_AVAILABLE cl_ulong queued, submitted, started, endtime; for (i = 0; i < ARRAY_SIZE(status); ++i) { cl_context context; cl_command_queue queue; cl_device_id device; CHECK_CL_ERROR(poclu_get_any_device(&context, &device, &queue)); TEST_ASSERT( context ); TEST_ASSERT( device ); TEST_ASSERT( queue ); user_evt = clCreateUserEvent(context, &err); CHECK_OPENCL_ERROR_IN("clCreateUserEvent"); TEST_ASSERT( user_evt ); CHECK_CL_ERROR(clSetUserEventStatus(user_evt, status[i])); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_QUEUED, sizeof(queued), &queued, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_SUBMIT, sizeof(submitted), &submitted, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_START, sizeof(started), &started, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_END, sizeof(endtime), &endtime, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); CHECK_CL_ERROR(clReleaseEvent(user_evt)); CHECK_CL_ERROR(clReleaseCommandQueue(queue)); CHECK_CL_ERROR(clReleaseContext(context)); } return EXIT_SUCCESS; }
/*! \brief Instantiates a user Event. * * You can add this event to track a command within a command Queue by adding this * Event into the EventList of the command. * If there is an active Platform and an active Context * this Event is created. Otherwise do not forget * to provide a Context and to create this Event. */ ocl::Event::Event() : _id(0), _ctxt(0) { if(ocl::Platform::hasActivePlatform() && ocl::Platform::activePlatform()->hasActiveContext()){ _ctxt = ocl::Platform::activePlatform()->activeContext(); TRUE_ASSERT(_ctxt != 0, "No active context"); cl_int err; _id = clCreateUserEvent (_ctxt->id(), &err); OPENCL_SAFE_CALL(err); TRUE_ASSERT(_id != 0, "Could not create user event"); } }
PassRefPtr<WebCLUserEvent> WebCLUserEvent::create(PassRefPtr<WebCLContext> context, ExceptionState& es) { cl_int userEventError = 0; cl_event userEvent = clCreateUserEvent(context->getContext(), &userEventError); if (userEventError != CL_SUCCESS) { WebCLException::throwException(userEventError, es); return nullptr; } return adoptRef(new WebCLUserEvent(userEvent, context)); }
/*! Creates a user event. Returns null if user events are not supported. User events are a feature of OpenCL 1.1 which allows an application to insert a marker into the command queue. Commands that depend upon the marker will not be executed until the application triggers the user event with QCLUserEvent::setFinished(). */ QCLUserEvent QCLContext::createUserEvent() { #ifdef QT_OPENCL_1_1 Q_D(QCLContext); cl_int error = CL_INVALID_CONTEXT; cl_event event = clCreateUserEvent(d->id, &error); reportError("QCLContext::createUserEvent:", error); return QCLUserEvent(event, true); #else return QCLUserEvent(); #endif }
/*! \brief Instantiates a user Event. * * You can add this event to track a command within a command Queue by adding this * Event into the EventList of the command. * If there is an active Platform and an active Context * this Event is created. Otherwise do not forget * to provide a Context and to create this Event. */ ocl::Event::Event() : _id(0), _ctxt(0) { if(!ocl::Platform::hasActivePlatform() || !ocl::Platform::activePlatform()->hasActiveContext()) return; _ctxt = ocl::Platform::activePlatform()->activeContext(); if(this->_ctxt == nullptr) throw std::runtime_error("no active context"); cl_int err; _id = clCreateUserEvent (_ctxt->id(), &err); OPENCL_SAFE_CALL(err); if(this->_id == nullptr) throw std::runtime_error("could not create user event"); }
cl_event mwCreateEvent(CLInfo* ci) { cl_int err; cl_event ev; ev = clCreateUserEvent(ci->clctx, &err); if (err != CL_SUCCESS) { mwPerrorCL(err, "Failed to create custom event"); return NULL; } return ev; }
static void register_event(hpx::opencl::device cldevice, const hpx::naming::id_type & event_id) { boost::shared_ptr<hpx::opencl::server::device> parent_device = hpx::get_ptr<hpx::opencl::server::device> (cldevice.get_gid()).get(); // create a fake event cl_int err; cl_event event_cl = clCreateUserEvent ( parent_device->get_context(), &err); cl_ensure(err, "clEnqueueWriteBuffer()"); err = clSetUserEventStatus(event_cl, CL_COMPLETE); cl_ensure(err, "clSetUserEventStatus()"); parent_device->register_event(event_id, event_cl); }
void pclu_call_kernel(pclu_program* pgm, const char* name, pclu_range range, size_t argc, ...) { cl_int errcode; cl_kernel kern = clCreateKernel(pgm->program, name, &errcode); pclu_check_call("clCreateKernel", errcode); va_list ap; va_start(ap, argc); for (cl_uint ii = 0; ii < argc; ++ii) { size_t size = va_arg(ap, size_t); void* arg = va_arg(ap, void*); pclu_check_call("clSetKernelArg", clSetKernelArg(kern, ii, size, arg)); } va_end(ap); #define NO_CL_EVENTS 1 #ifdef NO_CL_EVENTS cl_event kernel_done = 0; #else cl_event kernel_done = clCreateUserEvent(pgm->pclu->context, &errcode); pclu_check_call("clCreateUserEvent", errcode); #endif errcode = clEnqueueNDRangeKernel(pgm->pclu->queue, kern, range.nd, 0, range.global, 0, 0, 0, &kernel_done); pclu_check_call("clEnqueueNDRangeKernel", errcode); #ifndef NO_CL_EVENTS pclu_check_call("clWaitForEvents", clWaitForEvents(1, &kernel_done)); #endif pclu_check_call("clReleaseKernel", clReleaseKernel(kern)); }
int main(int argc, char **argv) { int i; if (argc < 2) { fprintf(stderr, "Usage : %s <image file name> [<device number>]\nThe program will threshold the image, apply CCL,\nand output the result to output.png.\n", argv[0]); fprintf(stderr, "\nAvailable OpenCL Devices :\n"); simpleGetDevice(-1); exit(-1); } // IplImage *img = 0; img = cvLoadImage(argv[1], CV_LOAD_IMAGE_COLOR); if( !img ) abortf("Could not load %s\n", argv[1]); if (img->nChannels != 3) abortf("nChannels != 3\n"); int iw = img->width, ih = img->height; uint8_t *data = (uint8_t *)img->imageData; // cl_int *bufPix = (cl_int *)calloc(iw * ih, sizeof(cl_int)); cl_int *bufLabel = (cl_int *)calloc(iw * ih, sizeof(cl_int)); cl_int *bufFlags = (cl_int *)calloc(MAXPASS+1, sizeof(cl_int)); { int x, y; for(y=0;y<ih;y++) { for(x=0;x<iw;x++) { bufPix[y * iw + x] = data[y * img->widthStep + x * 3 + 1] > 127 ? 1 : 0; } } } // int did = 0; if (argc >= 3) did = atoi(argv[2]); cl_device_id device = simpleGetDevice(did); cl_context context = simpleCreateContext(device); cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL); char *source = readFileAsStr("ccl.cl"); cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, 0, NULL); cl_int ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL); if (ret != CL_SUCCESS) { fprintf(stderr, "Could not build program : %d\n", ret); if (ret == CL_BUILD_PROGRAM_FAILURE) fprintf(stderr, "CL_BUILD_PROGRAM_FAILURE\n"); if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 10000, strbuf, NULL) == CL_SUCCESS) { fprintf(stderr, "Build log follows\n"); fprintf(stderr, "%s\n", strbuf); } exit(-1); } cl_kernel kernel_prepare = clCreateKernel(program, "labelxPreprocess_int_int", NULL); cl_kernel kernel_propagate = clCreateKernel(program, "label8xMain_int_int", NULL); // By specifying CL_MEM_COPY_HOST_PTR, device buffers are cleared. cl_mem memPix = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, iw * ih * sizeof(cl_int), bufPix, NULL); cl_mem memLabel = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, iw * ih * sizeof(cl_int), bufLabel, NULL); cl_mem memFlags = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, (MAXPASS+1) * sizeof(cl_int), bufFlags, NULL); size_t work_size[2] = {(size_t)((iw + 31) & ~31), (size_t)((ih + 31) & ~31)}; cl_event events[MAXPASS+1]; for(i=0;i<=MAXPASS;i++) { events[i] = clCreateUserEvent(context, NULL); } // clSetKernelArg(kernel_prepare, 0, sizeof(cl_mem), (void *) &memLabel); clSetKernelArg(kernel_prepare, 1, sizeof(cl_mem), (void *) &memPix); clSetKernelArg(kernel_prepare, 2, sizeof(cl_mem), (void *) &memFlags); i = MAXPASS; clSetKernelArg(kernel_prepare, 3, sizeof(cl_int), (void *) &i); i = 0; clSetKernelArg(kernel_prepare, 4, sizeof(cl_int), (void *) &i); clSetKernelArg(kernel_prepare, 5, sizeof(cl_int), (int *) &iw); clSetKernelArg(kernel_prepare, 6, sizeof(cl_int), (int *) &ih); clEnqueueNDRangeKernel(queue, kernel_prepare, 2, NULL, work_size, NULL, 0, NULL, &events[0]); for(i=1;i<=MAXPASS;i++) { clSetKernelArg(kernel_propagate, 0, sizeof(cl_mem), (void *) &memLabel); clSetKernelArg(kernel_propagate, 1, sizeof(cl_mem), (void *) &memPix); clSetKernelArg(kernel_propagate, 2, sizeof(cl_mem), (void *) &memFlags); clSetKernelArg(kernel_propagate, 3, sizeof(cl_int), (void *) &i); clSetKernelArg(kernel_propagate, 4, sizeof(cl_int), (int *) &iw); clSetKernelArg(kernel_propagate, 5, sizeof(cl_int), (int *) &ih); clEnqueueNDRangeKernel(queue, kernel_propagate, 2, NULL, work_size, NULL, 0, NULL, &events[i]); } clEnqueueReadBuffer(queue, memLabel, CL_TRUE, 0, iw * ih * sizeof(cl_int), bufLabel, 0, NULL, NULL); clEnqueueReadBuffer(queue, memFlags, CL_TRUE, 0, (MAXPASS+1) * sizeof(cl_int), bufFlags, 0, NULL, NULL); clFinish(queue); long long int total = 0; for(i=0;i<=MAXPASS;i++) { cl_ulong tstart, tend; clGetEventProfilingInfo(events[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &tstart, NULL); clGetEventProfilingInfo(events[i], CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &tend , NULL); clReleaseEvent(events[i]); printf("pass %2d : %10lld nano sec\n", i, (long long int)(tend - tstart)); total += tend - tstart; } printf("total : %10lld nano sec\n", total); clReleaseMemObject(memFlags); clReleaseMemObject(memLabel); clReleaseMemObject(memPix); clReleaseKernel(kernel_propagate); clReleaseKernel(kernel_prepare); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); // { int x, y; for(y=0;y<ih;y++) { for(x=0;x<iw;x++) { int rgb = bufLabel[y * iw + x] == -1 ? 0 : (bufLabel[y * iw + x] * 1103515245 + 12345); //int rgb = bufLabel[y * iw + x] == -1 ? 0 : (bufLabel[y * iw + x]); data[y * img->widthStep + x * 3 + 0] = rgb & 0xff; rgb >>= 8; data[y * img->widthStep + x * 3 + 1] = rgb & 0xff; rgb >>= 8; data[y * img->widthStep + x * 3 + 2] = rgb & 0xff; rgb >>= 8; } } } int params[3] = { CV_IMWRITE_PNG_COMPRESSION, 9, 0 }; cvSaveImage("output.png", img, params); free(bufFlags); free(bufLabel); free(bufPix); exit(0); }
int main() { cl_platform_id platform_id = NULL; cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue command_queue = NULL; cl_mem objA = NULL; cl_mem objB = NULL; cl_mem objC = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret; cl_event event1; int i, j; float *A; float *B; float *C; A = (float *)malloc(4*4*sizeof(float)); B = (float *)malloc(4*4*sizeof(float)); C = (float *)malloc(4*4*sizeof(float)); /* Initialize input data */ for (i=0; i<4; i++) { for (j=0; j<4; j++) { A[i*4+j] = i*4+j+1; B[i*4+j] = j*4+i+1; } } /* Get Platform/Device Information*/ ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); /* Create OpenCL Context */ context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); /* Create command queue */ command_queue = clCreateCommandQueue(context, device_id, 0, &ret); /* Create Buffer Object */ objA = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret); objB = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret); objC = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret); /* * Creating an user event * As a user event is created, its execution status is set to be CL_SUBMITTED * and we tag the event to a callback so when event reaches CL_COMPLETE, it will * execute postProcess */ event1 = clCreateUserEvent(context, &ret); clSetEventCallback(event1, CL_COMPLETE, &postProcess, "Looks like its done."); /* Copy input data to the memory buffer */ ret = clEnqueueWriteBuffer(command_queue, objA, CL_TRUE, 0, 4*4*sizeof(float), A, 0, NULL, NULL ); printf("A has been written\n"); /* The next command will wait for event1 according to its status*/ ret = clEnqueueWriteBuffer(command_queue, objB, CL_TRUE, 0, 4*4*sizeof(float), B, 1, &event1, NULL); printf("B has been written\n"); /* Tell event1 to complete */ clSetUserEventStatus(event1, CL_COMPLETE); const char *file_names[] = {"sample_kernel.cl"}; const int NUMBER_OF_FILES = 1; char* buffer[NUMBER_OF_FILES]; size_t sizes[NUMBER_OF_FILES]; loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes); /* Create kernel program from source file*/ program = clCreateProgramWithSource(context, 1, (const char **)buffer, sizes, &ret); ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); /* Create data parallel OpenCL kernel */ kernel = clCreateKernel(program, "sample", &ret); /* Set OpenCL kernel arguments */ ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&objA); ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&objB); ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&objC); size_t global_item_size = 4; size_t local_item_size = 1; /* Execute OpenCL kernel as data parallel */ ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL); /* Transfer result to host */ ret = clEnqueueReadBuffer(command_queue, objC, CL_TRUE, 0, 4*4*sizeof(float), C, 0, NULL, NULL); /* Display Results */ for (i=0; i<4; i++) { for (j=0; j<4; j++) { printf("%7.2f ", C[i*4+j]); } printf("\n"); } /* Finalization */ ret = clFlush(command_queue); ret = clFinish(command_queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(objA); ret = clReleaseMemObject(objB); ret = clReleaseMemObject(objC); ret = clReleaseCommandQueue(command_queue); ret = clReleaseContext(context); free(A); free(B); free(C); return 0; }
int TestDtfield_CL(void){ bool test = true; if(!cldevice_is_acceptable(nplatform_cl, ndevice_cl)) { printf("OpenCL device not acceptable.\n"); return true; } field f; // 2D meshes: // test/disque2d.msh // test/testdisque2d.msh // test/testmacromesh.msh // test/unit-cube.msh char *mshname = "test/disque2d.msh"; ReadMacroMesh(&(f.macromesh), mshname); Detect2DMacroMesh(&f.macromesh); BuildConnectivity(&f.macromesh); #if 1 // 2D version assert(f.macromesh.is2d); f.model.cfl = 0.05; f.model.m = 1; m = f.model.m; f.model.NumFlux = TransNumFlux2d; f.model.BoundaryFlux = TransBoundaryFlux2d; f.model.InitData = TransInitData2d; f.model.ImposedData = TransImposedData2d; f.varindex = GenericVarindex; f.interp.interp_param[0] = f.model.m; f.interp.interp_param[1] = 2; // x direction degree f.interp.interp_param[2] = 2; // y direction degree f.interp.interp_param[3] = 0; // z direction degree f.interp.interp_param[4] = 4; // x direction refinement f.interp.interp_param[5] = 4; // y direction refinement f.interp.interp_param[6] = 1; // z direction refinement #else // 3D version f.model.cfl = 0.05; f.model.m = 1; f.model.NumFlux = TransNumFlux; f.model.BoundaryFlux = TestTransBoundaryFlux; f.model.InitData = TestTransInitData; f.model.ImposedData = TestTransImposedData; f.varindex = GenericVarindex; f.interp.interp_param[0] = f.model.m; f.interp.interp_param[1] = 2; // x direction degree f.interp.interp_param[2] = 2; // y direction degree f.interp.interp_param[3] = 2; // z direction degree f.interp.interp_param[4] = 3; // x direction refinement f.interp.interp_param[5] = 3; // y direction refinement f.interp.interp_param[6] = 3; // z direction refinement #endif set_global_m(f.model.m); set_source_CL(&f, "OneSource"); Initfield(&f); cl_event clv_dtfield = clCreateUserEvent(f.cli.context, NULL); dtfield_CL(&f, &f.wn_cl, 0, NULL, &clv_dtfield); clWaitForEvents(1, &clv_dtfield); CopyfieldtoCPU(&f); // Displayfield(&f); show_cl_timing(&f); real *saveptr = f.dtwn; f.dtwn = calloc(f.wsize, sizeof(real)); f.model.Source = OneSource; dtfield(&f, f.wn, f.dtwn); real maxerr = 0; for(int i = 0; i < f.wsize; i++) { real error = f.dtwn[i] - saveptr[i]; //printf("error= \t%f\t%f\t%f\n", error, f.dtwn[i], saveptr[i]); maxerr = fmax(fabs(error), maxerr); } printf("max error: %f\n", maxerr); test = (maxerr < 1e-8); return test; }
/* main */ int main(int argc, char **argv) { /*OpenCL variables */ cl_device_id device; cl_device_type device_type; /*to test if we are on cpu or gpu*/ cl_context context; cl_command_queue cmdQueue; /* The event variables are created only when needed */ #ifdef _UNBLOCK cl_uint num_events = 3; cl_event event[num_events]; #endif FPTYPE * buffers[3]; cl_mdsys_t cl_sys; cl_int status; int nprint, i, nthreads = 0; char restfile[BLEN], trajfile[BLEN], ergfile[BLEN], line[BLEN]; FILE *fp,*traj,*erg; mdsys_t sys; /* Start profiling */ #ifdef __PROFILING double t1, t2; t1 = second(); #endif /* handling the command line arguments */ switch (argc) { case 2: /* only the cpu/gpu argument was passed, setting default nthreads */ if( !strcmp( argv[1], "cpu" ) ) nthreads = 16; else nthreads = 1024; break; case 3: /* both the device type (cpu/gpu) and the number of threads were passed */ nthreads = strtol(argv[2],NULL,10); if( nthreads<0 ) { fprintf( stderr, "\n. The number of threads must be more than 1.\n"); PrintUsageAndExit(); } break; default: PrintUsageAndExit(); break; } /* Initialize the OpenCL environment */ if( InitOpenCLEnvironment( argv[1], &device, &context, &cmdQueue ) != CL_SUCCESS ){ fprintf( stderr, "Program Error! OpenCL Environment was not initialized correctly.\n" ); return 4; } /* The event initialization is performed only when needed */ #ifdef _UNBLOCK /* initialize the cl_event handler variables */ for( i = 0; i < num_events; ++i) { event[i] = clCreateUserEvent( context, NULL ); clSetUserEventStatus( event[i], CL_COMPLETE ); } #endif /* read input file */ if(get_me_a_line(stdin,line)) return 1; sys.natoms=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.mass=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.epsilon=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.sigma=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.rcut=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.box=atof(line); if(get_me_a_line(stdin,restfile)) return 1; if(get_me_a_line(stdin,trajfile)) return 1; if(get_me_a_line(stdin,ergfile)) return 1; if(get_me_a_line(stdin,line)) return 1; sys.nsteps=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.dt=atof(line); if(get_me_a_line(stdin,line)) return 1; nprint=atoi(line); /* allocate memory */ cl_sys.natoms = sys.natoms; cl_sys.rx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.ry = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.rz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); buffers[0] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); buffers[1] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); buffers[2] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); /* read restart */ fp = fopen( restfile, "r" ); if( fp ) { for( i = 0; i < 2 * cl_sys.natoms; ++i ){ #ifdef _USE_FLOAT fscanf( fp, "%f%f%f", buffers[0] + i, buffers[1] + i, buffers[2] + i); #else fscanf( fp, "%lf%lf%lf", buffers[0] + i, buffers[1] + i, buffers[2] + i); #endif } status = clEnqueueWriteBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0] + cl_sys.natoms, 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vy, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1] + cl_sys.natoms, 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2] + cl_sys.natoms, 0, NULL, NULL ); fclose(fp); } else { perror("cannot read restart file"); return 3; } /* initialize forces and energies.*/ sys.nfi=0; size_t globalWorkSize[1]; globalWorkSize[0] = nthreads; const char * sourcecode = #include <opencl_kernels_as_string.h> ; cl_program program = clCreateProgramWithSource( context, 1, (const char **) &sourcecode, NULL, &status ); status |= clBuildProgram( program, 0, NULL, kernelflags, NULL, NULL ); #ifdef __DEBUG size_t log_size; char log [200000]; clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, sizeof(log), log, &log_size ); fprintf( stderr, "\nLog: \n\n %s", log ); #endif cl_kernel kernel_force = clCreateKernel( program, "opencl_force", &status ); cl_kernel kernel_ekin = clCreateKernel( program, "opencl_ekin", &status ); cl_kernel kernel_verlet_first = clCreateKernel( program, "opencl_verlet_first", &status ); cl_kernel kernel_verlet_second = clCreateKernel( program, "opencl_verlet_second", &status ); cl_kernel kernel_azzero = clCreateKernel( program, "opencl_azzero", &status ); FPTYPE * tmp_epot; cl_mem epot_buffer; tmp_epot = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) ); epot_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status ); /* precompute some constants */ FPTYPE c12 = 4.0 * sys.epsilon * pow( sys.sigma, 12.0); FPTYPE c6 = 4.0 * sys.epsilon * pow( sys.sigma, 6.0); FPTYPE rcsq = sys.rcut * sys.rcut; FPTYPE boxby2 = HALF * sys.box; FPTYPE dtmf = HALF * sys.dt / mvsq2e / sys.mass; sys.epot = ZERO; sys.ekin = ZERO; /* Azzero force buffer */ status = clSetMultKernelArgs( kernel_azzero, 0, 4, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.natoms)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_azzero, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clSetMultKernelArgs( kernel_force, 0, 13, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.natoms), KArg(epot_buffer), KArg(c12), KArg(c6), KArg(rcsq), KArg(boxby2), KArg(sys.box)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL ); for( i = 0; i < nthreads; i++) sys.epot += tmp_epot[i]; FPTYPE * tmp_ekin; cl_mem ekin_buffer; tmp_ekin = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) ); ekin_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status ); status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(ekin_buffer)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL ); for( i = 0; i < nthreads; i++) sys.ekin += tmp_ekin[i]; sys.ekin *= HALF * mvsq2e * sys.mass; sys.temp = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz; erg=fopen(ergfile,"w"); traj=fopen(trajfile,"w"); printf("Starting simulation with %d atoms for %d steps.\n",sys.natoms, sys.nsteps); printf(" NFI TEMP EKIN EPOT ETOT\n"); /* download data on host */ status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); sys.rx = buffers[0]; sys.ry = buffers[1]; sys.rz = buffers[2]; output(&sys, erg, traj); /**************************************************/ /* main MD loop */ for(sys.nfi=1; sys.nfi <= sys.nsteps; ++sys.nfi) { /* propagate system and recompute energies */ /* 2) verlet_first */ status |= clSetMultKernelArgs( kernel_verlet_first, 0, 12, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(sys.dt), KArg(dtmf)); CheckSuccess(status, 2); /* When the data transfer is non blocking, this kernel has to wait the completion of part 8 (event[2]) */ #ifdef _UNBLOCK status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 1, &event[2], NULL ); #else status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); #endif /* 6) download position@device to position@host */ if ((sys.nfi % nprint) == nprint-1) { /* In non blocking mode (CL_FALSE) this data transfer raises events[i] */ #ifdef _UNBLOCK status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, &event[2] ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, &event[1] ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, &event[0] ); #else status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); #endif CheckSuccess(status, 6); } /* 3) force */ status |= clSetMultKernelArgs( kernel_force, 0, 13, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.natoms), KArg(epot_buffer), KArg(c12), KArg(c6), KArg(rcsq), KArg(boxby2), KArg(sys.box)); CheckSuccess(status, 3); status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); /* 7) download E_pot[i]@device and perform reduction to E_pot@host */ if ((sys.nfi % nprint) == nprint-1) { /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[1] */ #ifdef _UNBLOCK status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, &event[1] ); #else status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL ); #endif CheckSuccess(status, 7); } /* 4) verlet_second */ status |= clSetMultKernelArgs( kernel_verlet_second, 0, 9, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(sys.dt), KArg(dtmf)); CheckSuccess(status, 4); status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_second, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); if ((sys.nfi % nprint) == nprint-1) { /* 5) ekin */ status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(ekin_buffer)); CheckSuccess(status, 5); status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); /* 8) download E_kin[i]@device and perform reduction to E_kin@host */ /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[2] */ #ifdef _UNBLOCK status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, &event[2] ); #else status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL ); #endif CheckSuccess(status, 8); } /* 1) write output every nprint steps */ if ((sys.nfi % nprint) == 0) { /* Calling a synchronization function (only when in non blocking mode) that will wait until all the * events[i], related to the data transfers, to be completed */ #ifdef _UNBLOCK clWaitForEvents(3, event); #endif sys.rx = buffers[0]; sys.ry = buffers[1]; sys.rz = buffers[2]; /* initialize the sys.epot@host and sys.ekin@host variables to ZERO */ sys.epot = ZERO; sys.ekin = ZERO; /* reduction on the tmp_Exxx[i] buffers downloaded from the device * during parts 7 and 8 of the previous MD loop iteration */ for( i = 0; i < nthreads; i++) { sys.epot += tmp_epot[i]; sys.ekin += tmp_ekin[i]; } /* multiplying the kinetic energy by prefactors */ sys.ekin *= HALF * mvsq2e * sys.mass; sys.temp = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz; /* writing output files (positions, energies and temperature) */ output(&sys, erg, traj); } } /**************************************************/ /* End profiling */ #ifdef __PROFILING t2 = second(); fprintf( stdout, "\n\nTime of execution = %.3g (seconds)\n", (t2 - t1) ); #endif /* clean up: close files, free memory */ printf("Simulation Done.\n"); fclose(erg); fclose(traj); free(buffers[0]); free(buffers[1]); free(buffers[2]); return 0; }
END_TEST START_TEST (test_misc_events) { cl_platform_id platform = 0; cl_device_id device; cl_context ctx; cl_command_queue queue; cl_int result; cl_event uevent1, uevent2, marker1, marker2; result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0); fail_if( result != CL_SUCCESS, "unable to get the default device" ); ctx = clCreateContext(0, 1, &device, 0, 0, &result); fail_if( result != CL_SUCCESS || ctx == 0, "unable to create a valid context" ); queue = clCreateCommandQueue(ctx, device, 0, &result); fail_if( result != CL_SUCCESS || queue == 0, "cannot create a command queue" ); /* * This test will build a command queue blocked by an user event. The events * will be in this order : * * -: UserEvent1 * 0: WaitForEvents1 (wait=UserEvent1) * 1: Marker1 * -: UserEvent2 * 2: WaitForEvents2 (wait=UserEvent2) * 3: Barrier * 4: Marker2 (to check the barrier worked) * * When the command queue is built, we : * - Check that Marker1 is Queued (WaitForEvents waits) * - Set UserEvent1 to Complete * - Check that Marker1 is Complete (WaitForEvents stopped to wait) * - Check that Marker2 is Queued (Barrier is there) * - Set UserEvent2 to Complete * - Check that Marker2 is Complete (no more barrier) */ uevent1 = clCreateUserEvent(ctx, &result); fail_if( result != CL_SUCCESS, "unable to create UserEvent1" ); uevent2 = clCreateUserEvent(ctx, &result); fail_if( result != CL_SUCCESS, "unable to create UserEvent2" ); result = clEnqueueWaitForEvents(queue, 1, &uevent1); fail_if( result != CL_SUCCESS, "unable to enqueue WaitForEvents(UserEvent1)" ); result = clEnqueueMarker(queue, &marker1); fail_if( result != CL_SUCCESS, "unable to enqueue Marker1" ); result = clEnqueueWaitForEvents(queue, 1, &uevent2); fail_if( result != CL_SUCCESS, "unable to enqueue WaitForEvents(UserEvent2)" ); result = clEnqueueBarrier(queue); fail_if( result != CL_SUCCESS, "unable to enqueue Barrier" ); result = clEnqueueMarker(queue, &marker2); fail_if( result != CL_SUCCESS, "unable to enqueue Marker2" ); // Now the checks cl_int status; result = clGetEventInfo(marker1, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0); fail_if( result != CL_SUCCESS || status != CL_QUEUED, "Marker1 must be Queued" ); result = clSetUserEventStatus(uevent1, CL_COMPLETE); fail_if( result != CL_SUCCESS, "unable to set UserEvent1 to Complete" ); result = clGetEventInfo(marker1, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0); fail_if( result != CL_SUCCESS || status != CL_COMPLETE, "Marker1 must be Complete" ); result = clGetEventInfo(marker2, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0); fail_if( result != CL_SUCCESS || status != CL_QUEUED, "Marker2 must be Queued" ); result = clSetUserEventStatus(uevent2, CL_COMPLETE); fail_if( result != CL_SUCCESS, "unable to set UserEvent2 to Complete" ); result = clGetEventInfo(marker2, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0); fail_if( result != CL_SUCCESS || status != CL_COMPLETE, "Marker2 must be Complete" ); clFinish(queue); clReleaseEvent(uevent1); clReleaseEvent(uevent2); clReleaseEvent(marker1); clReleaseEvent(marker2); clReleaseCommandQueue(queue); clReleaseContext(ctx); }
enum piglit_result piglit_cl_test(const int argc, const char **argv, const struct piglit_cl_api_test_config* config, const struct piglit_cl_api_test_env* env) { #if defined(CL_VERSION_1_2) enum piglit_result result = PIGLIT_PASS; cl_int err; #define IMG_WIDTH 4 #define IMG_HEIGHT 4 #define IMG_DATA_SIZE 4 #define IMG_BUFFER_SIZE IMG_WIDTH * IMG_HEIGHT * IMG_DATA_SIZE unsigned char img_buf[IMG_BUFFER_SIZE] = {0}; unsigned char dst_buf[IMG_BUFFER_SIZE] = {0}; unsigned char exp_buf[IMG_BUFFER_SIZE] = {0}; int pattern[4] = {129, 33, 77, 255}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; size_t tmp; cl_event event; cl_mem image; cl_image_format img_format; cl_image_desc img_desc = {0}; cl_command_queue queue = env->context->command_queues[0]; int i; cl_bool *image_support = piglit_cl_get_device_info(env->context->device_ids[0], CL_DEVICE_IMAGE_SUPPORT); if (!*image_support) { fprintf(stderr, "No image support\n"); free(image_support); return PIGLIT_SKIP; } img_format.image_channel_order = CL_RGBA; img_format.image_channel_data_type = CL_UNSIGNED_INT8; img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = IMG_WIDTH; img_desc.image_height = IMG_HEIGHT; img_desc.buffer = NULL; /*** Normal usage ***/ image = clCreateImage(env->context->cl_ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &img_format, &img_desc, &img_buf, &err); if(!piglit_cl_check_error(err, CL_SUCCESS)) { fprintf(stderr, "Failed (error code: %s): Creating an image\n", piglit_cl_get_error_name(err)); return PIGLIT_FAIL; } if (!test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_SUCCESS, &result, "Enqueuing the image to be filled")) { return PIGLIT_FAIL; } region[0] = IMG_WIDTH; region[1] = IMG_HEIGHT; err = clEnqueueReadImage(queue, image, 1, origin, region, 0, 0, dst_buf, 0, NULL, NULL); if(!piglit_cl_check_error(err, CL_SUCCESS)) { fprintf(stderr, "Failed (error code: %s): Reading image\n", piglit_cl_get_error_name(err)); return PIGLIT_FAIL; } /* * fill the host buffer with the pattern * for exemple : pattern == 1234 * * 12341234abcdabcd * 12341234abcdabcd * abcdabcdabcdabcd * abcdabcdabcdabcd */ exp_buf[0] = pattern[0]; exp_buf[1] = pattern[1]; exp_buf[2] = pattern[2]; exp_buf[3] = pattern[3]; memcpy(exp_buf + (IMG_DATA_SIZE * 1), exp_buf, IMG_DATA_SIZE); memcpy(exp_buf + (IMG_DATA_SIZE * 4), exp_buf, IMG_DATA_SIZE); memcpy(exp_buf + (IMG_DATA_SIZE * 5), exp_buf, IMG_DATA_SIZE); for (i = 0; i < sizeof(dst_buf) / sizeof(dst_buf[0]); ++i) { if (!piglit_cl_probe_integer(dst_buf[i], exp_buf[i], 0)) { fprintf(stderr, "Error at %d: got %d, expected %d\n", i, dst_buf[i], exp_buf[i]); return PIGLIT_FAIL; } } /*** Errors ***/ /* * CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. */ test(NULL, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_COMMAND_QUEUE, &result, "CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue"); /* * CL_INVALID_CONTEXT if the context associated with command_queue and * image are not the same or if the context associated with command_queue * and events in event_wait_list are not the same. */ { piglit_cl_context context; cl_int err; context = piglit_cl_create_context(env->platform_id, env->context->device_ids, 1); if (context) { event = clCreateUserEvent(context->cl_ctx, &err); if (err == CL_SUCCESS) { err = clSetUserEventStatus(event, CL_COMPLETE); if (err == CL_SUCCESS) { test(context->command_queues[0], image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_CONTEXT, &result, "CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same"); test(queue, image, pattern, origin, region, 1, &event, NULL, CL_INVALID_CONTEXT, &result, "CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same"); } else { fprintf(stderr, "Could not set event status.\n"); piglit_merge_result(&result, PIGLIT_WARN); } clReleaseEvent(event); } else { fprintf(stderr, "Could not create user event.\n"); piglit_merge_result(&result, PIGLIT_WARN); } piglit_cl_release_context(context); } else { fprintf(stderr, "Could not test triggering CL_INVALID_CONTEXT.\n"); piglit_merge_result(&result, PIGLIT_WARN); } } /* * CL_INVALID_MEM_OBJECT if image is not a valid buffer object. */ test(queue, NULL, pattern, origin, region, 0, NULL, NULL, CL_INVALID_MEM_OBJECT, &result, "CL_INVALID_MEM_OBJECT if image is not a valid buffer object"); /* * CL_INVALID_VALUE if fill_color is NULL. */ test(queue, image, NULL, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if fill_color is NULL"); /* * CL_INVALID_VALUE if the region being written specified by origin and * region is out of bounds or if ptr is a NULL value. */ tmp = origin[0]; origin[0] = IMG_WIDTH + 1; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (origin)"); origin[0] = tmp; tmp = region[0]; region[0] = IMG_WIDTH + 1; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (region)"); region[0] = tmp; test(queue, image, pattern, NULL, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if ptr is a NULL value (origin)"); test(queue, image, pattern, origin, NULL, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if ptr is a NULL value (region)"); /* * CL_INVALID_VALUE if values in origin and region do not follow rules * described in the argument description for origin and region. */ tmp = origin[2]; origin[2] = 1; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if values in origin do not follow rules described in the argument description for origin"); origin[2] = tmp; tmp = region[2]; region[2] = 0; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if values in region do not follow rules described in the argument description for region"); region[2] = tmp; /* * CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and * num_events_in_wait_list > 0, or event_wait_list is not NULL and * num_events_in_wait_list is 0, or if event objects in event_wait_list * are not valid events. */ event = NULL; test(queue, image, pattern, origin, region, 1, NULL, NULL, CL_INVALID_EVENT_WAIT_LIST, &result, "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0"); test(queue, image, pattern, origin, region, 0, &event, NULL, CL_INVALID_EVENT_WAIT_LIST, &result, "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is not NULL and num_events_in_wait_list is 0"); test(queue, image, pattern, origin, region, 1, &event, NULL, CL_INVALID_EVENT_WAIT_LIST, &result, "CL_INVALID_EVENT_WAIT_LIST if event objects in event_wait_list are not valid events"); /* * CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified * or compute row and/or slice pitch) for image are not supported by device * associated with queue. */ /* This is a per device test, clCreateImage would have failed before */ /* * CL_INVALID_IMAGE_FORMAT if image format (image channel order and data type) * for image are not supported by device associated with queue. */ /* This is a per device test, clCreateImage would have failed before */ free(image_support); clReleaseMemObject(image); return result; #else return PIGLIT_SKIP; #endif }
void Device::setup(unsigned int minScan, unsigned int maxScan) { int err; this->minScan = minScan; this->maxScan = maxScan; // Determine Configuration this->candidateBufferSize = this->reduce_scores_multiple; this->reduce_scores_size = this->reduce_scores_multiple; size_t hostMem = sizeof(mObj) * Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs + sizeof(eObj) * Tempest::data.iNumSpectra + sizeof(cl_mem) * Tempest::data.iNumSpectra + sizeof(std::vector<int>) + sizeof(int)*Tempest::data.host_iPeakBins.size() + sizeof(std::vector<float>) + sizeof(float)*Tempest::data.host_fPeakInts.size() + sizeof(int)*Tempest::data.iNumSpectra + sizeof(long)*Tempest::data.iNumSpectra; for (int candidateBufferSize=this->reduce_scores_multiple; hostMem + candidateBufferSize*Tempest::data.iNumSpectra*sizeof(cObj) < Tempest::config.maxHostMem; candidateBufferSize += this->reduce_scores_multiple) { for (int reduceScoresSize = 1; reduceScoresSize <= candidateBufferSize && reduceScoresSize <= this->reduce_scores_size_max && reduceScoresSize*(sizeof(int) + sizeof(float)) + this->reduce_scores_size_local <= this->lLocalMemSize; reduceScoresSize *= 2) { if (reduceScoresSize%(this->reduce_scores_multiple) == 0 && candidateBufferSize%reduceScoresSize == 0) if (candidateBufferSize * reduceScoresSize > this->candidateBufferSize * this->reduce_scores_size) { this->candidateBufferSize = candidateBufferSize; this->reduce_scores_size = reduceScoresSize; } } } if (Tempest::config.profile) { printf("cl_build: local_work_size=%ld\n", this->build_size); printf("cl_transform: local_work_size=%ld\n", this->transform_size); printf("cl_score: local_work_size=%ld\n", this->score_size); printf("candidate buffer size=%ld\n", this->candidateBufferSize); printf("cl_reduce_scores: local_work_size=%ld\n", this->reduce_scores_size); } for (int i=minScan+deviceInd; i<maxScan; i+=Tempest::config.iDevices.size()) { eObj* e = Tempest::data.eScans[i]; e->candidateBuffer = (cObj*)malloc(this->candidateBufferSize * sizeof(cObj)); e->candidateBufferSize = this->candidateBufferSize; e->clEventSent = clCreateUserEvent(clContext, NULL); clSetUserEventStatus(e->clEventSent, 0); e->device = this; } // peaks size_t size_iPeakBins = Tempest::data.lNumMS2Peaks * sizeof(cl_int); size_t size_fPeakInts = Tempest::data.lNumMS2Peaks * sizeof(cl_float); cl_iPeakBins = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size_iPeakBins, &(Tempest::data.host_iPeakBins[0]), &err); Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate device memory for peak bins."); cl_fPeakInts = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size_fPeakInts, &(Tempest::data.host_fPeakInts[0]), &err); Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate device memory for peak intensities."); // cleanup host //std::vector<int>().swap(Tempest::data.host_iPeakBins); //std::vector<float>().swap(Tempest::data.host_fPeakInts); //cudaMalloc((void**) &cl_fSpectra, Tempest::data.iNumMS2Bins * sizeof(float)); //cl_fSpectra = clCreateBuffer(clContext, CL_MEM_READ_WRITE, Tempest::data.iNumMS2Bins * sizeof(float), NULL, &err); float * init_fSpectra = (float *) calloc(Tempest::data.iNumMS2Bins, sizeof(float)); size_t size_init_fSpectra = Tempest::data.iNumMS2Bins * sizeof(cl_float); cl_init_fSpectra = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size_init_fSpectra, init_fSpectra, &err); free(init_fSpectra); // candidate and results mObj * init_mPSMs = (mObj *) calloc(Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs, sizeof(mObj)); // for (int i=0; i<Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs; i++) // init_mPSMs[i].fScore = MIN_SCORE; //float * init_fNextScores = (float *) calloc(Tempest::data.iNumSpectra, sizeof(float)); size_t size_cCandidates = sizeof(cObj) * this->candidateBufferSize; size_t size_fScores = sizeof(cl_float) * this->candidateBufferSize; size_t size_mPSMs = sizeof(mObj) * Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs; //size_t size_fNextScores = sizeof(float) * Tempest::data.iNumSpectra; cl_cCandidates = clCreateBuffer(clContext, CL_MEM_READ_ONLY, size_cCandidates, NULL, &err); cl_fScores = clCreateBuffer(clContext, CL_MEM_READ_WRITE, size_fScores, NULL, &err); cl_mPSMs = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_mPSMs , init_mPSMs, &err); //cl_fNextScores = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_fNextScores, init_fNextScores, &err); //MEA: need to block free until previous clCreateBuffer commands complete? free(init_mPSMs); //free(init_fNextScores); Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate device memory for candidates and results."); //determine how many spectra can be kept in device memory at a time size_t availMemSpectra = lGlobalMemSize - size_iPeakBins - size_fPeakInts - size_init_fSpectra - size_cCandidates - size_fScores - size_mPSMs; if (availMemSpectra > Tempest::config.maxDeviceMem) availMemSpectra = Tempest::config.maxDeviceMem; long maxCachedSpectra = availMemSpectra / (Tempest::data.iNumMS2Bins*sizeof(cl_float)); if (maxCachedSpectra > (long)ceil(float(Tempest::data.iNumSpectra)/Tempest::devices.size())) maxCachedSpectra = (long)ceil(float(Tempest::data.iNumSpectra)/Tempest::devices.size()); if (maxCachedSpectra <= 0) maxCachedSpectra = 1; printf(" » (%d:%d) Allocating %.2f MB of device memory for %ld cached %s.\n", platformID, deviceID, (float)maxCachedSpectra*Tempest::data.iNumMS2Bins*sizeof(cl_float)/MB, maxCachedSpectra, maxCachedSpectra==1 ? "spectrum" : "spectra"); for (int i=0; i<maxCachedSpectra; i++) { cl_mem newBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, Tempest::data.iNumMS2Bins*sizeof(cl_float), NULL, &err); Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate spectrum memory on device."); unusedBuffers.push(newBuffer); } setup_constant_memory(); //initialize profiling variables scoreEvent = clCreateUserEvent(clContext, NULL); reduceEvent = clCreateUserEvent(clContext, NULL); buildEvent = clCreateUserEvent(clContext, NULL); memsetEvent = clCreateUserEvent(clContext, NULL); transformEvent = clCreateUserEvent(clContext, NULL); totalScoreTime = 0; totalReduceTime = 0; totalBuildTime = 0; totalTransformTime = 0; totalMemsetTime = 0; totalSendTime = 0; buildLaunches = 0; scoreKernelLaunches = 0; lastBuildIndex = -1; }
void mat_mul_opencl_binary(float *M_A, float *M_B, float *M_C, size_t ROW_A, size_t COL_A, size_t COL_B) { cl_platform_id *platform; cl_device_type dev_type; cl_device_id dev; cl_context context; cl_command_queue cmd_queue; cl_program program; cl_kernel kernel; cl_mem mem_A, mem_B, mem_C; cl_event ev_kernel, ev_bp; cl_int err; cl_uint num_platforms; cl_uint num_dev = 0; int i; // Platform err = clGetPlatformIDs(0, NULL, &num_platforms); CHECK_ERROR(err); if (num_platforms == 0) { fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__,__LINE__); exit(EXIT_FAILURE); } printf("Number of platforms: %u\n", num_platforms); platform = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms); err = clGetPlatformIDs(num_platforms, platform, NULL); CHECK_ERROR(err); // Device dev_type = get_device_type(); for (i = 0; i < num_platforms; i++) { err = clGetDeviceIDs(platform[i], dev_type, 1, &dev, &num_dev); if (err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err); if (num_dev == 1) break; } if (num_dev < 1) { fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } print_device_name(dev); free(platform); // Context context = clCreateContext(NULL, 1, &dev, NULL, NULL, &err); CHECK_ERROR(err); // Create a program. char *source_code = get_source_code("./kernel_2d.cl"); program = clCreateProgramWithSource(context, 1, (const char **)&source_code, NULL, &err); free(source_code); CHECK_ERROR(err); // Callback data for clBuildProgram ev_bp = clCreateUserEvent(context, &err); CHECK_ERROR(err); bp_data_t bp_data; bp_data.dev = dev; bp_data.event = &ev_bp; // Build the program. char build_opts[200]; sprintf(build_opts, "-DROW_A=%lu -DCOL_A=%lu -DCOL_B=%lu", ROW_A, COL_A, COL_B); err = clBuildProgram(program, 1, &dev, build_opts, build_program_callback, &bp_data); CHECK_ERROR(err); // Command queue cmd_queue = clCreateCommandQueue(context, dev, CL_QUEUE_PROFILING_ENABLE, &err); CHECK_ERROR(err); // Buffers mem_A = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ROW_A * COL_A, M_A, &err); CHECK_ERROR(err); mem_B = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * COL_A * COL_B, NULL, &err); CHECK_ERROR(err); err = clEnqueueWriteBuffer(cmd_queue, mem_B, CL_FALSE, 0, sizeof(float) * COL_A * COL_B, M_B, 0, NULL, NULL); CHECK_ERROR(err) mem_C = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * ROW_A * COL_B, NULL, &err); CHECK_ERROR(err); // Index space (gws) and work-group size (lws) size_t lws[2] = {16, 16}; size_t gws[2]; gws[1] = (size_t)ceil((double)ROW_A / lws[1]) * lws[1]; gws[0] = (size_t)ceil((double)COL_B / lws[0]) * lws[0]; // Wait for the kernel creation. clWaitForEvents(1, bp_data.event); // Kernel kernel = clCreateKernel(program, "mat_mul", &err); CHECK_ERROR(err); // Set the arguments. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_A); CHECK_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_B); CHECK_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_C); CHECK_ERROR(err); // Enqueue the kernel. err = clEnqueueNDRangeKernel(cmd_queue, kernel, 2, NULL, gws, lws, 0, NULL, &ev_kernel); CHECK_ERROR(err); // Read the result. err = clEnqueueReadBuffer(cmd_queue, mem_C, CL_TRUE, 0, sizeof(float) * ROW_A * COL_B, M_C, 0, NULL, NULL); CHECK_ERROR(err); // Read the profiling info. cl_ulong start_time, end_time; err = clGetEventProfilingInfo(ev_kernel, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start_time, NULL); CHECK_ERROR(err); err = clGetEventProfilingInfo(ev_kernel, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end_time, NULL); CHECK_ERROR(err); printf("Kernel time : %lf sec\n", (double)(end_time - start_time) / 10e9); // Release clReleaseEvent(ev_kernel); clReleaseEvent(ev_bp); clReleaseMemObject(mem_A); clReleaseMemObject(mem_B); clReleaseMemObject(mem_C); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); }