int acc_event_create (void** event_p){ // debug info if (verbose_print){ fprintf(stdout, "\n ... EVENT CREATION ... \n"); fprintf(stdout, " ---> Entering: acc_event_create.\n"); } // local event object pointer *event_p = malloc(sizeof(cl_event)); cl_event *clevent = (cl_event *) *event_p; // get a device event object *clevent = clCreateUserEvent((*acc_opencl_my_device).ctx, &cl_error); if (acc_opencl_error_check(cl_error, __LINE__)) return -1; // debug info if (verbose_print){ fprintf(stdout, " ---> Leaving: acc_event_create.\n"); } cl_error = clSetUserEventStatus(*clevent, CL_COMPLETE); // assign return value return 0; }
// Collapses the held event list into a single cl_event:
//  - exactly one event: return it directly,
//  - several events: on OpenCL 1.2 return a marker event gated on all of
//    them; pre-1.2, block via get() and return a completed user event,
//  - no events: return a null cl_event.
//
// Fix: in the CL_VERSION_1_2 path the temporary user event was previously
// overwritten by the marker event's handle and leaked; it is now released
// once the marker has been enqueued.
operator cl_event() const
{
    cl_int err = CL_SUCCESS;
    if(e_.size() == 1)
    {
        return e_[0];
    }
    if(!e_.empty())
    {
        cl_context ctx = get_info<command_queue::context_info_type>(command_queue());
        cl_event e = clCreateUserEvent(ctx, &err);
        OCLM_THROW_IF_EXCEPTION(err, "clCreateUserEvent");
#ifdef CL_VERSION_1_2
        // The marker is what callers wait on; the user event is only a
        // placeholder and must be released to avoid a handle leak.
        cl_event marker = NULL;
        err = clEnqueueMarkerWithWaitList(command_queue(),
            static_cast<cl_uint>(e_.size()), &e_[0], &marker);
        clReleaseEvent(e);   // release before the throw-check so e never leaks
        OCLM_THROW_IF_EXCEPTION(err, "clEnqueueMarkerWithWaitList");
        return marker;
#else
        get();
        err = clSetUserEventStatus(e, CL_COMPLETE);
        return e;
#endif
    }
    else
        return cl_event();
}
// Sets the execution status of this user event, mirroring
// clSetUserEventStatus(). Rejects released events, illegal status values,
// and repeated assignment, reporting each failure via the ExceptionState.
void WebCLUserEvent::setStatus(cl_int executionStatus, ExceptionState& es)
{
    ASSERT(isUserEvent());
    if (isReleased()) {
        es.throwWebCLException(WebCLException::INVALID_EVENT, WebCLException::invalidEventMessage);
        return;
    }
    // Only CL_COMPLETE or a negative (error) code is a legal user status.
    const bool statusIsValid = (executionStatus < 0) || (executionStatus == CL_COMPLETE);
    if (!statusIsValid) {
        es.throwWebCLException(WebCLException::INVALID_VALUE, WebCLException::invalidValueMessage);
        return;
    }
    // The status of a user event may be set at most once.
    if (m_eventStatusSituation == StatusSet) {
        es.throwWebCLException(WebCLException::INVALID_OPERATION, WebCLException::invalidOperationMessage);
        return;
    }
    m_eventStatusSituation = StatusSet;
    m_executionStatus = executionStatus;
    const cl_int clError = clSetUserEventStatus(m_clEvent, executionStatus);
    if (clError != CL_SUCCESS)
        WebCLException::throwException(clError, es);
}
/* Marks a user event as CL_COMPLETE, releasing anything waiting on it.
   Logs on failure and returns the status code from clSetUserEventStatus(). */
cl_int mwFinishEvent(cl_event ev)
{
    const cl_int rc = clSetUserEventStatus(ev, CL_COMPLETE);

    if (rc != CL_SUCCESS)
    {
        mwPerrorCL(rc, "Failed to mark custom event as completed");
    }

    return rc;
}
/* Test driver: exercises user-event status setting and verifies that
 * querying profiling info on a user event yields
 * CL_PROFILING_INFO_NOT_AVAILABLE, for both a CL_COMPLETE status and a
 * negative (error) status. Uses the test harness macros CHECK_CL_ERROR /
 * CHECK_OPENCL_ERROR_IN / TEST_ASSERT (which reference the local `err`). */
int main()
{
    cl_int err;
    cl_event user_evt = NULL;
    int i;

    // An user event can be set to either complete or a negative value, indicating error;
    // additionally, no objects involved in a command that waits on the user event should
    // be released before the event status is set; however, it should be possible to release
    // everything even if the status is set to something which is NOT CL_COMPLETE. So
    // try both CL_COMPLETE and a negative value
    cl_int status[] = {CL_INVALID_EVENT, CL_COMPLETE };

    // We also query for profiling info of the event, which according to the standard
    // should return CL_PROFILING_INFO_NOT_AVAILABLE
    cl_ulong queued, submitted, started, endtime;

    for (i = 0; i < ARRAY_SIZE(status); ++i) {
        // A fresh context/queue/device per iteration so releases are also tested.
        cl_context context;
        cl_command_queue queue;
        cl_device_id device;
        CHECK_CL_ERROR(poclu_get_any_device(&context, &device, &queue));
        TEST_ASSERT( context );
        TEST_ASSERT( device );
        TEST_ASSERT( queue );

        user_evt = clCreateUserEvent(context, &err);
        CHECK_OPENCL_ERROR_IN("clCreateUserEvent");
        TEST_ASSERT( user_evt );

        CHECK_CL_ERROR(clSetUserEventStatus(user_evt, status[i]));

        // All four profiling timestamps must be unavailable on a user event.
        err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_QUEUED, sizeof(queued), &queued, NULL);
        TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE);
        err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_SUBMIT, sizeof(submitted), &submitted, NULL);
        TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE);
        err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_START, sizeof(started), &started, NULL);
        TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE);
        err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_END, sizeof(endtime), &endtime, NULL);
        TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE);

        // Releasing everything must succeed regardless of the status value set.
        CHECK_CL_ERROR(clReleaseEvent(user_evt));
        CHECK_CL_ERROR(clReleaseCommandQueue(queue));
        CHECK_CL_ERROR(clReleaseContext(context));
    }

    return EXIT_SUCCESS;
}
// Sets the execution status of the underlying user event via
// clSetUserEventStatus(), mapping the OpenCL error codes that call can
// produce onto human-readable EventException messages.
void Event::setStatus(CommandExecutionStatus status) {
    static const auto error_map = error::ErrorMap{
        {ErrorCode::invalid_event, "the given user event was invalid."},
        {ErrorCode::invalid_value, "the given execution status is invalid."},
        {ErrorCode::invalid_operation, "the execution status has already been set for this event."}
    };

    // Unwrap the scoped enum to its raw cl_int representation up front.
    const auto raw_status =
        static_cast<std::underlying_type<CommandExecutionStatus>::type>(status);
    const auto cl_result = clSetUserEventStatus(m_id, raw_status);

    error::handle<EventException>(cl_result, error_map);
}
// Assigns an execution status to this user event. Requires OpenCL 1.1
// support; on older builds the call is a no-op. A null event handle or a
// failing clSetUserEventStatus() is forwarded to detail::reportError().
void UserEvent::setStatus(EEventStatus status)
{
#if defined(HAVE_OPENCL_1_1)
    cl_int error = CL_SUCCESS;
    bool ok = (_id != 0);
    if (ok) {
        error = clSetUserEventStatus(_id, cl_int(status));
        ok = (error == CL_SUCCESS);
    }
    if (!ok) {
        // With a null _id the runtime call is skipped and error stays CL_SUCCESS,
        // matching the original short-circuit behavior.
        detail::reportError("UserEvent::setStatus(): ", error);
    }
#else
    (void)status;
#endif
}
// Forwards a user-event status change to clSetUserEventStatus() on the
// wrapped event. Logs (but still returns) any failure code. On builds
// without OpenCL 1.1 support this always fails with CL_INVALID_VALUE.
cl_int EventWrapper::setUserEventStatus (cl_int aExecutionStatus) {
#if CL_WRAPPER_CL_VERSION_SUPPORT >= 110
    D_METHOD_START;
    const cl_int status = clSetUserEventStatus (mWrapped, aExecutionStatus);
    if (status == CL_SUCCESS)
        return status;
    D_LOG (LOG_LEVEL_ERROR, " clSetUserEventStatus failed. (error %d)", status);
    return status;
#else // CL_WRAPPER_CL_VERSION_SUPPORT >= 110
    (void)aExecutionStatus;
    D_LOG (LOG_LEVEL_ERROR, "CLWrapper support for OpenCL 1.1 API was not enabled at build time.");
    return CL_INVALID_VALUE;
#endif
}
void CL::UserEvent::end() { cl_int status; if (!_active) return; status = clSetUserEventStatus(_event, CL_COMPLETE); OPENCL_ASSERT(status); _device.end_user_event(_id); _event = 0; _id = -1; _active = false; }
// Registers a host-side event id with the given device by backing it with
// an already-completed OpenCL user event created on that device's context.
static void register_event(hpx::opencl::device cldevice,
                           const hpx::naming::id_type & event_id)
{
    boost::shared_ptr<hpx::opencl::server::device> parent_device =
        hpx::get_ptr<hpx::opencl::server::device>
        (cldevice.get_gid()).get();

    // create a fake event
    cl_int err;
    cl_event event_cl = clCreateUserEvent ( parent_device->get_context(), &err);
    // Fix: the error message previously named clEnqueueWriteBuffer(),
    // a copy-paste mistake that would misdirect debugging.
    cl_ensure(err, "clCreateUserEvent()");
    // mark it complete immediately so anything waiting on it may proceed
    err = clSetUserEventStatus(event_cl, CL_COMPLETE);
    cl_ensure(err, "clSetUserEventStatus()");

    parent_device->register_event(event_id, event_cl);
}
/* Callback fired when program compilation finishes: verifies the build
 * succeeded (printing the build log and aborting on failure), then signals
 * the user event stored in the bp_data_t so that waiters may proceed. */
static void build_program_callback(cl_program program, void *user_data)
{
    bp_data_t *data = (bp_data_t *)user_data;
    cl_build_status build_status;
    cl_int err;

    /* Check the build status. */
    err = clGetProgramBuildInfo(program, data->dev, CL_PROGRAM_BUILD_STATUS,
                                sizeof(cl_build_status), &build_status, NULL);
    CHECK_ERROR(err);
    if (build_status != CL_BUILD_SUCCESS) {
        print_build_log(program, data->dev);
        exit(EXIT_FAILURE);
    }

    /* Set the event status */
    err = clSetUserEventStatus(*(data->event), CL_COMPLETE);
    CHECK_ERROR(err);
}
// Configures this device for the scan range [minScan, maxScan):
//  - searches for the largest (candidate buffer size, reduction size) pair
//    that fits within the host-memory and local-memory budgets,
//  - allocates per-scan candidate buffers and "sent" user events,
//  - uploads peak data and allocates scoring/result buffers on the device,
//  - pre-allocates cached spectrum buffers and profiling user events.
void Device::setup(unsigned int minScan, unsigned int maxScan) {
    int err;
    this->minScan = minScan;
    this->maxScan = maxScan;

    // Determine Configuration
    this->candidateBufferSize = this->reduce_scores_multiple;
    this->reduce_scores_size = this->reduce_scores_multiple;
    // Estimated fixed host-memory footprint (PSMs, scan objects, peak vectors).
    size_t hostMem = sizeof(mObj) * Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs
        + sizeof(eObj) * Tempest::data.iNumSpectra
        + sizeof(cl_mem) * Tempest::data.iNumSpectra
        + sizeof(std::vector<int>) + sizeof(int)*Tempest::data.host_iPeakBins.size()
        + sizeof(std::vector<float>) + sizeof(float)*Tempest::data.host_fPeakInts.size()
        + sizeof(int)*Tempest::data.iNumSpectra
        + sizeof(long)*Tempest::data.iNumSpectra;
    // Grid search: grow the candidate buffer while it fits in host memory, and
    // for each size try power-of-two reduction sizes that fit in local memory;
    // keep the pair maximizing (buffer size * reduction size).
    for (int candidateBufferSize=this->reduce_scores_multiple; hostMem + candidateBufferSize*Tempest::data.iNumSpectra*sizeof(cObj) < Tempest::config.maxHostMem; candidateBufferSize += this->reduce_scores_multiple) {
        for (int reduceScoresSize = 1; reduceScoresSize <= candidateBufferSize && reduceScoresSize <= this->reduce_scores_size_max && reduceScoresSize*(sizeof(int) + sizeof(float)) + this->reduce_scores_size_local <= this->lLocalMemSize; reduceScoresSize *= 2) {
            if (reduceScoresSize%(this->reduce_scores_multiple) == 0 && candidateBufferSize%reduceScoresSize == 0)
                if (candidateBufferSize * reduceScoresSize > this->candidateBufferSize * this->reduce_scores_size) {
                    this->candidateBufferSize = candidateBufferSize;
                    this->reduce_scores_size = reduceScoresSize;
                }
        }
    }
    if (Tempest::config.profile) {
        printf("cl_build: local_work_size=%ld\n", this->build_size);
        printf("cl_transform: local_work_size=%ld\n", this->transform_size);
        printf("cl_score: local_work_size=%ld\n", this->score_size);
        printf("candidate buffer size=%ld\n", this->candidateBufferSize);
        printf("cl_reduce_scores: local_work_size=%ld\n", this->reduce_scores_size);
    }
    // This device handles every iDevices.size()-th scan starting at its index.
    for (int i=minScan+deviceInd; i<maxScan; i+=Tempest::config.iDevices.size()) {
        eObj* e = Tempest::data.eScans[i];
        e->candidateBuffer = (cObj*)malloc(this->candidateBufferSize * sizeof(cObj));
        e->candidateBufferSize = this->candidateBufferSize;
        // NOTE(review): error argument is NULL, so creation failures go
        // undetected here; status 0 is CL_COMPLETE in cl.h — the "sent" event
        // starts out signaled.
        e->clEventSent = clCreateUserEvent(clContext, NULL);
        clSetUserEventStatus(e->clEventSent, 0);
        e->device = this;
    }

    // peaks
    size_t size_iPeakBins = Tempest::data.lNumMS2Peaks * sizeof(cl_int);
    size_t size_fPeakInts = Tempest::data.lNumMS2Peaks * sizeof(cl_float);
    cl_iPeakBins = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size_iPeakBins, &(Tempest::data.host_iPeakBins[0]), &err);
    Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate device memory for peak bins.");
    cl_fPeakInts = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size_fPeakInts, &(Tempest::data.host_fPeakInts[0]), &err);
    Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate device memory for peak intensities.");

    // cleanup host
    //std::vector<int>().swap(Tempest::data.host_iPeakBins);
    //std::vector<float>().swap(Tempest::data.host_fPeakInts);

    //cudaMalloc((void**) &cl_fSpectra, Tempest::data.iNumMS2Bins * sizeof(float));
    //cl_fSpectra = clCreateBuffer(clContext, CL_MEM_READ_WRITE, Tempest::data.iNumMS2Bins * sizeof(float), NULL, &err);
    // Zero-initialized template spectrum used to reset device spectra.
    float * init_fSpectra = (float *) calloc(Tempest::data.iNumMS2Bins, sizeof(float));
    size_t size_init_fSpectra = Tempest::data.iNumMS2Bins * sizeof(cl_float);
    // NOTE(review): err from this clCreateBuffer is not checked before the
    // buffer is used — confirm whether that is intentional.
    cl_init_fSpectra = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size_init_fSpectra, init_fSpectra, &err);
    free(init_fSpectra);

    // candidate and results
    mObj * init_mPSMs = (mObj *) calloc(Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs, sizeof(mObj));
    // for (int i=0; i<Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs; i++)
    //     init_mPSMs[i].fScore = MIN_SCORE;
    //float * init_fNextScores = (float *) calloc(Tempest::data.iNumSpectra, sizeof(float));
    size_t size_cCandidates = sizeof(cObj) * this->candidateBufferSize;
    size_t size_fScores = sizeof(cl_float) * this->candidateBufferSize;
    size_t size_mPSMs = sizeof(mObj) * Tempest::data.iNumSpectra * Tempest::params.numInternalPSMs;
    //size_t size_fNextScores = sizeof(float) * Tempest::data.iNumSpectra;
    cl_cCandidates = clCreateBuffer(clContext, CL_MEM_READ_ONLY, size_cCandidates, NULL, &err);
    cl_fScores = clCreateBuffer(clContext, CL_MEM_READ_WRITE, size_fScores, NULL, &err);
    cl_mPSMs = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_mPSMs , init_mPSMs, &err);
    //cl_fNextScores = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_fNextScores, init_fNextScores, &err);
    //MEA: need to block free until previous clCreateBuffer commands complete?
    free(init_mPSMs);
    //free(init_fNextScores);
    // NOTE(review): only the err of the last clCreateBuffer above is checked.
    Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate device memory for candidates and results.");

    //determine how many spectra can be kept in device memory at a time
    size_t availMemSpectra = lGlobalMemSize - size_iPeakBins - size_fPeakInts - size_init_fSpectra - size_cCandidates - size_fScores - size_mPSMs;
    if (availMemSpectra > Tempest::config.maxDeviceMem)
        availMemSpectra = Tempest::config.maxDeviceMem;
    long maxCachedSpectra = availMemSpectra / (Tempest::data.iNumMS2Bins*sizeof(cl_float));
    // Cap at this device's share of the spectra, and keep at least one.
    if (maxCachedSpectra > (long)ceil(float(Tempest::data.iNumSpectra)/Tempest::devices.size()))
        maxCachedSpectra = (long)ceil(float(Tempest::data.iNumSpectra)/Tempest::devices.size());
    if (maxCachedSpectra <= 0)
        maxCachedSpectra = 1;

    printf(" » (%d:%d) Allocating %.2f MB of device memory for %ld cached %s.\n", platformID, deviceID, (float)maxCachedSpectra*Tempest::data.iNumMS2Bins*sizeof(cl_float)/MB, maxCachedSpectra, maxCachedSpectra==1 ? "spectrum" : "spectra");

    // Pre-allocate the spectrum cache pool.
    for (int i=0; i<maxCachedSpectra; i++) {
        cl_mem newBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, Tempest::data.iNumMS2Bins*sizeof(cl_float), NULL, &err);
        Tempest::check_cl_error(__FILE__, __LINE__, err, "Unable to allocate spectrum memory on device.");
        unusedBuffers.push(newBuffer);
    }

    setup_constant_memory();

    //initialize profiling variables
    scoreEvent = clCreateUserEvent(clContext, NULL);
    reduceEvent = clCreateUserEvent(clContext, NULL);
    buildEvent = clCreateUserEvent(clContext, NULL);
    memsetEvent = clCreateUserEvent(clContext, NULL);
    transformEvent = clCreateUserEvent(clContext, NULL);
    totalScoreTime = 0;
    totalReduceTime = 0;
    totalBuildTime = 0;
    totalTransformTime = 0;
    totalMemsetTime = 0;
    totalSendTime = 0;
    buildLaunches = 0;
    scoreKernelLaunches = 0;
    lastBuildIndex = -1;
}
/*! \brief Forces commands within a command Queue associated with this Event to wait until
  * the commands are completed.
  *
  * NOTE(review): despite the name, this does not block the host — it calls
  * clSetUserEventStatus(CL_COMPLETE), i.e. it force-completes the (user)
  * event so that commands gated on it are released. A host-side wait would
  * use clWaitForEvents instead; confirm this is the intended semantics.
  */
void CL::Event::waitUntilCompleted() const
{
	OPENCL_SAFE_CALL( clSetUserEventStatus (this->id(), CL_COMPLETE) );
}
END_TEST

/* Verifies user-event gating of a command queue: commands enqueued after a
 * WaitForEvents on an unsignaled user event (and after a barrier) must stay
 * CL_QUEUED until the user event is set to CL_COMPLETE via
 * clSetUserEventStatus. Uses the check framework's START_TEST/fail_if. */
START_TEST (test_misc_events)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_int result;
    cl_event uevent1, uevent2, marker1, marker2;

    /* Standard setup: default device, context, in-order queue. */
    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    fail_if( result != CL_SUCCESS, "unable to get the default device" );
    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    fail_if( result != CL_SUCCESS || ctx == 0, "unable to create a valid context" );
    queue = clCreateCommandQueue(ctx, device, 0, &result);
    fail_if( result != CL_SUCCESS || queue == 0, "cannot create a command queue" );

    /*
     * This test will build a command queue blocked by an user event. The events
     * will be in this order :
     *
     * -: UserEvent1
     * 0: WaitForEvents1 (wait=UserEvent1)
     * 1: Marker1
     * -: UserEvent2
     * 2: WaitForEvents2 (wait=UserEvent2)
     * 3: Barrier
     * 4: Marker2 (to check the barrier worked)
     *
     * When the command queue is built, we :
     * - Check that Marker1 is Queued (WaitForEvents waits)
     * - Set UserEvent1 to Complete
     * - Check that Marker1 is Complete (WaitForEvents stopped to wait)
     * - Check that Marker2 is Queued (Barrier is there)
     * - Set UserEvent2 to Complete
     * - Check that Marker2 is Complete (no more barrier)
     */
    uevent1 = clCreateUserEvent(ctx, &result);
    fail_if( result != CL_SUCCESS, "unable to create UserEvent1" );
    uevent2 = clCreateUserEvent(ctx, &result);
    fail_if( result != CL_SUCCESS, "unable to create UserEvent2" );

    result = clEnqueueWaitForEvents(queue, 1, &uevent1);
    fail_if( result != CL_SUCCESS, "unable to enqueue WaitForEvents(UserEvent1)" );
    result = clEnqueueMarker(queue, &marker1);
    fail_if( result != CL_SUCCESS, "unable to enqueue Marker1" );
    result = clEnqueueWaitForEvents(queue, 1, &uevent2);
    fail_if( result != CL_SUCCESS, "unable to enqueue WaitForEvents(UserEvent2)" );
    result = clEnqueueBarrier(queue);
    fail_if( result != CL_SUCCESS, "unable to enqueue Barrier" );
    result = clEnqueueMarker(queue, &marker2);
    fail_if( result != CL_SUCCESS, "unable to enqueue Marker2" );

    // Now the checks
    cl_int status;

    /* Marker1 is still gated by the unsignaled UserEvent1. */
    result = clGetEventInfo(marker1, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0);
    fail_if( result != CL_SUCCESS || status != CL_QUEUED, "Marker1 must be Queued" );

    result = clSetUserEventStatus(uevent1, CL_COMPLETE);
    fail_if( result != CL_SUCCESS, "unable to set UserEvent1 to Complete" );

    result = clGetEventInfo(marker1, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0);
    fail_if( result != CL_SUCCESS || status != CL_COMPLETE, "Marker1 must be Complete" );

    /* Marker2 remains gated by UserEvent2 through the barrier. */
    result = clGetEventInfo(marker2, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0);
    fail_if( result != CL_SUCCESS || status != CL_QUEUED, "Marker2 must be Queued" );

    result = clSetUserEventStatus(uevent2, CL_COMPLETE);
    fail_if( result != CL_SUCCESS, "unable to set UserEvent2 to Complete" );

    result = clGetEventInfo(marker2, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, 0);
    fail_if( result != CL_SUCCESS || status != CL_COMPLETE, "Marker2 must be Complete" );

    clFinish(queue);

    clReleaseEvent(uevent1);
    clReleaseEvent(uevent2);
    clReleaseEvent(marker1);
    clReleaseEvent(marker2);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
}
int main() { cl_platform_id platform_id = NULL; cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue command_queue = NULL; cl_mem objA = NULL; cl_mem objB = NULL; cl_mem objC = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret; cl_event event1; int i, j; float *A; float *B; float *C; A = (float *)malloc(4*4*sizeof(float)); B = (float *)malloc(4*4*sizeof(float)); C = (float *)malloc(4*4*sizeof(float)); /* Initialize input data */ for (i=0; i<4; i++) { for (j=0; j<4; j++) { A[i*4+j] = i*4+j+1; B[i*4+j] = j*4+i+1; } } /* Get Platform/Device Information*/ ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); /* Create OpenCL Context */ context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); /* Create command queue */ command_queue = clCreateCommandQueue(context, device_id, 0, &ret); /* Create Buffer Object */ objA = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret); objB = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret); objC = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret); /* * Creating an user event * As a user event is created, its execution status is set to be CL_SUBMITTED * and we tag the event to a callback so when event reaches CL_COMPLETE, it will * execute postProcess */ event1 = clCreateUserEvent(context, &ret); clSetEventCallback(event1, CL_COMPLETE, &postProcess, "Looks like its done."); /* Copy input data to the memory buffer */ ret = clEnqueueWriteBuffer(command_queue, objA, CL_TRUE, 0, 4*4*sizeof(float), A, 0, NULL, NULL ); printf("A has been written\n"); /* The next command will wait for event1 according to its status*/ ret = clEnqueueWriteBuffer(command_queue, objB, CL_TRUE, 0, 4*4*sizeof(float), B, 1, &event1, NULL); printf("B has been written\n"); /* Tell event1 to complete 
*/ clSetUserEventStatus(event1, CL_COMPLETE); const char *file_names[] = {"sample_kernel.cl"}; const int NUMBER_OF_FILES = 1; char* buffer[NUMBER_OF_FILES]; size_t sizes[NUMBER_OF_FILES]; loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes); /* Create kernel program from source file*/ program = clCreateProgramWithSource(context, 1, (const char **)buffer, sizes, &ret); ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); /* Create data parallel OpenCL kernel */ kernel = clCreateKernel(program, "sample", &ret); /* Set OpenCL kernel arguments */ ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&objA); ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&objB); ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&objC); size_t global_item_size = 4; size_t local_item_size = 1; /* Execute OpenCL kernel as data parallel */ ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL); /* Transfer result to host */ ret = clEnqueueReadBuffer(command_queue, objC, CL_TRUE, 0, 4*4*sizeof(float), C, 0, NULL, NULL); /* Display Results */ for (i=0; i<4; i++) { for (j=0; j<4; j++) { printf("%7.2f ", C[i*4+j]); } printf("\n"); } /* Finalization */ ret = clFlush(command_queue); ret = clFinish(command_queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(objA); ret = clReleaseMemObject(objB); ret = clReleaseMemObject(objC); ret = clReleaseCommandQueue(command_queue); ret = clReleaseContext(context); free(A); free(B); free(C); return 0; }
/* Piglit API test for clEnqueueFillImage (OpenCL 1.2): fills a 2x2 region of
 * a 4x4 RGBA8 image with a pattern, verifies the read-back contents, then
 * drives each error condition from the spec through the helper test()
 * (which checks the returned error code and merges into `result`).
 * Skips when built without CL_VERSION_1_2 or when the device lacks image
 * support. */
enum piglit_result
piglit_cl_test(const int argc,
               const char **argv,
               const struct piglit_cl_api_test_config* config,
               const struct piglit_cl_api_test_env* env)
{
#if defined(CL_VERSION_1_2)
    enum piglit_result result = PIGLIT_PASS;
    cl_int err;

#define IMG_WIDTH 4
#define IMG_HEIGHT 4
#define IMG_DATA_SIZE 4
#define IMG_BUFFER_SIZE IMG_WIDTH * IMG_HEIGHT * IMG_DATA_SIZE

    unsigned char img_buf[IMG_BUFFER_SIZE] = {0};
    unsigned char dst_buf[IMG_BUFFER_SIZE] = {0};
    unsigned char exp_buf[IMG_BUFFER_SIZE] = {0};
    int pattern[4] = {129, 33, 77, 255};
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {2, 2, 1};
    size_t tmp;
    cl_event event;
    cl_mem image;
    cl_image_format img_format;
    cl_image_desc img_desc = {0};
    cl_command_queue queue = env->context->command_queues[0];
    int i;

    cl_bool *image_support =
        piglit_cl_get_device_info(env->context->device_ids[0], CL_DEVICE_IMAGE_SUPPORT);
    if (!*image_support) {
        fprintf(stderr, "No image support\n");
        free(image_support);
        return PIGLIT_SKIP;
    }
    /* NOTE(review): the PIGLIT_FAIL early-returns below do not free
     * image_support (nor release `image` once created) — confirm whether
     * piglit tolerates these leaks on failure paths. */

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNSIGNED_INT8;
    img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    img_desc.image_width = IMG_WIDTH;
    img_desc.image_height = IMG_HEIGHT;
    img_desc.buffer = NULL;

    /*** Normal usage ***/
    image = clCreateImage(env->context->cl_ctx,
                          CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                          &img_format, &img_desc, &img_buf, &err);
    if(!piglit_cl_check_error(err, CL_SUCCESS)) {
        fprintf(stderr, "Failed (error code: %s): Creating an image\n",
                piglit_cl_get_error_name(err));
        return PIGLIT_FAIL;
    }
    /* Fill a 2x2 corner of the image with the pattern. */
    if (!test(queue, image, pattern, origin, region,
              0, NULL, NULL,
              CL_SUCCESS, &result, "Enqueuing the image to be filled")) {
        return PIGLIT_FAIL;
    }

    /* Read back the whole image to verify the fill. */
    region[0] = IMG_WIDTH;
    region[1] = IMG_HEIGHT;
    err = clEnqueueReadImage(queue, image, 1, origin, region, 0, 0,
                             dst_buf, 0, NULL, NULL);
    if(!piglit_cl_check_error(err, CL_SUCCESS)) {
        fprintf(stderr, "Failed (error code: %s): Reading image\n",
                piglit_cl_get_error_name(err));
        return PIGLIT_FAIL;
    }

    /*
     * fill the host buffer with the pattern
     * for exemple : pattern == 1234
     *
     * 12341234abcdabcd
     * 12341234abcdabcd
     * abcdabcdabcdabcd
     * abcdabcdabcdabcd
     */
    exp_buf[0] = pattern[0];
    exp_buf[1] = pattern[1];
    exp_buf[2] = pattern[2];
    exp_buf[3] = pattern[3];
    /* Replicate the pixel to positions (1,0), (0,1) and (1,1) of the filled
     * 2x2 region (pixel indices 1, 4 and 5 of the 4-wide image). */
    memcpy(exp_buf + (IMG_DATA_SIZE * 1), exp_buf, IMG_DATA_SIZE);
    memcpy(exp_buf + (IMG_DATA_SIZE * 4), exp_buf, IMG_DATA_SIZE);
    memcpy(exp_buf + (IMG_DATA_SIZE * 5), exp_buf, IMG_DATA_SIZE);

    for (i = 0; i < sizeof(dst_buf) / sizeof(dst_buf[0]); ++i) {
        if (!piglit_cl_probe_integer(dst_buf[i], exp_buf[i], 0)) {
            fprintf(stderr, "Error at %d: got %d, expected %d\n",
                    i, dst_buf[i], exp_buf[i]);
            return PIGLIT_FAIL;
        }
    }

    /*** Errors ***/

    /*
     * CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
     */
    test(NULL, image, pattern, origin, region,
         0, NULL, NULL,
         CL_INVALID_COMMAND_QUEUE, &result,
         "CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue");

    /*
     * CL_INVALID_CONTEXT if the context associated with command_queue and
     * image are not the same or if the context associated with command_queue
     * and events in event_wait_list are not the same.
     */
    {
        piglit_cl_context context;
        cl_int err;
        /* A second, distinct context to provoke the mismatch. */
        context = piglit_cl_create_context(env->platform_id,
                                           env->context->device_ids, 1);
        if (context) {
            event = clCreateUserEvent(context->cl_ctx, &err);
            if (err == CL_SUCCESS) {
                /* The wait-list event must be signaled for the enqueue to be
                 * rejected for context mismatch rather than blocking. */
                err = clSetUserEventStatus(event, CL_COMPLETE);
                if (err == CL_SUCCESS) {
                    test(context->command_queues[0], image, pattern, origin, region,
                         0, NULL, NULL,
                         CL_INVALID_CONTEXT, &result,
                         "CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same");
                    test(queue, image, pattern, origin, region,
                         1, &event, NULL,
                         CL_INVALID_CONTEXT, &result,
                         "CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same");
                } else {
                    fprintf(stderr, "Could not set event status.\n");
                    piglit_merge_result(&result, PIGLIT_WARN);
                }
                clReleaseEvent(event);
            } else {
                fprintf(stderr, "Could not create user event.\n");
                piglit_merge_result(&result, PIGLIT_WARN);
            }
            piglit_cl_release_context(context);
        } else {
            fprintf(stderr, "Could not test triggering CL_INVALID_CONTEXT.\n");
            piglit_merge_result(&result, PIGLIT_WARN);
        }
    }

    /*
     * CL_INVALID_MEM_OBJECT if image is not a valid buffer object.
     */
    test(queue, NULL, pattern, origin, region,
         0, NULL, NULL,
         CL_INVALID_MEM_OBJECT, &result,
         "CL_INVALID_MEM_OBJECT if image is not a valid buffer object");

    /*
     * CL_INVALID_VALUE if fill_color is NULL.
     */
    test(queue, image, NULL, origin, region,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if fill_color is NULL");

    /*
     * CL_INVALID_VALUE if the region being written specified by origin and
     * region is out of bounds or if ptr is a NULL value.
     */
    tmp = origin[0];
    origin[0] = IMG_WIDTH + 1;
    test(queue, image, pattern, origin, region,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (origin)");
    origin[0] = tmp;

    tmp = region[0];
    region[0] = IMG_WIDTH + 1;
    test(queue, image, pattern, origin, region,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (region)");
    region[0] = tmp;

    test(queue, image, pattern, NULL, region,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if ptr is a NULL value (origin)");
    test(queue, image, pattern, origin, NULL,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if ptr is a NULL value (region)");

    /*
     * CL_INVALID_VALUE if values in origin and region do not follow rules
     * described in the argument description for origin and region.
     */
    /* origin[2] must be 0 for a 2D image. */
    tmp = origin[2];
    origin[2] = 1;
    test(queue, image, pattern, origin, region,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if values in origin do not follow rules described in the argument description for origin");
    origin[2] = tmp;

    /* region[2] must be 1 for a 2D image. */
    tmp = region[2];
    region[2] = 0;
    test(queue, image, pattern, origin, region,
         0, NULL, NULL,
         CL_INVALID_VALUE, &result,
         "CL_INVALID_VALUE if values in region do not follow rules described in the argument description for region");
    region[2] = tmp;

    /*
     * CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and
     * num_events_in_wait_list > 0, or event_wait_list is not NULL and
     * num_events_in_wait_list is 0, or if event objects in event_wait_list
     * are not valid events.
     */
    event = NULL;
    test(queue, image, pattern, origin, region,
         1, NULL, NULL,
         CL_INVALID_EVENT_WAIT_LIST, &result,
         "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0");
    test(queue, image, pattern, origin, region,
         0, &event, NULL,
         CL_INVALID_EVENT_WAIT_LIST, &result,
         "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is not NULL and num_events_in_wait_list is 0");
    test(queue, image, pattern, origin, region,
         1, &event, NULL,
         CL_INVALID_EVENT_WAIT_LIST, &result,
         "CL_INVALID_EVENT_WAIT_LIST if event objects in event_wait_list are not valid events");

    /*
     * CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified
     * or compute row and/or slice pitch) for image are not supported by device
     * associated with queue.
     */
    /* This is a per device test, clCreateImage would have failed before */

    /*
     * CL_INVALID_IMAGE_FORMAT if image format (image channel order and data type)
     * for image are not supported by device associated with queue.
     */
    /* This is a per device test, clCreateImage would have failed before */

    free(image_support);
    clReleaseMemObject(image);

    return result;
#else
    return PIGLIT_SKIP;
#endif
}
/* main */ int main(int argc, char **argv) { /*OpenCL variables */ cl_device_id device; cl_device_type device_type; /*to test if we are on cpu or gpu*/ cl_context context; cl_command_queue cmdQueue; /* The event variables are created only when needed */ #ifdef _UNBLOCK cl_uint num_events = 3; cl_event event[num_events]; #endif FPTYPE * buffers[3]; cl_mdsys_t cl_sys; cl_int status; int nprint, i, nthreads = 0; char restfile[BLEN], trajfile[BLEN], ergfile[BLEN], line[BLEN]; FILE *fp,*traj,*erg; mdsys_t sys; /* Start profiling */ #ifdef __PROFILING double t1, t2; t1 = second(); #endif /* handling the command line arguments */ switch (argc) { case 2: /* only the cpu/gpu argument was passed, setting default nthreads */ if( !strcmp( argv[1], "cpu" ) ) nthreads = 16; else nthreads = 1024; break; case 3: /* both the device type (cpu/gpu) and the number of threads were passed */ nthreads = strtol(argv[2],NULL,10); if( nthreads<0 ) { fprintf( stderr, "\n. The number of threads must be more than 1.\n"); PrintUsageAndExit(); } break; default: PrintUsageAndExit(); break; } /* Initialize the OpenCL environment */ if( InitOpenCLEnvironment( argv[1], &device, &context, &cmdQueue ) != CL_SUCCESS ){ fprintf( stderr, "Program Error! 
OpenCL Environment was not initialized correctly.\n" ); return 4; } /* The event initialization is performed only when needed */ #ifdef _UNBLOCK /* initialize the cl_event handler variables */ for( i = 0; i < num_events; ++i) { event[i] = clCreateUserEvent( context, NULL ); clSetUserEventStatus( event[i], CL_COMPLETE ); } #endif /* read input file */ if(get_me_a_line(stdin,line)) return 1; sys.natoms=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.mass=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.epsilon=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.sigma=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.rcut=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.box=atof(line); if(get_me_a_line(stdin,restfile)) return 1; if(get_me_a_line(stdin,trajfile)) return 1; if(get_me_a_line(stdin,ergfile)) return 1; if(get_me_a_line(stdin,line)) return 1; sys.nsteps=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.dt=atof(line); if(get_me_a_line(stdin,line)) return 1; nprint=atoi(line); /* allocate memory */ cl_sys.natoms = sys.natoms; cl_sys.rx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.ry = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.rz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * 
sizeof(FPTYPE), NULL, &status ); buffers[0] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); buffers[1] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); buffers[2] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); /* read restart */ fp = fopen( restfile, "r" ); if( fp ) { for( i = 0; i < 2 * cl_sys.natoms; ++i ){ #ifdef _USE_FLOAT fscanf( fp, "%f%f%f", buffers[0] + i, buffers[1] + i, buffers[2] + i); #else fscanf( fp, "%lf%lf%lf", buffers[0] + i, buffers[1] + i, buffers[2] + i); #endif } status = clEnqueueWriteBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0] + cl_sys.natoms, 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vy, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1] + cl_sys.natoms, 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2] + cl_sys.natoms, 0, NULL, NULL ); fclose(fp); } else { perror("cannot read restart file"); return 3; } /* initialize forces and energies.*/ sys.nfi=0; size_t globalWorkSize[1]; globalWorkSize[0] = nthreads; const char * sourcecode = #include <opencl_kernels_as_string.h> ; cl_program program = clCreateProgramWithSource( context, 1, (const char **) &sourcecode, NULL, &status ); status |= clBuildProgram( program, 0, NULL, kernelflags, NULL, NULL ); #ifdef __DEBUG size_t log_size; char log [200000]; clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, sizeof(log), log, &log_size ); fprintf( stderr, "\nLog: \n\n %s", log ); #endif cl_kernel kernel_force = clCreateKernel( program, "opencl_force", 
&status ); cl_kernel kernel_ekin = clCreateKernel( program, "opencl_ekin", &status ); cl_kernel kernel_verlet_first = clCreateKernel( program, "opencl_verlet_first", &status ); cl_kernel kernel_verlet_second = clCreateKernel( program, "opencl_verlet_second", &status ); cl_kernel kernel_azzero = clCreateKernel( program, "opencl_azzero", &status ); FPTYPE * tmp_epot; cl_mem epot_buffer; tmp_epot = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) ); epot_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status ); /* precompute some constants */ FPTYPE c12 = 4.0 * sys.epsilon * pow( sys.sigma, 12.0); FPTYPE c6 = 4.0 * sys.epsilon * pow( sys.sigma, 6.0); FPTYPE rcsq = sys.rcut * sys.rcut; FPTYPE boxby2 = HALF * sys.box; FPTYPE dtmf = HALF * sys.dt / mvsq2e / sys.mass; sys.epot = ZERO; sys.ekin = ZERO; /* Azzero force buffer */ status = clSetMultKernelArgs( kernel_azzero, 0, 4, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.natoms)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_azzero, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clSetMultKernelArgs( kernel_force, 0, 13, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.natoms), KArg(epot_buffer), KArg(c12), KArg(c6), KArg(rcsq), KArg(boxby2), KArg(sys.box)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL ); for( i = 0; i < nthreads; i++) sys.epot += tmp_epot[i]; FPTYPE * tmp_ekin; cl_mem ekin_buffer; tmp_ekin = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) ); ekin_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status ); status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(ekin_buffer)); status = 
clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL ); for( i = 0; i < nthreads; i++) sys.ekin += tmp_ekin[i]; sys.ekin *= HALF * mvsq2e * sys.mass; sys.temp = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz; erg=fopen(ergfile,"w"); traj=fopen(trajfile,"w"); printf("Starting simulation with %d atoms for %d steps.\n",sys.natoms, sys.nsteps); printf(" NFI TEMP EKIN EPOT ETOT\n"); /* download data on host */ status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); sys.rx = buffers[0]; sys.ry = buffers[1]; sys.rz = buffers[2]; output(&sys, erg, traj); /**************************************************/ /* main MD loop */ for(sys.nfi=1; sys.nfi <= sys.nsteps; ++sys.nfi) { /* propagate system and recompute energies */ /* 2) verlet_first */ status |= clSetMultKernelArgs( kernel_verlet_first, 0, 12, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(sys.dt), KArg(dtmf)); CheckSuccess(status, 2); /* When the data transfer is non blocking, this kernel has to wait the completion of part 8 (event[2]) */ #ifdef _UNBLOCK status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 1, &event[2], NULL ); #else status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); #endif /* 6) download position@device to position@host */ if ((sys.nfi % nprint) == nprint-1) { /* In non blocking mode 
(CL_FALSE) this data transfer raises events[i] */ #ifdef _UNBLOCK status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, &event[2] ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, &event[1] ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, &event[0] ); #else status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); #endif CheckSuccess(status, 6); } /* 3) force */ status |= clSetMultKernelArgs( kernel_force, 0, 13, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.natoms), KArg(epot_buffer), KArg(c12), KArg(c6), KArg(rcsq), KArg(boxby2), KArg(sys.box)); CheckSuccess(status, 3); status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); /* 7) download E_pot[i]@device and perform reduction to E_pot@host */ if ((sys.nfi % nprint) == nprint-1) { /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[1] */ #ifdef _UNBLOCK status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, &event[1] ); #else status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL ); #endif CheckSuccess(status, 7); } /* 4) verlet_second */ status |= clSetMultKernelArgs( kernel_verlet_second, 0, 9, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(sys.dt), KArg(dtmf)); 
CheckSuccess(status, 4); status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_second, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); if ((sys.nfi % nprint) == nprint-1) { /* 5) ekin */ status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(ekin_buffer)); CheckSuccess(status, 5); status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); /* 8) download E_kin[i]@device and perform reduction to E_kin@host */ /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[2] */ #ifdef _UNBLOCK status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, &event[2] ); #else status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL ); #endif CheckSuccess(status, 8); } /* 1) write output every nprint steps */ if ((sys.nfi % nprint) == 0) { /* Calling a synchronization function (only when in non blocking mode) that will wait until all the * events[i], related to the data transfers, to be completed */ #ifdef _UNBLOCK clWaitForEvents(3, event); #endif sys.rx = buffers[0]; sys.ry = buffers[1]; sys.rz = buffers[2]; /* initialize the sys.epot@host and sys.ekin@host variables to ZERO */ sys.epot = ZERO; sys.ekin = ZERO; /* reduction on the tmp_Exxx[i] buffers downloaded from the device * during parts 7 and 8 of the previous MD loop iteration */ for( i = 0; i < nthreads; i++) { sys.epot += tmp_epot[i]; sys.ekin += tmp_ekin[i]; } /* multiplying the kinetic energy by prefactors */ sys.ekin *= HALF * mvsq2e * sys.mass; sys.temp = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz; /* writing output files (positions, energies and temperature) */ output(&sys, erg, traj); } } /**************************************************/ /* End profiling */ #ifdef __PROFILING t2 = second(); fprintf( stdout, "\n\nTime of execution = %.3g 
(seconds)\n", (t2 - t1) ); #endif /* clean up: close files, free memory */ printf("Simulation Done.\n"); fclose(erg); fclose(traj); free(buffers[0]); free(buffers[1]); free(buffers[2]); return 0; }