예제 #1
0
static inline double get_time(cl::Event& event)
{
    using namespace std;
    using namespace cl;
    event.wait();
    cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    return (end - start) * 1.0e-9;
}
예제 #2
0
파일: bolt.cpp 프로젝트: daemondn/Bolt
 void wait(const bolt::cl::control &ctl, ::cl::Event &e) 
 {
     const bolt::cl::control::e_WaitMode waitMode = ctl.getWaitMode();
     if (waitMode == bolt::cl::control::BusyWait) {
         const ::cl::CommandQueue& q = ctl.getCommandQueue();
         q.flush();
         while (e.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE) {
             // spin here for fast completion detection...
         };
     } else if ((waitMode == bolt::cl::control::NiceWait) || (waitMode == bolt::cl::control::BalancedWait)) {
         cl_int l_Error = e.wait();
         V_OPENCL( l_Error, "wait call failed" );
     } else if (waitMode == bolt::cl::control::ClFinish) {
         const ::cl::CommandQueue& q = ctl.getCommandQueue();
         cl_int l_Error = q.finish();
         V_OPENCL( l_Error, "clFinish call failed" );
     }
 };
int MaxValueSimple::maxValueCL(int* values, size_t len) {
	try {
		cl_int status = CL_SUCCESS;

		/*** Ausgabe von Informationen ueber gewaehltes OpenCL-Device ***/
		/* TODO logging
		 Logger::logDebug(
		 METHOD,
		 Logger::sStream << "max compute units: " << devices[0].getInfo<
		 CL_DEVICE_MAX_COMPUTE_UNITS> ());
		 Logger::logDebug(
		 METHOD,
		 Logger::sStream << "max work item sizes: "
		 << devices[0].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES> ()[0]);
		 Logger::logDebug(
		 METHOD,
		 Logger::sStream << "max work group sizes: "
		 << devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE> ());
		 Logger::logDebug(
		 METHOD,
		 Logger::sStream << "max global mem size (KB): "
		 << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE> ()
		 / 1024);
		 Logger::logDebug(
		 METHOD,
		 Logger::sStream << "max local mem size (KB): "
		 << devices[0].getInfo<CL_DEVICE_LOCAL_MEM_SIZE> ()
		 / 1024);
		 */

		/*** Erstellen und Vorbereiten der Daten ***/
		cl::Buffer vBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
				sizeof(cl_int) * len, &values[0], &status);
		if (status != CL_SUCCESS) {
			throw cl::Error(status, "cl::Buffer values");
		}
		cmdQ.finish();

		/*** Arbeitsgroeszen berechnen ***/
		// Anzahl der Work-Items = globalSize
		// Work-Items pro Work-Group = localSize
		const size_t MAX_GROUP_SIZE = devices[0].getInfo<
				CL_DEVICE_MAX_WORK_GROUP_SIZE> ();
		size_t globalSize;
		size_t localSize;

		do {
			globalSize = len;
			localSize = MaxValueSimple::calcWorkGroupSize(globalSize,
					MAX_GROUP_SIZE);
			if (localSize == 1) {
				globalSize = ceil((double) len / WG_FAC) * WG_FAC;
				localSize = MaxValueSimple::calcWorkGroupSize(globalSize,
						MAX_GROUP_SIZE);
				/* TODO logging
				 Logger::logDebug(
				 METHOD,
				 Logger::sStream << "GlobalSize has been extended to "
				 << globalSize);
				 */
			}
			/* TODO logging
			 Logger::logDebug(METHOD,
			 Logger::sStream << "globalSize: " << globalSize);
			 Logger::logDebug(METHOD,
			 Logger::sStream << "localSize: " << localSize);
			 */

			/*** Kernel-Argumente setzen  ***/
			status = kernel.setArg(0, vBuffer);
			if (status != CL_SUCCESS) {
				throw cl::Error(status, "Kernel.SetArg");
			}

			status = kernel.setArg(1, sizeof(cl_int) * localSize, NULL);
			if (status != CL_SUCCESS) {
				throw cl::Error(status, "Kernel.SetArg");
			}

			/*** Kernel ausfuehren und auf Abarbeitung warten ***/
			cl::KernelFunctor func = kernel.bind(cmdQ, cl::NDRange(globalSize),
					cl::NDRange(localSize));

			event = func();

			event.wait();
			cmdQ.finish();

			/*
			 runtimeKernel
			 += event.getProfilingInfo<CL_PROFILING_COMMAND_END> ();
			 runtimeKernel
			 -= event.getProfilingInfo<CL_PROFILING_COMMAND_START> ();
			 */
			len = globalSize / localSize;
		} while (globalSize > localSize && localSize > 1);

		/*** Daten vom OpenCL-Device holen ***/
		// TODO nur 1. element auslesen
		status = cmdQ.enqueueReadBuffer(vBuffer, true, 0, sizeof(cl_int) * 1,
				&values[0]);
		if (status != CL_SUCCESS) {
			throw cl::Error(status, "CommandQueue.enqueueReadBuffer");
		}

		/* TODO logging
		 Logger::log(
		 METHOD,
		 TIME,
		 Logger::sStream << "timeKernel=" << 1.0e-9 * runtimeKernel
		 << ";");
		 */
		return values[0];
	} catch (cl::Error& err) {
		// TODO Logger::logError(METHOD, Logger::sStream << err.what());
		std::cerr << "[ERROR] MaxValueSimple::maxValueCL(int*, size_t): "
				<< err.what() << " (" << err.err() << ")" << std::endl;
		return MaxValueSimple::MAX_FAILURE;
	} catch (std::exception& err) {
		// TODO Logger::logError(METHOD, Logger::sStream << err.what());
		std::cerr << "[ERROR] MaxValueSimple::maxValueCL(int*, size_t): "
				<< err.what() << std::endl;
		return MaxValueSimple::MAX_FAILURE;
	}
}