Example #1
0
void Dispatcher::dispatchRequest( unsigned int uCommand,PushFramework::LogicalConnection* pClient,IncomingPacket& packet,unsigned int serviceBytes )
{
    //StopWatch dispatchWatch(m_QPFrequency);

    serviceMapT::iterator it = serviceMap.find(uCommand);
    if (it == serviceMap.end())
        return;
    //
    Service* pHandler = it->second->pService;

    //wcout << L"Locating Service : " << dispatchWatch.GetElapsedTime(false) << std::endl;

    //Mark dispatched service :

    setCurrentService(it->second->serviceName);

    StopWatch watch;
    pHandler->handle(pClient, &packet);


    double duration = watch.GetElapsedTime();
    /*	wcout << L"Service Time : " << watch.GetElapsedTime() << std::endl;
     */


    //StopWatch statsClock(m_QPFrequency);
    stats.addToDistribution(ServerStats::PerformanceProcessingTimePerService, it->second->serviceName, duration);
    //wcout << L"Stat 1 : " << statsClock.GetElapsedTime(false) << std::endl;

    stats.addToDuration(ServerStats::PerformanceProcessingTime, duration);
    //wcout << L"Stat 2 : " << statsClock.GetElapsedTime(false) << std::endl;

    UnsetCurrentService();

    //Stats. :

    stats.addToDistribution(ServerStats::BandwidthInboundVolPerRequest, it->second->serviceName, serviceBytes);
    //wcout << L"Stat 3 : " << statsClock.GetElapsedTime(false) << std::endl;


    stats.addToDistribution(ServerStats::PerformanceRequestVolPerRequest, it->second->serviceName, 1);
    //wcout << L"Stat 4 : " << statsClock.GetElapsedTime(false) << std::endl;


    //wcout << L"Dispatch Time : " << dispatchWatch.GetElapsedTime() << std::endl;
}
Example #2
0
// Sorts the data set held in _clBuffer_dataSet with a multi-pass radix sort,
// consuming the key 4 bits at a time from bit offset 0 up to _bits.
//
// Each pass runs four steps:
//   1) radixLocal      - each work-group sorts its tile using local memory
//   2) localHistogram  - builds a histogram of d = 2^b digit entries
//   3) _scan           - scans the p * 2^b entry histogram table (stored in
//                        column-major order) to compute global digit offsets
//   4) radixPermute    - uses the prefix-sum results to scatter each
//                        work-group's elements to their correct position
//
// When BENCHMARK is defined, the elapsed time of every step is printed to cout.
//
// NOTE(review): the input and output buffers are swapped after every pass, so
// which of _clBuffer_dataSet / _clBuffer_dataSetOut holds the final result
// depends on whether the number of passes (_bits / 4, rounded up) is even or odd.
void clppSort_RadixSort::sort()
{
	// Satish et al. empirically set b = 4. The size of a work-group is in hundreds of
	// work-items, depending on the concrete device and each work-item processes more than one
	// stream element, usually 4, in order to hide latencies.
	const int kElementsPerWorkItem = 4;
	const int kRadixBits = 4;                    // b: bits consumed per pass
	const int kRadixBuckets = 1 << kRadixBits;   // 2^b histogram entries per block

	StopWatch sw;

	unsigned int numBlocks = roundUpDiv(_datasetSize, _workgroupSize * kElementsPerWorkItem);
	unsigned int Ndiv4 = roundUpDiv(_datasetSize, kElementsPerWorkItem);

	size_t global[1] = {toMultipleOf(Ndiv4, _workgroupSize)};
	size_t local[1] = {_workgroupSize};

	// dataA is the buffer read by the current pass, dataB the one written by
	// the permute step; they trade roles at the end of each pass.
	cl_mem* dataA = &_clBuffer_dataSet;
	cl_mem* dataB = &_clBuffer_dataSetOut;

	for (unsigned int bitOffset = 0; bitOffset < _bits; bitOffset += kRadixBits)
	{
		// 1) Each workgroup sorts its tile by using local memory
#ifdef BENCHMARK
		sw.StartTimer();
#endif

		radixLocal(global, local, dataA, bitOffset);

#ifdef BENCHMARK
		// NOTE(review): unlike the scan step below, no completion wait is visible
		// here before the timer stops; if radixLocal only enqueues work this
		// measures enqueue time, not execution time - confirm.
		sw.StopTimer();
		cout << "Local sort       " << sw.GetElapsedTime() << endl;

		sw.StartTimer();
#endif

		// 2) Create an histogram of d=2^b digits entries
		localHistogram(global, local, dataA, &_clBuffer_radixHist1, &_clBuffer_radixHist2, bitOffset);

#ifdef BENCHMARK
		sw.StopTimer();
		cout << "Local histogram  " << sw.GetElapsedTime() << endl;

		sw.StartTimer();
#endif

		// 3) Scan the p*2^b = p*(16) entry histogram table. Stored in column-major order, computes global digit offsets.
		_scan->pushCLDatas(_clBuffer_radixHist1, kRadixBuckets * numBlocks);
		_scan->scan();

#ifdef BENCHMARK
		_scan->waitCompletion();
		sw.StopTimer();
		cout << "Global scan      " << sw.GetElapsedTime() << endl;

		sw.StartTimer();
#endif

		// 4) Prefix sum results are used to scatter each work-group's elements to their correct position.
		radixPermute(global, local, dataA, dataB, &_clBuffer_radixHist1, &_clBuffer_radixHist2, bitOffset, numBlocks);

#ifdef BENCHMARK
		sw.StopTimer();
		cout << "Global reorder   " << sw.GetElapsedTime() << endl;
#endif

		// The buffer just written becomes the input of the next pass.
		std::swap(dataA, dataB);
	}
}