Exemplo n.º 1
0
int main(int argc, char** argv) {
	cuInit(0);
	
	DevicePtr device;
	CUresult result = CreateCuDevice(0, &device);

	ContextPtr context;
	result = CreateCuContext(device, 0, &context);

	std::auto_ptr<MaxIndexEngine> engine;
	result = CreateMaxIndexEngine("../../src/maxindex/maxindex.cubin", &engine);
	if(CUDA_SUCCESS != result) {
		printf("Could not create max index engine.\n");
		return 0;
	}

	// Search through 5 million elements.
	const int NumElements = 5000000;
	std::vector<float> data(NumElements);
	std::tr1::uniform_real<float> r(-1e9, 1e9);
	for(int i(0); i < NumElements; ++i)
		data[i] = r(mt19937);
	
	// Use CPU to find the max element and index.
	float maxX = -1e37f;
	int maxIndex = 0;

	for(int i(0); i < NumElements; ++i)
		if(data[i] > maxX) {
			maxX = data[i];
			maxIndex = i;
		}

	printf("CPU says max x = %f, max index = %d.\n", maxX, maxIndex);

	// Use GPU to find the max element and index.
	DeviceMemPtr deviceData;
	context->MemAlloc(data, &deviceData);

	result = FindGlobalMax(engine.get(), deviceData->Handle(), NumElements, 
		&maxX, &maxIndex);
	if(CUDA_SUCCESS != result) {
		printf("Failure running max index kernel.\n");
		return 0;
	}

	printf("GPU says max x = %f, max index = %d.\n", maxX, maxIndex);
}
Exemplo n.º 2
0
int main(int argc, char** argv) {
	cuInit(0);

	DevicePtr device;
	CUresult result = CreateCuDevice(0, &device);

	ContextPtr context;
	result = CreateCuContext(device, 0, &context);

	scanEngine_t engine;
	scanStatus_t status = scanCreateEngine(
		"../../src/mgpuscan/globalscan.cubin", &engine);

	int count = 1<< 19;
	std::vector<int> vals(count);

	std::tr1::uniform_int<int> r(0, 15);
	for(int i(0); i < count; ++i)
		vals[i] = r(mt19937);

	DeviceMemPtr deviceMem;
	result = context->MemAlloc(vals, &deviceMem);

	uint scanTotal;
	status = scanArray(engine, deviceMem->Handle(), count, &scanTotal, false);
	std::vector<int> deviceScan;
	deviceMem->ToHost(deviceScan);

	std::vector<int> hostScan(count);
	for(int i(1); i < count; ++i) {
		hostScan[i] = hostScan[i - 1] + vals[i - 1];
		
	}

	scanDestroyEngine(engine);

	bool success = hostScan == deviceScan;
	if(success) printf("Global scan success.\n");
	else printf("Global scan failure.\n");

	return 0;
}