int main(int argc, char **argv) {
	////Part 1 - handle command line options such as device selection, verbosity, etc.
	int platform_id = 0;
	int device_id = 0;

	for (int i = 1; i < argc; i++)	{
		if ((strcmp(argv[i], "-p") == 0) && (i < (argc - 1))) { platform_id = atoi(argv[++i]); }
		else if ((strcmp(argv[i], "-d") == 0) && (i < (argc - 1))) { device_id = atoi(argv[++i]); }
		else if (strcmp(argv[i], "-l") == 0) { std::cout << ListPlatformsDevices() << std::endl; }
		else if (strcmp(argv[i], "-h") == 0) { print_help(); }
	}

	ifstream inFile; //create obkject of class
	inFile.open("temp_lincolnshire.txt"); //open file

	//check for error
	if (inFile.fail()) {
		cerr << "Error opening File" <<endl;
		exit(1);
	}
	else {
		cout << "Reading file, this may take some time" << endl;
	}

	int count = 0;
	string location;
	int year;
	int month;
	int day;
	int time;
	float temp;

	std::vector<string> locations;
	std::vector<int> years;
	std::vector<int> months;
	std::vector<int> days;
	std::vector<int> times;
	std::vector<int> temps;

	//read file until end is reached
	while (!inFile.eof()) {
		inFile >> location >> year >> month >> day >> time >> temp;

		locations.push_back(location);
		years.push_back(year);
		months.push_back(month);
		days.push_back(day);
		times.push_back(time);
		temps.push_back((int)temp);
		count++;
	}

	cout << "File read complete, " << count << " items found" << endl;

	//detect any potential exceptions
	try {
		//Part 2 - host operations
		//2.1 Select computing devices
		cl::Context context = GetContext(platform_id, device_id);

		//display the selected device
		std::cout << "Runinng on " << GetPlatformName(platform_id) << ", " << GetDeviceName(platform_id, device_id) << std::endl;

		//create a queue to which we will push commands for the device
		cl::CommandQueue queue(context);

		//2.2 Load & build the device code
		cl::Program::Sources sources;

		AddSources(sources, "my_kernels3.cl");

		cl::Program program(context, sources);

		//build and debug the kernel code
		try {
			program.build();
		}
		catch (const cl::Error& err) {
			std::cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(context.getInfo<CL_CONTEXT_DEVICES>()[0]) << std::endl;
			std::cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(context.getInfo<CL_CONTEXT_DEVICES>()[0]) << std::endl;
			std::cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(context.getInfo<CL_CONTEXT_DEVICES>()[0]) << std::endl;
			throw err;
		}

		typedef int mytype;

		//input
		std::vector<int> A = temps; //AVG / SUM
		
		std::vector<int> minTemps = temps; //MIN
		
		std::vector<int> maxTemps = temps; //MAX


		//number of input elements
		size_t original_Length = A.size();

		//the following part adjusts the length of the input vector so it can be run for a specific workgroup size
		//if the total input length is divisible by the workgroup size
		//this makes the code more efficient
		size_t local_size = 1024;
		
		size_t padding_size = A.size() % local_size;

		//if the input vector is not a multiple of the local_size
		//insert additional neutral elements so that the total will not be affected
		if (padding_size) {
			
			//AVERAGE
			std::vector<int> A_ext(local_size-padding_size, 0);

			//MINIMUM
			std::vector<int> minTemps_ext(local_size - padding_size, INT_MAX);

			//MAXIMUM
			std::vector<int> maxTemps_ext(local_size - padding_size, INT_MIN);
			
			//append that extra vector to our input
			A.insert(A.end(), A_ext.begin(), A_ext.end()); 
			minTemps.insert(minTemps.end(), minTemps_ext.begin(), minTemps_ext.end());
			maxTemps.insert(maxTemps.end(), maxTemps_ext.begin(), maxTemps_ext.end());
		}

		size_t input_elements = A.size();	//number of input elements with padding
		size_t input_size = A.size()*sizeof(mytype);	//size in bytes
		size_t nr_groups = input_elements / local_size;

		//host - output
		//max - min + 1 for hist size
		std::vector<mytype> output(1);
		size_t output_size = output.size()*sizeof(mytype);//size in bytes

		//device - buffers
		cl::Buffer buffer_A(context, CL_MEM_READ_ONLY, input_size);
		cl::Buffer buffer_minTemps(context, CL_MEM_READ_ONLY, input_size);
		cl::Buffer buffer_maxTemps(context, CL_MEM_READ_ONLY, input_size);

		cl::Buffer buffer_output(context, CL_MEM_READ_WRITE, output_size);

		//-----------------------------------------------------------------------------------------------------------------

		//5.1 copy array A to and initialise other arrays on device memory
		queue.enqueueWriteBuffer(buffer_minTemps, CL_TRUE, 0, input_size, &minTemps[0]);
		queue.enqueueFillBuffer(buffer_output, INT_MAX, 0, output_size);

		//5.2 Setup and execute all kernels (i.e. device code)
		cl::Kernel kernel_1 = cl::Kernel(program, "minVal");
		kernel_1.setArg(0, buffer_minTemps);
		kernel_1.setArg(1, buffer_output);
		kernel_1.setArg(2, cl::Local(local_size*sizeof(mytype)));//local memory size

		//call all kernels in a sequence
		queue.enqueueNDRangeKernel(kernel_1, cl::NullRange, cl::NDRange(input_elements), cl::NDRange(local_size));

		//5.3 Copy the result from device to host
		queue.enqueueReadBuffer(buffer_output, CL_TRUE, 0, output_size, &output[0]);

		int minimumTemp = output[0];

		//------------------------------------------------------------------------------------------------------------------

		//5.1 copy array A to and initialise other arrays on device memory
		queue.enqueueWriteBuffer(buffer_maxTemps, CL_TRUE, 0, input_size, &maxTemps[0]);
		queue.enqueueFillBuffer(buffer_output, INT_MIN, 0, output_size);

																   //5.2 Setup and execute all kernels (i.e. device code)
		cl::Kernel kernel_2 = cl::Kernel(program, "maxVal");
		kernel_2.setArg(0, buffer_maxTemps);
		kernel_2.setArg(1, buffer_output);
		kernel_2.setArg(2, cl::Local(local_size*sizeof(mytype)));//local memory size

																 //call all kernels in a sequence
		queue.enqueueNDRangeKernel(kernel_2, cl::NullRange, cl::NDRange(input_elements), cl::NDRange(local_size));

		//5.3 Copy the result from device to host
		queue.enqueueReadBuffer(buffer_output, CL_TRUE, 0, output_size, &output[0]);

		int maximumTemp = output[0];

		//-------------------------------------------------------------------------------------------------------------------

		//5.1 copy array A to and initialise other arrays on device memory
		queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, input_size, &A[0]);
		queue.enqueueFillBuffer(buffer_output, 0, 0, output_size);

		//5.2 Setup and execute all kernels (i.e. device code)
		cl::Kernel kernel_3 = cl::Kernel(program, "sum");
		kernel_3.setArg(0, buffer_A);
		kernel_3.setArg(1, buffer_output);
		kernel_3.setArg(2, cl::Local(local_size*sizeof(mytype)));//local memory size

																 //call all kernels in a sequence
		queue.enqueueNDRangeKernel(kernel_3, cl::NullRange, cl::NDRange(input_elements), cl::NDRange(local_size));

		//5.3 Copy the result from device to host
		queue.enqueueReadBuffer(buffer_output, CL_TRUE, 0, output_size, &output[0]);

		double avgTemp = 1.00* output[0] / original_Length;

		//-----------------------------------------------------------------------------------------------------------------

		int binCount = 20;
		int range = (maximumTemp - minimumTemp) + 1;
		int minVal = minimumTemp;

		cout << "How many bins would you like in your histogram" << endl;
		cin >> binCount;
				
		std::vector<mytype> histOutput(binCount);
		size_t hist_output_size = histOutput.size()*sizeof(mytype);//size in bytes

		std::vector<mytype> his_bin = { binCount };
		size_t hist_binCount_size = his_bin.size()*sizeof(mytype);//size in bytes

		std::vector<mytype> his_range = { range };
		size_t hist_range_size = his_range.size()*sizeof(mytype);//size in bytes

		std::vector<mytype> his_min = { minimumTemp };
		size_t hist_min_size = his_min.size()*sizeof(mytype);//size in bytes
		
		//device - buffer
		cl::Buffer buffer_histOutput(context, CL_MEM_READ_WRITE, hist_output_size);
		cl::Buffer buffer_bin(context, CL_MEM_READ_WRITE, hist_binCount_size); 
		cl::Buffer buffer_range(context, CL_MEM_READ_WRITE, hist_range_size); 
		cl::Buffer buffer_minimum(context, CL_MEM_READ_WRITE, hist_min_size); 

		//5.1 copy array A to and initialise other arrays on device memory
		queue.enqueueWriteBuffer(buffer_minTemps, CL_TRUE, 0, input_size, &minTemps[0]);
		queue.enqueueFillBuffer(buffer_histOutput, 0, 0, hist_output_size);
		queue.enqueueWriteBuffer(buffer_bin, CL_TRUE, 0, hist_binCount_size, &his_bin[0]); 
		queue.enqueueWriteBuffer(buffer_range, CL_TRUE, 0, hist_range_size, &his_range[0]);
		queue.enqueueWriteBuffer(buffer_minimum, CL_TRUE, 0, hist_min_size, &his_min[0]);

		//5.2 Setup and execute all kernels (i.e. device code)
		cl::Kernel kernel_4 = cl::Kernel(program, "hist2");
		kernel_4.setArg(0, buffer_minTemps);
		kernel_4.setArg(1, buffer_histOutput);
		kernel_4.setArg(2, buffer_bin);
		kernel_4.setArg(3, buffer_range);
		kernel_4.setArg(4, buffer_minimum);
		
		//call all kernels in a sequence
		queue.enqueueNDRangeKernel(kernel_4, cl::NullRange, cl::NDRange(input_elements), cl::NDRange(local_size));

		//5.3 Copy the result from device to host
		queue.enqueueReadBuffer(buffer_histOutput, CL_TRUE, 0, hist_output_size, &histOutput[0]);


		//--------------------------------------------------------------------------------------------------------------------

		std::cout << "\nMinimum Temp = " << minimumTemp << std::endl;
		std::cout << "Maximum Temp = " << maximumTemp << std::endl;
		std::cout << "Average Temp = " << avgTemp << std::endl;


		std::cout << "\nHistogram:\n " << std::endl;
		std::cout << histOutput << std::endl;

		cin.get();

	}
	catch (cl::Error err) {
		std::cerr << "ERROR: " << err.what() << ", " << getErrorString(err.err()) << std::endl;
	}	


	cin.get();

	return 0;
}
// Example no. 2
// 0
int main() {
    // get all platforms (drivers), e.g. NVIDIA
    std::vector<cl::Platform> all_platforms;
    cl::Platform::get(&all_platforms);

    if (all_platforms.size()==0) {
        std::cout<<" No platforms found. Check OpenCL installation!\n";
        exit(1);
    }
    cl::Platform default_platform=all_platforms[0];
    std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<"\n";

    // get default device (CPUs, GPUs) of the default platform
    std::vector<cl::Device> all_devices;
    default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
    if(all_devices.size()==0){
        std::cout<<" No devices found. Check OpenCL installation!\n";
        exit(1);
    }

    // use device[1] because that's a GPU; device[0] is the CPU
    cl::Device default_device=all_devices[1];
    std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<"\n";

    // a context is like a "runtime link" to the device and platform;
    // i.e. communication is possible
    cl::Context context({default_device});

    // create the program that we want to execute on the device
    cl::Program::Sources sources;

    // calculates for each element; C = A + B
    std::string kernel_code=
        "   void kernel simple_add(global const int* A, global const int* B, global int* C, "
        "                          global const int* N) {"
        "       int ID, Nthreads, n, ratio, start, stop;"
        ""
        "       ID = get_global_id(0);"
        "       Nthreads = get_global_size(0);"
        "       n = N[0];"
        ""
        "       ratio = (n / Nthreads);"  // number of elements for each thread
        "       start = ratio * ID;"
        "       stop  = ratio * (ID + 1);"
        ""
        "       for (int i=start; i<stop; i++)"
        "           C[i] = A[i] + B[i];"
        "   }";
    sources.push_back({kernel_code.c_str(), kernel_code.length()});

    cl::Program program(context, sources);
    if (program.build({default_device}) != CL_SUCCESS) {
        std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl;
        exit(1);
    }
    
    // apparently OpenCL only likes arrays ...
    // N holds the number of elements in the vectors we want to add
    int N[1] = {100};
    int n = N[0];

    // create buffers on device (allocate space on GPU)
    cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * n);
    cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * n);
    cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * n);
    cl::Buffer buffer_N(context, CL_MEM_READ_ONLY,  sizeof(int));

    // create things on here (CPU)
    int A[n], B[n];
    for (int i=0; i<n; i++) {
        A[i] = i;
        B[i] = n - i - 1;
    }
    // create a queue (a queue of commands that the GPU will execute)
    cl::CommandQueue queue(context, default_device);

    // push write commands to queue
    queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int)*n, A);
    queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int)*n, B);
    queue.enqueueWriteBuffer(buffer_N, CL_TRUE, 0, sizeof(int),   N);

    // RUN ZE KERNEL
    cl::KernelFunctor simple_add(cl::Kernel(program, "simple_add"), queue, cl::NullRange, cl::NDRange(10), cl::NullRange);
    simple_add(buffer_A, buffer_B, buffer_C, buffer_N);

    int C[n];
    // read result from GPU to here
    queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int)*n, C);

    std::cout << "result: {";
    for (int i=0; i<n; i++) {
        std::cout << C[i] << " ";
    }
    std::cout << "}" << std::endl;

    return 0;
}