void GenerationDevice::bufferPlots() throw (std::exception) {
	std::size_t offsetGpu = 0;
	std::size_t offsetCpu = 0;
	for(unsigned int i = 0, end = m_config->getGlobalWorkSize() ; i < end ; ++i, offsetGpu += GEN_SIZE, offsetCpu += PLOT_SIZE) {
		int error = clEnqueueReadBuffer(m_commandQueue, m_bufferDevice, CL_TRUE, sizeof(unsigned char) * offsetGpu, sizeof(unsigned char) * PLOT_SIZE, m_bufferCpu + offsetCpu, 0, 0, 0);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Error in synchronous read");
		}
	}
}
void GenerationDevice::computePlots(unsigned long long p_address, unsigned long long p_startNonce, unsigned int p_workSize) throw (std::exception) {
	if(p_workSize > m_config->getGlobalWorkSize()) {
		throw std::runtime_error("Global work size too low for the requested work size");
	}

	cl_int error;
	std::size_t globalWorkSize = m_config->getGlobalWorkSize();
	std::size_t localWorkSize = m_config->getLocalWorkSize();

	error = clSetKernelArg(m_kernels[0], 1, sizeof(unsigned int), (void*)&p_workSize);
	error |= clSetKernelArg(m_kernels[0], 2, sizeof(unsigned long long), (void*)&p_address);
	error |= clSetKernelArg(m_kernels[0], 3, sizeof(unsigned long long), (void*)&p_startNonce);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to set the OpenCL step1 kernel arguments");
	}

	error = clEnqueueNDRangeKernel(m_commandQueue, m_kernels[0], 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Error in step1 kernel launch");
	}

	unsigned int hashesNumber = m_config->getHashesNumber();
	unsigned int hashesSize = hashesNumber * HASH_SIZE;
	for(unsigned int i = 0 ; i < PLOT_SIZE ; i += hashesSize) {
		unsigned int hashesOffset = PLOT_SIZE - i;

		error = clSetKernelArg(m_kernels[1], 1, sizeof(unsigned int), (void*)&p_workSize);
		error |= clSetKernelArg(m_kernels[1], 2, sizeof(unsigned long long), (void*)&p_startNonce);
		error |= clSetKernelArg(m_kernels[1], 3, sizeof(unsigned int), (void*)&hashesOffset);
		error |= clSetKernelArg(m_kernels[1], 4, sizeof(unsigned int), (void*)&hashesNumber);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL step2 kernel arguments");
		}

		error = clEnqueueNDRangeKernel(m_commandQueue, m_kernels[1], 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Error in step2 kernel launch");
		}

		error = clFinish(m_commandQueue);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Error in step2 kernel finish");
		}
	}

	error = clSetKernelArg(m_kernels[2], 1, sizeof(unsigned int), (void*)&p_workSize);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to set the OpenCL step3 kernel arguments");
	}

	error = clEnqueueNDRangeKernel(m_commandQueue, m_kernels[2], 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Error in step3 kernel launch");
	}
}
GenerationDevice::GenerationDevice(const std::shared_ptr<DeviceConfig>& p_config, const std::shared_ptr<OpenclDevice>& p_device) throw (std::exception)
: m_config(p_config), m_device(p_device), m_context(0), m_commandQueue(0), m_bufferDevice(0), m_program(0), m_kernels{0, 0, 0}, m_available(true) {
	m_bufferCpu = new unsigned char[getMemorySize()];

	cl_int error;

	m_context = clCreateContext(0, 1, &m_device->getHandle(), NULL, NULL, &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL context");
	}

	m_commandQueue = clCreateCommandQueue(m_context, m_device->getHandle(), 0, &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL command queue");
	}

	m_bufferDevice = clCreateBuffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned char) * m_config->getGlobalWorkSize() * GEN_SIZE, 0, &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL GPU buffer");
	}

	std::string source(loadSource(KERNEL_PATH + "/nonce.cl"));
	const char* sources[] = {source.c_str()};
	std::size_t sourcesLength[] = {source.length()};
	m_program = clCreateProgramWithSource(m_context, 1, sources, sourcesLength, &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL program");
	}

	std::string includePath("-I " + KERNEL_PATH);
	error = clBuildProgram(m_program, 1, &m_device->getHandle(), includePath.c_str(), 0, 0);
	if(error != CL_SUCCESS) {
		std::size_t logSize;
		cl_int subError = clGetProgramBuildInfo(m_program, m_device->getHandle(), CL_PROGRAM_BUILD_LOG, 0, 0, &logSize);
		if(subError != CL_SUCCESS) {
			throw OpenclError(subError, "Unable to retrieve the OpenCL build info size");
		}

		std::unique_ptr<char[]> log(new char[logSize]);
		subError = clGetProgramBuildInfo(m_program, m_device->getHandle(), CL_PROGRAM_BUILD_LOG, logSize, (void*)log.get(), 0);
		if(subError != CL_SUCCESS) {
			throw OpenclError(subError, "Unable to retrieve the OpenCL build info");
		}

		std::ostringstream message;
		message << "Unable to build the OpenCL program" << std::endl;
		message << log.get();

		throw OpenclError(error, message.str());
	}

	m_kernels[0] = clCreateKernel(m_program, "nonce_step1", &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL step1 kernel");
	}

	error = clSetKernelArg(m_kernels[0], 0, sizeof(cl_mem), (void*)&m_bufferDevice);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to set the OpenCL step1 kernel arguments");
	}

	m_kernels[1] = clCreateKernel(m_program, "nonce_step2", &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL step2 kernel");
	}

	error = clSetKernelArg(m_kernels[1], 0, sizeof(cl_mem), (void*)&m_bufferDevice);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to set the OpenCL step2 kernel arguments");
	}

	m_kernels[2] = clCreateKernel(m_program, "nonce_step3", &error);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to create the OpenCL step3 kernel");
	}

	error = clSetKernelArg(m_kernels[2], 0, sizeof(cl_mem), (void*)&m_bufferDevice);
	if(error != CL_SUCCESS) {
		throw OpenclError(error, "Unable to set the OpenCL step3 kernel arguments");
	}
}
Пример #4
0
int CommandGenerate::execute(const std::vector<std::string>& p_args) {
	if(p_args.size() < 10) {
		help();
		return -1;
	}

	unsigned int platformId = atol(p_args[1].c_str());
	unsigned int deviceId = atol(p_args[2].c_str());
	unsigned int staggerSize = atol(p_args[3].c_str());
	unsigned int threadsNumber = atol(p_args[4].c_str());
	unsigned int hashesNumber = atol(p_args[5].c_str());
	unsigned int nonceSize = PLOT_SIZE * staggerSize;

	std::cerr << "Threads number: " << threadsNumber << std::endl;
	std::cerr << "Hashes number: " << hashesNumber << std::endl;

	unsigned int numjobs = (p_args.size() - 5)/4;
	std::cerr << numjobs << " plot(s) to do." << std::endl;
	unsigned int staggerMbSize = staggerSize / 4;
	std::cerr << "Non-GPU memory usage: " << staggerMbSize*numjobs << "MB" << std::endl;
	
	std::vector<std::string> paths(numjobs);
	std::vector<std::ofstream *> out_files(numjobs);
	std::vector<unsigned long long> addresses(numjobs);
	std::vector<unsigned long long> startNonces(numjobs);
	std::vector<unsigned long long> endNonces(numjobs);
	std::vector<unsigned int> noncesNumbers(numjobs);
	std::vector<unsigned char*> buffersCpu(numjobs);
	std::vector<bool> saving_thread_flags(numjobs);
	std::vector<std::future<void>> save_threads(numjobs);
	unsigned long long maxNonceNumber = 0;
	unsigned long long totalNonces = 0;

	int returnCode = 0;

	try {
		for (unsigned int i = 0; i < numjobs; i++) {
			std::cerr << "----" << std::endl;
			std::cerr << "Job number " << i << std::endl;
			unsigned int argstart = 6 + i*4;
			paths[i] = std::string(p_args[argstart]);
			addresses[i] = strtoull(p_args[argstart+1].c_str(), NULL, 10);
			startNonces[i] = strtoull(p_args[argstart+2].c_str(), NULL, 10);
			noncesNumbers[i] = atol(p_args[argstart+3].c_str());
			maxNonceNumber = std::max(maxNonceNumber, (long long unsigned int)noncesNumbers[i]);
			totalNonces += noncesNumbers[i];

			std::ostringstream outFile;
			outFile << paths[i] << "/" << addresses[i] << "_" << startNonces[i] << "_" << \
				noncesNumbers[i] << "_" << staggerSize;
			std::ios_base::openmode file_mode = std::ios::out | std::ios::binary | std::ios::trunc;
			out_files[i] = new std::ofstream(outFile.str(), file_mode);
			assert(out_files[i]);

			if(noncesNumbers[i] % staggerSize != 0) {
				noncesNumbers[i] -= noncesNumbers[i] % staggerSize;
				noncesNumbers[i] += staggerSize;
			}

			endNonces[i] = startNonces[i] + noncesNumbers[i];
			unsigned int noncesGbSize = noncesNumbers[i] / 4 / 1024;
			std::cerr << "Path: " << outFile.str() << std::endl;
			std::cerr << "Nonces: " << startNonces[i] << " to " << endNonces[i] << " (" << noncesGbSize << " GB)" << std::endl;
			std::cerr << "Creating CPU buffer" << std::endl;
			buffersCpu[i] = new unsigned char[nonceSize];
			if(!buffersCpu[i]) {
				throw std::runtime_error("Unable to create the CPU buffer (probably out of host memory.)");
			}
			saving_thread_flags[i] = false;
			std::cerr << "----" << std::endl;
		}

		cl_platform_id platforms[4];
		cl_uint platformsNumber;
		cl_device_id devices[32];
		cl_uint devicesNumber;
		cl_context context = 0;
		cl_command_queue commandQueue = 0;
		cl_mem bufferGpuGen = 0;
		cl_mem bufferGpuScoops = 0;
		cl_program program = 0;
		cl_kernel kernelStep1 = 0;
		cl_kernel kernelStep2 = 0;
		cl_kernel kernelStep3 = 0;

		int error;

		std::cerr << "Retrieving OpenCL platforms" << std::endl;
		error = clGetPlatformIDs(4, platforms, &platformsNumber);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to retrieve the OpenCL platforms");
		}

		if(platformId >= platformsNumber) {
			throw std::runtime_error("No platform found with the provided id");
		}

		std::cerr << "Retrieving OpenCL GPU devices" << std::endl;
		error = clGetDeviceIDs(platforms[platformId], CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, 32, devices, &devicesNumber);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to retrieve the OpenCL devices");
		}

		if(deviceId >= devicesNumber) {
			throw std::runtime_error("No device found with the provided id");
		}

		std::cerr << "Creating OpenCL context" << std::endl;
		context = clCreateContext(0, 1, &devices[deviceId], NULL, NULL, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL context");
		}

		std::cerr << "Creating OpenCL command queue" << std::endl;
		commandQueue = clCreateCommandQueue(context, devices[deviceId], 0, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL command queue");
		}

		std::cerr << "Creating OpenCL GPU generation buffer" << std::endl;
		bufferGpuGen = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * GEN_SIZE * staggerSize, 0, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL GPU generation buffer");
		}

		std::cerr << "Creating OpenCL GPU scoops buffer" << std::endl;
		bufferGpuScoops = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uchar) * nonceSize, 0, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL GPU scoops buffer");
		}

		std::cerr << "Creating OpenCL program" << std::endl;
		std::string source = loadSource("kernel/nonce.cl");
		const char* sources[] = {source.c_str()};
		size_t sourcesLength[] = {source.length()};
		program = clCreateProgramWithSource(context, 1, sources, sourcesLength, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL program");
		}

		std::cerr << "Building OpenCL program" << std::endl;
		error = clBuildProgram(program, 1, &devices[deviceId], "-I kernel", 0, 0);
		if(error != CL_SUCCESS) {
			size_t logSize;
			clGetProgramBuildInfo(program, devices[deviceId], CL_PROGRAM_BUILD_LOG, 0, 0, &logSize);

			char* log = new char[logSize];
			clGetProgramBuildInfo(program, devices[deviceId], CL_PROGRAM_BUILD_LOG, logSize, (void*)log, 0);
			std::cerr << log << std::endl;
			delete[] log;

			throw OpenclError(error, "Unable to build the OpenCL program");
		}

		std::cerr << "Creating OpenCL step1 kernel" << std::endl;
		kernelStep1 = clCreateKernel(program, "nonce_step1", &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL kernel");
		}

		std::cerr << "Setting OpenCL step1 kernel static arguments" << std::endl;
		error = clSetKernelArg(kernelStep1, 2, sizeof(cl_mem), (void*)&bufferGpuGen);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL kernel arguments");
		}

		std::cerr << "Creating OpenCL step2 kernel" << std::endl;
		kernelStep2 = clCreateKernel(program, "nonce_step2", &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL kernel");
		}

		std::cerr << "Setting OpenCL step2 kernel static arguments" << std::endl;
		error = clSetKernelArg(kernelStep2, 1, sizeof(cl_mem), (void*)&bufferGpuGen);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL kernel arguments");
		}

		std::cerr << "Creating OpenCL step3 kernel" << std::endl;
		kernelStep3 = clCreateKernel(program, "nonce_step3", &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL kernel");
		}

		std::cerr << "Setting OpenCL step3 kernel static arguments" << std::endl;
		error = clSetKernelArg(kernelStep3, 0, sizeof(cl_uint), (void*)&staggerSize);
		error = clSetKernelArg(kernelStep3, 1, sizeof(cl_mem), (void*)&bufferGpuGen);
		error = clSetKernelArg(kernelStep3, 2, sizeof(cl_mem), (void*)&bufferGpuScoops);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL kernel arguments");
		}

		size_t globalWorkSize = staggerSize;
		size_t localWorkSize = (staggerSize < threadsNumber) ? staggerSize : threadsNumber;
		time_t startTime = time(0);
		unsigned int totalNoncesCompleted = 0;
		for (unsigned long long nonce_ordinal = 0; nonce_ordinal < maxNonceNumber; nonce_ordinal += staggerSize) {
			for (unsigned int jobnum = 0; jobnum < paths.size(); jobnum += 1) {
				unsigned long long nonce = startNonces[jobnum] + nonce_ordinal;
				if (nonce > endNonces[jobnum]) {
				  break;
				}

				std::cout << "Running with start nonce " << nonce << std::endl;
				// Is a cl_ulong always an unsigned long long?
				unsigned int error = 0;
				error = clSetKernelArg(kernelStep1, 0, sizeof(cl_ulong), (void*)&addresses[jobnum]);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Unable to set the OpenCL step1 kernel arguments");
				}
				error = clSetKernelArg(kernelStep1, 1, sizeof(cl_ulong), (void*)&nonce);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Unable to set the OpenCL step1 kernel arguments");
				}

				error = clEnqueueNDRangeKernel(commandQueue, kernelStep1, 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Error in step1 kernel launch");
				}

				unsigned int hashesSize = hashesNumber * HASH_SIZE;
				for(int hashesOffset = PLOT_SIZE ; hashesOffset > 0 ; hashesOffset -= hashesSize) {
					error = clSetKernelArg(kernelStep2, 0, sizeof(cl_ulong), (void*)&nonce);
					error = clSetKernelArg(kernelStep2, 2, sizeof(cl_uint), (void*)&hashesOffset);
					error = clSetKernelArg(kernelStep2, 3, sizeof(cl_uint), (void*)&hashesNumber);
					if(error != CL_SUCCESS) {
						throw OpenclError(error, "Unable to set the OpenCL step2 kernel arguments");
					}

					error = clEnqueueNDRangeKernel(commandQueue, kernelStep2, 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
					if(error != CL_SUCCESS) {
						throw OpenclError(error, "Error in step2 kernel launch");
					}

					error = clFinish(commandQueue);
					if(error != CL_SUCCESS) {
						throw OpenclError(error, "Error in step2 kernel finish");
					}
				}

				totalNoncesCompleted += staggerSize;
				double percent = 100.0 * (double)totalNoncesCompleted / totalNonces;
				time_t currentTime = time(0);
				double speed = (double)totalNoncesCompleted / difftime(currentTime, startTime) * 60.0;
				double estimatedTime = (double)(totalNonces - totalNoncesCompleted) / speed;
				std::cerr << "\r" << percent << "% (" << totalNoncesCompleted << "/" << totalNonces << " nonces)";
				std::cerr << ", " << speed << " nonces/minutes";
				std::cerr << ", ETA: " << ((int)estimatedTime / 60) << "h" << ((int)estimatedTime % 60) << "m" << ((int)(estimatedTime * 60.0) % 60) << "s";
				std::cerr << "...                    ";

				error = clEnqueueNDRangeKernel(commandQueue, kernelStep3, 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Error in step3 kernel launch");
				}

				if (saving_thread_flags[jobnum]) {
					save_threads[jobnum].wait(); // Wait for last job to finish
					saving_thread_flags[jobnum] = false;
				}

				error = clEnqueueReadBuffer(commandQueue, bufferGpuScoops, CL_TRUE, 0, sizeof(cl_uchar) * nonceSize, buffersCpu[jobnum], 0, 0, 0);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Error in synchronous read");
				}
				saving_thread_flags[jobnum] = true;
				save_threads[jobnum] = std::async(std::launch::async, save_nonces, nonceSize, out_files[jobnum], buffersCpu[jobnum]);
			}
		}

		//Clean up
		for (unsigned int i = 0; i < paths.size(); i += 1) {
		  if (saving_thread_flags[i]) {
		    std::cerr << "waiting for final save to " << paths[i] << " to finish" << std::endl;
		    save_threads[i].wait();
		    saving_thread_flags[i] = false;
		    std::cerr << "done waiting for final save" << std::endl;
		    if (buffersCpu[i]) {
		      delete[] buffersCpu[i];
		    }
		  }
		}
		
		if(kernelStep3) { clReleaseKernel(kernelStep3); }
		if(kernelStep2) { clReleaseKernel(kernelStep2); }
		if(kernelStep1) { clReleaseKernel(kernelStep1); }
		if(program) { clReleaseProgram(program); }
		if(bufferGpuGen) { clReleaseMemObject(bufferGpuGen); }
		if(bufferGpuScoops) { clReleaseMemObject(bufferGpuScoops); }
		if(commandQueue) { clReleaseCommandQueue(commandQueue); }
		if(context) { clReleaseContext(context); }


		time_t currentTime = time(0);
		double elapsedTime = difftime(currentTime, startTime) / 60.0;
		double speed = (double)totalNonces / elapsedTime;
		std::cerr << "\r100% (" << totalNonces << "/" << totalNonces << " nonces)";
		std::cerr << ", " << speed << " nonces/minutes";
		std::cerr << ", " << ((int)elapsedTime / 60) << "h" << ((int)elapsedTime % 60) << "m" << ((int)(elapsedTime * 60.0) % 60) << "s";
		std::cerr << "                    " << std::endl;
	} catch(const OpenclError& ex) {
		std::cerr << "[ERROR] [" << ex.getCode() << "] " << ex.what() << std::endl;
		returnCode = -1;
	} catch(const std::exception& ex) {
		std::cerr << "[ERROR] " << ex.what() << std::endl;
		returnCode = -1;
	}
	return returnCode;
}