void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
	bool initialize = false;
	bool exit = false;
	if (memcmp(&m_current_header, header, sizeof(hash32_t)))
	{
		m_current_header = *reinterpret_cast<hash32_t const *>(header);
		set_header(m_current_header);
		initialize = true;
	}
	if (m_current_target != target)
	{
		m_current_target = target;
		set_target(m_current_target);
		initialize = true;
	}
	if (initialize)
	{
		random_device engine;
		m_current_nonce = uniform_int_distribution<uint64_t>()(engine);
		m_current_index = 0;
		CUDA_SAFE_CALL(cudaDeviceSynchronize());
		for (unsigned int i = 0; i < s_numStreams; i++)
			m_search_buf[i][0] = 0;
	}
	uint64_t batch_size = s_gridSize * s_blockSize;
	for (; !exit; m_current_index++, m_current_nonce += batch_size)
	{
		unsigned int stream_index = m_current_index % s_numStreams;
		cudaStream_t stream = m_streams[stream_index];
		volatile uint32_t* buffer = m_search_buf[stream_index];
		uint32_t found_count = 0;
		uint64_t nonces[SEARCH_RESULT_BUFFER_SIZE - 1];
		uint64_t nonce_base = m_current_nonce - s_numStreams * batch_size;
		if (m_current_index >= s_numStreams)
		{
			CUDA_SAFE_CALL(cudaStreamSynchronize(stream));
			found_count = buffer[0];
			if (found_count)
				buffer[0] = 0;
			for (unsigned int j = 0; j < found_count; j++)
				nonces[j] = nonce_base + buffer[j + 1];
		}
		run_ethash_search(s_gridSize, s_blockSize, stream, buffer, m_current_nonce);
		if (m_current_index >= s_numStreams)
		{
			exit = found_count && hook.found(nonces, found_count);
			exit |= hook.searched(nonce_base, batch_size);
		}
	}
}
Beispiel #2
0
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
	struct pending_batch
	{
		uint64_t start_nonce;
		unsigned buf;
	};
	std::queue<pending_batch> pending;

	static uint32_t const c_zero = 0;

	// update header constant buffer
	m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
	for (unsigned i = 0; i != c_num_buffers; ++i)
	{
		m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
	}

#if CL_VERSION_1_2 && 0
	cl::Event pre_return_event;
	if (!m_opencl_1_1)
	{
		m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
	}
	else
#endif
	{
		m_queue.finish();
	}

	/*
	__kernel void ethash_combined_search(
		__global hash32_t* g_hashes,			// 0
		__constant hash32_t const* g_header,	// 1
		__global hash128_t const* g_dag,		// 2
		ulong start_nonce,						// 3
		ulong target,							// 4
		uint isolate							// 5
	)
	*/
	m_search_kernel.setArg(1, m_header);
	m_search_kernel.setArg(2, m_dag);

	// pass these to stop the compiler unrolling the loops
	m_search_kernel.setArg(4, target);
	m_search_kernel.setArg(5, ~0u);


	unsigned buf = 0;
	for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
	{
		// supply output buffer to kernel
		m_search_kernel.setArg(0, m_search_buf[buf]);
		m_search_kernel.setArg(3, start_nonce);

		// execute it!
		m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);

		pending.push({start_nonce, buf});
		buf = (buf + 1) % c_num_buffers;

		// read results
		if (pending.size() == c_num_buffers)
		{
			pending_batch const& batch = pending.front();

			// could use pinned host pointer instead
			uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
			unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);

			uint64_t nonces[c_max_search_results];
			for (unsigned i = 0; i != num_found; ++i)
			{
				nonces[i] = batch.start_nonce + results[i+1];
			}

			m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);

			bool exit = num_found && hook.found(nonces, num_found);
			exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
			if (exit)
				break;

			// reset search buffer if we're still going
			if (num_found)
				m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);

			pending.pop();
		}
	}

	// not safe to return until this is ready
#if CL_VERSION_1_2 && 0
	if (!m_opencl_1_1)
	{
		pre_return_event.wait();
	}
#endif
}
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
	try
	{
		struct pending_batch
		{
			uint64_t start_nonce;
			unsigned buf;
		};
		queue<pending_batch> pending;

		// this can't be a static because in MacOSX OpenCL implementation a segfault occurs when a static is passed to OpenCL functions
		uint32_t const c_zero = 0;

		// update header constant buffer
		m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
		for (unsigned i = 0; i != c_bufferCount; ++i)
			m_queue.enqueueWriteBuffer(m_searchBuffer[i], false, 0, 4, &c_zero);

#if CL_VERSION_1_2 && 0
		cl::Event pre_return_event;
		if (!m_opencl_1_1)
			m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
		else
#endif
			m_queue.finish();

		
		m_searchKernel.setArg(1, m_header);
		m_searchKernel.setArg(2, m_dag );
		// pass these to stop the compiler unrolling the loops
		m_searchKernel.setArg(4, target);
		m_searchKernel.setArg(5, ~0u);

		unsigned buf = 0;
		random_device engine;
		uint64_t start_nonce = uniform_int_distribution<uint64_t>()(engine);
		for (;; start_nonce += m_globalWorkSize)
		{
			auto t = chrono::high_resolution_clock::now();
			// supply output buffer to kernel
			m_searchKernel.setArg(0, m_searchBuffer[buf]);
			m_searchKernel.setArg(3, start_nonce);

			// execute it!
			cl::Event event;
			m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize, nullptr, &event);
			if (s_kernelProfiling) {
			    uint64_t* hashes = new uint64_t;
			    *hashes = m_globalWorkSize;
			    event.setCallback(CL_COMPLETE, printStats, hashes);
			}

			pending.push({ start_nonce, buf });
			buf = (buf + 1) % c_bufferCount;

			// read results
			if (pending.size() == c_bufferCount)
			{
				pending_batch const& batch = pending.front();

				// could use pinned host pointer instead
				uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_searchBuffer[batch.buf], true, CL_MAP_READ, 0, (1 + c_maxSearchResults) * sizeof(uint32_t));
				unsigned num_found = min<unsigned>(results[0], c_maxSearchResults);

				uint64_t nonces[c_maxSearchResults];
				for (unsigned i = 0; i != num_found; ++i)
					nonces[i] = batch.start_nonce + results[i + 1];

				m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results);
				bool exit = num_found && hook.found(nonces, num_found);
				exit |= hook.searched(batch.start_nonce, m_globalWorkSize); // always report searched before exit
				if (exit)
					break;

				// reset search buffer if we're still going
				if (num_found)
					m_queue.enqueueWriteBuffer(m_searchBuffer[batch.buf], true, 0, 4, &c_zero);

				pending.pop();
			}
		}

		// not safe to return until this is ready
#if CL_VERSION_1_2 && 0
		if (!m_opencl_1_1)
			pre_return_event.wait();
#endif
	}
	catch (cl::Error const& err)
	{
		ETHCL_LOG(err.what() << "(" << err.err() << ")");
	}
}