void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
    bool initialize = false;
    bool exit = false;
    // re-upload the header to the device only when it changes
    if (memcmp(&m_current_header, header, sizeof(hash32_t)))
    {
        m_current_header = *reinterpret_cast<hash32_t const*>(header);
        set_header(m_current_header);
        initialize = true;
    }
    if (m_current_target != target)
    {
        m_current_target = target;
        set_target(m_current_target);
        initialize = true;
    }
    if (initialize)
    {
        // pick a random starting nonce and clear the per-stream result counters
        random_device engine;
        m_current_nonce = uniform_int_distribution<uint64_t>()(engine);
        m_current_index = 0;
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
        for (unsigned int i = 0; i < s_numStreams; i++)
            m_search_buf[i][0] = 0;
    }
    uint64_t batch_size = s_gridSize * s_blockSize;
    for (; !exit; m_current_index++, m_current_nonce += batch_size)
    {
        // rotate through the streams so kernel launches overlap with result reads
        unsigned int stream_index = m_current_index % s_numStreams;
        cudaStream_t stream = m_streams[stream_index];
        volatile uint32_t* buffer = m_search_buf[stream_index];
        uint32_t found_count = 0;
        uint64_t nonces[SEARCH_RESULT_BUFFER_SIZE - 1];
        // the batch now completing on this stream was launched s_numStreams iterations ago
        uint64_t nonce_base = m_current_nonce - s_numStreams * batch_size;
        if (m_current_index >= s_numStreams)
        {
            // wait for the oldest batch on this stream, then collect its results;
            // buffer[0] holds the result count, buffer[1..] hold nonce offsets
            CUDA_SAFE_CALL(cudaStreamSynchronize(stream));
            found_count = buffer[0];
            if (found_count)
            {
                // defensive clamp in case the kernel reported more results than the buffer holds
                if (found_count > SEARCH_RESULT_BUFFER_SIZE - 1)
                    found_count = SEARCH_RESULT_BUFFER_SIZE - 1;
                buffer[0] = 0;
            }
            for (unsigned int j = 0; j < found_count; j++)
                nonces[j] = nonce_base + buffer[j + 1];
        }
        run_ethash_search(s_gridSize, s_blockSize, stream, buffer, m_current_nonce);
        if (m_current_index >= s_numStreams)
        {
            // report results only after the next batch is queued; stop if the hook says so
            exit = found_count && hook.found(nonces, found_count);
            exit |= hook.searched(nonce_base, batch_size);
        }
    }
}
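// Both the CUDA miner above and the OpenCL miners below report through the same
// search_hook callback interface: found() receives candidate nonces, searched()
// receives per-batch progress, and returning true from either aborts the search
// loop. The interface declaration is not shown in this listing; the sketch below
// is a minimal, hypothetical implementation inferred from the call sites
// (hook.found(nonces, count) / hook.searched(start_nonce, count)), not the
// project's actual header.

#include <cstdint>
#include <cstdio>

// assumed shape of the interface, inferred from how search() calls it
struct search_hook
{
    virtual ~search_hook() {}
    virtual bool found(uint64_t const* nonces, uint32_t count) = 0;
    virtual bool searched(uint64_t start_nonce, uint32_t count) = 0;
};

// minimal hook: print every candidate nonce and stop after the first batch of hits
struct example_hook : search_hook
{
    bool found(uint64_t const* nonces, uint32_t count) override
    {
        for (uint32_t i = 0; i < count; i++)
            std::printf("candidate nonce: 0x%016llx\n", (unsigned long long)nonces[i]);
        return true;   // true aborts the search loop
    }
    bool searched(uint64_t start_nonce, uint32_t count) override
    {
        (void)start_nonce; (void)count;
        return false;  // false keeps the search running
    }
};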
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
    struct pending_batch
    {
        uint64_t start_nonce;
        unsigned buf;
    };
    std::queue<pending_batch> pending;

    static uint32_t const c_zero = 0;

    // update header constant buffer
    m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
    for (unsigned i = 0; i != c_num_buffers; ++i)
    {
        m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
    }

#if CL_VERSION_1_2 && 0
    cl::Event pre_return_event;
    if (!m_opencl_1_1)
    {
        m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
    }
    else
#endif
    {
        m_queue.finish();
    }

    /*
    __kernel void ethash_combined_search(
        __global hash32_t* g_hashes,         // 0
        __constant hash32_t const* g_header, // 1
        __global hash128_t const* g_dag,     // 2
        ulong start_nonce,                   // 3
        ulong target,                        // 4
        uint isolate                         // 5
    )
    */
    m_search_kernel.setArg(1, m_header);
    m_search_kernel.setArg(2, m_dag);

    // pass these to stop the compiler unrolling the loops
    m_search_kernel.setArg(4, target);
    m_search_kernel.setArg(5, ~0u);

    unsigned buf = 0;
    for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
    {
        // supply output buffer to kernel
        m_search_kernel.setArg(0, m_search_buf[buf]);
        m_search_kernel.setArg(3, start_nonce);

        // execute it!
        m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);

        pending.push({start_nonce, buf});
        buf = (buf + 1) % c_num_buffers;

        // read results once every buffer has a batch in flight
        if (pending.size() == c_num_buffers)
        {
            pending_batch const& batch = pending.front();

            // could use pinned host pointer instead
            uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1 + c_max_search_results) * sizeof(uint32_t));
            unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);

            uint64_t nonces[c_max_search_results];
            for (unsigned i = 0; i != num_found; ++i)
            {
                nonces[i] = batch.start_nonce + results[i + 1];
            }
            m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);

            bool exit = num_found && hook.found(nonces, num_found);
            exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
            if (exit)
                break;

            // reset search buffer if we're still going
            if (num_found)
                m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);

            pending.pop();
        }
    }

    // not safe to return until this is ready
#if CL_VERSION_1_2 && 0
    if (!m_opencl_1_1)
    {
        pre_return_event.wait();
    }
#endif
}
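// The blocking enqueueMapBuffer above reads from an ordinary device buffer; the
// inline comment notes a pinned host pointer could be used instead. A minimal
// sketch of that variant, assuming the standard OpenCL C++ bindings (cl.hpp):
// allocating the result buffers with CL_MEM_ALLOC_HOST_PTR asks the driver for
// host-visible memory, which typically turns the map/unmap round-trip into a
// zero-copy operation rather than a device-to-host transfer. This is a
// hypothetical helper, not the project's code.

#include <cstdint>
#include <CL/cl.hpp>

// buffer layout matches the reads in search(): one uint32 counter followed by
// maxResults nonce offsets
static cl::Buffer makePinnedResultBuffer(cl::Context const& context, unsigned maxResults)
{
    return cl::Buffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        (1 + maxResults) * sizeof(uint32_t));
}

// e.g. at init time (member names are assumptions):
//   for (unsigned i = 0; i != c_num_buffers; ++i)
//       m_search_buf[i] = makePinnedResultBuffer(m_context, c_max_search_results);
// the map/unmap calls in search() stay the same; only the allocation flags differ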
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
    try
    {
        struct pending_batch
        {
            uint64_t start_nonce;
            unsigned buf;
        };
        queue<pending_batch> pending;

        // this can't be a static because in MacOSX OpenCL implementation a segfault occurs when a static is passed to OpenCL functions
        uint32_t const c_zero = 0;

        // update header constant buffer
        m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
        for (unsigned i = 0; i != c_bufferCount; ++i)
            m_queue.enqueueWriteBuffer(m_searchBuffer[i], false, 0, 4, &c_zero);

#if CL_VERSION_1_2 && 0
        cl::Event pre_return_event;
        if (!m_opencl_1_1)
            m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
        else
#endif
            m_queue.finish();

        m_searchKernel.setArg(1, m_header);
        m_searchKernel.setArg(2, m_dag);

        // pass these to stop the compiler unrolling the loops
        m_searchKernel.setArg(4, target);
        m_searchKernel.setArg(5, ~0u);

        unsigned buf = 0;
        random_device engine;
        uint64_t start_nonce = uniform_int_distribution<uint64_t>()(engine);
        for (;; start_nonce += m_globalWorkSize)
        {
            auto t = chrono::high_resolution_clock::now();
            // supply output buffer to kernel
            m_searchKernel.setArg(0, m_searchBuffer[buf]);
            m_searchKernel.setArg(3, start_nonce);

            // execute it!
            cl::Event event;
            m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize, nullptr, &event);
            if (s_kernelProfiling)
            {
                uint64_t* hashes = new uint64_t;
                *hashes = m_globalWorkSize;
                event.setCallback(CL_COMPLETE, printStats, hashes);
            }

            pending.push({ start_nonce, buf });
            buf = (buf + 1) % c_bufferCount;

            // read results
            if (pending.size() == c_bufferCount)
            {
                pending_batch const& batch = pending.front();

                // could use pinned host pointer instead
                uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_searchBuffer[batch.buf], true, CL_MAP_READ, 0, (1 + c_maxSearchResults) * sizeof(uint32_t));
                unsigned num_found = min<unsigned>(results[0], c_maxSearchResults);

                uint64_t nonces[c_maxSearchResults];
                for (unsigned i = 0; i != num_found; ++i)
                    nonces[i] = batch.start_nonce + results[i + 1];

                m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results);

                bool exit = num_found && hook.found(nonces, num_found);
                exit |= hook.searched(batch.start_nonce, m_globalWorkSize); // always report searched before exit
                if (exit)
                    break;

                // reset search buffer if we're still going
                if (num_found)
                    m_queue.enqueueWriteBuffer(m_searchBuffer[batch.buf], true, 0, 4, &c_zero);

                pending.pop();
            }
        }

        // not safe to return until this is ready
#if CL_VERSION_1_2 && 0
        if (!m_opencl_1_1)
            pre_return_event.wait();
#endif
    }
    catch (cl::Error const& err)
    {
        ETHCL_LOG(err.what() << "(" << err.err() << ")");
    }
}
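// This revision registers printStats as a CL_COMPLETE callback when
// s_kernelProfiling is set, passing the batch's hash count through user_data,
// but printStats itself is not shown in this listing. The sketch below is a
// plausible reconstruction, not the project's actual implementation: it assumes
// the command queue was created with CL_QUEUE_PROFILING_ENABLE (otherwise the
// profiling queries fail and no rate is printed), and it frees the heap
// allocation made at the enqueue site.

#include <cstdint>
#include <cstdio>
#include <CL/cl.h>

static void CL_CALLBACK printStats(cl_event event, cl_int /*status*/, void* user_data)
{
    uint64_t* hashes = (uint64_t*)user_data;
    cl_ulong start = 0, end = 0;
    if (clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr) == CL_SUCCESS &&
        clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr) == CL_SUCCESS &&
        end > start)
    {
        double seconds = (end - start) / 1e9;           // timestamps are in nanoseconds
        std::printf("%.2f MH/s\n", *hashes / seconds / 1e6);
    }
    delete hashes;  // matches the `new uint64_t` made before setCallback
}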