static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				int64_t __maybe_unused max_nonce)
{
	const int thr_id = thr->id;
	struct opencl_thread_data *thrdata = thr->cgpu_data;
	struct cgpu_info *gpu = thr->cgpu;
	_clState *clState = clStates[thr_id];
	const cl_kernel *kernel = &clState->kernel;
	const int dynamic_us = opt_dynamic_interval * 1000;

	cl_int status;
	size_t globalThreads[1];
	size_t localThreads[1] = { clState->wsize };
	int64_t hashes;

	/* Windows' timer resolution is only 15ms so oversample 5x */
	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
		struct timeval tv_gpuend;
		double gpu_us;

		gettimeofday(&tv_gpuend, NULL);
		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
		if (gpu_us > dynamic_us) {
			if (gpu->intensity > MIN_INTENSITY)
				--gpu->intensity;
		} else if (gpu_us < dynamic_us / 2) {
			if (gpu->intensity < MAX_INTENSITY)
				++gpu->intensity;
		}
		memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
		gpu->intervals = 0;
	}

	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
	if (hashes > gpu->max_hashes)
		gpu->max_hashes = hashes;

	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
		return -1;
	}

	if (clState->goffset) {
		size_t global_work_offset[1];

		global_work_offset[0] = work->blk.nonce;
		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
						globalThreads, localThreads, 0, NULL, NULL);
	} else
		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
						globalThreads, localThreads, 0, NULL, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
		return -1;
	}

	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
				     BUFFERSIZE, thrdata->res, 0, NULL, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
		return -1;
	}

	/* The amount of work scanned can fluctuate when intensity changes
	 * and since we do this one cycle behind, we increment the work more
	 * than enough to prevent repeating work */
	work->blk.nonce += gpu->max_hashes;

	/* This finish flushes the readbuffer set with CL_FALSE in clEnqueueReadBuffer */
	clFinish(clState->commandQueue);

	/* FOUND entry is used as a counter to say how many nonces exist */
	if (thrdata->res[FOUND]) {
		/* Clear the buffer again */
		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
					      BUFFERSIZE, blank_res, 0, NULL, NULL);
		if (unlikely(status != CL_SUCCESS)) {
			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
			return -1;
		}
		applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
		postcalc_hash_async(thr, work, thrdata->res);
		memset(thrdata->res, 0, BUFFERSIZE);
		/* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */
		clFinish(clState->commandQueue);
	}

	return hashes;
}
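The dynamic-intensity logic above averages GPU kernel time over several oversampled intervals (to work around Windows' ~15 ms timer granularity), then steps the intensity down when the per-run average exceeds opt_dynamic_interval and up when it sits below half of it. The following standalone sketch isolates that control loop under stated assumptions: adjust_intensity, SKETCH_MIN_INTENSITY, SKETCH_MAX_INTENSITY and the sample figures in main are hypothetical stand-ins, not driver code.

/*
 * Sketch only: the oversample-then-adjust intensity loop in isolation,
 * with the intensity bounds stubbed out so the snippet compiles on its own.
 */
#include <stdio.h>

#define SKETCH_MIN_INTENSITY 0	/* stand-ins for the driver's MIN/MAX_INTENSITY */
#define SKETCH_MAX_INTENSITY 14

static void adjust_intensity(int *intensity, double total_us, int intervals,
			     int dynamic_us)
{
	double gpu_us = total_us / intervals;	/* average kernel time per run */

	if (gpu_us > dynamic_us) {		/* too slow: back off one step */
		if (*intensity > SKETCH_MIN_INTENSITY)
			--*intensity;
	} else if (gpu_us < dynamic_us / 2) {	/* plenty of headroom: step up */
		if (*intensity < SKETCH_MAX_INTENSITY)
			++*intensity;
	}
}

int main(void)
{
	int intensity = 8, dynamic_us = 7000;	/* e.g. opt_dynamic_interval = 7 ms */

	/* Pretend five kernel runs took 60 ms in total (12 ms average). */
	adjust_intensity(&intensity, 60000.0, 5, dynamic_us);
	printf("after slow batch: intensity = %d\n", intensity);	/* 7 */

	/* Pretend five runs took 10 ms in total (2 ms average). */
	adjust_intensity(&intensity, 10000.0, 5, dynamic_us);
	printf("after fast batch: intensity = %d\n", intensity);	/* 8 */
	return 0;
}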
static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				uint64_t __maybe_unused max_nonce)
{
	const int thr_id = thr->id;
	struct opencl_thread_data *thrdata = thr->cgpu_data;
	struct cgpu_info *gpu = thr->cgpu;
	_clState *clState = clStates[thr_id];
	const cl_kernel *kernel = &clState->kernel;
	const int dynamic_us = opt_dynamic_interval * 1000;

	cl_int status;
	size_t globalThreads[1];
	size_t localThreads[1] = { clState->wsize };
	unsigned int threads;
	unsigned int hashes;

	/* This finish flushes the readbuffer set with CL_FALSE later */
	clFinish(clState->commandQueue);

	gettimeofday(&gpu->tv_gpuend, NULL);

	if (gpu->dynamic) {
		struct timeval diff;
		suseconds_t gpu_us;

		timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
		if (likely(gpu_us > 0)) {
			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;

			/* Try to not let the GPU be out for longer than
			 * opt_dynamic_interval in ms, but increase
			 * intensity when the system is idle in dynamic mode */
			if (gpu->gpu_us_average > dynamic_us) {
				if (gpu->intensity > MIN_INTENSITY)
					--gpu->intensity;
			} else if (gpu->gpu_us_average < dynamic_us / 2) {
				if (gpu->intensity < MAX_INTENSITY)
					++gpu->intensity;
			}
		}
	}

	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads, localThreads[0], gpu->intensity);
	if (hashes > gpu->max_hashes)
		gpu->max_hashes = hashes;

	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
		return 0;
	}

	/* MAXBUFFERS entry is used as a flag to say nonces exist */
	if (thrdata->res[FOUND]) {
		/* Clear the buffer again */
		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
					      BUFFERSIZE, blank_res, 0, NULL, NULL);
		if (unlikely(status != CL_SUCCESS)) {
			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
			return 0;
		}
		if (unlikely(thrdata->last_work)) {
			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
			thrdata->last_work = NULL;
		} else {
			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
			postcalc_hash_async(thr, work, thrdata->res);
		}
		memset(thrdata->res, 0, BUFFERSIZE);
		clFinish(clState->commandQueue);
	}

	gettimeofday(&gpu->tv_gpustart, NULL);

	if (clState->goffset) {
		size_t global_work_offset[1];

		global_work_offset[0] = work->blk.nonce;
		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
						globalThreads, localThreads, 0, NULL, NULL);
	} else
		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
						globalThreads, localThreads, 0, NULL, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
		return 0;
	}

	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
				     BUFFERSIZE, thrdata->res, 0, NULL, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)");
		return 0;
	}

	/* The amount of work scanned can fluctuate when intensity changes
	 * and since we do this one cycle behind, we increment the work more
	 * than enough to prevent repeating work */
	work->blk.nonce += gpu->max_hashes;

	return hashes;
}
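This variant smooths the measured kernel time with an exponential moving average instead of a fixed oversampling window: gpu_us_average = (gpu_us_average + gpu_us * 0.63) / 1.63 is algebraically new = (1 - a) * old + a * sample with a = 0.63 / 1.63 ≈ 0.39, so roughly 39% of each new measurement is folded in and a single slow kernel run cannot swing the intensity decision on its own. A minimal sketch of that smoothing, with hypothetical names and sample values:

#include <stdio.h>

/* Exponentially weighted average in the same form the driver uses:
 * avg = (avg + sample * 0.63) / 1.63, i.e. alpha = 0.63 / 1.63 ~= 0.39. */
static double ema_update(double avg, double sample)
{
	return (avg + sample * 0.63) / 1.63;
}

int main(void)
{
	double avg = 0.0;
	double samples[] = { 10000, 10000, 10000, 4000, 4000, 4000 };	/* microseconds */

	for (int i = 0; i < 6; i++) {
		avg = ema_update(avg, samples[i]);
		printf("sample %d: %.0f us -> average %.0f us\n", i, samples[i], avg);
	}
	return 0;
}

Running the sketch shows the average taking a handful of iterations to settle near a new sample level, which is why the comparison against dynamic_us adjusts intensity gradually rather than reacting to every individual kernel run.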