/**
 * Scan a nonce range for a Groestlcoin share using the GPU helpers and
 * confirm any candidate on the CPU.
 *
 * @param thr_id      Mining thread index; selects the per-thread init flag and
 *                    work_restart slot and is forwarded to the GPU helpers.
 * @param pdata       Block header words. pdata[0..31] are read; pdata[19] is
 *                    the nonce and is updated by this function.
 * @param ptarget     Target words. ptarget[7] is used as a quick pre-filter
 *                    before fulltest(). In benchmark mode ptarget[7] is
 *                    overwritten through a cast.
 * @param max_nonce   Upper bound of the nonce range to scan.
 * @param hashes_done Out: number of nonces accounted for by this call.
 *
 * @return non-zero when a nonce validated on the CPU (pdata[19] then holds
 *         that nonce), 0 when the range was exhausted, a restart was
 *         requested, or the scratch buffer could not be allocated.
 */
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	// Post-increment: scanning begins at the nonce after the one stored on entry.
	uint32_t start_nonce = pdata[19]++;

	// Unclamped per-device batch size (default 1 << 19 = 256*256*8).
	const uint32_t throughputmax = device_intensity(thr_id, __func__, 1 << 19);
	const uint32_t throughput = min(throughputmax, max_nonce - start_nonce);

	// Widen before multiplying so a large intensity cannot wrap in 32 bits.
	const size_t hashBytes = (size_t)throughput * 16 * sizeof(uint32_t);
	uint32_t *outputHash = (uint32_t*)malloc(hashBytes);
	// malloc(0) may legitimately return NULL, so only a non-empty request counts as failure.
	if (outputHash == NULL && hashBytes != 0) {
		applog(LOG_ERR, "GPU #%d: unable to allocate %llu bytes for the hash buffer",
			thr_id, (unsigned long long)hashBytes);
		pdata[19] = start_nonce; // nothing was scanned; leave the header untouched
		*hashes_done = 0;
		return 0;
	}

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x000000ff;

	// init
	if (!init[thr_id]) {
		// One-time setup uses the unclamped batch size so it is not dictated
		// by a short first range (presumably it sizes device-side state --
		// confirm in groestlcoin_cpu_init).
		groestlcoin_cpu_init(thr_id, throughputmax);
		init[thr_id] = true;
	}

	// An endian swap of the header is required.
	uint32_t endiandata[32];
	for (int kk = 0; kk < 32; kk++)
		be32enc(&endiandata[kk], pdata[kk]);

	// Prepare the context with the endian-swapped block header
	// (the nonce is substituted later).
	groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);

	do {
		// GPU
		uint32_t foundNounce = 0xFFFFFFFF; // 0xFFFFFFFF means "no candidate"
		const uint32_t Htarg = ptarget[7];

		groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);

		if (foundNounce < 0xffffffff) {
			// Recompute the hash on the CPU before trusting the GPU result.
			uint32_t tmpHash[8];
			endiandata[19] = SWAP32(foundNounce);
			groestlhash(tmpHash, endiandata);

			if (tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget)) {
				pdata[19] = foundNounce;
				*hashes_done = foundNounce - start_nonce + 1;
				free(outputHash);
				return true;
			}

			applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNounce);
		}

		pdata[19] += throughput;
		// 64-bit arithmetic keeps the end-of-range test from wrapping.
	} while (!work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput)));

	*hashes_done = pdata[19] - start_nonce + 1;
	free(outputHash);
	return 0;
}
/**
 * Scan a nonce range for Groestlcoin shares on the GPU mapped to thr_id and
 * confirm up to two candidates on the CPU.
 *
 * @param thr_id      Mining thread index; indexes device_map, gpustream,
 *                    work_restart and mining_has_stopped.
 * @param pdata       Block header words. pdata[0..31] are read; pdata[19]
 *                    receives the first validated nonce and pdata[21] the
 *                    second one, if any.
 * @param ptarget     Target words. ptarget[7] is used as a quick pre-filter
 *                    before fulltest(); it is overwritten in benchmark mode.
 * @param max_nonce   Upper bound of the nonce range to scan.
 * @param hashes_done Out: number of nonces accounted for by this call.
 *
 * @return the number of validated nonces (1 or 2), or 0 when the range was
 *         exhausted or a restart was requested. Does not return when
 *         stop_mining is set (the thread exits) or when a CUDA error is
 *         pending after a batch (the process exits).
 */
extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
	uint32_t *ptarget, uint32_t max_nonce,
	uint32_t *hashes_done)
{
	// Per-thread host buffer holding two candidate nonces (allocated during init).
	static THREAD uint32_t *foundNounce = nullptr;

	uint32_t start_nonce = pdata[19];
	unsigned int intensity = (device_sm[device_map[thr_id]] > 500) ? 24 : 23;
	uint32_t throughputmax = device_intensity(device_map[thr_id], __func__, 1U << intensity);
	// The mask rounds the batch size down to a multiple of 1024.
	// NOTE(review): with fewer than 1024 nonces left this yields 0, so the loop
	// below hashes zero nonces and never advances pdata[19] -- confirm callers
	// never pass such a range.
	uint32_t throughput = min(throughputmax, max_nonce - start_nonce) & 0xfffffc00;

	if (opt_benchmark)
		ptarget[7] = 0x0000000f;

	// init
	static THREAD volatile bool init = false;
	if (!init)
	{
		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));

		// The two configuration calls are best-effort. Report a failure and
		// clear it here; otherwise it stays pending and the cudaGetLastError()
		// check in the scan loop would blame the hashing step and exit.
		cudaError_t cfgErr = cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
		if (cfgErr != cudaSuccess)
		{
			applog(LOG_WARNING, "GPU #%d: cudaSetDeviceFlags failed: %s", device_map[thr_id], cudaGetErrorString(cfgErr));
			cudaGetLastError(); // reset the pending error
		}
		cfgErr = cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
		if (cfgErr != cudaSuccess)
		{
			applog(LOG_WARNING, "GPU #%d: cudaDeviceSetCacheConfig failed: %s", device_map[thr_id], cudaGetErrorString(cfgErr));
			cudaGetLastError(); // reset the pending error
		}

		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
		groestlcoin_cpu_init(thr_id, throughputmax);
		CUDA_SAFE_CALL(cudaMallocHost(&foundNounce, 2 * 4)); // two 32-bit nonces
		init = true;
	}

	// An endian swap of the header is required.
	uint32_t endiandata[32];
	for (int kk = 0; kk < 32; kk++)
		be32enc(&endiandata[kk], pdata[kk]);

	// Prepare the context with the endian-swapped block header
	// (the nonce is substituted later).
	groestlcoin_cpu_setBlock(thr_id, endiandata);

	do
	{
		// GPU
		const uint32_t Htarg = ptarget[7];

		groestlcoin_cpu_hash(thr_id, throughput, pdata[19], foundNounce, ptarget[7]);

		if (stop_mining)
		{
			// Shutdown requested: flag this thread as stopped, release the
			// stream and terminate the thread from here.
			mining_has_stopped[thr_id] = true;
			cudaStreamDestroy(gpustream[thr_id]);
			pthread_exit(nullptr);
		}

		if (foundNounce[0] < 0xffffffff)
		{
			// Recompute the hash on the CPU before trusting the GPU result.
			uint32_t tmpHash[8];
			endiandata[19] = SWAP32(foundNounce[0]);
			groestlhash(tmpHash, endiandata);

			if (tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget))
			{
				int res = 1;
				if (opt_benchmark)
					applog(LOG_INFO, "GPU #%d Found nounce %08x", device_map[thr_id], foundNounce[0]);
				*hashes_done = pdata[19] - start_nonce + throughput;

				if (foundNounce[1] != 0xffffffff)
				{
					endiandata[19] = SWAP32(foundNounce[1]);
					groestlhash(tmpHash, endiandata);
					if (tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget))
					{
						pdata[21] = foundNounce[1];
						res++;
						if (opt_benchmark)
							applog(LOG_INFO, "GPU #%d Found second nounce %08x", device_map[thr_id], foundNounce[1]);
					}
					else
					{
						// Warn only when the top word differs from the target word.
						if (tmpHash[7] != Htarg)
						{
							applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNounce[1]);
						}
					}
				}

				pdata[19] = foundNounce[0];
				return res;
			}
			else
			{
				// Warn only when the top word differs from the target word.
				if (tmpHash[7] != Htarg)
				{
					applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNounce[0]);
				}
			}
		}

		pdata[19] += throughput;

		// Any CUDA error still pending after this batch is fatal.
		cudaError_t err = cudaGetLastError();
		if (err != cudaSuccess)
		{
			applog(LOG_ERR, "GPU #%d: %s", device_map[thr_id], cudaGetErrorString(err));
			exit(EXIT_FAILURE);
		}
		// 64-bit arithmetic keeps the end-of-range test from wrapping.
	} while (!work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput)));

	*hashes_done = pdata[19] - start_nonce;
	return 0;
}