void OpenCLMomentumV9::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) { // temp storage *collision_count = 0; uint32_t ht_size = 1<<HASH_BITS; SHA512_Context c512_avxsse; SHA512_Init(&c512_avxsse); uint8_t midhash[32+4]; memcpy(midhash+4, message, 32); *((uint32_t*)midhash) = 0; SHA512_Update_Simple(&c512_avxsse, midhash, 32+4); SHA512_PreFinal(&c512_avxsse); *(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0; uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]); for (int i = 1; i < 5; i++) { swap_helper[i] = SWAP64(swap_helper[i]); } OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext(); OpenCLProgram *program = context->getProgram(0); OpenCLKernel *kernel = program->getKernel("kernel_sha512"); OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table"); assert(kernel != NULL); //size_t BLOCKSIZE = main.getPlatform(0)->getDevice(0)->getMaxWorkGroupSize(); size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num)); //has to be a power of 2 BLOCKSIZE = 1<<log2(BLOCKSIZE); size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num)); BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN); // printf("BLOCKSIZE = %ld\n", BLOCKSIZE); // printf("BLOCKSIZE_CLEAN = %ld\n", BLOCKSIZE_CLEAN); // cleans up the hash table queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN); queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE); queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t)); queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE); queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t)); queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling()); queue->finish(); }
void protoshares_process_512(blockHeader_t* block, CBlockProvider* bp, unsigned int thread_id, GPUHasher *_gpu, uint64_t *hashblock) { // generate mid hash using sha256 (header hash) uint8_t midHash[32+4]; { //SPH sph_sha256_context c256; sph_sha256_init(&c256); sph_sha256(&c256, (unsigned char*)block, 80); sph_sha256_close(&c256, midHash+4); sph_sha256_init(&c256); sph_sha256(&c256, (unsigned char*)(midHash+4), 32); sph_sha256_close(&c256, midHash+4); } SHA512_Context c512_avxsse; SHA512_Init(&c512_avxsse); SHA512_Update_Simple(&c512_avxsse, midHash, 32+4); SHA512_PreFinal(&c512_avxsse); *(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0; _gpu->ComputeHashes((uint64_t *)c512_avxsse.buffer.bytes, hashblock); uint32_t n_hashes_plus_one = *((uint32_t *)hashblock); boost::unordered_map<uint64_t, uint32_t> resmap; for (uint32_t i = 0; i < (n_hashes_plus_one-1); i++) { uint64_t birthday = hashblock[1+i*2]; uint32_t mine = hashblock[1+i*2+1]; boost::unordered_map<uint64_t,uint32_t>::const_iterator r = resmap.find(birthday); if (r != resmap.end()) { uint32_t other = r->second; protoshares_revalidateCollision<shamode>(block, midHash+4, other, mine, birthday, bp, thread_id); } resmap[birthday] = mine; } }
void OpenCLMomentumV8::find_collisions(uint8_t* message, collision_struct* out_buff, size_t* out_count) { // temp storage *out_count = 0; uint32_t ht_size = 1<<HASH_BITS; SHA512_Context c512_avxsse; SHA512_Init(&c512_avxsse); uint8_t midhash[32+4]; memcpy(midhash+4, message, 32); *((uint32_t*)midhash) = 0; SHA512_Update_Simple(&c512_avxsse, midhash, 32+4); SHA512_PreFinal(&c512_avxsse); *(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0; uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]); for (int i = 1; i < 5; i++) { swap_helper[i] = SWAP64(swap_helper[i]); } OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext(); OpenCLProgram *program = context->getProgram(0); OpenCLKernel *kernel_calculate_all_hashes = program->getKernel("calculate_all_hashes"); OpenCLKernel *kernel_fill_table = program->getKernel("fill_table"); OpenCLKernel *kernel_find_collisions = program->getKernel("find_collisions"); OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table"); OpenCLDevice * device = OpenCLMain::getInstance().getDevice(device_num); // cleans up the hash table size_t kc_wgsize = kernel_cleanup->getWorkGroupSize(device); kc_wgsize = 1<<log2(kc_wgsize); queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, kc_wgsize); // printf("Cleaning the HT\n"); // queue->finish(); queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE); // step 1, calculate hashes size_t kcah_wgsize = kernel_calculate_all_hashes->getWorkGroupSize(device); kcah_wgsize = 1<<log2(kcah_wgsize); queue->enqueueKernel1D(kernel_calculate_all_hashes, MAX_MOMENTUM_NONCE/8, kcah_wgsize); // uint64_t * apa = new uint64_t[MAX_MOMENTUM_NONCE]; // queue->enqueueReadBuffer(hashes, apa, sizeof(uint64_t)*MAX_MOMENTUM_NONCE); // queue->finish(); // // printf("testing hashes\n"); // uint64_t count = 0; // for (int i = 0; i < MAX_MOMENTUM_NONCE; i++) { // if (apa[i] == 0) { // count++; // printf("BAD HASH AT: %d %X\n", i, apa[i]); // } // } // printf("counted %X bad hashes\n", count); // printf("NOW REALLY TEST THEM hashes\n"); // count = 0; // for (uint32_t i = 0; i < MAX_MOMENTUM_NONCE/8; i+=8) { // sph_sha512_context c512_sph; //SPH // sph_sha512_init(&c512_sph); // sph_sha512(&c512_sph, &i, 4); // sph_sha512(&c512_sph, message, 32); // uint64_t out[8]; // sph_sha512_close(&c512_sph, out); // for (int j =0; j < 8; j++) { // if (apa[i+j] != out[j]) { // count++; // uint64_t xxx = apa[i+j]; // printf("BAD HASH AT: %d => %X != %X\n", i, apa[i+j], out[j]); // } // } // } // printf("counted %X bad hashes\n", count); // step 2, populate hashtable size_t kft_wgsize = kernel_fill_table->getWorkGroupSize(device); kft_wgsize = 1<<log2(kft_wgsize); queue->enqueueKernel1D(kernel_fill_table, MAX_MOMENTUM_NONCE, kft_wgsize); // printf("step 2, populate hashtable\n"); // queue->finish(); queue->enqueueWriteBuffer(collisions_count, out_count, sizeof(size_t)); // step 3, find collisions size_t kfc_wgsize = kernel_find_collisions->getWorkGroupSize(device); kfc_wgsize = 1<<log2(kfc_wgsize); queue->enqueueKernel1D(kernel_find_collisions, MAX_MOMENTUM_NONCE, kfc_wgsize); // printf("step 3, find collisions\n"); // queue->finish(); queue->enqueueReadBuffer(collisions_count, out_count, sizeof(size_t)); queue->enqueueReadBuffer(collisions, out_buff, sizeof(collision_struct)*getCollisionCeiling()); // printf("step 4, copy output\n"); queue->finish(); #ifdef DEBUG printf("Collision Count = %d\n", (*out_count)); #endif }