// This is the complete hash reference function. Given the current round parameters, // and the solution, which is a vector of indices, it calculates the proof. The goodness // of the solution is determined by the numerical value of the proof. bigint_t HashReference( const Packet_ServerBeginRound *pParams, unsigned nIndices, const uint32_t *pIndices ){ if(nIndices>pParams->maxIndices) throw std::invalid_argument("HashReference - Too many indices for parameter set."); bigint_t acc; wide_zero(8, acc.limbs); for(unsigned i=0;i<nIndices;i++){ if(i>0){ if(pIndices[i-1] >= pIndices[i]) throw std::invalid_argument("HashReference - Indices are not in monotonically increasing order."); } // Calculate the hash for this specific point bigint_t point=PoolHash(pParams, pIndices[i]); // Combine the hashes of the points together using xor wide_xor(8, acc.limbs, acc.limbs, point.limbs); } return acc; }
virtual void MakeBid( const std::shared_ptr<Packet_ServerBeginRound> roundInfo, const std::shared_ptr<Packet_ServerRequestBid> request, double period, double skewEstimate, std::vector<uint32_t> &solution, uint32_t *pProof ){ double startTime = now()*1e-9; double tSafetyMargin = timeGuard; double tFinish = request->timeStampReceiveBids*1e-9 + skewEstimate - tSafetyMargin; Log(Log_Verbose, "MakeBid - start, total period=%lg.", period); const Packet_ServerBeginRound *pParams = roundInfo.get(); // This doesn't change within each round, so calculate it once and re-use. hash::fnv<64> hasher; uint64_t chainHash = hasher((const char*)&pParams->chainData[0], pParams->chainData.size()); std::vector<ensemble> candidates; double t = now()*1e-9; double timeBudget = tFinish - t; uint32_t shortListLength = timeBudget > 1.0 ? shortListLengthDefault : shortListLengthFast; auto compMin = [](const ensemble& left, const ensemble& right) { return wide_compare(8, left.value.limbs, right.value.limbs) == 1; }; auto compMax = [](const ensemble& left, const ensemble& right) { return wide_compare(8, left.value.limbs, right.value.limbs) == -1; }; tbb::task_group group; std::vector<std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)>> priorityQueues; std::vector<uint32_t> totalTrials(tbbCores); for (int i = 0; i < tbbCores; i++) { std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)> ensemble_priority_queue(compMin); priorityQueues.push_back(ensemble_priority_queue); group.run([&, i](){ std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMax)> ensemble_priority_queue_reversed(compMax); unsigned nTrials = 0; unsigned offset = tbbOffset * i; while (1) { bigint_t proof = PoolHash(pParams, nTrials + offset, chainHash); if (priorityQueues[i].size() < shortListLength || wide_compare(8, proof.limbs, ensemble_priority_queue_reversed.top().value.limbs) == -1) { std::vector<uint32_t> indexes; indexes.push_back(nTrials + offset); ensemble e = ensemble{ proof, indexes }; priorityQueues[i].push(e); ensemble_priority_queue_reversed.push(e); } if (ensemble_priority_queue_reversed.size() > shortListLength) ensemble_priority_queue_reversed.pop(); double t = now()*1e-9; double timeBudget = tFinish - t; Log(Log_Debug, "Finish trial %d, time remaining =%lg seconds.", nTrials, timeBudget); nTrials++; if ((timeBudget <= 0 && priorityQueues[i].size() >= shortListLength) || nTrials >= tbbOffset-1) { totalTrials[i] = nTrials; break; // We have run out of time, send what we have } } }); } group.wait(); uint32_t overallTrials = std::accumulate(totalTrials.begin(), totalTrials.end(), 0); for (int i = 0; i < shortListLength; i++) { auto nextQueue = std::min_element(priorityQueues.begin(), priorityQueues.end(), [](const std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)>& left, const std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)>& right) { return wide_compare(8, left.top().value.limbs, right.top().value.limbs) == -1; }); candidates.push_back(nextQueue->top()); nextQueue->pop(); } Log(Log_Info, "Tried %d elements", overallTrials); double gStart = now()*1e-9; std::reverse(candidates.begin(),candidates.end()); // This is where we store all the best combinations of xor'ed vectors. Each combination is of size roundInfo->maxIndices std::vector<ensemble> finalCandidates; // We find optimal combinations of the proofs calculated for each index using 'Gaussian elimination' (but xor-ing instead of adding/subtracting). We start in the column of the MSB, and xor vectors that have this bit high together to make the bit in this column 0 for as many vectors as possible. We then move to the next most significant bit, and repeat the process. At each stage, we keep track of what set of indexes we are xor-ing with what orther set of indexes. The the combined set size reaches roundInfo->maxIndices, we add this candidate solution to finalCandidates. //// -- Gaussian Elimination Starts Here -- //// std::vector<uint32_t> usedIndexes; for (int col = 255; col > -1; col--) { int firstNonzeroRow = -1; for (int row = 0; row < candidates.size(); row++) { if (bitIsHigh(candidates[row].value, col) && std::find(usedIndexes.begin(), usedIndexes.end(), row) == usedIndexes.end()) { firstNonzeroRow = row; usedIndexes.push_back(row); break; } } if (firstNonzeroRow == -1) continue; bigint_t firstNonzeroRowValue = candidates[firstNonzeroRow].value; std::sort(candidates[firstNonzeroRow].components.begin(), candidates[firstNonzeroRow].components.end()); for (int row = 0; row < candidates.size(); row++) { if (row == firstNonzeroRow) continue; if (bitIsHigh(candidates[row].value, col)) { std::sort(candidates[row].components.begin(), candidates[row].components.end()); std::vector<uint32_t> mergedList(candidates[row].components.size() + candidates[firstNonzeroRow].components.size()); std::vector<uint32_t>::iterator iterator; iterator = std::set_symmetric_difference(candidates[row].components.begin(), candidates[row].components.end(), candidates[firstNonzeroRow].components.begin(), candidates[firstNonzeroRow].components.end(), mergedList.begin()); mergedList.resize(iterator - mergedList.begin()); if (mergedList.size() <= roundInfo->maxIndices) { bigint_t tmp; wide_xor(8, tmp.limbs, firstNonzeroRowValue.limbs, candidates[row].value.limbs); candidates[row].value = tmp; candidates[row].components = mergedList; } if (mergedList.size() == roundInfo->maxIndices) finalCandidates.push_back(candidates[row]); } } } //// -- Gaussian Elimination Ends Here -- //// // Sort the finalists in descending order, s.t. smallest value is in highest index std::sort(std::begin(finalCandidates), std::end(finalCandidates), [](const ensemble& left, const ensemble& right) { return wide_compare(8, left.value.limbs, right.value.limbs) == 1; }); // Choose the finalist with the lowest score for our final bid. if (!finalCandidates.empty()) { ensemble bestEnsemble = finalCandidates.back(); std::sort(bestEnsemble.components.begin(), bestEnsemble.components.end()); solution = bestEnsemble.components; wide_copy(BIGINT_WORDS, pProof, bestEnsemble.value.limbs); } else { // Last ditch attempt to make sure we always submit something valid. Ideally we should never come in here. std::vector<uint32_t> indices(roundInfo->maxIndices); uint32_t curr = 0; for (unsigned j = 0; j < indices.size(); j++){ curr = curr + 1 + (rand() % 10); indices[j] = curr; } bigint_t proof = HashReference(pParams, indices.size(), &indices[0], chainHash); solution = indices; wide_copy(BIGINT_WORDS, pProof, proof.limbs); } double gEnd = now()*1e-9; Log(Log_Info, "GE Time Elapsed = %lg seconds.", gEnd - gStart); Log(Log_Verbose, "MakeBid - finish."); double endTime = now()*1e-9; Log(Log_Info, "Time used = %lg seconds.", endTime - startTime); }
void MakeBid( std::shared_ptr<Packet_ServerBeginRound> roundInfo, // Information about this particular round const std::shared_ptr<Packet_ServerRequestBid> request, // The specific request we received double period, // How long this bidding period will last double skewEstimate, // An estimate of the time difference between us and the server (positive -> we are ahead) std::vector<uint32_t> &solution, // Our vector of indices describing the solution uint32_t *pProof // Will contain the "proof", which is just the value ) { double tSafetyMargin = 0.5; // accounts for uncertainty in network conditions /* This is when the server has said all bids must be produced by, plus the adjustment for clock skew, and the safety margin */ double tFinish = request->timeStampReceiveBids * 1e-9 + skewEstimate - tSafetyMargin; Log(Log_Verbose, "MakeBid - start, total period=%lg.", period); /* We will use this to track the best solution we have created so far. */ roundInfo->maxIndices = 4; std::vector<uint32_t> bestSolution(roundInfo->maxIndices); std::vector<uint32_t> gpuBestSolution(roundInfo->maxIndices); bigint_t bestProof, gpuBestProof; wide_ones(BIGINT_WORDS, bestProof.limbs); // Incorporate the existing block chain data - in a real system this is the // list of transactions we are signing. This is the FNV hash: // http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function hash::fnv<64> hasher; uint64_t chainHash = hasher((const char *)&roundInfo->chainData[0], roundInfo->chainData.size()); bigint_t x; wide_x_init(&x.limbs[0], uint32_t(0), roundInfo->roundId, roundInfo->roundSalt, chainHash); std::vector<uint32_t> indices(roundInfo->maxIndices); //Define TBB arrays uint32_t *parallel_Indices = (uint32_t *)malloc(sizeof(uint32_t) * TBB_PARALLEL_COUNT); uint32_t *parallel_BestSolutions = (uint32_t *)malloc(sizeof(uint32_t) * TBB_PARALLEL_COUNT * roundInfo->maxIndices); uint32_t *parallel_Proofs = (uint32_t *)malloc(sizeof(uint32_t) * 8 * TBB_PARALLEL_COUNT); uint32_t *parallel_BestProofs = (uint32_t *)malloc(sizeof(uint32_t) * 8 * TBB_PARALLEL_COUNT); //Define GPU arrays uint32_t *d_ParallelBestSolutions; checkCudaErrors(cudaMalloc((void **)&d_ParallelBestSolutions, sizeof(uint32_t) * CUDA_DIM * CUDA_DIM * roundInfo->maxIndices)); checkCudaErrors(cudaMemcpy(d_hashConstant, &roundInfo->c[0], sizeof(uint32_t) * 4, cudaMemcpyHostToDevice)); unsigned gpuTrials = 0; unsigned cpuTrials = 0; unsigned maxNum = uint32_t(0xFFFFFFFF); auto runGPU = [ = , &gpuTrials] { cudaInit(CUDA_DIM, d_ParallelBestProofs); do { cudaIteration(d_ParallelIndices, d_ParallelProofs, d_ParallelBestProofs, d_ParallelBestSolutions, x, d_hashConstant, roundInfo->hashSteps, CUDA_DIM, gpuTrials, CUDA_TRIALS, roundInfo->maxIndices); gpuTrials += CUDA_TRIALS; } while ((tFinish - now() * 1e-9) > 0); }; std::thread runGPUThread(runGPU); auto tbbInitial = [ = ](unsigned i) { bigint_t ones; wide_ones(8, ones.limbs); wide_copy(8, ¶llel_BestProofs[i * 8], ones.limbs); }; tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbInitial); do { auto tbbIteration = [ = ](unsigned i) { uint32_t index = maxNum - (TBB_PARALLEL_COUNT<<2) - cpuTrials + (i<<1); bigint_t proof = tbbHash(roundInfo.get(), index, x); wide_copy(8, ¶llel_Proofs[i * 8], proof.limbs); parallel_Indices[i] = index; }; tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbIteration); auto tbbCrossHash = [ = ](unsigned i) { for (unsigned xorStride = 1; xorStride < TBB_PARALLEL_COUNT >> 2; xorStride++) { if (i + (roundInfo->maxIndices * xorStride) < TBB_PARALLEL_COUNT) { bigint_t candidateBestProof; wide_copy(8, candidateBestProof.limbs, ¶llel_Proofs[i * 8]); for (unsigned indexNum = 1; indexNum < roundInfo->maxIndices; indexNum++) { wide_xor(8, candidateBestProof.limbs, candidateBestProof.limbs, ¶llel_Proofs[(i + (indexNum * xorStride)) * 8]); } if (wide_compare(8, candidateBestProof.limbs, ¶llel_BestProofs[i * 8]) < 0) { wide_copy(8, ¶llel_BestProofs[i * 8], candidateBestProof.limbs); for (unsigned ID = 0; ID < roundInfo->maxIndices; ID++) { parallel_BestSolutions[(i * roundInfo->maxIndices) + ID] = parallel_Indices[i + (ID * xorStride)]; } } } } }; tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbCrossHash); cpuTrials += TBB_PARALLEL_COUNT; } while ((tFinish - now() * 1e-9) > 0); runGPUThread.join(); auto reduceGPU = [ = , &gpuBestSolution, &gpuBestProof] { cudaParallelReduce(CUDA_DIM, roundInfo->maxIndices, d_ParallelBestProofs, d_ParallelBestSolutions, &gpuBestSolution[0], gpuBestProof.limbs); }; std::thread reduceThread(reduceGPU); //TBB for (int toDo = TBB_PARALLEL_COUNT / 2; toDo >= 1; toDo >>= 1) { auto tbbReduce = [ = ](unsigned i) { if (wide_compare(BIGINT_WORDS, ¶llel_BestProofs[(i + toDo) * 8], ¶llel_BestProofs[i * 8]) < 0) { wide_copy(8, ¶llel_BestProofs[i * 8], ¶llel_BestProofs[(i + toDo) * 8]); wide_copy(roundInfo->maxIndices, ¶llel_BestSolutions[i * roundInfo->maxIndices], ¶llel_BestSolutions[(i + toDo) * roundInfo->maxIndices]); } }; tbb::parallel_for<unsigned>(0, toDo, tbbReduce); } wide_copy(BIGINT_WORDS, bestProof.limbs, ¶llel_BestProofs[0]); wide_copy(roundInfo->maxIndices, &bestSolution[0], ¶llel_BestSolutions[0]); reduceThread.join(); if (wide_compare(BIGINT_WORDS, gpuBestProof.limbs, bestProof.limbs) < 0) { Log(Log_Verbose, "Accepting GPU Solution"); wide_copy(8, bestProof.limbs, gpuBestProof.limbs); wide_copy(roundInfo->maxIndices, &bestSolution[0], &gpuBestSolution[0]); } solution = bestSolution; wide_copy(BIGINT_WORDS, pProof, bestProof.limbs); free(parallel_Indices); free(parallel_BestSolutions); free(parallel_Proofs); free(parallel_BestProofs); checkCudaErrors(cudaFree(d_ParallelBestSolutions)); Log(Log_Verbose, "MakeBid - finish. Total trials %d, cpu: %d, gpu %d", cpuTrials + gpuTrials, cpuTrials, gpuTrials); }