Ejemplo n.º 1
0
		virtual void MakeBid(
			const std::shared_ptr<Packet_ServerBeginRound> roundInfo,
			const std::shared_ptr<Packet_ServerRequestBid> request,
			double period,
			double skewEstimate,
			std::vector<uint32_t> &solution,
			uint32_t *pProof
			){
			double startTime = now()*1e-9;
			double tSafetyMargin = timeGuard;
			double tFinish = request->timeStampReceiveBids*1e-9 + skewEstimate - tSafetyMargin;

			Log(Log_Verbose, "MakeBid - start, total period=%lg.", period);

			const Packet_ServerBeginRound *pParams = roundInfo.get();

			// This doesn't change within each round, so calculate it once and re-use.
			hash::fnv<64> hasher;
			uint64_t chainHash = hasher((const char*)&pParams->chainData[0], pParams->chainData.size());

			std::vector<ensemble> candidates;
            
            double t = now()*1e-9;
            double timeBudget = tFinish - t;
            
            uint32_t shortListLength = timeBudget > 1.0 ? shortListLengthDefault : shortListLengthFast;
            
			auto compMin = [](const ensemble& left, const ensemble& right) {
				return wide_compare(8, left.value.limbs, right.value.limbs) == 1;
			};

			auto compMax = [](const ensemble& left, const ensemble& right) {
				return wide_compare(8, left.value.limbs, right.value.limbs) == -1;
			};

			tbb::task_group group;

			std::vector<std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)>> priorityQueues;
			std::vector<uint32_t> totalTrials(tbbCores);

			for (int i = 0; i < tbbCores; i++)
			{
				std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)> ensemble_priority_queue(compMin);

				priorityQueues.push_back(ensemble_priority_queue);

				group.run([&, i](){

					std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMax)> ensemble_priority_queue_reversed(compMax);

					unsigned nTrials = 0;
					unsigned offset = tbbOffset * i;
					while (1)
					{
						bigint_t proof = PoolHash(pParams, nTrials + offset, chainHash);

						if (priorityQueues[i].size() < shortListLength || wide_compare(8, proof.limbs, ensemble_priority_queue_reversed.top().value.limbs) == -1)
						{
							std::vector<uint32_t> indexes;

							indexes.push_back(nTrials + offset);

							ensemble e = ensemble{ proof, indexes };

							priorityQueues[i].push(e);
							ensemble_priority_queue_reversed.push(e);
						}

						if (ensemble_priority_queue_reversed.size() > shortListLength) ensemble_priority_queue_reversed.pop();

						double t = now()*1e-9;
						double timeBudget = tFinish - t;

						Log(Log_Debug, "Finish trial %d, time remaining =%lg seconds.", nTrials, timeBudget);

						nTrials++;

						if ((timeBudget <= 0 && priorityQueues[i].size() >= shortListLength) || nTrials >= tbbOffset-1)
						{
							totalTrials[i] = nTrials;
							break;	// We have run out of time, send what we have
						}

					}
				});

			}

			group.wait();

			uint32_t overallTrials = std::accumulate(totalTrials.begin(), totalTrials.end(), 0);

			for (int i = 0; i < shortListLength; i++)
			{
				auto nextQueue = std::min_element(priorityQueues.begin(), priorityQueues.end(), [](const std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)>& left, const std::priority_queue<ensemble, std::vector<ensemble>, decltype(compMin)>& right) { return wide_compare(8, left.top().value.limbs, right.top().value.limbs) == -1;
				});

				candidates.push_back(nextQueue->top());
				nextQueue->pop();
			}

			Log(Log_Info, "Tried %d elements", overallTrials);

			double gStart = now()*1e-9;
            
            std::reverse(candidates.begin(),candidates.end());
            
			// This is where we store all the best combinations of xor'ed vectors. Each combination is of size roundInfo->maxIndices
			std::vector<ensemble> finalCandidates;

			// We find optimal combinations of the proofs calculated for each index using 'Gaussian elimination' (but xor-ing instead of adding/subtracting). We start in the column of the MSB, and xor vectors that have this bit high together to make the bit in this column 0 for as many vectors as possible. We then move to the next most significant bit, and repeat the process. At each stage, we keep track of what set of indexes we are xor-ing with what orther set of indexes. The the combined set size reaches roundInfo->maxIndices, we add this candidate solution to finalCandidates.

			//// -- Gaussian Elimination Starts Here -- ////

			std::vector<uint32_t> usedIndexes;

			for (int col = 255; col > -1; col--)
			{
				int firstNonzeroRow = -1;

				for (int row = 0; row < candidates.size(); row++)
				{
					if (bitIsHigh(candidates[row].value, col) && std::find(usedIndexes.begin(), usedIndexes.end(), row) == usedIndexes.end())
					{
						firstNonzeroRow = row;
						usedIndexes.push_back(row);
						break;
					}
				}

				if (firstNonzeroRow == -1) continue;

				bigint_t firstNonzeroRowValue = candidates[firstNonzeroRow].value;
				std::sort(candidates[firstNonzeroRow].components.begin(), candidates[firstNonzeroRow].components.end());

				for (int row = 0; row < candidates.size(); row++)
				{
					if (row == firstNonzeroRow) continue;

					if (bitIsHigh(candidates[row].value, col))
					{
						std::sort(candidates[row].components.begin(), candidates[row].components.end());

						std::vector<uint32_t> mergedList(candidates[row].components.size() + candidates[firstNonzeroRow].components.size());

						std::vector<uint32_t>::iterator iterator;

						iterator = std::set_symmetric_difference(candidates[row].components.begin(), candidates[row].components.end(), candidates[firstNonzeroRow].components.begin(), candidates[firstNonzeroRow].components.end(), mergedList.begin());

						mergedList.resize(iterator - mergedList.begin());

						if (mergedList.size() <= roundInfo->maxIndices)
						{
							bigint_t tmp;
							wide_xor(8, tmp.limbs, firstNonzeroRowValue.limbs, candidates[row].value.limbs);
							candidates[row].value = tmp;
							candidates[row].components = mergedList;
						}

						if (mergedList.size() == roundInfo->maxIndices) finalCandidates.push_back(candidates[row]);
					}
				}
			}

			//// -- Gaussian Elimination Ends Here -- ////

			// Sort the finalists in descending order, s.t. smallest value is in highest index
			std::sort(std::begin(finalCandidates), std::end(finalCandidates), [](const ensemble& left, const ensemble& right) {
				return wide_compare(8, left.value.limbs, right.value.limbs) == 1;
			});

			// Choose the finalist with the lowest score for our final bid.
			if (!finalCandidates.empty())
			{
				ensemble bestEnsemble = finalCandidates.back();

				std::sort(bestEnsemble.components.begin(), bestEnsemble.components.end());

				solution = bestEnsemble.components;

				wide_copy(BIGINT_WORDS, pProof, bestEnsemble.value.limbs);
			}
			else
			{
				// Last ditch attempt to make sure we always submit something valid. Ideally we should never come in here.

				std::vector<uint32_t> indices(roundInfo->maxIndices);
				uint32_t curr = 0;
				for (unsigned j = 0; j < indices.size(); j++){
					curr = curr + 1 + (rand() % 10);
					indices[j] = curr;
				}

				bigint_t proof = HashReference(pParams, indices.size(), &indices[0], chainHash);

				solution = indices;

				wide_copy(BIGINT_WORDS, pProof, proof.limbs);
			}

			double gEnd = now()*1e-9;

			Log(Log_Info, "GE Time Elapsed = %lg seconds.", gEnd - gStart);

			Log(Log_Verbose, "MakeBid - finish.");

			double endTime = now()*1e-9;

			Log(Log_Info, "Time used = %lg seconds.", endTime - startTime);
		}
Ejemplo n.º 2
0
    void MakeBid(
        std::shared_ptr<Packet_ServerBeginRound> roundInfo,   // Information about this particular round
        const std::shared_ptr<Packet_ServerRequestBid> request,     // The specific request we received
        double period,                                                                          // How long this bidding period will last
        double skewEstimate,                                                                // An estimate of the time difference between us and the server (positive -> we are ahead)
        std::vector<uint32_t> &solution,                                                // Our vector of indices describing the solution
        uint32_t *pProof                                                                        // Will contain the "proof", which is just the value
    )
    {
        double tSafetyMargin = 0.5; // accounts for uncertainty in network conditions
        /* This is when the server has said all bids must be produced by, plus the
            adjustment for clock skew, and the safety margin
        */
        double tFinish = request->timeStampReceiveBids * 1e-9 + skewEstimate - tSafetyMargin;

        Log(Log_Verbose, "MakeBid - start, total period=%lg.", period);

        /*
            We will use this to track the best solution we have created so far.
        */
        roundInfo->maxIndices = 4;
        std::vector<uint32_t> bestSolution(roundInfo->maxIndices);
        std::vector<uint32_t> gpuBestSolution(roundInfo->maxIndices);
        bigint_t bestProof, gpuBestProof;

        wide_ones(BIGINT_WORDS, bestProof.limbs);

        // Incorporate the existing block chain data - in a real system this is the
        // list of transactions we are signing. This is the FNV hash:
        // http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
        hash::fnv<64> hasher;
        uint64_t chainHash = hasher((const char *)&roundInfo->chainData[0], roundInfo->chainData.size());

        bigint_t x;
        wide_x_init(&x.limbs[0], uint32_t(0), roundInfo->roundId, roundInfo->roundSalt, chainHash);

        std::vector<uint32_t> indices(roundInfo->maxIndices);

        //Define TBB arrays
        uint32_t *parallel_Indices = (uint32_t *)malloc(sizeof(uint32_t) * TBB_PARALLEL_COUNT);
        uint32_t *parallel_BestSolutions = (uint32_t *)malloc(sizeof(uint32_t) * TBB_PARALLEL_COUNT * roundInfo->maxIndices);
        uint32_t *parallel_Proofs = (uint32_t *)malloc(sizeof(uint32_t) * 8 * TBB_PARALLEL_COUNT);
        uint32_t *parallel_BestProofs = (uint32_t *)malloc(sizeof(uint32_t) * 8 * TBB_PARALLEL_COUNT);

        //Define GPU arrays
        uint32_t *d_ParallelBestSolutions;

        checkCudaErrors(cudaMalloc((void **)&d_ParallelBestSolutions, sizeof(uint32_t) * CUDA_DIM * CUDA_DIM * roundInfo->maxIndices));

        checkCudaErrors(cudaMemcpy(d_hashConstant, &roundInfo->c[0], sizeof(uint32_t) * 4, cudaMemcpyHostToDevice));

        unsigned gpuTrials = 0;
        unsigned cpuTrials = 0;

        unsigned maxNum = uint32_t(0xFFFFFFFF);

        auto runGPU = [ = , &gpuTrials]
        {
            cudaInit(CUDA_DIM, d_ParallelBestProofs);

            do
            {
                cudaIteration(d_ParallelIndices, d_ParallelProofs, d_ParallelBestProofs, d_ParallelBestSolutions, x, d_hashConstant, roundInfo->hashSteps, CUDA_DIM, gpuTrials, CUDA_TRIALS, roundInfo->maxIndices);

                gpuTrials += CUDA_TRIALS;
            }
            while ((tFinish - now() * 1e-9) > 0);
        };

        std::thread runGPUThread(runGPU);

        auto tbbInitial = [ = ](unsigned i)
        {
            bigint_t ones;
            wide_ones(8, ones.limbs);
            wide_copy(8, &parallel_BestProofs[i * 8], ones.limbs);
        };

        tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbInitial);

        do
        {
            auto tbbIteration = [ = ](unsigned i)
            {
                uint32_t index = maxNum - (TBB_PARALLEL_COUNT<<2) - cpuTrials + (i<<1);

                bigint_t proof = tbbHash(roundInfo.get(),
                                         index,
                                         x);

                wide_copy(8, &parallel_Proofs[i * 8], proof.limbs);
                parallel_Indices[i] = index;
            };

            tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbIteration);

            auto tbbCrossHash = [ = ](unsigned i)
            {
                for (unsigned xorStride = 1; xorStride < TBB_PARALLEL_COUNT >> 2; xorStride++)
                {
                    if (i + (roundInfo->maxIndices * xorStride) < TBB_PARALLEL_COUNT)
                    {
                        bigint_t candidateBestProof;
                        wide_copy(8, candidateBestProof.limbs, &parallel_Proofs[i * 8]);

                        for (unsigned indexNum = 1; indexNum < roundInfo->maxIndices; indexNum++)
                        {
                            wide_xor(8, candidateBestProof.limbs, candidateBestProof.limbs, &parallel_Proofs[(i + (indexNum * xorStride)) * 8]);
                        }

                        if (wide_compare(8, candidateBestProof.limbs, &parallel_BestProofs[i * 8]) < 0)
                        {
                            wide_copy(8, &parallel_BestProofs[i * 8], candidateBestProof.limbs);
                            for (unsigned ID = 0; ID < roundInfo->maxIndices; ID++)
                            {
                                parallel_BestSolutions[(i * roundInfo->maxIndices) + ID] = parallel_Indices[i + (ID * xorStride)];
                            }
                        }
                    }
                }
            };

            tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbCrossHash);

            cpuTrials += TBB_PARALLEL_COUNT;
        }
        while ((tFinish - now() * 1e-9) > 0);

        runGPUThread.join();

        auto reduceGPU = [ = , &gpuBestSolution, &gpuBestProof]
        {
            cudaParallelReduce(CUDA_DIM, roundInfo->maxIndices, d_ParallelBestProofs, d_ParallelBestSolutions, &gpuBestSolution[0], gpuBestProof.limbs);
        };

        std::thread reduceThread(reduceGPU);

        //TBB
        for (int toDo = TBB_PARALLEL_COUNT / 2; toDo >= 1; toDo >>= 1)
        {
            auto tbbReduce = [ = ](unsigned i)
            {
                if (wide_compare(BIGINT_WORDS, &parallel_BestProofs[(i + toDo) * 8], &parallel_BestProofs[i * 8]) < 0)
                {
                    wide_copy(8, &parallel_BestProofs[i * 8], &parallel_BestProofs[(i + toDo) * 8]);
                    wide_copy(roundInfo->maxIndices, &parallel_BestSolutions[i * roundInfo->maxIndices], &parallel_BestSolutions[(i + toDo) * roundInfo->maxIndices]);
                }
            };

            tbb::parallel_for<unsigned>(0, toDo, tbbReduce);
        }

        wide_copy(BIGINT_WORDS, bestProof.limbs, &parallel_BestProofs[0]);
        wide_copy(roundInfo->maxIndices, &bestSolution[0], &parallel_BestSolutions[0]);

        reduceThread.join();

        if (wide_compare(BIGINT_WORDS, gpuBestProof.limbs, bestProof.limbs) < 0)
        {
            Log(Log_Verbose, "Accepting GPU Solution");
            wide_copy(8, bestProof.limbs, gpuBestProof.limbs);
            wide_copy(roundInfo->maxIndices, &bestSolution[0], &gpuBestSolution[0]);
        }

        solution = bestSolution;
        wide_copy(BIGINT_WORDS, pProof, bestProof.limbs);

        free(parallel_Indices);
        free(parallel_BestSolutions);
        free(parallel_Proofs);
        free(parallel_BestProofs);

        checkCudaErrors(cudaFree(d_ParallelBestSolutions));

        Log(Log_Verbose, "MakeBid - finish. Total trials %d, cpu: %d, gpu %d", cpuTrials + gpuTrials, cpuTrials, gpuTrials);
    }