/**
 * Returns a uniformly distributed random index for a container of
 * `vectorSize` elements.
 *
 * @param vectorSize number of elements in the container being indexed.
 * @return an index in [0, vectorSize - 1], or -1 when vectorSize <= 0
 *         (an empty range has no valid index).
 */
int GetRandomIndex(int vectorSize) {
    // std::uniform_int_distribution requires a <= b; (0, -1) would be
    // undefined behaviour, so reject empty ranges up front.
    if (vectorSize <= 0) {
        return -1;
    }

    // Seed one engine per thread instead of constructing a random_device
    // and a freshly seeded engine on every call.
    thread_local std::default_random_engine engineRandom{std::random_device{}()};

    // Important - 0 to N-1 and not 1 to N!!
    std::uniform_int_distribution<int> distribution(0, vectorSize - 1);
    return distribution(engineRandom);
}
/**
 * Returns a random double drawn from std::uniform_real_distribution<double>
 * over the range (0.0, 1.0).
 *
 * @return the generated value.
 */
double GetRandomDouble() {
    // Seed one engine per thread instead of constructing a random_device
    // and a freshly seeded engine on every call.
    thread_local std::default_random_engine engineRandom{std::random_device{}()};

    std::uniform_real_distribution<double> distribution(0.0, 1.0);
    return distribution(engineRandom);
}
void GameMainModel::createBallInfo(const int id) { std::random_device rdev; std::mt19937 engine(rdev()); std::uniform_int_distribution<> dist(0, 99); std::shared_ptr<BallInfo> ballInfo = std::make_shared<BallInfo>(); ballInfo->id = id; if (id < BALLS_NUMBER) { ballInfo->initX = 0.5f + (0.25f + (float)id * 0.01f) * cosf((float)(id + _randomR) * 44.0f * MATH_PI / 180.0f); ballInfo->initY = 0.5f + (0.25f + (float)id * 0.01f) * sinf((float)(id + _randomR) * 44.0f * MATH_PI / 180.0f) * 9.0f / 16.0f; ballInfo->impulseX = 0.0f; ballInfo->impulseY = 0.0f; } else { ballInfo->initX = 0.5f + 0.6f * ((float)(dist(engine) % 2)-0.5f) * 2.0f; ballInfo->initY = 0.5f + 0.1f * ((float)(dist(engine) % 2)-0.5f) * 2.0f; float abs = (float)(dist(engine) % 5) / 16.0f + 0.25f; ballInfo->impulseX = (ballInfo->initX < 0.5f) ? abs : -abs; ballInfo->impulseY = (ballInfo->initY < 0.5f) ? -abs : abs; } ballInfo->x = ballInfo->initX; ballInfo->y = ballInfo->initY; int randomNumber = dist(engine); if (randomNumber < NUMBER_1_PERCENT) ballInfo->number = 1; else if (randomNumber < NUMBER_2_PERCENT) ballInfo->number = 2; else if (randomNumber < NUMBER_3_PERCENT) ballInfo->number = 3; else if (randomNumber < NUMBER_4_PERCENT) ballInfo->number = 4; else if (randomNumber < NUMBER_5_PERCENT) ballInfo->number = 5; else if (randomNumber < NUMBER_6_PERCENT) ballInfo->number = 6; else if (randomNumber < NUMBER_7_PERCENT) ballInfo->number = 7; else if (randomNumber < NUMBER_8_PERCENT) ballInfo->number = 8; else ballInfo->number = 9; ballInfo->isPresence = true; ballInfo->isSelectEnable = true; ballInfo->isSelected = false; _ballsInfoList.push_back(ballInfo); }
/**
 * Picks a random integer in [0, 359] for _randomR, then runs setUpGame().
 */
GameMainModel::GameMainModel() {
    std::random_device seedSource;
    std::mt19937 rng(seedSource());
    std::uniform_int_distribution<> offsetRange(0, 359);

    // _randomR must be set before setUpGame() is called.
    _randomR = offsetRange(rng);

    setUpGame();
}
/**
 * Builds an m-by-n matrix whose entries are drawn from
 * std::normal_distribution<double>(mu, sigma).
 *
 * @param m     first constructor argument of the matrix (bound of index i).
 * @param n     second constructor argument of the matrix (bound of index j).
 * @param mu    mean passed to the normal distribution.
 * @param sigma standard deviation passed to the normal distribution.
 * @return the filled matrix.
 */
Matrix<Scalar> NormalRandomMatrix(int m, int n, double mu, double sigma) {
    std::random_device seedSource;
    std::default_random_engine rng(seedSource());
    std::normal_distribution<double> gaussian(mu, sigma);

    Matrix<Scalar> result(m, n);

    // Fill with j (bounded by n()) as the outer index and i (bounded by m())
    // as the inner index, one draw per entry.
    for (int col = 0; col < result.n(); ++col) {
        for (int row = 0; row < result.m(); ++row) {
            result(row, col) = gaussian(rng);
        }
    }

    return result;
}
/**
 * Builds an m-by-n matrix where entry (i, j) is a draw from
 * std::uniform_real_distribution<double>(a, b) multiplied by (i + 1) and
 * then by (j + 1).
 *
 * @param m first constructor argument of the matrix (bound of index i).
 * @param n second constructor argument of the matrix (bound of index j).
 * @param a lower bound passed to the uniform distribution.
 * @param b upper bound passed to the uniform distribution.
 * @return the filled matrix.
 */
Matrix<Scalar> SkewedUniformRandomMatrix2(int m, int n, double a, double b) {
    std::random_device seedSource;
    std::default_random_engine rng(seedSource());
    std::uniform_real_distribution<double> uniform(a, b);

    Matrix<Scalar> result(m, n);

    // Fill with j (bounded by n()) as the outer index and i (bounded by m())
    // as the inner index, one draw per entry.
    for (int col = 0; col < result.n(); ++col) {
        for (int row = 0; row < result.m(); ++row) {
            result(row, col) = uniform(rng) * (row + 1) * (col + 1);
        }
    }

    return result;
}
/**
 * Constructs a die with `nFaces` faces.
 *
 * Binds `rollDie` to a uniform integer distribution over [1, nFaces] driven
 * by its own std::mt19937, stores the face count, and allocates a
 * zero-initialised int array with one slot per face.
 *
 * @param nFaces number of faces on the die.
 */
Die::Die(int nFaces) {
#if defined(_WIN32)
    // On Windows the engine is seeded from the high-resolution clock; the
    // static counter is bumped per construction so each Die gets a different
    // seed.
    using Clock = std::chrono::high_resolution_clock;
    static unsigned seed = Clock::now().time_since_epoch().count();
    ++seed;
    std::mt19937 generator{seed};
#else
    std::random_device seedSource{};
    std::mt19937 generator{seedSource()};
#endif  // defined(_WIN32)

    std::uniform_int_distribution<int> faceRange(1, nFaces);

    // std::bind stores copies of both the distribution and the engine.
    rollDie = std::bind(faceRange, generator);

    m_nFaces = nFaces;
    // Trailing () value-initialises the array, so every slot starts at zero.
    m_pnDieRollValues = new int[m_nFaces]();
}
/**
 * Demonstrates comparing, reseeding, advancing, saving and restoring the
 * state of std::default_random_engine objects.
 *
 * Prints the result of (e1 == e2) after each step and writes engine states
 * to "e2_status.out" and "e2_status_rdev.out".
 */
int main() {
    // Two default engines: one default-seeded, one explicitly seeded.
    std::default_random_engine e1;
    std::default_random_engine e2{232323};

    // Prints whether the two engines currently compare equal.
    const auto reportEquality = [&e1, &e2]() {
        std::cout << "(e1==e2)? " << (e1 == e2) << std::endl;
    };
    // Writes e2's state to `path`.
    const auto saveE2 = [&e2](const char* path) {
        std::ofstream out(path);
        out << e2;
    };
    // Reads e2's state back from `path`.
    const auto loadE2 = [&e2](const char* path) {
        std::ifstream in(path);
        in >> e2;
    };

    // Compare the generators as constructed.
    reportEquality();

    // Reseed the first generator with the second generator's seed.
    e1.seed(232323);
    reportEquality();

    // Generate one number with the first generator only.
    e1();
    reportEquality();

    // Discard one number from the second generator.
    e2.discard(1);
    reportEquality();

    // Save the current e2 state.
    std::cout << "Save e2 status in e2_status.out" << std::endl;
    saveE2("e2_status.out");

    // Discard 100 numbers from the second generator.
    e2.discard(100);
    reportEquality();

    // Restore the previously saved e2 state and compare with e1.
    std::cout << "Restore e2 status from e2_status.out" << std::endl;
    loadE2("e2_status.out");
    reportEquality();

    // Reseed e2 from std::random_device and save that state too.
    std::cout << "Save e2(rdev) status in e2_status_rdev.out" << std::endl;
    e2.seed(std::random_device{}());
    saveE2("e2_status_rdev.out");
}
/**
 * Initialises the game to the START state with zeroed counters, null layer
 * and label pointers and cleared touch flags, then seeds _engine from a
 * random_device.
 */
GameMain::GameMain()
    : _gameState(GameState::START),
      _time(0),
      _score(0),
      _tenCount(0),
      _moveLayer(nullptr),
      _dragLayer(nullptr),
      _talonLayer(nullptr),
      _boardCardLayer(nullptr),
      _homeCellLayer(nullptr),
      _timeLabel(nullptr),
      _scoreLabel(nullptr),
      _endLayer(nullptr),
      _touchTime(0),
      _doubleTouchFlag(false),
      _oneTouch(false) {
    // A temporary random_device supplies the single seed value.
    _engine.seed(random_device{}());
}
// Compares a float reference GEMV against the quantized GEMV (CPU or device
// path, depending on Caffe::mode()) on 25 randomly shaped problems. After
// de-quantization each result element must be within 5% of the observed
// value range of the reference result.
TYPED_TEST(QuantBlasTest, TestGemvComparativeFloatQuant) {
  typedef typename TypeParam::Dtype Dtype;

  // Expect at most 5% error
  float percentile_eps = 0.05;

  std::random_device rdev;
  std::mt19937 rngen(rdev());

  // Need to test > 64 dimension
  std::uniform_int_distribution<int_tp> dimsRand(1, 256);
  std::uniform_int_distribution<int_tp> boolRand(0, 1);
  std::uniform_real_distribution<float> valRand(-2.0, 2.0);

  for (int_tp testIdx = 0; testIdx < 25; ++testIdx) {
    int_tp M = dimsRand(rngen);
    int_tp N = dimsRand(rngen);
    CBLAS_TRANSPOSE trans_A = boolRand(rngen) ? CblasTrans : CblasNoTrans;

    // Vector lengths depend on whether A is used transposed.
    const int_tp x_len = trans_A == CblasTrans ? M : N;
    const int_tp y_len = trans_A == CblasTrans ? N : M;

    bool has_alpha = boolRand(rngen);
    bool has_beta = has_alpha ? boolRand(rngen) : true;

    // boolRand is the left operand, so a value is drawn in every case.
    bool alpha_with_quant = boolRand(rngen) && has_alpha;
    bool beta_with_quant = boolRand(rngen) && has_beta;

    // A missing factor is 0; a present but unquantized factor is 1.
    float alpha_val = 0.0f;
    if (has_alpha) {
      alpha_val = alpha_with_quant ? valRand(rngen) : 1.0f;
    }
    float beta_val = 0.0f;
    if (has_beta) {
      beta_val = beta_with_quant ? valRand(rngen) : 1.0f;
    }

    vector<int_tp> A_shape(4, 1);
    vector<int_tp> x_shape(4, 1);
    vector<int_tp> y_shape(4, 1);

    A_shape[2] = M;
    A_shape[3] = N;
    x_shape[3] = x_len;
    y_shape[3] = y_len;

    Blob<float> A(A_shape, Caffe::GetDefaultDevice());
    Blob<float> x(x_shape, Caffe::GetDefaultDevice());
    Blob<float> y(y_shape, Caffe::GetDefaultDevice());
    Blob<float> y_result(y_shape, Caffe::GetDefaultDevice());

    Blob<Dtype> A_quant(A_shape, Caffe::GetDefaultDevice());
    Blob<Dtype> x_quant(x_shape, Caffe::GetDefaultDevice());
    Blob<Dtype> y_quant(y_shape, Caffe::GetDefaultDevice());

    Blob<float> y_unquant(y_shape, Caffe::GetDefaultDevice());

    caffe_rng_gaussian(M * N, 0.0f, 0.5f, A.mutable_cpu_data());
    caffe_rng_gaussian(x_len, 0.0f, 0.5f, x.mutable_cpu_data());
    caffe_rng_gaussian(y_len, 0.0f, 0.5f, y.mutable_cpu_data());

    // y_result starts as a copy of y; the reference GEMV updates it in place.
    caffe_copy(y_len, y.cpu_data(), y_result.mutable_cpu_data());

    QuantizerParameter qpm_a;
    QuantizerParameter qpm_x;
    QuantizerParameter qpm_y;
    QuantizerParameter qpm_alpha;
    QuantizerParameter qpm_beta;
    qpm_a.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_x.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_y.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_alpha.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_beta.set_mode(CAFFE_QUANT_OBSERVE);

    Quantizer<float, Dtype> aq(qpm_a);
    Quantizer<float, Dtype> xq(qpm_x);
    Quantizer<float, Dtype> yq(qpm_y);
    Quantizer<float, Dtype> alphaq(qpm_alpha);
    Quantizer<float, Dtype> betaq(qpm_beta);

    // Reference float GEMV
    caffe_gemv<float>(
                trans_A, M, N,
                alpha_val, A.cpu_data(), x.cpu_data(),
                beta_val, y_result.mutable_cpu_data());

    // Observe all values that will be relevant for quantization.
    // yq sees both the initial y and the reference result.
    aq.ObserveIn_cpu(M * N, A.cpu_data());
    xq.ObserveIn_cpu(x_len, x.cpu_data());
    yq.ObserveIn_cpu(y_len, y.cpu_data());
    yq.ObserveIn_cpu(y_len, y_result.cpu_data());
    alphaq.ObserveIn_cpu(1, &alpha_val);
    betaq.ObserveIn_cpu(1, &beta_val);

    // Apply observed values to the quantizer
    aq.update();
    xq.update();
    yq.update();
    alphaq.update();
    betaq.update();

    // Quantize A, x and y
    aq.Forward_cpu(M * N, A.cpu_data(), A_quant.mutable_cpu_data());
    xq.Forward_cpu(x_len, x.cpu_data(), x_quant.mutable_cpu_data());
    yq.Forward_cpu(y_len, y.cpu_data(), y_quant.mutable_cpu_data());

    // Defaults used when the factor is not quantized: 1 if present, else 0.
    Dtype alpha_val_quant = has_alpha;
    Dtype beta_val_quant = has_beta;

    // Quantize alpha
    if (alpha_with_quant) {
      alphaq.Forward_cpu(1, &alpha_val, &alpha_val_quant);
    }

    // Quantize beta
    if (beta_with_quant) {
      betaq.Forward_cpu(1, &beta_val, &beta_val_quant);
    }

    if (Caffe::mode() == Caffe::Brew::CPU) {
      caffe_gemv<Dtype>(trans_A, M, N,
                        alpha_val_quant,
                        A_quant.cpu_data(), x_quant.cpu_data(),
                        beta_val_quant,
                        y_quant.mutable_cpu_data(),
                        alpha_with_quant ? &(alphaq.out_quantizer_values())
                                         : nullptr,
                        &(aq.out_quantizer_values()),
                        &(xq.out_quantizer_values()),
                        beta_with_quant ? &(betaq.out_quantizer_values())
                                        : nullptr,
                        &(yq.out_quantizer_values()));
    } else {
      Caffe::GetDefaultDevice()->template gemv<Dtype>(trans_A, M, N,
                        alpha_val_quant,
                        A_quant.gpu_data(), x_quant.gpu_data(),
                        beta_val_quant,
                        y_quant.mutable_gpu_data(),
                        alpha_with_quant ? &(alphaq.out_quantizer_values())
                                         : nullptr,
                        &(aq.out_quantizer_values()),
                        &(xq.out_quantizer_values()),
                        beta_with_quant ? &(betaq.out_quantizer_values())
                                        : nullptr,
                        &(yq.out_quantizer_values()));
    }

    // De-quantize the result for comparison with the float reference.
    yq.Backward_cpu(y_len, y_quant.cpu_data(), y_unquant.mutable_cpu_data());

    // Tolerance: percentile_eps of the largest observed magnitude of y.
    const QuantizerValues cqv = yq.in_quantizer_values();
    float eps = std::max(std::abs(cqv.get_max<float>()),
                         std::abs(cqv.get_min<float>())) * percentile_eps;

    for (int_tp i = 0; i < y_len; ++i) {
      EXPECT_NEAR(y_unquant.cpu_data()[i], y_result.cpu_data()[i], eps);
      // One error is enough to abort
      if (fabs(y_unquant.cpu_data()[i] - y_result.cpu_data()[i]) >= eps) {
        break;
      }
    }
  }
}
// Compares a float reference AXPBY against the quantized device AXPBY on
// randomly sized vectors. After de-quantization each result element must be
// within 5% of the observed value range of the reference result.
// In CPU mode the quantized kernel is not implemented yet, so the test
// returns before any comparison is made.
TYPED_TEST(QuantBlasTest, TestAxpbyComparativeFloatQuant) {
  typedef typename TypeParam::Dtype Dtype;

  // Expect at most 5% error
  float percentile_eps = 0.05;

  std::random_device rdev;
  std::mt19937 rngen(rdev());

  // Need to test > 64 dimension
  std::uniform_int_distribution<int_tp> dimsRand(1, 256);
  std::uniform_int_distribution<int_tp> boolRand(0, 1);
  std::uniform_real_distribution<float> valRand(-2.0, 2.0);

  for (int_tp testIdx = 0; testIdx < 25; ++testIdx) {
    int_tp N = dimsRand(rngen);

    bool has_alpha = boolRand(rngen);
    bool has_beta = has_alpha ? boolRand(rngen) : true;

    // boolRand is the left operand, so a value is drawn in every case.
    bool alpha_with_quant = boolRand(rngen) && has_alpha;
    bool beta_with_quant = boolRand(rngen) && has_beta;

    // A missing factor is 0; a present but unquantized factor is 1.
    float alpha_val = 0.0f;
    if (has_alpha) {
      alpha_val = alpha_with_quant ? valRand(rngen) : 1.0f;
    }
    float beta_val = 0.0f;
    if (has_beta) {
      beta_val = beta_with_quant ? valRand(rngen) : 1.0f;
    }

    // One-dimensional shapes holding N elements each.
    vector<int_tp> x_shape(1, N);
    vector<int_tp> y_shape(1, N);

    Blob<float> x(x_shape, Caffe::GetDefaultDevice());
    Blob<float> y(y_shape, Caffe::GetDefaultDevice());
    Blob<float> y_result(y_shape, Caffe::GetDefaultDevice());

    Blob<Dtype> x_quant(x_shape, Caffe::GetDefaultDevice());
    Blob<Dtype> y_quant(y_shape, Caffe::GetDefaultDevice());

    Blob<float> y_unquant(y_shape, Caffe::GetDefaultDevice());

    caffe_rng_gaussian(N, 0.0f, 0.5f, x.mutable_cpu_data());
    caffe_rng_gaussian(N, 0.0f, 0.5f, y.mutable_cpu_data());

    // y_result starts as a copy of y; the reference AXPBY updates it in place.
    caffe_copy(N, y.cpu_data(), y_result.mutable_cpu_data());

    QuantizerParameter qpm_x;
    QuantizerParameter qpm_y;
    QuantizerParameter qpm_alpha;
    QuantizerParameter qpm_beta;
    qpm_x.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_y.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_alpha.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_beta.set_mode(CAFFE_QUANT_OBSERVE);

    Quantizer<float, Dtype> xq(qpm_x);
    Quantizer<float, Dtype> yq(qpm_y);
    Quantizer<float, Dtype> alphaq(qpm_alpha);
    Quantizer<float, Dtype> betaq(qpm_beta);

    // Reference float AXPBY
    caffe_axpby<float>(N, alpha_val, x.cpu_data(),
                       beta_val, y_result.mutable_cpu_data());

    // Observe all values that will be relevant for quantization.
    // yq sees both the initial y and the reference result.
    xq.ObserveIn_cpu(N, x.cpu_data());
    yq.ObserveIn_cpu(N, y.cpu_data());
    yq.ObserveIn_cpu(N, y_result.cpu_data());
    alphaq.ObserveIn_cpu(1, &alpha_val);
    betaq.ObserveIn_cpu(1, &beta_val);

    // Apply observed values to the quantizer
    xq.update();
    yq.update();
    alphaq.update();
    betaq.update();

    // Quantize x and y
    xq.Forward_cpu(N, x.cpu_data(), x_quant.mutable_cpu_data());
    yq.Forward_cpu(N, y.cpu_data(), y_quant.mutable_cpu_data());

    // Defaults used when the factor is not quantized: 1 if present, else 0.
    Dtype alpha_val_quant = has_alpha;
    Dtype beta_val_quant = has_beta;

    // Quantize alpha
    if (alpha_with_quant) {
      alphaq.Forward_cpu(1, &alpha_val, &alpha_val_quant);
    }

    // Quantize beta
    if (beta_with_quant) {
      betaq.Forward_cpu(1, &beta_val, &beta_val_quant);
    }

    if (Caffe::mode() == Caffe::Brew::CPU) {
      // TODO: Not implemented yet
      return;
      /*caffe_axpby<Dtype>(N, alpha_val_quant, x_quant.cpu_data(),
                           beta_val_quant, y_quant.mutable_cpu_data(),
                           alpha_with_quant ? &(alphaq.out_quantizer_values())
                                            : nullptr,
                           &(xq.out_quantizer_values()),
                           beta_with_quant ? &(betaq.out_quantizer_values())
                                           : nullptr,
                           &(yq.out_quantizer_values()));*/
    } else {
      Caffe::GetDefaultDevice()->template axpby<Dtype>(N,
                        alpha_val_quant, x_quant.gpu_data(),
                        beta_val_quant, y_quant.mutable_gpu_data(),
                        alpha_with_quant ? &(alphaq.out_quantizer_values())
                                         : nullptr,
                        &(xq.out_quantizer_values()),
                        beta_with_quant ? &(betaq.out_quantizer_values())
                                        : nullptr,
                        &(yq.out_quantizer_values()));
    }

    // De-quantize the result for comparison with the float reference.
    yq.Backward_cpu(N, y_quant.cpu_data(), y_unquant.mutable_cpu_data());

    // Tolerance: percentile_eps of the largest observed magnitude of y.
    const QuantizerValues cqv = yq.in_quantizer_values();
    float eps = std::max(std::abs(cqv.get_max<float>()),
                         std::abs(cqv.get_min<float>())) * percentile_eps;

    for (int_tp i = 0; i < N; ++i) {
      EXPECT_NEAR(y_unquant.cpu_data()[i], y_result.cpu_data()[i], eps);
      // One error is enough to abort
      if (fabs(y_unquant.cpu_data()[i] - y_result.cpu_data()[i]) >= eps) {
        break;
      }
    }
  }
}
/**
 * Reseeds rndEngine with a single value drawn from std::random_device.
 */
void SpinMatrix::Seed() {
    rndEngine.seed(std::random_device{}());
}
/* Exercises a jigsaw collection: over a number of simulated frames, grabs a
 * random number of pieces and asserts that a piece grabbed twice within the
 * same frame does not carry the same timestamp both times.
 * A "Jigsaw ran out of room" exception is tolerated; any other AFK_Exception
 * is propagated to the caller.
 */
void afk_testJigsaw(
    AFK_Computer *computer,
    const AFK_ConfigSettings& settings)
{
    boost::random::random_device rdev;
    srand(rdev());

    /* Make a jigsaw with a plausible shape size, and allocate lots
     * of pieces out of it in multiple iterations.  Upon each of
     * these simulated frames, check that the set of available
     * pieces appears sane and hasn't gotten trampled, etc.
     * TODO -- Improvements:
     * - Test a 3D one too (pull out the test into a function I
     * can call with several)
     * - Multi-threaded test
     * - Test OpenCL program that writes known values to the jigsaw
     * texture -- verify that the values come out OK and don't
     * trample each other either.
     */
    const int testIterations = 50;

    AFK_JigsawMemoryAllocation testAllocation(
        {
            AFK_JigsawMemoryAllocation::Entry(
                {
                    AFK_JigsawImageDescriptor(
                        afk_vec3<int>(9, 9, 1),
                        AFK_JigsawFormat::FLOAT32_4,
                        AFK_JigsawDimensions::TWO,
                        AFK_JigsawBufferUsage::CL_ONLY,
                        GL_NEAREST)
                },
                4,
                1.0f),
        },
        settings.concurrency,
        computer->useFake3DImages(settings),
        1.0f,
        computer->getFirstDeviceProps());

    AFK_JigsawCollection testCollection(
        computer,
        testAllocation.at(0),
        computer->getFirstDeviceProps(),
        0);

    AFK_Frame frame;
    frame.increment();
    testCollection.flip(frame);

    for (int i = 0; i < testIterations; ++i)
    {
        /* Upper bound (exclusive) for the number of pieces grabbed this
         * frame.  It can come out as 0 for small configurations, and
         * `rand() % 0` is undefined, so that case grabs nothing.
         */
        const auto pieceLimit =
            settings.concurrency * testAllocation.at(0).getPieceCount() / 4;
        const int piecesThisFrame = (pieceLimit > 0) ? (rand() % pieceLimit) : 0;

        afk_out << "Test frame " << frame << ": Getting " << piecesThisFrame << " pieces" << std::endl;

        /* Here, I map each piece that I've drawn to its timestamp. */
        boost::unordered_map<AFK_JigsawPiece, AFK_Frame> piecesMap;

        try
        {
            for (int p = 0; p < piecesThisFrame; ++p)
            {
                AFK_JigsawPiece jigsawPiece;
                AFK_Frame pieceFrame;
                testCollection.grab(0, &jigsawPiece, &pieceFrame, 1);
                afk_out << "Grabbed piece " << jigsawPiece << " with frame " << pieceFrame << std::endl;

                /* A piece seen before in this frame must not come back
                 * with the same timestamp.
                 */
                auto existing = piecesMap.find(jigsawPiece);
                if (existing != piecesMap.end()) assert(existing->second != pieceFrame);
                piecesMap[jigsawPiece] = pieceFrame;
            }
        }
        catch (AFK_Exception& e)
        {
            if (e.getMessage() == "Jigsaw ran out of room")
            {
                /* I'll forgive this. */
                afk_out << "Out of room -- OK -- continuing" << std::endl;
            }
            else
            {
                /* Rethrow the original exception object; `throw e;` would
                 * throw a copy sliced to AFK_Exception.
                 */
                throw;
            }
        }

        frame.increment();
        testCollection.flip(frame);
        testCollection.printStats(afk_out, "Test jigsaw");
    }

    afk_out << "Jigsaw test completed" << std::endl;
}
void AFK_Core::configure(int *argcp, char **argv) { /* Measure the clock tick interval and make sure it's somewhere near * sane ... */ afk_clock::time_point intervalTestStart = afk_clock::now(); afk_clock::time_point intervalTestEnd = afk_clock::now(); while (intervalTestStart == intervalTestEnd) { intervalTestEnd = afk_clock::now(); } afk_duration_mfl tickInterval = std::chrono::duration_cast<afk_duration_mfl>(intervalTestEnd - intervalTestStart); afk_out << "AFK: Using clock with apparent tick interval: " << tickInterval.count() << " millis" << std::endl; assert(tickInterval.count() < 0.1f); if (!settings.parseCmdLine(argcp, argv)) { throw AFK_Exception("Failed to parse command line"); } rng = new AFK_Boost_Taus88_RNG(); /* The special value -1 means no seed has been supplied, so I need to make one. * And of course, the seed comes in two 64-bit parts: */ if (settings.masterSeedHigh == -1 || settings.masterSeedLow == -1) { boost::random_device rdev; if (settings.masterSeedHigh == -1) { settings.masterSeedHigh = (static_cast<int64_t>(rdev()) | (static_cast<int64_t>(rdev()) << 32)); } if (settings.masterSeedLow == -1) { settings.masterSeedLow = (static_cast<int64_t>(rdev()) | (static_cast<int64_t>(rdev()) << 32)); } } AFK_RNG_Value rSeed; rSeed.v.ll[0] = settings.masterSeedLow; rSeed.v.ll[1] = settings.masterSeedHigh; rng->seed(rSeed); /* Startup state of the protagonist. */ velocity = afk_vec3<float>(0.0f, 0.0f, 0.0f); axisDisplacement = afk_vec3<float>(0.0f, 0.0f, 0.0f); controlsEnabled = 0uLL; /* TODO Make the viewpoint configurable? Right now I have a * fixed 3rd person view here. */ camera.setSeparation(afk_vec3<float>(0.0f, -1.5f, 3.0f)); /* Set up the sun. (TODO: Make configurable? Randomly * generated? W/e :) ) * TODO: Should I make separate ambient and diffuse colours, * and make the ambient colour dependent on the sky colour? 
*/ sun.colour = afk_vec3<float>(1.0f, 1.0f, 1.0f); sun.direction = afk_vec3<float>(-0.5f, -1.0f, 1.0f).normalise(); sun.ambient = 0.2f; sun.diffuse = 1.0f; skyColour = afk_vec3<float>( rng->frand(), rng->frand(), rng->frand()); }
int afk_testChainLink() { boost::random::random_device rdev; int64_t rngSeed = (static_cast<int64_t>(rdev()) | (static_cast<int64_t>(rdev())) << 32); const int iterations = 40000; const int maxChainLength = static_cast<int>(sqrt(iterations)); const int threads = 24; std::shared_ptr<AFK_BasicLinkFactory<AFK_ClaimableChainLinkTestLink> > linkFactory = std::make_shared<AFK_BasicLinkFactory<AFK_ClaimableChainLinkTestLink> >(); AFK_ChainLinkTestChain *testChain = new AFK_ChainLinkTestChain(linkFactory); std::deque<std::thread> workers; afk_clock::time_point startTime, endTime; startTime = afk_clock::now(); assert(threads < 31); for (int i = 0; i < threads; ++i) { workers.push_back(std::thread( afk_testChainLink_worker, i + 1, rngSeed, iterations, maxChainLength, testChain )); } for (auto workerIt = workers.begin(); workerIt != workers.end(); ++workerIt) { workerIt->join(); } endTime = afk_clock::now(); /* Verify that whole thing */ int index = 0; int fails = 0; testChain->foreach([&index, &fails](std::shared_ptr<AFK_ClaimableChainLinkTestLink> link) { //afk_out << "verify test chain link: index " << index << ": "; auto claim = link->claim(1, AFK_CL_SPIN); //afk_out << claim.getShared(); if (claim.getShared().verify(index)) { //afk_out << " (verify ok)" << std::endl; } else { //afk_out << " (verify FAILED)" << std::endl; ++fails; } ++index; }); afk_duration_mfl timeTaken = std::chrono::duration_cast<afk_duration_mfl>(endTime - startTime); afk_out << "Chain link test (" << iterations << " iterations, " << maxChainLength << " max chain length, " << threads << " threads) finished in " << timeTaken.count() << " millis" << std::endl; delete testChain; afk_out << "Chain link test finished with " << iterations << " iterations, " << fails << " fails." << std::endl; return fails; }
// Runs 25 randomly shaped GEMV problems through both the LibDNN BLAS of the
// default device and caffe_gemv on the CPU, and expects the two result
// vectors to agree within a type-dependent tolerance.
TYPED_TEST(LibDNNBlasTest, TestGemvComparativeCPUGPU) {
  Device *dc = Caffe::GetDefaultDevice();

  // Tolerance depends on the precision of the tested type.
  TypeParam eps = 0.0;
  if (std::is_same<TypeParam, half_fp>::value) {
    eps = EPS_HALF;
  }
  if (std::is_same<TypeParam, float>::value) {
    eps = EPS_FLOAT;
  }
  if (std::is_same<TypeParam, double>::value) {
    eps = EPS_DOUBLE;
  }

  std::random_device seed_source;
  std::mt19937 rngen(seed_source());

  std::uniform_int_distribution<int_tp> dimsRand(1, 256);
  std::uniform_int_distribution<int_tp> boolRand(0, 1);
  std::uniform_int_distribution<int_tp> factorRand(-25, 25);

  for (int_tp testIdx = 0; testIdx < 25; ++testIdx) {
    const int_tp M = dimsRand(rngen);
    const int_tp N = dimsRand(rngen);

    const CBLAS_TRANSPOSE trans_A =
        boolRand(rngen) ? CblasTrans : CblasNoTrans;

    // The factor values are drawn even when they end up unused.
    const bool has_alpha = boolRand(rngen);
    const TypeParam alpha_val = factorRand(rngen) / 100.0;

    const bool has_beta = boolRand(rngen);
    const TypeParam beta_val = factorRand(rngen) / 100.0;

    // Derived quantities that are used repeatedly below.
    const bool transposed = (trans_A == CblasTrans);
    const int_tp x_len = transposed ? M : N;
    const int_tp y_len = transposed ? N : M;
    const TypeParam alpha = has_alpha ? alpha_val : (TypeParam)1.0;
    const TypeParam beta = has_beta ? beta_val : (TypeParam)0.0;

    vector<int_tp> A_shape(4, 1);
    vector<int_tp> x_shape(4, 1);
    vector<int_tp> y_shape(4, 1);

    A_shape[2] = M;
    A_shape[3] = N;
    x_shape[3] = x_len;
    y_shape[3] = y_len;

    Blob<TypeParam> A(A_shape, Caffe::GetDefaultDevice());
    Blob<TypeParam> x(x_shape, Caffe::GetDefaultDevice());
    Blob<TypeParam> y_GPU(y_shape, Caffe::GetDefaultDevice());
    Blob<TypeParam> y_CPU(y_shape, Caffe::GetDefaultDevice());

    caffe_rng_gaussian(M * N, (TypeParam)0.0, (TypeParam)0.25,
                       A.mutable_cpu_data());
    caffe_rng_gaussian(x_len, (TypeParam)0.0, (TypeParam)0.25,
                       x.mutable_cpu_data());
    caffe_rng_gaussian(y_len, (TypeParam)0.0, (TypeParam)0.25,
                       y_CPU.mutable_cpu_data());

    // Both paths start from the same initial y.
    caffe_copy(y_len, y_CPU.cpu_data(), y_GPU.mutable_cpu_data());

    std::cout << "==== Test Case " << testIdx << " ====" << std::endl;
    std::cout << "M: " << M << " N: " << N << std::endl;
    std::cout << "alpha: " << alpha << " "
              << "beta: " << beta << std::endl;
    std::cout << "trans A: " << transposed << std::endl;

    // Device path
    dc->GetLibDNNBlas<TypeParam, TypeParam>()->gemv(
                trans_A, M, N,
                alpha, A.gpu_data(), x.gpu_data(),
                beta, y_GPU.mutable_gpu_data());

    // CPU reference path
    caffe_gemv<TypeParam>(
                trans_A, M, N,
                alpha, A.cpu_data(), x.cpu_data(),
                beta, y_CPU.mutable_cpu_data());

    for (int_tp i = 0; i < y_len; ++i) {
      EXPECT_NEAR(y_CPU.cpu_data()[i], y_GPU.cpu_data()[i], eps);
      // One error is enough to abort
      if (fabs(y_CPU.cpu_data()[i] - y_GPU.cpu_data()[i]) >= eps) {
        break;
      }
    }
  }
}
// Compares a float reference GEMM against the quantized GEMM (CPU or device
// path, depending on Caffe::mode()) on 25 randomly shaped problems. After
// de-quantization each result element must be within 5% of the observed
// value range of the reference result.
TYPED_TEST(QuantBlasTest, TestGemmComparativeFloatQuant) {
  typedef typename TypeParam::Dtype Dtype;

  // Expect at most 5% error
  float percentile_eps = 0.05;

  std::random_device rdev;
  std::mt19937 rngen(rdev());

  // Need to test > 64 dimension
  std::uniform_int_distribution<int_tp> dimsRand(1, 256);
  std::uniform_int_distribution<int_tp> boolRand(0, 1);
  std::uniform_real_distribution<float> valRand(-2.0, 2.0);

  for (int_tp testIdx = 0; testIdx < 25; ++testIdx) {
    int_tp M = dimsRand(rngen);
    int_tp N = dimsRand(rngen);
    int_tp K = dimsRand(rngen);

    CBLAS_TRANSPOSE trans_A = boolRand(rngen) ? CblasTrans : CblasNoTrans;
    CBLAS_TRANSPOSE trans_B = boolRand(rngen) ? CblasTrans : CblasNoTrans;

    bool has_alpha = boolRand(rngen);
    bool has_beta = has_alpha ? boolRand(rngen) : true;

    // boolRand is the left operand, so a value is drawn in every case.
    bool alpha_with_quant = boolRand(rngen) && has_alpha;
    bool beta_with_quant = boolRand(rngen) && has_beta;

    // A missing factor is 0; a present but unquantized factor is 1.
    float alpha_val = 0.0f;
    if (has_alpha) {
      alpha_val = alpha_with_quant ? valRand(rngen) : 1.0f;
    }
    float beta_val = 0.0f;
    if (has_beta) {
      beta_val = beta_with_quant ? valRand(rngen) : 1.0f;
    }

    // Element counts of the three matrices.
    const int_tp a_count = M * K;
    const int_tp b_count = K * N;
    const int_tp c_count = M * N;

    vector<int_tp> A_shape(4, 1);
    vector<int_tp> B_shape(4, 1);
    vector<int_tp> C_shape(4, 1);

    A_shape[2] = M;
    A_shape[3] = K;
    B_shape[2] = K;
    B_shape[3] = N;
    C_shape[2] = M;
    C_shape[3] = N;

    Blob<float> A(A_shape, Caffe::GetDefaultDevice());
    Blob<float> B(B_shape, Caffe::GetDefaultDevice());
    Blob<float> C(C_shape, Caffe::GetDefaultDevice());
    Blob<float> C_result(C_shape, Caffe::GetDefaultDevice());

    Blob<Dtype> A_quant(A_shape, Caffe::GetDefaultDevice());
    Blob<Dtype> B_quant(B_shape, Caffe::GetDefaultDevice());
    Blob<Dtype> C_quant(C_shape, Caffe::GetDefaultDevice());

    Blob<float> C_unquant(C_shape, Caffe::GetDefaultDevice());

    caffe_rng_gaussian(a_count, 0.0f, 0.5f, A.mutable_cpu_data());
    caffe_rng_gaussian(b_count, 0.0f, 0.5f, B.mutable_cpu_data());
    caffe_rng_gaussian(c_count, 0.0f, 0.5f, C.mutable_cpu_data());

    // C_result starts as a copy of C; the reference GEMM updates it in place.
    caffe_copy(c_count, C.cpu_data(), C_result.mutable_cpu_data());

    QuantizerParameter qpm_a;
    QuantizerParameter qpm_b;
    QuantizerParameter qpm_c;
    QuantizerParameter qpm_alpha;
    QuantizerParameter qpm_beta;
    qpm_a.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_b.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_c.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_alpha.set_mode(CAFFE_QUANT_OBSERVE);
    qpm_beta.set_mode(CAFFE_QUANT_OBSERVE);

    Quantizer<float, Dtype> aq(qpm_a);
    Quantizer<float, Dtype> bq(qpm_b);
    Quantizer<float, Dtype> cq(qpm_c);
    Quantizer<float, Dtype> alphaq(qpm_alpha);
    Quantizer<float, Dtype> betaq(qpm_beta);

    // Normal GEMM
    caffe_gemm<float>(
                trans_A, trans_B,
                M, N, K,
                alpha_val, A.cpu_data(), B.cpu_data(),
                beta_val, C_result.mutable_cpu_data());

    // Observe all values that will be relevant for quantization.
    // cq sees both the initial C and the reference result.
    aq.ObserveIn_cpu(a_count, A.cpu_data());
    bq.ObserveIn_cpu(b_count, B.cpu_data());
    cq.ObserveIn_cpu(c_count, C.cpu_data());
    cq.ObserveIn_cpu(c_count, C_result.cpu_data());
    alphaq.ObserveIn_cpu(1, &alpha_val);
    betaq.ObserveIn_cpu(1, &beta_val);

    // Apply observed values to the quantizer
    aq.update();
    bq.update();
    cq.update();
    alphaq.update();
    betaq.update();

    // Quantize A, B and C
    aq.Forward_cpu(a_count, A.cpu_data(), A_quant.mutable_cpu_data());
    bq.Forward_cpu(b_count, B.cpu_data(), B_quant.mutable_cpu_data());
    cq.Forward_cpu(c_count, C.cpu_data(), C_quant.mutable_cpu_data());

    // Defaults used when the factor is not quantized: 1 if present, else 0.
    Dtype alpha_val_quant = has_alpha;
    Dtype beta_val_quant = has_beta;

    // Quantize alpha
    if (alpha_with_quant) {
      alphaq.Forward_cpu(1, &alpha_val, &alpha_val_quant);
    }

    // Quantize beta
    if (beta_with_quant) {
      betaq.Forward_cpu(1, &beta_val, &beta_val_quant);
    }

    /*
    std::cout << "C max:" << cq.in_quantizer_values().max << std::endl;
    std::cout << "C min:" << cq.in_quantizer_values().min << std::endl;
    std::cout << "C zero:" << cq.in_quantizer_values().zero << std::endl;
    std::cout << "C scale:" << cq.in_quantizer_values().scale << std::endl;
    std::cout << "C max:" << cq.out_quantizer_values().max << std::endl;
    std::cout << "C min:" << cq.out_quantizer_values().min << std::endl;
    std::cout << "C zero:" << cq.out_quantizer_values().zero << std::endl;
    std::cout << "C scale:" << cq.out_quantizer_values().scale << std::endl;
    */

    if (Caffe::mode() == Caffe::Brew::CPU) {
      caffe_gemm<Dtype>(
                  trans_A, trans_B,
                  M, N, K,
                  alpha_val_quant,
                  A_quant.cpu_data(), B_quant.cpu_data(),
                  beta_val_quant,
                  C_quant.mutable_cpu_data(),
                  alpha_with_quant ? &(alphaq.out_quantizer_values())
                                   : nullptr,
                  &(aq.out_quantizer_values()),
                  &(bq.out_quantizer_values()),
                  beta_with_quant ? &(betaq.out_quantizer_values())
                                  : nullptr,
                  &(cq.out_quantizer_values()));
    } else {
      Caffe::GetDefaultDevice()->template gemm<Dtype>(trans_A, trans_B,
                  M, N, K,
                  alpha_val_quant,
                  A_quant.gpu_data(), B_quant.gpu_data(),
                  beta_val_quant,
                  C_quant.mutable_gpu_data(),
                  alpha_with_quant ? &(alphaq.out_quantizer_values())
                                   : nullptr,
                  &(aq.out_quantizer_values()),
                  &(bq.out_quantizer_values()),
                  beta_with_quant ? &(betaq.out_quantizer_values())
                                  : nullptr,
                  &(cq.out_quantizer_values()));
    }

    // De-quantize the result for comparison with the float reference.
    cq.Backward_cpu(c_count, C_quant.cpu_data(), C_unquant.mutable_cpu_data());

    // print_matrix(A_quant.cpu_data(), M, K);
    // print_matrix(B_quant.cpu_data(), K, N);
    // print_matrix(C_quant.cpu_data(), M, N);
    // print_matrix(C_result.cpu_data(), M, N);
    // print_matrix(C_unquant.cpu_data(), M, N);

    // Tolerance: percentile_eps of the largest observed magnitude of C.
    const QuantizerValues cqv = cq.in_quantizer_values();
    float eps = std::max(std::abs(cqv.get_max<float>()),
                         std::abs(cqv.get_min<float>())) * percentile_eps;

    for (int_tp i = 0; i < c_count; ++i) {
      EXPECT_NEAR(C_unquant.cpu_data()[i], C_result.cpu_data()[i], eps);
      // One error is enough to abort
      if (fabs(C_unquant.cpu_data()[i] - C_result.cpu_data()[i]) >= eps) {
        break;
      }
    }
  }
}