int main(int argc, char **argv){ unsigned long hits = 0, tries = 0; int errs = 0; threefry2x64_ctr_t c = {{0}}, r; threefry2x64_key_t k = {{R123_64BIT(0xdeadbeef12345678)}}; threefry2x32_ctr_t ch = {{0}}, rh; threefry2x32_key_t kh = {{0xdecafbad}}; (void)argc; (void)argv; /* unused */ printf("%lu uniform doubles from threefry2x64\n", NTRIES); while (tries < NTRIES) { double x, y; c.v[0]++; /* increment the counter */ r = threefry2x64(c, k); x = 2.*u01_open_open_64_53(r.v[0]) - 1.; y = 2.*u01_open_open_64_53(r.v[1]) - 1.; if( x*x + y*y < 1.0 ) hits++; tries++; } errs += pi_check(hits, tries); printf("%lu uniform doubles from threefry2x32\n", NTRIES); hits = tries = 0; while (tries < NTRIES) { double x, y; ch.v[0]++; /* increment the counter */ rh = threefry2x32(ch, kh); x = 2.*u01_open_open_32_53(rh.v[0]) - 1.; y = 2.*u01_open_open_32_53(rh.v[1]) - 1.; if( x*x + y*y < 1.0 ) hits++; tries++; } errs += pi_check(hits, tries); printf("%lu uniform floats from threefry2x32\n", NTRIES); hits = tries = 0; while (tries < NTRIES) { float x, y; ch.v[0]++; /* increment the counter */ rh = threefry2x32(ch, kh); x = 2.f*u01_open_open_32_24(rh.v[0]) - 1.f; y = 2.f*u01_open_open_32_24(rh.v[1]) - 1.f; if( x*x + y*y < 1.0 ) hits++; tries++; } errs += pi_check(hits, tries); return errs; }
int main(int argc, char **argv){ unsigned long hits = 0, tries = 0; const int64_t two_to_the_62 = ((int64_t)1)<<62; threefry2x64_key_t key = {{0, 0}}; threefry2x64_ctr_t ctr = {{0, 0}}; enum { int32s_per_counter = sizeof(ctr)/sizeof(int32_t) }; (void)argc;(void)argv; /* unused */ printf("Throwing %lu darts at a square board using threefry2x64\n", NTRIES); /* make the most of each bijection by looping over as many int32_t's as we can find in the ctr_type. */ assert( int32s_per_counter%2 == 0 ); while(tries < NTRIES){ /* Use a union to avoid strict aliasing issues. */ union{ threefry2x64_ctr_t ct; int32_t i32[int32s_per_counter]; }u; size_t j; /* Don't worry about the 'carry'. We're not going to loop more than 2^64 times. */ ctr.v[0]++; u.ct = threefry2x64(ctr, key); for(j=0; j<int32s_per_counter; j+=2){ int64_t x = u.i32[j]; int64_t y = u.i32[j+1]; if( (x*x + y*y) < two_to_the_62 ) hits++; tries++; } } return pi_check(hits, tries); }
int main(int argc, char **argv) { const char *kernelname = "counthits"; unsigned count =10000; cl_int err; cl_context cl_context; cl_program program; cl_kernel cl_kernel; cl_mem cl_out; cl_command_queue cl_queue; size_t i, nthreads, hits_sz; size_t cores, work_group_size; cl_uint2 * hits_host; double d = 0.; // timer d = timer(&d); progname = argv[0]; CHECK(cl::Platform::get(&platformList)); CHECKERR( cl_context = createCLContext(CL_DEVICE_TYPE_GPU,cl_vendor::VENDOR_AMD, &err) ); std::vector<cl::Device> devices; CHECKERR( devices = cl_context.getInfo<CL_CONTEXT_DEVICES>(&err) ); size_t length = 0; const char * sourceStr = loadFileToString("pi_opencl_kernel.ocl","",&length); cl::Program::Sources sources(1, std::make_pair(sourceStr, length)); program = cl::Program(cl_context, sources); CHECK( program.build(devices,"-I ..\\include") ); CHECKERR(work_group_size = devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(&err) ); CHECKERR(cores = devices[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(&err) ); cores *= 16*4; //Tahiti. if (work_group_size > 64) work_group_size /= 2; nthreads = cores * work_group_size*32; //2048*128 = 262144 if (count == 0) count = NTRIES/nthreads; //38 printf("Count: %lu\n",count); hits_sz = nthreads * sizeof(hits_host[0]);//2097152 CHECKNOTZERO(hits_host = (cl_uint2 *)malloc(hits_sz)); CHECKERR ( cl_out = cl::Buffer( cl_context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, hits_sz, hits_host, &err)); CHECKERR ( cl_kernel = cl::Kernel(program,kernelname,&err) ); CHECK ( cl_kernel.setArg( 0, count) ); CHECK ( cl_kernel.setArg( 1, cl_out) ); CHECKERR (cl_queue = cl::CommandQueue(cl_context, devices[0], 0, &err) ); cl::Event event; CHECK( cl_queue.enqueueNDRangeKernel(cl_kernel,cl::NullRange,cl::NDRange(nthreads), cl::NDRange(work_group_size), NULL, &event) ); event.wait(); CHECK( cl_queue.enqueueReadBuffer(cl_out, CL_TRUE, 0,hits_sz, hits_host) ); unsigned long hits = 0, tries = 0; for (i = 0; i < nthreads; i++) { #ifdef _DEBUG printf("%lu %u %u\n", (unsigned long)i, hits_host[i].s[0], hits_host[i].s[1]); #endif hits += hits_host[i].s[0]; tries += hits_host[i].s[1]; } return pi_check(hits, tries); }