Exemplo n.º 1
0
/*
 * Runs three Monte Carlo estimates of pi, each throwing NTRIES points at the
 * square [-1,1] x [-1,1] and counting how many fall inside the unit circle:
 *   1. doubles derived from threefry2x64 output,
 *   2. doubles derived from threefry2x32 output,
 *   3. floats  derived from threefry2x32 output.
 * Every pass is handed to pi_check(); the sum of its return values is the
 * process exit status.
 *
 * NOTE(review): the u01_open_open_* helpers are expected to return values
 * strictly inside (0,1), so 2*u - 1 lies strictly inside (-1,1) -- confirm
 * against their definitions.
 */
int main(int argc, char **argv){
    /* 64-bit generator: counter, key and most recent output. */
    threefry2x64_ctr_t ctr64 = {{0}}, out64;
    threefry2x64_key_t key64 = {{R123_64BIT(0xdeadbeef12345678)}};
    /* 32-bit generator: counter, key and most recent output. */
    threefry2x32_ctr_t ctr32 = {{0}}, out32;
    threefry2x32_key_t key32 = {{0xdecafbad}};
    unsigned long hits, tries;
    int errs = 0;

    (void)argc; (void)argv; /* unused */

    /* Pass 1: doubles from the 64-bit generator. */
    printf("%lu uniform doubles from threefry2x64\n", NTRIES);
    hits = 0;
    for (tries = 0; tries < NTRIES; tries++) {
        double x, y;
        ctr64.v[0]++; /* a fresh counter value gives a fresh random pair */
        out64 = threefry2x64(ctr64, key64);
        x = 2.*u01_open_open_64_53(out64.v[0]) - 1.;
        y = 2.*u01_open_open_64_53(out64.v[1]) - 1.;
        if( x*x + y*y < 1.0 )
            hits++;
    }
    errs += pi_check(hits, tries);

    /* Pass 2: doubles from the 32-bit generator. */
    printf("%lu uniform doubles from threefry2x32\n", NTRIES);
    hits = 0;
    for (tries = 0; tries < NTRIES; tries++) {
        double x, y;
        ctr32.v[0]++;
        out32 = threefry2x32(ctr32, key32);
        x = 2.*u01_open_open_32_53(out32.v[0]) - 1.;
        y = 2.*u01_open_open_32_53(out32.v[1]) - 1.;
        if( x*x + y*y < 1.0 )
            hits++;
    }
    errs += pi_check(hits, tries);

    /* Pass 3: floats from the 32-bit generator.  The counter is NOT reset,
       so this pass continues where pass 2 stopped. */
    printf("%lu uniform floats from threefry2x32\n", NTRIES);
    hits = 0;
    for (tries = 0; tries < NTRIES; tries++) {
        float x, y;
        ctr32.v[0]++;
        out32 = threefry2x32(ctr32, key32);
        x = 2.f*u01_open_open_32_24(out32.v[0]) - 1.f;
        y = 2.f*u01_open_open_32_24(out32.v[1]) - 1.f;
        if( x*x + y*y < 1.0 )
            hits++;
    }
    errs += pi_check(hits, tries);

    return errs;
}
Exemplo n.º 2
0
/*
 * Monte Carlo estimate of pi using the raw integer output of threefry2x64.
 *
 * Each generator output is reinterpreted as an array of int32_t values that
 * are consumed in (x, y) pairs.  A pair is a "hit" when x*x + y*y < 2^62,
 * i.e. when the point lies inside the circle of radius 2^31 centred on the
 * origin.  The totals are handed to pi_check(), whose return value becomes
 * the process exit status.
 */
int main(int argc, char **argv){
    unsigned long hits = 0, tries = 0;
    /* Squared radius of the circle: (2^31)^2.  Kept unsigned so that the
       comparison below is done entirely in uint64_t. */
    const uint64_t two_to_the_62 = ((uint64_t)1)<<62;

    threefry2x64_key_t key = {{0, 0}};
    threefry2x64_ctr_t ctr = {{0, 0}};
    enum { int32s_per_counter = sizeof(ctr)/sizeof(int32_t) };
    (void)argc;(void)argv; /* unused  */

    printf("Throwing %lu darts at a square board using threefry2x64\n", NTRIES);

    /* make the most of each bijection by looping over as many
       int32_t's as we can find in the ctr_type. */
    assert( int32s_per_counter%2 == 0 ); /* values are consumed in pairs */
    while(tries < NTRIES){
        /* Use a union to avoid strict aliasing issues. */
        union{
            threefry2x64_ctr_t ct;
            int32_t i32[int32s_per_counter];
        }u;
        size_t j;
        /* Don't worry about the 'carry'.  We're not going to loop
           more than 2^64 times. */
        ctr.v[0]++;
        u.ct = threefry2x64(ctr, key);
        for(j=0; j<int32s_per_counter; j+=2){
            int64_t x = u.i32[j];
            int64_t y = u.i32[j+1];
            /* Each square is at most 2^62 and fits in int64_t, but the sum
               reaches 2^63 when x == y == INT32_MIN, which would overflow a
               signed 64-bit integer (undefined behaviour).  Add the squares
               as uint64_t, where 2^63 is representable. */
            uint64_t dist2 = (uint64_t)(x*x) + (uint64_t)(y*y);
            if( dist2 < two_to_the_62 )
                hits++;
            tries++;
        }
    }
    return pi_check(hits, tries);
}
Exemplo n.º 3
0
// Host driver for the "counthits" OpenCL kernel.
//
// Builds pi_opencl_kernel.ocl for the first device of a GPU context, launches
// one work-item per element of a cl_uint2 output buffer, then sums component
// s[0] of every element into `hits` and component s[1] into `tries` and
// returns pi_check(hits, tries) as the process exit status.
//
// NOTE(review): `progname` and `platformList` are not declared in this
// function; they are presumably file-scope variables -- confirm.
int main(int argc, char **argv)
{
    const char *kernelname = "counthits";
    unsigned count = 10000;   // passed to the kernel as argument 0

    cl_int              err;
    cl_context          cl_context;
    cl_program          program;
    cl_kernel           cl_kernel;
    cl_mem              cl_out;
    cl_command_queue    cl_queue;

    size_t i, nthreads, hits_sz;
    size_t cores, work_group_size;
    cl_uint2 *          hits_host;

    double              d = 0.; // timer

    // NOTE(review): presumably starts a wall-clock measurement; the value is
    // not read again in this function.
    d = timer(&d);
    progname = argv[0];


    CHECK(cl::Platform::get(&platformList));
    CHECKERR(  cl_context = createCLContext(CL_DEVICE_TYPE_GPU,cl_vendor::VENDOR_AMD, &err) );

    std::vector<cl::Device> devices;
    CHECKERR( devices = cl_context.getInfo<CL_CONTEXT_DEVICES>(&err) );


    // Load the kernel source text and build it for every device in the context.
    size_t length = 0;
    const char * sourceStr = loadFileToString("pi_opencl_kernel.ocl","",&length);

    cl::Program::Sources sources(1, std::make_pair(sourceStr, length));
    program = cl::Program(cl_context, sources);

    CHECK( program.build(devices,"-I ..\\include") );

    // Size the launch from the capabilities reported by the first device.
    CHECKERR(work_group_size = devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(&err) );
    CHECKERR(cores = devices[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(&err) );
    cores *= 16*4; //Tahiti.  NOTE(review): hardware-specific multiplier -- confirm for other devices.

    if (work_group_size > 64) work_group_size /= 2;
    nthreads = cores * work_group_size*32; //2048*128 = 262144

    // `count` is hard-coded to 10000 above, so this fallback is currently
    // never taken.
    if (count == 0)
        count = NTRIES/nthreads; //38

    // `count` is an unsigned int, so it must be printed with %u; %lu expects
    // an unsigned long and is a format/argument mismatch.
    printf("Count: %u\n",count);



    // One cl_uint2 per work-item.
    hits_sz = nthreads * sizeof(hits_host[0]);//2097152
    CHECKNOTZERO(hits_host = (cl_uint2 *)malloc(hits_sz));

    // The device buffer is created over the host allocation (CL_MEM_USE_HOST_PTR).
    CHECKERR    ( cl_out = cl::Buffer(  cl_context,  CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, hits_sz, hits_host, &err));
    CHECKERR    ( cl_kernel = cl::Kernel(program,kernelname,&err) );
    CHECK       ( cl_kernel.setArg( 0, count) );
    CHECK       ( cl_kernel.setArg( 1, cl_out) );

    CHECKERR (cl_queue = cl::CommandQueue(cl_context, devices[0], 0, &err) );
    cl::Event event;

    // Launch nthreads work-items in groups of work_group_size, wait for the
    // kernel to finish, then do a blocking read of the results.
    CHECK( cl_queue.enqueueNDRangeKernel(cl_kernel,cl::NullRange,cl::NDRange(nthreads), cl::NDRange(work_group_size), NULL,  &event) );
    event.wait();
    CHECK( cl_queue.enqueueReadBuffer(cl_out, CL_TRUE, 0,hits_sz, hits_host) );

    // Reduce the per-work-item tallies on the host.
    unsigned long hits = 0, tries = 0;
    for (i = 0; i < nthreads; i++) {
#ifdef _DEBUG
        printf("%lu %u %u\n", (unsigned long)i, hits_host[i].s[0], hits_host[i].s[1]);
#endif
        hits += hits_host[i].s[0];
        tries += hits_host[i].s[1];
    }


    return pi_check(hits, tries);
}