Esempio n. 1
0
/**
 * Checks operator== for every host/device pairing of three thrust vectors.
 *
 * The test is currently disabled: the `#if 1` branch only reports
 * KNOWN_FAILURE, and the assertions in the `#else` branch are not compiled.
 */
void TestVectorCppEquality(void)
{
#if 1
    KNOWN_FAILURE;
#else
    // a = {0, 1, 2} and b = {0, 1, 3} differ only in the last element.
    // c is never assigned and keeps the contents it was constructed with
    // (NOTE(review): expected to be value-initialized zeros -- confirm).
    // NOTE(review): a duplicated store to b[0]/b[1] was dropped as a dead
    // store; if it was meant to initialize c, add that explicitly.
    thrust::host_vector<int> h_a(3);
    thrust::host_vector<int> h_b(3);
    thrust::host_vector<int> h_c(3);
    h_a[0] = 0;    h_a[1] = 1;    h_a[2] = 2;
    h_b[0] = 0;    h_b[1] = 1;    h_b[2] = 3;

    // Device-side mirrors of the host vectors above.
    thrust::device_vector<int> d_a(3);
    thrust::device_vector<int> d_b(3);
    thrust::device_vector<int> d_c(3);
    d_a[0] = 0;    d_a[1] = 1;    d_a[2] = 2;
    d_b[0] = 0;    d_b[1] = 1;    d_b[2] = 3;

    // Each vector equals itself and its mirror, in all host/device combinations.
    ASSERT_EQUAL((h_a == h_a), true); ASSERT_EQUAL((h_a == d_a), true); ASSERT_EQUAL((d_a == h_a), true);  ASSERT_EQUAL((d_a == d_a), true);
    ASSERT_EQUAL((h_b == h_b), true); ASSERT_EQUAL((h_b == d_b), true); ASSERT_EQUAL((d_b == h_b), true);  ASSERT_EQUAL((d_b == d_b), true);
    ASSERT_EQUAL((h_c == h_c), true); ASSERT_EQUAL((h_c == d_c), true); ASSERT_EQUAL((d_c == h_c), true);  ASSERT_EQUAL((d_c == d_c), true);

    // Distinct vectors compare unequal, in both operand orders.
    ASSERT_EQUAL((h_a == h_b), false); ASSERT_EQUAL((h_a == d_b), false); ASSERT_EQUAL((d_a == h_b), false); ASSERT_EQUAL((d_a == d_b), false);
    ASSERT_EQUAL((h_b == h_a), false); ASSERT_EQUAL((h_b == d_a), false); ASSERT_EQUAL((d_b == h_a), false); ASSERT_EQUAL((d_b == d_a), false);
    ASSERT_EQUAL((h_a == h_c), false); ASSERT_EQUAL((h_a == d_c), false); ASSERT_EQUAL((d_a == h_c), false); ASSERT_EQUAL((d_a == d_c), false);
    ASSERT_EQUAL((h_c == h_a), false); ASSERT_EQUAL((h_c == d_a), false); ASSERT_EQUAL((d_c == h_a), false); ASSERT_EQUAL((d_c == d_a), false);
    ASSERT_EQUAL((h_b == h_c), false); ASSERT_EQUAL((h_b == d_c), false); ASSERT_EQUAL((d_b == h_c), false); ASSERT_EQUAL((d_b == d_c), false);
    ASSERT_EQUAL((h_c == h_b), false); ASSERT_EQUAL((h_c == d_b), false); ASSERT_EQUAL((d_c == h_b), false); ASSERT_EQUAL((d_c == d_b), false);
#endif
}
Esempio n. 2
0
    /**
     * seed generator with 32-bit integer
     */
    void seed(unsigned int value)
    {
        // compute leapfrog multipliers for initialization
        cuda::vector<uint48> g_A(dim.threads()), g_C(dim.threads());
        cuda::configure(dim.grid, dim.block);
        get_rand48_kernel().leapfrog(g_A);

        // compute leapfrog addends for initialization
        cuda::copy(g_A, g_C);
        algorithm::gpu::scan<uint48> scan(g_C.size(), dim.threads_per_block());
        scan(g_C);

        // initialize generator with seed
        cuda::vector<uint48> g_a(1), g_c(1);
        cuda::host::vector<uint48> h_a(1), h_c(1);
        cuda::configure(dim.grid, dim.block);
        get_rand48_kernel().seed(g_A, g_C, g_a, g_c, g_state_, value);
        cuda::copy(g_a, h_a);
        cuda::copy(g_c, h_c);

        // set leapfrog constants for constant device memory
        rng_.a = h_a.front();
        rng_.c = h_c.front();
        rng_.g_state = g_state_.data();
    }
Esempio n. 3
0
File: vadd.cpp Progetto: pelmer/esc
int main(void)
{
    std::vector<float> h_a(LENGTH);              // a vector 
    std::vector<float> h_b(LENGTH);              // b vector 	
    std::vector<float> h_c (LENGTH, 0xdeadbeef); // c = a + b, from compute device

    cl::Buffer d_a;      // device memory used for the input  a vector
    cl::Buffer d_b;      // device memory used for the input  b vector
    cl::Buffer d_c;      // device memory used for the output c vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
    }

    try 
    {
    	// Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor
 
        auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int>(program, "vadd");

        d_a   = cl::Buffer(context, begin(h_a), end(h_a), true);
        d_b   = cl::Buffer(context, begin(h_b), end(h_b), true);

        d_c  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        util::Timer timer;

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)), 
            d_a,
            d_b,
            d_c,
            count);

        queue.finish();

        double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
        printf("\nThe kernels ran in %lf seconds\n", rtime);

        cl::copy(queue, d_c, begin(h_c), end(h_c));

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++) {
            tmp = h_a[i] + h_b[i]; // expected value for d_c[i]
            tmp -= h_c[i];                      // compute errors
            if(tmp*tmp < TOL*TOL) {      // correct if square deviation is less 
                correct++;                         //  than tolerance squared
            }
            else {

                printf(
                    " tmp %f h_a %f h_b %f  h_c %f \n",
                    tmp, 
                    h_a[i], 
                    h_b[i], 
                    h_c[i]);
            }
        }

        // summarize results
        printf(
            "vector add to find C = A+B:  %d out of %d results were correct.\n", 
            correct, 
            count);
    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr 
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
           << ")"
           << std::endl;
    }
}
int main(void)
{
    std::vector<float> h_a(LENGTH);                // a vector
    std::vector<float> h_b(LENGTH);                // b vector
    std::vector<float> h_c (LENGTH, 0xdeadbeef);   // c vector (result)
    std::vector<float> h_d (LENGTH, 0xdeadbeef);   // d vector (result)
    std::vector<float> h_e (LENGTH);               // e vector
    std::vector<float> h_f (LENGTH, 0xdeadbeef);   // f vector (result)
    std::vector<float> h_g (LENGTH);               // g vector

    cl::Buffer d_a;                       // device memory used for the input  a vector
    cl::Buffer d_b;                       // device memory used for the input  b vector
    cl::Buffer d_c;                       // device memory used for the output c vector
    cl::Buffer d_d;                       // device memory used for the output d vector
    cl::Buffer d_e;                       // device memory used for the input e vector
    cl::Buffer d_f;                       // device memory used for the output f vector
    cl::Buffer d_g;                       // device memory used for the input g vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
        h_e[i]  = rand() / (float)RAND_MAX;
        h_g[i]  = rand() / (float)RAND_MAX;
    }

    try
    {
        // Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor

        auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer>(program, "vadd");

        d_a   = cl::Buffer(context, begin(h_a), end(h_a), true);
        d_b   = cl::Buffer(context, begin(h_b), end(h_b), true);
        d_e   = cl::Buffer(context, begin(h_e), end(h_e), true);
        d_g   = cl::Buffer(context, begin(h_g), end(h_g), true);

        d_c  = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(float) * LENGTH);
        d_d  = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(float) * LENGTH);
        d_f  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)),
            d_a,
            d_b,
            d_c);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)),
            d_e,
            d_c,
            d_d);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)),
            d_g,
            d_d,
            d_f);

        cl::copy(queue, d_f, begin(h_f), end(h_f));

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++)
        {
            tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i];     // assign element i of a+b+e+g to tmp
            tmp -= h_f[i];                               // compute deviation of expected and output result
            if(tmp*tmp < TOL*TOL)                        // correct if square deviation is less than tolerance squared
                correct++;
            else {
                printf(" tmp %f h_a %f h_b %f h_e %f h_g %f h_f %f\n",tmp, h_a[i], h_b[i], h_e[i], h_g[i], h_f[i]);
            }
        }

        // summarize results
        printf("C = A+B+E+G:  %d out of %d results were correct.\n", correct, count);

    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr
                << "ERROR: "
                << err.what()
                << std::endl;
    }
}