Example #1
0
/**
 * Assemble one sparse matrix of type MatType and time a host-side multiply.
 *
 * Two timers are registered (on first use) in the shared `timers` registry and
 * are started/stopped around the two phases:
 *   - "<N> <matType name> Assemble": construction of the matrix plus assemble_LHS()
 *   - "<N> <matType name> Multiply": benchmarkMultiplyHost<MatType>()
 * A banner line "<N> SpMV (<matType name> -> <multType name>)" is written to
 * stdout before the work starts.
 *
 * @param der   RBFFD object forwarded unchanged to assemble_LHS().
 * @param grid  Grid providing the node count N and forwarded to assemble_LHS().
 */
void run_SpMV(RBFFD& der, Grid& grid) {

    unsigned int N = grid.getNodeListSize();

    char test_name[256];
    char assemble_timer_name[256];
    char multiply_timer_name[256];

    // snprintf bounds every write: the matTypeStrings entries have a length this
    // function does not control, so an unbounded sprintf could overrun the buffers.
    snprintf(test_name, sizeof(test_name), "%u SpMV (%s -> %s)", N, matTypeStrings[matType], matTypeStrings[multType]);
    snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u %s Assemble", N, matTypeStrings[matType]);
    // NOTE(review): the banner names multType, but the multiply timer is keyed by
    // matType (unchanged from the original) -- confirm this is intended.
    snprintf(multiply_timer_name, sizeof(multiply_timer_name), "%u %s Multiply", N, matTypeStrings[matType]);

    if (!timers.contains(assemble_timer_name)) { timers[assemble_timer_name] = new EB::Timer(assemble_timer_name); }
    if (!timers.contains(multiply_timer_name)) { timers[multiply_timer_name] = new EB::Timer(multiply_timer_name); }

    // Resolve the timers once, up front, so the registry lookups are not part
    // of the intervals being measured.
    EB::Timer& assemble_timer = *timers[assemble_timer_name];
    EB::Timer& multiply_timer = *timers[multiply_timer_name];

    // std::endl is kept deliberately: the flush makes the banner visible before
    // the benchmark starts running.
    std::cout << test_name << std::endl;

    // Assemble the matrix
    // ----------------------
    assemble_timer.start();
    // Automatic storage: the matrix is released on every exit path, including
    // an exception thrown by assemble_LHS() or the benchmark.
    MatType A(N);
    assemble_LHS(der, grid, A);
    assemble_timer.stop();

    // Multiply
    // ----------------------
    multiply_timer.start();
    benchmarkMultiplyHost<MatType>(A);
    multiply_timer.stop();
}
Example #2
0
/**
 * Assemble a Stokes system on the host, copy it to the device and solve it
 * with GMRES_Device(), timing each phase.
 *
 * Phases and the timers (registered on first use in the shared `timers` registry):
 *   - assemble: construct the host matrix/vectors and call assemble_System_Stokes()
 *   - copy:     construct the device matrix/vectors and copy A and F (and the
 *               compressed exact solution) to them
 *   - test:     GMRES_Device()
 * Between assemble and copy, the system, the compressed exact solution and the
 * product A * U_exact_compressed are written out via write_System() /
 * write_to_file(); after the solve, write_Solution() is called.
 *
 * @param der       RBFFD object forwarded to assemble_System_Stokes().
 * @param grid      Grid providing the stencil/boundary counts; also forwarded
 *                  to assemble_System_Stokes() and write_Solution().
 * @param primeGPU  Non-zero selects the "Primer ..." timer/banner labels instead
 *                  of the regular ones; it does not change the work performed.
 */
void gpuTest(RBFFD& der, Grid& grid, int primeGPU=0) {
    unsigned int N = grid.getStencilsSize();
    unsigned int n = grid.getMaxStencilSize();
    unsigned int nb_bnd = grid.getBoundaryIndicesSize();
    unsigned int n_unknowns = 4 * N;
    // We subtract off the unknowns for the boundary
    unsigned int nrows = 4 * N - 4*nb_bnd;
    unsigned int ncols = 4 * N - 4*nb_bnd;
    // Third constructor argument of the host matrix below; presumably a
    // non-zero capacity estimate -- TODO confirm against UBLAS_MAT_t.
    // NOTE(review): evaluated in unsigned int arithmetic, so it wraps for very
    // large n*N.
    unsigned int NNZ = 9*n*N+2*(4*N)+2*(3*N);

    char test_name[256];
    char assemble_timer_name[256];
    char copy_timer_name[512];
    char test_timer_name[256];

    // snprintf keeps every write bounded by its destination buffer.
    if (primeGPU) {
        snprintf(test_name, sizeof(test_name), "%u PRIMING THE GPU", N);
        snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u Primer Assemble", N);
        snprintf(copy_timer_name, sizeof(copy_timer_name), "%u Primer Copy To VCL_CSR", N);
        snprintf(test_timer_name, sizeof(test_timer_name), "%u Primer GMRES test", N);
    } else {
        snprintf(test_name, sizeof(test_name), "%u GMRES GPU (VCL_CSR)", N);
        snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u UBLAS_CSR Assemble", N);
        snprintf(copy_timer_name, sizeof(copy_timer_name), "%u UBLAS_CSR Copy To VCL_CSR", N);
        snprintf(test_timer_name, sizeof(test_timer_name), "%u GPU GMRES test", N);
    }

    if (!timers.contains(assemble_timer_name)) { timers[assemble_timer_name] = new EB::Timer(assemble_timer_name); }
    if (!timers.contains(copy_timer_name)) { timers[copy_timer_name] = new EB::Timer(copy_timer_name); }
    if (!timers.contains(test_timer_name)) { timers[test_timer_name] = new EB::Timer(test_timer_name); }

    // Resolve the timers once so registry lookups stay out of the timed regions.
    EB::Timer& assemble_timer = *timers[assemble_timer_name];
    EB::Timer& copy_timer = *timers[copy_timer_name];
    EB::Timer& test_timer = *timers[test_timer_name];

    // std::endl flushes so the banner is visible before the work starts.
    std::cout << test_name << std::endl;


    // ----- ASSEMBLE -----
    // All matrices/vectors below use automatic storage so they are released on
    // every exit path (including exceptions from assembly, I/O or the solver).
    assemble_timer.start();
    // Compress system to remove boundary rows
    UBLAS_MAT_t A(nrows, ncols, NNZ);
    UBLAS_VEC_t F(nrows, 0);
    UBLAS_VEC_t U_exact(n_unknowns, 0);
    UBLAS_VEC_t U_exact_compressed(nrows, 0);
    assemble_System_Stokes(der, grid, A, F, U_exact, U_exact_compressed);
    assemble_timer.stop();

    write_System(A, F, U_exact);
    write_to_file(U_exact_compressed, "output/U_exact_compressed.mtx");

    // Product of the assembled matrix with the compressed exact solution,
    // written out alongside the system files.
    UBLAS_VEC_t F_discrete = prod(A, U_exact_compressed);
    write_to_file(F_discrete, "output/F_discrete.mtx");

    // ----- SOLVE -----

    copy_timer.start();

    VCL_MAT_t A_gpu(A.size1(), A.size2());
    copy(A, A_gpu);

    VCL_VEC_t F_gpu(F.size());
    VCL_VEC_t U_exact_gpu(U_exact_compressed.size());
    // Destination for the solver result; nothing is copied into it here.
    VCL_VEC_t U_approx_gpu(F.size());

    viennacl::copy(F.begin(), F.end(), F_gpu.begin());
    viennacl::copy(U_exact_compressed.begin(), U_exact_compressed.end(), U_exact_gpu.begin());
    copy_timer.stop();

    test_timer.start();
    // Use GMRES to solve A*u = F
    GMRES_Device(A_gpu, F_gpu, U_exact_gpu, U_approx_gpu, N, nb_bnd);
    test_timer.stop();

    write_Solution(grid, U_exact_compressed, U_approx_gpu);

    // Cleanup is implicit: every host and device object is destroyed at scope exit.
}