예제 #1
0
/**
 * Assemble a matrix for the given grid and time a host-side multiply.
 *
 * Two timers in the global `timers` collection are created on first use,
 * named from the node count and the matrix type string: one wraps matrix
 * construction plus assemble_LHS(), the other wraps benchmarkMultiplyHost().
 *
 * NOTE(review): MatType, matType, multType and matTypeStrings are declared
 * outside this block (presumably template parameters / file-level tables) --
 * confirm against the enclosing declaration.
 *
 * @param der   RBFFD object forwarded to assemble_LHS().
 * @param grid  Grid that supplies the node count; forwarded to assemble_LHS().
 */
void run_SpMV(RBFFD& der, Grid& grid) {

    const unsigned int N = grid.getNodeListSize();

    char test_name[256];
    char assemble_timer_name[256];
    char multiply_timer_name[256];

    // Bounded formatting: a long type string can never overrun the buffers.
    snprintf(test_name, sizeof(test_name), "%u SpMV (%s -> %s)", N, matTypeStrings[matType], matTypeStrings[multType]);
    snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u %s Assemble", N, matTypeStrings[matType]);
    // NOTE(review): the multiply timer is keyed on matType, not multType, so
    // runs that differ only in multType share this timer -- confirm intent.
    snprintf(multiply_timer_name, sizeof(multiply_timer_name), "%u %s Multiply", N, matTypeStrings[matType]);

    // Create each timer only once so repeated calls reuse the same entry.
    if (!timers.contains(assemble_timer_name)) { timers[assemble_timer_name] = new EB::Timer(assemble_timer_name); }
    if (!timers.contains(multiply_timer_name)) { timers[multiply_timer_name] = new EB::Timer(multiply_timer_name); }

    std::cout << test_name << std::endl;

    // Assemble the matrix
    // ----------------------
    timers[assemble_timer_name]->start();
    // Automatic storage: the matrix is released even if assembly or the
    // multiply throws, which the previous new/delete pair did not guarantee.
    MatType A(N);
    assemble_LHS(der, grid, A);
    timers[assemble_timer_name]->stop();

    // Multiply benchmark
    // ----------------------
    timers[multiply_timer_name]->start();
    benchmarkMultiplyHost<MatType>(A);
    timers[multiply_timer_name]->stop();
}
예제 #2
0
/**
 * Benchmark driver for the GPU GMRES test.
 *
 * For every grid name in the list it:
 *   1. loads the grid from file, or generates it (marking node 0 as a
 *      boundary node) when loadFromFile() reports NO_GRID_FILES;
 *   2. loads the stencils, or generates them with the KD-tree method when
 *      loadStencilsFromFile() reports NO_STENCIL_FILES;
 *   3. loads the RBFFD weights, or computes them when loading reports an
 *      error, timing this step under "<grid name> Calc Weights";
 *   4. runs gpuTest() once in priming mode (first grid only) and then once
 *      for the real measurement.
 * Finally all timers are printed and written to file.
 *
 * @return EXIT_SUCCESS.
 */
int main(void)
{
    const bool writeIntermediate = true;
    bool primed = false;

    std::vector<std::string> grids;

    //    grids.push_back("~/GRIDS/md/md005.00036");

    //grids.push_back("~/GRIDS/md/md165.27556");
    //grids.push_back("~/GRIDS/md/md031.01024");
    grids.push_back("~/GRIDS/md/md089.08100");
#if 0
    grids.push_back("~/GRIDS/md/md031.01024");
    grids.push_back("~/GRIDS/md/md050.02601");
    grids.push_back("~/GRIDS/md/md063.04096");
    grids.push_back("~/GRIDS/md/md089.08100");
    grids.push_back("~/GRIDS/md/md127.16384");
    grids.push_back("~/GRIDS/md/md165.27556");
#endif
#if 0
    grids.push_back("~/GRIDS/geoff/scvtmesh_100k_nodes.ascii");
    grids.push_back("~/GRIDS/geoff/scvtmesh_500k_nodes.ascii");
    grids.push_back("~/GRIDS/geoff/scvtmesh_1m_nodes.ascii");
#endif
    //grids.push_back("~/GRIDS/geoff/scvtmesh_1m_nodes.ascii");

    for (size_t i = 0; i < grids.size(); i++) {
        std::string& grid_name = grids[i];

        std::string weight_timer_name = grid_name + " Calc Weights";

        timers[weight_timer_name] = new EB::Timer(weight_timer_name.c_str());

        // Get contours from rbfzone.blogspot.com to choose eps_c1 and eps_c2 based on stencil_size (n)
#if 0
        // Too ill-conditioned? Doesn't converge in GMRES + ILU0
        unsigned int stencil_size = 40;
        double eps_c1 = 0.027;
        double eps_c2 = 0.274;
#else
        unsigned int stencil_size = 31;
        double eps_c1 = 0.035;
        double eps_c2 = 0.1;
#endif


        GridReader* grid = new GridReader(grid_name, 4);
        grid->setMaxStencilSize(stencil_size);
        // We do not read until generate is called:

        Grid::GridLoadErrType err = grid->loadFromFile();
        if (err == Grid::NO_GRID_FILES)
        {
            grid->generate();
#if 1
            // NOTE: We force at least one node in the domain to be a boundary.
            //-----------------------------
            // We will set the first node as a boundary/ground point. We know
            // the normal because we're on the sphere centered at (0,0,0)
            for (unsigned int nodeIndex = 0; nodeIndex < 1; nodeIndex++) {
                NodeType& node = grid->getNode(nodeIndex);
                Vec3 nodeNormal = node - Vec3(0,0,0);
                grid->appendBoundaryIndex(nodeIndex, nodeNormal);
            }
#endif
            //-----------------------------
            if (writeIntermediate) {
                grid->writeToFile();
            }
        }
        std::cout << "Generate Stencils\n";
        Grid::GridLoadErrType st_err = grid->loadStencilsFromFile();
        if (st_err == Grid::NO_STENCIL_FILES) {
            //            grid->generateStencils(Grid::ST_BRUTE_FORCE);
#if 1
            grid->generateStencils(Grid::ST_KDTREE);
#else
            grid->setNSHashDims(50, 50,50);
            grid->generateStencils(Grid::ST_HASH);
#endif
            if (writeIntermediate) {
                grid->writeToFile();
            }
        }

        // `der` is constructed from the grid pointer, so it lives in its own
        // scope and is destroyed before the grid is deleted below.
        {
            std::cout << "Generate RBFFD Weights\n";
            timers[weight_timer_name]->start();
            RBFFD der(RBFFD::LSFC | RBFFD::XSFC | RBFFD::YSFC | RBFFD::ZSFC, grid, 3, 0);
            //TODO:         der.setWeightType(RBFFD::ContourSVD);
            der.setEpsilonByParameters(eps_c1, eps_c2);
            int der_err = der.loadAllWeightsFromFile();
            if (der_err) {
                der.computeAllWeightsForAllStencils();
            }
            // Stop on both paths: previously the timer was left running
            // whenever the weights were loaded from file.
            timers[weight_timer_name]->stop();
#if 0
            if (der_err && writeIntermediate) {
                der.writeAllWeightsToFile();
            }
#endif

            if (!primed)  {
                std::cout << "\n\n";
                std::cout << "Priming GPU with dummy operations (removes compile from benchmarks)\n";
                gpuTest(der,*grid, 1);
                primed = true;
                std::cout << "\n\n";
            }

            // No support for GMRES on the CPU yet.
            //cpuTest(der,*grid);
            gpuTest(der,*grid);
        }

        delete(grid);
    }

    timers.printAll();
    timers.writeToFile();
    return EXIT_SUCCESS;
}
예제 #3
0
/**
 * Assemble the Stokes system on the host, copy it to the GPU and solve it
 * with GMRES, timing the assemble, copy and solve phases separately.
 *
 * The system has 4 unknowns per stencil; the rows/columns belonging to
 * boundary nodes are removed, giving 4*N - 4*nb_bnd rows and columns.
 * The assembled system, the compressed exact solution and the discrete
 * right-hand side A * U_exact_compressed are written to file before solving.
 *
 * @param der       RBFFD object forwarded to assemble_System_Stokes().
 * @param grid      Grid supplying the stencil count, maximum stencil size and
 *                  boundary count; also forwarded to the assembly and to
 *                  write_Solution().
 * @param primeGPU  Non-zero selects the "Primer" timer names so that a
 *                  warm-up run is recorded separately from the measured run.
 */
void gpuTest(RBFFD& der, Grid& grid, int primeGPU=0) {
    const unsigned int N = grid.getStencilsSize();
    const unsigned int n = grid.getMaxStencilSize();
    const unsigned int nb_bnd = grid.getBoundaryIndicesSize();
    const unsigned int n_unknowns = 4 * N;
    // We subtract off the unknowns for the boundary
    const unsigned int nrows = 4 * N - 4*nb_bnd;
    const unsigned int ncols = 4 * N - 4*nb_bnd;
    // Non-zero count passed to the sparse matrix constructor.
    const unsigned int NNZ = 9*n*N+2*(4*N)+2*(3*N);

    char test_name[256];
    char assemble_timer_name[256];
    char copy_timer_name[512];
    char test_timer_name[256];

    // Bounded formatting so the fixed-size buffers can never be overrun.
    if (primeGPU) {
        snprintf(test_name, sizeof(test_name), "%u PRIMING THE GPU", N);
        snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u Primer Assemble", N);
        snprintf(copy_timer_name, sizeof(copy_timer_name), "%u Primer Copy To VCL_CSR", N);
        snprintf(test_timer_name, sizeof(test_timer_name), "%u Primer GMRES test", N);
    } else {
        snprintf(test_name, sizeof(test_name), "%u GMRES GPU (VCL_CSR)", N);
        snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u UBLAS_CSR Assemble", N);
        snprintf(copy_timer_name, sizeof(copy_timer_name), "%u UBLAS_CSR Copy To VCL_CSR", N);
        snprintf(test_timer_name, sizeof(test_timer_name), "%u GPU GMRES test", N);
    }

    // Create each timer only once so repeated calls reuse the same entry.
    if (!timers.contains(assemble_timer_name)) { timers[assemble_timer_name] = new EB::Timer(assemble_timer_name); }
    if (!timers.contains(copy_timer_name)) { timers[copy_timer_name] = new EB::Timer(copy_timer_name); }
    if (!timers.contains(test_timer_name)) { timers[test_timer_name] = new EB::Timer(test_timer_name); }


    std::cout << test_name << std::endl;


    // ----- ASSEMBLE -----
    // All host and device containers below use automatic storage, so they are
    // released on every exit path (including exceptions) without manual delete.
    timers[assemble_timer_name]->start();
    // Compress system to remove boundary rows
    UBLAS_MAT_t A(nrows, ncols, NNZ);
    UBLAS_VEC_t F(nrows, 0);
    UBLAS_VEC_t U_exact(n_unknowns, 0);
    UBLAS_VEC_t U_exact_compressed(nrows, 0);
    assemble_System_Stokes(der, grid, A, F, U_exact, U_exact_compressed);
    timers[assemble_timer_name]->stop();

    write_System(A, F, U_exact);
    write_to_file(U_exact_compressed, "output/U_exact_compressed.mtx");

    // Right-hand side obtained by applying the assembled operator to the
    // compressed exact solution; written out for offline comparison.
    UBLAS_VEC_t F_discrete = prod(A, U_exact_compressed);
    write_to_file(F_discrete, "output/F_discrete.mtx");

    // ----- SOLVE -----

    timers[copy_timer_name]->start();

    VCL_MAT_t A_gpu(A.size1(), A.size2());
    copy(A, A_gpu);

    VCL_VEC_t F_gpu(F.size());
    VCL_VEC_t U_exact_gpu(U_exact_compressed.size());
    VCL_VEC_t U_approx_gpu(F.size());

    viennacl::copy(F.begin(), F.end(), F_gpu.begin());
    viennacl::copy(U_exact_compressed.begin(), U_exact_compressed.end(), U_exact_gpu.begin());
    timers[copy_timer_name]->stop();

    timers[test_timer_name]->start();
    // Use GMRES to solve A*u = F
    GMRES_Device(A_gpu, F_gpu, U_exact_gpu, U_approx_gpu, N, nb_bnd);
    timers[test_timer_name]->stop();

    write_Solution(grid, U_exact_compressed, U_approx_gpu);
}
예제 #4
0
/**
 * Benchmark driver for the sparse matrix-vector multiply tests.
 *
 * For every grid name in the list it:
 *   1. loads the grid from file, or generates it when loadFromFile()
 *      reports NO_GRID_FILES;
 *   2. loads the stencils, or generates them with the KD-tree method when
 *      loadStencilsFromFile() reports NO_STENCIL_FILES;
 *   3. loads the RBFFD weights, or computes (and optionally writes) them
 *      when loading reports an error, timing the load/compute step under
 *      "<grid name> Calc Weights";
 *   4. runs run_test<DUMMY, DUMMY> once (first grid only) to prime the GPU,
 *      then the six assemble/multiply format combinations.
 * Finally all timers are printed and written to file.
 *
 * @return EXIT_SUCCESS.
 */
int main(void)
{
    const bool writeIntermediate = true;
    bool primed = false;

    std::vector<std::string> grids;

#if 0
    //grids.push_back("~/GRIDS/md/md005.00036");
    grids.push_back("~/GRIDS/md/md031.01024");
    grids.push_back("~/GRIDS/md/md050.02601");
    grids.push_back("~/GRIDS/md/md063.04096");
    grids.push_back("~/GRIDS/md/md089.08100");
    grids.push_back("~/GRIDS/md/md127.16384");
    grids.push_back("~/GRIDS/md/md165.27556");
#endif
#if 0
    // File names restored to "scvtmesh" (they had been mangled to
    // "scvtimersesh" by a tm -> timers search/replace).
    grids.push_back("~/GRIDS/geoff/scvtmesh_100k_nodes.ascii");
    grids.push_back("~/GRIDS/geoff/scvtmesh_500k_nodes.ascii");
    grids.push_back("~/GRIDS/geoff/scvtmesh_1m_nodes.ascii");
#endif
    //grids.push_back("~/GRIDS/geoff/scvtmesh_1m_nodes.ascii");
    grids.push_back("~/sphere_grids/md063.04096");
    grids.push_back("~/sphere_grids/md079.06400");
    grids.push_back("~/sphere_grids/md089.08100");
    grids.push_back("~/sphere_grids/md100.10201");
    grids.push_back("~/sphere_grids/md127.16384");
    grids.push_back("~/sphere_grids/md141.20164");
    grids.push_back("~/sphere_grids/md165.27556");
    grids.push_back("~/sphere_grids/scvtmesh001.100000");
    grids.push_back("~/sphere_grids/scvtmesh002.500000");
    grids.push_back("~/sphere_grids/scvtmesh003.1000000");

    for (size_t i = 0; i < grids.size(); i++) {
        std::string& grid_name = grids[i];

        std::string weight_timer_name = grid_name + " Calc Weights";

        timers[weight_timer_name] = new EB::Timer(weight_timer_name.c_str());

        // Get contours from rbfzone.blogspot.com to choose eps_c1 and eps_c2 based on stencil_size (n)
        unsigned int stencil_size = 40;
        double eps_c1 = 0.027;
        double eps_c2 = 0.274;


        GridReader* grid = new GridReader(grid_name, 4);
        grid->setMaxStencilSize(stencil_size);
        // We do not read until generate is called:

        Grid::GridLoadErrType err = grid->loadFromFile();
        if (err == Grid::NO_GRID_FILES)
        {
            grid->generate();
            if (writeIntermediate) {
                grid->writeToFile();
            }
        }
        std::cout << "Generate Stencils\n";
        Grid::GridLoadErrType st_err = grid->loadStencilsFromFile();
        if (st_err == Grid::NO_STENCIL_FILES) {
            //            grid->generateStencils(Grid::ST_BRUTE_FORCE);
#if 1
            grid->generateStencils(Grid::ST_KDTREE);
#else
            grid->setNSHashDims(50, 50,50);
            grid->generateStencils(Grid::ST_HASH);
#endif
            if (writeIntermediate) {
                grid->writeToFile();
            }
        }

        // `der` is constructed from the grid pointer, so it lives in its own
        // scope and is destroyed before the grid is deleted below.
        {
            std::cout << "Generate RBFFD Weights\n";
            timers[weight_timer_name]->start();
            RBFFD der(RBFFD::LSFC | RBFFD::XSFC | RBFFD::YSFC | RBFFD::ZSFC, grid, 3, 0);
            der.setEpsilonByParameters(eps_c1, eps_c2);
            int der_err = der.loadAllWeightsFromFile();
            if (der_err) {
                der.computeAllWeightsForAllStencils();
            }
            // Stop the timer on both paths. The original called start() a
            // second time here, so the weight timer was never stopped.
            timers[weight_timer_name]->stop();

            // Writing the weights is deliberately kept outside the timed region.
            if (der_err && writeIntermediate) {
                der.writeAllWeightsToFile();
            }

            if (!primed)  {
                std::cout << "Priming GPU with dummy operations (removes compile from benchmarks)\n";
                run_test<DUMMY, DUMMY>(der, *grid);
                primed = true;
            }

            std::cout << "Running Tests\n" << std::endl;
            {
                // <assemble format, multiply format> combinations under test.
                run_test<COO_CPU, COO_CPU>(der, *grid);
                run_test<COO_CPU, COO_GPU>(der, *grid);
                run_test<CSR_CPU, CSR_CPU>(der, *grid);
                run_test<CSR_CPU, CSR_GPU>(der, *grid);
                run_test<COO_CPU, CSR_GPU>(der, *grid);
                run_test<CSR_CPU, COO_GPU>(der, *grid);
            }
        }

        delete(grid);
    }

    timers.printAll();
    timers.writeToFile();
    return EXIT_SUCCESS;
}