inline void jacobi_kernel_nonuniform(
     crs_matrix<double> const & A
   , std::vector<std::vector<double> > & x
   , std::vector<double> const & b
   , std::pair<std::size_t, std::size_t> range
   , std::size_t src
   , std::size_t dst
 )
 {
     for(std::size_t i = range.first; i < range.second; ++i)
     {
         double result = b[i];
         double div = 1.0;
         const std::size_t begin = A.row_begin(i);
         const std::size_t end = A.row_end(i);
         for(std::size_t j = begin; j < end; ++j)
         {
             if(j == i) div = div/A.values[j];
             else result -= A.values[j] * x[src][A.indices[j]];
         }
         x[dst][i] = result * div;
     }
 }
Esempio n. 2
0
    void jacobi(
        crs_matrix<double> const & A
      , std::vector<double> const & b
      , std::size_t iterations
      , std::size_t block_size
    )
    {
        typedef std::vector<double> vector_type;

        boost::shared_ptr<vector_type> dst(new vector_type(b));
        boost::shared_ptr<vector_type> src(new vector_type(b));

        std::vector<range> block_ranges;
        // pre-computing ranges for the different blocks
        for(std::size_t i = 0; i < dst->size(); i += block_size)
        {
            block_ranges.push_back(
                range(i, std::min<std::size_t>(dst->size(), i + block_size)));
        }

        // pre-computing dependencies
        std::vector<std::vector<std::size_t> > dependencies(block_ranges.size());
        for(std::size_t b = 0; b < block_ranges.size(); ++b)
        {
            for(std::size_t i = block_ranges[b].begin(); i < block_ranges[b].end(); ++i)
            {
                std::size_t begin = A.row_begin(i);
                std::size_t end = A.row_end(i);
                
                for(std::size_t ii = begin; ii < end; ++ii)
                {
                    std::size_t idx = A.indices[ii];
                    for(std::size_t j = 0; j < block_ranges.size(); ++j)
                    {
                        if(block_ranges[j].begin() <= idx && idx < block_ranges[j].end())
                        {
                            if(std::find(dependencies[b].begin(), dependencies[b].end(), j) == dependencies[b].end())
                            {
                                dependencies[b].push_back(j);
                            }
                            break;
                        }
                    }
                }
            }
        }

        typedef std::vector<hpx::shared_future<void> > future_vector;
        boost::shared_ptr<future_vector> deps_dst(new future_vector(dependencies.size(), hpx::make_ready_future()));
        boost::shared_ptr<future_vector> deps_src(new future_vector(dependencies.size(), hpx::make_ready_future()));

        hpx::util::high_resolution_timer t;
        for(std::size_t iter = 0; iter < iterations; ++iter)
        {
            for(std::size_t block = 0; block < block_ranges.size(); ++block)
            {
                std::vector<std::size_t> const & deps(dependencies[block]);
                std::vector<hpx::shared_future<void> > trigger;
                trigger.reserve(deps.size());
                for (std::size_t dep : deps)
                {
                    trigger.push_back((*deps_src)[dep]);
                }                                                                                                                           
                
                (*deps_dst)[block]
                    = hpx::when_all(boost::move(trigger)).then(
                        hpx::launch::async,
                        hpx::util::bind(
                            jacobi_kernel_wrap
                          , block_ranges[block]
                          , boost::cref(A)
                          , boost::ref(*dst)
                          , boost::cref(*src)
                          , boost::cref(b)
                        )
                    );
            }
            std::swap(dst, src);
            std::swap(deps_dst, deps_src);
        }

        hpx::wait_all(*deps_dst);
        hpx::wait_all(*deps_src);

        double time_elapsed = t.elapsed();
        std::cout << dst->size() << " "
            << ((double(dst->size() * iterations)/1e6)/time_elapsed) << " MLUPS/s\n" 
            << std::flush;
    }