int ColorSYMGS( const SparseMatrix & A, const Vector & r, Vector & x){ assert(x.localLength == A.localNumberOfColumns); // Make sure x contains space for halo values #ifndef HPCG_NO_MPI ExchangeHalo(A,x); #endif Optimatrix* A_Optimized = (Optimatrix*)A.optimizationData; local_matrix_type localMatrix = A_Optimized->localMatrix; local_int_1d_type matrixDiagonal = A_Optimized->matrixDiagonal; local_int_1d_type colors_ind = A_Optimized->colors_ind; host_local_int_1d_type host_colors_ind = A_Optimized->host_colors_ind; local_int_1d_type colors_map = A_Optimized->colors_map; host_local_int_1d_type host_colors_map = A_Optimized->host_colors_map; const int numColors = A_Optimized->numColors; Optivector * r_Optimized = (Optivector*)r.optimizationData; double_1d_type r_values = r_Optimized->values; Optivector * x_Optimized = (Optivector*)x.optimizationData; double_1d_type x_values = x_Optimized->values; // Forward Sweep! #ifdef KOKKOS_TEAM int vector_size = 32; int teamSizeMax = 8; for(int i = 0; i < numColors; i++){ int color_index_begin = host_colors_map(i); int color_index_end = host_colors_map(i + 1); int numberOfTeams = color_index_end - color_index_begin; Kokkos::parallel_for(team_policy(numberOfTeams / teamSizeMax + 1, teamSizeMax, vector_size), ColouredSweep(color_index_begin, color_index_end, localMatrix, colors_ind, r_values, x_values)); execution_space::fence(); } for(int i = numColors - 1; i >= 0; i--){ int color_index_begin = host_colors_map(i); int color_index_end = host_colors_map(i+1); int numberOfTeams = color_index_end - color_index_begin; Kokkos::parallel_for(team_policy(numberOfTeams / teamSizeMax + 1, teamSizeMax, vector_size), ColouredSweep(color_index_begin, color_index_end, localMatrix, colors_ind, r_values, x_values)); execution_space::fence(); } #else local_int_t dummy = 0; for(int i = 0; i < numColors; i++){ int start = host_colors_map(i); // Colors start at 1, i starts at 0 int end = host_colors_map(i+1); dummy += end - start; 
Kokkos::parallel_for(end - start, colouredForwardSweep(start, colors_ind, localMatrix, r_values, x_values, matrixDiagonal)); } assert(dummy == A.localNumberOfRows); // Back Sweep! for(int i = numColors -1; i >= 0; --i){ int start = host_colors_map(i); // Colors start at 1, i starts at 0 int end = host_colors_map(i+1); Kokkos::parallel_for(end - start, colouredBackSweep(start, colors_ind, localMatrix, r_values, x_values, matrixDiagonal)); } #endif return(0); }
/*!
  Asynchronous sparse matrix-vector product y = A*x.

  The row loop is handed to hpx::parallel::for_each with the task execution
  policy and the resulting future is returned to the caller. The halo exchange
  (when MPI is enabled) happens synchronously, before the rows are scheduled.

  The scheduled work reads A through a reference and x.values / y.values
  through raw pointers, so A, x and y must stay alive and unmodified until the
  returned future is ready.

  @param[in]  A  the system matrix
  @param[in]  x  the input vector (non-const because it is passed to ExchangeHalo)
  @param[out] y  receives one entry per local row of A

  @return future that becomes ready once every row has been written
*/
hpx::future<void> ComputeSPMV_async( const SparseMatrix & A, /*const*/ Vector & x, Vector & y) {

  // Both vectors must be large enough for the local part of A.
  assert(x.localLength>=A.localNumberOfColumns);
  assert(y.localLength>=A.localNumberOfRows);

#ifndef HPCG_NOMPI
  ExchangeHalo(A,x);
#endif

  typedef boost::counting_iterator<local_int_t> row_iterator;

  const local_int_t rowCount = A.localNumberOfRows;
  const double * const input = x.values;
  double * const output = y.values;

  return hpx::parallel::for_each(
    hpx::parallel::par(hpx::parallel::task),
    row_iterator(0), row_iterator(rowCount),
    [input, output, &A](local_int_t row)
    {
      const int entries = A.nonzerosInRow[row];
      const double * const coeffs = A.matrixValues[row];
      const local_int_t * const columns = A.mtxIndL[row];

      // Accumulate the row's dot product in storage order.
      double acc = 0.0;
      int k = 0;
      while (k < entries) {
        acc += coeffs[k]*input[columns[k]];
        ++k;
      }
      output[row] = acc;
    });
}
/*!
  Routine to compute the matrix-vector product y = Ax.

  When HPCG_NOMPI is not defined, this routine calls ExchangeHalo(A,x) itself
  before the product is formed, so the caller does not need to exchange halo
  values beforehand.
  NOTE(review): ExchangeHalo presumably fills in the off-processor entries of
  x -- confirm against its definition.

  This is the reference SPMV implementation.  It CANNOT be modified for the
  purposes of this benchmark.

  @param[in]  A the known system matrix
  @param[in]  x the known vector; must hold at least A.localNumberOfColumns
                entries. It is non-const because it is passed to ExchangeHalo.
  @param[out] y on exit, contains the result Ax in its first
                A.localNumberOfRows entries; must hold at least that many.

  @return always 0 in this implementation

  @see ComputeSPMV
*/
int ComputeSPMV_ref( const SparseMatrix & A, Vector & x, Vector & y) {

  assert(x.localLength>=A.localNumberOfColumns); // x must cover every column index used below
  assert(y.localLength>=A.localNumberOfRows);    // y must have one slot per local row

#ifndef HPCG_NOMPI
    ExchangeHalo(A,x);
#endif
  const double * const xv = x.values;
  double * const yv = y.values;
  const local_int_t nrow = A.localNumberOfRows;
  // Each iteration writes only yv[i], so the row loop is shared among OpenMP
  // threads unless HPCG_NOOPENMP is defined.
#ifndef HPCG_NOOPENMP
  #pragma omp parallel for
#endif
  for (local_int_t i=0; i< nrow; i++)  {
    double sum = 0.0;
    const double * const cur_vals = A.matrixValues[i];  // values stored for row i
    const local_int_t * const cur_inds = A.mtxIndL[i];  // indices into xv for those values
    const int cur_nnz = A.nonzerosInRow[i];             // number of entries stored for row i

    // Dot product of row i with x, accumulated in storage order.
    for (int j=0; j< cur_nnz; j++)
      sum += cur_vals[j]*xv[cur_inds[j]];
    yv[i] = sum;
  }
  return(0);
}