Exemple #1
0
int ColorSYMGS( const SparseMatrix & A, const Vector & r, Vector & x){
assert(x.localLength == A.localNumberOfColumns); // Make sure x contains space for halo values

#ifndef HPCG_NO_MPI
  ExchangeHalo(A,x);
#endif
  Optimatrix* A_Optimized = (Optimatrix*)A.optimizationData;
  local_matrix_type localMatrix = A_Optimized->localMatrix;
  local_int_1d_type matrixDiagonal = A_Optimized->matrixDiagonal;
  local_int_1d_type colors_ind = A_Optimized->colors_ind;
  host_local_int_1d_type host_colors_ind = A_Optimized->host_colors_ind;
  local_int_1d_type colors_map = A_Optimized->colors_map;
  host_local_int_1d_type host_colors_map = A_Optimized->host_colors_map;
  const int numColors = A_Optimized->numColors;

  Optivector * r_Optimized = (Optivector*)r.optimizationData;
  double_1d_type r_values = r_Optimized->values;

  Optivector * x_Optimized = (Optivector*)x.optimizationData;
  double_1d_type x_values = x_Optimized->values;

	 // Forward Sweep!
#ifdef KOKKOS_TEAM
  int vector_size = 32;
  int teamSizeMax = 8;
  for(int i = 0; i < numColors; i++){
    int color_index_begin = host_colors_map(i);
    int color_index_end = host_colors_map(i + 1);
    int numberOfTeams = color_index_end - color_index_begin;
    Kokkos::parallel_for(team_policy(numberOfTeams / teamSizeMax + 1, teamSizeMax, vector_size),
      ColouredSweep(color_index_begin, color_index_end, localMatrix, colors_ind, r_values, x_values));

    execution_space::fence();
  }
  for(int i = numColors - 1; i >= 0; i--){
    int color_index_begin = host_colors_map(i);
    int color_index_end = host_colors_map(i+1);
    int numberOfTeams = color_index_end - color_index_begin;
    Kokkos::parallel_for(team_policy(numberOfTeams / teamSizeMax + 1, teamSizeMax, vector_size),
      ColouredSweep(color_index_begin, color_index_end, localMatrix, colors_ind, r_values, x_values));
    execution_space::fence();
  }
#else
  local_int_t dummy = 0;
  for(int i = 0; i < numColors; i++){
    int start = host_colors_map(i); // Colors start at 1, i starts at 0
    int end = host_colors_map(i+1);
   dummy += end - start;
    Kokkos::parallel_for(end - start, colouredForwardSweep(start, colors_ind, localMatrix, r_values, x_values, matrixDiagonal));
  }
  assert(dummy == A.localNumberOfRows);
 // Back Sweep!
  for(int i = numColors -1; i >= 0; --i){
    int start = host_colors_map(i); // Colors start at 1, i starts at 0
    int end = host_colors_map(i+1);
    Kokkos::parallel_for(end - start, colouredBackSweep(start, colors_ind, localMatrix, r_values, x_values, matrixDiagonal));
  }
#endif
return(0);
}
/*!
  Asynchronous sparse matrix-vector product y = Ax, one HPX task-policy
  for_each iteration per local row.

  Unless HPCG_NOMPI is defined, ExchangeHalo(A,x) is called before the
  parallel loop is launched.

  The loop body holds A by reference and raw pointers to x.values and
  y.values; presumably A, x and y must therefore stay alive until the
  returned future is ready - confirm against the callers.

  @param[in]  A the system matrix
  @param[in]  x the input vector (passed to ExchangeHalo, hence non-const)
  @param[out] y receives one entry per local row of A

  @return the future produced by the task-policy for_each
*/
hpx::future<void> ComputeSPMV_async( const SparseMatrix & A, /*const*/ Vector & x, Vector & y) {

  // Both vectors must be at least as long as the matrix dimensions they pair with.
  assert(x.localLength>=A.localNumberOfColumns);
  assert(y.localLength>=A.localNumberOfRows);

#ifndef HPCG_NOMPI
  ExchangeHalo(A,x);
#endif

  using row_iterator = boost::counting_iterator<local_int_t>;

  const double * const xValues = x.values;
  double * const yValues = y.values;
  const local_int_t rowCount = A.localNumberOfRows;

  return hpx::parallel::for_each(
    hpx::parallel::par(hpx::parallel::task), row_iterator(0), row_iterator(rowCount),
    [xValues, yValues, &A](local_int_t row) {
      // Per-row storage: coefficients, their column indices and the entry count.
      const double * const coeffs = A.matrixValues[row];
      const local_int_t * const columns = A.mtxIndL[row];
      const int entryCount = A.nonzerosInRow[row];

      // Dot product of the row with x, accumulated in storage order.
      double rowSum = 0.0;
      for (int k = 0; k < entryCount; ++k)
        rowSum += coeffs[k] * xValues[columns[k]];

      yValues[row] = rowSum;
    });
}
/*!
  Routine to compute the matrix-vector product y = Ax.

  Off-processor values of x: unless HPCG_NOMPI is defined, this routine calls
  ExchangeHalo(A,x) itself before the product is formed.

  This is the reference SPMV implementation.  It CANNOT be modified for the
  purposes of this benchmark.

  @param[in]  A the known system matrix
  @param[in]  x the known vector (non-const; it is passed to ExchangeHalo)
  @param[out] y on exit, entries 0 .. A.localNumberOfRows-1 contain the result Ax

  @return returns 0 (the only value this implementation returns)

  @see ComputeSPMV
*/
int ComputeSPMV_ref( const SparseMatrix & A, Vector & x, Vector & y) {

  assert(x.localLength>=A.localNumberOfColumns); // Test vector lengths
  assert(y.localLength>=A.localNumberOfRows);

#ifndef HPCG_NOMPI
    ExchangeHalo(A,x);
#endif
  const double * const xv = x.values;
  double * const yv = y.values;
  const local_int_t nrow = A.localNumberOfRows;
  // Each iteration writes only yv[i], so rows are distributed across OpenMP
  // threads unless HPCG_NOOPENMP is defined.
#ifndef HPCG_NOOPENMP
  #pragma omp parallel for
#endif
  for (local_int_t i=0; i< nrow; i++)  {
    double sum = 0.0;
    // Row i: its stored values, their column indices, and how many there are.
    const double * const cur_vals = A.matrixValues[i];
    const local_int_t * const cur_inds = A.mtxIndL[i];
    const int cur_nnz = A.nonzerosInRow[i];

    // Dot product of row i with x, accumulated in storage order.
    for (int j=0; j< cur_nnz; j++)
      sum += cur_vals[j]*xv[cur_inds[j]];
    yv[i] = sum;
  }
  return(0);
}