// Reference ("user-level") relaxation sweep over the interior of a 3D array.
//
// Reads the three extents of `array` and, for every interior point (indices
// 1 .. extent-2 in each dimension), stores into `old_array` the mean of the
// eight corner neighbours (i+/-1, j+/-1, k+/-1) of that point in `array`.
// Points on the outermost layer of `old_array` are not written.
//
// This is the form a user is expected to write by hand; the original author
// notes that it is less efficient than the compiler-transformed code that
// operates on the same data.
//
// NOTE(review): `T` is not declared inside this block; presumably a
// `template <typename T>` header precedes it in the original file - confirm.
void high_level_relax ( MulticoreArray<T> & array, MulticoreArray<T> & old_array )
{
    // The names of these extents (and of the loop indices i, j, k below) are
    // referenced textually by the stencil pragma, so they must not be renamed.
    const int arraySizeX = array.get_arraySize(0);
    const int arraySizeY = array.get_arraySize(1);
    const int arraySizeZ = array.get_arraySize(2);

    // A still higher-level formulation would be
    //     array(I) = (array(I-1) + array(I+1)) / 2.0;
    // but the current abstraction does not support it (an array class such
    // as A++ would be needed for that level).

    // The first pragma is the hook that triggers the compiler transformation.
    // The second is an orphaned OpenMP worksharing directive: iterations of
    // the k loop are only distributed when this function is called from
    // within an enclosing parallel region.
#pragma stencil-manycore(T, old_array, array) S(i,0,arraySizeX) D(j,0,arraySizeY) D(k,0,arraySizeZ)
#pragma omp for
    for (int k = 1; k < arraySizeZ-1; ++k)
    {
        for (int j = 1; j < arraySizeY-1; ++j)
        {
            for (int i = 1; i < arraySizeX-1; ++i)
            {
                // Four corners in plane k-1 followed by four in plane k+1,
                // summed strictly left to right and then averaged.
                old_array(i,j,k) =
                    ( array(i-1,j-1,k-1) + array(i+1,j-1,k-1)
                    + array(i-1,j+1,k-1) + array(i+1,j+1,k-1)
                    + array(i-1,j-1,k+1) + array(i+1,j-1,k+1)
                    + array(i-1,j+1,k+1) + array(i+1,j+1,k+1) ) / 8.0;
            }
        }
    }
}
// Lower-level relaxation driver: the shape of code the compiler
// transformation is intended to generate.
//
// Asserts that `array` and `old_array` report the same value from
// get_tableBasedDistribution(), then runs one OpenMP-parallel iteration per
// core, invoking relax_on_boundary() on that core's entry of
// `array.coreArray`.
//
// NOTE(review): the per-core interior relax() call is commented out in this
// version, so only the boundary pass runs here - confirm this is intended.
// NOTE(review): `T` is not declared inside this block; presumably a
// `template <typename T>` header precedes it in the original file - confirm.
void lower_level_relax ( MulticoreArray<T> & array, MulticoreArray<T> & old_array )
{
    const int coreCount = array.get_numberOfCores();

    // Both arrays must be distributed with the same approach (table-based
    // or algorithmic).
    assert(array.get_tableBasedDistribution() == old_array.get_tableBasedDistribution());

    // One loop iteration per core, spread across OpenMP threads.
#pragma omp parallel for
    for (int core = 0; core < coreCount; ++core)
    {
        // Interior relaxation (refactored form) - currently disabled:
        // array.coreArray[core]->relax(core,array,old_array);

        // Fix up the internal boundaries of the memory allocated to this
        // core (refactored form of the boundary relaxation).
        array.coreArray[core]->relax_on_boundary(core, array, old_array);
    }
}
// Lower-level relaxation driver with halo support: the shape of code the
// compiler transformation is intended to generate.
//
// Asserts that `array` and `old_array` report the same value from
// get_tableBasedDistribution().  Then:
//   * if old_array.isHaloExist() is true, old_array.haloExchange() is called
//     once, and each core runs relax() followed - only when
//     array.hasDetachedHalo() is true - by relax_on_detachedhalo_boundary();
//   * otherwise each core runs relax() followed by relax_on_boundary().
// The per-core work is distributed with an OpenMP parallel loop.
//
// The trailing argument 1 is forwarded unchanged to every relax* call;
// presumably it is the stencil distance - TODO confirm against the callees.
//
// NOTE(review): `T` is not declared inside this block; presumably a
// `template <typename T>` header precedes it in the original file - confirm.
void lower_level_relax ( MulticoreArray<T> & array, MulticoreArray<T> & old_array )
{
    const int coreCount = array.get_numberOfCores();

    // Both arrays must be distributed with the same approach (table-based
    // or algorithmic).
    assert(array.get_tableBasedDistribution() == old_array.get_tableBasedDistribution());

    // Decide once which boundary treatment applies; when a halo exists it
    // is exchanged before any core starts relaxing.
    const bool haloExists = old_array.isHaloExist();
    if (haloExists)
    {
        old_array.haloExchange();
    }

    // One loop iteration per core, spread across OpenMP threads.
#pragma omp parallel for
    for (int core = 0; core < coreCount; ++core)
    {
        relax(core, array, old_array, 1);

        if (!haloExists)
        {
            relax_on_boundary(core, array, old_array, 1);
        }
        else if (array.hasDetachedHalo())
        {
            relax_on_detachedhalo_boundary(core, array, old_array, 1);
        }
    }
}