Пример #1
0
void
high_level_relax ( MulticoreArray<T> & array, MulticoreArray<T> & old_array )
{
    // Reference ("user level") formulation of the relaxation sweep.
    //
    // This is the code we expect users to want to write: a plain triple loop
    // over the interior points. It operates on the same data as the
    // transformed (per-core) code, but it is less efficient.
    //
    // For every interior point (i,j,k) -- indices 1 .. size-2 along each
    // axis -- old_array(i,j,k) is assigned the sum of the eight diagonal
    // neighbours array(i+/-1, j+/-1, k+/-1) divided by 8.0.
    //
    // An even higher-level form would be:
    //    array(I) = (array(I-1) + array(I+1)) / 2.0;
    // but the current abstraction does not support it (an array class such
    // as A++ would be needed for that level).

    // NOTE: the extent names and the loop index names i, j, k are referenced
    // by the stencil pragma below, so they must not be renamed independently.
    const int arraySizeX = array.get_arraySize(0);
    const int arraySizeY = array.get_arraySize(1);
    const int arraySizeZ = array.get_arraySize(2);

    // The pragma is the hook that triggers the compiler transformations.
#pragma stencil-manycore(T, old_array, array) S(i,0,arraySizeX) D(j,0,arraySizeY) D(k,0,arraySizeZ)
#pragma omp for
    for (int k = 1; k < arraySizeZ-1; k++) {
        for (int j = 1; j < arraySizeY-1; j++) {
            for (int i = 1; i < arraySizeX-1; i++) {
                // Terms are summed strictly left to right: first the four
                // corners on plane k-1, then the four corners on plane k+1.
                old_array(i,j,k) =
                    ( array(i-1,j-1,k-1) + array(i+1,j-1,k-1)
                    + array(i-1,j+1,k-1) + array(i+1,j+1,k-1)
                    + array(i-1,j-1,k+1) + array(i+1,j-1,k+1)
                    + array(i-1,j+1,k+1) + array(i+1,j+1,k+1) ) / 8.0;
            }
        }
    }
}
Пример #2
0
void
lower_level_relax ( MulticoreArray<T> & array, MulticoreArray<T> & old_array )
{
    // Hand-written version of the code we would want the compiler to
    // generate: one loop iteration per core, each one delegating to the
    // array section stored in array.coreArray for that core.

    const int coreCount = array.get_numberOfCores();

    // Both arrays have to be distributed with the same approach
    // (table-based or algorithmic); otherwise the per-core work would not
    // line up between them.
    assert(array.get_tableBasedDistribution() == old_array.get_tableBasedDistribution());

    // OpenMP supplies the parallel threads, one loop iteration per core.
#pragma omp parallel for
    for (int core = 0; core < coreCount; core++) {
        // The refactored relaxation on the interior is currently disabled:
        //    array.coreArray[core]->relax(core,array,old_array);

        // Fix up the internal boundaries of the memory allocated to this
        // core (refactored form of the relaxation on the boundary).
        array.coreArray[core]->relax_on_boundary(core,array,old_array);
    }
}
Пример #3
0
void
lower_level_relax ( MulticoreArray<T> & array, MulticoreArray<T> & old_array )
{
    // Hand-written version of the code we would want the compiler to
    // generate: relax every core's section in parallel, then treat the
    // section boundaries according to whether old_array carries halo regions.

    const int coreCount = array.get_numberOfCores();

    // Both arrays have to be distributed with the same approach
    // (table-based or algorithmic).
    assert(array.get_tableBasedDistribution() == old_array.get_tableBasedDistribution());

    // Decide once, before the parallel loop, which boundary treatment applies.
    const bool haloPresent = old_array.isHaloExist();

    // With halos, they are exchanged up front, ahead of any per-core work.
    if (haloPresent)
        old_array.haloExchange();

    // OpenMP supplies the parallel threads, one loop iteration per core.
    #pragma omp parallel for
    for (int core = 0; core < coreCount; core++)
    {
        // Per-core relaxation; runs in both the halo and the non-halo case.
        // NOTE(review): the meaning of the trailing argument 1 is defined by
        // relax() and its siblings, which are not visible here -- confirm.
        relax(core,array,old_array,1);

        if (haloPresent)
        {
            // Halo case: extra boundary work only when the halo is detached.
            if (array.hasDetachedHalo())
                relax_on_detachedhalo_boundary(core,array,old_array,1);
        }
        else
        {
            // No halo: the boundaries are always relaxed explicitly.
            relax_on_boundary(core,array,old_array,1);
        }
    }
}