Esempio n. 1
0
// Builds a distributed evaluator for the contraction of `left` with `right`.
//
// left, right : argument evaluators; their ranks are asserted to equal
//               op.left_rank() and op.right_rank() respectively.
// world       : handed to the process grid and to the Summa implementation.
// shape, pmap : forwarded unchanged to the Summa implementation.
// perm        : when it tests true, maps each result dimension to its
//               position in the result tiled range; otherwise dimensions
//               keep their natural order.
// op          : contraction operation; supplies the rank bookkeeping.
//
// Returns a DistEval wrapping a newly allocated Summa implementation object.
TA::detail::DistEval<typename Op::result_type, Policy> make_contract_eval(
    const TA::detail::DistEval<LeftTile, Policy>& left,
    const TA::detail::DistEval<RightTile, Policy>& right, madness::World& world,
    const typename TA::detail::DistEval<typename Op::result_type,
                                        Policy>::shape_type& shape,
    const std::shared_ptr<typename TA::detail::DistEval<
        typename Op::result_type, Policy>::pmap_interface>& pmap,
    const TA::Permutation& perm, const Op& op) {
  // The arguments and the permutation must agree with the ranks the
  // operation was configured with.
  TA_ASSERT(left.range().rank() == op.left_rank());
  TA_ASSERT(right.range().rank() == op.right_rank());
  TA_ASSERT((perm.dim() == op.result_rank()) || !perm);

  // Concrete implementation type behind the returned evaluator.
  using impl_type =
      TA::detail::Summa<TA::detail::DistEval<LeftTile, Policy>,
                        TA::detail::DistEval<RightTile, Policy>, Op, Policy>;
  using result_eval_type =
      TA::detail::DistEval<typename Op::result_type, Policy>;

  // Dimension boundaries: the left argument is [outer..., inner...] and the
  // right argument is [inner..., outer...].
  const unsigned int inner_rank = op.num_contract_ranks();
  const unsigned int left_rank = op.left_rank();
  const unsigned int left_outer_rank = left_rank - inner_rank;
  const unsigned int right_rank = op.right_rank();

  // Number of tiles in the fused inner (contracted) dimension.
  std::size_t inner_tiles = 1ul;
  for (std::size_t d = left_outer_rank; d < left_rank; ++d) {
    inner_tiles *= left.range().extent(d);
  }

  // Collect the per-dimension tilings of the result from the outer dimensions
  // of both arguments, accumulating the fused outer sizes (in tiles and in
  // elements) along the way.
  typename impl_type::trange_type::Ranges result_ranges(op.result_rank());
  std::size_t row_tiles = 1ul;
  std::size_t row_elems = 1ul;
  std::size_t col_tiles = 1ul;
  std::size_t col_elems = 1ul;
  std::size_t next_result_dim = 0ul;

  // Outer dimensions contributed by the left argument.
  for (unsigned int d = 0u; d < left_outer_rank; ++d) {
    const std::size_t src = next_result_dim++;
    result_ranges[(perm ? perm[src] : src)] = left.trange().data()[d];
    row_tiles *= left.range().extent(d);
    row_elems *= left.trange().elements_range().extent(d);
  }

  // Outer dimensions contributed by the right argument.
  for (std::size_t d = inner_rank; d < right_rank; ++d) {
    const std::size_t src = next_result_dim++;
    result_ranges[(perm ? perm[src] : src)] = right.trange().data()[d];
    col_tiles *= right.range().extent(d);
    col_elems *= right.trange().elements_range().extent(d);
  }

  // Tiled range of the result.
  typename impl_type::trange_type result_trange(result_ranges.begin(),
                                                result_ranges.end());

  // Process grid sized from the fused row/column extents.
  TA::detail::ProcGrid proc_grid(world, row_tiles, col_tiles, row_elems,
                                 col_elems);

  return result_eval_type(std::shared_ptr<impl_type>(
      new impl_type(left, right, world, result_trange, shape, pmap, perm, op,
                    inner_tiles, proc_grid)));
}
Esempio n. 2
0
// Contracts the rank-4 arrays `t` and `v` into `tv` through a SUMMA-based
// contraction evaluator built by make_contract_eval().
//
// tv : output array; on return it holds the contraction result (it is swapped
//      with a freshly built array, so its previous contents are discarded).
// t  : left argument. Per the original author's notes its first dimension is
//      "occ" and the result is laid out as (occ occ|uocc uocc).
// v  : right argument. Per the original author's notes every dimension of v
//      is "uocc".
//
// Both arguments are evaluated with dense shapes and no permutation.
void tensor_contract_444(TA::DistArray<Tile, Policy>& tv,
                         const TA::DistArray<Tile, Policy>& t,
                         const TA::DistArray<Tile, Policy>& v) {

  // For convenience, obtain the tiled ranges for the two kinds of dimensions
  // used to define t, v, and tv.
  const auto trange_occ = t.trange().dim(0);   // the first dimension of t is occ
  const auto trange_uocc = v.trange().dim(0);  // every dimension of v is uocc
  const auto ntiles_occ = trange_occ.tile_extent();
  const auto ntiles_uocc = trange_uocc.tile_extent();
  const auto n_occ = trange_occ.extent();
  // The uocc extent must come from the uocc range; reading it from
  // trange_occ would size the process grid's columns from the occ dimension.
  const auto n_uocc = trange_uocc.extent();

  // Compute the 2-d grid of processors for the SUMMA.
  // The result is (occ occ|uocc uocc), hence the row dimension is occ x occ
  // and the column dimension is uocc x uocc.
  auto& world = t.world();
  const auto nrowtiles = ntiles_occ * ntiles_occ;
  const auto ncoltiles = ntiles_uocc * ntiles_uocc;
  const auto ninttiles = ntiles_uocc * ntiles_uocc;  // contraction is over uocc x uocc
  const auto nrows = n_occ * n_occ;
  const auto ncols = n_uocc * n_uocc;
  TA::detail::ProcGrid proc_grid(world,
                                 nrowtiles, ncoltiles,
                                 nrows, ncols);

  // Wrap the arguments as evaluators: t is distributed with the grid's row
  // phase map and v with its column phase map, both over the inner tiles.
  auto t_eval = make_array_eval(t, t.world(), TA::DenseShape(),
      proc_grid.make_row_phase_pmap(ninttiles),
      TA::Permutation(), make_array_noop<Tile>());
  auto v_eval = make_array_eval(v, v.world(), TA::DenseShape(),
      proc_grid.make_col_phase_pmap(ninttiles),
      TA::Permutation(), make_array_noop<Tile>());

  //
  // Make the result metadata.
  //

  // Tiled range of the result; used here to size the result process map.
  TA::TiledRange trange_tv({trange_occ, trange_occ, trange_uocc, trange_uocc});

  // Blocked process map over all tiles of the result.
  const std::shared_ptr<TA::Pmap> pmap =
      std::make_shared<TA::detail::BlockedPmap>(
          world, trange_tv.tiles_range().volume());

  // Build the contraction evaluator (dense result shape, no permutation).
  // Original author's note on a planned PaRSEC port: a PTG object would take
  // t_eval's and v_eval's Futures as input and set the Futures of a dummy
  // output ArrayEval.
  auto contract = make_contract_eval(
      t_eval, v_eval, world, TA::DenseShape(), pmap, TA::Permutation(),
      make_contract<Tile>(4u, 4u, 4u)
      );

  // Per the original author's note: eval() just schedules the Summa task and
  // proceeds; evaluation is lazy, so contract's tiles could in principle be
  // used to compose further work immediately.
  contract.eval();

  // Since the intent of this function is to return the result as a named
  // DistArray, migrate contract's futures to tv here.

  // Create a temporary result array with the evaluator's metadata.
  TA::DistArray<Tile,Policy> result(contract.world(), contract.trange(),
           contract.shape(), contract.pmap());

  // Move the data from the evaluator into the result array, skipping zero
  // tiles. Per the original author's note there is no communication in this
  // step.
  for(const auto index : *contract.pmap()) {
    if(! contract.is_zero(index))
      result.set(index, contract.get(index));
  }

  // Uncomment this to block until Summa is complete .. but no need to wait.
  //contract.wait();

  // Swap the new array with the result array object.
  result.swap(tv);

}
Esempio n. 3
0
//c---------------------------------------------------------------------
//c
//c Authors: S. Weeratunga
//c          V. Venkatakrishnan
//c          E. Barszcz
//c          M. Yarrow
//c C-version: Rob Van der Wijngaart, Intel Corporation
//c
//c---------------------------------------------------------------------
// 
// Copyright 2010 Intel Corporation
// 
//    Licensed under the Apache License, Version 2.0 (the "License");
//    you may not use this file except in compliance with the License.
//    You may obtain a copy of the License at
// 
//        http://www.apache.org/licenses/LICENSE-2.0
// 
//    Unless required by applicable law or agreed to in writing, software
//    distributed under the License is distributed on an "AS IS" BASIS,
//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//    See the License for the specific language governing permissions and
//    limitations under the License.
// 
int RCCE_APP(int argc, char **argv){

//c---------------------------------------------------------------------
//c
//c   driver for the performance evaluation of the solver for
//c   five coupled parabolic/elliptic partial differential equations.
//c
//c---------------------------------------------------------------------



      char class;
      double mflops;
      int ierr, i, j, k, mm, iverified;

//c---------------------------------------------------------------------
//c   initialize communications
//c---------------------------------------------------------------------
       init_comm(&argc, &argv);
//       RCCE_debug_set(RCCE_DEBUG_SYNCH);
//c---------------------------------------------------------------------
//c   read input data
//c---------------------------------------------------------------------
       read_input();

//c---------------------------------------------------------------------
//c   set up processor grid
//c---------------------------------------------------------------------
       proc_grid();

//c---------------------------------------------------------------------
//c   determine the neighbors
//c---------------------------------------------------------------------
       neighbors();

//c---------------------------------------------------------------------
//c   set up sub-domain sizes
//c---------------------------------------------------------------------
       subdomain();

//c---------------------------------------------------------------------
//c   set up coefficients
//c---------------------------------------------------------------------
       setcoeff();

//c---------------------------------------------------------------------
//c   set the boundary values for dependent variables
//c---------------------------------------------------------------------

       setbv();

//c---------------------------------------------------------------------
//c   set the initial values for dependent variables
//c---------------------------------------------------------------------

       setiv();

//c---------------------------------------------------------------------
//c   compute the forcing term based on prescribed exact solution
//c---------------------------------------------------------------------
       erhs();

////c---------------------------------------------------------------------
////c   perform one SSOR iteration to touch all data and program pages 
////c---------------------------------------------------------------------
       ssor(1);

//
////c---------------------------------------------------------------------
////c   reset the boundary and initial values
////c---------------------------------------------------------------------
       setbv();
       setiv();
//
////c---------------------------------------------------------------------
////c   perform the SSOR iterations
////c---------------------------------------------------------------------
       ssor(itmax);

////c---------------------------------------------------------------------
////c   compute the solution error
////c---------------------------------------------------------------------
        error();

////c---------------------------------------------------------------------
////c   compute the surface integral
////c---------------------------------------------------------------------
      pintgr();
//
////c---------------------------------------------------------------------
////c   verification test
////c---------------------------------------------------------------------

      if (id ==0) {
        verify( rsdnm, errnm, &frc, &class );
         mflops = (double)(itmax)*(1984.77*(double)( nx0 )
              *(double)( ny0 )
              *(double)( nz0 )
              -10923.3*((double)( nx0+ny0+nz0 )/3.)*((double)( nx0+ny0+nz0 )/3.)
              +27770.9* (double)( nx0+ny0+nz0 )/3.
              -144010.)
              / (maxtime*1000000.);

          print_results("LU", &class, &nx0,
           &ny0, &nz0, &itmax, &nnodes_compiled,
           &num, &maxtime, &mflops, "          floating point", &iverified, 
           NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6);

//         FILE *perf_file;
//         char name[50] = "/shared/DEMOS/RCCE/NPB_LU/perf."; 
//         char postfix[50]; 
//         sprintf(postfix, "%d", nnodes_compiled); 
//         strcat(name, postfix); 
//         perf_file = fopen(name,"w"); 
//         fprintf(perf_file, "%d", (int)mflops); 
//         fclose(perf_file); 
      }