Exemplo n.º 1
0
// axpby overload for Kokkos views that use the ViewMPVectorContiguous
// specialization.
//
// Both scalar arguments must satisfy Sacado::is_constant(); otherwise
// Kokkos::Impl::raise_error() is invoked with a fixed message.  The two views
// are then converted to their flat_array_type, the scalars are reduced with
// Sacado::Value<>::eval(), and the call is forwarded to whichever axpby
// overload matches those flattened arguments.
void
axpby(const AV& a,
      const Kokkos::View< XT,XL,XD,XM,Kokkos::Impl::ViewMPVectorContiguous >& x,
      const BV& b,
      const Kokkos::View< YT,YL,YD,YM,Kokkos::Impl::ViewMPVectorContiguous >& y)
{
  typedef Kokkos::Impl::ViewMPVectorContiguous Spec;
  typedef typename Kokkos::View< XT,XL,XD,XM,Spec >::flat_array_type FlatX;
  typedef typename Kokkos::View< YT,YL,YD,YM,Spec >::flat_array_type FlatY;

  // Same short-circuit order as testing each scalar separately: b is only
  // inspected when a is already known to be constant.
  const bool scalars_are_constant =
    Sacado::is_constant(a) && Sacado::is_constant(b);
  if (!scalars_are_constant) {
    Kokkos::Impl::raise_error("axpby not implemented for non-constant a or b");
  }

  // Re-view the inputs through their flat array types.
  FlatX flat_x = x;
  FlatY flat_y = y;

  // Reduce each scalar through Sacado::Value<>::eval before forwarding.
  auto alpha = Sacado::Value<AV>::eval(a);
  auto beta  = Sacado::Value<BV>::eval(b);

  axpby( alpha, flat_x, beta, flat_y );
}
Exemplo n.º 2
0
// axpby overload enabled only when both view types satisfy
// Kokkos::is_view_mp_vector.
//
// Both scalar arguments must satisfy Sacado::is_constant(); otherwise
// Kokkos::Impl::raise_error() is invoked with a fixed message.  The views are
// converted to Kokkos::FlatArrayType<>::type, the scalars are reduced with
// Sacado::Value<>::eval(), and the call is forwarded to whichever axpby
// overload matches those flattened arguments.
typename std::enable_if<
  Kokkos::is_view_mp_vector< Kokkos::View<XD,XP...> >::value &&
  Kokkos::is_view_mp_vector< Kokkos::View<YD,YP...> >::value >::type
axpby(const AV& a,
      const Kokkos::View<XD,XP...>& x,
      const BV& b,
      const Kokkos::View<YD,YP...>& y)
{
  typedef typename Kokkos::FlatArrayType< Kokkos::View<XD,XP...> >::type FlatX;
  typedef typename Kokkos::FlatArrayType< Kokkos::View<YD,YP...> >::type FlatY;

  // Same short-circuit order as testing each scalar separately: b is only
  // inspected when a is already known to be constant.
  const bool scalars_are_constant =
    Sacado::is_constant(a) && Sacado::is_constant(b);
  if (!scalars_are_constant) {
    Kokkos::Impl::raise_error("axpby not implemented for non-constant a or b");
  }

  // Re-view the inputs through their flat array types.
  FlatX flat_x = x;
  FlatY flat_y = y;

  // Reduce each scalar through Sacado::Value<>::eval before forwarding.
  auto alpha = Sacado::Value<AV>::eval(a);
  auto beta  = Sacado::Value<BV>::eval(b);

  axpby( alpha, flat_x, beta, flat_y );
}
Exemplo n.º 3
0
//=============================================================================
/// Runs the velocity / pressure solve sequence implemented below and updates
/// the arrays held by FluidVar in place.  The stages, in order, are:
///   1. zero the velocity RHS, then apply the "gradient" (pressure) and
///      "convection" operators to it, plus the particle contribution when
///      UsingMicroGrid() is true;
///   2. set the velocity boundary conditions, apply "v_bc", and solve
///      "v_stiffness" for FluidVar->Vel;
///   3. build a pressure RHS with the "divergence" operator, scale it by
///      -tau[0], and solve "p_stiffness" for the temporary PreStar;
///   4. apply "gradient" to PreStar, solve "v_mass" into VelTilde2, and add
///      VelTilde2 / tau[0] to FluidVar->Vel;
///   5. add PreStar to FluidVar->Pre and update FluidVar->Acc.
///
/// The work arrays VelTilde1, VelTilde2, PreStar and PreRHS are obtained
/// through AllocVdouble and released with free() before returning.
///
/// NOTE(review): loops run over dir = 1..3 and node ids 1..N, and the vector
/// helpers (scal / copy / axpby) are given N+1 elements, so the arrays appear
/// to be 1-based with slot 0 unused -- confirm against AllocVdouble.
void
FluidNavierStokes(
const int         order,      ///< order of the time discretization
const mesh_t*     mesh,       ///< mesh structure
const bc_t*       bc,         ///< boundary conditions
#ifdef VERSION_Z
      mesh_t*     mesh_o,     ///< mesh structure of outer domain
      double*     Vel_o[4],   ///< fluid velocity of outer domain
#endif          
const fluid_t*    fluid,      ///< fluid parameters
const particle_t  particle[], ///< particles
const double      tau[3],     ///< time discretization coefficients
const double      dt,         ///< time step
      FluidVar_t* FluidVar )  ///< fluid variables
{
  double
  *VelTilde1[4] = {NULL,NULL,NULL,NULL}, // Convected velocity from level n
  *VelTilde2[4] = {NULL,NULL,NULL,NULL}, // Convected velocity from level n-1
  FrameVel[4] = { 0.,0.,0.,0. }; // frame velocity
  
  // Allocate the work arrays for directions 1..3 (entry 0 stays NULL).
  for ( int dir = 1 ; dir <= 3 ; dir++ )
  {
    AllocVdouble(mesh->NbOfNodes, VelTilde1[dir]);
    AllocVdouble(mesh->NbOfNodes, VelTilde2[dir]);
  }
  
#ifdef VERSION_Z
  if ( UsingMicroGrid() )
  {
    // find nodes of inner domain that are outside outer domain
    FindNodesOutside(particle, mesh_o, mesh);
    
    // set the velocity of those nodes to 0.
    for ( int NodeId = 1 ; NodeId <= mesh->NbOfNodes ; NodeId++ )
      if ( mesh->OutsideNodes[NodeId] == true )
        for ( int dir = 1 ; dir <= 3 ; dir++ )
          FluidVar->Vel[dir][NodeId] = 0.;    
  }
#endif
  
  //----------------------------------------------------------------------------
  // Compute all the velocity rhs contributions
  //----------------------------------------------------------------------------
  // rhs = 0
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    scal( mesh->NbOfNodes+1, 0., FluidVar->VelRHS[dir] );
  
  // compute frame velocity
  FrameVelSet( particle, fluid->FrameVelDir, FrameVel );
  
  //----------------------------------------------------------------------------
  // Pressure gradient contribution
  //----------------------------------------------------------------------------
  // compute the pressure gradient and subtract it from the rhs
  ApplyOperator("gradient", &FluidVar->Pre, FluidVar->VelRHS);
  
  //----------------------------------------------------------------------------
  // Convection terms contribution
  //----------------------------------------------------------------------------
#ifdef VERSION_Z
  // Particle position used for the convection step: Pos1 for a first-order
  // scheme, Pos2 otherwise.
  const double *ParticlePos = (order == 1) ? particle[1].Pos1 : particle[1].Pos2;
//  const double *ParticlePos = particle[1].Pos;
  
  if ( UsingMicroGrid() )
    ConvectionMicroGrid(order, mesh, dt, FrameVel,
             ParticlePos, mesh_o, Vel_o,
             FluidVar->VelOld1, FluidVar->VelOld2, VelTilde1, VelTilde2);
  else
    ConvectionMacroGrid(order, mesh, dt, FrameVel,
                        ParticlePos, mesh_o, Vel_o,
                        FluidVar->VelOld1, FluidVar->VelOld2, VelTilde1, VelTilde2);
#else
  
  Convection(order, mesh, dt, FrameVel,
             FluidVar->VelOld1, FluidVar->VelOld2, VelTilde1, VelTilde2);
#endif 
  
  // compute the convection terms in Acc: Acc = -tau[1] * VelTilde1
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    axpby( mesh->NbOfNodes+1, -tau[1], VelTilde1[dir], 0., FluidVar->Acc[dir] );
  
  // second-order scheme only: Acc = Acc - tau[2] * VelTilde2
  if ( order == 2 )
    for ( int dir = 1 ; dir <= 3 ; dir++ )
      axpby( mesh->NbOfNodes+1, -tau[2], VelTilde2[dir], 1., FluidVar->Acc[dir]);
  
  // weight by mass matrix and subtract them from rhs
  ApplyOperator("convection", FluidVar->Acc, FluidVar->VelRHS);
  
  // set convected velocity at previous timestep as initial guess for conjugate gradient
  // WARNING: this has to be done before the boundary conditions are set,
  // otherwise the boundary values could get overwritten
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    copy(mesh->NbOfNodes+1, VelTilde1[dir], FluidVar->Vel[dir]);
  
  //----------------------------------------------------------------------------
  // Particle weight contribution
  //----------------------------------------------------------------------------
  if ( UsingMicroGrid() )
    GetParticleMomentumContribution(mesh, particle, fluid, FluidVar->VelRHS);
  
  //----------------------------------------------------------------------------
  // Boundary conditions contribution
  //----------------------------------------------------------------------------
  SetVelBC( mesh->NbOfNodes, bc, FluidVar->Vel );
  
#ifdef VERSION_Z
  // prescribe Dirichlet bc by interpolating outer domain velocity; this has to be done after SetVelBC!
  // NOTE(review): this uses particle[1].Pos while the convection step above
  // selects Pos1/Pos2 -- confirm the intended position field.
  if ( UsingMicroGrid() )
    MassConserveBC( particle[1].Pos, mesh_o, Vel_o, mesh, FluidVar->Vel );
#endif
  
  // weight with stiffness matrix and subtract from rhs
  ApplyOperator("v_bc", FluidVar->Vel, FluidVar->VelRHS);
  
  //----------------------------------------------------------------------------
  // Solve for the velocity: advection-diffusion step
  //----------------------------------------------------------------------------
  debug( "\nVelocity diffusion step\n" );
  
  SolveOperator("v_stiffness", mesh->OutsideNodes, FluidVar->VelRHS, FluidVar->Vel);

  //----------------------------------------------------------------------------
  // Solve for the pressure star: projection step
  //----------------------------------------------------------------------------
  debug( "\nPressure prediction step\n" );
  
  // initialize pressure rhs
  double *PreStar = NULL, // Predicted pressure
  *PreRHS  = NULL; // Pressure RHS
  
  AllocVdouble( mesh->NbOfPressureNodes,PreStar );
  AllocVdouble( mesh->NbOfPressureNodes,PreRHS );
  
  // compute and add velocity divergence to pressure rhs
  // NOTE(review): "add" assumes AllocVdouble returns zero-filled storage,
  // since PreRHS is not cleared explicitly here -- confirm.
  ApplyOperator("divergence", FluidVar->Vel, &PreRHS);
  
  // multiply pressure rhs by -tau_0
  scal( mesh->NbOfPressureNodes+1, -tau[0], PreRHS );
  
  // set previous step pressure as initial guess for conjugate gradient
  // NOTE(review): this copies NbOfFreePressureNodes+1 entries whereas the
  // neighbouring calls use NbOfPressureNodes+1 -- confirm this is intended.
  copy( mesh->NbOfFreePressureNodes+1, FluidVar->Pre, PreStar );
  
  SolveOperator("p_stiffness", mesh->OutsideNodes, &PreRHS, &PreStar);

  //----------------------------------------------------------------------------
  // Solve for the velocity: projection step
  //----------------------------------------------------------------------------
  debug( "\nVelocity projection step\n" );
  
  // rhs = 0
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    scal( mesh->NbOfNodes+1, 0., FluidVar->VelRHS[dir] );
  
  // compute the pressure star gradient and subtract it from the velocity rhs
  ApplyOperator("gradient", &PreStar, FluidVar->VelRHS);
  
  // VelTilde2 = 0, used here as temporary array, it will store the un-weighted gradient
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    scal( mesh->NbOfNodes+1, 0., VelTilde2[dir] );
  
  // compute the un-weighted gradient
  SolveOperator("v_mass", mesh->OutsideNodes, FluidVar->VelRHS, VelTilde2);
  
  // Remove the non-solenoidal part of the velocity, i.e. the un-weighted gradient:
  // Vel = Vel + VelTilde2 / tau[0]
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    axpby(mesh->NbOfNodes+1, 1. / tau[0], VelTilde2[dir], 1.,
          FluidVar->Vel[dir]);
  
  //----------------------------------------------------------------------------
  // Solve for the pressure: correction step
  //----------------------------------------------------------------------------
  debug( "\nPressure correction step\n" );
  
  // p = p + p*
  axpby( mesh->NbOfPressureNodes+1, 1., PreStar, 1., FluidVar->Pre );
  
#ifdef PRE_INCREMENTAL_ROTATIONAL
  // then solve for - 1/Re Div(vel) = PreMass^{-1} * PreRHS, solve in p* again
  
  // solve for PreStar
  // NOTE(review): FluidOperators is not declared in this function; presumably
  // a file-level object -- confirm.
  for ( int i = 1 ; i <= mesh->NbOfPressureNodes ; i++ ) 
    PreStar[i] = PreRHS[i] * FluidOperators->PreMassPrec[i];
  
  // p = p + p* = p - 1/Re Div(vel)
  axpby( mesh->NbOfPressureNodes+1, 1., PreStar, 1., FluidVar->Pre );
#endif
  
  // Compute the acceleration field of the fluid, Acc = tau0 * Vel - Acc
  for ( int dir = 1 ; dir <= 3 ; dir++ )
    axpby(mesh->NbOfNodes+1, tau[0], FluidVar->Vel[dir], -1.,
          FluidVar->Acc[dir]);
  
  // free local arrays
  for ( int dir = 1 ; dir <= 3 ; dir++ )
  {  
    free(VelTilde1[dir]);    
    free(VelTilde2[dir]);
  }
  free(PreStar);
  free(PreRHS);  
}
Exemplo n.º 4
0
// Driver for the stk_mesh "BLAS" use case.
//
// Reads the mesh named by working_directory + mesh_filename, declares and
// initializes the nodal fields, then runs fill / axpby / dot / norm2 over the
// node-rank field data num_trials times using the algorithm runner selected
// by thread_runner ("" or "NonThreaded", "TPI", "TBB").  Wall-clock timings
// are min/max-reduced over comm and reported on rank 0.
//
// When performance_test is true no output mesh is created or written.
//
// Returns false when no algorithm runner could be obtained for thread_runner,
// true otherwise.
bool use_case_blas_driver(MPI_Comm comm,
                        int num_threads,
                        int num_trials,
                        const std::string &working_directory,
                        const std::string &mesh_filename,
                        const std::string &mesh_type,
                        const std::string &thread_runner,
                        int bucket_size,
                        bool performance_test)
{
  // Mesh output is suppressed when running for performance measurements.
  const bool write_output = !performance_test;

  // Only rank 0 reports progress and results.
  const bool is_root = ( stk::parallel_machine_rank(comm) == 0 );

  if (is_root) {
    std::cout << " stk_mesh Use Case Blas - fill, axpby, dot, norm , begin" << std::endl ;
    std::cout << "Running '" << mesh_filename << "' case, num_trials = "
              << num_trials << std::endl;
  }

  // Pick the algorithm runner named by thread_runner; an unrecognized name
  // leaves the pointer NULL.
  const AlgorithmRunnerInterface* runner = NULL ;
  if ( thread_runner.empty() || thread_runner == "NonThreaded" ) {
    runner = stk::algorithm_runner_non_thread();
  }
  else if ( thread_runner == "TPI" ) {
    runner = stk::algorithm_runner_tpi(num_threads);
  }
  else if ( thread_runner == "TBB" ) {
    runner = stk::algorithm_runner_tbb(num_threads);
  }

  // Bail out early (on every rank) when no runner is available.
  if (runner == NULL) {
    std::cout << "ERROR, failed to obtain requested AlgorithmRunner '"
              << thread_runner << "'." << std::endl;
    return false;
  }

  if (is_root) {
    std::cout << "Using " << thread_runner
              << " algorithm runner, num_threads = " << num_threads
              << std::endl;
  }

  //----------------------------------
  // Timer slots:
  //   [0] = meta data creation
  //   [1] = bulk data creation and nodal data initialization
  //   [2] = interval between the end of [1] and the start of the trials
  //   [3] = fill and axpby (accumulated over trials)
  //   [4] = dot and norm2  (accumulated over trials)
  //   [5]..[7] = never written in this function; they stay zero
  //   [8] = interval from the last timer sample to after the mesh scope closes

  const int num_timers = 9;
  double time_min[num_timers] = { 0 };
  double time_max[num_timers] = { 0 };
  double wtime = 0 ;

  //--------------------------------------------------------------------

  reset_malloc_stats();

  if ( is_root ) {
    std::cout << "stk_mesh performance use case BLAS" << std::endl
              << "  Number Processes = " << stk::parallel_machine_size( comm )
              << std::endl ;
    std::cout.flush();
  }

  //--------------------------------------------------------------------

  // Initialize the IO system (registers element types, storage types and
  // the default exodusII database type).
  Ioss::Init::Initializer init_db;

  {
    wtime = stk::wall_time();

    //------------------------------------------------------------------
    // Meta data: element blocks and their fields, read from the input mesh.

    stk::mesh::fem::FEMMetaData meta_data(  spatial_dimension );
    stk::io::MeshData mesh_data;
    std::string mesh_path = working_directory + mesh_filename;
    stk::io::create_input_mesh(mesh_type, mesh_path, comm,
                               meta_data, mesh_data);
    stk::io::define_input_fields(mesh_data, meta_data);

    Fields fields;
    use_case_14_declare_fields(fields, meta_data.get_meta_data(meta_data));

    // Commit the meta data so it can be used to create the bulk data.
    meta_data.commit();

    time_max[0] = stk::wall_dtime( wtime );

    //------------------------------------------------------------------
    // Bulk data conforming to the committed meta data.
    stk::mesh::BulkData bulk_data(meta_data.get_meta_data(meta_data) , comm, bucket_size);
    stk::io::populate_bulk_data(bulk_data, mesh_data);

    //------------------------------------------------------------------
    // Output mesh: input path with ".blas" appended.
    if (write_output) {
      mesh_path += ".blas";
      stk::io::create_output_mesh(mesh_path, comm, bulk_data, mesh_data);
      stk::io::define_output_fields(mesh_data, meta_data, true);
    }

    stk::app::use_case_14_initialize_nodal_data(bulk_data ,
                                                *fields.model_coordinates ,
                                                *fields.coordinates_field ,
                                                *fields.velocity_field,
                                                1.0 /*dt*/);

    time_max[1] = stk::wall_dtime( wtime );

    //------------------------------------------------------------------
    // Nothing further to set up before the trials.
    time_max[2] = stk::wall_dtime( wtime );
    //------------------------------------------------------------------

    wtime = stk::wall_time();

    // Kept outside the loop so the value from the final trial can be reported.
    double dot1 = 0;

    for (int trial = 0; trial < num_trials; ++trial) {

      wtime = stk::wall_time();

      // --- fill and axpby -------------------------------------------
      fill( *runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK ,
            *fields.velocity_field, 0.2 );

      fill( *runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK ,
            *fields.fint_field, 1.0 );

      axpby( *runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK ,
             0.01, *fields.model_coordinates , 1.0 , *fields.coordinates_field );

      axpby( *runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK ,
             0.1, *fields.coordinates_field, 1.0 , *fields.velocity_field );

      time_max[3] += stk::wall_dtime( wtime );

      // --- dot and norm2 --------------------------------------------
      dot1 = dot( *runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK ,
                  *fields.velocity_field, *fields.coordinates_field );

      double dot2 = dot( *runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK,
                         *fields.velocity_field, *fields.fint_field );

      double norm_1 = norm2( *runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK,
                             *fields.velocity_field );

      double norm_2 = norm2( *runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK,
                             *fields.coordinates_field );

      if ( is_root ) {
        std::cout << "    " << dot1 << "  " << dot2 << "  " << norm_1 << "  " << norm_2 << std::endl;
      }

      time_max[4] += stk::wall_dtime( wtime );

      if (write_output) {
        stk::io::process_output_request(mesh_data, bulk_data, trial);
      }
    }

    if ( is_root ) {
      // Fix the number format so the printed value can serve as a
      // pass/fail check for a regression test.
      std::cout.precision(6);
      std::cout.setf(std::ios_base::scientific, std::ios_base::floatfield);
      std::cout << "Final dot1: " << dot1 << std::endl;
    }
    //------------------------------------------------------------------

#ifdef USE_GNU_MALLOC_HOOKS
    if (is_root) {
      double net_alloc = alloc_MB() - freed_MB();
      std::cout << "Mesh creation:" << "\n   Total allocated: "
                << alloc_MB()<<"MB in "<<alloc_blks() << " blocks."
                << "\n   Total freed: " << freed_MB() << "MB in "
                << freed_blks() << " blocks."
                << "\n   Net allocated: "<<net_alloc << "MB."<<std::endl;
    }
#endif

    //------------------------------------------------------------------
  }

  // Sampled after the scope above has closed, i.e. after the mesh objects
  // declared in it have been destroyed.
  time_max[8] = stk::wall_dtime( wtime );

  // Seed the minimum timings with the local values before reducing.
  for (int slot = 0; slot < num_timers; ++slot) {
    time_min[slot] = time_max[slot] ;
  }

  stk::all_reduce( comm , stk::ReduceMax<9>( time_max ) & stk::ReduceMin<9>( time_min ) );

  // Slots 3..6 are per-trial quantities: convert totals into averages.
  for (int slot = 3; slot <= 6; ++slot) {
    time_max[slot] /= num_trials ;
  }
  for (int slot = 3; slot <= 6; ++slot) {
    time_min[slot] /= num_trials ;
  }

  if ( is_root ) {
    // Report rows: label text and the timer slot each one prints.
    static const char* const row_label[] = {
      "  Meta-data setup          = ",
      "  Bulk-data generation     = ",
      "  Initialization           = ",
      "  fill & axpby (per-trial) = ",
      "  dot & norm2 (per-trial)  = ",
      "  Mesh destruction         = "
    };
    static const int row_slot[] = { 0, 1, 2, 3, 4, 8 };
    const int num_rows = sizeof(row_slot) / sizeof(row_slot[0]);

    std::cout
      << "stk_mesh performance use case results:" << std::endl
      << "  Number of trials         = " << num_trials << std::endl;

    for (int row = 0; row < num_rows; ++row) {
      std::cout
        << row_label[row] << time_min[row_slot[row]] << " : "
        << time_max[row_slot[row]] << " sec, min : max"
        << std::endl;
    }

    std::cout << std::endl ;
  }

  return true;
}