Example #1
static void box_partition( int ip , int up , int axis ,
                    const int box[3][2] ,
                    int p_box[][3][2] )
{
  const int np = up - ip ;
  if ( 1 == np ) {
    p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ;
    p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ;
    p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ;
  }
  else {
    const int n = box[ axis ][1] - box[ axis ][0] ;
    const int np_low = np / 2 ;  /* Rounded down */
    const int np_upp = np - np_low ;

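    /* Give the upper group of processors a proportional share of the
       cells; ( axis + 2 ) % 3 cycles the split axis 2 -> 1 -> 0 -> 2 ... */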
    const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np)));
    const int n_low = n - n_upp ;
    const int next_axis = ( axis + 2 ) % 3 ;

    if ( np_low ) { /* P = [ip,ip+np_low) */
      int dbox[3][2] ;
      dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ;
      dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ;
      dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ;

      dbox[ axis ][1] = dbox[ axis ][0] + n_low ;

      box_partition( ip, ip + np_low, next_axis,
                     (const int (*)[2]) dbox, p_box );
    }

    if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */
      int dbox[3][2] ;
      dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ;
      dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ;
      dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ;

      ip += np_low ;
      dbox[ axis ][0] += n_low ;
      dbox[ axis ][1]  = dbox[ axis ][0] + n_upp ;

      box_partition( ip, ip + np_upp, next_axis,
                     (const int (*)[2]) dbox, p_box );
    }
  }
}
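A minimal driver sketch for the recursive partitioner above; the 4x4x4 box and 4-way split are illustrative, and, like the callers in the later examples, it starts the recursion on axis 2:

#include <stdio.h> /* for printf ; assumes box_partition above is in scope */

static void example_partition( void )
{
  const int box[3][2] = { { 0 , 4 } , { 0 , 4 } , { 0 , 4 } };
  int p_box[4][3][2] ;
  int i ;

  box_partition( 0 , 4 , 2 , box , p_box );

  for ( i = 0 ; i < 4 ; ++i ) {
    printf( "p_box[%d] = [%d,%d) x [%d,%d) x [%d,%d)\n" , i ,
            p_box[i][0][0] , p_box[i][0][1] ,
            p_box[i][1][0] , p_box[i][1][1] ,
            p_box[i][2][0] , p_box[i][2][1] );
  }
}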
Example #2
void box_partition_rcb( const BoxType        & root_box ,
                        std::vector<BoxType> & part_boxes )
{
  const BoxBoundsLinear use_boxes ;

  const size_t part_count = part_boxes.size();

  box_partition( 0 , part_count , root_box , & part_boxes[0] );

  // Verify partitioning

  size_t total_cell = 0 ;

  for ( size_t i = 0 ; i < part_count ; ++i ) {

    total_cell += count( part_boxes[i] );

    BoxType box_interior , box_use ;

    use_boxes.apply( root_box , part_boxes[i] , box_interior , box_use );

    if ( count( box_use ) < count( part_boxes[i] ) ||
         count( part_boxes[i] ) < count( box_interior ) ||
         part_boxes[i] != intersect( part_boxes[i] , box_use ) ||
         box_interior  != intersect( part_boxes[i] , box_interior )) {

      std::ostringstream msg ;

      msg << "box_partition_rcb ERROR : "
          << "part_boxes[" << i << "] = "
          << part_boxes[i]
          << " use " << box_use
          << " interior " << box_interior
          << std::endl 
          << "  part ^ use " << intersect( part_boxes[i] , box_use )
          << "  part ^ interior " << intersect( part_boxes[i] , box_interior );

      throw std::runtime_error( msg.str() );
    }

    for ( size_t j = i + 1 ; j < part_count ; ++j ) {
      const BoxType tmp = intersect( part_boxes[i] , part_boxes[j] );

      if ( count( tmp ) ) {
        throw std::runtime_error( std::string("box partition intersection") );
      }
    }
  }

  if ( total_cell != count( root_box ) ) {
    throw std::runtime_error( std::string("box partition count") );
  }
}
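A hedged usage sketch, assuming BoxType, std::vector, and box_partition_rcb as above: the size of the output vector selects the part count, and a failed check surfaces as std::runtime_error.

void example_rcb( const BoxType & root_box , const size_t part_count )
{
  std::vector<BoxType> part_boxes( part_count ); // size selects the part count
  box_partition_rcb( root_box , part_boxes );    // throws std::runtime_error on a bad partition
}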
Example #3
static void test_box( const int box[3][2] , const int np )
{
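  /* Total cell count; assumes the box lower bounds are all zero. */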
  const int ncell_box = box[0][1] * box[1][1] * box[2][1] ;
  int ncell_total = 0 ;
  int ncell_min = ncell_box ;
  int ncell_max = 0 ;
  int (*pbox)[3][2] ;
  int i , j ;

  pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 );

  box_partition( 0 , np , 2 , box , pbox );

  for ( i = 0 ; i < np ; ++i ) {
    const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) *
                      ( pbox[i][1][1] - pbox[i][1][0] ) *
                      ( pbox[i][2][1] - pbox[i][2][0] );

    if ( ! box_contain( box , (const int (*)[2]) pbox[i] ) ) {
      fprintf(stdout,"  OUT OF BOUNDS pbox[%d/%d] = ",i,np);
      box_print(stdout,(const int (*)[2]) pbox[i]);
      fprintf(stdout,"\n");
      abort();
    }

    for ( j = i + 1 ; j < np ; ++j ) {
      if ( ! box_disjoint( (const int (*)[2]) pbox[i] ,
                           (const int (*)[2]) pbox[j] ) ) {
        fprintf(stdout,"  NOT DISJOINT pbox[%d/%d] = ",i,np);
        box_print(stdout, (const int (*)[2]) pbox[i]);
        fprintf(stdout,"\n");
        fprintf(stdout,"               pbox[%d/%d] = ",j,np);
        box_print(stdout, (const int (*)[2]) pbox[j]);
        fprintf(stdout,"\n");
        abort();
      }
    }
    ncell_total += ncell ;

    if ( ncell_max < ncell ) { ncell_max = ncell ; }
    if ( ncell < ncell_min ) { ncell_min = ncell ; }
  }

  if ( ncell_total != ncell_box ) {
    fprintf(stdout,"  WRONG CELL COUNT NP = %d\n",np);
    abort();
  }
  fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n",
          np,ncell_box,ncell_box/np,ncell_min,ncell_max);

  free( pbox );
}
Example #4
void box_partition_rcb( const int np , 
                        const int my_p ,
                        const int root_box[][2] , 
                        const int ghost ,
                        int (**pbox)[3][2] , 
                        int ** map_local_id ,
                        int ** map_recv_pc ,
                        int ** map_send_pc ,
                        int ** map_send_id )
{
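  /* The caller owns these allocations: *pbox and the four map arrays
     must eventually be freed (the map test below shows the pattern). */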
  *pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 );

  box_partition( 0 , np , 2 , root_box , *pbox );

  box_partition_maps( np , my_p , (const int (*)[3][2]) *pbox , ghost ,
                      map_local_id , map_recv_pc , 
                      map_send_pc , map_send_id );
}
Example #5
File: main.cpp  Project: Mantevo/miniFE
int main(int argc, char** argv) {
  miniFE::Parameters params;
  miniFE::get_parameters(argc, argv, params);

  int numprocs = 1, myproc = 0;
  miniFE::initialize_mpi(argc, argv, numprocs, myproc);

  Kokkos::initialize(argc,argv);

  if(myproc==0) {
    std::cout << "MiniFE Mini-App, Kokkos Peer Implementation" << std::endl;
  }

  miniFE::timer_type start_time = miniFE::mytimer();

#ifdef MINIFE_DEBUG
  outstream(numprocs, myproc);
#endif

  //make sure each processor has the same parameters:
  miniFE::broadcast_parameters(params);

  Box global_box = { 0, params.nx, 0, params.ny, 0, params.nz };
  std::vector<Box> local_boxes(numprocs);

  box_partition(0, numprocs, 2, global_box, &local_boxes[0]);

  Box& my_box = local_boxes[myproc];

  MINIFE_GLOBAL_ORDINAL num_my_ids = miniFE::get_num_ids<MINIFE_GLOBAL_ORDINAL>(my_box);
  MINIFE_GLOBAL_ORDINAL min_ids = num_my_ids;

#ifdef HAVE_MPI
  MPI_Datatype mpi_dtype = miniFE::TypeTraits<MINIFE_GLOBAL_ORDINAL>::mpi_type();
  MPI_Allreduce(&num_my_ids, &min_ids, 1, mpi_dtype, MPI_MIN, MPI_COMM_WORLD);
#endif

  if (min_ids == 0) {
    std::cout<<"One or more processors have 0 equations. Not currently supported. Exiting."<<std::endl;

    miniFE::finalize_mpi();

    return 1;
  }

  std::ostringstream osstr;
  osstr << "miniFE." << params.nx << "x" << params.ny << "x" << params.nz;
#ifdef HAVE_MPI
  osstr << ".P"<<numprocs;
#endif
  osstr << ".";
  if (params.name != "") osstr << params.name << ".";

  YAML_Doc doc("miniFE", MINIFE_VERSION, ".", osstr.str());
  if (myproc == 0) {
    add_params_to_yaml(doc, params);
    add_configuration_to_yaml(doc, numprocs);
    add_timestring_to_yaml(doc);
  }

  //Most of the program is performed in the 'driver' function, which is
  //templated on < Scalar, LocalOrdinal, GlobalOrdinal >.
  //To run miniFE with float instead of double, or 'long long' instead of int,
  //etc., change these template-parameters by changing the macro definitions in
  //the makefile or on the make command-line.

  int return_code =
     miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL>(global_box, my_box, params, doc);

  miniFE::timer_type total_time = miniFE::mytimer() - start_time;

  if (myproc == 0) {
    doc.add("Total Program Time",total_time);
    doc.generateYAML();
  }

  Kokkos::finalize();

  miniFE::finalize_mpi();

  return return_code;
}
Example #6
void BoxFixture::generate_boxes( const BOX   root_box,
                                       BOX   local_box )
{
  const unsigned p_rank = m_bulk_data.parallel_rank();
  const unsigned p_size = m_bulk_data.parallel_size();
  const unsigned ngx = root_box[0][1] - root_box[0][0] ;
  const unsigned ngy = root_box[1][1] - root_box[1][0] ;

  BOX * const p_box = new BOX[ p_size ];

  box_partition( 0 , p_size , 2 , root_box , & p_box[0] );

  local_box[0][0] = p_box[ p_rank ][0][0] ;
  local_box[0][1] = p_box[ p_rank ][0][1] ;
  local_box[1][0] = p_box[ p_rank ][1][0] ;
  local_box[1][1] = p_box[ p_rank ][1][1] ;
  local_box[2][0] = p_box[ p_rank ][2][0] ;
  local_box[2][1] = p_box[ p_rank ][2][1] ;

  // Create elements:

  std::vector<unsigned> local_count ;

  const stk_classic::mesh::PartVector no_parts ;

  for ( int k = local_box[2][0] ; k < local_box[2][1] ; ++k ) {
  for ( int j = local_box[1][0] ; j < local_box[1][1] ; ++j ) {
  for ( int i = local_box[0][0] ; i < local_box[0][1] ; ++i ) {
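    // 1-based node IDs on the (ngx+1) x (ngy+1) node grid of each plane;
    // k steps by whole planes of (ngx+1)*(ngy+1) nodes.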
    const EntityId n0= 1 + (i+0) + (j+0) * (ngx+1) + (k+0) * (ngx+1) * (ngy+1);
    const EntityId n1= 1 + (i+1) + (j+0) * (ngx+1) + (k+0) * (ngx+1) * (ngy+1);
    const EntityId n2= 1 + (i+1) + (j+1) * (ngx+1) + (k+0) * (ngx+1) * (ngy+1);
    const EntityId n3= 1 + (i+0) + (j+1) * (ngx+1) + (k+0) * (ngx+1) * (ngy+1);
    const EntityId n4= 1 + (i+0) + (j+0) * (ngx+1) + (k+1) * (ngx+1) * (ngy+1);
    const EntityId n5= 1 + (i+1) + (j+0) * (ngx+1) + (k+1) * (ngx+1) * (ngy+1);
    const EntityId n6= 1 + (i+1) + (j+1) * (ngx+1) + (k+1) * (ngx+1) * (ngy+1);
    const EntityId n7= 1 + (i+0) + (j+1) * (ngx+1) + (k+1) * (ngx+1) * (ngy+1);

    const EntityId elem_id =  1 + i + j * ngx + k * ngx * ngy;

    Entity & node0 = m_bulk_data.declare_entity( 0 , n0 , no_parts );
    Entity & node1 = m_bulk_data.declare_entity( 0 , n1 , no_parts );
    Entity & node2 = m_bulk_data.declare_entity( 0 , n2 , no_parts );
    Entity & node3 = m_bulk_data.declare_entity( 0 , n3 , no_parts );
    Entity & node4 = m_bulk_data.declare_entity( 0 , n4 , no_parts );
    Entity & node5 = m_bulk_data.declare_entity( 0 , n5 , no_parts );
    Entity & node6 = m_bulk_data.declare_entity( 0 , n6 , no_parts );
    Entity & node7 = m_bulk_data.declare_entity( 0 , n7 , no_parts );
    Entity & elem  = m_bulk_data.declare_entity( 3 , elem_id , no_parts );

    m_bulk_data.declare_relation( elem , node0 , 0 );
    m_bulk_data.declare_relation( elem , node1 , 1 );
    m_bulk_data.declare_relation( elem , node2 , 2 );
    m_bulk_data.declare_relation( elem , node3 , 3 );
    m_bulk_data.declare_relation( elem , node4 , 4 );
    m_bulk_data.declare_relation( elem , node5 , 5 );
    m_bulk_data.declare_relation( elem , node6 , 6 );
    m_bulk_data.declare_relation( elem , node7 , 7 );
  }
  }
  }

  delete[] p_box ;
}
Example #7
static void test_maps( const int root_box[][2] , const int np )
{
  const int ghost = 1 ;
  const int nx_global = root_box[0][1] - root_box[0][0] ;
  const int ny_global = root_box[1][1] - root_box[1][0] ;
  int ieq , i , j ;
  int (*pbox)[3][2] ;
  int **local_values ;
  int **map_local_id ;
  int **map_recv_pc ;
  int **map_send_pc ;
  int **map_send_id ;
  
  pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 );

  box_partition( 0 , np , 2 , root_box , pbox );

  local_values = (int **) malloc( sizeof(int*) * np );
  map_local_id = (int **) malloc( sizeof(int*) * np );
  map_recv_pc  = (int **) malloc( sizeof(int*) * np );
  map_send_pc  = (int **) malloc( sizeof(int*) * np );
  map_send_id  = (int **) malloc( sizeof(int*) * np );

  /* Set each local value to the global equation number */

  for ( ieq = i = 0 ; i < np ; ++i ) {
    const int (*mybox)[2] = (const int (*)[2]) pbox[i] ;
    const int nx = mybox[0][1] - mybox[0][0] ;
    const int ny = mybox[1][1] - mybox[1][0] ;
    const int nz = mybox[2][1] - mybox[2][0] ;
    int ix , iy , iz ;

    /* Generate the partition maps for this rank */
    box_partition_maps( np , i , (const int (*)[3][2]) pbox , ghost ,
                        & map_local_id[i] , & map_recv_pc[i] , 
                        & map_send_pc[i] , & map_send_id[i] );

    local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] );

    for ( iz = -ghost ; iz < nz + ghost ; ++iz ) {
    for ( iy = -ghost ; iy < ny + ghost ; ++iy ) {
    for ( ix = -ghost ; ix < nx + ghost ; ++ix ) {
      const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz);

      if ( 0 <= ieq ) {
        const int ix_global = ix + mybox[0][0] ;
        const int iy_global = iy + mybox[1][0] ;
        const int iz_global = iz + mybox[2][0] ;

        if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] &&
             root_box[1][0] <= iy_global && iy_global < root_box[1][1] &&
             root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) {

          local_values[i][ ieq ] = ix_global +
                                   iy_global * nx_global +
                                   iz_global * nx_global * ny_global ;
        }
        else {
          local_values[i][ ieq ] = -1 ;
        }
      }
    }
    }
    }
  }

  /* Pair-wise compare the local values */
  /* i  == receiving processor rank */
  /* ip == sending   processor rank */
  /* j  == receiving processor data entry for message from 'ip' */
  /* jp == sending   processor data entry for message to   'i' */

  for ( i = 0 ; i < np ; ++i ) {
    for ( j = 1 ; j < np ; ++j ) {
      const int ip = ( i + j ) % np ;
      const int jp = ( i + np - ip ) % np ;
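      /* jp satisfies (ip + jp) % np == i : it is the send-map entry
         on rank 'ip' destined for rank 'i'. */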
      const int nrecv = map_recv_pc[i] [j+1]  - map_recv_pc[i] [j] ;
      const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ;
      int k ;
      if ( nrecv != nsend ) {
        fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip);
        fprintf(stderr,"P%d send %d to   P%d\n",ip,nsend,i);
        abort();
      }
      for ( k = 0 ; k < nrecv ; ++k ) {
        const int irecv = map_recv_pc[i][j] + k ;
        const int isend = map_send_pc[ip][jp] + k ;
        const int val_irecv = local_values[i][irecv] ;
        const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ;
        if ( val_irecv != val_isend ) {
          fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip);
          fprintf(stderr,"P%d send[%d] = %d , to   P%d\n",ip,k,val_isend,i);
          abort();
        }
      }
    }
  }

  for ( i = 0 ; i < np ; ++i ) {
    free( map_local_id[i] );
    free( map_recv_pc[i] );
    free( map_send_pc[i] );
    free( map_send_id[i] );
    free( local_values[i] );
  }
  free( map_send_id );
  free( map_send_pc );
  free( map_recv_pc );
  free( map_local_id );
  free( local_values );
  free( pbox );
}
Example #8
void box_partition_rcb( const int np ,
                        const int root_box[3][2] ,
                        int    pbox[][3][2] )
{
  box_partition( 0 , np , 2 , root_box , pbox );
}
Example #9
File: main.cpp  Project: Mantevo/miniFE
int main(int argc, char** argv) {
  miniFE::Parameters params;
  miniFE::get_parameters(argc, argv, params);

  int numprocs = 1, myproc = 0;
  miniFE::initialize_mpi(argc, argv, numprocs, myproc);

  miniFE::timer_type start_time = miniFE::mytimer();

#ifdef MINIFE_DEBUG
  outstream(numprocs, myproc);
#endif

  if(myproc==0) {
    std::cout << "MiniFE Mini-App, OpenMP Peer Implementation" << std::endl;
    std::cout << "Creating OpenMP Thread Pool..." << std::endl;
  }
  int value = 0;
  const int thread_count = omp_get_max_threads();
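  // Each of the thread_count iterations adds 1, so the reduction tallies
  // thread_count while exercising the OpenMP runtime's pool creation.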
#pragma omp parallel for reduction(+:value)
  for(int i = 0; i < thread_count; ++i) {
    value += 1;
  }
  double global_threadcount;
  double local_threadcount = value;

#ifdef HAVE_MPI
  MPI_Allreduce(&local_threadcount,&global_threadcount,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
#else
  global_threadcount = local_threadcount;
#endif
  if(myproc==0) {
    std::cout << "Counted: " << global_threadcount << " threads." << std::endl;
    std::cout << "Running MiniFE Mini-App..." << std::endl;
  }

  //make sure each processor has the same parameters:
  miniFE::broadcast_parameters(params);


  Box global_box = { 0, params.nx, 0, params.ny, 0, params.nz };
  std::vector<Box> local_boxes(numprocs);

  box_partition(0, numprocs, 2, global_box, &local_boxes[0]);

  Box& my_box = local_boxes[myproc];

  MINIFE_GLOBAL_ORDINAL num_my_ids = miniFE::get_num_ids<MINIFE_GLOBAL_ORDINAL>(my_box);
  MINIFE_GLOBAL_ORDINAL min_ids = num_my_ids;

#ifdef HAVE_MPI
  MPI_Datatype mpi_dtype = miniFE::TypeTraits<MINIFE_GLOBAL_ORDINAL>::mpi_type();
  MPI_Allreduce(&num_my_ids, &min_ids, 1, mpi_dtype, MPI_MIN, MPI_COMM_WORLD);
#endif

  if (min_ids == 0) {
    std::cout<<"One or more processors have 0 equations. Not currently supported. Exiting."<<std::endl;

    miniFE::finalize_mpi();

    return 1;
  }

  std::ostringstream osstr;
  osstr << "miniFE." << params.nx << "x" << params.ny << "x" << params.nz;
#ifdef HAVE_MPI
  osstr << ".P" << numprocs;
#endif
#ifdef _OPENMP
  osstr << ".T" << omp_get_max_threads();
#endif
  osstr << ".";
  if (params.name != "") osstr << params.name << ".";

  YAML_Doc doc("miniFE", MINIFE_VERSION, ".", osstr.str());
  if (myproc == 0) {
    add_params_to_yaml(doc, params);
    add_configuration_to_yaml(doc, numprocs, params.numthreads);
    add_timestring_to_yaml(doc);
  }

  //Most of the program is performed in the 'driver' function, which is
  //templated on < Scalar, LocalOrdinal, GlobalOrdinal >.
  //To run miniFE with float instead of double, or 'long long' instead of int,
  //etc., change these template-parameters by changing the macro definitions in
  //the makefile or on the make command-line.

  int return_code =
     miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL>(global_box, my_box, params, doc);

  miniFE::timer_type total_time = miniFE::mytimer() - start_time;

#ifdef MINIFE_REPORT_RUSAGE
   struct rusage get_mem;
   getrusage(RUSAGE_SELF, &get_mem);

   long long int rank_rss = get_mem.ru_maxrss;
   long long int global_rss = 0;
   long long int max_rss = 0;

#ifdef HAVE_MPI
   MPI_Reduce(&rank_rss, &global_rss, 1, 
	MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
   MPI_Reduce(&rank_rss, &max_rss, 1, 
	MPI_LONG_LONG, MPI_MAX, 0, MPI_COMM_WORLD);
   if (myproc == 0) {
	doc.add("Global All-RSS (kB)", global_rss);
	doc.add("Global Max-RSS (kB)", max_rss);
   }
#else
   doc.add("RSS (kB)", rank_rss);
#endif
#endif

  if (myproc == 0) {
    doc.add("Total Program Time",total_time);
    doc.generateYAML();
  }


  miniFE::finalize_mpi();

  return return_code;
}