Пример #1
0
/*
 * Partition 'root_box' into 'np' boxes and build the communication maps
 * for part 'my_p'.
 *
 *   np           number of parts; 'np' boxes are allocated for *pbox.
 *   my_p         part index forwarded to box_partition_maps.
 *   root_box     box to be partitioned, forwarded to box_partition.
 *   ghost        forwarded unchanged to box_partition_maps.
 *   pbox         output: receives a malloc'ed array of 'np' boxes that is
 *                filled in by box_partition.
 *   map_local_id , map_recv_pc , map_send_pc , map_send_id
 *                outputs forwarded unchanged to box_partition_maps.
 *
 * Aborts if the box array cannot be allocated, rather than handing a
 * NULL pointer to box_partition.
 */
void box_partition_rcb( const int np ,
                        const int my_p ,
                        const int root_box[][2] ,
                        const int ghost ,
                        int (**pbox)[3][2] ,
                        int ** map_local_id ,
                        int ** map_recv_pc ,
                        int ** map_send_pc ,
                        int ** map_send_id )
{
  /* One box is three [lower,upper] pairs of int. */
  const size_t box_bytes = sizeof(int) * np * 3 * 2 ;

  *pbox = (int (*)[3][2]) malloc( box_bytes );

  /* malloc may legitimately return NULL for a zero-byte request,
   * so only a failed non-empty allocation is treated as fatal.
   */
  if ( NULL == *pbox && 0 != box_bytes ) {
    abort();
  }

  box_partition( 0 , np , 2 , root_box , *pbox );

  box_partition_maps( np , my_p , (const int (*)[3][2]) *pbox , ghost ,
                      map_local_id , map_recv_pc ,
                      map_send_pc , map_send_id );
}
Пример #2
0
  // Build and return the finite element mesh for processor 'proc_local'
  // of 'proc_count', for a box of elems_x * elems_y * elems_z elements.
  //
  // The global vertex box ( elems + 1 vertices in each dimension ) is
  // partitioned into 'proc_count' parts by box_partition_rcb and the part
  // at index 'proc_local' is treated as locally owned.
  //
  //   proc_count     number of parts the vertex box is split into
  //   proc_local     index of the part this mesh is built for
  //   gang_count     when greater than one the element connectivity is
  //                  laid out by layout_elements_partitioned , otherwise
  //                  by layout_elements_interior_exterior
  //   elems_x/y/z    global element counts in each dimension
  //   x/y/z_coord_curve
  //                  exponent applied to each unit-scaled coordinate
  //                  before it is multiplied by the problem extent
  //
  // Returns the mesh with node_coords , elem_node_ids , node_elem_ids
  // and parallel_data_map filled in.
  static FEMeshType create( const size_t proc_count ,
                            const size_t proc_local ,
                            const size_t gang_count ,
                            const size_t elems_x ,
                            const size_t elems_y ,
                            const size_t elems_z ,
                            const double x_coord_curve = 1 ,
                            const double y_coord_curve = 1 ,
                            const double z_coord_curve = 1 )
  {
    // One more vertex than elements along each dimension.
    const size_t vertices_x = elems_x + 1 ;
    const size_t vertices_y = elems_y + 1 ;
    const size_t vertices_z = elems_z + 1 ;

    // Default-constructed helpers used below for the box bounds
    // computation and for the element-specific node boxes / layout.
    const BoxBoundsLinear vertex_box_bounds ;
    const ElementSpec element ;

    // Partition based upon vertices:

    BoxType vertex_box_global ;
    std::vector< BoxType > vertex_box_parts( proc_count );

    // Per dimension: [d][0] is the lower bound, [d][1] is set to the
    // vertex count.
    vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ;
    vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ;
    vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ;

    box_partition_rcb( vertex_box_global , vertex_box_parts );

    const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ];

    // Determine interior and used vertices:

    BoxType vertex_box_local_interior ;
    BoxType vertex_box_local_used ;

    // The interior and used boxes are passed in uninitialized ;
    // presumably they are outputs of apply() -- confirm against
    // BoxBoundsLinear.
    vertex_box_bounds.apply( vertex_box_global ,
                             vertex_box_local_owned ,
                             vertex_box_local_interior ,
                             vertex_box_local_used );

    // Element counts:
    // Per dimension the element count is the box extent minus one.
    // The differences are held as signed values so that a negative
    // result can be clamped to zero in the products below.

    const long local_elems_x =
      ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ;
    const long local_elems_y =
      ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ;
    const long local_elems_z =
      ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ;

    const size_t elem_count_total = std::max( long(0) , local_elems_x ) *
                                    std::max( long(0) , local_elems_y ) *
                                    std::max( long(0) , local_elems_z );

    // Same computation using the owned vertex box.

    const long interior_elems_x =
      ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ;
    const long interior_elems_y =
      ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ;
    const long interior_elems_z =
      ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ;

    const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) *
                                       std::max( long(0) , interior_elems_y ) *
                                       std::max( long(0) , interior_elems_z );

    // Expand vertex boxes to node boxes:

    BoxType node_box_global ;
    BoxType node_box_local_used ;
    std::vector< BoxType > node_box_parts ;

    // node_box_global and node_box_parts are passed in unset / empty
    // and are read afterwards , so they are presumably filled here.
    element.create_node_boxes_from_vertex_boxes(
      vertex_box_global , vertex_box_parts ,
      node_box_global , node_box_parts );

    // Node communication maps:

    size_t node_count_interior = 0 ;
    size_t node_count_owned    = 0 ;
    size_t node_count_total    = 0 ;
    std::vector<size_t>                 node_used_id_map ;
    std::vector<size_t>                 node_part_counts ;
    std::vector< std::vector<size_t> >  node_send_map ;

    // Everything from node_box_local_used onward is read after this
    // call and not assigned anywhere else in this function ;
    // presumably these are outputs of box_partition_maps -- confirm.
    box_partition_maps( node_box_global ,
                        node_box_parts ,
                        element.box_bounds ,
                        proc_local ,
                        node_box_local_used ,
                        node_used_id_map ,
                        node_count_interior ,
                        node_count_owned ,
                        node_count_total ,
                        node_part_counts ,
                        node_send_map );

    // NOTE(review): node_count_send is accumulated here but is not
    // read again anywhere in this function.
    size_t node_count_send = 0 ;
    for ( size_t i = 0 ; i < node_send_map.size() ; ++i ) {
      node_count_send += node_send_map[i].size();
    }

    // Count the messages and the sent items so that
    // parallel_data_map can be sized before it is filled.
    size_t recv_msg_count = 0 ;
    size_t send_msg_count = 0 ;
    size_t send_count = 0 ;

    // Starts at 1 : in the communication-list section below index 'i'
    // maps to processor ( proc_local + i ) % proc_count , so index 0
    // would be the local processor itself.
    for ( size_t i = 1 ; i < proc_count ; ++i ) {
      if ( node_part_counts[i] ) ++recv_msg_count ;
      if ( node_send_map[i].size() ) {
        ++send_msg_count ;
        send_count += node_send_map[i].size();
      }
    }

    // Finite element mesh:

    FEMeshType mesh ;

    // The views are only allocated when they have a non-zero extent.
    if ( node_count_total ) {
      mesh.node_coords = node_coords_type( "node_coords", node_count_total );
    }

    if ( elem_count_total ) {
      mesh.elem_node_ids =
        elem_node_ids_type( "elem_node_ids", elem_count_total );
    }

    mesh.parallel_data_map.assign( node_count_interior ,
                                   node_count_owned ,
                                   node_count_total ,
                                   recv_msg_count ,
                                   send_msg_count ,
                                   send_count );

    // Host mirrors are populated below and copied into the mesh
    // views with deep_copy once setup and verification are done.
    typename node_coords_type::HostMirror node_coords =
      Kokkos::create_mirror( mesh.node_coords );

    typename elem_node_ids_type::HostMirror elem_node_ids =
      Kokkos::create_mirror( mesh.elem_node_ids );

    //------------------------------------
    // set node coordinates to grid location for subsequent verification
    // ( they are rescaled to the problem extent after verify() )

    for ( size_t iz = node_box_local_used[2][0] ;
                 iz < node_box_local_used[2][1] ; ++iz ) {

    for ( size_t iy = node_box_local_used[1][0] ;
                 iy < node_box_local_used[1][1] ; ++iy ) {

    for ( size_t ix = node_box_local_used[0][0] ;
                 ix < node_box_local_used[0][1] ; ++ix ) {

      // Local node index for grid location ( ix , iy , iz ).
      const size_t node_local_id =
        box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz );

      node_coords( node_local_id , 0 ) = ix ;
      node_coords( node_local_id , 1 ) = iy ;
      node_coords( node_local_id , 2 ) = iz ;
    }}}

    //------------------------------------
    // Initialize element-node connectivity:
    // The two layouts take the same boxes and id map and differ in
    // their sixth argument : gang_count versus elem_count_interior.

    if ( 1 < gang_count ) {
      layout_elements_partitioned( vertex_box_local_used ,
                                   vertex_box_local_owned ,
                                   node_box_local_used ,
                                   node_used_id_map ,
                                   element ,
                                   gang_count ,
                                   elem_node_ids );
    }
    else {
      layout_elements_interior_exterior( vertex_box_local_used ,
                                         vertex_box_local_owned ,
                                         node_box_local_used ,
                                         node_used_id_map ,
                                         element ,
                                         elem_count_interior ,
                                         elem_node_ids );
    }

    //------------------------------------
    // Populate node->element connectivity:

    // First pass : for each node count the ( element , local node )
    // pairs that reference it.
    std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 );

    for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
      for ( size_t n = 0 ; n < element_node_count  ; ++n ) {
        ++node_elem_work[ elem_node_ids(i,n) ];
      }
    }

    // Create the graph from the per-node counts.
    mesh.node_elem_ids =
      Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work );

    typename node_elem_ids_type::HostMirror
      node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids );

    // Reuse node_elem_work as a per-node insertion cursor , starting
    // at the node's row_map offset.
    for ( size_t i = 0 ; i < node_count_total ; ++i ) {
      node_elem_work[i] = node_elem_ids.row_map[i];
    }

    // Looping in element order insures the list of elements
    // is sorted by element index.

    for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
      for ( size_t n = 0 ; n < element_node_count ; ++n ) {
        const unsigned nid = elem_node_ids(i, n);
        // Take the next free entry of node 'nid' and advance its cursor.
        const unsigned j = node_elem_work[nid] ; ++node_elem_work[nid] ;

        // Column 0 : element index , column 1 : node index within
        // that element.
        node_elem_ids.entries( j , 0 ) = i ;
        node_elem_ids.entries( j , 1 ) = n ;
      }
    }
    //------------------------------------
    // Verify setup with node coordinates matching grid indices.
    verify( node_coords , elem_node_ids , node_elem_ids );

    //------------------------------------
    // Scale node coordinates to problem extent with
    // nonlinear mapping.
    {
      // Upper bound of the global vertex box minus one , which is
      // the global element count in that dimension.
      const double problem_extent[3] =
        { static_cast<double>( vertex_box_global[0][1] - 1 ) ,
          static_cast<double>( vertex_box_global[1][1] - 1 ) ,
          static_cast<double>( vertex_box_global[2][1] - 1 ) };

      // Upper bound of the global node box minus one.
      const double grid_extent[3] =
        { static_cast<double>( node_box_global[0][1] - 1 ) ,
          static_cast<double>( node_box_global[1][1] - 1 ) ,
          static_cast<double>( node_box_global[2][1] - 1 ) };

      // NOTE(review): a dimension whose node box upper bound is 1
      // gives grid_extent == 0 and a division by zero below --
      // confirm whether such input is possible.
      for ( size_t i = 0 ; i < node_count_total ; ++i ) {
        // Grid index divided by the grid extent.
        const double x_unit = node_coords(i,0) / grid_extent[0] ;
        const double y_unit = node_coords(i,1) / grid_extent[1] ;
        const double z_unit = node_coords(i,2) / grid_extent[2] ;

        // coordinate = problem_extent * unit ^ curve
        node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) );
        node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) );
        node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) );
      }
    }

    // Copy the host mirrors into the mesh views.
    // NOTE(review): only node_elem_ids.entries is copied here ;
    // row_map is not -- presumably create_staticcrsgraph already
    // populated it , confirm.
    Kokkos::deep_copy( mesh.node_coords ,   node_coords );
    Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids );
    Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries );

    //------------------------------------
    // Communication lists:
    {
      // The counters computed earlier are reset and reused as write
      // cursors into host_recv , host_send and host_send_item.
      recv_msg_count = 0 ;
      send_msg_count = 0 ;
      send_count = 0 ;

      for ( size_t i = 1 ; i < proc_count ; ++i ) {

        // Order sending starting with the local processor rank
        // to try to smooth out the amount of messages simultaneously
        // send to a particular processor.

        const int proc = ( proc_local + i ) % proc_count ;
        if ( node_part_counts[i] ) {
          // Column 0 : processor , column 1 : count for that processor.
          mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ;
          mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ;
          ++recv_msg_count ;
        }
        if ( node_send_map[i].size() ) {
          mesh.parallel_data_map.host_send(send_msg_count,0) = proc ;
          mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ;
          // Send items are stored relative to node_count_interior.
          for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) {
            mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ;
          }
          ++send_msg_count ;
        }
      }
    }

    return mesh ;
  }
Пример #3
0
/*
 * Allocation helper for test_maps: returns malloc( bytes ) and aborts
 * with a message when a non-empty request fails, so that test_maps
 * never dereferences a NULL allocation.  A NULL result for a zero-byte
 * request is passed through unchanged.
 */
static void * test_maps_alloc( const size_t bytes )
{
  void * const ptr = malloc( bytes );

  if ( NULL == ptr && 0 != bytes ) {
    fprintf(stderr,"test_maps: failed to allocate %lu bytes\n",
            (unsigned long) bytes);
    abort();
  }
  return ptr ;
}

/*
 * Consistency test of the partition maps for 'root_box' split into
 * 'np' parts with a ghost argument of 1.
 *
 * For every part the maps are generated with box_partition_maps and
 * each local entry is tagged with a value computed from its global
 * (x,y,z) index, or with -1 when that index lies outside of 'root_box'.
 * Then for every (receiver,sender) pair the receive and send counts
 * must agree and every received entry must carry the same tag as the
 * entry the sender lists for it.  Any mismatch is reported on stderr
 * and the program aborts.
 */
static void test_maps( const int root_box[][2] , const int np )
{
  const int ghost = 1 ;
  const int nx_global = root_box[0][1] - root_box[0][0] ;
  const int ny_global = root_box[1][1] - root_box[1][0] ;
  int i , j ;
  int (*pbox)[3][2] ;
  int **local_values ;
  int **map_local_id ;
  int **map_recv_pc ;
  int **map_send_pc ;
  int **map_send_id ;

  pbox = (int (*)[3][2]) test_maps_alloc( sizeof(int) * np * 3 * 2 );

  box_partition( 0 , np , 2 , root_box , pbox );

  /* One pointer per part; the per-part arrays are allocated below. */
  local_values = (int **) test_maps_alloc( sizeof(int*) * np );
  map_local_id = (int **) test_maps_alloc( sizeof(int*) * np );
  map_recv_pc  = (int **) test_maps_alloc( sizeof(int*) * np );
  map_send_pc  = (int **) test_maps_alloc( sizeof(int*) * np );
  map_send_id  = (int **) test_maps_alloc( sizeof(int*) * np );

  /* Set each local value to the global equation number */

  for ( i = 0 ; i < np ; ++i ) {
    const int (*mybox)[2] = (const int (*)[2]) pbox[i] ;
    const int nx = mybox[0][1] - mybox[0][0] ;
    const int ny = mybox[1][1] - mybox[1][0] ;
    const int nz = mybox[2][1] - mybox[2][0] ;
    int ix , iy , iz ;

    /* Generate the partition maps for this rank */
    box_partition_maps( np , i , (const int (*)[3][2]) pbox , ghost ,
                        & map_local_id[i] , & map_recv_pc[i] ,
                        & map_send_pc[i] , & map_send_id[i] );

    /* map_recv_pc[i][np] is used as the number of local entries. */
    local_values[i] =
      (int *) test_maps_alloc( sizeof(int) * map_recv_pc[i][np] );

    /* Visit the box of this part extended by 'ghost' on every side. */
    for ( iz = -ghost ; iz < nz + ghost ; ++iz ) {
    for ( iy = -ghost ; iy < ny + ghost ; ++iy ) {
    for ( ix = -ghost ; ix < nx + ghost ; ++ix ) {
      const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz);

      /* Negative ids are skipped: no local entry to fill. */
      if ( 0 <= ieq ) {
        const int ix_global = ix + mybox[0][0] ;
        const int iy_global = iy + mybox[1][0] ;
        const int iz_global = iz + mybox[2][0] ;

        if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] &&
             root_box[1][0] <= iy_global && iy_global < root_box[1][1] &&
             root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) {

          local_values[i][ ieq ] = ix_global +
                                   iy_global * nx_global +
                                   iz_global * nx_global * ny_global ;
        }
        else {
          /* Entry lies outside of the root box. */
          local_values[i][ ieq ] = -1 ;
        }
      }
    }
    }
    }
  }

  /* Pair-wise compare the local values */
  /* i  == receiving processor rank */
  /* ip == sending   processor rank */
  /* j  == receiving processor data entry for message from 'ip' */
  /* jp == sending   processor data entry for message to   'i' */

  for ( i = 0 ; i < np ; ++i ) {
    for ( j = 1 ; j < np ; ++j ) {
      const int ip = ( i + j ) % np ;
      const int jp = ( i + np - ip ) % np ;
      const int nrecv = map_recv_pc[i] [j+1]  - map_recv_pc[i] [j] ;
      const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ;
      int k ;
      if ( nrecv != nsend ) {
        fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip);
        fprintf(stderr,"P%d send %d to   P%d\n",ip,nsend,i);
        abort();
      }
      for ( k = 0 ; k < nrecv ; ++k ) {
        const int irecv = map_recv_pc[i][j] + k ;
        const int isend = map_send_pc[ip][jp] + k ;
        const int val_irecv = local_values[i][irecv] ;
        /* The sender's entry is looked up through map_send_id. */
        const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ;
        if ( val_irecv != val_isend ) {
          fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip);
          fprintf(stderr,"P%d send[%d] = %d , to   P%d\n",ip,k,val_isend,i);
          abort();
        }
      }
    }
  }

  /* Release the per-part arrays, then the pointer tables and boxes. */
  for ( i = 0 ; i < np ; ++i ) {
    free( map_local_id[i] );
    free( map_recv_pc[i] );
    free( map_send_pc[i] );
    free( map_send_id[i] );
    free( local_values[i] );
  }
  free( map_send_id );
  free( map_send_pc );
  free( map_recv_pc );
  free( map_local_id );
  free( local_values );
  free( pbox );
}