Beispiel #1
0
    static void layout_elements_partitioned(
        const BoxType                vertex_box_local_used ,
        const BoxType                /*vertex_box_local_owned*/ ,
        const BoxType                node_box_local_used ,
        const std::vector<size_t> &  node_used_id_map ,
        const ElementSpec            element_fixture ,
        const size_t                 thread_gang_count ,
        const typename elem_node_ids_type::HostMirror elem_node_ids )
    {
        std::vector< BoxType > element_box_gangs( thread_gang_count );

        BoxType element_box_local_used = vertex_box_local_used ;

        element_box_local_used[0][1] -= 1 ;
        element_box_local_used[1][1] -= 1 ;
        element_box_local_used[2][1] -= 1 ;

        box_partition_rcb( element_box_local_used , element_box_gangs );

        size_t elem_index = 0 ;

        for ( size_t ig = 0 ; ig < thread_gang_count ; ++ig ) {

            const BoxType box = element_box_gangs[ig] ;

            for ( size_t iz = box[2][0] ; iz < box[2][1] ; ++iz ) {
                for ( size_t iy = box[1][0] ; iy < box[1][1] ; ++iy ) {
                    for ( size_t ix = box[0][0] ; ix < box[0][1] ; ++ix , ++elem_index ) {

                        for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
                            unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) };

                            element_fixture.elem_to_node( nn , coord );

                            const size_t node_local_id =
                                box_map_id( node_box_local_used ,
                                            node_used_id_map ,
                                            coord[0] , coord[1] , coord[2] );

                            elem_node_ids( elem_index , nn ) = node_local_id ;
                        }
                    }
                }
            }
        }
    }
Beispiel #2
0
static
void hpccg_alloc_and_fill( const int np ,
                           const int my_p ,
                           const int gbox[][2] ,
                           const int ghost ,
                           struct cgsolve_data * const data )
{
  int (*pbox)[3][2] = NULL ;
  int * map_local_ord = NULL;

  data->nRow = 0 ;
  data->A_pc = NULL ;
  data->A_ia = NULL ;
  data->A_a  = NULL ;

  data->np = np ;
  data->ip = my_p ;
  data->recv_pc = NULL  ;
  data->send_pc = NULL ;
  data->send_id = NULL ;

  box_partition_rcb( np, my_p,
                     (const int (*)[2]) gbox, ghost,
                     & pbox ,
                     & map_local_ord ,
                     & data->recv_pc ,
                     & data->send_pc ,
                     & data->send_id );

  {
    const int (* const my_box)[2] = (const int (*)[2]) pbox[my_p] ;
    const int bx = my_box[0][0] ;
    const int by = my_box[1][0] ;
    const int bz = my_box[2][0] ;
    const int nx = my_box[0][1] - bx ;
    const int ny = my_box[1][1] - by ;
    const int nz = my_box[2][1] - bz ;
    const int n = nx * ny * nz ;
    const int nnz = 27 * n ; /* Upper bound */
    int    * const pc = (int *)   malloc( sizeof(int) * ( n + 1 ) );
    int    * const ia = (int *)   malloc( sizeof(int) * nnz );
    MATRIX_SCALAR  * const a  = (MATRIX_SCALAR *) malloc( sizeof(MATRIX_SCALAR) * nnz );

    int irow = 0 ;
    int ipc  = 0 ;
    int ix , iy , iz ;
    int sx , sy , sz ;

    for ( iz = 0 ; iz < nz ; ++iz ) {
    for ( iy = 0 ; iy < ny ; ++iy ) {
    for ( ix = 0 ; ix < nx ; ++ix , ++irow ) {

      if ( irow != box_map_local( my_box, ghost, map_local_ord,ix,iy,iz) ) {
        fprintf(stderr,"P%d:  irow[%d] != box_map_local(%d,%d,%d) = %d\n",
                my_p,irow,ix,iy,iz,
                box_map_local( my_box, ghost, map_local_ord, ix, iy, iz) );
      }

      pc[ irow ] = ipc ;   /* Beginning of row coefficients */
      /* Diagonal term first */
      ia[ ipc ] = irow ;
      a[  ipc ] = 27.0f ;
      ++ipc ;

      /* Off-diagonal terms to follow */
      for ( sz = -1 ; sz <= 1 ; ++sz ) {
      for ( sy = -1 ; sy <= 1 ; ++sy ) {
      for ( sx = -1 ; sx <= 1 ; ++sx ) {
        const int dx = ix + sx ;
        const int dy = iy + sy ;
        const int dz = iz + sz ;
        const int global_x = dx + bx ;
        const int global_y = dy + by ;
        const int global_z = dz + bz ;

        if ( gbox[0][0] <= global_x && global_x < gbox[0][1] &&
             gbox[1][0] <= global_y && global_y < gbox[1][1] &&
             gbox[2][0] <= global_z && global_z < gbox[2][1] &&
             ! ( sz == 0 && sy == 0 && sx == 0 ) ) {
          /* 'icol' is mapped for communication */

          const int icol =
            box_map_local(my_box,ghost,map_local_ord,dx,dy,dz);

          if ( icol < 0 ) {
            fprintf(stderr,"P%d : bad column at local (%d,%d,%d) global(%d,%d,%d)\n",
                    my_p, dx,dy,dz,global_x,global_y,global_z);
            fflush(stderr);
            abort();
          }

          ia[ ipc ] = icol ;
          a[  ipc ] = -1.0f ;
          ++ipc ;
        }
      }
      }
      }
    }
    }
    }

    pc[irow] = ipc ;

    data->nRow = irow ;
    data->A_pc = pc ;
    data->A_ia = ia ;
    data->A_a  = a ;
  }

  free( map_local_ord );
  free( pbox );
}
  static FEMeshType create( const size_t proc_count ,
                            const size_t proc_local ,
                            const size_t gang_count ,
                            const size_t elems_x ,
                            const size_t elems_y ,
                            const size_t elems_z ,
                            const double x_coord_curve = 1 ,
                            const double y_coord_curve = 1 ,
                            const double z_coord_curve = 1 )
  {
    const size_t vertices_x = elems_x + 1 ;
    const size_t vertices_y = elems_y + 1 ;
    const size_t vertices_z = elems_z + 1 ;

    const BoxBoundsLinear vertex_box_bounds ;
    const ElementSpec element ;

    // Partition based upon vertices:

    BoxType vertex_box_global ;
    std::vector< BoxType > vertex_box_parts( proc_count );

    vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ;
    vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ;
    vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ;

    box_partition_rcb( vertex_box_global , vertex_box_parts );

    const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ];

    // Determine interior and used vertices:

    BoxType vertex_box_local_interior ;
    BoxType vertex_box_local_used ;

    vertex_box_bounds.apply( vertex_box_global ,
                             vertex_box_local_owned ,
                             vertex_box_local_interior ,
                             vertex_box_local_used );

    // Element counts:

    const long local_elems_x =
      ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ;
    const long local_elems_y =
      ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ;
    const long local_elems_z =
      ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ;

    const size_t elem_count_total = std::max( long(0) , local_elems_x ) *
                                    std::max( long(0) , local_elems_y ) *
                                    std::max( long(0) , local_elems_z );

    const long interior_elems_x =
      ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ;
    const long interior_elems_y =
      ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ;
    const long interior_elems_z =
      ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ;

    const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) *
                                       std::max( long(0) , interior_elems_y ) *
                                       std::max( long(0) , interior_elems_z );

    // Expand vertex boxes to node boxes:

    BoxType node_box_global ;
    BoxType node_box_local_used ;
    std::vector< BoxType > node_box_parts ;

    element.create_node_boxes_from_vertex_boxes(
      vertex_box_global , vertex_box_parts ,
      node_box_global , node_box_parts );

    // Node communication maps:

    size_t node_count_interior = 0 ;
    size_t node_count_owned    = 0 ;
    size_t node_count_total    = 0 ;
    std::vector<size_t>                 node_used_id_map ;
    std::vector<size_t>                 node_part_counts ;
    std::vector< std::vector<size_t> >  node_send_map ;

    box_partition_maps( node_box_global ,
                        node_box_parts ,
                        element.box_bounds ,
                        proc_local ,
                        node_box_local_used ,
                        node_used_id_map ,
                        node_count_interior ,
                        node_count_owned ,
                        node_count_total ,
                        node_part_counts ,
                        node_send_map );

    size_t node_count_send = 0 ;
    for ( size_t i = 0 ; i < node_send_map.size() ; ++i ) {
      node_count_send += node_send_map[i].size();
    }

    size_t recv_msg_count = 0 ;
    size_t send_msg_count = 0 ;
    size_t send_count = 0 ;

    for ( size_t i = 1 ; i < proc_count ; ++i ) {
      if ( node_part_counts[i] ) ++recv_msg_count ;
      if ( node_send_map[i].size() ) {
        ++send_msg_count ;
        send_count += node_send_map[i].size();
      }
    }

    // Finite element mesh:

    FEMeshType mesh ;

    if ( node_count_total ) {
      mesh.node_coords = node_coords_type( "node_coords", node_count_total );
    }

    if ( elem_count_total ) {
      mesh.elem_node_ids =
        elem_node_ids_type( "elem_node_ids", elem_count_total );
    }

    mesh.parallel_data_map.assign( node_count_interior ,
                                   node_count_owned ,
                                   node_count_total ,
                                   recv_msg_count ,
                                   send_msg_count ,
                                   send_count );

    typename node_coords_type::HostMirror node_coords =
      Kokkos::create_mirror( mesh.node_coords );

    typename elem_node_ids_type::HostMirror elem_node_ids =
      Kokkos::create_mirror( mesh.elem_node_ids );

    //------------------------------------
    // set node coordinates to grid location for subsequent verification

    for ( size_t iz = node_box_local_used[2][0] ;
                 iz < node_box_local_used[2][1] ; ++iz ) {

    for ( size_t iy = node_box_local_used[1][0] ;
                 iy < node_box_local_used[1][1] ; ++iy ) {

    for ( size_t ix = node_box_local_used[0][0] ;
                 ix < node_box_local_used[0][1] ; ++ix ) {

      const size_t node_local_id =
        box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz );

      node_coords( node_local_id , 0 ) = ix ;
      node_coords( node_local_id , 1 ) = iy ;
      node_coords( node_local_id , 2 ) = iz ;
    }}}

    //------------------------------------
    // Initialize element-node connectivity:

    if ( 1 < gang_count ) {
      layout_elements_partitioned( vertex_box_local_used ,
                                   vertex_box_local_owned ,
                                   node_box_local_used ,
                                   node_used_id_map ,
                                   element ,
                                   gang_count ,
                                   elem_node_ids );
    }
    else {
      layout_elements_interior_exterior( vertex_box_local_used ,
                                         vertex_box_local_owned ,
                                         node_box_local_used ,
                                         node_used_id_map ,
                                         element ,
                                         elem_count_interior ,
                                         elem_node_ids );
    }

    //------------------------------------
    // Populate node->element connectivity:

    std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 );

    for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
      for ( size_t n = 0 ; n < element_node_count  ; ++n ) {
        ++node_elem_work[ elem_node_ids(i,n) ];
      }
    }

    mesh.node_elem_ids =
      Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work );

    typename node_elem_ids_type::HostMirror
      node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids );

    for ( size_t i = 0 ; i < node_count_total ; ++i ) {
      node_elem_work[i] = node_elem_ids.row_map[i];
    }

    // Looping in element order insures the list of elements
    // is sorted by element index.

    for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
      for ( size_t n = 0 ; n < element_node_count ; ++n ) {
        const unsigned nid = elem_node_ids(i, n);
        const unsigned j = node_elem_work[nid] ; ++node_elem_work[nid] ;

        node_elem_ids.entries( j , 0 ) = i ;
        node_elem_ids.entries( j , 1 ) = n ;
      }
    }
    //------------------------------------
    // Verify setup with node coordinates matching grid indices.
    verify( node_coords , elem_node_ids , node_elem_ids );

    //------------------------------------
    // Scale node coordinates to problem extent with
    // nonlinear mapping.
    {
      const double problem_extent[3] =
        { static_cast<double>( vertex_box_global[0][1] - 1 ) ,
          static_cast<double>( vertex_box_global[1][1] - 1 ) ,
          static_cast<double>( vertex_box_global[2][1] - 1 ) };

      const double grid_extent[3] =
        { static_cast<double>( node_box_global[0][1] - 1 ) ,
          static_cast<double>( node_box_global[1][1] - 1 ) ,
          static_cast<double>( node_box_global[2][1] - 1 ) };

      for ( size_t i = 0 ; i < node_count_total ; ++i ) {
        const double x_unit = node_coords(i,0) / grid_extent[0] ;
        const double y_unit = node_coords(i,1) / grid_extent[1] ;
        const double z_unit = node_coords(i,2) / grid_extent[2] ;

        node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) );
        node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) );
        node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) );
      }
    }

    Kokkos::deep_copy( mesh.node_coords ,   node_coords );
    Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids );
    Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries );

    //------------------------------------
    // Communication lists:
    {
      recv_msg_count = 0 ;
      send_msg_count = 0 ;
      send_count = 0 ;

      for ( size_t i = 1 ; i < proc_count ; ++i ) {

        // Order sending starting with the local processor rank
        // to try to smooth out the amount of messages simultaneously
        // send to a particular processor.

        const int proc = ( proc_local + i ) % proc_count ;
        if ( node_part_counts[i] ) {
          mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ;
          mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ;
          ++recv_msg_count ;
        }
        if ( node_send_map[i].size() ) {
          mesh.parallel_data_map.host_send(send_msg_count,0) = proc ;
          mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ;
          for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) {
            mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ;
          }
          ++send_msg_count ;
        }
      }
    }

    return mesh ;
  }