static void layout_elements_partitioned( const BoxType vertex_box_local_used , const BoxType /*vertex_box_local_owned*/ , const BoxType node_box_local_used , const std::vector<size_t> & node_used_id_map , const ElementSpec element_fixture , const size_t thread_gang_count , const typename elem_node_ids_type::HostMirror elem_node_ids ) { std::vector< BoxType > element_box_gangs( thread_gang_count ); BoxType element_box_local_used = vertex_box_local_used ; element_box_local_used[0][1] -= 1 ; element_box_local_used[1][1] -= 1 ; element_box_local_used[2][1] -= 1 ; box_partition_rcb( element_box_local_used , element_box_gangs ); size_t elem_index = 0 ; for ( size_t ig = 0 ; ig < thread_gang_count ; ++ig ) { const BoxType box = element_box_gangs[ig] ; for ( size_t iz = box[2][0] ; iz < box[2][1] ; ++iz ) { for ( size_t iy = box[1][0] ; iy < box[1][1] ; ++iy ) { for ( size_t ix = box[0][0] ; ix < box[0][1] ; ++ix , ++elem_index ) { for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) { unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) }; element_fixture.elem_to_node( nn , coord ); const size_t node_local_id = box_map_id( node_box_local_used , node_used_id_map , coord[0] , coord[1] , coord[2] ); elem_node_ids( elem_index , nn ) = node_local_id ; } } } } } }
static void hpccg_alloc_and_fill( const int np , const int my_p , const int gbox[][2] , const int ghost , struct cgsolve_data * const data ) { int (*pbox)[3][2] = NULL ; int * map_local_ord = NULL; data->nRow = 0 ; data->A_pc = NULL ; data->A_ia = NULL ; data->A_a = NULL ; data->np = np ; data->ip = my_p ; data->recv_pc = NULL ; data->send_pc = NULL ; data->send_id = NULL ; box_partition_rcb( np, my_p, (const int (*)[2]) gbox, ghost, & pbox , & map_local_ord , & data->recv_pc , & data->send_pc , & data->send_id ); { const int (* const my_box)[2] = (const int (*)[2]) pbox[my_p] ; const int bx = my_box[0][0] ; const int by = my_box[1][0] ; const int bz = my_box[2][0] ; const int nx = my_box[0][1] - bx ; const int ny = my_box[1][1] - by ; const int nz = my_box[2][1] - bz ; const int n = nx * ny * nz ; const int nnz = 27 * n ; /* Upper bound */ int * const pc = (int *) malloc( sizeof(int) * ( n + 1 ) ); int * const ia = (int *) malloc( sizeof(int) * nnz ); MATRIX_SCALAR * const a = (MATRIX_SCALAR *) malloc( sizeof(MATRIX_SCALAR) * nnz ); int irow = 0 ; int ipc = 0 ; int ix , iy , iz ; int sx , sy , sz ; for ( iz = 0 ; iz < nz ; ++iz ) { for ( iy = 0 ; iy < ny ; ++iy ) { for ( ix = 0 ; ix < nx ; ++ix , ++irow ) { if ( irow != box_map_local( my_box, ghost, map_local_ord,ix,iy,iz) ) { fprintf(stderr,"P%d: irow[%d] != box_map_local(%d,%d,%d) = %d\n", my_p,irow,ix,iy,iz, box_map_local( my_box, ghost, map_local_ord, ix, iy, iz) ); } pc[ irow ] = ipc ; /* Beginning of row coefficients */ /* Diagonal term first */ ia[ ipc ] = irow ; a[ ipc ] = 27.0f ; ++ipc ; /* Off-diagonal terms to follow */ for ( sz = -1 ; sz <= 1 ; ++sz ) { for ( sy = -1 ; sy <= 1 ; ++sy ) { for ( sx = -1 ; sx <= 1 ; ++sx ) { const int dx = ix + sx ; const int dy = iy + sy ; const int dz = iz + sz ; const int global_x = dx + bx ; const int global_y = dy + by ; const int global_z = dz + bz ; if ( gbox[0][0] <= global_x && global_x < gbox[0][1] && gbox[1][0] <= global_y && global_y < gbox[1][1] && gbox[2][0] <= global_z && global_z < gbox[2][1] && ! ( sz == 0 && sy == 0 && sx == 0 ) ) { /* 'icol' is mapped for communication */ const int icol = box_map_local(my_box,ghost,map_local_ord,dx,dy,dz); if ( icol < 0 ) { fprintf(stderr,"P%d : bad column at local (%d,%d,%d) global(%d,%d,%d)\n", my_p, dx,dy,dz,global_x,global_y,global_z); fflush(stderr); abort(); } ia[ ipc ] = icol ; a[ ipc ] = -1.0f ; ++ipc ; } } } } } } } pc[irow] = ipc ; data->nRow = irow ; data->A_pc = pc ; data->A_ia = ia ; data->A_a = a ; } free( map_local_ord ); free( pbox ); }
static FEMeshType create( const size_t proc_count , const size_t proc_local , const size_t gang_count , const size_t elems_x , const size_t elems_y , const size_t elems_z , const double x_coord_curve = 1 , const double y_coord_curve = 1 , const double z_coord_curve = 1 ) { const size_t vertices_x = elems_x + 1 ; const size_t vertices_y = elems_y + 1 ; const size_t vertices_z = elems_z + 1 ; const BoxBoundsLinear vertex_box_bounds ; const ElementSpec element ; // Partition based upon vertices: BoxType vertex_box_global ; std::vector< BoxType > vertex_box_parts( proc_count ); vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ; vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ; vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ; box_partition_rcb( vertex_box_global , vertex_box_parts ); const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ]; // Determine interior and used vertices: BoxType vertex_box_local_interior ; BoxType vertex_box_local_used ; vertex_box_bounds.apply( vertex_box_global , vertex_box_local_owned , vertex_box_local_interior , vertex_box_local_used ); // Element counts: const long local_elems_x = ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ; const long local_elems_y = ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ; const long local_elems_z = ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ; const size_t elem_count_total = std::max( long(0) , local_elems_x ) * std::max( long(0) , local_elems_y ) * std::max( long(0) , local_elems_z ); const long interior_elems_x = ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ; const long interior_elems_y = ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ; const long interior_elems_z = ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ; const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) * std::max( long(0) , interior_elems_y ) * std::max( long(0) , interior_elems_z ); // Expand vertex boxes to node boxes: BoxType node_box_global ; BoxType node_box_local_used ; std::vector< BoxType > node_box_parts ; element.create_node_boxes_from_vertex_boxes( vertex_box_global , vertex_box_parts , node_box_global , node_box_parts ); // Node communication maps: size_t node_count_interior = 0 ; size_t node_count_owned = 0 ; size_t node_count_total = 0 ; std::vector<size_t> node_used_id_map ; std::vector<size_t> node_part_counts ; std::vector< std::vector<size_t> > node_send_map ; box_partition_maps( node_box_global , node_box_parts , element.box_bounds , proc_local , node_box_local_used , node_used_id_map , node_count_interior , node_count_owned , node_count_total , node_part_counts , node_send_map ); size_t node_count_send = 0 ; for ( size_t i = 0 ; i < node_send_map.size() ; ++i ) { node_count_send += node_send_map[i].size(); } size_t recv_msg_count = 0 ; size_t send_msg_count = 0 ; size_t send_count = 0 ; for ( size_t i = 1 ; i < proc_count ; ++i ) { if ( node_part_counts[i] ) ++recv_msg_count ; if ( node_send_map[i].size() ) { ++send_msg_count ; send_count += node_send_map[i].size(); } } // Finite element mesh: FEMeshType mesh ; if ( node_count_total ) { mesh.node_coords = node_coords_type( "node_coords", node_count_total ); } if ( elem_count_total ) { mesh.elem_node_ids = elem_node_ids_type( "elem_node_ids", elem_count_total ); } mesh.parallel_data_map.assign( node_count_interior , node_count_owned , node_count_total , recv_msg_count , send_msg_count , send_count ); typename node_coords_type::HostMirror node_coords = Kokkos::create_mirror( mesh.node_coords ); typename elem_node_ids_type::HostMirror elem_node_ids = Kokkos::create_mirror( mesh.elem_node_ids ); //------------------------------------ // set node coordinates to grid location for subsequent verification for ( size_t iz = node_box_local_used[2][0] ; iz < node_box_local_used[2][1] ; ++iz ) { for ( size_t iy = node_box_local_used[1][0] ; iy < node_box_local_used[1][1] ; ++iy ) { for ( size_t ix = node_box_local_used[0][0] ; ix < node_box_local_used[0][1] ; ++ix ) { const size_t node_local_id = box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz ); node_coords( node_local_id , 0 ) = ix ; node_coords( node_local_id , 1 ) = iy ; node_coords( node_local_id , 2 ) = iz ; }}} //------------------------------------ // Initialize element-node connectivity: if ( 1 < gang_count ) { layout_elements_partitioned( vertex_box_local_used , vertex_box_local_owned , node_box_local_used , node_used_id_map , element , gang_count , elem_node_ids ); } else { layout_elements_interior_exterior( vertex_box_local_used , vertex_box_local_owned , node_box_local_used , node_used_id_map , element , elem_count_interior , elem_node_ids ); } //------------------------------------ // Populate node->element connectivity: std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 ); for ( size_t i = 0 ; i < elem_count_total ; ++i ) { for ( size_t n = 0 ; n < element_node_count ; ++n ) { ++node_elem_work[ elem_node_ids(i,n) ]; } } mesh.node_elem_ids = Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work ); typename node_elem_ids_type::HostMirror node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids ); for ( size_t i = 0 ; i < node_count_total ; ++i ) { node_elem_work[i] = node_elem_ids.row_map[i]; } // Looping in element order insures the list of elements // is sorted by element index. for ( size_t i = 0 ; i < elem_count_total ; ++i ) { for ( size_t n = 0 ; n < element_node_count ; ++n ) { const unsigned nid = elem_node_ids(i, n); const unsigned j = node_elem_work[nid] ; ++node_elem_work[nid] ; node_elem_ids.entries( j , 0 ) = i ; node_elem_ids.entries( j , 1 ) = n ; } } //------------------------------------ // Verify setup with node coordinates matching grid indices. verify( node_coords , elem_node_ids , node_elem_ids ); //------------------------------------ // Scale node coordinates to problem extent with // nonlinear mapping. { const double problem_extent[3] = { static_cast<double>( vertex_box_global[0][1] - 1 ) , static_cast<double>( vertex_box_global[1][1] - 1 ) , static_cast<double>( vertex_box_global[2][1] - 1 ) }; const double grid_extent[3] = { static_cast<double>( node_box_global[0][1] - 1 ) , static_cast<double>( node_box_global[1][1] - 1 ) , static_cast<double>( node_box_global[2][1] - 1 ) }; for ( size_t i = 0 ; i < node_count_total ; ++i ) { const double x_unit = node_coords(i,0) / grid_extent[0] ; const double y_unit = node_coords(i,1) / grid_extent[1] ; const double z_unit = node_coords(i,2) / grid_extent[2] ; node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) ); node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) ); node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) ); } } Kokkos::deep_copy( mesh.node_coords , node_coords ); Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids ); Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries ); //------------------------------------ // Communication lists: { recv_msg_count = 0 ; send_msg_count = 0 ; send_count = 0 ; for ( size_t i = 1 ; i < proc_count ; ++i ) { // Order sending starting with the local processor rank // to try to smooth out the amount of messages simultaneously // send to a particular processor. const int proc = ( proc_local + i ) % proc_count ; if ( node_part_counts[i] ) { mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ; mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ; ++recv_msg_count ; } if ( node_send_map[i].size() ) { mesh.parallel_data_map.host_send(send_msg_count,0) = proc ; mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ; for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) { mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ; } ++send_msg_count ; } } } return mesh ; }