/* Partition 'root_box' among 'np' ranks and build the maps for rank 'my_p'.
 *
 * Steps performed:
 *   1. Allocate an array of 'np' boxes, each int[3][2], and store it in *pbox.
 *   2. Fill that array via box_partition( 0 , np , 2 , root_box , *pbox )
 *      (presumably: rank range [0,np) and initial split axis 2 -- confirm
 *      against box_partition's definition).
 *   3. Forward the boxes, 'my_p' and 'ghost' to box_partition_maps, which
 *      fills the four map output arguments.
 *
 * Parameters:
 *   np            number of partitions; sizes the allocated box array.
 *   my_p          rank whose maps are requested (forwarded unchanged).
 *   root_box      global box to partition (forwarded unchanged).
 *   ghost         ghost-layer argument forwarded to box_partition_maps.
 *   pbox          [out] receives the malloc'ed box array.  It is not released
 *                 here; releasing it with free() is left to the caller.
 *   map_local_id, map_recv_pc, map_send_pc, map_send_id
 *                 [out] forwarded unchanged to box_partition_maps.
 *
 * Aborts with a message on stderr if the box array cannot be allocated,
 * instead of handing a null pointer to box_partition.
 */
void box_partition_rcb( const int np ,
                        const int my_p ,
                        const int root_box[][2] ,
                        const int ghost ,
                        int (**pbox)[3][2] ,
                        int ** map_local_id ,
                        int ** map_recv_pc ,
                        int ** map_send_pc ,
                        int ** map_send_id )
{
  /* Same size expression as before: 'np' boxes of 3 x 2 ints. */
  const size_t nbytes = sizeof(int) * np * 3 * 2 ;

  *pbox = (int (*)[3][2]) malloc( nbytes );

  /* A zero-byte request may legitimately return NULL; only a failed
   * non-empty request is treated as an error. */
  if ( ! *pbox && nbytes ) {
    fprintf(stderr,"box_partition_rcb: failed to allocate %lu bytes for %d boxes\n",
            (unsigned long) nbytes , np );
    abort();
  }

  box_partition( 0 , np , 2 , root_box , *pbox );

  box_partition_maps( np , my_p , (const int (*)[3][2]) *pbox , ghost ,
                      map_local_id , map_recv_pc ,
                      map_send_pc , map_send_id );
}
// Build this process' portion of a structured box finite element mesh.
//
// The global vertex box is (elems + 1) vertices along each axis.  It is
// partitioned with box_partition_rcb, expanded to node boxes by the
// element specification, and the resulting local mesh is populated with:
//   - node_coords        node coordinates,
//   - elem_node_ids      element -> node connectivity,
//   - node_elem_ids      node -> (element, local node) connectivity,
//   - parallel_data_map  receive / send message lists.
//
// Parameters:
//   proc_count     number of partitions; sizes the per-process box list
//                  and bounds the communication loops.
//   proc_local     index of the partition built by this call.
//   gang_count     when greater than 1 elements are laid out with
//                  layout_elements_partitioned, otherwise with
//                  layout_elements_interior_exterior.
//   elems_x/y/z    number of elements along each axis.
//   x/y/z_coord_curve
//                  exponent applied to the unit-interval coordinate of each
//                  axis before scaling to the problem extent; 1 is linear.
//
// Returns the populated FEMeshType by value.
//
// NOTE(review): the coordinate scaling divides by (node count - 1) per axis,
// so it assumes at least two nodes along every axis -- confirm that callers
// never pass a zero element count.
static FEMeshType create( const size_t proc_count ,
                          const size_t proc_local ,
                          const size_t gang_count ,
                          const size_t elems_x ,
                          const size_t elems_y ,
                          const size_t elems_z ,
                          const double x_coord_curve = 1 ,
                          const double y_coord_curve = 1 ,
                          const double z_coord_curve = 1 )
{
  const size_t vertices_x = elems_x + 1 ;
  const size_t vertices_y = elems_y + 1 ;
  const size_t vertices_z = elems_z + 1 ;

  const BoxBoundsLinear vertex_box_bounds ;
  const ElementSpec element ;

  // Partition based upon vertices:

  BoxType vertex_box_global ;
  std::vector< BoxType > vertex_box_parts( proc_count );

  vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ;
  vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ;
  vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ;

  box_partition_rcb( vertex_box_global , vertex_box_parts );

  const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ];

  // Determine interior and used vertices:

  BoxType vertex_box_local_interior ;
  BoxType vertex_box_local_used ;

  vertex_box_bounds.apply( vertex_box_global ,
                           vertex_box_local_owned ,
                           vertex_box_local_interior ,
                           vertex_box_local_used );

  // Element counts:
  // A box of N vertices along an axis holds N-1 elements; the signed type
  // and the clamp to zero guard against an empty box producing a negative
  // (and then huge unsigned) count.

  const long local_elems_x =
    ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ;
  const long local_elems_y =
    ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ;
  const long local_elems_z =
    ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ;

  const size_t elem_count_total = std::max( long(0) , local_elems_x ) *
                                  std::max( long(0) , local_elems_y ) *
                                  std::max( long(0) , local_elems_z );

  const long interior_elems_x =
    ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ;
  const long interior_elems_y =
    ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ;
  const long interior_elems_z =
    ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ;

  const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) *
                                     std::max( long(0) , interior_elems_y ) *
                                     std::max( long(0) , interior_elems_z );

  // Expand vertex boxes to node boxes:

  BoxType node_box_global ;
  BoxType node_box_local_used ;
  std::vector< BoxType > node_box_parts ;

  element.create_node_boxes_from_vertex_boxes(
    vertex_box_global , vertex_box_parts ,
    node_box_global , node_box_parts );

  // Node communication maps:

  size_t node_count_interior = 0 ;
  size_t node_count_owned    = 0 ;
  size_t node_count_total    = 0 ;
  std::vector<size_t> node_used_id_map ;
  std::vector<size_t> node_part_counts ;
  std::vector< std::vector<size_t> > node_send_map ;

  box_partition_maps( node_box_global ,
                      node_box_parts ,
                      element.box_bounds ,
                      proc_local ,
                      node_box_local_used ,
                      node_used_id_map ,
                      node_count_interior ,
                      node_count_owned ,
                      node_count_total ,
                      node_part_counts ,
                      node_send_map );

  // Count the messages and sent items so the parallel data map can be
  // sized.  Slot 'i' is paired with rank (proc_local + i) % proc_count
  // below, so slot 0 is the local rank itself and is skipped.

  size_t recv_msg_count = 0 ;
  size_t send_msg_count = 0 ;
  size_t send_count = 0 ;

  for ( size_t i = 1 ; i < proc_count ; ++i ) {
    if ( node_part_counts[i] ) ++recv_msg_count ;
    if ( node_send_map[i].size() ) {
      ++send_msg_count ;
      send_count += node_send_map[i].size();
    }
  }

  // Finite element mesh:

  FEMeshType mesh ;

  if ( node_count_total ) {
    mesh.node_coords = node_coords_type( "node_coords", node_count_total );
  }

  if ( elem_count_total ) {
    mesh.elem_node_ids =
      elem_node_ids_type( "elem_node_ids", elem_count_total );
  }

  mesh.parallel_data_map.assign( node_count_interior ,
                                 node_count_owned ,
                                 node_count_total ,
                                 recv_msg_count ,
                                 send_msg_count ,
                                 send_count );

  // Host-side mirrors are filled here and copied to the mesh views at the end.

  typename node_coords_type::HostMirror node_coords =
    Kokkos::create_mirror( mesh.node_coords );

  typename elem_node_ids_type::HostMirror elem_node_ids =
    Kokkos::create_mirror( mesh.elem_node_ids );

  //------------------------------------
  // set node coordinates to grid location for subsequent verification

  for ( size_t iz = node_box_local_used[2][0] ;
               iz < node_box_local_used[2][1] ; ++iz ) {

  for ( size_t iy = node_box_local_used[1][0] ;
               iy < node_box_local_used[1][1] ; ++iy ) {

  for ( size_t ix = node_box_local_used[0][0] ;
               ix < node_box_local_used[0][1] ; ++ix ) {

    const size_t node_local_id =
      box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz );

    node_coords( node_local_id , 0 ) = ix ;
    node_coords( node_local_id , 1 ) = iy ;
    node_coords( node_local_id , 2 ) = iz ;
  }}}

  //------------------------------------
  // Initialize element-node connectivity:

  if ( 1 < gang_count ) {
    layout_elements_partitioned( vertex_box_local_used ,
                                 vertex_box_local_owned ,
                                 node_box_local_used ,
                                 node_used_id_map ,
                                 element ,
                                 gang_count ,
                                 elem_node_ids );
  }
  else {
    layout_elements_interior_exterior( vertex_box_local_used ,
                                       vertex_box_local_owned ,
                                       node_box_local_used ,
                                       node_used_id_map ,
                                       element ,
                                       elem_count_interior ,
                                       elem_node_ids );
  }

  //------------------------------------
  // Populate node->element connectivity:

  // First pass: count how many (element, local node) pairs touch each node.

  std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 );

  for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
    for ( size_t n = 0 ; n < element_node_count ; ++n ) {
      ++node_elem_work[ elem_node_ids(i,n) ];
    }
  }

  mesh.node_elem_ids =
    Kokkos::create_staticcrsgraph< node_elem_ids_type >(
      "node_elem_ids" , node_elem_work );

  typename node_elem_ids_type::HostMirror
    node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids );

  // Reuse the work array as a per-node insertion cursor, starting at the
  // beginning of each node's row.

  for ( size_t i = 0 ; i < node_count_total ; ++i ) {
    node_elem_work[i] = node_elem_ids.row_map[i];
  }

  // Looping in element order ensures the list of elements
  // is sorted by element index.

  for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
    for ( size_t n = 0 ; n < element_node_count ; ++n ) {
      // size_t matches the work array's element type; no narrowing.
      const size_t nid = elem_node_ids(i, n);
      const size_t j = node_elem_work[nid] ;
      ++node_elem_work[nid] ;

      node_elem_ids.entries( j , 0 ) = i ;
      node_elem_ids.entries( j , 1 ) = n ;
    }
  }

  //------------------------------------
  // Verify setup with node coordinates matching grid indices.

  verify( node_coords , elem_node_ids , node_elem_ids );

  //------------------------------------
  // Scale node coordinates to problem extent with
  // nonlinear mapping.
  {
    const double problem_extent[3] =
      { static_cast<double>( vertex_box_global[0][1] - 1 ) ,
        static_cast<double>( vertex_box_global[1][1] - 1 ) ,
        static_cast<double>( vertex_box_global[2][1] - 1 ) };

    const double grid_extent[3] =
      { static_cast<double>( node_box_global[0][1] - 1 ) ,
        static_cast<double>( node_box_global[1][1] - 1 ) ,
        static_cast<double>( node_box_global[2][1] - 1 ) };

    for ( size_t i = 0 ; i < node_count_total ; ++i ) {
      // Grid index -> unit interval, then curve and scale.
      const double x_unit = node_coords(i,0) / grid_extent[0] ;
      const double y_unit = node_coords(i,1) / grid_extent[1] ;
      const double z_unit = node_coords(i,2) / grid_extent[2] ;

      node_coords(i,0) = coordinate_scalar_type(
        problem_extent[0] * std::pow( x_unit , x_coord_curve ) );
      node_coords(i,1) = coordinate_scalar_type(
        problem_extent[1] * std::pow( y_unit , y_coord_curve ) );
      node_coords(i,2) = coordinate_scalar_type(
        problem_extent[2] * std::pow( z_unit , z_coord_curve ) );
    }
  }

  Kokkos::deep_copy( mesh.node_coords ,   node_coords );
  Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids );
  Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries );

  //------------------------------------
  // Communication lists:
  {
    // The counters are reused as write cursors into the lists sized above.
    recv_msg_count = 0 ;
    send_msg_count = 0 ;
    send_count = 0 ;

    for ( size_t i = 1 ; i < proc_count ; ++i ) {

      // Order sending starting with the local processor rank
      // to try to smooth out the amount of messages simultaneously
      // sent to a particular processor.

      const int proc = ( proc_local + i ) % proc_count ;

      if ( node_part_counts[i] ) {
        mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ;
        mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ;
        ++recv_msg_count ;
      }

      if ( node_send_map[i].size() ) {
        mesh.parallel_data_map.host_send(send_msg_count,0) = proc ;
        mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ;

        // Send items are stored relative to the end of the interior nodes.
        for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) {
          mesh.parallel_data_map.host_send_item(send_count) =
            node_send_map[i][j] - node_count_interior ;
        }
        ++send_msg_count ;
      }
    }
  }

  return mesh ;
}
/* Allocate 'nbytes' for test_maps, aborting with a diagnostic on failure.
 * A zero-byte request may legitimately return NULL and is not an error. */
static void * test_maps_alloc( size_t nbytes )
{
  void * const p = malloc( nbytes );

  if ( ! p && nbytes ) {
    fprintf(stderr,"test_maps: failed to allocate %lu bytes\n",
            (unsigned long) nbytes );
    abort();
  }
  return p ;
}

/* Self-check of the partition maps for 'np' partitions of 'root_box'.
 *
 * For every rank the partition maps are generated with a ghost width of 1
 * and a local value array is filled with the global equation number
 *   ix + iy * nx_global + iz * nx_global * ny_global
 * of each mapped cell, or -1 for cells outside 'root_box'.  Every
 * (receiver, sender) pair is then compared: the receive count must equal
 * the matching send count, and each received entry must carry the same
 * value as the entry the sender would send.
 *
 * Any mismatch is reported on stderr and the process is aborted.
 * All memory allocated here is released before returning.
 */
static void test_maps( const int root_box[][2] , const int np )
{
  const int ghost = 1 ;
  const int nx_global = root_box[0][1] - root_box[0][0] ;
  const int ny_global = root_box[1][1] - root_box[1][0] ;
  int i , j ;
  int (*pbox)[3][2] ;
  int **local_values ;
  int **map_local_id ;
  int **map_recv_pc ;
  int **map_send_pc ;
  int **map_send_id ;

  pbox = (int (*)[3][2]) test_maps_alloc( sizeof(int) * np * 3 * 2 );

  box_partition( 0 , np , 2 , root_box , pbox );

  local_values = (int **) test_maps_alloc( sizeof(int*) * np );
  map_local_id = (int **) test_maps_alloc( sizeof(int*) * np );
  map_recv_pc  = (int **) test_maps_alloc( sizeof(int*) * np );
  map_send_pc  = (int **) test_maps_alloc( sizeof(int*) * np );
  map_send_id  = (int **) test_maps_alloc( sizeof(int*) * np );

  /* Set each local value to the global equation number */

  for ( i = 0 ; i < np ; ++i ) {
    const int (*mybox)[2] = (const int (*)[2]) pbox[i] ;
    const int nx = mybox[0][1] - mybox[0][0] ;
    const int ny = mybox[1][1] - mybox[1][0] ;
    const int nz = mybox[2][1] - mybox[2][0] ;
    int ix , iy , iz ;

    /* Generate the partition maps for this rank */
    box_partition_maps( np , i , (const int (*)[3][2]) pbox , ghost ,
                        & map_local_id[i] , & map_recv_pc[i] ,
                        & map_send_pc[i] , & map_send_id[i] );

    /* map_recv_pc[i][np] is used as the number of local values of rank i */
    local_values[i] =
      (int *) test_maps_alloc( sizeof(int) * map_recv_pc[i][np] );

    /* Walk the rank's box including its ghost layer, in box-local indices */
    for ( iz = -ghost ; iz < nz + ghost ; ++iz ) {
    for ( iy = -ghost ; iy < ny + ghost ; ++iy ) {
    for ( ix = -ghost ; ix < nx + ghost ; ++ix ) {
      const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz);

      /* A negative id means the cell has no local entry */
      if ( 0 <= ieq ) {
        const int ix_global = ix + mybox[0][0] ;
        const int iy_global = iy + mybox[1][0] ;
        const int iz_global = iz + mybox[2][0] ;

        if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] &&
             root_box[1][0] <= iy_global && iy_global < root_box[1][1] &&
             root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) {
          local_values[i][ ieq ] = ix_global +
                                   iy_global * nx_global +
                                   iz_global * nx_global * ny_global ;
        }
        else {
          local_values[i][ ieq ] = -1 ;
        }
      }
    }
    }
    }
  }

  /* Pair-wise compare the local values */
  /* i  == receiving processor rank */
  /* ip == sending processor rank */
  /* j  == receiving processor data entry for message from 'ip' */
  /* jp == sending processor data entry for message to 'i' */

  for ( i = 0 ; i < np ; ++i ) {
    for ( j = 1 ; j < np ; ++j ) {
      const int ip = ( i + j ) % np ;
      const int jp = ( i + np - ip ) % np ;
      const int nrecv = map_recv_pc[i] [j+1]  - map_recv_pc[i] [j] ;
      const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ;
      int k ;

      if ( nrecv != nsend ) {
        fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip);
        fprintf(stderr,"P%d send %d to   P%d\n",ip,nsend,i);
        abort();
      }

      for ( k = 0 ; k < nrecv ; ++k ) {
        const int irecv = map_recv_pc[i][j] + k ;
        const int isend = map_send_pc[ip][jp] + k ;
        const int val_irecv = local_values[i][irecv] ;
        const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ;

        if ( val_irecv != val_isend ) {
          fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip);
          fprintf(stderr,"P%d send[%d] = %d , to   P%d\n",ip,k,val_isend,i);
          abort();
        }
      }
    }
  }

  for ( i = 0 ; i < np ; ++i ) {
    free( map_local_id[i] );
    free( map_recv_pc[i] );
    free( map_send_pc[i] );
    free( map_send_id[i] );
    free( local_values[i] );
  }
  free( map_send_id );
  free( map_send_pc );
  free( map_recv_pc );
  free( map_local_id );
  free( local_values );
  free( pbox );
}