void perform_element_loop(const simple_mesh_description<GlobalOrdinal>& mesh, const Box& local_elem_box, MatrixType& A, VectorType& b, Parameters& params) { typedef typename MatrixType::ScalarType Scalar; if (A.rows.size() == 0) return; int num_threads = params.numthreads; timer_type t0 = mytimer(); //We will iterate the local-element-box (local portion of the mesh), and //assemble the FE operators into the global sparse linear-system. int global_elems_x = mesh.global_box[0][1]; int global_elems_y = mesh.global_box[1][1]; int global_elems_z = mesh.global_box[2][1]; GlobalOrdinal num_elems = get_num_ids<GlobalOrdinal>(local_elem_box); std::vector<GlobalOrdinal> elemIDs(num_elems); BoxIterator iter = BoxIterator::begin(local_elem_box); BoxIterator end = BoxIterator::end(local_elem_box); for(size_t i=0; iter != end; ++iter, ++i) { elemIDs[i] = get_id<GlobalOrdinal>(global_elems_x, global_elems_y, global_elems_z, iter.x, iter.y, iter.z); } LockingMatrix<MatrixType> lockingA(A); LockingVector<VectorType> lockingb(b); FEAssembleSumInto<GlobalOrdinal,Scalar,MatrixType,VectorType> fe_op; fe_op.mesh = &mesh; fe_op.elemIDs = &elemIDs[0]; fe_op.A = &lockingA; fe_op.b = &lockingb; typedef typename VectorType::ComputeNodeType ComputeNodeType; ComputeNodeType& compute_node = b.compute_node; compute_node.parallel_for(elemIDs.size(), fe_op); std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"<<std::endl; std::cout << "{number of vector conflicts: " << miniFE_num_vector_conflicts << "}"<<std::endl; }
void perform_element_loop(const simple_mesh_description<GlobalOrdinal>& mesh, const Box& local_elem_box, MatrixType& A, VectorType& b, Parameters& /*params*/) { typedef typename MatrixType::ScalarType Scalar; int global_elems_x = mesh.global_box[0][1]; int global_elems_y = mesh.global_box[1][1]; int global_elems_z = mesh.global_box[2][1]; //We will iterate the local-element-box (local portion of the mesh), and //get element-IDs in preparation for later assembling the FE operators //into the global sparse linear-system. GlobalOrdinal num_elems = get_num_ids<GlobalOrdinal>(local_elem_box); std::vector<GlobalOrdinal> elemIDs(num_elems); BoxIterator iter = BoxIterator::begin(local_elem_box); BoxIterator end = BoxIterator::end(local_elem_box); for(size_t i=0; iter != end; ++iter, ++i) { elemIDs[i] = get_id<GlobalOrdinal>(global_elems_x, global_elems_y, global_elems_z, iter.x, iter.y, iter.z); //#ifdef MINIFE_DEBUG //std::cout << "elem ID " << elemIDs[i] << " ("<<iter.x<<","<<iter.y<<","<<iter.z<<")"<<std::endl; //#endif } //Now do the actual finite-element assembly loop: ElemData<GlobalOrdinal,Scalar> elem_data; compute_gradient_values(elem_data.grad_vals); timer_type t_gn = 0, t_ce = 0, t_si = 0; timer_type t0 = 0; for(size_t i=0; i<elemIDs.size(); ++i) { get_elem_nodes_and_coords(mesh, elemIDs[i], elem_data); compute_element_matrix_and_vector(elem_data); sum_into_global_linear_system(elem_data, A, b); } //std::cout << std::endl<<"get-nodes: " << t_gn << std::endl; //std::cout << "compute-elems: " << t_ce << std::endl; //std::cout << "sum-in: " << t_si << std::endl; }
void perform_element_loop(const simple_mesh_description<GlobalOrdinal>& mesh, const Box& local_elem_box, MatrixType& A, VectorType& b, Parameters& /*params*/) { typedef typename MatrixType::ScalarType Scalar; int global_elems_x = mesh.global_box[0][1]; int global_elems_y = mesh.global_box[1][1]; int global_elems_z = mesh.global_box[2][1]; //We will iterate the local-element-box (local portion of the mesh), and //get element-IDs in preparation for later assembling the FE operators //into the global sparse linear-system. GlobalOrdinal num_elems = get_num_ids<GlobalOrdinal>(local_elem_box); v_global_ordinal elemIDs("PerfElemLoop::elemIDs",num_elems); h_v_global_ordinal h_elemIDs = Kokkos::create_mirror_view(elemIDs); BoxIterator iter = BoxIterator::begin(local_elem_box); BoxIterator end = BoxIterator::end(local_elem_box); for(size_t i=0; iter != end; ++iter, ++i) { h_elemIDs[i] = get_id<GlobalOrdinal>(global_elems_x, global_elems_y, global_elems_z, iter.x, iter.y, iter.z); } //Now do the actual finite-element assembly loop: ElemData<GlobalOrdinal,Scalar> elem_data; compute_gradient_values(elem_data.grad_vals); struct perform_element_loop_functor<GlobalOrdinal, MatrixType,VectorType> f(&A,&b,mesh,h_elemIDs,elem_data); Kokkos::parallel_for("perform_element_loop<Host>",h_elemIDs.dimension_0(),f); device_device_type::fence(); }