// Handles the i-th entry of elemIDs: fetches that element's node ids and
// nodal coordinates into a scratch ElemData, computes the element matrix
// and vector, and sums them into the linear system reached through A and b.
inline void operator()(int i)
{
  // Per-call scratch storage for a single element.
  ElemData<GlobalOrdinal,Scalar> scratch;

  GlobalOrdinal current_elem = elemIDs[i];

  get_elem_nodes_and_coords(*mesh,
                            current_elem,
                            scratch.elem_node_ids,
                            scratch.elem_node_coords);

  compute_element_matrix_and_vector(scratch);

  // A and b are dereferenced here, i.e. they are held as pointer-like members.
  sum_into_global_linear_system(scratch, *A, *b);
}
// Assembles the finite-element contributions of the local portion of the
// mesh into the global sparse linear system.
//
// Step 1: iterate local_elem_box and record one element ID per box position.
// Step 2: for each recorded ID, gather the element's nodes and coordinates,
//         compute its element matrix and vector, and sum them into A and b.
//
//   mesh            mesh description; the [1] entry of global_box in each
//                   dimension is passed to get_id as the global element count.
//   local_elem_box  box whose positions are enumerated to obtain element IDs.
//   A, b            matrix and vector that receive the summed contributions.
//   (last argument) unused.
void
perform_element_loop(const simple_mesh_description<GlobalOrdinal>& mesh,
                     const Box& local_elem_box,
                     MatrixType& A, VectorType& b,
                     Parameters& /*params*/)
{
  typedef typename MatrixType::ScalarType Scalar;

  int global_elems_x = mesh.global_box[0][1];
  int global_elems_y = mesh.global_box[1][1];
  int global_elems_z = mesh.global_box[2][1];

  //We will iterate the local-element-box (local portion of the mesh), and
  //get element-IDs in preparation for later assembling the FE operators
  //into the global sparse linear-system.

  GlobalOrdinal num_elems = get_num_ids<GlobalOrdinal>(local_elem_box);
  std::vector<GlobalOrdinal> elemIDs(num_elems);

  BoxIterator iter = BoxIterator::begin(local_elem_box);
  BoxIterator end  = BoxIterator::end(local_elem_box);

  for(size_t i=0; iter != end; ++iter, ++i) {
    elemIDs[i] = get_id<GlobalOrdinal>(global_elems_x, global_elems_y, global_elems_z,
                                       iter.x, iter.y, iter.z);
//#ifdef MINIFE_DEBUG
//std::cout << "elem ID " << elemIDs[i] << " ("<<iter.x<<","<<iter.y<<","<<iter.z<<")"<<std::endl;
//#endif
  }

  //Now do the actual finite-element assembly loop:

  //A single ElemData is reused for every element; its gradient values are
  //filled in once here, before the loop.
  ElemData<GlobalOrdinal,Scalar> elem_data;

  compute_gradient_values(elem_data.grad_vals);

  for(size_t i=0; i<elemIDs.size(); ++i) {
    //Populate elem_data with this element's node-ids and nodal-coords,
    //compute the element matrix and vector, then sum them into A and b.
    get_elem_nodes_and_coords(mesh, elemIDs[i], elem_data);
    compute_element_matrix_and_vector(elem_data);
    sum_into_global_linear_system(elem_data, A, b);
  }
}
	  // Assembles the element stored at position i of elemIDs: fills a local
	  // ElemData, computes the element matrix and vector, and sums them into
	  // the linear system reached through A and b.
	  //
	  // NOTE(review): compute_gradient_values runs on every call with a
	  // freshly constructed ElemData, so it presumably produces the same
	  // values each time -- confirm whether it could be computed once and
	  // reused.
	  inline
	  void operator()( const int i ) const
	  {
	    // Scratch data private to this call.
	    ElemData<GlobalOrdinal,Scalar> work;

	    compute_gradient_values(work.grad_vals);

	    // Look up the element's node ids and nodal coordinates by its id.
	    get_elem_nodes_and_coords(mesh, elemIDs[i], work);

	    // Build the element diffusion matrix and element source vector.
	    compute_element_matrix_and_vector(work);

	    // Add the dense element matrix and vector into the global sparse
	    // linear system.
	    sum_into_global_linear_system(work, *A, *b);
	  }