/** * prediction - perform a state prediction step * @params[in] self * @params[in] args * - gyro * - accel * - dT * @return state */ static PyObject* prediction(PyObject* self, PyObject* args) { PyArrayObject *vec_gyro, *vec_accel; float gyro_data[3], accel_data[3]; float dT; if (!PyArg_ParseTuple(args, "O!O!f", &PyArray_Type, &vec_gyro, &PyArray_Type, &vec_accel, &dT)) return NULL; if (NULL == vec_gyro) return NULL; if (NULL == vec_accel) return NULL; if (!parseFloatVec3(vec_gyro, gyro_data)) return NULL; if (!parseFloatVec3(vec_accel, accel_data)) return NULL; INSStatePrediction(gyro_data, accel_data, dT); INSCovariancePrediction(dT); if (false) { const float zeros[3] = {0,0,0}; INSSetGyroBias(zeros); INSSetAccelBias(zeros); } return pack_state(self); }
int m2_timer_save_state(struct m2_timer *timer, struct save_state *state) { size_t size; uint8_t *buf; int rc; size = pack_state(timer, m2_timer_state_items, NULL); buf = malloc(size); if (!buf) return -1; pack_state(timer, m2_timer_state_items, buf); rc = save_state_add_chunk(state, "M2 ", buf, size); free(buf); return rc; }
static PyObject* init(PyObject* self, PyObject* args) { INSGPSInit(); const float Be[] = {400, 0, 1600}; INSSetMagNorth(Be); return pack_state(self); }
/** * correction - perform a correction of the EKF * @params[in] self * @params[in] args * - Z - vector of measurements (position, velocity, mag, baro) * - sensors - binary flags for which sensors should be used * @return state */ static PyObject* correction(PyObject* self, PyObject* args) { PyArrayObject *vec_z; float z[10]; int sensors; if (!PyArg_ParseTuple(args, "O!i", &PyArray_Type, &vec_z, &sensors)) return NULL; if (NULL == vec_z) return NULL; if (!parseFloatVecN(vec_z, z, 10)) return NULL; INSCorrection(&z[6], &z[0], &z[3], z[9], sensors); return pack_state(self); }
void set_state(T state) volatile { tag_type t = get_tag(); state_ = pack_state(state, t); }
void set (T state, tag_type t) { state_ = pack_state(state, t); }
explicit tagged_thread_state(T state, tag_type t = 0) : state_(pack_state(state, t)) {}
void set_tag(tag_type t) volatile { T state = get_state(); state_ = pack_state(state, t); }
PerformanceData run( const typename FixtureType::FEMeshType & mesh , const int global_max_x , const int global_max_y , const int global_max_z , const unsigned uq_count , const int steps , const int print_sample ) { typedef Scalar scalar_type ; typedef FixtureType fixture_type ; typedef typename fixture_type::device_type device_type ; enum { ElementNodeCount = fixture_type::element_node_count }; const int total_num_steps = steps ; const Scalar user_dt = 5.0e-6; //const Scalar end_time = 0.0050; // element block parameters const Scalar lin_bulk_visc = 0.0; const Scalar quad_bulk_visc = 0.0; // const Scalar lin_bulk_visc = 0.06; // const Scalar quad_bulk_visc = 1.2; // const Scalar hg_stiffness = 0.0; // const Scalar hg_viscosity = 0.0; // const Scalar hg_stiffness = 0.03; // const Scalar hg_viscosity = 0.001; // material properties const Scalar youngs_modulus=1.0e6; const Scalar poissons_ratio=0.0; const Scalar density = 8.0e-4; const comm::Machine machine = mesh.parallel_data_map.machine ; PerformanceData perf_data ; Kokkos::Impl::Timer wall_clock ; //------------------------------------ // Generate fields typedef Fields< scalar_type , device_type > fields_type ; fields_type mesh_fields( mesh , uq_count , lin_bulk_visc , quad_bulk_visc , youngs_modulus , poissons_ratio , density ); typename fields_type::node_coords_type::HostMirror model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords ); typename fields_type::spatial_precise_view::HostMirror displacement_h = Kokkos::create_mirror( mesh_fields.displacement ); typename fields_type::spatial_precise_view::HostMirror velocity_h = Kokkos::create_mirror( mesh_fields.velocity ); Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords ); //------------------------------------ // Initialization initialize_element( mesh_fields ); initialize_node( mesh_fields ); const Scalar x_bc = global_max_x ; // Initial condition on velocity to initiate a pulse along the X axis { const unsigned X = 0; for ( unsigned inode = 0; inode< mesh_fields.num_nodes; ++inode) { for ( unsigned kq = 0 ; kq < uq_count ; ++kq ) { if ( model_coords_h(inode,X) == 0 ) { velocity_h(inode,kq,X) = 1000 + 100 * kq ; velocity_h(inode,kq,X) = 1000 + 100 * kq ; } } } } Kokkos::deep_copy( mesh_fields.velocity , velocity_h ); Kokkos::deep_copy( mesh_fields.velocity_new , velocity_h ); //-------------------------------------------------------------------------- // We will call a sequence of functions. These functions have been // grouped into several functors to balance the number of global memory // accesses versus requiring too many registers or too much L1 cache. // Global memory accees have read/write cost and memory subsystem contention cost. //-------------------------------------------------------------------------- perf_data.init_time = comm::max( machine , wall_clock.seconds() ); // Parameters required for the internal force computations. perf_data.number_of_steps = total_num_steps ; typedef typename fields_type::spatial_precise_view::scalar_type comm_value_type ; const unsigned comm_value_count = 6 ; Kokkos::AsyncExchange< comm_value_type , device_type , Kokkos::ParallelDataMap > comm_exchange( mesh.parallel_data_map , comm_value_count * uq_count ); for ( int step = 0; step < total_num_steps; ++step ) { //------------------------------------------------------------------------ // rotate the state variable views. swap( mesh_fields.dt , mesh_fields.dt_new ); swap( mesh_fields.displacement , mesh_fields.displacement_new ); swap( mesh_fields.velocity , mesh_fields.velocity_new ); swap( mesh_fields.rotation , mesh_fields.rotation_new ); //------------------------------------------------------------------------ // Communicate "send" nodes' displacement and velocity next_state // to the ghosted nodes. // buffer packages: { { dx , dy , dz , vx , vy , vz }_node } wall_clock.reset(); pack_state( mesh_fields , comm_exchange.buffer(), mesh.parallel_data_map.count_interior , mesh.parallel_data_map.count_send ); comm_exchange.setup(); comm_exchange.send_receive(); unpack_state( mesh_fields , comm_exchange.buffer() , mesh.parallel_data_map.count_owned , mesh.parallel_data_map.count_receive ); device_type::fence(); perf_data.comm_time += comm::max( machine , wall_clock.seconds() ); //------------------------------------------------------------------------ wall_clock.reset(); // First kernel 'grad_hgop' combines two functions: // gradient, velocity gradient gradient( mesh_fields ); // Combine tensor decomposition and rotation functions. decomp_rotate( mesh_fields ); internal_force( mesh_fields , user_dt ); device_type::fence(); perf_data.internal_force_time += comm::max( machine , wall_clock.seconds() ); //------------------------------------------------------------------------ // Assembly of elements' contributions to nodal force into // a nodal force vector. Update the accelerations, velocities, // displacements. // The same pattern can be used for matrix-free residual computations. wall_clock.reset(); nodal_update( mesh_fields , x_bc ); device_type::fence(); perf_data.central_diff += comm::max( machine , wall_clock.seconds() ); if ( print_sample && 0 == step % 100 ) { Kokkos::deep_copy( displacement_h , mesh_fields.displacement_new ); Kokkos::deep_copy( velocity_h , mesh_fields.velocity_new ); if ( 1 == print_sample ) { for ( unsigned kp = 0 ; kp < uq_count ; ++kp ) { std::cout << "step " << step << " : displacement({*,0,0}," << kp << ",0) =" ; for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) { if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) { std::cout << " " << displacement_h(i,kp,0); } } std::cout << std::endl ; const float tol = 1.0e-6 ; const int yb = global_max_y ; const int zb = global_max_z ; std::cout << "step " << step << " : displacement({*," << yb << "," << zb << "}," << kp << ",0) =" ; for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) { if ( fabs( model_coords_h(i,1) - yb ) < tol && fabs( model_coords_h(i,2) - zb ) < tol ) { std::cout << " " << displacement_h(i,kp,0); } } std::cout << std::endl ; } } else if ( 2 == print_sample ) { const unsigned kp = 0 ; const float tol = 1.0e-6 ; const int xb = global_max_x / 2 ; const int yb = global_max_y / 2 ; const int zb = global_max_z / 2 ; for ( unsigned i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) { if ( fabs( model_coords_h(i,0) - xb ) < tol && fabs( model_coords_h(i,1) - yb ) < tol && fabs( model_coords_h(i,2) - zb ) < tol ) { std::cout << "step " << step << " : displacement(" << xb << "," << yb << "," << zb << ") = {" << std::setprecision(6) << " " << displacement_h(i,kp,0) << std::setprecision(2) << " " << displacement_h(i,kp,1) << std::setprecision(2) << " " << displacement_h(i,kp,2) << " }" << std::endl ; } } } } } return perf_data ; }