/**
 * Emit vec4 IR for the NIR intrinsics that are handled specially in the
 * tessellation evaluation visitor.
 *
 * Covers the tessellation coordinate, the outer/inner tessellation levels,
 * the primitive ID and (per-vertex) input loads.  Any other intrinsic is
 * forwarded to vec4_visitor::nir_emit_intrinsic().
 */
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;

   case nir_intrinsic_load_tess_level_outer: {
      /* Both cases read attribute slot 1 as a vec4; only the swizzle
       * depends on the domain.
       */
      const unsigned outer_swizzle =
         (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) ?
         BRW_SWIZZLE_ZWZW : BRW_SWIZZLE_WZYX;

      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                       outer_swizzle)));
      break;
   }

   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain != BRW_TESS_DOMAIN_QUAD) {
         /* Non-quad domains: a single float taken from attribute slot 1. */
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      } else {
         /* Quad domain: attribute slot 0 with its components reversed. */
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;

   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      const unsigned slot = instr->const_index[0];
      src_reg header = input_read_header;
      const bool is_64bit = nir_dest_bit_size(instr->dest) == 64;

      /* For 64-bit loads the starting component is halved. */
      const unsigned nir_component = nir_intrinsic_component(instr);
      const unsigned first_component =
         is_64bit ? nir_component / 2 : nir_component;

      const bool has_indirect = indirect_offset.file != BAD_FILE;

      /* Arbitrarily only push up to 24 vec4 slots worth of data,
       * which is 12 registers (since each holds 2 vec4 slots).
       */
      const unsigned push_slot_limit = 24;

      if (!has_indirect && slot < push_slot_limit) {
         /* Directly addressed and within the pushed range: read the value
          * straight from the ATTR file and grow the URB read length so it
          * covers the slot(s) consumed here.
          */
         const glsl_type *pushed_type =
            is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
         src_reg pushed(ATTR, slot, pushed_type);
         pushed.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         const brw_reg_type pushed_dst_type =
            is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
         emit(MOV(get_nir_dest(instr->dest, pushed_dst_type), pushed));

         const unsigned slots_consumed = is_64bit ? 2 : 1;
         prog_data->urb_read_length =
            MAX2(prog_data->urb_read_length,
                 DIV_ROUND_UP(slot + slots_consumed, 2));
         break;
      }

      if (has_indirect) {
         /* Build a fresh header that includes the indirect offset. */
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      }

      if (is_64bit) {
         /* For 64-bit we need to load twice as many 32-bit components, and
          * for dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg wide_temp(this, glsl_type::dvec4_type);
         dst_reg wide_temp_d = retype(wide_temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *first_read =
            emit(VEC4_OPCODE_URB_READ, wide_temp_d, src_reg(header));
         first_read->offset = slot;
         first_read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            vec4_instruction *second_read =
               emit(VEC4_OPCODE_URB_READ,
                    byte_offset(wide_temp_d, REG_SIZE), src_reg(header));
            second_read->offset = slot + 1;
            second_read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg wide_src = src_reg(wide_temp);
         wide_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, wide_src, false);

         dst_reg wide_dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         wide_dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(wide_dst, src_reg(shuffled)));
      } else {
         dst_reg read_temp(this, glsl_type::ivec4_type);
         vec4_instruction *urb_read =
            emit(VEC4_OPCODE_URB_READ, read_temp, src_reg(header));
         urb_read->offset = slot;
         urb_read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg read_src = src_reg(read_temp);
         read_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks
          * landing in here, but we really don't want them in the above
          * pseudo-ops.
          */
         dst_reg target = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         target.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(target, read_src));
      }
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
/// Returns a copy of `matrix` in which the entries of every column have been
/// permuted among the rows, using the generator pointed to by `gen`.
///
/// The row permutation is re-shuffled once per column (in column order) and
/// is not reset in between, so each column's permutation is derived from the
/// previous one.
arma::Mat<T> shuffled_matrix(const arma::Mat<T>& matrix, std::mt19937* gen) {
  arma::Mat<T> result(matrix.n_rows, matrix.n_cols);

  // row_order[r] is the source row whose value lands in destination row r.
  std::vector<std::size_t> row_order(matrix.n_rows);
  std::iota(row_order.begin(), row_order.end(), 0);

  std::size_t col = 0;
  while (col < matrix.n_cols) {
    std::shuffle(row_order.begin(), row_order.end(), *gen);

    std::size_t dest_row = 0;
    for (const std::size_t source_row : row_order) {
      result(dest_row, col) = matrix(source_row, col);
      ++dest_row;
    }
    ++col;
  }

  return result;
}
void pri_queue_test_ordered_iterators(void) { for (int i = 0; i != test_size; ++i) { test_data data = make_test_data(i); test_data shuffled (data); std::random_shuffle(shuffled.begin(), shuffled.end()); pri_queue q; BOOST_REQUIRE(q.ordered_begin() == q.ordered_end()); fill_q(q, shuffled); test_data data_from_queue(q.ordered_begin(), q.ordered_end()); std::reverse(data_from_queue.begin(), data_from_queue.end()); BOOST_REQUIRE(data == data_from_queue); for (unsigned long i = 0; i != data.size(); ++i) BOOST_REQUIRE(std::find(q.ordered_begin(), q.ordered_end(), data[i]) != q.ordered_end()); for (unsigned long i = 0; i != data.size(); ++i) BOOST_REQUIRE(std::find(q.ordered_begin(), q.ordered_end(), data[i] + data.size()) == q.ordered_end()); for (unsigned long i = 0; i != data.size(); ++i) { BOOST_REQUIRE_EQUAL((long)std::distance(q.begin(), q.end()), (long)(data.size() - i)); q.pop(); } } }
/**
 * Pushes test_size elements into a pri_queue (keeping their handles), then
 * updates the element behind handle i to the i-th value of a shuffled copy
 * of the same data, and finally verifies the queue against the original
 * data with check_q().
 */
void pri_queue_test_update_shuffled(void)
{
    pri_queue q;
    test_data data = make_test_data(test_size);
    PUSH_WITH_HANDLES(handles, q, data);

    test_data shuffled(data);

    // std::random_shuffle was deprecated in C++14 and removed in C++17.
    // A default-seeded engine keeps the permutation reproducible between runs.
    std::mt19937 rng;
    std::shuffle(shuffled.begin(), shuffled.end(), rng);

    for (int i = 0; i != test_size; ++i)
        q.update(handles[i], shuffled[i]);

    check_q(q, data);
}
/**
 * For every data-set size in [0, test_size): fills a fresh pri_queue from a
 * shuffled copy of the test data and verifies it against the unshuffled
 * data with check_q().
 */
void pri_queue_test_random_push(void)
{
    // std::random_shuffle was deprecated in C++14 and removed in C++17.
    // A default-seeded engine keeps the permutations reproducible between
    // runs while still differing from one iteration to the next.
    std::mt19937 rng;

    for (int size = 0; size != test_size; ++size) {
        pri_queue q;
        test_data data = make_test_data(size);

        test_data shuffled(data);
        std::shuffle(shuffled.begin(), shuffled.end(), rng);

        fill_q(q, shuffled);
        check_q(q, data);
    }
}
inline void shuffle_stream ( const char * messages_in, const char * shuffled_out, long seed, double target_mem_bytes) { typedef std::vector<char> Message; typedef uint32_t pos_t; LOOM_ASSERT( std::string(messages_in) != std::string(shuffled_out), "cannot shuffle file in-place: " << messages_in); const auto stats = protobuf::InFile::stream_stats(messages_in); LOOM_ASSERT(stats.is_file, "shuffle input is not a file: " << messages_in); const uint64_t max_message_count = std::numeric_limits<pos_t>::max(); LOOM_ASSERT(stats.message_count, max_message_count); const size_t message_count = stats.message_count; double index_bytes = sizeof(pos_t) * message_count; double target_chunk_size = std::max(1.0, std::min(double(message_count), (target_mem_bytes - index_bytes) / stats.max_message_size)); size_t chunk_size = static_cast<size_t>(std::round(target_chunk_size)); std::vector<pos_t> index(message_count); for (size_t i = 0; i < message_count; ++i) { index[i] = i; } std::shuffle(index.begin(), index.end(), loom::rng_t(seed)); Message message; std::vector<Message> chunk; protobuf::OutFile shuffled(shuffled_out); for (size_t begin = 0; begin < message_count; begin += chunk_size) { size_t end = std::min(begin + chunk_size, message_count); chunk.resize(end - begin); protobuf::InFile messages(messages_in); for (size_t i : index) { messages.try_read_stream(message); if (begin <= i and i < end) { std::swap(message, chunk[i - begin]); } } for (const auto & message : chunk) { shuffled.write_stream(message); } } }
/**
 * For every data-set size in [0, test_size): fills a pri_queue from a
 * shuffled copy of the test data, swaps it with an empty queue, and checks
 * that the contents moved to the other queue (via check_q()) while the
 * original queue is left empty.
 */
void pri_queue_test_swap(void)
{
    // std::random_shuffle was deprecated in C++14 and removed in C++17.
    // A default-seeded engine keeps the permutations reproducible between
    // runs while still differing from one iteration to the next.
    std::mt19937 rng;

    for (int size = 0; size != test_size; ++size) {
        pri_queue q;
        test_data data = make_test_data(size);

        test_data shuffled(data);
        std::shuffle(shuffled.begin(), shuffled.end(), rng);
        fill_q(q, shuffled);

        pri_queue r;
        q.swap(r);

        check_q(r, data);
        BOOST_REQUIRE(q.empty());
    }
}