Ejemplo n.º 1
1
/**
 * Emit vec4 IR for the NIR intrinsics that the tessellation evaluation
 * shader visitor handles itself.
 *
 * Handled here: tess coord, outer/inner tess levels, primitive ID and
 * (per-vertex) input loads.  Every other intrinsic is forwarded to
 * vec4_visitor::nir_emit_intrinsic().
 *
 * \param instr  the NIR intrinsic instruction to translate
 */
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      /* Outer levels are read from ATTR 1; only the swizzle depends on the
       * tessellation domain (ZWZW for isolines, WZYX otherwise).
       */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      /* For the quad domain the inner levels come from ATTR 0 (swizzled
       * WZYX); for every other domain a single float is read from ATTR 1.
       */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      /* The primitive ID is produced by a dedicated TES opcode. */
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      /* Three ways to satisfy an input load:
       *  1. indirect offset  -> build a new header, then URB read;
       *  2. direct offset below the push limit -> plain MOV from an ATTR
       *     register (early break out of the switch);
       *  3. any other direct offset -> URB read with the default header.
       */
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      /* For 64-bit destinations the component index is halved.
       * NOTE(review): presumably because NIR counts components in 32-bit
       * units while a double occupies two of them - confirm.
       */
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         /* Indirect access: fold the dynamic offset into a fresh header. */
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            /* Grow the URB read length so it covers the slot(s) just
             * referenced: two slots are accounted for a 64-bit input, one
             * otherwise, rounded up to whole registers (2 slots each).
             */
            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            /* Pushed input fully handled; this leaves the switch. */
            break;
         }
      }

      /* Reached for indirect loads and for direct loads at or beyond the
       * push limit: fetch the data with explicit URB read messages.
       */
      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            /* Second read: lands REG_SIZE bytes past the first one in the
             * temporary and fetches the next slot (imm_offset + 1).
             */
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* NOTE(review): shuffle_64bit_data() presumably rearranges the raw
          * 32-bit halves that were read into proper DF channel layout -
          * confirm against its definition.
          */
         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      /* Not TES-specific: let the generic vec4 visitor handle it. */
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
Ejemplo n.º 2
0
// Returns a matrix with the same dimensions as `matrix` in which the entries
// of every column have been permuted. A fresh shuffle of the row order is
// drawn for each column (continuing from the previous column's order), so
// columns are permuted independently of one another.
//
// `gen` is dereferenced on every column and must therefore be non-null; its
// state is advanced by the shuffles.
arma::Mat<T> shuffled_matrix(const arma::Mat<T>& matrix, std::mt19937* gen) {
  arma::Mat<T> result(matrix.n_rows, matrix.n_cols);

  // row_order[k] is the source row copied into destination row k.
  std::vector<std::size_t> row_order(matrix.n_rows);
  std::iota(row_order.begin(), row_order.end(), 0);

  for (std::size_t col = 0; col < matrix.n_cols; ++col) {
    std::shuffle(row_order.begin(), row_order.end(), *gen);

    std::size_t dest_row = 0;
    for (const std::size_t src_row : row_order) {
      result(dest_row, col) = matrix(src_row, col);
      ++dest_row;
    }
  }
  return result;
}
Ejemplo n.º 3
0
void pri_queue_test_ordered_iterators(void)
{
    for (int i = 0; i != test_size; ++i) {
        test_data data = make_test_data(i);
        test_data shuffled (data);
        std::random_shuffle(shuffled.begin(), shuffled.end());
        pri_queue q;
        BOOST_REQUIRE(q.ordered_begin() == q.ordered_end());
        fill_q(q, shuffled);

        test_data data_from_queue(q.ordered_begin(), q.ordered_end());
        std::reverse(data_from_queue.begin(), data_from_queue.end());
        BOOST_REQUIRE(data == data_from_queue);

        for (unsigned long i = 0; i != data.size(); ++i)
            BOOST_REQUIRE(std::find(q.ordered_begin(), q.ordered_end(), data[i]) != q.ordered_end());

        for (unsigned long i = 0; i != data.size(); ++i)
            BOOST_REQUIRE(std::find(q.ordered_begin(), q.ordered_end(), data[i] + data.size()) == q.ordered_end());

        for (unsigned long i = 0; i != data.size(); ++i) {
            BOOST_REQUIRE_EQUAL((long)std::distance(q.begin(), q.end()), (long)(data.size() - i));
            q.pop();
        }
    }
}
Ejemplo n.º 4
0
void pri_queue_test_update_shuffled(void)
{
    pri_queue q;
    test_data data = make_test_data(test_size);
    PUSH_WITH_HANDLES(handles, q, data);

    test_data shuffled (data);
    std::random_shuffle(shuffled.begin(), shuffled.end());

    for (int i = 0; i != test_size; ++i)
        q.update(handles[i], shuffled[i]);

    check_q(q, data);
}
Ejemplo n.º 5
0
void pri_queue_test_random_push(void)
{
    for (int i = 0; i != test_size; ++i)
    {
        pri_queue q;
        test_data data = make_test_data(i);

        test_data shuffled (data);
        std::random_shuffle(shuffled.begin(), shuffled.end());

        fill_q(q, shuffled);

        check_q(q, data);
    }
}
Ejemplo n.º 6
0
/**
 * Copy the messages of a stream file to another file in shuffled order.
 *
 * A random permutation of message positions is drawn once from `seed`. The
 * output is then produced in chunks: for each chunk the whole input is
 * re-read sequentially and only the messages whose destination position
 * falls inside the current chunk are kept in memory, then written out.
 *
 * @param messages_in       path of the input stream; must be a regular file
 *                          and must differ from `shuffled_out`
 * @param shuffled_out      path of the output stream
 * @param seed              seed for the random permutation
 * @param target_mem_bytes  memory budget used to size the chunks; the
 *                          position index is subtracted from it and the
 *                          remainder is divided by the largest message size.
 *                          At least one message is always held per chunk.
 */
inline void shuffle_stream (
        const char * messages_in,
        const char * shuffled_out,
        long seed,
        double target_mem_bytes)
{
    typedef std::vector<char> Message;
    typedef uint32_t pos_t;

    LOOM_ASSERT(
        std::string(messages_in) != std::string(shuffled_out),
        "cannot shuffle file in-place: " << messages_in);
    const auto stats = protobuf::InFile::stream_stats(messages_in);
    LOOM_ASSERT(stats.is_file, "shuffle input is not a file: " << messages_in);

    // Positions are stored as pos_t, so the message count has to fit in it;
    // otherwise the index below would silently truncate.
    const uint64_t max_message_count = std::numeric_limits<pos_t>::max();
    LOOM_ASSERT(
        stats.message_count <= max_message_count,
        "too many messages to shuffle: " << stats.message_count
        << " > " << max_message_count);
    const size_t message_count = stats.message_count;

    // Chunk size = (budget - index) / largest message, clamped to
    // [1, message_count].
    double index_bytes = sizeof(pos_t) * message_count;
    double target_chunk_size = std::max(1.0, std::min(double(message_count),
        (target_mem_bytes - index_bytes) / stats.max_message_size));
    size_t chunk_size = static_cast<size_t>(std::round(target_chunk_size));

    // index[k] is the output position of the k-th message of the input.
    std::vector<pos_t> index(message_count);
    for (size_t i = 0; i < message_count; ++i) {
        index[i] = static_cast<pos_t>(i);
    }
    std::shuffle(index.begin(), index.end(), loom::rng_t(seed));

    Message message;
    std::vector<Message> chunk;
    protobuf::OutFile shuffled(shuffled_out);
    for (size_t begin = 0; begin < message_count; begin += chunk_size) {
        size_t end = std::min(begin + chunk_size, message_count);
        chunk.resize(end - begin);

        // One full sequential pass over the input per output chunk.
        protobuf::InFile messages(messages_in);
        for (size_t i : index) {
            messages.try_read_stream(message);
            if (begin <= i and i < end) {
                // Swap instead of copy; `message` is overwritten by the next
                // read anyway.
                std::swap(message, chunk[i - begin]);
            }
        }
        for (const auto & chunk_message : chunk) {
            shuffled.write_stream(chunk_message);
        }
    }
}
Ejemplo n.º 7
0
void pri_queue_test_swap(void)
{
    for (int i = 0; i != test_size; ++i)
    {
        pri_queue q;
        test_data data = make_test_data(i);
        test_data shuffled (data);
        std::random_shuffle(shuffled.begin(), shuffled.end());
        fill_q(q, shuffled);

        pri_queue r;

        q.swap(r);
        check_q(r, data);
        BOOST_REQUIRE(q.empty());
    }
}