Beispiel #1
0
static bool
brw_codegen_tcs_prog(struct brw_context *brw,
                     struct gl_shader_program *shader_prog,
                     struct brw_program *tcp,
                     struct brw_tcs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;
   const struct gen_device_info *devinfo = compiler->devinfo;
   struct brw_stage_state *stage_state = &brw->tcs.base;
   nir_shader *nir;
   struct brw_tcs_prog_data prog_data;
   bool start_busy = false;
   double start_time = 0;

   void *mem_ctx = ralloc_context(NULL);
   if (tcp) {
      nir = tcp->program.nir;
   } else {
      /* Create a dummy nir_shader.  We won't actually use NIR code to
       * generate assembly (it's easier to generate assembly directly),
       * but the whole compiler assumes one of these exists.
       */
      const nir_shader_compiler_options *options =
         ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
      nir = create_passthrough_tcs(mem_ctx, compiler, options, key);
   }

   memset(&prog_data, 0, sizeof(prog_data));

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    *
    * Note: param_count needs to be num_uniform_components * 4, since we add
    * padding around uniform values below vec4 size, so the worst case is that
    * every uniform is a float which gets padded to the size of a vec4.
    */
   int param_count = nir->num_uniforms / 4;

   prog_data.base.base.param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.base.pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.base.nr_params = param_count;

   if (tcp) {
      brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_CTRL, devinfo,
                                              shader_prog, &tcp->program,
                                              &prog_data.base.base, 0);

      prog_data.base.base.image_param =
         rzalloc_array(NULL, struct brw_image_param,
                       tcp->program.info.num_images);
      prog_data.base.base.nr_image_params = tcp->program.info.num_images;

      brw_nir_setup_glsl_uniforms(nir, shader_prog, &tcp->program,
                                  &prog_data.base.base,
                                  compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
   } else {
Beispiel #2
0
/**
 * Construct an ir_variable whose type is an array of interface blocks and
 * check the fields the constructor is expected to fill in: the name (by
 * value, not by pointer), the variable type, and the interface type.
 */
TEST(ir_variable_constructor, interface_array)
{
   void *mem_ctx = ralloc_context(NULL);

   static const glsl_struct_field f[] = {
      {
         glsl_type::vec(4),
         "v",
         false
      }
   };

   const glsl_type *const interface =
      glsl_type::get_interface_instance(f,
                                        ARRAY_SIZE(f),
                                        GLSL_INTERFACE_PACKING_STD140,
                                        "simple_interface");

   const glsl_type *const interface_array =
      glsl_type::get_array_instance(interface, 2);

   static const char name[] = "array_instance";

   ir_variable *const v =
      new(mem_ctx) ir_variable(interface_array, name, ir_var_uniform);

   /* Same characters, different storage: the test expects the constructor
    * to keep its own copy of the name.
    */
   EXPECT_STREQ(name, v->name);
   EXPECT_NE(name, v->name);
   EXPECT_EQ(interface_array, v->type);
   EXPECT_EQ(interface, v->get_interface_type());

   /* The EXPECT_* checks are non-fatal, so control always reaches this
    * point; release the context the variable was allocated against so the
    * test does not leak it.
    */
   ralloc_free(mem_ctx);
}
/**
 * Create the emitter's private ralloc context and build the code generator
 * on top of it.  The program key and program data handed to the generator
 * are zero-filled allocations owned by that same context.
 *
 * \param brw         driver context; supplies the compiler
 * \param debug_flag  when true, generator debug output is enabled under the
 *                    name "blorp"
 */
brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
                                           bool debug_flag)
   : mem_ctx(ralloc_context(NULL)),
     generator(brw->intelScreen->compiler,
               brw,
               mem_ctx,
               (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
               (struct brw_stage_prog_data *)
                  rzalloc(mem_ctx, struct brw_wm_prog_data),
               0,
               false,
               MESA_SHADER_FRAGMENT)
{
   if (!debug_flag)
      return;

   generator.enable_debug("blorp");
}

/**
 * Release the ralloc context created by the constructor, and with it
 * everything that was allocated against that context.
 */
brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
{
   ralloc_free(this->mem_ctx);
}

/**
 * Run the generator over the accumulated instruction list and return the
 * resulting assembly.
 *
 * \param program_size  forwarded to generator.get_assembly(), which is
 *                      expected to report the program size through it
 * \return the pointer returned by generator.get_assembly()
 */
const unsigned *
brw_blorp_eu_emitter::get_program(unsigned *program_size)
{
   /* Wrap the instruction list in a CFG, which is what the generator
    * consumes.
    */
   cfg_t cfg(&insts);

   /* NOTE(review): 16 is presumably the SIMD dispatch width -- confirm
    * against generate_code()'s declaration.
    */
   generator.generate_code(&cfg, 16);

   const unsigned *const assembly = generator.get_assembly(program_size);
   return assembly;
}
Beispiel #4
0
/**
 * Record the shader stage and create the two resources the visitor owns:
 * a ralloc context and the hash table of replacements.
 */
output_read_remover::output_read_remover(unsigned stage)
{
   this->mem_ctx = ralloc_context(NULL);
   this->replacements =
      _mesa_hash_table_create(NULL,
                              hash_table_var_hash,
                              _mesa_key_pointer_equal);
   this->stage = stage;
}
Beispiel #5
0
/**
 * Start with an empty pointer-keyed hash table, a fresh ralloc context and
 * the "loop found" flag cleared.
 */
loop_state::loop_state()
{
   ht = _mesa_hash_table_create(NULL,
                                _mesa_hash_pointer,
                                _mesa_key_pointer_equal);
   mem_ctx = ralloc_context(NULL);
   loop_found = false;
}
Beispiel #6
0
/**
 * Start with an empty pointer-keyed hash table, a fresh ralloc context and
 * the "loop found" flag cleared.
 */
loop_state::loop_state()
{
   ht = hash_table_ctor(0,
                        hash_table_pointer_hash,
                        hash_table_pointer_compare);
   mem_ctx = ralloc_context(NULL);
   loop_found = false;
}
	has_recursion_visitor()
		: current(NULL)
	{
		this->mem_ctx = ralloc_context(NULL);
		this->function_hash = hash_table_ctor(0, hash_table_pointer_hash,
			hash_table_pointer_compare);
	}
Beispiel #8
0
/**
 * Compile the linked compute shader of \p prog and upload the result to the
 * program cache.
 *
 * \param brw   driver context
 * \param prog  linked shader program; must contain a compute shader
 * \param cp    compute program handed to the backend compiler
 * \param key   program key; it is passed to the compiler and also used as
 *              the cache key
 * \return false if the backend produced no program, true once the program
 *         has been uploaded
 */
static bool
brw_codegen_cs_prog(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_compute_program *cp,
                    struct brw_cs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   const GLuint *program;
   void *mem_ctx = ralloc_context(NULL);
   GLuint program_size;
   struct brw_cs_prog_data prog_data;

   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
   assert (cs);

   memset(&prog_data, 0, sizeof(prog_data));

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    *
    * The arrays start out owned by mem_ctx so that a failed compile releases
    * them together with everything else; they are detached from mem_ctx
    * right before the upload below.
    */
   int param_count = cs->num_uniform_components +
                     cs->NumImages * BRW_IMAGE_PARAM_SIZE;

   /* The backend also sometimes adds params for texture size. */
   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
   prog_data.base.param =
      rzalloc_array(mem_ctx, const gl_constant_value *, param_count);
   prog_data.base.pull_param =
      rzalloc_array(mem_ctx, const gl_constant_value *, param_count);
   prog_data.base.image_param =
      rzalloc_array(mem_ctx, struct brw_image_param, cs->NumImages);
   prog_data.base.nr_params = param_count;
   prog_data.base.nr_image_params = cs->NumImages;

   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
                         &cp->program, prog, &program_size);
   if (program == NULL) {
      /* Also releases the param arrays allocated above. */
      ralloc_free(mem_ctx);
      return false;
   }

   if (prog_data.base.total_scratch) {
      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
                         prog_data.base.total_scratch * brw->max_cs_threads);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_CS))
      fprintf(stderr, "\n");

   /* The param, pull_param and image_param arrays will be freed by the
    * state cache, so take them out of mem_ctx before it is destroyed.
    */
   ralloc_steal(NULL, prog_data.base.param);
   ralloc_steal(NULL, prog_data.base.pull_param);
   ralloc_steal(NULL, prog_data.base.image_param);

   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
 /**
  * Begin with no current function, no progress recorded, a private ralloc
  * context and an empty pointer-keyed function table.
  */
 has_recursion_visitor()
    : current(NULL)
 {
    this->progress = false;
    this->mem_ctx = ralloc_context(NULL);
    this->function_hash =
       _mesa_hash_table_create(NULL,
                               _mesa_hash_pointer,
                               _mesa_key_pointer_equal);
 }
Beispiel #10
0
/**
 * Remember the driver context and program key, create a private ralloc
 * context, and initialise the compile state in `func` against it.
 */
brw_blorp_clear_program::brw_blorp_clear_program(
      struct brw_context *brw,
      const brw_blorp_clear_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key)
{
   brw_init_compile(brw, &this->func, this->mem_ctx);
}
Beispiel #11
0
/* Test that data values are written and read with proper alignment. */
static void
test_alignment(void)
{
   void *mem_ctx = ralloc_context(NULL);
   uint8_t payload[] = "ABCDEFGHIJKLMNOP";
   struct blob_reader reader;
   struct blob *blob = blob_create(mem_ctx);
   size_t stride, prev_size, len;

   /* Write a single intptr value first.  The size it occupies is the
    * distance expected between any two consecutive intptr values, as long
    * as they are written with alignment.
    */
   blob_write_intptr(blob, (intptr_t) blob);

   stride = blob->size;
   prev_size = blob->size;

   /* For every byte count smaller than an intptr_t:
    *
    *   1. Write that (unaligned) number of bytes
    *   2. Verify that the write results in an unaligned size
    *   3. Write an intptr_t value
    *   4. Verify that this write results in an aligned size
    */
   len = 1;
   while (len < sizeof(intptr_t)) {
      blob_write_bytes(blob, payload, len);

      expect_unequal(stride, blob->size - prev_size,
                     "unaligned write of bytes");

      blob_write_intptr(blob, (intptr_t) blob);

      expect_equal(2 * stride, blob->size - prev_size,
                   "aligned write of intptr");

      prev_size = blob->size;
      len++;
   }

   /* Reading must apply the same alignment.  The writes above are known to
    * be aligned, so it is enough to check that the values come back intact.
    */
   blob_reader_init(&reader, blob->data, blob->size);

   expect_equal((intptr_t) blob, blob_read_intptr(&reader),
                "read of initial, aligned intptr_t");

   len = 1;
   while (len < sizeof(intptr_t)) {
      expect_equal_bytes(payload, blob_read_bytes(&reader, len),
                         len, "unaligned read of bytes");
      expect_equal((intptr_t) blob, blob_read_intptr(&reader),
                   "aligned read of intptr_t");
      len++;
   }

   ralloc_free(mem_ctx);
}
Beispiel #12
0
 /**
  * Set up the visitor: no name built yet, offset zero, a private ralloc
  * context, and the caller's dereference and shader program remembered.
  */
 get_sampler_name(ir_dereference *last,
                  struct gl_shader_program *shader_program)
 {
     this->mem_ctx = ralloc_context(NULL);

     /* Inputs supplied by the caller. */
     this->last = last;
     this->shader_program = shader_program;

     /* Results, filled in later. */
     this->name = NULL;
     this->offset = 0;
 }
void
set_uniform_initializer::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->prog = rzalloc(NULL, struct gl_shader_program);

   /* Set default values used by the test cases.
    */
   this->actual_index = 1;
   this->name = "i";
}
Beispiel #14
0
void
link_varyings::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->ir.make_empty();

   this->consumer_inputs
      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);

   this->consumer_interface_inputs
      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
}
Beispiel #15
0
/**
 * Remember the driver context and program key, value-initialise the
 * register members, start the MRF base at zero, and initialise the compile
 * state in `func` against a private ralloc context.
 */
brw_blorp_const_color_program::brw_blorp_const_color_program(
      struct brw_context *brw,
      const brw_blorp_const_color_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key),
     R0(),
     R1(),
     clear_rgba(),
     base_mrf(0)
{
   brw_init_compile(brw, &this->func, this->mem_ctx);
}
Beispiel #16
0
/**
 * Reset a VS compile structure and fill in its inputs.
 *
 * \param brw   driver context (not read by this function)
 * \param prog  shader program stored in c->base.shader_prog
 * \param vp    vertex program stored in c->vp
 * \param key   program key; copied by value into c->key
 * \param c     compile structure to initialise; it is zeroed first and
 *              receives a newly created ralloc context in c->base.mem_ctx
 */
static void
brw_vs_init_compile(struct brw_context *brw,
	            struct gl_shader_program *prog,
	            struct brw_vertex_program *vp,
	            const struct brw_vs_prog_key *key,
	            struct brw_vs_compile *c)
{
   /* Start from a clean slate so every field not set below is zero. */
   memset(c, 0, sizeof(*c));

   c->base.mem_ctx = ralloc_context(NULL);
   c->base.shader_prog = prog;
   c->vp = vp;
   memcpy(&c->key, key, sizeof(*key));
}
void
link_varyings::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->ir.make_empty();

   this->consumer_inputs =
         _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                                 _mesa_key_string_equal);

   this->consumer_interface_inputs =
         _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                                 _mesa_key_string_equal);
}
Beispiel #18
0
/**
 * Look up the BLORP clear kernel in the program cache, building, compiling
 * and uploading it on a miss.
 *
 * The shader built here copies a flat-interpolated vec4 input ("v_color")
 * to the fragment colour output.
 *
 * \param brw                  driver context
 * \param params               receives the kernel offset and program data
 *                             in wm_prog_kernel / wm_prog_data
 * \param use_replicated_data  selects the replicated-data variant; part of
 *                             the cache key and forwarded to the compiler
 */
static void
brw_blorp_params_get_clear_kernel(struct brw_context *brw,
                                  struct brw_blorp_params *params,
                                  bool use_replicated_data)
{
    struct brw_blorp_const_color_prog_key blorp_key;
    memset(&blorp_key, 0, sizeof(blorp_key));
    blorp_key.use_simd16_replicated_data = use_replicated_data;

    /* Cache hit: params already points at the uploaded kernel. */
    if (brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
                         &blorp_key, sizeof(blorp_key),
                         &params->wm_prog_kernel, &params->wm_prog_data))
        return;

    void *mem_ctx = ralloc_context(NULL);

    /* Parent the temporary shader to mem_ctx so that it is released by the
     * ralloc_free() at the end of this function instead of being leaked.
     *
     * NOTE(review): this assumes brw_blorp_compile_nir_shader() neither
     * frees the shader nor takes ownership of it -- confirm.
     */
    nir_builder b;
    nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
    b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear");

    nir_variable *v_color = nir_variable_create(b.shader, nir_var_shader_in,
                            glsl_vec4_type(), "v_color");
    v_color->data.location = VARYING_SLOT_VAR0;
    v_color->data.interpolation = INTERP_MODE_FLAT;

    nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out,
                               glsl_vec4_type(),
                               "gl_FragColor");
    frag_color->data.location = FRAG_RESULT_COLOR;

    nir_copy_var(&b, frag_color, v_color);

    struct brw_wm_prog_key wm_key;
    brw_blorp_init_wm_prog_key(&wm_key);

    struct brw_blorp_prog_data prog_data;
    unsigned program_size;
    const unsigned *program =
        brw_blorp_compile_nir_shader(brw, b.shader, &wm_key, use_replicated_data,
                                     &prog_data, &program_size);

    brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
                     &blorp_key, sizeof(blorp_key),
                     program, program_size,
                     &prog_data, sizeof(prog_data),
                     &params->wm_prog_kernel, &params->wm_prog_data);

    ralloc_free(mem_ctx);
}
Beispiel #19
0
/**
 * Remember the driver context and program key, value-initialise the
 * register members, start the MRF base at zero, give the two prog_data
 * fields set below known values, and initialise the compile state in
 * `func` against a private ralloc context.
 */
brw_blorp_const_color_program::brw_blorp_const_color_program(
      struct brw_context *brw,
      const brw_blorp_const_color_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key),
     R0(),
     R1(),
     clear_rgba(),
     base_mrf(0)
{
   this->prog_data.first_curbe_grf = 0;
   this->prog_data.persample_msaa_dispatch = false;

   brw_init_compile(brw, &this->func, this->mem_ctx);
}
void
common_builtin::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->ir.make_empty();

   initialize_context_to_defaults(&this->ctx, API_OPENGL_COMPAT);

   this->shader = rzalloc(this->mem_ctx, gl_shader);
   this->shader->Type = this->shader_type;
   this->shader->Stage = _mesa_shader_enum_to_shader_stage(this->shader_type);

   this->state =
      new(mem_ctx) _mesa_glsl_parse_state(&this->ctx, this->shader->Stage,
                                          this->shader);

   _mesa_glsl_initialize_types(this->state);
   _mesa_glsl_initialize_variables(&this->ir, this->state);
}
Beispiel #21
0
/**
 * Compile the SF program for \p key and upload it to the program cache.
 *
 * A temporary ralloc context is handed to the compiler and released once
 * the upload has been issued.
 */
static void compile_sf_prog( struct brw_context *brw,
			     struct brw_sf_prog_key *key )
{
   void *mem_ctx = ralloc_context(NULL);
   struct brw_sf_prog_data prog_data;
   unsigned program_size;

   const unsigned *program =
      brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data,
                     &brw->vue_map_geom_out, &program_size);

   brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->sf.prog_offset, &brw->sf.prog_data);

   ralloc_free(mem_ctx);
}
Beispiel #22
0
/**
 * Run the copy-splitting block callback over every block of \p impl.
 *
 * state.mem_ctx is the ralloc parent of the impl; state.dead_ctx is a
 * scratch context that is released as soon as the walk is done.
 *
 * \return true if the block callback reported progress
 */
static bool
split_var_copies_impl(nir_function_impl *impl)
{
   struct split_var_copies_state state;

   state.progress = false;
   state.mem_ctx = ralloc_parent(impl);
   state.dead_ctx = ralloc_context(NULL);

   nir_foreach_block(impl, split_var_copies_block, &state);

   ralloc_free(state.dead_ctx);

   if (!state.progress)
      return false;

   /* Block indices and dominance are declared preserved. */
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return true;
}
Beispiel #23
0
/* Test that we can read and write some large objects (exercising the code
 * in the blob_write functions that reallocs blob->data).
 */
static void
test_big_objects(void)
{
   void *mem_ctx = ralloc_context(NULL);
   struct blob blob;
   struct blob_reader reader;
   int size = 1000;
   int count = 1000;
   size_t idx;
   char *pattern;

   blob_init(&blob);

   /* Build one buffer holding a repeating 0..255 byte pattern. */
   pattern = ralloc_size(mem_ctx, size);
   idx = 0;
   while (idx < size) {
      pattern[idx] = idx % 256;
      idx++;
   }

   /* Append that buffer to the blob `count` times. */
   idx = 0;
   while (idx < count) {
      blob_write_bytes(&blob, pattern, size);
      idx++;
   }

   blob_reader_init(&reader, blob.data, blob.size);

   /* Read it back the same number of times, checking every copy. */
   idx = 0;
   while (idx < count) {
      expect_equal_bytes((uint8_t *) pattern,
                         blob_read_bytes(&reader, size), size,
                         "read of large objects");
      idx++;
   }

   /* The reader must have consumed exactly the whole blob... */
   expect_equal(reader.end - reader.data, reader.current - reader.data,
                "number of bytes read reading large objects");

   /* ...and must not have run past its end. */
   expect_equal(false, reader.overrun,
                "overrun flag not set reading large objects");

   blob_finish(&blob);
   ralloc_free(mem_ctx);
}
Beispiel #24
0
/* Test that we detect overrun. */
static void
test_overrun(void)
{
   void *mem_ctx = ralloc_context(NULL);
   struct blob *blob = blob_create(mem_ctx);
   struct blob_reader reader;
   uint32_t magic = 0xdeadbeef;

   /* The blob holds exactly one 32-bit value. */
   blob_write_uint32(blob, magic);

   blob_reader_init(&reader, blob->data, blob->size);

   /* The first read is in bounds and must not raise the overrun flag. */
   expect_equal(magic, blob_read_uint32(&reader), "read before overrun");
   expect_equal(false, reader.overrun, "overrun flag not set");

   /* The second read runs off the end: it is expected to yield 0 and set
    * the flag.
    */
   expect_equal(0, blob_read_uint32(&reader), "read at overrun");
   expect_equal(true, reader.overrun, "overrun flag set");

   ralloc_free(mem_ctx);
}
Beispiel #25
0
/**
 * Compile the fragment program \p fp for \p key and upload the result to
 * the program cache.
 *
 * \param brw      driver context
 * \param fp       fragment program; its NIR is cloned before compiling
 * \param key      program key; passed to the compiler and used as cache key
 * \param vue_map  VUE map forwarded to brw_compile_fs()
 * \return false if brw_compile_fs() produced no program, true once the
 *         program has been uploaded
 */
static bool
brw_codegen_wm_prog(struct brw_context *brw,
                    struct brw_program *fp,
                    struct brw_wm_prog_key *key,
                    struct brw_vue_map *vue_map)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data prog_data;
   const GLuint *program;
   bool start_busy = false;
   double start_time = 0;

   /* Work on a private copy of the NIR, owned by mem_ctx. */
   nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir);

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (fp->program.is_arb_asm)
      prog_data.base.use_alt_mode = true;

   assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);

   /* Uniform setup differs between GLSL and ARB assembly programs. */
   if (!fp->program.is_arb_asm) {
      brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program,
                                  &prog_data.base, true);
      brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir,
                                 NULL, prog_data.base.ubo_ranges);
   } else {
      brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base);

      if (unlikely(INTEL_DEBUG & DEBUG_WM))
         brw_dump_arb_asm("fragment", &fp->program);
   }

   /* For perf debugging, remember whether the last batch buffer was busy
    * and when the compile started; both are checked after the compile.
    */
   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    brw_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   /* Shader-time indices stay at -1 unless DEBUG_SHADER_TIME is enabled. */
   int st_index8 = -1, st_index16 = -1, st_index32 = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      st_index8 = brw_get_shader_time_index(brw, &fp->program, ST_FS8,
                                            !fp->program.is_arb_asm);
      st_index16 = brw_get_shader_time_index(brw, &fp->program, ST_FS16,
                                             !fp->program.is_arb_asm);
      st_index32 = brw_get_shader_time_index(brw, &fp->program, ST_FS32,
                                             !fp->program.is_arb_asm);
   }

   char *error_str = NULL;
   program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx,
                            key, &prog_data, nir,
                            &fp->program, st_index8, st_index16, st_index32,
                            true, false, vue_map,
                            &error_str);

   if (program == NULL) {
      /* GLSL programs additionally record the failure in the link status
       * and info log.
       */
      if (!fp->program.is_arb_asm) {
         fp->program.sh.data->LinkStatus = LINKING_FAILURE;
         ralloc_strcat(&fp->program.sh.data->InfoLog, error_str);
      }

      _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      /* Any compile after the first one for this program is reported as a
       * recompile.
       */
      if (fp->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id,
                             key->program_string_id, key);
      }
      fp->compiled_once = true;

      /* Busy before the compile but idle now: report a GPU stall. */
      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);

   if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
      fprintf(stderr, "\n");

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.param);
   ralloc_steal(NULL, prog_data.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
                    key, sizeof(struct brw_wm_prog_key),
                    program, prog_data.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->wm.base.prog_offset, &brw->wm.base.prog_data);

   ralloc_free(mem_ctx);

   return true;
}
Beispiel #26
0
/**
 * Compile the geometry program \p gp of \p prog for \p key and upload the
 * result to the program cache.
 *
 * Before invoking the backend this fills in the GS program data: control
 * data format and header size, output VUE map and vertex size, URB entry
 * size, output topology and URB read length.
 *
 * \param brw   driver context
 * \param prog  linked shader program containing the geometry shader
 * \param gp    geometry program to compile
 * \param key   program key; copied into the compile state and used as the
 *              cache key
 * \return false if the URB entry would exceed the hardware limit or if the
 *         backend produced no program; true once the program has been
 *         uploaded.  On failure the uniform reference arrays allocated here
 *         are released again.
 */
bool
brw_codegen_gs_prog(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_geometry_program *gp,
                    struct brw_gs_prog_key *key)
{
   struct brw_stage_state *stage_state = &brw->gs.base;
   struct brw_gs_compile c;
   memset(&c, 0, sizeof(c));
   c.key = *key;
   c.gp = gp;

   c.prog_data.include_primitive_id =
      (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;

   c.prog_data.invocations = gp->program.Invocations;

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    *
    * Note: param_count needs to be num_uniform_components * 4, since we add
    * padding around uniform values below vec4 size, so the worst case is that
    * every uniform is a float which gets padded to the size of a vec4.
    */
   struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
   int param_count = gs->num_uniform_components * 4;

   /* We also upload clip plane data as uniforms */
   param_count += MAX_CLIP_PLANES * 4;

   c.prog_data.base.base.param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   c.prog_data.base.base.pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   c.prog_data.base.base.nr_params = param_count;

   if (brw->gen >= 7) {
      if (gp->program.OutputType == GL_POINTS) {
         /* When the output type is points, the geometry shader may output data
          * to multiple streams, and EndPrimitive() has no effect.  So we
          * configure the hardware to interpret the control data as stream ID.
          */
         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;

         /* We only have to emit control bits if we are using streams */
         if (prog->Geom.UsesStreams)
            c.control_data_bits_per_vertex = 2;
         else
            c.control_data_bits_per_vertex = 0;
      } else {
         /* When the output type is triangle_strip or line_strip, EndPrimitive()
          * may be used to terminate the current strip and start a new one
          * (similar to primitive restart), and outputting data to multiple
          * streams is not supported.  So we configure the hardware to interpret
          * the control data as EndPrimitive information (a.k.a. "cut bits").
          */
         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;

         /* We only need to output control data if the shader actually calls
          * EndPrimitive().
          */
         c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
      }
   } else {
      /* There are no control data bits in gen6. */
      c.control_data_bits_per_vertex = 0;

      /* If it is using transform feedback, enable it */
      if (prog->TransformFeedback.NumVarying)
         c.prog_data.gen6_xfb_enabled = true;
      else
         c.prog_data.gen6_xfb_enabled = false;
   }
   c.control_data_header_size_bits =
      gp->program.VerticesOut * c.control_data_bits_per_vertex;

   /* 1 HWORD = 32 bytes = 256 bits */
   c.prog_data.control_data_header_size_hwords =
      ALIGN(c.control_data_header_size_bits, 256) / 256;

   GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;

   /* In order for legacy clipping to work, we need to populate the clip
    * distance varying slots whenever clipping is enabled, even if the vertex
    * shader doesn't write to gl_ClipDistance.
    */
   if (c.key.base.userclip_active) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
   }

   brw_compute_vue_map(brw->intelScreen->devinfo,
                       &c.prog_data.base.vue_map, outputs_written);

   /* Compute the output vertex size.
    *
    * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
    * Size (p168):
    *
    *     [0,62] indicating [1,63] 16B units
    *
    *     Specifies the size of each vertex stored in the GS output entry
    *     (following any Control Header data) as a number of 128-bit units
    *     (minus one).
    *
    *     Programming Restrictions: The vertex size must be programmed as a
    *     multiple of 32B units with the following exception: Rendering is
    *     disabled (as per SOL stage state) and the vertex size output by the
    *     GS thread is 16B.
    *
    *     If rendering is enabled (as per SOL state) the vertex size must be
    *     programmed as a multiple of 32B units. In other words, the only time
    *     software can program a vertex size with an odd number of 16B units
    *     is when rendering is disabled.
    *
    * Note: B=bytes in the above text.
    *
    * It doesn't seem worth the extra trouble to optimize the case where the
    * vertex size is 16B (especially since this would require special-casing
    * the GEN assembly that writes to the URB).  So we just set the vertex
    * size to a multiple of 32B (2 vec4's) in all cases.
    *
    * The maximum output vertex size is 62*16 = 992 bytes (31 hwords).  We
    * budget that as follows:
    *
    *   512 bytes for varyings (a varying component is 4 bytes and
    *             gl_MaxGeometryOutputComponents = 128)
    *    16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
    *             bytes)
    *    16 bytes overhead for gl_Position (we allocate it a slot in the VUE
    *             even if it's not used)
    *    32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
    *             whenever clip planes are enabled, even if the shader doesn't
    *             write to gl_ClipDistance)
    *    16 bytes overhead since the VUE size must be a multiple of 32 bytes
    *             (see below)--this causes up to 1 VUE slot to be wasted
    *   400 bytes available for varying packing overhead
    *
    * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
    * per interpolation type, so this is plenty.
    *
    */
   unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
   assert(brw->gen == 6 ||
          output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
   c.prog_data.output_vertex_size_hwords =
      ALIGN(output_vertex_size_bytes, 32) / 32;

   /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
    * That divides up as follows:
    *
    *     64 bytes for the control data header (cut indices or StreamID bits)
    *   4096 bytes for varyings (a varying component is 4 bytes and
    *              gl_MaxGeometryTotalOutputComponents = 1024)
    *   4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
    *              bytes/vertex and gl_MaxGeometryOutputVertices is 256)
    *   4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
    *              even if it's not used)
    *   8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
    *              whenever clip planes are enabled, even if the shader doesn't
    *              write to gl_ClipDistance)
    *   4096 bytes overhead since the VUE size must be a multiple of 32
    *              bytes (see above)--this causes up to 1 VUE slot to be wasted
    *   8128 bytes available for varying packing overhead
    *
    * Worst-case varying packing overhead is 3/4 of a varying slot per
    * interpolation type, which works out to 3072 bytes, so this would allow
    * us to accommodate 2 interpolation types without any danger of running
    * out of URB space.
    *
    * In practice, the risk of running out of URB space is very small, since
    * the above figures are all worst-case, and most of them scale with the
    * number of output vertices.  So we'll just calculate the amount of space
    * we need, and if it's too large, fail to compile.
    *
    * The above is for gen7+ where we have a single URB entry that will hold
    * all the output. In gen6, we will have to allocate URB entries for every
    * vertex we emit, so our URB entries only need to be large enough to hold
    * a single vertex. Also, gen6 does not have a control data header.
    */
   unsigned output_size_bytes;
   if (brw->gen >= 7) {
      output_size_bytes =
         c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
      output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
   } else {
      output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
   }

   /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
    * which comes before the control header.
    */
   if (brw->gen >= 8)
      output_size_bytes += 32;

   assert(output_size_bytes >= 1);
   int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
   if (brw->gen == 6)
      max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
   if (output_size_bytes > max_output_size_bytes) {
      /* Nothing will be uploaded, so the state cache never gets to free the
       * uniform reference arrays; release them here.
       */
      ralloc_free(c.prog_data.base.base.param);
      ralloc_free(c.prog_data.base.base.pull_param);
      return false;
   }


   /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
    * a multiple of 128 bytes in gen6.
    */
   if (brw->gen >= 7)
      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
   else
      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;

   c.prog_data.output_topology =
      get_hw_prim_for_gl_prim(gp->program.OutputType);

   brw_compute_vue_map(brw->intelScreen->devinfo,
                       &c.input_vue_map, c.key.input_varyings);

   /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
    * need to program a URB read length of ceiling(num_slots / 2).
    */
   c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;

   void *mem_ctx = ralloc_context(NULL);
   unsigned program_size;
   const unsigned *program =
      brw_gs_emit(brw, prog, &c, mem_ctx, &program_size);
   if (program == NULL) {
      /* As above: without an upload the uniform reference arrays have no
       * other owner.  The pointers are re-read from prog_data so that any
       * replacement made by the backend is the one that gets freed.
       */
      ralloc_free(c.prog_data.base.base.param);
      ralloc_free(c.prog_data.base.base.pull_param);
      ralloc_free(mem_ctx);
      return false;
   }

   /* Scratch space is used for register spilling */
   if (c.base.last_scratch) {
      perf_debug("Geometry shader triggered register spilling.  "
                 "Try reducing the number of live vec4 values to "
                 "improve performance.\n");

      c.prog_data.base.base.total_scratch
         = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);

      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
			 c.prog_data.base.base.total_scratch *
                         brw->max_gs_threads);
   }

   brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &stage_state->prog_offset, &brw->gs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
Beispiel #27
0
/**
 * Compile a vertex shader variant for \p key and upload it to the program
 * cache.
 *
 * \param brw   driver context; supplies the compiler, the program cache and
 *              the VS stage state that receives the uploaded program.
 * \param prog  linked GLSL program, or NULL (the code below treats a NULL
 *              \p prog as an ARB program).
 * \param vp    vertex program whose NIR is compiled.
 * \param key   program key selecting the variant; also used as the cache key.
 *
 * \return true when the program was compiled and uploaded, false when
 *         brw_compile_vs() failed.  On failure the error is appended to
 *         prog->InfoLog (when \p prog is non-NULL) and reported through
 *         _mesa_problem().
 *
 * The uniform arrays (param, pull_param, image_param) are allocated without
 * a ralloc parent because, on success, they are handed over together with
 * prog_data to the state cache.  On failure nothing takes them over, so they
 * are released here.
 */
bool
brw_codegen_vs_prog(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_vertex_program *vp,
                    struct brw_vs_prog_key *key)
{
   GLuint program_size;
   const GLuint *program;
   struct brw_vs_prog_data prog_data;
   struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
   void *mem_ctx;
   int i;
   struct brw_shader *vs = NULL;
   bool start_busy = false;
   double start_time = 0;

   if (prog)
      vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (!prog)
      stage_prog_data->use_alt_mode = true;

   /* Scratch context for everything that only has to live for the duration
    * of this compile; released on every exit path.
    */
   mem_ctx = ralloc_context(NULL);

   brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
                                           brw->intelScreen->devinfo,
                                           prog, &vp->program.Base,
                                           stage_prog_data, 0);

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count = vp->program.Base.nir->num_uniforms;
   if (!brw->intelScreen->compiler->scalar_vs)
      param_count *= 4;

   if (vs)
      stage_prog_data->nr_image_params = vs->base.NumImages;

   /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
    * planes as uniforms.
    */
   param_count += key->nr_userclip_plane_consts * 4;

   stage_prog_data->param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   stage_prog_data->pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   stage_prog_data->image_param =
      rzalloc_array(NULL, struct brw_image_param,
                    stage_prog_data->nr_image_params);
   stage_prog_data->nr_params = param_count;

   if (prog) {
      brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
                                  stage_prog_data,
                                  brw->intelScreen->compiler->scalar_vs);
   } else {
      brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
                                 stage_prog_data);
   }

   GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
   prog_data.inputs_read = vp->program.Base.InputsRead;

   if (key->copy_edgeflag) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
      prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
   }

   if (brw->gen < 6) {
      /* Put dummy slots into the VUE for the SF to put the replaced
       * point sprite coords in.  We shouldn't need these dummy slots,
       * which take up precious URB space, but it would mean that the SF
       * doesn't get nice aligned pairs of input coords into output
       * coords, which would be a pain to handle.
       */
      for (i = 0; i < 8; i++) {
         if (key->point_coord_replace & (1 << i))
            outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
      }

      /* if back colors are written, allocate slots for front colors too */
      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
   }

   /* In order for legacy clipping to work, we need to populate the clip
    * distance varying slots whenever clipping is enabled, even if the vertex
    * shader doesn't write to gl_ClipDistance.
    */
   if (key->nr_userclip_plane_consts > 0) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
   }

   brw_compute_vue_map(brw->intelScreen->devinfo,
                       &prog_data.base.vue_map, outputs_written,
                       prog ? prog->SeparateShader : false);

   /* Developer switch: flip to 1 to dump the program to stderr. */
   if (0) {
      _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
                               true);
   }

   /* Sample GPU business and wall-clock time before compiling so a stall
    * caused by the compile can be reported afterwards.
    */
   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    drm_intel_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS))
      brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);

   int st_index = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);

   /* Emit GEN4 code.
    */
   char *error_str;
   program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key,
                            &prog_data, vp->program.Base.nir,
                            brw_select_clip_planes(&brw->ctx),
                            !_mesa_is_gles3(&brw->ctx),
                            st_index, &program_size, &error_str);
   if (program == NULL) {
      if (prog) {
         prog->LinkStatus = false;
         ralloc_strcat(&prog->InfoLog, error_str);
      }

      _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);

      /* Nothing was uploaded, so the state cache never takes over the
       * uniform arrays.  They have no ralloc parent and would otherwise
       * leak on every failed compile.
       */
      ralloc_free(stage_prog_data->param);
      ralloc_free(stage_prog_data->pull_param);
      ralloc_free(stage_prog_data->image_param);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug) && vs) {
      if (vs->compiled_once) {
         brw_vs_debug_recompile(brw, prog, key);
      }
      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
      vs->compiled_once = true;
   }

   /* Scratch space is used for register spilling */
   if (prog_data.base.base.total_scratch) {
      brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
                         prog_data.base.base.total_scratch *
                         brw->max_vs_threads);
   }

   brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
                    key, sizeof(struct brw_vs_prog_key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->vs.base.prog_offset, &brw->vs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
	/* Set up an optimizer context for `target`: create a parentless ralloc
	 * context, keep it in mem_ctx, and initialize mesa_ctx for that target.
	 */
	glslopt_ctx (glslopt_target target) {
		void *root_ctx = ralloc_context (NULL);

		mem_ctx = root_ctx;
		initialize_mesa_context (&mesa_ctx, target);
	}
Beispiel #29
0
/**
 * Common-subexpression pass over the instructions of a single basic block.
 *
 * Walks every instruction from block->start through block->end.  For each
 * eligible expression it searches `aeb` for an earlier entry whose generator
 * has the same opcode, saturate flag and operands.  A first sighting is
 * recorded as a new entry; a repeat is replaced by a MOV from a temporary
 * that holds the generator's result.
 *
 * `progress` is set whenever a match is found.
 *
 * NOTE(review): this excerpt ends inside the loop body; the remainder of the
 * function (including its return statement) is not visible here.
 */
bool
fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
{
   bool progress = false;

   /* Context parented to this->mem_ctx; the aeb_entry records created below
    * are allocated from it.
    */
   void *mem_ctx = ralloc_context(this->mem_ctx);

   for (fs_inst *inst = (fs_inst *)block->start;
	inst != block->end->next;
	inst = (fs_inst *) inst->next) {

      /* Skip some cases. */
      /* Only unpredicated expressions with mlen == 0, no force_uncompressed /
       * force_sechalf and no conditional modifier are considered.
       */
      if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
          !inst->force_uncompressed && !inst->force_sechalf &&
          !inst->conditional_mod)
      {
	 bool found = false;

	 /* Only meaningful after the search when `found` is true; the
	  * !found branch below declares its own, shadowing `entry`.
	  */
	 aeb_entry *entry;
	 foreach_list(entry_node, aeb) {
	    entry = (aeb_entry *) entry_node;

	    /* Match current instruction's expression against those in AEB. */
	    if (inst->opcode == entry->generator->opcode &&
		inst->saturate == entry->generator->saturate &&
		operands_match(entry->generator->src, inst->src)) {

	       found = true;
	       progress = true;
	       break;
	    }
	 }

	 if (!found) {
	    /* Our first sighting of this expression.  Create an entry. */
	    aeb_entry *entry = ralloc(mem_ctx, aeb_entry);
	    entry->tmp = reg_undef;
	    entry->generator = inst;
	    aeb->push_tail(entry);
	 } else {
	    /* This is at least our second sighting of this expression.
	     * If we don't have a temporary already, make one.
	     */
	    bool no_existing_temp = entry->tmp.file == BAD_FILE;
	    if (no_existing_temp) {
	       entry->tmp = fs_reg(this, glsl_type::float_type);
	       entry->tmp.type = inst->dst.type;

	       /* Redirect the generator to write the temporary, and insert a
		* MOV right after it that copies the temporary into the
		* generator's original destination.
		*/
	       fs_inst *copy = new(ralloc_parent(inst))
		  fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
	       entry->generator->insert_after(copy);
	       entry->generator->dst = entry->tmp;
	    }

	    /* dest <- temp */
	    fs_inst *copy = new(ralloc_parent(inst))
	       fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
	    inst->replace_with(copy);

	    /* Appending an instruction may have changed our bblock end. */
	    if (inst == block->end) {
	       block->end = copy;
	    }

	    /* Continue iteration with copy->next */
	    inst = copy;
	 }
      }
Beispiel #30
0
/*
 * Program entry point: loads the solar-system description and a mesh, sets
 * up the window, shaders and matrix stacks, then renders one entity per
 * solar-system body each frame until handle_input() reports that the loop
 * should stop.
 *
 * argv[1], when given, is the path of the mesh to import; otherwise the
 * bundled teapot is used.
 *
 * Returns 0 after a normal shutdown and 1 if the solar system, the mesh or
 * either shader fails to load.
 */
int main(int argc, char **argv)
{
	int i;
	Light light;
	const char *filename;
	Camera cam;
	Vec3 position = {0, 0, 150e9};
	Vec3 up =  {0, 1, 0};
	Vec3 target = {0, 0, 0};
	Shader *shader_light;
	Shader *shader_simple;
	Mesh *mesh;
	Renderable planet;

	if (argc < 2)
		filename = STRINGIFY(ROOT_PATH) "/data/teapot.ply";
	else
		filename = argv[1];

	solsys = solsys_load(STRINGIFY(ROOT_PATH) "/data/sol.ini");
	if (solsys == NULL)
		return 1;

	mesh = mesh_import(filename);
	if (mesh == NULL)
		return 1;
	for (i = 0; i < mesh->num_vertices; i++) /* Blow up the teapot */
	{
		mesh->vertex[i].x = mesh->vertex[i].x * 100;
		mesh->vertex[i].y = mesh->vertex[i].y * 100;
		mesh->vertex[i].z = mesh->vertex[i].z * 100;
	}

	cam.fov = M_PI/4;
	cam.left = 0;
	cam.bottom = 0;
	cam.width = 1024;
	cam.height = 768;
	cam.zNear = 1e6;
	cam.zFar = 4.5e15;
	init_allegro(&cam);
	cam_lookat(&cam, position, target, up);

	glewInit();

	shader_light = shader_create(STRINGIFY(ROOT_PATH) "/data/lighting.v.glsl", 
	                             STRINGIFY(ROOT_PATH) "/data/lighting.f.glsl");
	if (shader_light == NULL)
		return 1;
	
	shader_simple = shader_create(STRINGIFY(ROOT_PATH) "/data/simple.v.glsl", 
	                              STRINGIFY(ROOT_PATH) "/data/simple.f.glsl");
	if (shader_simple == NULL)
		return 1;

	glmProjectionMatrix = glmNewMatrixStack();
	glmViewMatrix = glmNewMatrixStack();
	glmModelMatrix = glmNewMatrixStack();

	light.position = light_pos;
	memcpy(light.ambient, light_ambient, sizeof(light_ambient));
	memcpy(light.diffuse, light_diffuse, sizeof(light_diffuse));
	memcpy(light.specular, light_specular, sizeof(light_specular));

	glClearColor(20/255., 30/255., 50/255., 1.0);
	glEnable(GL_DEPTH_TEST);
	glEnable(GL_CULL_FACE);
	glCullFace(GL_BACK);
	glPointSize(2);

	/* Every body is drawn with the same mesh and lighting shader. */
	planet.data = mesh;
	planet.upload_to_gpu = mesh_upload_to_gpu;
	planet.render = mesh_render;
	planet.shader = shader_light;
	renderable_upload_to_gpu(&planet);

	/* Transformation matrices */
	cam_projection_matrix(&cam, glmProjectionMatrix);

	/* Start rendering */
	while(handle_input(ev_queue, &cam))
	{
		void *ctx;
		/* Head and tail of this frame's entity list.  The head starts out
		 * NULL so that a solar system with no bodies never hands an
		 * uninitialized pointer to the renderer. */
		Entity *renderlist = NULL;
		Entity *prev = NULL;

		t += 365*86400;

		/* Physics stuff */
		solsys_update(solsys, t);

		/* Per-frame allocation context: all entities built below are
		 * released together by the ralloc_free(ctx) at the end of the
		 * frame. */
		ctx = ralloc_context(NULL);

		for (i = 0; i < solsys->num_bodies; i++)
		{
			Entity *e;

			e = ralloc(ctx, Entity);
			e->orientation = (Quaternion) {1, 0, 0, 0};
			e->renderable = &planet;
			e->position = solsys->body[i].position;
			e->radius = solsys->body[i].radius;
			e->prev = prev;
			e->next = NULL;
			if (prev != NULL)
				prev->next = e;
			else
				renderlist = e; /* first entity becomes the list head */
			prev = e;
		}

		/* Rendering stuff */
		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

		glmLoadIdentity(glmModelMatrix);
		glmLoadIdentity(glmViewMatrix);
		cam_view_matrix(&cam, glmViewMatrix); /* view */

		light_upload_to_gpu(&light, shader_light);

		/* Nothing to draw when the solar system has no bodies. */
		if (renderlist != NULL)
			render_entity_list(renderlist);

		al_flip_display();
		calcfps();

		ralloc_free(ctx);
	}

	ralloc_free(mesh);
	ralloc_free(solsys);

	shader_delete(shader_light);
	shader_delete(shader_simple);
	glmFreeMatrixStack(glmProjectionMatrix);
	glmFreeMatrixStack(glmViewMatrix);
	glmFreeMatrixStack(glmModelMatrix);
	al_destroy_display(dpy);
	return 0;
}