static bool
brw_codegen_tcs_prog(struct brw_context *brw, struct gl_shader_program *shader_prog,
                     struct brw_program *tcp, struct brw_tcs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;
   const struct gen_device_info *devinfo = compiler->devinfo;
   struct brw_stage_state *stage_state = &brw->tcs.base;
   nir_shader *nir;
   struct brw_tcs_prog_data prog_data;
   bool start_busy = false;
   double start_time = 0;

   void *mem_ctx = ralloc_context(NULL);
   if (tcp) {
      nir = tcp->program.nir;
   } else {
      /* Create a dummy nir_shader.  We won't actually use NIR code to
       * generate assembly (it's easier to generate assembly directly),
       * but the whole compiler assumes one of these exists.
       */
      const nir_shader_compiler_options *options =
         ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
      nir = create_passthrough_tcs(mem_ctx, compiler, options, key);
   }

   memset(&prog_data, 0, sizeof(prog_data));

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    *
    * Note: param_count needs to be num_uniform_components * 4, since we add
    * padding around uniform values below vec4 size, so the worst case is that
    * every uniform is a float which gets padded to the size of a vec4.
    */
   int param_count = nir->num_uniforms / 4;

   prog_data.base.base.param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.base.pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.base.nr_params = param_count;

   if (tcp) {
      brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_CTRL, devinfo,
                                              shader_prog, &tcp->program,
                                              &prog_data.base.base, 0);

      prog_data.base.base.image_param =
         rzalloc_array(NULL, struct brw_image_param,
                       tcp->program.info.num_images);
      prog_data.base.base.nr_image_params = tcp->program.info.num_images;

      brw_nir_setup_glsl_uniforms(nir, shader_prog, &tcp->program,
                                  &prog_data.base.base,
                                  compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
   } else {
TEST(ir_variable_constructor, interface_array)
{
   void *mem_ctx = ralloc_context(NULL);

   static const glsl_struct_field f[] = {
      { glsl_type::vec(4), "v", false }
   };

   const glsl_type *const interface =
      glsl_type::get_interface_instance(f,
                                        ARRAY_SIZE(f),
                                        GLSL_INTERFACE_PACKING_STD140,
                                        "simple_interface");

   const glsl_type *const interface_array =
      glsl_type::get_array_instance(interface, 2);

   static const char name[] = "array_instance";

   ir_variable *const v =
      new(mem_ctx) ir_variable(interface_array, name, ir_var_uniform);

   EXPECT_STREQ(name, v->name);
   /* The constructor must copy the name string: contents match, but the
    * pointers must differ.
    */
   EXPECT_NE(name, v->name);
   EXPECT_EQ(interface_array, v->type);
   EXPECT_EQ(interface, v->get_interface_type());
}
brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
                                           bool debug_flag)
   : mem_ctx(ralloc_context(NULL)),
     generator(brw->intelScreen->compiler, brw, mem_ctx,
               (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
               (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data),
               0, false, MESA_SHADER_FRAGMENT)
{
   if (debug_flag)
      generator.enable_debug("blorp");
}

brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
{
   ralloc_free(mem_ctx);
}

const unsigned *
brw_blorp_eu_emitter::get_program(unsigned *program_size)
{
   cfg_t cfg(&insts);
   generator.generate_code(&cfg, 16);
   return generator.get_assembly(program_size);
}
output_read_remover::output_read_remover(unsigned stage)
{
   this->stage = stage;
   mem_ctx = ralloc_context(NULL);
   replacements = _mesa_hash_table_create(NULL, hash_table_var_hash,
                                          _mesa_key_pointer_equal);
}
loop_state::loop_state()
{
   this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                      _mesa_key_pointer_equal);
   this->mem_ctx = ralloc_context(NULL);
   this->loop_found = false;
}
loop_state::loop_state()
{
   this->ht = hash_table_ctor(0, hash_table_pointer_hash,
                              hash_table_pointer_compare);
   this->mem_ctx = ralloc_context(NULL);
   this->loop_found = false;
}
has_recursion_visitor()
   : current(NULL)
{
   this->mem_ctx = ralloc_context(NULL);
   this->function_hash = hash_table_ctor(0, hash_table_pointer_hash,
                                         hash_table_pointer_compare);
}
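/* Nearly every constructor above follows the same ralloc idiom: create a
 * root context with ralloc_context(NULL), parent all transient allocations
 * to it, and release the whole tree with a single ralloc_free().  A minimal
 * sketch of that pattern using Mesa's public ralloc API; the `node` struct
 * and its fields are made up for illustration.
 */
#include "util/ralloc.h"

struct node {
   const char *label;
   struct node *next;
};

static void
build_and_discard(void)
{
   /* Root context: owns everything allocated below it. */
   void *mem_ctx = ralloc_context(NULL);

   struct node *head = NULL;
   for (int i = 0; i < 4; i++) {
      /* Each node and its string are parented to mem_ctx. */
      struct node *n = ralloc(mem_ctx, struct node);
      n->label = ralloc_asprintf(mem_ctx, "node %d", i);
      n->next = head;
      head = n;
   }

   /* ... use the list ... */

   /* One call frees the context, every node, and every string. */
   ralloc_free(mem_ctx);
}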
static bool
brw_codegen_cs_prog(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_compute_program *cp,
                    struct brw_cs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   const GLuint *program;
   void *mem_ctx = ralloc_context(NULL);
   GLuint program_size;
   struct brw_cs_prog_data prog_data;

   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
   assert(cs);

   memset(&prog_data, 0, sizeof(prog_data));

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count = cs->num_uniform_components +
                     cs->NumImages * BRW_IMAGE_PARAM_SIZE;

   /* The backend also sometimes adds params for texture size. */
   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
   prog_data.base.param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.image_param =
      rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
   prog_data.base.nr_params = param_count;
   prog_data.base.nr_image_params = cs->NumImages;

   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
                         &cp->program, prog, &program_size);
   if (program == NULL) {
      ralloc_free(mem_ctx);
      return false;
   }

   if (prog_data.base.total_scratch) {
      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
                         prog_data.base.total_scratch * brw->max_cs_threads);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_CS))
      fprintf(stderr, "\n");

   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
has_recursion_visitor()
   : current(NULL)
{
   progress = false;
   this->mem_ctx = ralloc_context(NULL);
   this->function_hash = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);
}
brw_blorp_clear_program::brw_blorp_clear_program(
      struct brw_context *brw,
      const brw_blorp_clear_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key)
{
   brw_init_compile(brw, &func, mem_ctx);
}
/* Test that data values are written and read with proper alignment. */
static void
test_alignment(void)
{
   void *ctx = ralloc_context(NULL);
   struct blob *blob;
   struct blob_reader reader;
   uint8_t bytes[] = "ABCDEFGHIJKLMNOP";
   size_t delta, last, num_bytes;

   blob = blob_create(ctx);

   /* First, write an intptr value to the blob and capture that size. This is
    * the expected offset between any pair of intptr values (if written with
    * alignment).
    */
   blob_write_intptr(blob, (intptr_t) blob);

   delta = blob->size;
   last = blob->size;

   /* Then loop doing the following:
    *
    *   1. Write an unaligned number of bytes
    *   2. Verify that write results in an unaligned size
    *   3. Write an intptr_t value
    *   4. Verify that write results in an aligned size
    */
   for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) {
      blob_write_bytes(blob, bytes, num_bytes);

      expect_unequal(delta, blob->size - last, "unaligned write of bytes");

      blob_write_intptr(blob, (intptr_t) blob);

      expect_equal(2 * delta, blob->size - last, "aligned write of intptr");

      last = blob->size;
   }

   /* Finally, test that reading also does proper alignment. Since we know
    * that values were written with all the right alignment, all we have to do
    * here is verify that correct values are read.
    */
   blob_reader_init(&reader, blob->data, blob->size);

   expect_equal((intptr_t) blob, blob_read_intptr(&reader),
                "read of initial, aligned intptr_t");

   for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) {
      expect_equal_bytes(bytes, blob_read_bytes(&reader, num_bytes),
                         num_bytes, "unaligned read of bytes");
      expect_equal((intptr_t) blob, blob_read_intptr(&reader),
                   "aligned read of intptr_t");
   }

   ralloc_free(ctx);
}
get_sampler_name(ir_dereference *last,
                 struct gl_shader_program *shader_program)
{
   this->mem_ctx = ralloc_context(NULL);
   this->shader_program = shader_program;
   this->name = NULL;
   this->offset = 0;
   this->last = last;
}
void
set_uniform_initializer::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->prog = rzalloc(NULL, struct gl_shader_program);

   /* Set default values used by the test cases. */
   this->actual_index = 1;
   this->name = "i";
}
void
link_varyings::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->ir.make_empty();

   this->consumer_inputs =
      hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);

   this->consumer_interface_inputs =
      hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
}
brw_blorp_const_color_program::brw_blorp_const_color_program(
      struct brw_context *brw,
      const brw_blorp_const_color_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key),
     R0(),
     R1(),
     clear_rgba(),
     base_mrf(0)
{
   brw_init_compile(brw, &func, mem_ctx);
}
static void
brw_vs_init_compile(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_vertex_program *vp,
                    const struct brw_vs_prog_key *key,
                    struct brw_vs_compile *c)
{
   memset(c, 0, sizeof(*c));

   memcpy(&c->key, key, sizeof(*key));
   c->vp = vp;
   c->base.shader_prog = prog;
   c->base.mem_ctx = ralloc_context(NULL);
}
void
link_varyings::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->ir.make_empty();

   this->consumer_inputs =
      _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                              _mesa_key_string_equal);

   this->consumer_interface_inputs =
      _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                              _mesa_key_string_equal);
}
static void
brw_blorp_params_get_clear_kernel(struct brw_context *brw,
                                  struct brw_blorp_params *params,
                                  bool use_replicated_data)
{
   struct brw_blorp_const_color_prog_key blorp_key;
   memset(&blorp_key, 0, sizeof(blorp_key));
   blorp_key.use_simd16_replicated_data = use_replicated_data;

   if (brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
                        &blorp_key, sizeof(blorp_key),
                        &params->wm_prog_kernel, &params->wm_prog_data))
      return;

   void *mem_ctx = ralloc_context(NULL);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
   b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear");

   nir_variable *v_color = nir_variable_create(b.shader, nir_var_shader_in,
                                               glsl_vec4_type(), "v_color");
   v_color->data.location = VARYING_SLOT_VAR0;
   v_color->data.interpolation = INTERP_MODE_FLAT;

   nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out,
                                                  glsl_vec4_type(),
                                                  "gl_FragColor");
   frag_color->data.location = FRAG_RESULT_COLOR;

   nir_copy_var(&b, frag_color, v_color);

   struct brw_wm_prog_key wm_key;
   brw_blorp_init_wm_prog_key(&wm_key);

   struct brw_blorp_prog_data prog_data;
   unsigned program_size;
   const unsigned *program =
      brw_blorp_compile_nir_shader(brw, b.shader, &wm_key, use_replicated_data,
                                   &prog_data, &program_size);

   brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
                    &blorp_key, sizeof(blorp_key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &params->wm_prog_kernel, &params->wm_prog_data);

   ralloc_free(mem_ctx);
}
brw_blorp_const_color_program::brw_blorp_const_color_program(
      struct brw_context *brw,
      const brw_blorp_const_color_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key),
     R0(),
     R1(),
     clear_rgba(),
     base_mrf(0)
{
   prog_data.first_curbe_grf = 0;
   prog_data.persample_msaa_dispatch = false;
   brw_init_compile(brw, &func, mem_ctx);
}
void
common_builtin::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);
   this->ir.make_empty();

   initialize_context_to_defaults(&this->ctx, API_OPENGL_COMPAT);

   this->shader = rzalloc(this->mem_ctx, gl_shader);
   this->shader->Type = this->shader_type;
   this->shader->Stage = _mesa_shader_enum_to_shader_stage(this->shader_type);

   this->state =
      new(mem_ctx) _mesa_glsl_parse_state(&this->ctx, this->shader->Stage,
                                          this->shader);

   _mesa_glsl_initialize_types(this->state);
   _mesa_glsl_initialize_variables(&this->ir, this->state);
}
static void
compile_sf_prog(struct brw_context *brw, struct brw_sf_prog_key *key)
{
   const unsigned *program;
   void *mem_ctx;
   unsigned program_size;

   mem_ctx = ralloc_context(NULL);

   struct brw_sf_prog_data prog_data;
   program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data,
                            &brw->vue_map_geom_out, &program_size);

   brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->sf.prog_offset, &brw->sf.prog_data);
   ralloc_free(mem_ctx);
}
static bool
split_var_copies_impl(nir_function_impl *impl)
{
   struct split_var_copies_state state;

   state.mem_ctx = ralloc_parent(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.progress = false;

   nir_foreach_block(impl, split_var_copies_block, &state);

   ralloc_free(state.dead_ctx);

   if (state.progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return state.progress;
}
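/* state.dead_ctx above is a second common ralloc trick: a throwaway "dead
 * context".  Nodes that become garbage mid-pass are not freed immediately
 * (the walk may still traverse them); they are reparented onto dead_ctx,
 * which is freed in one shot once the pass is done.  A hypothetical pass
 * skeleton sketching the idiom; run_pass() and its body are illustrative,
 * not from the source.
 */
#include "util/ralloc.h"

static bool
run_pass(void)
{
   void *dead_ctx = ralloc_context(NULL);
   bool progress = false;

   /* While walking the IR, instead of freeing a node that other code may
    * still point at, hand it to the dead context:
    *
    *    ralloc_steal(dead_ctx, old_node);
    *    progress = true;
    */

   /* Everything stolen onto dead_ctx dies here, all at once. */
   ralloc_free(dead_ctx);
   return progress;
}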
/* Test that we can read and write some large objects (exercising the code in
 * the blob_write functions to realloc blob->data).
 */
static void
test_big_objects(void)
{
   void *ctx = ralloc_context(NULL);
   struct blob blob;
   struct blob_reader reader;
   int size = 1000;
   int count = 1000;
   size_t i;
   char *buf;

   blob_init(&blob);

   /* Initialize our buffer. */
   buf = ralloc_size(ctx, size);
   for (i = 0; i < size; i++) {
      buf[i] = i % 256;
   }

   /* Write it many times. */
   for (i = 0; i < count; i++) {
      blob_write_bytes(&blob, buf, size);
   }

   blob_reader_init(&reader, blob.data, blob.size);

   /* Read and verify it many times. */
   for (i = 0; i < count; i++) {
      expect_equal_bytes((uint8_t *) buf, blob_read_bytes(&reader, size),
                         size, "read of large objects");
   }

   expect_equal(reader.end - reader.data, reader.current - reader.data,
                "number of bytes read reading large objects");

   expect_equal(false, reader.overrun,
                "overrun flag not set reading large objects");

   blob_finish(&blob);
   ralloc_free(ctx);
}
/* Test that we detect overrun. */
static void
test_overrun(void)
{
   void *ctx = ralloc_context(NULL);
   struct blob *blob;
   struct blob_reader reader;
   uint32_t value = 0xdeadbeef;

   blob = blob_create(ctx);

   blob_write_uint32(blob, value);

   blob_reader_init(&reader, blob->data, blob->size);

   expect_equal(value, blob_read_uint32(&reader), "read before overrun");
   expect_equal(false, reader.overrun, "overrun flag not set");
   expect_equal(0, blob_read_uint32(&reader), "read at overrun");
   expect_equal(true, reader.overrun, "overrun flag set");

   ralloc_free(ctx);
}
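/* The two tests above poke at corner cases (alignment, overrun detection).
 * For reference, a plain round-trip with this same older blob API (the one
 * where blob_create() takes a ralloc context) might look like this sketch;
 * the helper name is made up for illustration.
 */
#include <assert.h>
#include <string.h>
#include "blob.h"

static void
blob_round_trip_sketch(void)
{
   void *ctx = ralloc_context(NULL);
   struct blob *blob = blob_create(ctx);
   struct blob_reader reader;

   /* Write a couple of values... */
   blob_write_uint32(blob, 42);
   blob_write_string(blob, "hello");

   /* ...and read them back in the same order. */
   blob_reader_init(&reader, blob->data, blob->size);
   assert(blob_read_uint32(&reader) == 42);
   assert(strcmp(blob_read_string(&reader), "hello") == 0);

   ralloc_free(ctx); /* frees the blob and its backing storage */
}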
static bool
brw_codegen_wm_prog(struct brw_context *brw,
                    struct brw_program *fp,
                    struct brw_wm_prog_key *key,
                    struct brw_vue_map *vue_map)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data prog_data;
   const GLuint *program;
   bool start_busy = false;
   double start_time = 0;

   nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir);

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (fp->program.is_arb_asm)
      prog_data.base.use_alt_mode = true;

   assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);

   if (!fp->program.is_arb_asm) {
      brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program,
                                  &prog_data.base, true);
      brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir, NULL,
                                 prog_data.base.ubo_ranges);
   } else {
      brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base);

      if (unlikely(INTEL_DEBUG & DEBUG_WM))
         brw_dump_arb_asm("fragment", &fp->program);
   }

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    brw_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   int st_index8 = -1, st_index16 = -1, st_index32 = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      st_index8 = brw_get_shader_time_index(brw, &fp->program, ST_FS8,
                                            !fp->program.is_arb_asm);
      st_index16 = brw_get_shader_time_index(brw, &fp->program, ST_FS16,
                                             !fp->program.is_arb_asm);
      st_index32 = brw_get_shader_time_index(brw, &fp->program, ST_FS32,
                                             !fp->program.is_arb_asm);
   }

   char *error_str = NULL;
   program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx,
                            key, &prog_data, nir, &fp->program,
                            st_index8, st_index16, st_index32,
                            true, false, vue_map,
                            &error_str);

   if (program == NULL) {
      if (!fp->program.is_arb_asm) {
         fp->program.sh.data->LinkStatus = LINKING_FAILURE;
         ralloc_strcat(&fp->program.sh.data->InfoLog, error_str);
      }

      _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (fp->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id,
                             key->program_string_id, key);
      }
      fp->compiled_once = true;

      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);

   if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
      fprintf(stderr, "\n");

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.param);
   ralloc_steal(NULL, prog_data.base.pull_param);

   brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
                    key, sizeof(struct brw_wm_prog_key),
                    program, prog_data.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->wm.base.prog_offset, &brw->wm.base.prog_data);

   ralloc_free(mem_ctx);

   return true;
}
bool
brw_codegen_gs_prog(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_geometry_program *gp,
                    struct brw_gs_prog_key *key)
{
   struct brw_stage_state *stage_state = &brw->gs.base;
   struct brw_gs_compile c;
   memset(&c, 0, sizeof(c));
   c.key = *key;
   c.gp = gp;

   c.prog_data.include_primitive_id =
      (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;

   c.prog_data.invocations = gp->program.Invocations;

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    *
    * Note: param_count needs to be num_uniform_components * 4, since we add
    * padding around uniform values below vec4 size, so the worst case is that
    * every uniform is a float which gets padded to the size of a vec4.
    */
   struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
   int param_count = gs->num_uniform_components * 4;

   /* We also upload clip plane data as uniforms */
   param_count += MAX_CLIP_PLANES * 4;

   c.prog_data.base.base.param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   c.prog_data.base.base.pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   c.prog_data.base.base.nr_params = param_count;

   if (brw->gen >= 7) {
      if (gp->program.OutputType == GL_POINTS) {
         /* When the output type is points, the geometry shader may output
          * data to multiple streams, and EndPrimitive() has no effect.  So we
          * configure the hardware to interpret the control data as stream ID.
          */
         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;

         /* We only have to emit control bits if we are using streams */
         if (prog->Geom.UsesStreams)
            c.control_data_bits_per_vertex = 2;
         else
            c.control_data_bits_per_vertex = 0;
      } else {
         /* When the output type is triangle_strip or line_strip,
          * EndPrimitive() may be used to terminate the current strip and
          * start a new one (similar to primitive restart), and outputting
          * data to multiple streams is not supported.  So we configure the
          * hardware to interpret the control data as EndPrimitive information
          * (a.k.a. "cut bits").
          */
         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;

         /* We only need to output control data if the shader actually calls
          * EndPrimitive().
          */
         c.control_data_bits_per_vertex =
            gp->program.UsesEndPrimitive ? 1 : 0;
      }
   } else {
      /* There are no control data bits in gen6. */
      c.control_data_bits_per_vertex = 0;

      /* If it is using transform feedback, enable it */
      if (prog->TransformFeedback.NumVarying)
         c.prog_data.gen6_xfb_enabled = true;
      else
         c.prog_data.gen6_xfb_enabled = false;
   }
   c.control_data_header_size_bits =
      gp->program.VerticesOut * c.control_data_bits_per_vertex;

   /* 1 HWORD = 32 bytes = 256 bits */
   c.prog_data.control_data_header_size_hwords =
      ALIGN(c.control_data_header_size_bits, 256) / 256;

   GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;

   /* In order for legacy clipping to work, we need to populate the clip
    * distance varying slots whenever clipping is enabled, even if the vertex
    * shader doesn't write to gl_ClipDistance.
    */
   if (c.key.base.userclip_active) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
   }

   brw_compute_vue_map(brw->intelScreen->devinfo,
                       &c.prog_data.base.vue_map, outputs_written);

   /* Compute the output vertex size.
    *
    * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
    * Size (p168):
    *
    *     [0,62] indicating [1,63] 16B units
    *
    *     Specifies the size of each vertex stored in the GS output entry
    *     (following any Control Header data) as a number of 128-bit units
    *     (minus one).
    *
    *     Programming Restrictions: The vertex size must be programmed as a
    *     multiple of 32B units with the following exception: Rendering is
    *     disabled (as per SOL stage state) and the vertex size output by the
    *     GS thread is 16B.
    *
    *     If rendering is enabled (as per SOL state) the vertex size must be
    *     programmed as a multiple of 32B units. In other words, the only
    *     time software can program a vertex size with an odd number of 16B
    *     units is when rendering is disabled.
    *
    * Note: B=bytes in the above text.
    *
    * It doesn't seem worth the extra trouble to optimize the case where the
    * vertex size is 16B (especially since this would require special-casing
    * the GEN assembly that writes to the URB).  So we just set the vertex
    * size to a multiple of 32B (2 vec4's) in all cases.
    *
    * The maximum output vertex size is 62*16 = 992 bytes (31 hwords).  We
    * budget that as follows:
    *
    *   512 bytes for varyings (a varying component is 4 bytes and
    *             gl_MaxGeometryOutputComponents = 128)
    *    16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
    *             bytes)
    *    16 bytes overhead for gl_Position (we allocate it a slot in the VUE
    *             even if it's not used)
    *    32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
    *             whenever clip planes are enabled, even if the shader doesn't
    *             write to gl_ClipDistance)
    *    16 bytes overhead since the VUE size must be a multiple of 32 bytes
    *             (see below)--this causes up to 1 VUE slot to be wasted
    *   400 bytes available for varying packing overhead
    *
    * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
    * per interpolation type, so this is plenty.
    */
   unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
   assert(brw->gen == 6 ||
          output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
   c.prog_data.output_vertex_size_hwords =
      ALIGN(output_vertex_size_bytes, 32) / 32;

   /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
    * That divides up as follows:
    *
    *     64 bytes for the control data header (cut indices or StreamID bits)
    *   4096 bytes for varyings (a varying component is 4 bytes and
    *              gl_MaxGeometryTotalOutputComponents = 1024)
    *   4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
    *              bytes/vertex and gl_MaxGeometryOutputVertices is 256)
    *   4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
    *              even if it's not used)
    *   8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
    *              whenever clip planes are enabled, even if the shader doesn't
    *              write to gl_ClipDistance)
    *   4096 bytes overhead since the VUE size must be a multiple of 32
    *              bytes (see above)--this causes up to 1 VUE slot to be wasted
    *   8128 bytes available for varying packing overhead
    *
    * Worst-case varying packing overhead is 3/4 of a varying slot per
    * interpolation type, which works out to 3072 bytes, so this would allow
    * us to accommodate 2 interpolation types without any danger of running
    * out of URB space.
    *
    * In practice, the risk of running out of URB space is very small, since
    * the above figures are all worst-case, and most of them scale with the
    * number of output vertices.  So we'll just calculate the amount of space
    * we need, and if it's too large, fail to compile.
    *
    * The above is for gen7+ where we have a single URB entry that will hold
    * all the output.  In gen6, we will have to allocate URB entries for every
    * vertex we emit, so our URB entries only need to be large enough to hold
    * a single vertex.  Also, gen6 does not have a control data header.
    */
   unsigned output_size_bytes;
   if (brw->gen >= 7) {
      output_size_bytes =
         c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
      output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
   } else {
      output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
   }

   /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
    * which comes before the control header.
    */
   if (brw->gen >= 8)
      output_size_bytes += 32;

   assert(output_size_bytes >= 1);
   int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
   if (brw->gen == 6)
      max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
   if (output_size_bytes > max_output_size_bytes)
      return false;

   /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
    * a multiple of 128 bytes in gen6.
    */
   if (brw->gen >= 7)
      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
   else
      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;

   c.prog_data.output_topology =
      get_hw_prim_for_gl_prim(gp->program.OutputType);

   brw_compute_vue_map(brw->intelScreen->devinfo,
                       &c.input_vue_map, c.key.input_varyings);

   /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
    * need to program a URB read length of ceiling(num_slots / 2).
    */
   c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;

   void *mem_ctx = ralloc_context(NULL);
   unsigned program_size;
   const unsigned *program =
      brw_gs_emit(brw, prog, &c, mem_ctx, &program_size);
   if (program == NULL) {
      ralloc_free(mem_ctx);
      return false;
   }

   /* Scratch space is used for register spilling */
   if (c.base.last_scratch) {
      perf_debug("Geometry shader triggered register spilling.  "
                 "Try reducing the number of live vec4 values to "
                 "improve performance.\n");

      c.prog_data.base.base.total_scratch =
         brw_get_scratch_size(c.base.last_scratch * REG_SIZE);

      brw_get_scratch_bo(brw, &stage_state->scratch_bo,
                         c.prog_data.base.base.total_scratch *
                         brw->max_gs_threads);
   }

   brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &stage_state->prog_offset, &brw->gs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
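/* The hword and alignment arithmetic above is easy to sanity-check with
 * concrete numbers.  A standalone sketch follows; the slot and vertex counts
 * are arbitrary examples, not taken from the source, and ALIGN is the usual
 * round-up macro (the alignment must be a power of two).
 */
#include <assert.h>
#include <stdio.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

int
main(void)
{
   /* Example: a VUE map with 13 slots of 16 bytes each. */
   unsigned num_slots = 13;
   unsigned output_vertex_size_bytes = num_slots * 16;       /* 208 */
   unsigned output_vertex_size_hwords =
      ALIGN(output_vertex_size_bytes, 32) / 32;              /* 7 hwords = 224 B */

   /* gen7+: room for 4 output vertices plus 1 hword of control data. */
   unsigned vertices_out = 4;
   unsigned output_size_bytes =
      output_vertex_size_hwords * 32 * vertices_out + 32;    /* 928 */

   /* URB entry size is stored in 64-byte units on gen7+. */
   unsigned urb_entry_size = ALIGN(output_size_bytes, 64) / 64;

   printf("vertex: %u B (%u hwords), URB entry: %u x 64 B\n",
          output_vertex_size_bytes, output_vertex_size_hwords, urb_entry_size);
   assert(urb_entry_size == 15);   /* 928 rounds up to 960 B = 15 units */
   return 0;
}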
bool
brw_codegen_vs_prog(struct brw_context *brw,
                    struct gl_shader_program *prog,
                    struct brw_vertex_program *vp,
                    struct brw_vs_prog_key *key)
{
   GLuint program_size;
   const GLuint *program;
   struct brw_vs_prog_data prog_data;
   struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
   void *mem_ctx;
   int i;
   struct brw_shader *vs = NULL;
   bool start_busy = false;
   double start_time = 0;

   if (prog)
      vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (!prog)
      stage_prog_data->use_alt_mode = true;

   mem_ctx = ralloc_context(NULL);

   brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
                                           brw->intelScreen->devinfo,
                                           prog, &vp->program.Base,
                                           &prog_data.base.base, 0);

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count = vp->program.Base.nir->num_uniforms;
   if (!brw->intelScreen->compiler->scalar_vs)
      param_count *= 4;

   if (vs)
      prog_data.base.base.nr_image_params = vs->base.NumImages;

   /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
    * planes as uniforms.
    */
   param_count += key->nr_userclip_plane_consts * 4;

   stage_prog_data->param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   stage_prog_data->pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   stage_prog_data->image_param =
      rzalloc_array(NULL, struct brw_image_param,
                    stage_prog_data->nr_image_params);
   stage_prog_data->nr_params = param_count;

   if (prog) {
      brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
                                  &prog_data.base.base,
                                  brw->intelScreen->compiler->scalar_vs);
   } else {
      brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
                                 &prog_data.base.base);
   }

   GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
   prog_data.inputs_read = vp->program.Base.InputsRead;

   if (key->copy_edgeflag) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
      prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
   }

   if (brw->gen < 6) {
      /* Put dummy slots into the VUE for the SF to put the replaced
       * point sprite coords in.  We shouldn't need these dummy slots,
       * which take up precious URB space, but it would mean that the SF
       * doesn't get nice aligned pairs of input coords into output
       * coords, which would be a pain to handle.
       */
      for (i = 0; i < 8; i++) {
         if (key->point_coord_replace & (1 << i))
            outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
      }

      /* if back colors are written, allocate slots for front colors too */
      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
   }

   /* In order for legacy clipping to work, we need to populate the clip
    * distance varying slots whenever clipping is enabled, even if the vertex
    * shader doesn't write to gl_ClipDistance.
    */
   if (key->nr_userclip_plane_consts > 0) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
   }

   brw_compute_vue_map(brw->intelScreen->devinfo,
                       &prog_data.base.vue_map, outputs_written,
                       prog ? prog->SeparateShader : false);

   if (0) {
      _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
                               true);
   }

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    drm_intel_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS))
      brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);

   int st_index = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);

   /* Emit GEN4 code. */
   char *error_str;
   program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key,
                            &prog_data, vp->program.Base.nir,
                            brw_select_clip_planes(&brw->ctx),
                            !_mesa_is_gles3(&brw->ctx),
                            st_index, &program_size, &error_str);
   if (program == NULL) {
      if (prog) {
         prog->LinkStatus = false;
         ralloc_strcat(&prog->InfoLog, error_str);
      }

      _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug) && vs) {
      if (vs->compiled_once) {
         brw_vs_debug_recompile(brw, prog, key);
      }
      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
      vs->compiled_once = true;
   }

   /* Scratch space is used for register spilling */
   if (prog_data.base.base.total_scratch) {
      brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
                         prog_data.base.base.total_scratch *
                         brw->max_vs_threads);
   }

   brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
                    key, sizeof(struct brw_vs_prog_key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->vs.base.prog_offset, &brw->vs.prog_data);
   ralloc_free(mem_ctx);

   return true;
}
glslopt_ctx(glslopt_target target)
{
   mem_ctx = ralloc_context(NULL);
   initialize_mesa_context(&mesa_ctx, target);
}
bool
fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
{
   bool progress = false;

   void *mem_ctx = ralloc_context(this->mem_ctx);

   for (fs_inst *inst = (fs_inst *)block->start;
        inst != block->end->next;
        inst = (fs_inst *) inst->next) {

      /* Skip some cases. */
      if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
          !inst->force_uncompressed && !inst->force_sechalf &&
          !inst->conditional_mod) {
         bool found = false;

         aeb_entry *entry;
         foreach_list(entry_node, aeb) {
            entry = (aeb_entry *) entry_node;

            /* Match current instruction's expression against those in AEB. */
            if (inst->opcode == entry->generator->opcode &&
                inst->saturate == entry->generator->saturate &&
                operands_match(entry->generator->src, inst->src)) {
               found = true;
               progress = true;
               break;
            }
         }

         if (!found) {
            /* Our first sighting of this expression.  Create an entry. */
            aeb_entry *entry = ralloc(mem_ctx, aeb_entry);
            entry->tmp = reg_undef;
            entry->generator = inst;
            aeb->push_tail(entry);
         } else {
            /* This is at least our second sighting of this expression.
             * If we don't have a temporary already, make one.
             */
            bool no_existing_temp = entry->tmp.file == BAD_FILE;
            if (no_existing_temp) {
               entry->tmp = fs_reg(this, glsl_type::float_type);
               entry->tmp.type = inst->dst.type;

               fs_inst *copy = new(ralloc_parent(inst))
                  fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
               entry->generator->insert_after(copy);
               entry->generator->dst = entry->tmp;
            }

            /* dest <- temp */
            fs_inst *copy = new(ralloc_parent(inst))
               fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
            inst->replace_with(copy);

            /* Appending an instruction may have changed our bblock end. */
            if (inst == block->end) {
               block->end = copy;
            }

            /* Continue iteration with copy->next */
            inst = copy;
         }
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}
int
main(int argc, char **argv)
{
   int i;
   Light light;
   const char *filename;
   Camera cam;
   Vec3 position = {0, 0, 150e9};
   Vec3 up = {0, 1, 0};
   Vec3 target = {0, 0, 0};
   Shader *shader_light;
   Shader *shader_simple;
   Mesh *mesh;
   Renderable planet;

   if (argc < 2)
      filename = STRINGIFY(ROOT_PATH) "/data/teapot.ply";
   else
      filename = argv[1];

   solsys = solsys_load(STRINGIFY(ROOT_PATH) "/data/sol.ini");
   if (solsys == NULL)
      return 1;

   mesh = mesh_import(filename);
   if (mesh == NULL)
      return 1;

   /* Blow up the teapot */
   for (i = 0; i < mesh->num_vertices; i++) {
      mesh->vertex[i].x = mesh->vertex[i].x * 100;
      mesh->vertex[i].y = mesh->vertex[i].y * 100;
      mesh->vertex[i].z = mesh->vertex[i].z * 100;
   }

   cam.fov = M_PI/4;
   cam.left = 0;
   cam.bottom = 0;
   cam.width = 1024;
   cam.height = 768;
   cam.zNear = 1e6;
   cam.zFar = 4.5e15;

   init_allegro(&cam);
   cam_lookat(&cam, position, target, up);
   glewInit();

   shader_light = shader_create(STRINGIFY(ROOT_PATH) "/data/lighting.v.glsl",
                                STRINGIFY(ROOT_PATH) "/data/lighting.f.glsl");
   if (shader_light == NULL)
      return 1;

   shader_simple = shader_create(STRINGIFY(ROOT_PATH) "/data/simple.v.glsl",
                                 STRINGIFY(ROOT_PATH) "/data/simple.f.glsl");
   if (shader_simple == NULL)
      return 1;

   glmProjectionMatrix = glmNewMatrixStack();
   glmViewMatrix = glmNewMatrixStack();
   glmModelMatrix = glmNewMatrixStack();

   light.position = light_pos;
   memcpy(light.ambient, light_ambient, sizeof(light_ambient));
   memcpy(light.diffuse, light_diffuse, sizeof(light_diffuse));
   memcpy(light.specular, light_specular, sizeof(light_specular));

   glClearColor(20/255., 30/255., 50/255., 1.0);
   glEnable(GL_DEPTH_TEST);
   glEnable(GL_CULL_FACE);
   glCullFace(GL_BACK);
   glPointSize(2);

   planet.data = mesh;
   planet.upload_to_gpu = mesh_upload_to_gpu;
   planet.render = mesh_render;
   planet.shader = shader_light;
   renderable_upload_to_gpu(&planet);

   /* Transformation matrices */
   cam_projection_matrix(&cam, glmProjectionMatrix);

   /* Start rendering */
   while (handle_input(ev_queue, &cam)) {
      void *ctx;
      Entity *renderlist, *prev;

      t += 365*86400;

      /* Physics stuff */
      solsys_update(solsys, t);

      /* Per-frame entity list: allocated on a throwaway ralloc context and
       * freed in one call at the end of the frame.
       */
      ctx = ralloc_context(NULL);
      renderlist = NULL;   /* stays NULL if there are no bodies */
      prev = NULL;
      for (i = 0; i < solsys->num_bodies; i++) {
         Entity *e;

         e = ralloc(ctx, Entity);
         e->orientation = (Quaternion) {1, 0, 0, 0};
         e->renderable = &planet;
         e->position = solsys->body[i].position;
         e->radius = solsys->body[i].radius;

         e->prev = prev;
         e->next = NULL;
         if (prev != NULL)
            e->prev->next = e;
         prev = e;
         if (i == 0)
            renderlist = e;
      }

      /* Rendering stuff */
      glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

      glmLoadIdentity(glmModelMatrix);
      glmLoadIdentity(glmViewMatrix);
      cam_view_matrix(&cam, glmViewMatrix); /* view */

      light_upload_to_gpu(&light, shader_light);
      render_entity_list(renderlist);

      al_flip_display();
      calcfps();
      ralloc_free(ctx);
   }

   ralloc_free(mesh);
   ralloc_free(solsys);
   shader_delete(shader_light);
   shader_delete(shader_simple);
   glmFreeMatrixStack(glmProjectionMatrix);
   glmFreeMatrixStack(glmViewMatrix);
   glmFreeMatrixStack(glmModelMatrix);
   al_destroy_display(dpy);

   return 0;
}