/* Build the qualified name "iface.field" for the given interface block
 * member (defaults to the first member).  Result is ralloc'ed on mem_ctx. */
char *interface_field_name(const glsl_type *iface, unsigned field = 0)
{
   const char *member_name = iface->fields.structure[field].name;
   return ralloc_asprintf(mem_ctx, "%s.%s", iface->name, member_name);
}
/** * Determines whether every stage in a linked program is active in the * specified pipeline. */ static bool program_stages_all_active(struct gl_pipeline_object *pipe, const struct gl_shader_program *prog) { unsigned i; bool status = true; if (!prog) return true; for (i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i]) { if (pipe->CurrentProgram[i]) { if (prog->Name != pipe->CurrentProgram[i]->Name) { status = false; } } else { status = false; } } } if (!status) { pipe->InfoLog = ralloc_asprintf(pipe, "Program %d is not active for all " "shaders that was linked", prog->Name); } return status; }
/* Select (compiling on first use) the meta stencil-blit program for either
 * the MSAA or non-MSAA sampler variant, make it current, and return its id. */
static GLuint
setup_program(struct brw_context *brw, bool msaa_tex)
{
   struct gl_context *ctx = &brw->ctx;
   struct blit_state *blit = &ctx->Meta->Blit;
   const struct sampler_and_fetch *samp = &samplers[msaa_tex];

   /* Vertex objects are (re)bound whether or not the program is cached. */
   _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, 2, 0);

   GLuint *prog_id = &brw->meta_stencil_blit_programs[msaa_tex];
   if (*prog_id) {
      /* Already compiled on a previous call — just make it current. */
      _mesa_UseProgram(*prog_id);
      return *prog_id;
   }

   char *frag_src = ralloc_asprintf(NULL, fs_tmpl, samp->sampler, samp->fetch);
   _mesa_meta_compile_and_link_program(ctx, vs_source, frag_src,
                                       "i965 stencil blit", prog_id);
   ralloc_free(frag_src);

   return *prog_id;
}
virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
{
   ir_constant *const_index = ir->array_index->as_constant();
   int idx;

   if (const_index != NULL) {
      idx = const_index->value.i[0];
   } else {
      /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
       * while GLSL 1.30 requires that the array indices be
       * constant integer expressions.  We don't expect any driver
       * to actually work with a really variable array index, so
       * all that would work would be an unrolled loop counter that ends
       * up being constant above.
       */
      ralloc_strcat(&shader_program->InfoLog,
                    "warning: Variable sampler array index unsupported.\n"
                    "This feature of the language was removed in GLSL 1.20 "
                    "and is unlikely to be supported for 1.10 in Mesa.\n");
      idx = 0;
   }

   if (ir == last) {
      /* The outermost dereference: record the element offset. */
      offset = idx;
   } else {
      /* An inner dereference: fold "[idx]" into the accumulated name. */
      this->name = ralloc_asprintf(mem_ctx, "%s[%d]", name, idx);
   }

   return visit_continue;
}
int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
{
   /* Default precision qualifiers are stored in the symbol table under a
    * mangled "#default_precision_<type>" key that cannot collide with any
    * real identifier. */
   char *key = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);
   symbol_table_entry *entry = get_entry(key);
   return entry ? entry->a->default_precision : ast_precision_none;
}
/* Return a filename within the cache's directory corresponding to 'key'.
 * The returned filename is ralloced with 'cache' as the parent context.
 *
 * Returns NULL if out of memory.
 */
static char *
get_cache_file(struct program_cache *cache, cache_key key)
{
   char sha1_hex[41]; /* 40 hex characters plus terminating NUL */

   _mesa_sha1_format(sha1_hex, key);

   /* Layout: <path>/<first two hex chars>/<remaining 38 hex chars> */
   return ralloc_asprintf(cache, "%s/%c%c/%s", cache->path,
                          sha1_hex[0], sha1_hex[1], sha1_hex + 2);
}
/* Build a simple no-op (passthrough) fragment shader for depth
 * decompression meta operations.
 *
 * Returns a newly built nir_shader; the caller owns it via ralloc.
 */
static nir_shader *
build_nir_fs(void)
{
   nir_builder b;

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

   /* The name is a fixed string, so use ralloc_strdup rather than routing
    * it through the printf-style ralloc_asprintf formatter. */
   b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_noop_fs");

   return b.shader;
}
/* Create the directory that will be needed for the cache file for \key.
 *
 * Obviously, the implementation here must closely match
 * get_cache_file above.
 */
static void
make_cache_file_directory(struct program_cache *cache, cache_key key)
{
   char sha1_hex[41];

   _mesa_sha1_format(sha1_hex, key);

   /* The two-character subdirectory mirrors get_cache_file()'s layout. */
   char *dir = ralloc_asprintf(cache, "%s/%c%c", cache->path,
                               sha1_hex[0], sha1_hex[1]);
   mkdir_if_needed(dir);
   ralloc_free(dir);
}
/* Concatenate an existing path and a new name to form a new path.  If the
 * new path does not exist as a directory, create it, then return the
 * resulting name of the new path (ralloc'ed off of 'ctx').
 *
 * Returns NULL on any error, such as:
 *
 *      <path> does not exist or is not a directory
 *      <path>/<name> exists but is not a directory
 *      <path>/<name> cannot be created as a directory
 */
static char *
concatenate_and_mkdir(void *ctx, char *path, char *name)
{
   struct stat sb;

   /* The base path must already exist and be a directory. */
   if (stat(path, &sb) != 0 || !S_ISDIR(sb.st_mode))
      return NULL;

   char *new_path = ralloc_asprintf(ctx, "%s/%s", path, name);

   return (mkdir_if_needed(new_path) == 0) ? new_path : NULL;
}
bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name, int precision) { char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name); ast_type_specifier *default_specifier = new(mem_ctx) ast_type_specifier(name); default_specifier->default_precision = precision; symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(default_specifier); if (!get_entry(name)) return _mesa_symbol_table_add_symbol(table, name, entry) == 0; return _mesa_symbol_table_replace_symbol(table, name, entry) == 0; }
/* Record a validation failure, keyed on the most specific object currently
 * being validated (instruction, then variable, then the condition string). */
static void
log_error(validate_state *state, const char *cond, const char *file, int line)
{
   const void *obj = state->instr ? (const void *) state->instr
                   : state->var   ? (const void *) state->var
                   : (const void *) cond;

   char *msg = ralloc_asprintf(state->errors, "error: %s (%s:%d)",
                               cond, file, line);

   _mesa_hash_table_insert(state->errors, obj, msg);
}
/* emit 0, 0, 0, 1 */ static nir_shader * build_nir_fs(void) { const struct glsl_type *vec4 = glsl_vec4_type(); nir_builder b; nir_variable *f_color; /* vec4, fragment output color */ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_asprintf(b.shader, "meta_resolve_fs"); f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); f_color->data.location = FRAG_RESULT_DATA0; nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf); return b.shader; }
/**
 * Return the shadow variant of this sampler type, creating it on first use.
 *
 * Returns NULL if this is not a sampler type or the sampler_types cache
 * does not exist.
 */
const glsl_type *
glsl_type::get_shadow_sampler_type() const
{
   const glsl_type *shadow_type = NULL;

   if (base_type == GLSL_TYPE_SAMPLER && sampler_types) {
      /* The cache key is the sampler type's name with "Shadow" appended. */
      char key[128];
      snprintf(key, sizeof(key), "%sShadow", name);

      shadow_type = (const glsl_type *) hash_table_find(sampler_types, key);
      if (shadow_type == NULL) {
         glsl_type *new_shadow_type = new glsl_type(*this);
         new_shadow_type->sampler_shadow = true;
         new_shadow_type->name = ralloc_asprintf(mem_ctx, "%sShadow", name);
         new_shadow_type->inner_type = glsl_type::uint_type;

         /* BUGFIX: the new type was never inserted into the cache, so the
          * hash_table_find above could never hit and every call allocated a
          * fresh (leaked) type.  Key on the ralloc'ed name, which — unlike
          * the stack-local 'key' buffer — outlives this call.
          */
         hash_table_insert(sampler_types, new_shadow_type,
                           new_shadow_type->name);

         shadow_type = new_shadow_type;
      }
   }

   return shadow_type;
}
/**
 * Validate the program pipeline against the rules of section 2.11.11
 * (Shader Execution), subheading "Validation," of the OpenGL 4.1 spec.
 *
 * On failure pipe->InfoLog holds an explanation, and — when \p IsBound is
 * set — GL_INVALID_OPERATION is raised.
 */
extern GLboolean
_mesa_validate_program_pipeline(struct gl_context *ctx,
                                struct gl_pipeline_object *pipe,
                                GLboolean IsBound)
{
   unsigned stage;

   pipe->Validated = GL_FALSE;

   /* Release and reset any info log left over from a prior validation. */
   if (pipe->InfoLog != NULL)
      ralloc_free(pipe->InfoLog);
   pipe->InfoLog = NULL;

   /* Spec: "[INVALID_OPERATION] is generated by any command that transfers
    * vertices to the GL if: A program object is active for at least one,
    * but not all of the shader stages that were present when the program
    * was linked."
    *
    * For each possible stage, verify that the program bound there has all
    * of its own linked stages active as well.
    */
   for (stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (!program_stages_all_active(pipe, pipe->CurrentProgram[stage]))
         goto err;
   }

   /* Spec: "... One program object is active for at least two shader
    * stages and a second program is active for a shader stage between two
    * stages for which the first program was active."
    *
    * Without tessellation, the only way this can happen is a geometry
    * shader sandwiched between the vertex and fragment stages.
    */
   if (pipe->CurrentProgram[MESA_SHADER_GEOMETRY] &&
       pipe->CurrentProgram[MESA_SHADER_FRAGMENT] &&
       pipe->CurrentProgram[MESA_SHADER_VERTEX]) {
      if (pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name ==
          pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name &&
          pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name !=
          pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name) {
         pipe->InfoLog =
            ralloc_asprintf(pipe,
                            "Program %d is active for geometry stage between "
                            "two stages for which another program %d is "
                            "active",
                            pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name,
                            pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name);
         goto err;
      }
   }

   /* Spec: "... There is an active program for tessellation control,
    * tessellation evaluation, or geometry stages with corresponding
    * executable shader, but there is no active program with executable
    * vertex shader."
    */
   if (!pipe->CurrentProgram[MESA_SHADER_VERTEX] &&
       pipe->CurrentProgram[MESA_SHADER_GEOMETRY]) {
      pipe->InfoLog = ralloc_strdup(pipe, "Program lacks a vertex shader");
      goto err;
   }

   /* Spec: "... the current program for any shader stage has been relinked
    * since being applied to the pipeline object via UseProgramStages with
    * the PROGRAM_SEPARABLE parameter set to FALSE."
    */
   for (stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pipe->CurrentProgram[stage] &&
          !pipe->CurrentProgram[stage]->SeparateShader) {
         pipe->InfoLog = ralloc_asprintf(pipe,
                                         "Program %d was relinked without "
                                         "PROGRAM_SEPARABLE state",
                                         pipe->CurrentProgram[stage]->Name);
         goto err;
      }
   }

   /* Spec: "... Any two active samplers in the current program object are
    * of different types, but refer to the same texture image unit," and
    * "the number of active samplers in the program exceeds the maximum
    * number of texture image units allowed."
    */
   if (!_mesa_sampler_uniforms_pipeline_are_valid(pipe))
      goto err;

   pipe->Validated = GL_TRUE;
   return GL_TRUE;

err:
   if (IsBound)
      _mesa_error(ctx, GL_INVALID_OPERATION,
                  "glValidateProgramPipeline failed to validate the pipeline");

   return GL_FALSE;
}
/**
 * Store 'data' of 'size' bytes in the cache under 'key'.
 *
 * The data is written to a ".tmp" file and atomically renamed into place so
 * readers never observe a partially written entry.  An exclusive flock on
 * the temporary file lets a concurrent writer of the same key win the race
 * without double-accounting the cache size.
 */
void
cache_put(struct program_cache *cache, cache_key key,
          const void *data, size_t size)
{
   int fd = -1, fd_final = -1, err;
   ssize_t ret; /* write() returns ssize_t, not int */
   size_t len;
   char *filename = NULL, *filename_tmp = NULL;
   const char *p = data;

   filename = get_cache_file(cache, key);
   if (filename == NULL)
      goto done;

   /* Write to a temporary file to allow for an atomic rename to the
    * final destination filename, (to prevent any readers from seeing
    * a partially written file).
    */
   filename_tmp = ralloc_asprintf(cache, "%s.tmp", filename);
   if (filename_tmp == NULL)
      goto done;

   fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);

   /* Make the two-character subdirectory within the cache as needed. */
   if (fd == -1) {
      if (errno != ENOENT)
         goto done;

      make_cache_file_directory(cache, key);

      fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
      if (fd == -1)
         goto done;
   }

   /* With the temporary file open, we take an exclusive flock on
    * it. If the flock fails, then another process still has the file
    * open with the flock held. So just let that file be responsible
    * for writing the file.
    */
   err = flock(fd, LOCK_EX | LOCK_NB);
   if (err == -1)
      goto done;

   /* Now that we have the lock on the open temporary file, we can
    * check to see if the destination file already exists. If so,
    * another process won the race between when we saw that the file
    * didn't exist and now. In this case, we don't do anything more,
    * (to ensure the size accounting of the cache doesn't get off).
    */
   fd_final = open(filename, O_RDONLY | O_CLOEXEC);
   if (fd_final != -1)
      goto done;

   /* OK, we're now on the hook to write out a file that we know is
    * not in the cache, and is also not being written out to the cache
    * by some other process.
    *
    * Before we do that, if the cache is too large, evict something
    * else first.
    */
   if (*cache->size + size > cache->max_size)
      evict_random_item(cache);

   /* Now, finally, write out the contents to the temporary file, then
    * rename them atomically to the destination filename, and also
    * perform an atomic increment of the total cache size.
    */
   for (len = 0; len < size; len += ret) {
      ret = write(fd, p + len, size - len);
      if (ret == -1) {
         unlink(filename_tmp);
         goto done;
      }
   }

   rename(filename_tmp, filename);

   p_atomic_add(cache->size, size);

   /* This close finally releases the flock, (now that the final file
    * has been renamed into place and the size has been added).
    */
   close(fd);
   fd = -1;

 done:
   /* BUGFIX: fd_final was previously never closed, leaking a file
    * descriptor every time the destination file already existed. */
   if (fd_final != -1)
      close(fd_final);
   if (filename_tmp)
      ralloc_free(filename_tmp);
   if (filename)
      ralloc_free(filename);
   if (fd != -1)
      close(fd);
}
/* Recursively materialize a nir_search_value (expression, matched variable,
 * or constant) as NIR instructions inserted before 'instr', returning an
 * ALU source referencing the result. */
static nir_alu_src
construct_value(const nir_search_value *value, nir_alu_type type,
                unsigned num_components, struct match_state *state,
                nir_instr *instr, void *mem_ctx)
{
   switch (value->type) {
   case nir_search_value_expression: {
      const nir_search_expression *expr =
         nir_search_value_as_expression(value);

      /* Opcodes with a fixed output size override the inherited count. */
      if (nir_op_infos[expr->opcode].output_size != 0)
         num_components = nir_op_infos[expr->opcode].output_size;

      nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
      nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
      alu->dest.write_mask = (1 << num_components) - 1;
      alu->dest.saturate = false;

      for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) {
         /* If the source is an explicitly sized source, then we need to
          * reset the number of components to match.
          */
         if (nir_op_infos[alu->op].input_sizes[i] != 0)
            num_components = nir_op_infos[alu->op].input_sizes[i];

         alu->src[i] = construct_value(expr->srcs[i],
                                       nir_op_infos[alu->op].input_types[i],
                                       num_components, state, instr, mem_ctx);
      }

      nir_instr_insert_before(instr, &alu->instr);

      nir_alu_src result;
      result.src = nir_src_for_ssa(&alu->dest.dest.ssa);
      result.negate = false;
      result.abs = false;
      memcpy(result.swizzle, identity_swizzle, sizeof result.swizzle);
      return result;
   }

   case nir_search_value_variable: {
      const nir_search_variable *var = nir_search_value_as_variable(value);
      assert(state->variables_seen & (1 << var->variable));

      nir_alu_src result = { NIR_SRC_INIT };
      nir_alu_src_copy(&result, &state->variables[var->variable], mem_ctx);

      assert(!var->is_constant);
      return result;
   }

   case nir_search_value_constant: {
      const nir_search_constant *c = nir_search_value_as_constant(value);
      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);

      switch (type) {
      case nir_type_float:
         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
         load->value.f[0] = c->data.f;
         break;
      case nir_type_int:
         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
         load->value.i[0] = c->data.i;
         break;
      case nir_type_unsigned:
      case nir_type_bool:
         load->value.u[0] = c->data.u;
         break;
      default:
         unreachable("Invalid alu source type");
      }

      nir_instr_insert_before(instr, &load->instr);

      nir_alu_src result;
      result.src = nir_src_for_ssa(&load->def);
      result.negate = false;
      result.abs = false;
      memset(result.swizzle, 0, sizeof result.swizzle);
      return result;
   }

   default:
      unreachable("Invalid search value type");
   }
}
/**
 * Set a vertex or fragment program's source (glProgramStringARB).
 *
 * Parses the string into the current vertex/fragment program, notifies the
 * driver, optionally dumps the source and IR when GLSL_DUMP is set, and
 * captures a .shader_test file when a capture path is configured.
 */
void GLAPIENTRY
_mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len,
                       const GLvoid *string)
{
   struct gl_program *base;
   bool failed;
   GET_CURRENT_CONTEXT(ctx);

   FLUSH_VERTICES(ctx, _NEW_PROGRAM);

   if (!ctx->Extensions.ARB_vertex_program
       && !ctx->Extensions.ARB_fragment_program) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glProgramStringARB()");
      return;
   }

   if (format != GL_PROGRAM_FORMAT_ASCII_ARB) {
      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramStringARB(format)");
      return;
   }

   if (target == GL_VERTEX_PROGRAM_ARB
       && ctx->Extensions.ARB_vertex_program) {
      struct gl_vertex_program *prog = ctx->VertexProgram.Current;
      _mesa_parse_arb_vertex_program(ctx, target, string, len, prog);
      base = & prog->Base;
   }
   else if (target == GL_FRAGMENT_PROGRAM_ARB
            && ctx->Extensions.ARB_fragment_program) {
      struct gl_fragment_program *prog = ctx->FragmentProgram.Current;
      _mesa_parse_arb_fragment_program(ctx, target, string, len, prog);
      base = & prog->Base;
   }
   else {
      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramStringARB(target)");
      return;
   }

   failed = ctx->Program.ErrorPos != -1;

   if (!failed) {
      /* finally, give the program to the driver for translation/checking */
      if (!ctx->Driver.ProgramStringNotify(ctx, target, base)) {
         failed = true;
         /* BUGFIX: the error message was missing its closing parenthesis. */
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "glProgramStringARB(rejected by driver)");
      }
   }

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      const char *shader_type =
         target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex";

      fprintf(stderr, "ARB_%s_program source for program %d:\n",
              shader_type, base->Id);
      fprintf(stderr, "%s\n", (const char *) string);

      if (failed) {
         fprintf(stderr, "ARB_%s_program %d failed to compile.\n",
                 shader_type, base->Id);
      } else {
         fprintf(stderr, "Mesa IR for ARB_%s_program %d:\n",
                 shader_type, base->Id);
         _mesa_print_program(base);
         fprintf(stderr, "\n");
      }
      fflush(stderr);
   }

   /* Capture vp-*.shader_test/fp-*.shader_test files.
    */
   const char *capture_path = _mesa_get_shader_capture_path();
   if (capture_path != NULL) {
      const char *shader_type =
         target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex";
      char *filename = ralloc_asprintf(NULL, "%s/%cp-%u.shader_test",
                                       capture_path, shader_type[0],
                                       base->Id);

      /* BUGFIX: guard against a NULL filename (out-of-memory) before
       * handing it to fopen/_mesa_warning. */
      if (filename != NULL) {
         FILE *file = fopen(filename, "w");
         if (file) {
            fprintf(file,
                    "[require]\nGL_ARB_%s_program\n\n[%s program]\n%s\n",
                    shader_type, shader_type, (const char *) string);
            fclose(file);
         } else {
            _mesa_warning(ctx, "Failed to open %s", filename);
         }
         ralloc_free(filename);
      }
   }
}
virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
{
   /* Append ".<field>" to the name accumulated so far. */
   this->name = ralloc_asprintf(mem_ctx, "%s.%s", this->name, ir->field);
   return visit_continue;
}
extern "C" bool _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline) { /* Section 2.11.11 (Shader Execution), subheading "Validation," of the * OpenGL 4.1 spec says: * * "[INVALID_OPERATION] is generated by any command that transfers * vertices to the GL if: * * ... * * - Any two active samplers in the current program object are of * different types, but refer to the same texture image unit. * * - The number of active samplers in the program exceeds the * maximum number of texture image units allowed." */ unsigned active_samplers = 0; const struct gl_shader_program **shProg = (const struct gl_shader_program **) pipeline->CurrentProgram; const glsl_type *unit_types[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; memset(unit_types, 0, sizeof(unit_types)); for (unsigned idx = 0; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) { if (!shProg[idx]) continue; for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) { const struct gl_uniform_storage *const storage = &shProg[idx]->UniformStorage[i]; if (!storage->type->is_sampler()) continue; active_samplers++; const unsigned count = MAX2(1, storage->array_elements); for (unsigned j = 0; j < count; j++) { const unsigned unit = storage->storage[j].i; /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a * great job of eliminating unused uniforms currently so for now * don't throw an error if two sampler types both point to 0. */ if (unit == 0) continue; /* The types of the samplers associated with a particular texture * unit must be an exact match. Page 74 (page 89 of the PDF) of * the OpenGL 3.3 core spec says: * * "It is not allowed to have variables of different sampler * types pointing to the same texture image unit within a * program object." 
*/ if (unit_types[unit] == NULL) { unit_types[unit] = storage->type; } else if (unit_types[unit] != storage->type) { pipeline->InfoLog = ralloc_asprintf(pipeline, "Texture unit %d is accessed both as %s " "and %s", unit, unit_types[unit]->name, storage->type->name); return false; } } } } if (active_samplers > MAX_COMBINED_TEXTURE_IMAGE_UNITS) { pipeline->InfoLog = ralloc_asprintf(pipeline, "the number of active samplers %d exceed the " "maximum %d", active_samplers, MAX_COMBINED_TEXTURE_IMAGE_UNITS); return false; } return true; }
/**
 * brw_compile_gs - compile a geometry shader for the i965 compiler back-end.
 *
 * Clones \p src_shader, computes the input VUE map, lowers VUE inputs and
 * outputs, and fills in \p prog_data (clip/cull distance masks, control
 * data format, output vertex size, URB entry size, dispatch mode, etc.).
 * On Gen7+ with scalar GS enabled it generates SIMD8 code via fs_visitor;
 * otherwise it tries vec4 DUAL_OBJECT dispatch first (Gen7+, single
 * invocation only), falling back to DUAL_INSTANCE or SINGLE mode.
 *
 * Returns the generated assembly (length in *final_assembly_size), or NULL
 * on failure — either the computed URB entry size exceeds the hardware
 * maximum, or the visitor failed (in which case *error_str is set when
 * \p error_str is non-NULL).
 */
extern "C" const unsigned * brw_compile_gs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, const nir_shader *src_shader, struct gl_shader_program *shader_prog, int shader_time_index, unsigned *final_assembly_size, char **error_str) { struct brw_gs_compile c; memset(&c, 0, sizeof(c)); c.key = *key; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY]; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); /* The GLSL linker will have already matched up GS inputs and the outputs * of prior stages. The driver does extend VS outputs in some cases, but * only for legacy OpenGL or Gen4-5 hardware, neither of which offer * geometry shader support. So we can safely ignore that. * * For SSO pipelines, we use a fixed VUE map layout based on variable * locations, so we can rely on rendezvous-by-location making this work. */ GLbitfield64 inputs_read = shader->info->inputs_read; brw_compute_vue_map(compiler->devinfo, &c.input_vue_map, inputs_read, shader->info->separate_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, is_scalar); brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map); brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); prog_data->base.clip_distance_mask = ((1 << shader->info->clip_distance_array_size) - 1); prog_data->base.cull_distance_mask = ((1 << shader->info->cull_distance_array_size) - 1) << shader->info->clip_distance_array_size; prog_data->include_primitive_id = (shader->info->system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0; prog_data->invocations = shader->info->gs.invocations; if (compiler->devinfo->gen >= 8) prog_data->static_vertex_count = nir_gs_count_vertices(shader); if (compiler->devinfo->gen >= 7) { if (shader->info->gs.output_primitive == GL_POINTS) { /* When the output type is points, the geometry shader may output data * 
to multiple streams, and EndPrimitive() has no effect. So we * configure the hardware to interpret the control data as stream ID. */ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; /* We only have to emit control bits if we are using streams */ if (shader_prog && shader_prog->Geom.UsesStreams) c.control_data_bits_per_vertex = 2; else c.control_data_bits_per_vertex = 0; } else { /* When the output type is triangle_strip or line_strip, EndPrimitive() * may be used to terminate the current strip and start a new one * (similar to primitive restart), and outputting data to multiple * streams is not supported. So we configure the hardware to interpret * the control data as EndPrimitive information (a.k.a. "cut bits"). */ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; /* We only need to output control data if the shader actually calls * EndPrimitive(). */ c.control_data_bits_per_vertex = shader->info->gs.uses_end_primitive ? 1 : 0; } } else { /* There are no control data bits in gen6. */ c.control_data_bits_per_vertex = 0; /* If it is using transform feedback, enable it */ if (shader->info->has_transform_feedback_varyings) prog_data->gen6_xfb_enabled = true; else prog_data->gen6_xfb_enabled = false; } c.control_data_header_size_bits = shader->info->gs.vertices_out * c.control_data_bits_per_vertex; /* 1 HWORD = 32 bytes = 256 bits */ prog_data->control_data_header_size_hwords = ALIGN(c.control_data_header_size_bits, 256) / 256; /* Compute the output vertex size. * * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex * Size (p168): * * [0,62] indicating [1,63] 16B units * * Specifies the size of each vertex stored in the GS output entry * (following any Control Header data) as a number of 128-bit units * (minus one). 
* * Programming Restrictions: The vertex size must be programmed as a * multiple of 32B units with the following exception: Rendering is * disabled (as per SOL stage state) and the vertex size output by the * GS thread is 16B. * * If rendering is enabled (as per SOL state) the vertex size must be * programmed as a multiple of 32B units. In other words, the only time * software can program a vertex size with an odd number of 16B units * is when rendering is disabled. * * Note: B=bytes in the above text. * * It doesn't seem worth the extra trouble to optimize the case where the * vertex size is 16B (especially since this would require special-casing * the GEN assembly that writes to the URB). So we just set the vertex * size to a multiple of 32B (2 vec4's) in all cases. * * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We * budget that as follows: * * 512 bytes for varyings (a varying component is 4 bytes and * gl_MaxGeometryOutputComponents = 128) * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 * bytes) * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE * even if it's not used) * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots * whenever clip planes are enabled, even if the shader doesn't * write to gl_ClipDistance) * 16 bytes overhead since the VUE size must be a multiple of 32 bytes * (see below)--this causes up to 1 VUE slot to be wasted * 400 bytes available for varying packing overhead * * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes) * per interpolation type, so this is plenty. * */ unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16; assert(compiler->devinfo->gen == 6 || output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); prog_data->output_vertex_size_hwords = ALIGN(output_vertex_size_bytes, 32) / 32; /* Compute URB entry size. The maximum allowed URB entry size is 32k. 
* That divides up as follows: * * 64 bytes for the control data header (cut indices or StreamID bits) * 4096 bytes for varyings (a varying component is 4 bytes and * gl_MaxGeometryTotalOutputComponents = 1024) * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 * bytes/vertex and gl_MaxGeometryOutputVertices is 256) * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE * even if it's not used) * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots * whenever clip planes are enabled, even if the shader doesn't * write to gl_ClipDistance) * 4096 bytes overhead since the VUE size must be a multiple of 32 * bytes (see above)--this causes up to 1 VUE slot to be wasted * 8128 bytes available for varying packing overhead * * Worst-case varying packing overhead is 3/4 of a varying slot per * interpolation type, which works out to 3072 bytes, so this would allow * us to accommodate 2 interpolation types without any danger of running * out of URB space. * * In practice, the risk of running out of URB space is very small, since * the above figures are all worst-case, and most of them scale with the * number of output vertices. So we'll just calculate the amount of space * we need, and if it's too large, fail to compile. * * The above is for gen7+ where we have a single URB entry that will hold * all the output. In gen6, we will have to allocate URB entries for every * vertex we emit, so our URB entries only need to be large enough to hold * a single vertex. Also, gen6 does not have a control data header. */ unsigned output_size_bytes; if (compiler->devinfo->gen >= 7) { output_size_bytes = prog_data->output_vertex_size_hwords * 32 * shader->info->gs.vertices_out; output_size_bytes += 32 * prog_data->control_data_header_size_hwords; } else { output_size_bytes = prog_data->output_vertex_size_hwords * 32; } /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, * which comes before the control header. 
*/ if (compiler->devinfo->gen >= 8) output_size_bytes += 32; /* Shaders can technically set max_vertices = 0, at which point we * may have a URB size of 0 bytes. Nothing good can come from that, * so enforce a minimum size. */ if (output_size_bytes == 0) output_size_bytes = 1; unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; if (compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; if (output_size_bytes > max_output_size_bytes) return NULL; /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and * a multiple of 128 bytes in gen6. */ if (compiler->devinfo->gen >= 7) prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; else prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; prog_data->output_topology = get_hw_prim_for_gl_prim(shader->info->gs.output_primitive); prog_data->vertices_in = shader->info->gs.vertices_in; /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). */ prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; /* Now that prog_data setup is done, we are ready to actually compile the * program. */ if (unlikely(INTEL_DEBUG & DEBUG_GS)) { fprintf(stderr, "GS Input "); brw_print_vue_map(stderr, &c.input_vue_map); fprintf(stderr, "GS Output "); brw_print_vue_map(stderr, &prog_data->base.vue_map); } if (is_scalar) { fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader, shader_time_index); if (v.run_gs()) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; fs_generator g(compiler, log_data, mem_ctx, &c.key, &prog_data->base.base, v.promoted_constants, false, MESA_SHADER_GEOMETRY); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { const char *label = shader->info->label ? 
shader->info->label : "unnamed"; char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s", label, shader->info->name); g.enable_debug(name); } g.generate_code(v.cfg, 8); return g.get_assembly(final_assembly_size); } } if (compiler->devinfo->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. If the GS invocations count > 1, then we can't use * dual object mode. */ if (prog_data->invocations <= 1 && likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader, mem_ctx, true /* no_spills */, shader_time_index); if (v.run()) { return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, &prog_data->base, v.cfg, final_assembly_size); } } } /* Either we failed to compile in DUAL_OBJECT mode (probably because it * would have required spilling) or DUAL_OBJECT mode is disabled. So fall * back to DUAL_INSTANCED or SINGLE mode, which consumes fewer registers. * * FIXME: Single dispatch mode requires that the driver can handle * interleaving of input registers, but this is already supported (dual * instance mode has the same requirement). However, to take full advantage * of single dispatch mode to reduce register pressure we would also need to * do interleaved outputs, but currently, the vec4 visitor and generator * classes do not support this, so at the moment register pressure in * single and dual instance modes is the same. * * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 "3DSTATE_GS" * "If InstanceCount>1, DUAL_OBJECT mode is invalid. Software will likely * want to use DUAL_INSTANCE mode for higher performance, but SINGLE mode * is also supported. When InstanceCount=1 (one instance per object) software * can decide which dispatch mode to use. DUAL_OBJECT mode would likely be * the best choice for performance, followed by SINGLE mode." 
* * So SINGLE mode is more performant when invocations == 1 and DUAL_INSTANCE * mode is more performant when invocations > 1. Gen6 only supports * SINGLE mode. */ if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7) prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE; else prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; vec4_gs_visitor *gs = NULL; const unsigned *ret = NULL; if (compiler->devinfo->gen >= 7) gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data, shader, mem_ctx, false /* no_spills */, shader_time_index); else gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, shader_prog, shader, mem_ctx, false /* no_spills */, shader_time_index); if (!gs->run()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, &prog_data->base, gs->cfg, final_assembly_size); } delete gs; return ret; }
/**
 * Compile a tessellation control shader from NIR into native i965 code.
 *
 * Clones \p src_shader (so the key-specific lowering below does not mutate
 * the caller's copy), applies the TCS-specific NIR lowering passes, sizes
 * the URB entry, and then generates code through either the scalar (fs)
 * or vec4 backend depending on compiler->scalar_stage[].
 *
 * Returns the assembly (ralloc'ed off mem_ctx) and its size via
 * *final_assembly_size, or NULL on failure (compile failure sets
 * *error_str when error_str is non-NULL; an oversized URB entry returns
 * NULL without a message).
 */
extern "C" const unsigned *
brw_compile_tcs(const struct brw_compiler *compiler,
                void *log_data,
                void *mem_ctx,
                const struct brw_tcs_prog_key *key,
                struct brw_tcs_prog_data *prog_data,
                const nir_shader *src_shader,
                int shader_time_index,
                unsigned *final_assembly_size,
                char **error_str)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
   const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];

   nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
   /* The written-output bitfields come from the key, not the shader, so
    * that the compile matches what the linked pipeline actually uses.
    */
   nir->info->outputs_written = key->outputs_written;
   nir->info->patch_outputs_written = key->patch_outputs_written;

   struct brw_vue_map input_vue_map;
   brw_compute_vue_map(devinfo, &input_vue_map, nir->info->inputs_read,
                       nir->info->separate_shader);
   brw_compute_tess_vue_map(&vue_prog_data->vue_map,
                            nir->info->outputs_written,
                            nir->info->patch_outputs_written);

   /* Key-dependent sampler workarounds, then rewrite I/O to the VUE
    * layouts computed above.
    */
   nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
   brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
   brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
   if (key->quads_workaround)
      brw_nir_apply_tcs_quads_workaround(nir);

   nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);

   /* Instance count: the scalar backend processes 8 patches per instance,
    * the vec4 backend 2.
    */
   if (is_scalar)
      prog_data->instances = DIV_ROUND_UP(nir->info->tcs.vertices_out, 8);
   else
      prog_data->instances = DIV_ROUND_UP(nir->info->tcs.vertices_out, 2);

   /* Compute URB entry size. The maximum allowed URB entry size is 32k.
    * That divides up as follows:
    *
    *     32 bytes for the patch header (tessellation factors)
    *    480 bytes for per-patch varyings (a varying component is 4 bytes and
    *              gl_MaxTessPatchComponents = 120)
    *  16384 bytes for per-vertex varyings (a varying component is 4 bytes,
    *              gl_MaxPatchVertices = 32 and
    *              gl_MaxTessControlOutputComponents = 128)
    *
    *  15808 bytes left for varying packing overhead
    */
   const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
   const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
   unsigned output_size_bytes = 0;
   /* Note that the patch header is counted in num_per_patch_slots. */
   output_size_bytes += num_per_patch_slots * 16;
   output_size_bytes += nir->info->tcs.vertices_out * num_per_vertex_slots * 16;

   assert(output_size_bytes >= 1);
   if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES)
      return NULL;

   /* URB entry sizes are stored as a multiple of 64 bytes. */
   vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;

   /* HS does not use the usual payload pushing from URB to GRFs,
    * because we don't have enough registers for a full-size payload, and
    * the hardware is broken on Haswell anyway.
    */
   vue_prog_data->urb_read_length = 0;

   if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
      fprintf(stderr, "TCS Input ");
      brw_print_vue_map(stderr, &input_vue_map);
      fprintf(stderr, "TCS Output ");
      brw_print_vue_map(stderr, &vue_prog_data->vue_map);
   }

   if (is_scalar) {
      /* Scalar backend: SIMD8, one patch per channel group. */
      fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
                   &prog_data->base.base, NULL, nir, 8,
                   shader_time_index, &input_vue_map);
      if (!v.run_tcs_single_patch()) {
         if (error_str)
            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
         return NULL;
      }

      prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
      prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;

      fs_generator g(compiler, log_data, mem_ctx, (void *) key,
                     &prog_data->base.base, v.promoted_constants, false,
                     MESA_SHADER_TESS_CTRL);
      if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
         g.enable_debug(ralloc_asprintf(mem_ctx,
                                        "%s tessellation control shader %s",
                                        nir->info->label ? nir->info->label
                                                         : "unnamed",
                                        nir->info->name));
      }

      g.generate_code(v.cfg, 8);

      return g.get_assembly(final_assembly_size);
   } else {
      /* Vec4 backend. */
      vec4_tcs_visitor v(compiler, log_data, key, prog_data,
                         nir, mem_ctx, shader_time_index, &input_vue_map);
      if (!v.run()) {
         if (error_str)
            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
         return NULL;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_TCS))
         v.dump_instructions();

      return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
                                        &prog_data->base, v.cfg,
                                        final_assembly_size);
   }
}
void flatten_named_interface_blocks_declarations::run(exec_list *instructions) { interface_namespace = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); /* First pass: adjust instance block variables with an instance name * to not have an instance name. * * The interface block variables are stored in the interface_namespace * hash table so they can be used in the second pass. */ foreach_list_safe(node, instructions) { ir_variable *var = ((ir_instruction *) node)->as_variable(); if (!var || !var->is_interface_instance()) continue; /* It should be possible to handle uniforms during this pass, * but, this will require changes to the other uniform block * support code. */ if (var->data.mode == ir_var_uniform) continue; const glsl_type * iface_t = var->type; const glsl_type * array_t = NULL; exec_node *insert_pos = var; if (iface_t->is_array()) { array_t = iface_t; iface_t = array_t->fields.array; } assert (iface_t->is_interface()); for (unsigned i = 0; i < iface_t->length; i++) { const char * field_name = iface_t->fields.structure[i].name; char *iface_field_name = ralloc_asprintf(mem_ctx, "%s.%s.%s", iface_t->name, var->name, field_name); ir_variable *found_var = (ir_variable *) hash_table_find(interface_namespace, iface_field_name); if (!found_var) { ir_variable *new_var; char *var_name = ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name); if (array_t == NULL) { new_var = new(mem_ctx) ir_variable(iface_t->fields.structure[i].type, var_name, (ir_variable_mode) var->data.mode); new_var->data.from_named_ifc_block_nonarray = 1; } else { const glsl_type *new_array_type = glsl_type::get_array_instance( iface_t->fields.structure[i].type, array_t->length); new_var = new(mem_ctx) ir_variable(new_array_type, var_name, (ir_variable_mode) var->data.mode); new_var->data.from_named_ifc_block_array = 1; } new_var->data.location = iface_t->fields.structure[i].location; new_var->data.explicit_location = (new_var->data.location >= 0); 
new_var->data.interpolation = iface_t->fields.structure[i].interpolation; new_var->data.centroid = iface_t->fields.structure[i].centroid; new_var->data.sample = iface_t->fields.structure[i].sample; new_var->init_interface_type(iface_t); hash_table_insert(interface_namespace, new_var, iface_field_name); insert_pos->insert_after(new_var); insert_pos = new_var; } } var->remove(); }
/**
 * Return (creating and memoizing on first use) the glsl_type instance for
 * an HLSL templated object type such as Texture2D<float4>,
 * RWTexture1D<uint>, PointStream<S>, InputPatch<S, N> or OutputPatch<S, N>.
 *
 * \param base        the inner (template argument) type; NULL returns NULL
 * \param name        the HLSL template name used as the lookup key, e.g.
 *                    "Texture2D", "RWBuffer", "PointStream"
 * \param num_samples sample count; encoded into the cache key only when > 1
 * \param patch_size  vertex count used for InputPatch/OutputPatch instances
 *
 * Instances are cached in file-static hash tables keyed by a
 * "Name<inner[,samples]>" string, so repeated requests for the same
 * instantiation return the same pointer.
 */
const glsl_type *
glsl_type::get_templated_instance(const glsl_type *base, const char *name, int num_samples, int patch_size)
{
   /* Lazily build the table of base sampler/texture templates, keyed by
    * their HLSL template names.
    */
   if (sampler_types == NULL) {
      sampler_types = hash_table_ctor(64, hash_table_string_hash, hash_table_string_compare);

      // Base sampler types.
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_1D, /*shadow=*/ false, /*array=*/ false,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ true,
                                      /*type=*/ NULL, "samplerBuffer", "sampler"),
                        "Buffer");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_1D, /*shadow=*/ false, /*array=*/ false,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler1D", "texture1d"),
                        "Texture1D");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_1D, /*shadow=*/ false, /*array=*/ true,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler1DArray", nullptr),
                        "Texture1DArray");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_2D, /*shadow=*/ false, /*array=*/ false,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler2D", "texture2d"),
                        "Texture2D");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_2D, /*shadow=*/ false, /*array=*/ true,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler2DArray", nullptr),
                        "Texture2DArray");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_2D, /*shadow=*/ false, /*array=*/ false,
                                      /*multisample=*/ true, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler2DMS", nullptr),
                        "Texture2DMS");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_2D, /*shadow=*/ false, /*array=*/ true,
                                      /*multisample=*/ true, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler2DMSArray", nullptr),
                        "Texture2DMSArray");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_3D, /*shadow=*/ false, /*array=*/ false,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "sampler3D", "texture3d"),
                        "Texture3D");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_CUBE, /*shadow=*/ false, /*array=*/ false,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "samplerCube", "texturecube"),
                        "TextureCube");
      hash_table_insert(sampler_types,
                        new glsl_type(GLSL_SAMPLER_DIM_CUBE, /*shadow=*/ false, /*array=*/ true,
                                      /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "samplerCubeArray", nullptr),
                        "TextureCubeArray");
   }

   /* Geometry-shader stream output templates. */
   if (outputstream_types == NULL) {
      outputstream_types = hash_table_ctor(64, hash_table_string_hash, hash_table_string_compare);

      // Base outputstream types.
      hash_table_insert(outputstream_types,
                        new glsl_type(GLSL_OUTPUTSTREAM_POINTS, /*type=*/ NULL, "point_stream"),
                        "PointStream");
      hash_table_insert(outputstream_types,
                        new glsl_type(GLSL_OUTPUTSTREAM_LINES, /*type=*/ NULL, "line_stream"),
                        "LineStream");
      hash_table_insert(outputstream_types,
                        new glsl_type(GLSL_OUTPUTSTREAM_TRIANGLES, /*type=*/ NULL, "triangle_stream"),
                        "TriangleStream");
   }

   if (inputpatch_types == NULL) {
      inputpatch_types = hash_table_ctor(64, hash_table_string_hash, hash_table_string_compare);

      // Base input patch types.
      hash_table_insert(inputpatch_types,
                        new glsl_type(GLSL_TYPE_INPUTPATCH, 0, (glsl_type*)NULL, "input_patch"),
                        "InputPatch");
   }

   if (outputpatch_types == NULL) {
      outputpatch_types = hash_table_ctor(64, hash_table_string_hash, hash_table_string_compare);

      // Base output patch types.
      hash_table_insert(outputpatch_types,
                        new glsl_type(GLSL_TYPE_OUTPUTPATCH, 0, (glsl_type*)NULL, "output_patch"),
                        "OutputPatch");
   }

   /* UAV / image templates ("RW*" names). Note the read-only
    * StructuredBuffer/ByteAddressBuffer also live here under RW keys —
    * presumably deliberate aliasing; confirm before changing.
    */
   if (image_types == NULL) {
      image_types = hash_table_ctor(64, hash_table_string_hash, hash_table_string_compare);

      // Base image types.
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_1D, /*array=*/ false, /*sampler_buffer=*/ true,
                                      /*type=*/ NULL, "imageBuffer"),
                        "RWBuffer");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_1D, /*array=*/ false, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "image1D"),
                        "RWTexture1D");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_1D, /*array=*/ true, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "image1DArray"),
                        "RWTexture1DArray");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_2D, /*array=*/ false, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "image2D"),
                        "RWTexture2D");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_2D, /*array=*/ true, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "image2DArray"),
                        "RWTexture2DArray");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_3D, /*array=*/ false, /*sampler_buffer=*/ false,
                                      /*type=*/ NULL, "image3D"),
                        "RWTexture3D");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_BUF, /*array=*/ false, /*sampler_buffer=*/ true,
                                      /*type=*/ NULL, "StructuredBuffer"),
                        "RWStructuredBuffer");
      hash_table_insert(image_types,
                        new glsl_type(GLSL_SAMPLER_DIM_BUF, /*array=*/ false, /*sampler_buffer=*/ true,
                                      /*type=*/ NULL, "ByteAddressBuffer"),
                        "RWByteAddressBuffer");
   }

   if (base == NULL) {
      return NULL;
   }

   const glsl_type* outputstream_base_type = (const glsl_type*)hash_table_find(outputstream_types, name);
   if (outputstream_base_type != NULL) {
      /** Generate a key that is the combination of outputstream type and inner type. */
      char key[128];
      snprintf(key, sizeof(key), "%s<%s>", outputstream_base_type->name, base->name);

      const glsl_type *actual_type = (glsl_type *)hash_table_find(outputstream_types, key);
      if (actual_type == NULL) {
         actual_type = new glsl_type(
            (glsl_outputstream_type)(outputstream_base_type->outputstream_type),
            base, key);
         hash_table_insert(outputstream_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
      }
      return actual_type;
   }

   const glsl_type* inputpatch_base_type = (const glsl_type*)hash_table_find(inputpatch_types, name);
   if (inputpatch_base_type != NULL) {
      /** Generate a key that is the combination of input patch type and inner type. */
      char key[128];
      snprintf(key, sizeof(key), "%s<%s>", inputpatch_base_type->name, base->name);

      /* NOTE(review): the cache key omits patch_size, so two InputPatch
       * instantiations of the same inner type but different sizes would
       * collide — verify callers never do that.
       */
      const glsl_type *actual_type = (glsl_type *)hash_table_find(inputpatch_types, key);
      if (actual_type == NULL) {
         actual_type = new glsl_type(GLSL_TYPE_INPUTPATCH, patch_size, base, key);
         hash_table_insert(inputpatch_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
      }
      return actual_type;
   }

   const glsl_type* outputpatch_base_type = (const glsl_type*)hash_table_find(outputpatch_types, name);
   if (outputpatch_base_type != NULL) {
      /** Generate a key that is the combination of output patch type and inner type. */
      char key[128];
      snprintf(key, sizeof(key), "%s<%s>", outputpatch_base_type->name, base->name);

      const glsl_type *actual_type = (glsl_type *)hash_table_find(outputpatch_types, key);
      if (actual_type == NULL) {
         actual_type = new glsl_type(GLSL_TYPE_OUTPUTPATCH, patch_size, base, key);
         hash_table_insert(outputpatch_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
      }
      return actual_type;
   }

   /* Non-numeric inner types (e.g. structs) are rejected for the remaining
    * (image/sampler) templates. The commented-out condition below was the
    * intended carve-out; as written the inner brace block always executes.
    */
   if (!base->is_numeric()) {
      // Hack!
      //#todo-rco: Proper support!
      //if (strcmp(name, "RWStructuredBuffer") && strcmp(name, "RWByteAddressBuffer"))
      {
         return nullptr;
      }
   }

   const glsl_type* image_base_type = (const glsl_type*)hash_table_find(image_types, name);
   if (image_base_type != NULL) {
      /** Generate a key that is the combination of sampler type and inner type. */
      char key[128];
      snprintf(key, sizeof(key), "%s<%s>", image_base_type->name, base->name);

      const glsl_type *actual_type = (glsl_type *)hash_table_find(image_types, key);
      if (actual_type == NULL) {
         actual_type = new glsl_type(
            (glsl_sampler_dim)(image_base_type->sampler_dimensionality),
            image_base_type->sampler_array,
            image_base_type->sampler_buffer,
            base,
            ralloc_asprintf(mem_ctx, "%s%s", sampler_type_prefix[base->base_type], image_base_type->name));
         hash_table_insert(image_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
      }
      return actual_type;
   }

   const glsl_type* sampler_base_type = (const glsl_type*)hash_table_find(sampler_types, name);
   if (sampler_base_type == NULL) {
      return NULL;
   }

   /** Generate a key that is the combination of sampler type and inner type. */
   char key[128];
   /* Multisample count participates in the key only when meaningful. */
   if (num_samples>1) {
      snprintf(key, sizeof(key), "%s<%s,%d>", sampler_base_type->name, base->name, num_samples);
   } else {
      snprintf(key, sizeof(key), "%s<%s>", sampler_base_type->name, base->name);
   }

   const glsl_type *actual_type = (glsl_type *)hash_table_find(sampler_types, key);
   if (actual_type == NULL) {
      actual_type = new glsl_type(
         (glsl_sampler_dim)(sampler_base_type->sampler_dimensionality),
         sampler_base_type->sampler_shadow,
         sampler_base_type->sampler_array,
         sampler_base_type->sampler_ms,
         num_samples,
         sampler_base_type->sampler_buffer,
         base,
         ralloc_asprintf(mem_ctx, "%s%s", sampler_type_prefix[base->base_type], sampler_base_type->name),
         sampler_base_type->HlslName);
      hash_table_insert(sampler_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
   }
   return actual_type;
}
/**
 * Convert an AST field-selection expression (".foo") to IR.
 *
 * Handles, in order: vector/scalar swizzles, HLSL matrix swizzles
 * (zero-based "_mRC" and one-based "_RC" forms), struct member access,
 * and "method calls" (array .length(), texture/image intrinsics, and the
 * geometry-stream Append()/RestartStrip() methods).
 *
 * Returns the resulting rvalue, NULL for statement-like methods that only
 * append instructions (Append/RestartStrip), or error_value() after
 * reporting a diagnostic.
 */
ir_rvalue *
_mesa_ast_field_selection_to_hir(const ast_expression *expr,
                                 exec_list *instructions,
                                 struct _mesa_glsl_parse_state *state)
{
   void *ctx = state;
   ir_rvalue *result = NULL;
   ir_rvalue *op;

   op = expr->subexpressions[0]->hir(instructions, state);

   /* There are two kinds of field selection. There is the selection of a
    * specific field from a structure, and there is the selection of a
    * swizzle / mask from a vector. Which is which is determined entirely
    * by the base type of the thing to which the field selection operator is
    * being applied.
    */
   YYLTYPE loc = expr->get_location();
   if (op->type->is_error()) {
      /* silently propagate the error */
   } else if (op->type->is_vector() || op->type->is_scalar()) {
      ir_swizzle *swiz = ir_swizzle::create(op,
                                            expr->primary_expression.identifier,
                                            op->type->vector_elements);
      if (swiz != NULL) {
         result = swiz;
      } else {
         /* FINISHME: Logging of error messages should be moved into
          * FINISHME: ir_swizzle::create. This allows the generation of more
          * FINISHME: specific error messages.
          */
         _mesa_glsl_error(& loc, state, "Invalid swizzle / mask '%s'",
                          expr->primary_expression.identifier);
      }
   } else if (op->type->is_matrix() && expr->primary_expression.identifier) {
      /* HLSL matrix swizzle. Each element selects a flat component index
       * (column * rows + row) from the matrix.
       */
      int src_components = op->type->components();
      int components[4] = {0};
      uint32 num_components = 0;
      ir_swizzle_mask mask = {0};
      const char* mask_str = expr->primary_expression.identifier;
      if (mask_str[0] == '_' && mask_str[1] == 'm') {
         /* Zero-based form: "_m00_m12..." — digits are 0..dim-1.
          * Bounds must be strict '<' (col == matrix_columns would index one
          * column past the end of the matrix).
          */
         do {
            mask_str += 2;
            int col = (*mask_str) ? (*mask_str++) - '0' : -1;
            int row = (*mask_str) ? (*mask_str++) - '0' : -1;
            if (col >= 0 && col < op->type->matrix_columns &&
                row >= 0 && row < op->type->vector_elements) {
               components[num_components++] = col * op->type->vector_elements + row;
            } else {
               components[num_components++] = -1;
            }
         } while (*mask_str != 0 && num_components < 4);
      } else if (mask_str[0] == '_' && mask_str[1] >= '1' && mask_str[2] <= '4') {
         /* One-based form: "_11_23..." — digits are 1..dim. */
         do {
            mask_str += 1;
            int col = (*mask_str) ? (*mask_str++) - '1' : -1;
            int row = (*mask_str) ? (*mask_str++) - '1' : -1;
            if (col >= 0 && col < op->type->matrix_columns &&
                row >= 0 && row < op->type->vector_elements) {
               components[num_components++] = col * op->type->vector_elements + row;
            } else {
               components[num_components++] = -1;
            }
         } while (*mask_str != 0 && num_components < 4);
      }

      /* Only build the mask if the whole string was consumed. Any component
       * left invalid (-1 or out of range) makes mask.num_components lag
       * num_components, which triggers the error below. Valid flat indices
       * are 0..src_components-1, hence strict '<'.
       */
      if (*mask_str == 0) {
         if (num_components > 0 && components[0] >= 0 &&
             components[0] < src_components) {
            mask.x = (unsigned)components[0];
            mask.num_components++;
         }
         if (num_components > 1 && components[1] >= 0 &&
             components[1] < src_components) {
            mask.y = (unsigned)components[1];
            mask.has_duplicates = (mask.y == mask.x);
            mask.num_components++;
         }
         if (num_components > 2 && components[2] >= 0 &&
             components[2] < src_components) {
            mask.z = (unsigned)components[2];
            mask.has_duplicates = mask.has_duplicates ||
                                  (mask.z == mask.y) || (mask.z == mask.x);
            mask.num_components++;
         }
         if (num_components > 3 && components[3] >= 0 &&
             components[3] < src_components) {
            mask.w = (unsigned)components[3];
            mask.has_duplicates = mask.has_duplicates || (mask.w == mask.z) ||
                                  (mask.w == mask.y) || (mask.w == mask.x);
            mask.num_components++;
         }
      }

      if (mask.num_components == num_components) {
         result = new(ctx)ir_swizzle(op, mask);
      }
      if (result == NULL) {
         _mesa_glsl_error(&loc, state, "invalid matrix swizzle '%s'",
                          expr->primary_expression.identifier);
      }
   } else if (op->type->base_type == GLSL_TYPE_STRUCT) {
      result = new(ctx)ir_dereference_record(op,
                                             expr->primary_expression.identifier);
      if (result->type->is_error()) {
         _mesa_glsl_error(& loc, state, "Cannot access field '%s' of "
                          "structure", expr->primary_expression.identifier);
      }
   } else if (expr->subexpressions[1] != NULL) {
      /* Handle "method calls" in GLSL 1.20+ */
      if (state->language_version < 120)
         _mesa_glsl_error(&loc, state, "Methods not supported in GLSL 1.10.");

      ast_expression *call = expr->subexpressions[1];
      check(call->oper == ast_function_call);

      const char *method;
      method = call->subexpressions[0]->primary_expression.identifier;

      if (op->type->is_array() && strcmp(method, "length") == 0) {
         if (!call->expressions.is_empty())
            _mesa_glsl_error(&loc, state, "length method takes no arguments.");
         if (op->type->array_size() == 0)
            _mesa_glsl_error(&loc, state, "length called on unsized array.");
         result = new(ctx)ir_constant(op->type->array_size());
      } else if (op->type->is_sampler() && op->as_dereference() != NULL) {
         return gen_texture_op(expr, op->as_dereference(), instructions, state);
      } else if (op->type->is_image() && op->as_dereference() != NULL) {
         return gen_image_op(expr, op->as_dereference(), instructions, state);
      } else if (op->type->is_outputstream() && strcmp(method, "Append") == 0) {
         check(op->variable_referenced()->type->inner_type->is_record());
         check(op->variable_referenced()->type->inner_type->name);

         const char* function_name = "OutputStream_Append";
         ir_function *func = state->symbols->get_function(function_name);
         if (!func) {
            // Prepare the function, add it to global symbols. It will be added to declarations at GenerateGlslMain().
            func = new(ctx)ir_function(function_name);
            state->symbols->add_global_function(func);
         }

         /* Find (or lazily create) the signature taking one argument of the
          * stream's inner record type.
          */
         exec_list comparison_parameter;
         ir_variable* var = new(ctx)ir_variable(
            op->variable_referenced()->type->inner_type,
            ralloc_asprintf(ctx, "arg0"), ir_var_in);
         comparison_parameter.push_tail(var);

         bool is_exact = false;
         ir_function_signature *sig =
            func->matching_signature(&comparison_parameter, &is_exact);
         if (!sig || !is_exact) {
            sig = new(ctx)ir_function_signature(glsl_type::void_type);
            sig->parameters.push_tail(var);
            sig->is_builtin = false;
            sig->is_defined = true;
            func->add_signature(sig);
         }

         if (call->expressions.is_empty() ||
             (call->expressions.get_head() != call->expressions.get_tail())) {
            _mesa_glsl_error(&loc, state, "Append method takes one argument.");
         } else {
            exec_list actual_parameter;
            ast_node *const ast =
               exec_node_data(ast_node, call->expressions.get_head(), link);
            /* Renamed from "result": the outer result is unused on this
             * path (the method is a statement and we return NULL below).
             */
            ir_rvalue *arg_rvalue = ast->hir(instructions, state);
            actual_parameter.push_tail(arg_rvalue);
            instructions->push_tail(new(ctx)ir_call(sig, NULL, &actual_parameter));
         }
         return NULL;
      } else if (op->type->is_outputstream() &&
                 strcmp(method, "RestartStrip") == 0) {
         /* RestartStrip() maps directly onto GLSL's EndPrimitive(). */
         exec_list actual_parameters; // empty, as no parameters
         ir_function *func = state->symbols->get_function("EndPrimitive");
         check(func);
         bool is_exact = false;
         ir_function_signature *sig =
            func->matching_signature(&actual_parameters, &is_exact);
         check(sig && is_exact);
         instructions->push_tail(new(ctx)ir_call(sig, NULL, &actual_parameters));
         return NULL;
      } else {
         _mesa_glsl_error(&loc, state, "Unknown method: '%s'.", method);
      }
   } else {
      _mesa_glsl_error(& loc, state, "Cannot access field '%s' of "
                       "non-structure / non-vector.",
                       expr->primary_expression.identifier);
   }
   return result ? result : ir_rvalue::error_value(ctx);
}
/**
 * Compile a linked compute shader program to native i965 code.
 *
 * Tries a SIMD8 compile first, then a SIMD16 compile (reusing the SIMD8
 * visitor's promoted uniforms); picks the widest variant whose thread
 * count can cover the local workgroup size. On success returns the
 * assembly (size via *final_assembly_size) and sets prog_data->simd_size;
 * on failure sets prog->LinkStatus false, appends the failure message to
 * prog->InfoLog, and returns NULL.
 */
static const unsigned *
brw_cs_emit(struct brw_context *brw,
            void *mem_ctx,
            const struct brw_cs_prog_key *key,
            struct brw_cs_prog_data *prog_data,
            struct gl_compute_program *cp,
            struct gl_shader_program *prog,
            unsigned *final_assembly_size)
{
   /* Snapshot GPU-busy state and wall time so we can report compiles that
    * stalled the GPU (perf_debug only).
    */
   bool start_busy = false;
   double start_time = 0;

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    drm_intel_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   struct brw_shader *shader =
      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];

   if (unlikely(INTEL_DEBUG & DEBUG_CS))
      brw_dump_ir("compute", prog, &shader->base, &cp->Base);

   prog_data->local_size[0] = cp->LocalSize[0];
   prog_data->local_size[1] = cp->LocalSize[1];
   prog_data->local_size[2] = cp->LocalSize[2];
   unsigned local_workgroup_size =
      cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];

   cfg_t *cfg = NULL;
   const char *fail_msg = NULL;

   int st_index = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);

   /* Now the main event: Visit the shader IR and generate our CS IR for it.
    */
   fs_visitor v8(brw->intelScreen->compiler, brw,
                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
                 &cp->Base, 8, st_index);
   if (!v8.run_cs()) {
      fail_msg = v8.fail_msg;
   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
      /* SIMD8 only works if 8-wide threads can cover the workgroup. */
      cfg = v8.cfg;
      prog_data->simd_size = 8;
   }

   fs_visitor v16(brw->intelScreen->compiler, brw,
                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
                  &cp->Base, 16, st_index);
   if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
       !fail_msg && !v8.simd16_unsupported &&
       local_workgroup_size <= 16 * brw->max_cs_threads) {
      /* Try a SIMD16 compile */
      v16.import_uniforms(&v8);
      if (!v16.run_cs()) {
         perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
         if (!cfg) {
            /* SIMD8 was also unusable (workgroup too big), so nothing can
             * run this shader.
             */
            fail_msg = "Couldn't generate SIMD16 program and not "
                       "enough threads for SIMD8";
         }
      } else {
         cfg = v16.cfg;
         prog_data->simd_size = 16;
      }
   }

   if (unlikely(cfg == NULL)) {
      assert(fail_msg);
      prog->LinkStatus = false;
      ralloc_strcat(&prog->InfoLog, fail_msg);
      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
                    fail_msg);
      return NULL;
   }

   fs_generator g(brw->intelScreen->compiler, brw,
                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
                  v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
   if (INTEL_DEBUG & DEBUG_CS) {
      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
                                   prog->Label ? prog->Label : "unnamed",
                                   prog->Name);
      g.enable_debug(name);
   }

   g.generate_code(cfg, prog_data->simd_size);

   if (unlikely(brw->perf_debug) && shader) {
      if (shader->compiled_once) {
         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
      }
      shader->compiled_once = true;

      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   return g.get_assembly(final_assembly_size);
}
void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { switch (opcode) { case SpvOpVariable: { struct vtn_variable *var = rzalloc(b, struct vtn_variable); var->type = vtn_value(b, w[1], vtn_value_type_type)->type; var->chain.var = var; var->chain.length = 0; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_access_chain); val->access_chain = &var->chain; struct vtn_type *without_array = var->type; while(glsl_type_is_array(without_array->type)) without_array = without_array->array_element; nir_variable_mode nir_mode; switch ((SpvStorageClass)w[3]) { case SpvStorageClassUniform: case SpvStorageClassUniformConstant: if (without_array->block) { var->mode = vtn_variable_mode_ubo; b->shader->info.num_ubos++; } else if (without_array->buffer_block) { var->mode = vtn_variable_mode_ssbo; b->shader->info.num_ssbos++; } else if (glsl_type_is_image(without_array->type)) { var->mode = vtn_variable_mode_image; nir_mode = nir_var_uniform; b->shader->info.num_images++; } else if (glsl_type_is_sampler(without_array->type)) { var->mode = vtn_variable_mode_sampler; nir_mode = nir_var_uniform; b->shader->info.num_textures++; } else { assert(!"Invalid uniform variable type"); } break; case SpvStorageClassPushConstant: var->mode = vtn_variable_mode_push_constant; assert(b->shader->num_uniforms == 0); b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; break; case SpvStorageClassInput: var->mode = vtn_variable_mode_input; nir_mode = nir_var_shader_in; break; case SpvStorageClassOutput: var->mode = vtn_variable_mode_output; nir_mode = nir_var_shader_out; break; case SpvStorageClassPrivate: var->mode = vtn_variable_mode_global; nir_mode = nir_var_global; break; case SpvStorageClassFunction: var->mode = vtn_variable_mode_local; nir_mode = nir_var_local; break; case SpvStorageClassWorkgroup: var->mode = vtn_variable_mode_workgroup; nir_mode = nir_var_shared; break; case SpvStorageClassCrossWorkgroup: case 
SpvStorageClassGeneric: case SpvStorageClassAtomicCounter: default: unreachable("Unhandled variable storage class"); } switch (var->mode) { case vtn_variable_mode_local: case vtn_variable_mode_global: case vtn_variable_mode_image: case vtn_variable_mode_sampler: case vtn_variable_mode_workgroup: /* For these, we create the variable normally */ var->var = rzalloc(b->shader, nir_variable); var->var->name = ralloc_strdup(var->var, val->name); var->var->type = var->type->type; var->var->data.mode = nir_mode; switch (var->mode) { case vtn_variable_mode_image: case vtn_variable_mode_sampler: var->var->interface_type = without_array->type; break; default: var->var->interface_type = NULL; break; } break; case vtn_variable_mode_input: case vtn_variable_mode_output: { /* For inputs and outputs, we immediately split structures. This * is for a couple of reasons. For one, builtins may all come in * a struct and we really want those split out into separate * variables. For another, interpolation qualifiers can be * applied to members of the top-level struct ane we need to be * able to preserve that information. */ int array_length = -1; struct vtn_type *interface_type = var->type; if (b->shader->stage == MESA_SHADER_GEOMETRY && glsl_type_is_array(var->type->type)) { /* In Geometry shaders (and some tessellation), inputs come * in per-vertex arrays. However, some builtins come in * non-per-vertex, hence the need for the is_array check. In * any case, there are no non-builtin arrays allowed so this * check should be sufficient. */ interface_type = var->type->array_element; array_length = glsl_get_length(var->type->type); } if (glsl_type_is_struct(interface_type->type)) { /* It's a struct. Split it. 
*/ unsigned num_members = glsl_get_length(interface_type->type); var->members = ralloc_array(b, nir_variable *, num_members); for (unsigned i = 0; i < num_members; i++) { const struct glsl_type *mtype = interface_type->members[i]->type; if (array_length >= 0) mtype = glsl_array_type(mtype, array_length); var->members[i] = rzalloc(b->shader, nir_variable); var->members[i]->name = ralloc_asprintf(var->members[i], "%s.%d", val->name, i); var->members[i]->type = mtype; var->members[i]->interface_type = interface_type->members[i]->type; var->members[i]->data.mode = nir_mode; } } else { var->var = rzalloc(b->shader, nir_variable); var->var->name = ralloc_strdup(var->var, val->name); var->var->type = var->type->type; var->var->interface_type = interface_type->type; var->var->data.mode = nir_mode; } /* For inputs and outputs, we need to grab locations and builtin * information from the interface type. */ vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); break; case vtn_variable_mode_param: unreachable("Not created through OpVariable"); } case vtn_variable_mode_ubo: case vtn_variable_mode_ssbo: case vtn_variable_mode_push_constant: /* These don't need actual variables. */ break; } if (count > 4) { assert(count == 5); nir_constant *constant = vtn_value(b, w[4], vtn_value_type_constant)->constant; var->var->constant_initializer = nir_constant_clone(constant, var->var); } vtn_foreach_decoration(b, val, var_decoration_cb, var); if (var->mode == vtn_variable_mode_image || var->mode == vtn_variable_mode_sampler) { /* XXX: We still need the binding information in the nir_variable * for these. We should fix that. 
*/ var->var->data.binding = var->binding; var->var->data.descriptor_set = var->descriptor_set; if (var->mode == vtn_variable_mode_image) var->var->data.image.format = without_array->image_format; } if (var->mode == vtn_variable_mode_local) { assert(var->members == NULL && var->var != NULL); nir_function_impl_add_variable(b->impl, var->var); } else if (var->var) { nir_shader_add_variable(b->shader, var->var); } else if (var->members) { unsigned count = glsl_get_length(without_array->type); for (unsigned i = 0; i < count; i++) { assert(var->members[i]->data.mode != nir_var_local); nir_shader_add_variable(b->shader, var->members[i]); } } else { assert(var->mode == vtn_variable_mode_ubo || var->mode == vtn_variable_mode_ssbo || var->mode == vtn_variable_mode_push_constant); } break; }
/**
 * Visit every leaf field of \p var, constructing the fully-qualified GLSL
 * resource name ("Blk[1].bar", "s.v", ...) for each one and forwarding it
 * to visit_field() via recursion().
 *
 * The name handed to recursion() is always a ralloc'd heap string so that
 * the recursion can rewrite/extend its tail in place
 * (ralloc_asprintf_rewrite_tail); it is freed before returning.
 */
void
program_resource_visitor::process(ir_variable *var)
{
   const glsl_type *t = var->type;
   /* Row-major layout of the variable itself; forwarded to recursion() for
    * every case below.
    *
    * NOTE(review): an older comment here claimed "false is always passed for
    * the row_major parameter ... because no information is available" (see
    * the warning in linker.h), but the code now derives row_major from
    * matrix_layout and forwards it — the per-branch comments below about
    * "safely passing false" are stale in the same way; confirm against
    * linker.h.
    */
   const bool row_major =
      var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;

   /* Only strdup the name if we actually will need to modify it. */
   if (var->data.from_named_ifc_block_array) {
      /* lower_named_interface_blocks created this variable by lowering an
       * interface block array to an array variable.  For example if the
       * original source code was:
       *
       *     out Blk { vec4 bar } foo[3];
       *
       * Then the variable is now:
       *
       *     out vec4 bar[3];
       *
       * We need to visit each array element using the names constructed like
       * so:
       *
       *     Blk[0].bar
       *     Blk[1].bar
       *     Blk[2].bar
       */
      assert(t->is_array());
      const glsl_type *ifc_type = var->get_interface_type();
      /* Base name is the block name; each loop iteration appends its own
       * "[i].field" suffix starting from the same base length.
       */
      char *name = ralloc_strdup(NULL, ifc_type->name);
      size_t name_length = strlen(name);
      for (unsigned i = 0; i < t->length; i++) {
         size_t new_length = name_length;

         /* Append the subscript and the field name onto the block name. */
         ralloc_asprintf_rewrite_tail(&name, &new_length, "[%u].%s", i,
                                      var->name);

         /* Note: row_major is only meaningful for uniform blocks, and
          * lowering is only applied to non-uniform interface blocks, so we
          * can safely pass false for row_major.
          */
         recursion(var->type, &name, new_length, row_major, NULL, false);
      }
      ralloc_free(name);
   } else if (var->data.from_named_ifc_block_nonarray) {
      /* lower_named_interface_blocks created this variable by lowering a
       * named interface block (non-array) to an ordinary variable.  For
       * example if the original source code was:
       *
       *     out Blk { vec4 bar } foo;
       *
       * Then the variable is now:
       *
       *     out vec4 bar;
       *
       * We need to visit this variable using the name:
       *
       *     Blk.bar
       */
      const glsl_type *ifc_type = var->get_interface_type();
      char *name = ralloc_asprintf(NULL, "%s.%s",
                                   ifc_type->name, var->name);

      /* Note: row_major is only meaningful for uniform blocks, and lowering
       * is only applied to non-uniform interface blocks, so we can safely
       * pass false for row_major.
       */
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else if (t->without_array()->is_record()) {
      /* Struct (or array of structs): qualify fields with the variable name. */
      char *name = ralloc_strdup(NULL, var->name);
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else if (t->is_interface()) {
      /* Interface block: fields are qualified with the block's type name,
       * not the instance name.
       */
      char *name = ralloc_strdup(NULL, var->type->name);
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else if (t->is_array() && t->fields.array->is_interface()) {
      /* Array of interface blocks: same, using the element type's name. */
      char *name = ralloc_strdup(NULL, var->type->fields.array->name);
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else {
      /* Simple leaf variable: no name rewriting needed, visit directly. */
      this->visit_field(t, var->name, row_major, NULL, false);
   }
}
/**
 * Compile (or reuse from the cache in blit->msaa_shaders[]) and bind the
 * GLSL program that meta_BlitFramebuffer uses for multisample blits of
 * \p target:
 *
 *  - depth resolve/copy shaders (write gl_FragDepth from a single sample),
 *  - color copy shaders (one invocation per sample when the destination is
 *    multisampled), and
 *  - color resolve shaders (pairwise merge of all samples).
 *
 * When the destination framebuffer is multisampled, per-sample shading is
 * enabled (requires ARB_sample_shading).
 *
 * Bug fixed: the float resolve path emitted "gl_FragColor = ..." even though
 * the fragment shader declares "out gvec4 out_color" — under #version 130,
 * mixing gl_FragColor with a user-declared fragment output is invalid GLSL
 * and left out_color unwritten.  It now writes out_color like the integer
 * path does.
 */
static void
setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                            struct blit_state *blit,
                            const struct gl_framebuffer *drawFb,
                            struct gl_renderbuffer *src_rb,
                            GLenum target)
{
   const char *vs_source;
   char *fs_source;
   void *mem_ctx;
   enum blit_msaa_shader shader_index;
   bool dst_is_msaa = false;
   GLenum src_datatype;
   const char *vec4_prefix;
   const char *sampler_array_suffix = "";
   char *name;
   const char *texcoord_type = "vec2";
   int samples;
   int shader_offset = 0;

   if (src_rb) {
      samples = MAX2(src_rb->NumSamples, 1);
      src_datatype = _mesa_get_format_datatype(src_rb->Format);
   } else {
      /* depth-or-color glCopyTexImage fallback path that passes a NULL rb and
       * doesn't handle integer.
       */
      samples = 1;
      src_datatype = GL_UNSIGNED_NORMALIZED;
   }

   /* We expect only power of 2 samples in source multisample buffer. */
   assert(samples > 0 && _mesa_is_pow_two(samples));
   /* shader_offset = log2(samples), used to index the per-sample-count
    * resolve shader variants.
    */
   while (samples >> (shader_offset + 1)) {
      shader_offset++;
   }
   /* Update the assert if we plan to support more than 16X MSAA. */
   assert(shader_offset >= 0 && shader_offset <= 4);

   if (drawFb->Visual.samples > 1) {
      /* If you're calling meta_BlitFramebuffer with the destination
       * multisampled, this is the only path that will work -- swrast and
       * CopyTexImage won't work on it either.
       */
      assert(ctx->Extensions.ARB_sample_shading);

      dst_is_msaa = true;

      /* We need shader invocation per sample, not per pixel */
      _mesa_set_enable(ctx, GL_MULTISAMPLE, GL_TRUE);
      _mesa_set_enable(ctx, GL_SAMPLE_SHADING, GL_TRUE);
      _mesa_MinSampleShading(1.0);
   }

   switch (target) {
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      if (src_rb && (src_rb->_BaseFormat == GL_DEPTH_COMPONENT ||
                     src_rb->_BaseFormat == GL_DEPTH_STENCIL)) {
         if (dst_is_msaa)
            shader_index = BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY;
         else
            shader_index = BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_RESOLVE;
      } else {
         if (dst_is_msaa)
            shader_index = BLIT_MSAA_SHADER_2D_MULTISAMPLE_COPY;
         else {
            shader_index = BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE +
                           shader_offset;
         }
      }

      if (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
         shader_index += (BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_RESOLVE -
                          BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE);
         sampler_array_suffix = "Array";
         texcoord_type = "vec3";
      }
      break;
   default:
      _mesa_problem(ctx, "Unknown texture target %s\n",
                    _mesa_enum_to_string(target));
      shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE;
   }

   /* We rely on the enum being sorted this way. */
   STATIC_ASSERT(BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE_INT ==
                 BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE + 5);
   STATIC_ASSERT(BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE_UINT ==
                 BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE + 10);

   if (src_datatype == GL_INT) {
      shader_index += 5;
      vec4_prefix = "i";
   } else if (src_datatype == GL_UNSIGNED_INT) {
      shader_index += 10;
      vec4_prefix = "u";
   } else {
      vec4_prefix = "";
   }

   if (blit->msaa_shaders[shader_index]) {
      _mesa_UseProgram(blit->msaa_shaders[shader_index]);
      return;
   }

   mem_ctx = ralloc_context(NULL);

   if (shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_RESOLVE ||
       shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_RESOLVE ||
       shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY ||
       shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY) {
      char *sample_index;
      const char *arb_sample_shading_extension_string;

      if (dst_is_msaa) {
         arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
         sample_index = "gl_SampleID";
         name = "depth MSAA copy";
      } else {
         /* Don't need that extension, since we're drawing to a single-sampled
          * destination.
          */
         arb_sample_shading_extension_string = "";
         /* From the GL 4.3 spec:
          *
          *     "If there is a multisample buffer (the value of SAMPLE_BUFFERS
          *      is one), then values are obtained from the depth samples in
          *      this buffer. It is recommended that the depth value of the
          *      centermost sample be used, though implementations may choose
          *      any function of the depth sample values at each pixel.
          *
          * We're slacking and instead of choosing centermost, we've got 0.
          */
         sample_index = "0";
         name = "depth MSAA resolve";
      }

      vs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "in vec2 position;\n"
                                  "in %s textureCoords;\n"
                                  "out %s texCoords;\n"
                                  "void main()\n"
                                  "{\n"
                                  "   texCoords = textureCoords;\n"
                                  "   gl_Position = vec4(position, 0.0, 1.0);\n"
                                  "}\n",
                                  texcoord_type,
                                  texcoord_type);
      fs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "#extension GL_ARB_texture_multisample : enable\n"
                                  "%s\n"
                                  "uniform sampler2DMS%s texSampler;\n"
                                  "in %s texCoords;\n"
                                  "out vec4 out_color;\n"
                                  "\n"
                                  "void main()\n"
                                  "{\n"
                                  "   gl_FragDepth = texelFetch(texSampler, i%s(texCoords), %s).r;\n"
                                  "}\n",
                                  arb_sample_shading_extension_string,
                                  sampler_array_suffix,
                                  texcoord_type,
                                  texcoord_type,
                                  sample_index);
   } else {
      /* You can create 2D_MULTISAMPLE textures with 0 sample count (meaning 1
       * sample).  Yes, this is ridiculous.
       */
      char *sample_resolve;
      const char *arb_sample_shading_extension_string;
      const char *merge_function;
      name = ralloc_asprintf(mem_ctx, "%svec4 MSAA %s",
                             vec4_prefix,
                             dst_is_msaa ? "copy" : "resolve");

      if (dst_is_msaa) {
         arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
         sample_resolve = ralloc_asprintf(mem_ctx, "   out_color = texelFetch(texSampler, i%s(texCoords), gl_SampleID);", texcoord_type);
         merge_function = "";
      } else {
         int i;
         int step;

         if (src_datatype == GL_INT || src_datatype == GL_UNSIGNED_INT) {
            /* Integer average that can't overflow: halve each operand and
             * add back the carry bit both halves dropped.
             */
            merge_function =
               "gvec4 merge(gvec4 a, gvec4 b) { return (a >> gvec4(1)) + (b >> gvec4(1)) + (a & b & gvec4(1)); }\n";
         } else {
            /* The divide will happen at the end for floats. */
            merge_function =
               "vec4 merge(vec4 a, vec4 b) { return (a + b); }\n";
         }

         arb_sample_shading_extension_string = "";

         /* We're assuming power of two samples for this resolution procedure.
          *
          * To avoid losing any floating point precision if the samples all
          * happen to have the same value, we merge pairs of values at a time
          * (so the floating point exponent just gets increased), rather than
          * doing a naive sum and dividing.
          */
         assert(_mesa_is_pow_two(samples));
         /* Fetch each individual sample. */
         sample_resolve = rzalloc_size(mem_ctx, 1);
         for (i = 0; i < samples; i++) {
            ralloc_asprintf_append(&sample_resolve,
                                   "   gvec4 sample_1_%d = texelFetch(texSampler, i%s(texCoords), %d);\n",
                                   i, texcoord_type, i);
         }
         /* Now, merge each pair of samples, then merge each pair of those,
          * etc.
          */
         for (step = 2; step <= samples; step *= 2) {
            for (i = 0; i < samples; i += step) {
               ralloc_asprintf_append(&sample_resolve,
                                      "   gvec4 sample_%d_%d = merge(sample_%d_%d, sample_%d_%d);\n",
                                      step, i,
                                      step / 2, i,
                                      step / 2, i + step / 2);
            }
         }

         /* Scale the final result. */
         if (src_datatype == GL_UNSIGNED_INT || src_datatype == GL_INT) {
            ralloc_asprintf_append(&sample_resolve,
                                   "   out_color = sample_%d_0;\n",
                                   samples);
         } else {
            /* Fixed: was "gl_FragColor", which conflicts with the declared
             * out_color output under #version 130.
             */
            ralloc_asprintf_append(&sample_resolve,
                                   "   out_color = sample_%d_0 / %f;\n",
                                   samples, (float)samples);
         }
      }

      vs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "in vec2 position;\n"
                                  "in %s textureCoords;\n"
                                  "out %s texCoords;\n"
                                  "void main()\n"
                                  "{\n"
                                  "   texCoords = textureCoords;\n"
                                  "   gl_Position = vec4(position, 0.0, 1.0);\n"
                                  "}\n",
                                  texcoord_type,
                                  texcoord_type);
      fs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "#extension GL_ARB_texture_multisample : enable\n"
                                  "%s\n"
                                  "#define gvec4 %svec4\n"
                                  "uniform %ssampler2DMS%s texSampler;\n"
                                  "in %s texCoords;\n"
                                  "out gvec4 out_color;\n"
                                  "\n"
                                  "%s" /* merge_function */
                                  "void main()\n"
                                  "{\n"
                                  "%s\n" /* sample_resolve */
                                  "}\n",
                                  arb_sample_shading_extension_string,
                                  vec4_prefix,
                                  vec4_prefix,
                                  sampler_array_suffix,
                                  texcoord_type,
                                  merge_function,
                                  sample_resolve);
   }

   _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
                                       &blit->msaa_shaders[shader_index]);
   ralloc_free(mem_ctx);
}
extern "C" bool _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline) { /* Section 2.11.11 (Shader Execution), subheading "Validation," of the * OpenGL 4.1 spec says: * * "[INVALID_OPERATION] is generated by any command that transfers * vertices to the GL if: * * ... * * - Any two active samplers in the current program object are of * different types, but refer to the same texture image unit. * * - The number of active samplers in the program exceeds the * maximum number of texture image units allowed." */ GLbitfield mask; GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; struct gl_linked_shader *shader; unsigned active_samplers = 0; const struct gl_shader_program **shProg = (const struct gl_shader_program **) pipeline->CurrentProgram; memset(TexturesUsed, 0, sizeof(TexturesUsed)); for (unsigned idx = 0; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) { if (!shProg[idx]) continue; shader = shProg[idx]->_LinkedShaders[idx]; if (!shader || !shader->Program) continue; mask = shader->Program->SamplersUsed; while (mask) { const int s = u_bit_scan(&mask); GLuint unit = shader->SamplerUnits[s]; GLuint tgt = shader->SamplerTargets[s]; /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a * great job of eliminating unused uniforms currently so for now * don't throw an error if two sampler types both point to 0. */ if (unit == 0) continue; if (TexturesUsed[unit] & ~(1 << tgt)) { pipeline->InfoLog = ralloc_asprintf(pipeline, "Program %d: " "Texture unit %d is accessed with 2 different types", shProg[idx]->Name, unit); return false; } TexturesUsed[unit] |= (1 << tgt); } active_samplers += shader->num_samplers; } if (active_samplers > MAX_COMBINED_TEXTURE_IMAGE_UNITS) { pipeline->InfoLog = ralloc_asprintf(pipeline, "the number of active samplers %d exceed the " "maximum %d", active_samplers, MAX_COMBINED_TEXTURE_IMAGE_UNITS); return false; } return true; }
/**
 * Compile (or reuse from blit->msaa_shaders[]) and bind the GLSL program
 * used for scaled multisample resolves (GL_SCALED_RESOLVE_*_EXT filters):
 * the shader fetches the four samples surrounding the destination pixel via
 * a hardware sample-index map and bilinearly filters between them.
 *
 * The src_width/src_height uniforms are (re)loaded on every call so a cached
 * program still gets the current renderbuffer dimensions.
 *
 * Bug fixed: the generated shader called clamp(tex_coord.x, ...) and
 * clamp(tex_coord.y, ...) as bare statements.  GLSL clamp() is a pure
 * function, so the results were discarded and the texcoords were never
 * actually clamped; the results are now assigned back.
 */
static void
setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                   struct blit_state *blit,
                                   struct gl_renderbuffer *src_rb,
                                   GLenum target, GLenum filter)
{
   GLint loc_src_width, loc_src_height;
   int i, samples;
   int shader_offset = 0;
   void *mem_ctx = ralloc_context(NULL);
   char *fs_source;
   char *name, *sample_number;
   const uint8_t *sample_map;
   char *sample_map_str = rzalloc_size(mem_ctx, 1);
   char *sample_map_expr = rzalloc_size(mem_ctx, 1);
   char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
   const char *sampler_array_suffix = "";
   float y_scale;
   enum blit_msaa_shader shader_index;

   assert(src_rb);
   samples = MAX2(src_rb->NumSamples, 1);
   /* Samples are laid out 2 wide x (samples/2) tall in the shader's
    * virtual grid.
    */
   y_scale = samples * 0.5;

   /* We expect only power of 2 samples in source multisample buffer. */
   assert(samples > 0 && _mesa_is_pow_two(samples));
   /* shader_offset = log2(samples), selects the per-sample-count variant. */
   while (samples >> (shader_offset + 1)) {
      shader_offset++;
   }
   /* Update the assert if we plan to support more than 8X MSAA. */
   assert(shader_offset > 0 && shader_offset < 4);

   assert(target == GL_TEXTURE_2D_MULTISAMPLE ||
          target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY);

   shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE +
                  shader_offset - 1;

   if (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
      shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
                      BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
      sampler_array_suffix = "Array";
   }

   if (blit->msaa_shaders[shader_index]) {
      _mesa_UseProgram(blit->msaa_shaders[shader_index]);
      /* Update the uniform values. */
      loc_src_width =
         _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_width");
      loc_src_height =
         _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_height");
      _mesa_Uniform1f(loc_src_width, src_rb->Width);
      _mesa_Uniform1f(loc_src_height, src_rb->Height);
      return;
   }

   name = ralloc_asprintf(mem_ctx, "vec4 MSAA scaled resolve");

   /* Below switch is used to setup the shader expression, which computes
    * sample index and map it to to a sample number on hardware.
    */
   switch(samples) {
   case 2:
      sample_number = "sample_map[int(2 * fract(coord.x))]";
      sample_map = ctx->Const.SampleMap2x;
      break;
   case 4:
      sample_number = "sample_map[int(2 * fract(coord.x) + 4 * fract(coord.y))]";
      sample_map = ctx->Const.SampleMap4x;
      break;
   case 8:
      sample_number = "sample_map[int(2 * fract(coord.x) + 8 * fract(coord.y))]";
      sample_map = ctx->Const.SampleMap8x;
      break;
   default:
      sample_number = NULL;
      sample_map = NULL;
      _mesa_problem(ctx, "Unsupported sample count %d\n", samples);
      unreachable("Unsupported sample count");
   }

   /* Create sample map string. */
   for (i = 0 ; i < samples - 1; i++) {
      ralloc_asprintf_append(&sample_map_str, "%d, ", sample_map[i]);
   }
   ralloc_asprintf_append(&sample_map_str, "%d", sample_map[samples - 1]);

   /* Create sample map expression using above string. */
   ralloc_asprintf_append(&sample_map_expr,
                          "   const int sample_map[%d] = int[%d](%s);\n",
                          samples, samples, sample_map_str);

   if (target == GL_TEXTURE_2D_MULTISAMPLE) {
      ralloc_asprintf_append(&texel_fetch_macro,
                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
                             sample_number);
   } else {
      ralloc_asprintf_append(&texel_fetch_macro,
                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
                             sample_number);
   }

   static const char vs_source[] =
      "#version 130\n"
      "in vec2 position;\n"
      "in vec3 textureCoords;\n"
      "out vec2 texCoords;\n"
      "flat out int layer;\n"
      "void main()\n"
      "{\n"
      "   texCoords = textureCoords.xy;\n"
      "   layer = int(textureCoords.z);\n"
      "   gl_Position = vec4(position, 0.0, 1.0);\n"
      "}\n"
      ;

   fs_source = ralloc_asprintf(mem_ctx,
                               "#version 130\n"
                               "#extension GL_ARB_texture_multisample : enable\n"
                               "uniform sampler2DMS%s texSampler;\n"
                               "uniform float src_width, src_height;\n"
                               "in vec2 texCoords;\n"
                               "flat in int layer;\n"
                               "out vec4 out_color;\n"
                               "\n"
                               "void main()\n"
                               "{\n"
                               "%s"
                               "   vec2 interp;\n"
                               "   const vec2 scale = vec2(2.0f, %ff);\n"
                               "   const vec2 scale_inv = vec2(0.5f, %ff);\n"
                               "   const vec2 s_0_offset = vec2(0.25f, %ff);\n"
                               "   vec2 s_0_coord, s_1_coord, s_2_coord, s_3_coord;\n"
                               "   vec4 s_0_color, s_1_color, s_2_color, s_3_color;\n"
                               "   vec4 x_0_color, x_1_color;\n"
                               "   vec2 tex_coord = texCoords - s_0_offset;\n"
                               "\n"
                               "   tex_coord *= scale;\n"
                               /* Fixed: clamp() results were discarded; the
                                * clamped values must be assigned back.
                                */
                               "   tex_coord.x = clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
                               "   tex_coord.y = clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
                               "   interp = fract(tex_coord);\n"
                               "   tex_coord = ivec2(tex_coord) * scale_inv;\n"
                               "\n"
                               "   /* Compute the sample coordinates used for filtering. */\n"
                               "   s_0_coord = tex_coord;\n"
                               "   s_1_coord = tex_coord + vec2(scale_inv.x, 0.0f);\n"
                               "   s_2_coord = tex_coord + vec2(0.0f, scale_inv.y);\n"
                               "   s_3_coord = tex_coord + vec2(scale_inv.x, scale_inv.y);\n"
                               "\n"
                               "   /* Fetch sample color values. */\n"
                               "%s"
                               "   s_0_color = TEXEL_FETCH(s_0_coord)\n"
                               "   s_1_color = TEXEL_FETCH(s_1_coord)\n"
                               "   s_2_color = TEXEL_FETCH(s_2_coord)\n"
                               "   s_3_color = TEXEL_FETCH(s_3_coord)\n"
                               "#undef TEXEL_FETCH\n"
                               "\n"
                               "   /* Do bilinear filtering on sample colors. */\n"
                               "   x_0_color = mix(s_0_color, s_1_color, interp.x);\n"
                               "   x_1_color = mix(s_2_color, s_3_color, interp.x);\n"
                               "   out_color = mix(x_0_color, x_1_color, interp.y);\n"
                               "}\n",
                               sampler_array_suffix,
                               sample_map_expr,
                               y_scale,
                               1.0f / y_scale,
                               1.0f / samples,
                               texel_fetch_macro);

   _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
                                       &blit->msaa_shaders[shader_index]);
   loc_src_width =
      _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_width");
   loc_src_height =
      _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_height");
   _mesa_Uniform1f(loc_src_width, src_rb->Width);
   _mesa_Uniform1f(loc_src_height, src_rb->Height);

   ralloc_free(mem_ctx);
}