Пример #1
0
 char *interface_field_name(const glsl_type *iface, unsigned field = 0)
 {
    return ralloc_asprintf(mem_ctx,
                           "%s.%s",
                           iface->name,
                           iface->fields.structure[field].name);
 }
Пример #2
0
/**
 * Determines whether every stage in a linked program is active in the
 * specified pipeline.
 */
static bool
program_stages_all_active(struct gl_pipeline_object *pipe,
                          const struct gl_shader_program *prog)
{
   unsigned i;
   bool status = true;

   if (!prog)
      return true;

   for (i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i]) {
         if (pipe->CurrentProgram[i]) {
            if (prog->Name != pipe->CurrentProgram[i]->Name) {
               status = false;
            }
         } else {
            status = false;
         }
      }
   }

   if (!status) {
      pipe->InfoLog = ralloc_asprintf(pipe,
                                      "Program %d is not active for all "
                                      "shaders that was linked",
                                      prog->Name);
   }

   return status;
}
static GLuint
setup_program(struct brw_context *brw, bool msaa_tex)
{
   struct gl_context *ctx = &brw->ctx;
   struct blit_state *blit = &ctx->Meta->Blit;
   char *fs_source;
   const struct sampler_and_fetch *sampler = &samplers[msaa_tex];

   _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, 2, 0);

   GLuint *prog_id = &brw->meta_stencil_blit_programs[msaa_tex];

   if (*prog_id) {
      _mesa_UseProgram(*prog_id);
      return *prog_id;
   }
  
   fs_source = ralloc_asprintf(NULL, fs_tmpl, sampler->sampler,
                               sampler->fetch);
   _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
                                       "i965 stencil blit",
                                       prog_id);
   ralloc_free(fs_source);

   return *prog_id;
}
Пример #4
0
    virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
    {
        ir_constant *index = ir->array_index->as_constant();
        int i;

        if (index) {
            i = index->value.i[0];
        } else {
            /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
             * while GLSL 1.30 requires that the array indices be
             * constant integer expressions.  We don't expect any driver
             * to actually work with a really variable array index, so
             * all that would work would be an unrolled loop counter that ends
             * up being constant above.
             */
            ralloc_strcat(&shader_program->InfoLog,
                          "warning: Variable sampler array index unsupported.\n"
                          "This feature of the language was removed in GLSL 1.20 "
                          "and is unlikely to be supported for 1.10 in Mesa.\n");
            i = 0;
        }
        if (ir != last) {
            this->name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
        } else {
            offset = i;
        }
        return visit_continue;
    }
Пример #5
0
int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
{
   char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);
   symbol_table_entry *entry = get_entry(name);
   if (!entry)
      return ast_precision_none;
   return entry->a->default_precision;
}
Пример #6
0
/* Return a filename within the cache's directory corresponding to 'key'. The
 * returned filename is ralloced with 'cache' as the parent context.
 *
 * Returns NULL if out of memory.
 */
static char *
get_cache_file(struct program_cache *cache, cache_key key)
{
   char buf[41];

   _mesa_sha1_format(buf, key);

   return ralloc_asprintf(cache, "%s/%c%c/%s",
                          cache->path, buf[0], buf[1], buf + 2);
}
Пример #7
0
/* simple passthrough shader */
static nir_shader *
build_nir_fs(void)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
	b.shader->info.name = ralloc_asprintf(b.shader,
					      "meta_depth_decomp_noop_fs");

	return b.shader;
}
Пример #8
0
/* Create the directory that will be needed for the cache file for \key.
 *
 * Obviously, the implementation here must closely match
 * _get_cache_file above.
*/
static void
make_cache_file_directory(struct program_cache *cache, cache_key key)
{
   char *dir;
   char buf[41];

   _mesa_sha1_format(buf, key);

   dir = ralloc_asprintf(cache, "%s/%c%c", cache->path, buf[0], buf[1]);

   mkdir_if_needed(dir);

   ralloc_free(dir);
}
Пример #9
0
/* Concatenate an existing path and a new name to form a new path.  If the new
 * path does not exist as a directory, create it then return the resulting
 * name of the new path (ralloc'ed off of 'ctx').
 *
 * Returns NULL on any error, such as:
 *
 *      <path> does not exist or is not a directory
 *      <path>/<name> exists but is not a directory
 *      <path>/<name> cannot be created as a directory
 */
static char *
concatenate_and_mkdir(void *ctx, char *path, char *name)
{
   char *new_path;
   struct stat sb;

   if (stat(path, &sb) != 0 || ! S_ISDIR(sb.st_mode))
      return NULL;

   new_path = ralloc_asprintf(ctx, "%s/%s", path, name);

   if (mkdir_if_needed(new_path) == 0)
      return new_path;
   else
      return NULL;
}
Пример #10
0
bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name,
                                                        int precision)
{
   char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);

   ast_type_specifier *default_specifier = new(mem_ctx) ast_type_specifier(name);
   default_specifier->default_precision = precision;

   symbol_table_entry *entry =
      new(mem_ctx) symbol_table_entry(default_specifier);

   if (!get_entry(name))
      return _mesa_symbol_table_add_symbol(table, name, entry) == 0;

   return _mesa_symbol_table_replace_symbol(table, name, entry) == 0;
}
Пример #11
0
static void
log_error(validate_state *state, const char *cond, const char *file, int line)
{
   const void *obj;

   if (state->instr)
      obj = state->instr;
   else if (state->var)
      obj = state->var;
   else
      obj = cond;

   char *msg = ralloc_asprintf(state->errors, "error: %s (%s:%d)",
                               cond, file, line);

   _mesa_hash_table_insert(state->errors, obj, msg);
}
Пример #12
0
/* emit 0, 0, 0, 1 */
static nir_shader *
build_nir_fs(void)
{
	const struct glsl_type *vec4 = glsl_vec4_type();
	nir_builder b;
	nir_variable *f_color; /* vec4, fragment output color */

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
	b.shader->info.name = ralloc_asprintf(b.shader,
					       "meta_resolve_fs");

	f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
				      "f_color");
	f_color->data.location = FRAG_RESULT_DATA0;
	nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);

	return b.shader;
}
Пример #13
0
const glsl_type * glsl_type::get_shadow_sampler_type() const
{
    const glsl_type* shadow_type = NULL;

    if (base_type == GLSL_TYPE_SAMPLER && sampler_types)
    {
        /** Generate a key that is the combination of sampler type and inner type. */
        char key[128];
        snprintf(key, sizeof(key), "%sShadow", name);
        shadow_type = (const glsl_type*)hash_table_find(sampler_types, key);
        if (shadow_type == NULL)
        {
            glsl_type* new_shadow_type = new glsl_type(*this);
            new_shadow_type->sampler_shadow = true;
            new_shadow_type->name = ralloc_asprintf(mem_ctx, "%sShadow", name);
            new_shadow_type->inner_type = glsl_type::uint_type;
            shadow_type = new_shadow_type;
        }
    }

    return shadow_type;
}
Пример #14
0
extern GLboolean
_mesa_validate_program_pipeline(struct gl_context* ctx,
                                struct gl_pipeline_object *pipe,
                                GLboolean IsBound)
{
   unsigned i;

   pipe->Validated = GL_FALSE;

   /* Release and reset the info log.
    */
   if (pipe->InfoLog != NULL)
      ralloc_free(pipe->InfoLog);

   pipe->InfoLog = NULL;

   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         - A program object is active for at least one, but not all of
    *           the shader stages that were present when the program was
    *           linked."
    *
    * For each possible program stage, verify that the program bound to that
    * stage has all of its stages active.  In other words, if the program
    * bound to the vertex stage also has a fragment shader, the fragment
    * shader must also be bound to the fragment stage.
    */
   for (i = 0; i < MESA_SHADER_STAGES; i++) {
      if (!program_stages_all_active(pipe, pipe->CurrentProgram[i])) {
         goto err;
      }
   }

   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         ...
    *
    *         - One program object is active for at least two shader stages
    *           and a second program is active for a shader stage between two
    *           stages for which the first program was active."
    *
    * Without Tesselation, the only case where this can occur is the geometry
    * shader between the fragment shader and vertex shader.
    */
   if (pipe->CurrentProgram[MESA_SHADER_GEOMETRY]
       && pipe->CurrentProgram[MESA_SHADER_FRAGMENT]
       && pipe->CurrentProgram[MESA_SHADER_VERTEX]) {
      if (pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name == pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name &&
          pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name != pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name) {
         pipe->InfoLog =
            ralloc_asprintf(pipe,
                            "Program %d is active for geometry stage between "
                            "two stages for which another program %d is "
                            "active",
                            pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name,
                            pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name);
         goto err;
      }
   }

   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         ...
    *
    *         - There is an active program for tessellation control,
    *           tessellation evaluation, or geometry stages with corresponding
    *           executable shader, but there is no active program with
    *           executable vertex shader."
    */
   if (!pipe->CurrentProgram[MESA_SHADER_VERTEX]
       && pipe->CurrentProgram[MESA_SHADER_GEOMETRY]) {
      pipe->InfoLog = ralloc_strdup(pipe, "Program lacks a vertex shader");
      goto err;
   }

   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         ...
    *
    *         - There is no current program object specified by UseProgram,
    *           there is a current program pipeline object, and the current
    *           program for any shader stage has been relinked since being
    *           applied to the pipeline object via UseProgramStages with the
    *           PROGRAM_SEPARABLE parameter set to FALSE.
    */
   for (i = 0; i < MESA_SHADER_STAGES; i++) {
      if (pipe->CurrentProgram[i] && !pipe->CurrentProgram[i]->SeparateShader) {
         pipe->InfoLog = ralloc_asprintf(pipe,
                                         "Program %d was relinked without "
                                         "PROGRAM_SEPARABLE state",
                                         pipe->CurrentProgram[i]->Name);
         goto err;
      }
   }

   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         ...
    *
    *         - Any two active samplers in the current program object are of
    *           different types, but refer to the same texture image unit.
    *
    *         - The number of active samplers in the program exceeds the
    *           maximum number of texture image units allowed."
    */
   if (!_mesa_sampler_uniforms_pipeline_are_valid(pipe))
      goto err;

   pipe->Validated = GL_TRUE;
   return GL_TRUE;

err:
   if (IsBound)
      _mesa_error(ctx, GL_INVALID_OPERATION,
                  "glValidateProgramPipeline failed to validate the pipeline");

   return GL_FALSE;
}
Пример #15
0
void
cache_put(struct program_cache *cache,
          cache_key key,
          const void *data,
          size_t size)
{
   int fd = -1, fd_final = -1, err, ret;
   size_t len;
   char *filename = NULL, *filename_tmp = NULL;
   const char *p = data;

   filename = get_cache_file(cache, key);
   if (filename == NULL)
      goto done;

   /* Write to a temporary file to allow for an atomic rename to the
    * final destination filename, (to prevent any readers from seeing
    * a partially written file).
    */
   filename_tmp = ralloc_asprintf(cache, "%s.tmp", filename);
   if (filename_tmp == NULL)
      goto done;

   fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);

   /* Make the two-character subdirectory within the cache as needed. */
   if (fd == -1) {
      if (errno != ENOENT)
         goto done;

      make_cache_file_directory(cache, key);

      fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
      if (fd == -1)
         goto done;
   }

   /* With the temporary file open, we take an exclusive flock on
    * it. If the flock fails, then another process still has the file
    * open with the flock held. So just let that file be responsible
    * for writing the file.
    */
   err = flock(fd, LOCK_EX | LOCK_NB);
   if (err == -1)
      goto done;

   /* Now that we have the lock on the open temporary file, we can
    * check to see if the destination file already exists. If so,
    * another process won the race between when we saw that the file
    * didn't exist and now. In this case, we don't do anything more,
    * (to ensure the size accounting of the cache doesn't get off).
    */
   fd_final = open(filename, O_RDONLY | O_CLOEXEC);
   if (fd_final != -1)
      goto done;

   /* OK, we're now on the hook to write out a file that we know is
    * not in the cache, and is also not being written out to the cache
    * by some other process.
    *
    * Before we do that, if the cache is too large, evict something
    * else first.
    */
   if (*cache->size + size > cache->max_size)
      evict_random_item(cache);

   /* Now, finally, write out the contents to the temporary file, then
    * rename them atomically to the destination filename, and also
    * perform an atomic increment of the total cache size.
    */
   for (len = 0; len < size; len += ret) {
      ret = write(fd, p + len, size - len);
      if (ret == -1) {
         unlink(filename_tmp);
         goto done;
      }
   }

   rename(filename_tmp, filename);

   p_atomic_add(cache->size, size);

   /* This close finally releases the flock, (now that the final dile
    * has been renamed into place and the size has been added).
    */
   close(fd);
   fd = -1;

 done:
   if (filename_tmp)
      ralloc_free(filename_tmp);
   if (filename)
      ralloc_free(filename);
   if (fd != -1)
      close(fd);
}
Пример #16
0
static nir_alu_src
construct_value(const nir_search_value *value, nir_alu_type type,
                unsigned num_components, struct match_state *state,
                nir_instr *instr, void *mem_ctx)
{
    switch (value->type) {
    case nir_search_value_expression: {
        const nir_search_expression *expr = nir_search_value_as_expression(value);

        if (nir_op_infos[expr->opcode].output_size != 0)
            num_components = nir_op_infos[expr->opcode].output_size;

        nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
        nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
        alu->dest.write_mask = (1 << num_components) - 1;
        alu->dest.saturate = false;

        for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) {
            /* If the source is an explicitly sized source, then we need to reset
             * the number of components to match.
             */
            if (nir_op_infos[alu->op].input_sizes[i] != 0)
                num_components = nir_op_infos[alu->op].input_sizes[i];

            alu->src[i] = construct_value(expr->srcs[i],
                                          nir_op_infos[alu->op].input_types[i],
                                          num_components,
                                          state, instr, mem_ctx);
        }

        nir_instr_insert_before(instr, &alu->instr);

        nir_alu_src val;
        val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
        val.negate = false;
        val.abs = false,
            memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);

        return val;
    }

    case nir_search_value_variable: {
        const nir_search_variable *var = nir_search_value_as_variable(value);
        assert(state->variables_seen & (1 << var->variable));

        nir_alu_src val = { NIR_SRC_INIT };
        nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);

        assert(!var->is_constant);

        return val;
    }

    case nir_search_value_constant: {
        const nir_search_constant *c = nir_search_value_as_constant(value);
        nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);

        switch (type) {
        case nir_type_float:
            load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
            load->value.f[0] = c->data.f;
            break;
        case nir_type_int:
            load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
            load->value.i[0] = c->data.i;
            break;
        case nir_type_unsigned:
        case nir_type_bool:
            load->value.u[0] = c->data.u;
            break;
        default:
            unreachable("Invalid alu source type");
        }

        nir_instr_insert_before(instr, &load->instr);

        nir_alu_src val;
        val.src = nir_src_for_ssa(&load->def);
        val.negate = false;
        val.abs = false,
            memset(val.swizzle, 0, sizeof val.swizzle);

        return val;
    }

    default:
        unreachable("Invalid search value type");
    }
}
Пример #17
0
void GLAPIENTRY
_mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len,
                       const GLvoid *string)
{
   struct gl_program *base;
   bool failed;
   GET_CURRENT_CONTEXT(ctx);

   FLUSH_VERTICES(ctx, _NEW_PROGRAM);

   if (!ctx->Extensions.ARB_vertex_program
       && !ctx->Extensions.ARB_fragment_program) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glProgramStringARB()");
      return;
   }

   if (format != GL_PROGRAM_FORMAT_ASCII_ARB) {
      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramStringARB(format)");
      return;
   }

   if (target == GL_VERTEX_PROGRAM_ARB && ctx->Extensions.ARB_vertex_program) {
      struct gl_vertex_program *prog = ctx->VertexProgram.Current;
      _mesa_parse_arb_vertex_program(ctx, target, string, len, prog);

      base = & prog->Base;
   }
   else if (target == GL_FRAGMENT_PROGRAM_ARB
            && ctx->Extensions.ARB_fragment_program) {
      struct gl_fragment_program *prog = ctx->FragmentProgram.Current;
      _mesa_parse_arb_fragment_program(ctx, target, string, len, prog);

      base = & prog->Base;
   }
   else {
      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramStringARB(target)");
      return;
   }

   failed = ctx->Program.ErrorPos != -1;

   if (!failed) {
      /* finally, give the program to the driver for translation/checking */
      if (!ctx->Driver.ProgramStringNotify(ctx, target, base)) {
         failed = true;
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "glProgramStringARB(rejected by driver");
      }
   }

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      const char *shader_type =
         target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex";

      fprintf(stderr, "ARB_%s_program source for program %d:\n",
              shader_type, base->Id);
      fprintf(stderr, "%s\n", (const char *) string);

      if (failed) {
         fprintf(stderr, "ARB_%s_program %d failed to compile.\n",
                 shader_type, base->Id);
      } else {
         fprintf(stderr, "Mesa IR for ARB_%s_program %d:\n",
                 shader_type, base->Id);
         _mesa_print_program(base);
         fprintf(stderr, "\n");
      }
      fflush(stderr);
   }

   /* Capture vp-*.shader_test/fp-*.shader_test files. */
   const char *capture_path = _mesa_get_shader_capture_path();
   if (capture_path != NULL) {
      FILE *file;
      const char *shader_type =
         target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex";
      char *filename =
         ralloc_asprintf(NULL, "%s/%cp-%u.shader_test",
                         capture_path, shader_type[0], base->Id);

      file = fopen(filename, "w");
      if (file) {
         fprintf(file,
                 "[require]\nGL_ARB_%s_program\n\n[%s program]\n%s\n",
                 shader_type, shader_type, (const char *) string);
         fclose(file);
      } else {
         _mesa_warning(ctx, "Failed to open %s", filename);
      }
      ralloc_free(filename);
   }
}
Пример #18
0
 virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
 {
     this->name = ralloc_asprintf(mem_ctx, "%s.%s", name, ir->field);
     return visit_continue;
 }
Пример #19
0
extern "C" bool
_mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline)
{
   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         ...
    *
    *         - Any two active samplers in the current program object are of
    *           different types, but refer to the same texture image unit.
    *
    *         - The number of active samplers in the program exceeds the
    *           maximum number of texture image units allowed."
    */
   unsigned active_samplers = 0;
   const struct gl_shader_program **shProg =
      (const struct gl_shader_program **) pipeline->CurrentProgram;

   const glsl_type *unit_types[MAX_COMBINED_TEXTURE_IMAGE_UNITS];
   memset(unit_types, 0, sizeof(unit_types));

   for (unsigned idx = 0; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) {
      if (!shProg[idx])
         continue;

      for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) {
         const struct gl_uniform_storage *const storage =
            &shProg[idx]->UniformStorage[i];

         if (!storage->type->is_sampler())
            continue;

         active_samplers++;

         const unsigned count = MAX2(1, storage->array_elements);
         for (unsigned j = 0; j < count; j++) {
            const unsigned unit = storage->storage[j].i;

            /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a
             * great job of eliminating unused uniforms currently so for now
             * don't throw an error if two sampler types both point to 0.
             */
            if (unit == 0)
               continue;

            /* The types of the samplers associated with a particular texture
             * unit must be an exact match.  Page 74 (page 89 of the PDF) of
             * the OpenGL 3.3 core spec says:
             *
             *     "It is not allowed to have variables of different sampler
             *     types pointing to the same texture image unit within a
             *     program object."
             */
            if (unit_types[unit] == NULL) {
               unit_types[unit] = storage->type;
            } else if (unit_types[unit] != storage->type) {
               pipeline->InfoLog =
                  ralloc_asprintf(pipeline,
                                  "Texture unit %d is accessed both as %s "
                                  "and %s",
                                  unit, unit_types[unit]->name,
                                  storage->type->name);
               return false;
            }
         }
      }
   }

   if (active_samplers > MAX_COMBINED_TEXTURE_IMAGE_UNITS) {
      pipeline->InfoLog =
         ralloc_asprintf(pipeline,
                         "the number of active samplers %d exceed the "
                         "maximum %d",
                         active_samplers, MAX_COMBINED_TEXTURE_IMAGE_UNITS);
      return false;
   }

   return true;
}
Пример #20
0
extern "C" const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               const struct brw_gs_prog_key *key,
               struct brw_gs_prog_data *prog_data,
               const nir_shader *src_shader,
               struct gl_shader_program *shader_prog,
               int shader_time_index,
               unsigned *final_assembly_size,
               char **error_str)
{
    struct brw_gs_compile c;
    memset(&c, 0, sizeof(c));
    c.key = *key;

    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);

    /* The GLSL linker will have already matched up GS inputs and the outputs
     * of prior stages.  The driver does extend VS outputs in some cases, but
     * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
     * geometry shader support.  So we can safely ignore that.
     *
     * For SSO pipelines, we use a fixed VUE map layout based on variable
     * locations, so we can rely on rendezvous-by-location making this work.
     */
    GLbitfield64 inputs_read = shader->info->inputs_read;
    brw_compute_vue_map(compiler->devinfo,
                        &c.input_vue_map, inputs_read,
                        shader->info->separate_shader);

    shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
                                       is_scalar);
    brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map);
    brw_nir_lower_vue_outputs(shader, is_scalar);
    shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);

    prog_data->base.clip_distance_mask =
        ((1 << shader->info->clip_distance_array_size) - 1);
    prog_data->base.cull_distance_mask =
        ((1 << shader->info->cull_distance_array_size) - 1) <<
        shader->info->clip_distance_array_size;

    prog_data->include_primitive_id =
        (shader->info->system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0;

    prog_data->invocations = shader->info->gs.invocations;

    if (compiler->devinfo->gen >= 8)
        prog_data->static_vertex_count = nir_gs_count_vertices(shader);

    if (compiler->devinfo->gen >= 7) {
        if (shader->info->gs.output_primitive == GL_POINTS) {
            /* When the output type is points, the geometry shader may output data
             * to multiple streams, and EndPrimitive() has no effect.  So we
             * configure the hardware to interpret the control data as stream ID.
             */
            prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;

            /* We only have to emit control bits if we are using streams */
            if (shader_prog && shader_prog->Geom.UsesStreams)
                c.control_data_bits_per_vertex = 2;
            else
                c.control_data_bits_per_vertex = 0;
        } else {
            /* When the output type is triangle_strip or line_strip, EndPrimitive()
             * may be used to terminate the current strip and start a new one
             * (similar to primitive restart), and outputting data to multiple
             * streams is not supported.  So we configure the hardware to interpret
             * the control data as EndPrimitive information (a.k.a. "cut bits").
             */
            prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;

            /* We only need to output control data if the shader actually calls
             * EndPrimitive().
             */
            c.control_data_bits_per_vertex =
                shader->info->gs.uses_end_primitive ? 1 : 0;
        }
    } else {
        /* There are no control data bits in gen6. */
        c.control_data_bits_per_vertex = 0;

        /* If it is using transform feedback, enable it */
        if (shader->info->has_transform_feedback_varyings)
            prog_data->gen6_xfb_enabled = true;
        else
            prog_data->gen6_xfb_enabled = false;
    }
    c.control_data_header_size_bits =
        shader->info->gs.vertices_out * c.control_data_bits_per_vertex;

    /* 1 HWORD = 32 bytes = 256 bits */
    prog_data->control_data_header_size_hwords =
        ALIGN(c.control_data_header_size_bits, 256) / 256;

    /* Compute the output vertex size.
     *
     * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
     * Size (p168):
     *
     *     [0,62] indicating [1,63] 16B units
     *
     *     Specifies the size of each vertex stored in the GS output entry
     *     (following any Control Header data) as a number of 128-bit units
     *     (minus one).
     *
     *     Programming Restrictions: The vertex size must be programmed as a
     *     multiple of 32B units with the following exception: Rendering is
     *     disabled (as per SOL stage state) and the vertex size output by the
     *     GS thread is 16B.
     *
     *     If rendering is enabled (as per SOL state) the vertex size must be
     *     programmed as a multiple of 32B units. In other words, the only time
     *     software can program a vertex size with an odd number of 16B units
     *     is when rendering is disabled.
     *
     * Note: B=bytes in the above text.
     *
     * It doesn't seem worth the extra trouble to optimize the case where the
     * vertex size is 16B (especially since this would require special-casing
     * the GEN assembly that writes to the URB).  So we just set the vertex
     * size to a multiple of 32B (2 vec4's) in all cases.
     *
     * The maximum output vertex size is 62*16 = 992 bytes (31 hwords).  We
     * budget that as follows:
     *
     *   512 bytes for varyings (a varying component is 4 bytes and
     *             gl_MaxGeometryOutputComponents = 128)
     *    16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
     *             bytes)
     *    16 bytes overhead for gl_Position (we allocate it a slot in the VUE
     *             even if it's not used)
     *    32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
     *             whenever clip planes are enabled, even if the shader doesn't
     *             write to gl_ClipDistance)
     *    16 bytes overhead since the VUE size must be a multiple of 32 bytes
     *             (see below)--this causes up to 1 VUE slot to be wasted
     *   400 bytes available for varying packing overhead
     *
     * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
     * per interpolation type, so this is plenty.
     *
     */
    unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
    assert(compiler->devinfo->gen == 6 ||
           output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
    prog_data->output_vertex_size_hwords =
        ALIGN(output_vertex_size_bytes, 32) / 32;

    /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
     * That divides up as follows:
     *
     *     64 bytes for the control data header (cut indices or StreamID bits)
     *   4096 bytes for varyings (a varying component is 4 bytes and
     *              gl_MaxGeometryTotalOutputComponents = 1024)
     *   4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
     *              bytes/vertex and gl_MaxGeometryOutputVertices is 256)
     *   4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
     *              even if it's not used)
     *   8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
     *              whenever clip planes are enabled, even if the shader doesn't
     *              write to gl_ClipDistance)
     *   4096 bytes overhead since the VUE size must be a multiple of 32
     *              bytes (see above)--this causes up to 1 VUE slot to be wasted
     *   8128 bytes available for varying packing overhead
     *
     * Worst-case varying packing overhead is 3/4 of a varying slot per
     * interpolation type, which works out to 3072 bytes, so this would allow
     * us to accommodate 2 interpolation types without any danger of running
     * out of URB space.
     *
     * In practice, the risk of running out of URB space is very small, since
     * the above figures are all worst-case, and most of them scale with the
     * number of output vertices.  So we'll just calculate the amount of space
     * we need, and if it's too large, fail to compile.
     *
     * The above is for gen7+ where we have a single URB entry that will hold
     * all the output. In gen6, we will have to allocate URB entries for every
     * vertex we emit, so our URB entries only need to be large enough to hold
     * a single vertex. Also, gen6 does not have a control data header.
     */
    unsigned output_size_bytes;
    if (compiler->devinfo->gen >= 7) {
        output_size_bytes =
            prog_data->output_vertex_size_hwords * 32 * shader->info->gs.vertices_out;
        output_size_bytes += 32 * prog_data->control_data_header_size_hwords;
    } else {
        output_size_bytes = prog_data->output_vertex_size_hwords * 32;
    }

    /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
     * which comes before the control header.
     */
    if (compiler->devinfo->gen >= 8)
        output_size_bytes += 32;

    /* Shaders can technically set max_vertices = 0, at which point we
     * may have a URB size of 0 bytes.  Nothing good can come from that,
     * so enforce a minimum size.
     */
    if (output_size_bytes == 0)
        output_size_bytes = 1;

    unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
    if (compiler->devinfo->gen == 6)
        max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
    if (output_size_bytes > max_output_size_bytes)
        return NULL;


    /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
     * a multiple of 128 bytes in gen6.
     */
    if (compiler->devinfo->gen >= 7)
        prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
    else
        prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;

    prog_data->output_topology =
        get_hw_prim_for_gl_prim(shader->info->gs.output_primitive);

    prog_data->vertices_in = shader->info->gs.vertices_in;

    /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
     * need to program a URB read length of ceiling(num_slots / 2).
     */
    prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;

    /* Now that prog_data setup is done, we are ready to actually compile the
     * program.
     */
    if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
        fprintf(stderr, "GS Input ");
        brw_print_vue_map(stderr, &c.input_vue_map);
        fprintf(stderr, "GS Output ");
        brw_print_vue_map(stderr, &prog_data->base.vue_map);
    }

    if (is_scalar) {
        fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
                     shader_time_index);
        if (v.run_gs()) {
            prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
            prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;

            fs_generator g(compiler, log_data, mem_ctx, &c.key,
                           &prog_data->base.base, v.promoted_constants,
                           false, MESA_SHADER_GEOMETRY);
            if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
                const char *label =
                    shader->info->label ? shader->info->label : "unnamed";
                char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s",
                                             label, shader->info->name);
                g.enable_debug(name);
            }
            g.generate_code(v.cfg, 8);
            return g.get_assembly(final_assembly_size);
        }
    }

    if (compiler->devinfo->gen >= 7) {
        /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
         * so without spilling. If the GS invocations count > 1, then we can't use
         * dual object mode.
         */
        if (prog_data->invocations <= 1 &&
                likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
            prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;

            vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
                              mem_ctx, true /* no_spills */, shader_time_index);
            if (v.run()) {
                return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
                                                  shader, &prog_data->base, v.cfg,
                                                  final_assembly_size);
            }
        }
    }

    /* Either we failed to compile in DUAL_OBJECT mode (probably because it
     * would have required spilling) or DUAL_OBJECT mode is disabled.  So fall
     * back to DUAL_INSTANCED or SINGLE mode, which consumes fewer registers.
     *
     * FIXME: Single dispatch mode requires that the driver can handle
     * interleaving of input registers, but this is already supported (dual
     * instance mode has the same requirement). However, to take full advantage
     * of single dispatch mode to reduce register pressure we would also need to
     * do interleaved outputs, but currently, the vec4 visitor and generator
     * classes do not support this, so at the moment register pressure in
     * single and dual instance modes is the same.
     *
     * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 "3DSTATE_GS"
     * "If InstanceCount>1, DUAL_OBJECT mode is invalid. Software will likely
     * want to use DUAL_INSTANCE mode for higher performance, but SINGLE mode
     * is also supported. When InstanceCount=1 (one instance per object) software
     * can decide which dispatch mode to use. DUAL_OBJECT mode would likely be
     * the best choice for performance, followed by SINGLE mode."
     *
     * So SINGLE mode is more performant when invocations == 1 and DUAL_INSTANCE
     * mode is more performant when invocations > 1. Gen6 only supports
     * SINGLE mode.
     */
    if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7)
        prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
    else
        prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;

    vec4_gs_visitor *gs = NULL;
    const unsigned *ret = NULL;

    if (compiler->devinfo->gen >= 7)
        gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data,
                                 shader, mem_ctx, false /* no_spills */,
                                 shader_time_index);
    else
        gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, shader_prog,
                                 shader, mem_ctx, false /* no_spills */,
                                 shader_time_index);

    if (!gs->run()) {
        if (error_str)
            *error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
    } else {
        ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
                                         &prog_data->base, gs->cfg,
                                         final_assembly_size);
    }

    delete gs;
    return ret;
}
Пример #21
0
extern "C" const unsigned *
brw_compile_tcs(const struct brw_compiler *compiler,
                void *log_data,
                void *mem_ctx,
                const struct brw_tcs_prog_key *key,
                struct brw_tcs_prog_data *prog_data,
                const nir_shader *src_shader,
                int shader_time_index,
                unsigned *final_assembly_size,
                char **error_str)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
   const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];

   nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
   nir->info->outputs_written = key->outputs_written;
   nir->info->patch_outputs_written = key->patch_outputs_written;

   struct brw_vue_map input_vue_map;
   brw_compute_vue_map(devinfo, &input_vue_map, nir->info->inputs_read,
                       nir->info->separate_shader);
   brw_compute_tess_vue_map(&vue_prog_data->vue_map,
                            nir->info->outputs_written,
                            nir->info->patch_outputs_written);

   nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
   brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
   brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
   if (key->quads_workaround)
      brw_nir_apply_tcs_quads_workaround(nir);

   nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);

   if (is_scalar)
      prog_data->instances = DIV_ROUND_UP(nir->info->tcs.vertices_out, 8);
   else
      prog_data->instances = DIV_ROUND_UP(nir->info->tcs.vertices_out, 2);

   /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
    * That divides up as follows:
    *
    *     32 bytes for the patch header (tessellation factors)
    *    480 bytes for per-patch varyings (a varying component is 4 bytes and
    *              gl_MaxTessPatchComponents = 120)
    *  16384 bytes for per-vertex varyings (a varying component is 4 bytes,
    *              gl_MaxPatchVertices = 32 and
    *              gl_MaxTessControlOutputComponents = 128)
    *
    *  15808 bytes left for varying packing overhead
    */
   const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
   const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
   unsigned output_size_bytes = 0;
   /* Note that the patch header is counted in num_per_patch_slots. */
   output_size_bytes += num_per_patch_slots * 16;
   output_size_bytes += nir->info->tcs.vertices_out * num_per_vertex_slots * 16;

   assert(output_size_bytes >= 1);
   if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES)
      return NULL;

   /* URB entry sizes are stored as a multiple of 64 bytes. */
   vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;

   /* HS does not use the usual payload pushing from URB to GRFs,
    * because we don't have enough registers for a full-size payload, and
    * the hardware is broken on Haswell anyway.
    */
   vue_prog_data->urb_read_length = 0;

   if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
      fprintf(stderr, "TCS Input ");
      brw_print_vue_map(stderr, &input_vue_map);
      fprintf(stderr, "TCS Output ");
      brw_print_vue_map(stderr, &vue_prog_data->vue_map);
   }

   if (is_scalar) {
      fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
                   &prog_data->base.base, NULL, nir, 8,
                   shader_time_index, &input_vue_map);
      if (!v.run_tcs_single_patch()) {
         if (error_str)
            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
         return NULL;
      }

      prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
      prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;

      fs_generator g(compiler, log_data, mem_ctx, (void *) key,
                     &prog_data->base.base, v.promoted_constants, false,
                     MESA_SHADER_TESS_CTRL);
      if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
         g.enable_debug(ralloc_asprintf(mem_ctx,
                                        "%s tessellation control shader %s",
                                        nir->info->label ? nir->info->label
                                                        : "unnamed",
                                        nir->info->name));
      }

      g.generate_code(v.cfg, 8);

      return g.get_assembly(final_assembly_size);
   } else {
      vec4_tcs_visitor v(compiler, log_data, key, prog_data,
                         nir, mem_ctx, shader_time_index, &input_vue_map);
      if (!v.run()) {
         if (error_str)
            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
         return NULL;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_TCS))
         v.dump_instructions();


      return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
                                        &prog_data->base, v.cfg,
                                        final_assembly_size);
   }
}
void
flatten_named_interface_blocks_declarations::run(exec_list *instructions)
{
   interface_namespace = hash_table_ctor(0, hash_table_string_hash,
                                         hash_table_string_compare);

   /* First pass: adjust instance block variables with an instance name
    * to not have an instance name.
    *
    * The interface block variables are stored in the interface_namespace
    * hash table so they can be used in the second pass.
    */
   foreach_list_safe(node, instructions) {
      ir_variable *var = ((ir_instruction *) node)->as_variable();
      if (!var || !var->is_interface_instance())
         continue;

      /* It should be possible to handle uniforms during this pass,
       * but, this will require changes to the other uniform block
       * support code.
       */
      if (var->data.mode == ir_var_uniform)
         continue;

      const glsl_type * iface_t = var->type;
      const glsl_type * array_t = NULL;
      exec_node *insert_pos = var;

      if (iface_t->is_array()) {
         array_t = iface_t;
         iface_t = array_t->fields.array;
      }

      assert (iface_t->is_interface());

      for (unsigned i = 0; i < iface_t->length; i++) {
         const char * field_name = iface_t->fields.structure[i].name;
         char *iface_field_name =
            ralloc_asprintf(mem_ctx, "%s.%s.%s",
                            iface_t->name, var->name, field_name);

         ir_variable *found_var =
            (ir_variable *) hash_table_find(interface_namespace,
                                            iface_field_name);
         if (!found_var) {
            ir_variable *new_var;
            char *var_name =
               ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name);
            if (array_t == NULL) {
               new_var =
                  new(mem_ctx) ir_variable(iface_t->fields.structure[i].type,
                                           var_name,
                                           (ir_variable_mode) var->data.mode);
               new_var->data.from_named_ifc_block_nonarray = 1;
            } else {
               const glsl_type *new_array_type =
                  glsl_type::get_array_instance(
                     iface_t->fields.structure[i].type,
                     array_t->length);
               new_var =
                  new(mem_ctx) ir_variable(new_array_type,
                                           var_name,
                                           (ir_variable_mode) var->data.mode);
               new_var->data.from_named_ifc_block_array = 1;
            }
            new_var->data.location = iface_t->fields.structure[i].location;
            new_var->data.explicit_location = (new_var->data.location >= 0);
            new_var->data.interpolation =
               iface_t->fields.structure[i].interpolation;
            new_var->data.centroid = iface_t->fields.structure[i].centroid;
            new_var->data.sample = iface_t->fields.structure[i].sample;

            new_var->init_interface_type(iface_t);
            hash_table_insert(interface_namespace, new_var,
                              iface_field_name);
            insert_pos->insert_after(new_var);
            insert_pos = new_var;
         }
      }
      var->remove();
   }
Пример #23
0
const glsl_type * glsl_type::get_templated_instance(const glsl_type *base, const char *name, int num_samples, int patch_size)
{
    if (sampler_types == NULL)
    {
        sampler_types = hash_table_ctor(64, hash_table_string_hash,
                                        hash_table_string_compare);

        // Base sampler types.
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_1D,   /*shadow=*/ false, /*array=*/ false, /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ true,  /*type=*/ NULL, "samplerBuffer", "sampler"), "Buffer");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_1D,   /*shadow=*/ false, /*array=*/ false, /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler1D", "texture1d"), "Texture1D");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_1D,   /*shadow=*/ false, /*array=*/ true,  /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler1DArray", nullptr), "Texture1DArray");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_2D,   /*shadow=*/ false, /*array=*/ false, /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler2D", "texture2d"), "Texture2D");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_2D,   /*shadow=*/ false, /*array=*/ true,  /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler2DArray", nullptr), "Texture2DArray");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_2D,   /*shadow=*/ false, /*array=*/ false, /*multisample=*/ true,  /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler2DMS", nullptr), "Texture2DMS");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_2D,   /*shadow=*/ false, /*array=*/ true,  /*multisample=*/ true,  /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler2DMSArray", nullptr), "Texture2DMSArray");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_3D,   /*shadow=*/ false, /*array=*/ false, /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "sampler3D", "texture3d"), "Texture3D");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_CUBE, /*shadow=*/ false, /*array=*/ false, /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "samplerCube", "texturecube"), "TextureCube");
        hash_table_insert(sampler_types, new glsl_type(GLSL_SAMPLER_DIM_CUBE, /*shadow=*/ false, /*array=*/ true,  /*multisample=*/ false, /*samples=*/ 0, /*sampler_buffer=*/ false, /*type=*/ NULL, "samplerCubeArray", nullptr), "TextureCubeArray");
    }

    if (outputstream_types == NULL)
    {
        outputstream_types = hash_table_ctor(64, hash_table_string_hash,
                                             hash_table_string_compare);

        // Base outputstream types.
        hash_table_insert(outputstream_types, new glsl_type(GLSL_OUTPUTSTREAM_POINTS, /*type=*/ NULL, "point_stream"), "PointStream");
        hash_table_insert(outputstream_types, new glsl_type(GLSL_OUTPUTSTREAM_LINES, /*type=*/ NULL, "line_stream"), "LineStream");
        hash_table_insert(outputstream_types, new glsl_type(GLSL_OUTPUTSTREAM_TRIANGLES, /*type=*/ NULL, "triangle_stream"), "TriangleStream");
    }

    if (inputpatch_types == NULL)
    {
        inputpatch_types = hash_table_ctor(64, hash_table_string_hash,
                                           hash_table_string_compare);

        // Base input patch types.
        hash_table_insert(inputpatch_types, new glsl_type(GLSL_TYPE_INPUTPATCH, 0, (glsl_type*)NULL, "input_patch"), "InputPatch");
    }

    if (outputpatch_types == NULL)
    {
        outputpatch_types = hash_table_ctor(64, hash_table_string_hash,
                                            hash_table_string_compare);

        // Base output patch types.
        hash_table_insert(outputpatch_types, new glsl_type(GLSL_TYPE_OUTPUTPATCH, 0, (glsl_type*)NULL, "output_patch"), "OutputPatch");
    }

    if (image_types == NULL)
    {
        image_types = hash_table_ctor(64, hash_table_string_hash, hash_table_string_compare);

        // Base sampler types.
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_1D, /*array=*/ false, /*sampler_buffer=*/ true,  /*type=*/ NULL, "imageBuffer"), "RWBuffer");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_1D, /*array=*/ false, /*sampler_buffer=*/ false, /*type=*/ NULL, "image1D"), "RWTexture1D");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_1D, /*array=*/ true,  /*sampler_buffer=*/ false, /*type=*/ NULL, "image1DArray"), "RWTexture1DArray");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_2D, /*array=*/ false, /*sampler_buffer=*/ false, /*type=*/ NULL, "image2D"), "RWTexture2D");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_2D, /*array=*/ true,  /*sampler_buffer=*/ false, /*type=*/ NULL, "image2DArray"), "RWTexture2DArray");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_3D, /*array=*/ false, /*sampler_buffer=*/ false, /*type=*/ NULL, "image3D"), "RWTexture3D");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_BUF, /*array=*/ false, /*sampler_buffer=*/ true, /*type=*/ NULL, "StructuredBuffer"), "RWStructuredBuffer");
        hash_table_insert(image_types, new glsl_type(GLSL_SAMPLER_DIM_BUF, /*array=*/ false, /*sampler_buffer=*/ true, /*type=*/ NULL, "ByteAddressBuffer"), "RWByteAddressBuffer");
    }

    if (base == NULL)
    {
        return NULL;
    }

    const glsl_type* outputstream_base_type = (const glsl_type*)hash_table_find(outputstream_types, name);

    if (outputstream_base_type != NULL)
    {
        /** Generate a key that is the combination of outputstream type and inner type. */
        char key[128];
        snprintf(key, sizeof(key), "%s<%s>", outputstream_base_type->name, base->name);

        const glsl_type *actual_type = (glsl_type *)hash_table_find(outputstream_types, key);
        if (actual_type == NULL)
        {
            actual_type = new glsl_type(
                (glsl_outputstream_type)(outputstream_base_type->outputstream_type),
                base,
                key);

            hash_table_insert(outputstream_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
        }

        return actual_type;
    }



    const glsl_type* inputpatch_base_type = (const glsl_type*)hash_table_find(inputpatch_types, name);
    if (inputpatch_base_type != NULL)
    {
        /** Generate a key that is the combination of input patch  type and inner type. */
        char key[128];

        snprintf(key, sizeof(key), "%s<%s>", inputpatch_base_type->name, base->name);

        const glsl_type *actual_type = (glsl_type *)hash_table_find(inputpatch_types, key);
        if (actual_type == NULL)
        {
            actual_type = new glsl_type(GLSL_TYPE_INPUTPATCH, patch_size, base, key);
            hash_table_insert(inputpatch_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
        }

        return actual_type;
    }


    const glsl_type* outputpatch_base_type = (const glsl_type*)hash_table_find(outputpatch_types, name);
    if (outputpatch_base_type != NULL)
    {
        /** Generate a key that is the combination of input patch  type and inner type. */
        char key[128];
        snprintf(key, sizeof(key), "%s<%s>", outputpatch_base_type->name, base->name);

        const glsl_type *actual_type = (glsl_type *)hash_table_find(outputpatch_types, key);
        if (actual_type == NULL)
        {
            actual_type = new glsl_type(GLSL_TYPE_OUTPUTPATCH, patch_size, base, key);
            hash_table_insert(outputpatch_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
        }

        return actual_type;
    }

    if (!base->is_numeric())
    {
        // Hack!
        //#todo-rco: Proper support!
        //if (strcmp(name, "RWStructuredBuffer") && strcmp(name, "RWByteAddressBuffer"))
        {
            return nullptr;
        }
    }

    const glsl_type* image_base_type = (const glsl_type*)hash_table_find(image_types, name);

    if (image_base_type != NULL)
    {
        /** Generate a key that is the combination of sampler type and inner type. */
        char key[128];
        snprintf(key, sizeof(key), "%s<%s>", image_base_type->name, base->name);

        const glsl_type *actual_type = (glsl_type *)hash_table_find(image_types, key);
        if (actual_type == NULL)
        {
            actual_type = new glsl_type(
                (glsl_sampler_dim)(image_base_type->sampler_dimensionality),
                image_base_type->sampler_array,
                image_base_type->sampler_buffer,
                base,
                ralloc_asprintf(mem_ctx, "%s%s", sampler_type_prefix[base->base_type], image_base_type->name));

            hash_table_insert(image_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
        }
        return actual_type;
    }

    const glsl_type* sampler_base_type = (const glsl_type*)hash_table_find(sampler_types, name);

    if (sampler_base_type == NULL)
    {
        return NULL;
    }

    /** Generate a key that is the combination of sampler type and inner type. */
    char key[128];
    if (num_samples>1)
    {
        snprintf(key, sizeof(key), "%s<%s,%d>", sampler_base_type->name, base->name, num_samples);
    }
    else
    {
        snprintf(key, sizeof(key), "%s<%s>", sampler_base_type->name, base->name);
    }

    const glsl_type *actual_type = (glsl_type *)hash_table_find(sampler_types, key);
    if (actual_type == NULL)
    {
        actual_type = new glsl_type(
            (glsl_sampler_dim)(sampler_base_type->sampler_dimensionality),
            sampler_base_type->sampler_shadow,
            sampler_base_type->sampler_array,
            sampler_base_type->sampler_ms,
            num_samples,
            sampler_base_type->sampler_buffer,
            base,
            ralloc_asprintf(mem_ctx, "%s%s", sampler_type_prefix[base->base_type], sampler_base_type->name),
            sampler_base_type->HlslName);

        hash_table_insert(sampler_types, (void *)actual_type, ralloc_strdup(mem_ctx, key));
    }

    return actual_type;
}
Пример #24
0
ir_rvalue * _mesa_ast_field_selection_to_hir(const ast_expression *expr,
	exec_list *instructions, struct _mesa_glsl_parse_state *state)
{
	void *ctx = state;
	ir_rvalue *result = NULL;
	ir_rvalue *op;

	op = expr->subexpressions[0]->hir(instructions, state);

	/* There are two kinds of field selection.  There is the selection of a
	* specific field from a structure, and there is the selection of a
	* swizzle / mask from a vector.  Which is which is determined entirely
	* by the base type of the thing to which the field selection operator is
	* being applied.
	*/
	YYLTYPE loc = expr->get_location();
	if (op->type->is_error())
	{
		/* silently propagate the error */
	}
	else if (op->type->is_vector() || op->type->is_scalar())
	{
		ir_swizzle *swiz = ir_swizzle::create(op,
			expr->primary_expression.identifier,
			op->type->vector_elements);
		if (swiz != NULL)
		{
			result = swiz;
		}
		else
		{
			/* FINISHME: Logging of error messages should be moved into
			* FINISHME: ir_swizzle::create.  This allows the generation of more
			* FINISHME: specific error messages.
			*/
			_mesa_glsl_error(& loc, state, "Invalid swizzle / mask '%s'",
				expr->primary_expression.identifier);
		}
	}
	else if (op->type->is_matrix() && expr->primary_expression.identifier)
	{
		int src_components = op->type->components();
		int components[4] = {0};
		uint32 num_components = 0;
		ir_swizzle_mask mask = {0};
		const char* mask_str = expr->primary_expression.identifier;
		if (mask_str[0] == '_' && mask_str[1] == 'm')
		{
			do
			{
				mask_str += 2;
				int col = (*mask_str) ? (*mask_str++) - '0' : -1;
				int row = (*mask_str) ? (*mask_str++) - '0' : -1;

				if (col >= 0 && col <= op->type->matrix_columns &&
					row >= 0 && row <= op->type->vector_elements)
				{
					components[num_components++] = col * op->type->vector_elements + row;
				}
				else
				{
					components[num_components++] = -1;
				}
			} while (*mask_str != 0 && num_components < 4);
		}
		else if (mask_str[0] == '_' && mask_str[1] >= '1' && mask_str[2] <= '4')
		{
			do
			{
				mask_str += 1;
				int col = (*mask_str) ? (*mask_str++) - '1' : -1;
				int row = (*mask_str) ? (*mask_str++) - '1' : -1;

				if (col >= 0 && col <= op->type->matrix_columns &&
					row >= 0 && row <= op->type->vector_elements)
				{
					components[num_components++] = col * op->type->vector_elements + row;
				}
				else
				{
					components[num_components++] = -1;
				}
			} while (*mask_str != 0 && num_components < 4);
		}

		if (*mask_str == 0)
		{
			if (num_components > 0 && components[0] >= 0 && components[0] <= src_components)
			{
				mask.x = (unsigned)components[0];
				mask.num_components++;
			}
			if (num_components > 1 && components[1] >= 0 && components[1] <= src_components)
			{
				mask.y = (unsigned)components[1];
				mask.has_duplicates = (mask.y == mask.x);
				mask.num_components++;
			}
			if (num_components > 2 && components[2] >= 0 && components[2] <= src_components)
			{
				mask.z = (unsigned)components[2];
				mask.has_duplicates = mask.has_duplicates || (mask.z == mask.y) || (mask.z == mask.x);
				mask.num_components++;
			}
			if (num_components > 3 && components[3] >= 0 && components[3] <= src_components)
			{
				mask.w = (unsigned)components[3];
				mask.has_duplicates = mask.has_duplicates || (mask.w == mask.z) ||
					(mask.w == mask.y) || (mask.w == mask.x);
				mask.num_components++;
			}
		}

		if (mask.num_components == num_components)
		{
			result = new(ctx)ir_swizzle(op, mask);
		}

		if (result == NULL)
		{
			_mesa_glsl_error(&loc, state, "invalid matrix swizzle '%s'",
				expr->primary_expression.identifier);
		}
	}
	else if (op->type->base_type == GLSL_TYPE_STRUCT)
	{
		result = new(ctx)ir_dereference_record(op,
			expr->primary_expression.identifier);

		if (result->type->is_error())
		{
			_mesa_glsl_error(& loc, state, "Cannot access field '%s' of "
				"structure",
				expr->primary_expression.identifier);
		}
	}
	else if (expr->subexpressions[1] != NULL)
	{
		/* Handle "method calls" in GLSL 1.20+ */
		if (state->language_version < 120)
			_mesa_glsl_error(&loc, state, "Methods not supported in GLSL 1.10.");

		ast_expression *call = expr->subexpressions[1];
		check(call->oper == ast_function_call);

		const char *method;
		method = call->subexpressions[0]->primary_expression.identifier;

		if (op->type->is_array() && strcmp(method, "length") == 0)
		{
			if (!call->expressions.is_empty())
				_mesa_glsl_error(&loc, state, "length method takes no arguments.");

			if (op->type->array_size() == 0)
				_mesa_glsl_error(&loc, state, "length called on unsized array.");

			result = new(ctx)ir_constant(op->type->array_size());
		}
		else if (op->type->is_sampler() && op->as_dereference() != NULL)
		{
			return gen_texture_op(expr, op->as_dereference(), instructions, state);
		}
		else if (op->type->is_image() && op->as_dereference() != NULL)
		{
			return gen_image_op(expr, op->as_dereference(), instructions, state);
		}
		else if (op->type->is_outputstream() && strcmp(method, "Append") == 0)
		{
			check(op->variable_referenced()->type->inner_type->is_record());
			check(op->variable_referenced()->type->inner_type->name);

			const char* function_name = "OutputStream_Append";

			ir_function *func = state->symbols->get_function(function_name);
			if (!func)
			{
				// Prepare the function, add it to global symbols. It will be added to declarations at GenerateGlslMain().
				func = new(ctx)ir_function(function_name);
				state->symbols->add_global_function(func);

				//				_mesa_glsl_warning(state, "Append function generation for type '%s'", op->variable_referenced()->type->inner_type->name );
				//				{
				//					const glsl_type* output_type = op->variable_referenced()->type->inner_type;
				//					for (int i = 0; i < output_type->length; i++)
				//					{
				//						_mesa_glsl_warning(state, "   name '%s' : semantic '%s'", output_type->fields.structure[i].name, output_type->fields.structure[i].semantic );
				//					}
				//				}
			}

			exec_list comparison_parameter;
			ir_variable* var = new(ctx)ir_variable(op->variable_referenced()->type->inner_type, ralloc_asprintf(ctx, "arg0"), ir_var_in);
			comparison_parameter.push_tail(var);

			bool is_exact = false;
			ir_function_signature *sig = func->matching_signature(&comparison_parameter, &is_exact);
			if (!sig || !is_exact)
			{
				sig = new(ctx)ir_function_signature(glsl_type::void_type);
				sig->parameters.push_tail(var);
				sig->is_builtin = false;
				sig->is_defined = true;
				func->add_signature(sig);
			}

			if (call->expressions.is_empty() || (call->expressions.get_head() != call->expressions.get_tail()))
			{
				_mesa_glsl_error(&loc, state, "Append method takes one argument.");
			}
			else
			{
				exec_list actual_parameter;
				ast_node *const ast = exec_node_data(ast_node, call->expressions.get_head(), link);
				ir_rvalue *result = ast->hir(instructions, state);
				actual_parameter.push_tail(result);

				instructions->push_tail(new(ctx)ir_call(sig, NULL, &actual_parameter));
			}

			return NULL;
		}
		else if (op->type->is_outputstream() && strcmp(method, "RestartStrip") == 0)
		{
			exec_list actual_parameters;	// empty, as no parameters

			ir_function *func = state->symbols->get_function("EndPrimitive");
			check(func);

			bool is_exact = false;
			ir_function_signature *sig = func->matching_signature(&actual_parameters, &is_exact);
			check(sig && is_exact);

			instructions->push_tail(new(ctx)ir_call(sig, NULL, &actual_parameters));

			return NULL;
		}
		else
		{
			_mesa_glsl_error(&loc, state, "Unknown method: '%s'.", method);
		}
	}
	else
	{
		_mesa_glsl_error(& loc, state, "Cannot access field '%s' of "
			"non-structure / non-vector.",
			expr->primary_expression.identifier);
	}

	return result ? result : ir_rvalue::error_value(ctx);
}
Пример #25
0
static const unsigned *
brw_cs_emit(struct brw_context *brw,
            void *mem_ctx,
            const struct brw_cs_prog_key *key,
            struct brw_cs_prog_data *prog_data,
            struct gl_compute_program *cp,
            struct gl_shader_program *prog,
            unsigned *final_assembly_size)
{
   bool start_busy = false;
   double start_time = 0;

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    drm_intel_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   struct brw_shader *shader =
      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];

   if (unlikely(INTEL_DEBUG & DEBUG_CS))
      brw_dump_ir("compute", prog, &shader->base, &cp->Base);

   prog_data->local_size[0] = cp->LocalSize[0];
   prog_data->local_size[1] = cp->LocalSize[1];
   prog_data->local_size[2] = cp->LocalSize[2];
   unsigned local_workgroup_size =
      cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];

   cfg_t *cfg = NULL;
   const char *fail_msg = NULL;

   int st_index = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);

   /* Now the main event: Visit the shader IR and generate our CS IR for it.
    */
   fs_visitor v8(brw->intelScreen->compiler, brw,
                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
                 &cp->Base, 8, st_index);
   if (!v8.run_cs()) {
      fail_msg = v8.fail_msg;
   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
      cfg = v8.cfg;
      prog_data->simd_size = 8;
   }

   fs_visitor v16(brw->intelScreen->compiler, brw,
                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
                  &cp->Base, 16, st_index);
   if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
       !fail_msg && !v8.simd16_unsupported &&
       local_workgroup_size <= 16 * brw->max_cs_threads) {
      /* Try a SIMD16 compile */
      v16.import_uniforms(&v8);
      if (!v16.run_cs()) {
         perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
         if (!cfg) {
            fail_msg =
               "Couldn't generate SIMD16 program and not "
               "enough threads for SIMD8";
         }
      } else {
         cfg = v16.cfg;
         prog_data->simd_size = 16;
      }
   }

   if (unlikely(cfg == NULL)) {
      assert(fail_msg);
      prog->LinkStatus = false;
      ralloc_strcat(&prog->InfoLog, fail_msg);
      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
                    fail_msg);
      return NULL;
   }

   fs_generator g(brw->intelScreen->compiler, brw,
                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
                  v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
   if (INTEL_DEBUG & DEBUG_CS) {
      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
                                   prog->Label ? prog->Label : "unnamed",
                                   prog->Name);
      g.enable_debug(name);
   }

   g.generate_code(cfg, prog_data->simd_size);

   if (unlikely(brw->perf_debug) && shader) {
      if (shader->compiled_once) {
         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
      }
      shader->compiled_once = true;

      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   return g.get_assembly(final_assembly_size);
}
Пример #26
0
void
vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
                     const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpVariable: {
      struct vtn_variable *var = rzalloc(b, struct vtn_variable);
      var->type = vtn_value(b, w[1], vtn_value_type_type)->type;

      var->chain.var = var;
      var->chain.length = 0;

      struct vtn_value *val =
         vtn_push_value(b, w[2], vtn_value_type_access_chain);
      val->access_chain = &var->chain;

      struct vtn_type *without_array = var->type;
      while(glsl_type_is_array(without_array->type))
         without_array = without_array->array_element;

      nir_variable_mode nir_mode;
      switch ((SpvStorageClass)w[3]) {
      case SpvStorageClassUniform:
      case SpvStorageClassUniformConstant:
         if (without_array->block) {
            var->mode = vtn_variable_mode_ubo;
            b->shader->info.num_ubos++;
         } else if (without_array->buffer_block) {
            var->mode = vtn_variable_mode_ssbo;
            b->shader->info.num_ssbos++;
         } else if (glsl_type_is_image(without_array->type)) {
            var->mode = vtn_variable_mode_image;
            nir_mode = nir_var_uniform;
            b->shader->info.num_images++;
         } else if (glsl_type_is_sampler(without_array->type)) {
            var->mode = vtn_variable_mode_sampler;
            nir_mode = nir_var_uniform;
            b->shader->info.num_textures++;
         } else {
            assert(!"Invalid uniform variable type");
         }
         break;
      case SpvStorageClassPushConstant:
         var->mode = vtn_variable_mode_push_constant;
         assert(b->shader->num_uniforms == 0);
         b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
         break;
      case SpvStorageClassInput:
         var->mode = vtn_variable_mode_input;
         nir_mode = nir_var_shader_in;
         break;
      case SpvStorageClassOutput:
         var->mode = vtn_variable_mode_output;
         nir_mode = nir_var_shader_out;
         break;
      case SpvStorageClassPrivate:
         var->mode = vtn_variable_mode_global;
         nir_mode = nir_var_global;
         break;
      case SpvStorageClassFunction:
         var->mode = vtn_variable_mode_local;
         nir_mode = nir_var_local;
         break;
      case SpvStorageClassWorkgroup:
         var->mode = vtn_variable_mode_workgroup;
         nir_mode = nir_var_shared;
         break;
      case SpvStorageClassCrossWorkgroup:
      case SpvStorageClassGeneric:
      case SpvStorageClassAtomicCounter:
      default:
         unreachable("Unhandled variable storage class");
      }

      switch (var->mode) {
      case vtn_variable_mode_local:
      case vtn_variable_mode_global:
      case vtn_variable_mode_image:
      case vtn_variable_mode_sampler:
      case vtn_variable_mode_workgroup:
         /* For these, we create the variable normally */
         var->var = rzalloc(b->shader, nir_variable);
         var->var->name = ralloc_strdup(var->var, val->name);
         var->var->type = var->type->type;
         var->var->data.mode = nir_mode;

         switch (var->mode) {
         case vtn_variable_mode_image:
         case vtn_variable_mode_sampler:
            var->var->interface_type = without_array->type;
            break;
         default:
            var->var->interface_type = NULL;
            break;
         }
         break;

      case vtn_variable_mode_input:
      case vtn_variable_mode_output: {
         /* For inputs and outputs, we immediately split structures.  This
          * is for a couple of reasons.  For one, builtins may all come in
          * a struct and we really want those split out into separate
          * variables.  For another, interpolation qualifiers can be
          * applied to members of the top-level struct ane we need to be
          * able to preserve that information.
          */

         int array_length = -1;
         struct vtn_type *interface_type = var->type;
         if (b->shader->stage == MESA_SHADER_GEOMETRY &&
             glsl_type_is_array(var->type->type)) {
            /* In Geometry shaders (and some tessellation), inputs come
             * in per-vertex arrays.  However, some builtins come in
             * non-per-vertex, hence the need for the is_array check.  In
             * any case, there are no non-builtin arrays allowed so this
             * check should be sufficient.
             */
            interface_type = var->type->array_element;
            array_length = glsl_get_length(var->type->type);
         }

         if (glsl_type_is_struct(interface_type->type)) {
            /* It's a struct.  Split it. */
            unsigned num_members = glsl_get_length(interface_type->type);
            var->members = ralloc_array(b, nir_variable *, num_members);

            for (unsigned i = 0; i < num_members; i++) {
               const struct glsl_type *mtype = interface_type->members[i]->type;
               if (array_length >= 0)
                  mtype = glsl_array_type(mtype, array_length);

               var->members[i] = rzalloc(b->shader, nir_variable);
               var->members[i]->name =
                  ralloc_asprintf(var->members[i], "%s.%d", val->name, i);
               var->members[i]->type = mtype;
               var->members[i]->interface_type =
                  interface_type->members[i]->type;
               var->members[i]->data.mode = nir_mode;
            }
         } else {
            var->var = rzalloc(b->shader, nir_variable);
            var->var->name = ralloc_strdup(var->var, val->name);
            var->var->type = var->type->type;
            var->var->interface_type = interface_type->type;
            var->var->data.mode = nir_mode;
         }

         /* For inputs and outputs, we need to grab locations and builtin
          * information from the interface type.
          */
         vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var);
         break;

      case vtn_variable_mode_param:
         unreachable("Not created through OpVariable");
      }

      case vtn_variable_mode_ubo:
      case vtn_variable_mode_ssbo:
      case vtn_variable_mode_push_constant:
         /* These don't need actual variables. */
         break;
      }

      if (count > 4) {
         assert(count == 5);
         nir_constant *constant =
            vtn_value(b, w[4], vtn_value_type_constant)->constant;
         var->var->constant_initializer =
            nir_constant_clone(constant, var->var);
      }

      vtn_foreach_decoration(b, val, var_decoration_cb, var);

      if (var->mode == vtn_variable_mode_image ||
          var->mode == vtn_variable_mode_sampler) {
         /* XXX: We still need the binding information in the nir_variable
          * for these. We should fix that.
          */
         var->var->data.binding = var->binding;
         var->var->data.descriptor_set = var->descriptor_set;

         if (var->mode == vtn_variable_mode_image)
            var->var->data.image.format = without_array->image_format;
      }

      if (var->mode == vtn_variable_mode_local) {
         assert(var->members == NULL && var->var != NULL);
         nir_function_impl_add_variable(b->impl, var->var);
      } else if (var->var) {
         nir_shader_add_variable(b->shader, var->var);
      } else if (var->members) {
         unsigned count = glsl_get_length(without_array->type);
         for (unsigned i = 0; i < count; i++) {
            assert(var->members[i]->data.mode != nir_var_local);
            nir_shader_add_variable(b->shader, var->members[i]);
         }
      } else {
         assert(var->mode == vtn_variable_mode_ubo ||
                var->mode == vtn_variable_mode_ssbo ||
                var->mode == vtn_variable_mode_push_constant);
      }
      break;
   }
Пример #27
0
void
program_resource_visitor::process(ir_variable *var)
{
   const glsl_type *t = var->type;
   const bool row_major =
      var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;

   /* false is always passed for the row_major parameter to the other
    * processing functions because no information is available to do
    * otherwise.  See the warning in linker.h.
    */

   /* Only strdup the name if we actually will need to modify it. */
   if (var->data.from_named_ifc_block_array) {
      /* lower_named_interface_blocks created this variable by lowering an
       * interface block array to an array variable.  For example if the
       * original source code was:
       *
       *     out Blk { vec4 bar } foo[3];
       *
       * Then the variable is now:
       *
       *     out vec4 bar[3];
       *
       * We need to visit each array element using the names constructed like
       * so:
       *
       *     Blk[0].bar
       *     Blk[1].bar
       *     Blk[2].bar
       */
      assert(t->is_array());
      const glsl_type *ifc_type = var->get_interface_type();
      char *name = ralloc_strdup(NULL, ifc_type->name);
      size_t name_length = strlen(name);
      for (unsigned i = 0; i < t->length; i++) {
         size_t new_length = name_length;
         ralloc_asprintf_rewrite_tail(&name, &new_length, "[%u].%s", i,
                                      var->name);
         /* Note: row_major is only meaningful for uniform blocks, and
          * lowering is only applied to non-uniform interface blocks, so we
          * can safely pass false for row_major.
          */
         recursion(var->type, &name, new_length, row_major, NULL, false);
      }
      ralloc_free(name);
   } else if (var->data.from_named_ifc_block_nonarray) {
      /* lower_named_interface_blocks created this variable by lowering a
       * named interface block (non-array) to an ordinary variable.  For
       * example if the original source code was:
       *
       *     out Blk { vec4 bar } foo;
       *
       * Then the variable is now:
       *
       *     out vec4 bar;
       *
       * We need to visit this variable using the name:
       *
       *     Blk.bar
       */
      const glsl_type *ifc_type = var->get_interface_type();
      char *name = ralloc_asprintf(NULL, "%s.%s", ifc_type->name, var->name);
      /* Note: row_major is only meaningful for uniform blocks, and lowering
       * is only applied to non-uniform interface blocks, so we can safely
       * pass false for row_major.
       */
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else if (t->without_array()->is_record()) {
      char *name = ralloc_strdup(NULL, var->name);
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else if (t->is_interface()) {
      char *name = ralloc_strdup(NULL, var->type->name);
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else if (t->is_array() && t->fields.array->is_interface()) {
      char *name = ralloc_strdup(NULL, var->type->fields.array->name);
      recursion(var->type, &name, strlen(name), row_major, NULL, false);
      ralloc_free(name);
   } else {
      this->visit_field(t, var->name, row_major, NULL, false);
   }
}
static void
setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                            struct blit_state *blit,
                            const struct gl_framebuffer *drawFb,
                            struct gl_renderbuffer *src_rb,
                            GLenum target)
{
   const char *vs_source;
   char *fs_source;
   void *mem_ctx;
   enum blit_msaa_shader shader_index;
   bool dst_is_msaa = false;
   GLenum src_datatype;
   const char *vec4_prefix;
   const char *sampler_array_suffix = "";
   char *name;
   const char *texcoord_type = "vec2";
   int samples;
   int shader_offset = 0;

   if (src_rb) {
      samples = MAX2(src_rb->NumSamples, 1);
      src_datatype = _mesa_get_format_datatype(src_rb->Format);
   } else {
      /* depth-or-color glCopyTexImage fallback path that passes a NULL rb and
       * doesn't handle integer.
       */
      samples = 1;
      src_datatype = GL_UNSIGNED_NORMALIZED;
   }

   /* We expect only power of 2 samples in source multisample buffer. */
   assert(samples > 0 && _mesa_is_pow_two(samples));
   while (samples >> (shader_offset + 1)) {
      shader_offset++;
   }
   /* Update the assert if we plan to support more than 16X MSAA. */
   assert(shader_offset >= 0 && shader_offset <= 4);

   if (drawFb->Visual.samples > 1) {
      /* If you're calling meta_BlitFramebuffer with the destination
       * multisampled, this is the only path that will work -- swrast and
       * CopyTexImage won't work on it either.
       */
      assert(ctx->Extensions.ARB_sample_shading);

      dst_is_msaa = true;

      /* We need shader invocation per sample, not per pixel */
      _mesa_set_enable(ctx, GL_MULTISAMPLE, GL_TRUE);
      _mesa_set_enable(ctx, GL_SAMPLE_SHADING, GL_TRUE);
      _mesa_MinSampleShading(1.0);
   }

   switch (target) {
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      if (src_rb && (src_rb->_BaseFormat == GL_DEPTH_COMPONENT ||
          src_rb->_BaseFormat == GL_DEPTH_STENCIL)) {
         if (dst_is_msaa)
            shader_index = BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY;
         else
            shader_index = BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_RESOLVE;
      } else {
         if (dst_is_msaa)
            shader_index = BLIT_MSAA_SHADER_2D_MULTISAMPLE_COPY;
         else {
            shader_index = BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE +
                           shader_offset;
         }
      }

      if (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
         shader_index += (BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_RESOLVE -
                          BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE);
         sampler_array_suffix = "Array";
         texcoord_type = "vec3";
      }
      break;
   default:
      _mesa_problem(ctx, "Unkown texture target %s\n",
                    _mesa_enum_to_string(target));
      shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE;
   }

   /* We rely on the enum being sorted this way. */
   STATIC_ASSERT(BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE_INT ==
                 BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE + 5);
   STATIC_ASSERT(BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE_UINT ==
                 BLIT_1X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE + 10);
   if (src_datatype == GL_INT) {
      shader_index += 5;
      vec4_prefix = "i";
   } else if (src_datatype == GL_UNSIGNED_INT) {
      shader_index += 10;
      vec4_prefix = "u";
   } else {
      vec4_prefix = "";
   }

   if (blit->msaa_shaders[shader_index]) {
      _mesa_UseProgram(blit->msaa_shaders[shader_index]);
      return;
   }

   mem_ctx = ralloc_context(NULL);

   if (shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_RESOLVE ||
       shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_RESOLVE ||
       shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY ||
       shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY) {
      char *sample_index;
      const char *arb_sample_shading_extension_string;

      if (dst_is_msaa) {
         arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
         sample_index = "gl_SampleID";
         name = "depth MSAA copy";
      } else {
         /* Don't need that extension, since we're drawing to a single-sampled
          * destination.
          */
         arb_sample_shading_extension_string = "";
         /* From the GL 4.3 spec:
          *
          *     "If there is a multisample buffer (the value of SAMPLE_BUFFERS
          *      is one), then values are obtained from the depth samples in
          *      this buffer. It is recommended that the depth value of the
          *      centermost sample be used, though implementations may choose
          *      any function of the depth sample values at each pixel.
          *
          * We're slacking and instead of choosing centermost, we've got 0.
          */
         sample_index = "0";
         name = "depth MSAA resolve";
      }

      vs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "in vec2 position;\n"
                                  "in %s textureCoords;\n"
                                  "out %s texCoords;\n"
                                  "void main()\n"
                                  "{\n"
                                  "   texCoords = textureCoords;\n"
                                  "   gl_Position = vec4(position, 0.0, 1.0);\n"
                                  "}\n",
                                  texcoord_type,
                                  texcoord_type);
      fs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "#extension GL_ARB_texture_multisample : enable\n"
                                  "%s\n"
                                  "uniform sampler2DMS%s texSampler;\n"
                                  "in %s texCoords;\n"
                                  "out vec4 out_color;\n"
                                  "\n"
                                  "void main()\n"
                                  "{\n"
                                  "   gl_FragDepth = texelFetch(texSampler, i%s(texCoords), %s).r;\n"
                                  "}\n",
                                  arb_sample_shading_extension_string,
                                  sampler_array_suffix,
                                  texcoord_type,
                                  texcoord_type,
                                  sample_index);
   } else {
      /* You can create 2D_MULTISAMPLE textures with 0 sample count (meaning 1
       * sample).  Yes, this is ridiculous.
       */
      char *sample_resolve;
      const char *arb_sample_shading_extension_string;
      const char *merge_function;
      name = ralloc_asprintf(mem_ctx, "%svec4 MSAA %s",
                             vec4_prefix,
                             dst_is_msaa ? "copy" : "resolve");

      if (dst_is_msaa) {
         arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
         sample_resolve = ralloc_asprintf(mem_ctx, "   out_color = texelFetch(texSampler, i%s(texCoords), gl_SampleID);", texcoord_type);
         merge_function = "";
      } else {
         int i;
         int step;

         if (src_datatype == GL_INT || src_datatype == GL_UNSIGNED_INT) {
            merge_function =
               "gvec4 merge(gvec4 a, gvec4 b) { return (a >> gvec4(1)) + (b >> gvec4(1)) + (a & b & gvec4(1)); }\n";
         } else {
            /* The divide will happen at the end for floats. */
            merge_function =
               "vec4 merge(vec4 a, vec4 b) { return (a + b); }\n";
         }

         arb_sample_shading_extension_string = "";

         /* We're assuming power of two samples for this resolution procedure.
          *
          * To avoid losing any floating point precision if the samples all
          * happen to have the same value, we merge pairs of values at a time
          * (so the floating point exponent just gets increased), rather than
          * doing a naive sum and dividing.
          */
         assert(_mesa_is_pow_two(samples));
         /* Fetch each individual sample. */
         sample_resolve = rzalloc_size(mem_ctx, 1);
         for (i = 0; i < samples; i++) {
            ralloc_asprintf_append(&sample_resolve,
                                   "   gvec4 sample_1_%d = texelFetch(texSampler, i%s(texCoords), %d);\n",
                                   i, texcoord_type, i);
         }
         /* Now, merge each pair of samples, then merge each pair of those,
          * etc.
          */
         for (step = 2; step <= samples; step *= 2) {
            for (i = 0; i < samples; i += step) {
               ralloc_asprintf_append(&sample_resolve,
                                      "   gvec4 sample_%d_%d = merge(sample_%d_%d, sample_%d_%d);\n",
                                      step, i,
                                      step / 2, i,
                                      step / 2, i + step / 2);
            }
         }

         /* Scale the final result. */
         if (src_datatype == GL_UNSIGNED_INT || src_datatype == GL_INT) {
            ralloc_asprintf_append(&sample_resolve,
                                   "   out_color = sample_%d_0;\n",
                                   samples);
         } else {
            ralloc_asprintf_append(&sample_resolve,
                                   "   gl_FragColor = sample_%d_0 / %f;\n",
                                   samples, (float)samples);
         }
      }

      vs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "in vec2 position;\n"
                                  "in %s textureCoords;\n"
                                  "out %s texCoords;\n"
                                  "void main()\n"
                                  "{\n"
                                  "   texCoords = textureCoords;\n"
                                  "   gl_Position = vec4(position, 0.0, 1.0);\n"
                                  "}\n",
                                  texcoord_type,
                                  texcoord_type);
      fs_source = ralloc_asprintf(mem_ctx,
                                  "#version 130\n"
                                  "#extension GL_ARB_texture_multisample : enable\n"
                                  "%s\n"
                                  "#define gvec4 %svec4\n"
                                  "uniform %ssampler2DMS%s texSampler;\n"
                                  "in %s texCoords;\n"
                                  "out gvec4 out_color;\n"
                                  "\n"
                                  "%s" /* merge_function */
                                  "void main()\n"
                                  "{\n"
                                  "%s\n" /* sample_resolve */
                                  "}\n",
                                  arb_sample_shading_extension_string,
                                  vec4_prefix,
                                  vec4_prefix,
                                  sampler_array_suffix,
                                  texcoord_type,
                                  merge_function,
                                  sample_resolve);
   }

   _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
                                       &blit->msaa_shaders[shader_index]);

   ralloc_free(mem_ctx);
}
Пример #29
0
extern "C" bool
_mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline)
{
   /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
    * OpenGL 4.1 spec says:
    *
    *     "[INVALID_OPERATION] is generated by any command that transfers
    *     vertices to the GL if:
    *
    *         ...
    *
    *         - Any two active samplers in the current program object are of
    *           different types, but refer to the same texture image unit.
    *
    *         - The number of active samplers in the program exceeds the
    *           maximum number of texture image units allowed."
    */

   GLbitfield mask;
   GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS];
   struct gl_linked_shader *shader;
   unsigned active_samplers = 0;
   const struct gl_shader_program **shProg =
      (const struct gl_shader_program **) pipeline->CurrentProgram;


   memset(TexturesUsed, 0, sizeof(TexturesUsed));

   for (unsigned idx = 0; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) {
      if (!shProg[idx])
         continue;

      shader = shProg[idx]->_LinkedShaders[idx];
      if (!shader || !shader->Program)
         continue;

      mask = shader->Program->SamplersUsed;
      while (mask) {
         const int s = u_bit_scan(&mask);
         GLuint unit = shader->SamplerUnits[s];
         GLuint tgt = shader->SamplerTargets[s];

         /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a
          * great job of eliminating unused uniforms currently so for now
          * don't throw an error if two sampler types both point to 0.
          */
         if (unit == 0)
            continue;

         if (TexturesUsed[unit] & ~(1 << tgt)) {
            pipeline->InfoLog =
               ralloc_asprintf(pipeline,
                     "Program %d: "
                     "Texture unit %d is accessed with 2 different types",
                     shProg[idx]->Name, unit);
            return false;
         }

         TexturesUsed[unit] |= (1 << tgt);
      }

      active_samplers += shader->num_samplers;
   }

   if (active_samplers > MAX_COMBINED_TEXTURE_IMAGE_UNITS) {
      pipeline->InfoLog =
         ralloc_asprintf(pipeline,
                         "the number of active samplers %d exceed the "
                         "maximum %d",
                         active_samplers, MAX_COMBINED_TEXTURE_IMAGE_UNITS);
      return false;
   }

   return true;
}
static void
setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                   struct blit_state *blit,
                                   struct gl_renderbuffer *src_rb,
                                   GLenum target, GLenum filter)
{
   GLint loc_src_width, loc_src_height;
   int i, samples;
   int shader_offset = 0;
   void *mem_ctx = ralloc_context(NULL);
   char *fs_source;
   char *name, *sample_number;
   const uint8_t *sample_map;
   char *sample_map_str = rzalloc_size(mem_ctx, 1);
   char *sample_map_expr = rzalloc_size(mem_ctx, 1);
   char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
   const char *sampler_array_suffix = "";
   float y_scale;
   enum blit_msaa_shader shader_index;

   assert(src_rb);
   samples = MAX2(src_rb->NumSamples, 1);
   y_scale = samples * 0.5;

   /* We expect only power of 2 samples in source multisample buffer. */
   assert(samples > 0 && _mesa_is_pow_two(samples));
   while (samples >> (shader_offset + 1)) {
      shader_offset++;
   }
   /* Update the assert if we plan to support more than 8X MSAA. */
   assert(shader_offset > 0 && shader_offset < 4);

   assert(target == GL_TEXTURE_2D_MULTISAMPLE ||
          target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY);

   shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE +
                  shader_offset - 1;

   if (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
      shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
                      BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
      sampler_array_suffix = "Array";
   }

   if (blit->msaa_shaders[shader_index]) {
      _mesa_UseProgram(blit->msaa_shaders[shader_index]);
      /* Update the uniform values. */
      loc_src_width =
         _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_width");
      loc_src_height =
         _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_height");
      _mesa_Uniform1f(loc_src_width, src_rb->Width);
      _mesa_Uniform1f(loc_src_height, src_rb->Height);
      return;
   }

   name = ralloc_asprintf(mem_ctx, "vec4 MSAA scaled resolve");

   /* Below switch is used to setup the shader expression, which computes
    * sample index and map it to to a sample number on hardware.
    */
   switch(samples) {
   case 2:
      sample_number =  "sample_map[int(2 * fract(coord.x))]";
      sample_map = ctx->Const.SampleMap2x;
      break;
   case 4:
      sample_number =  "sample_map[int(2 * fract(coord.x) + 4 * fract(coord.y))]";
      sample_map = ctx->Const.SampleMap4x;
      break;
   case 8:
      sample_number =  "sample_map[int(2 * fract(coord.x) + 8 * fract(coord.y))]";
      sample_map = ctx->Const.SampleMap8x;
      break;
   default:
      sample_number = NULL;
      sample_map = NULL;
      _mesa_problem(ctx, "Unsupported sample count %d\n", samples);
      unreachable("Unsupported sample count");
   }

   /* Create sample map string. */
   for (i = 0 ; i < samples - 1; i++) {
      ralloc_asprintf_append(&sample_map_str, "%d, ", sample_map[i]);
   }
   ralloc_asprintf_append(&sample_map_str, "%d", sample_map[samples - 1]);

   /* Create sample map expression using above string. */
   ralloc_asprintf_append(&sample_map_expr,
                          "   const int sample_map[%d] = int[%d](%s);\n",
                          samples, samples, sample_map_str);

   if (target == GL_TEXTURE_2D_MULTISAMPLE) {
      ralloc_asprintf_append(&texel_fetch_macro,
                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
                             sample_number);
   } else {
      ralloc_asprintf_append(&texel_fetch_macro,
                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
                             sample_number);
   }

   static const char vs_source[] =
                               "#version 130\n"
                               "in vec2 position;\n"
                               "in vec3 textureCoords;\n"
                               "out vec2 texCoords;\n"
                               "flat out int layer;\n"
                               "void main()\n"
                               "{\n"
                               "   texCoords = textureCoords.xy;\n"
                               "   layer = int(textureCoords.z);\n"
                               "   gl_Position = vec4(position, 0.0, 1.0);\n"
                               "}\n"
      ;

   fs_source = ralloc_asprintf(mem_ctx,
                               "#version 130\n"
                               "#extension GL_ARB_texture_multisample : enable\n"
                               "uniform sampler2DMS%s texSampler;\n"
                               "uniform float src_width, src_height;\n"
                               "in vec2 texCoords;\n"
                               "flat in int layer;\n"
                               "out vec4 out_color;\n"
                               "\n"
                               "void main()\n"
                               "{\n"
                               "%s"
                               "   vec2 interp;\n"
                               "   const vec2 scale = vec2(2.0f, %ff);\n"
                               "   const vec2 scale_inv = vec2(0.5f, %ff);\n"
                               "   const vec2 s_0_offset = vec2(0.25f, %ff);\n"
                               "   vec2 s_0_coord, s_1_coord, s_2_coord, s_3_coord;\n"
                               "   vec4 s_0_color, s_1_color, s_2_color, s_3_color;\n"
                               "   vec4 x_0_color, x_1_color;\n"
                               "   vec2 tex_coord = texCoords - s_0_offset;\n"
                               "\n"
                               "   tex_coord *= scale;\n"
                               "   clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
                               "   clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
                               "   interp = fract(tex_coord);\n"
                               "   tex_coord = ivec2(tex_coord) * scale_inv;\n"
                               "\n"
                               "   /* Compute the sample coordinates used for filtering. */\n"
                               "   s_0_coord = tex_coord;\n"
                               "   s_1_coord = tex_coord + vec2(scale_inv.x, 0.0f);\n"
                               "   s_2_coord = tex_coord + vec2(0.0f, scale_inv.y);\n"
                               "   s_3_coord = tex_coord + vec2(scale_inv.x, scale_inv.y);\n"
                               "\n"
                               "   /* Fetch sample color values. */\n"
                               "%s"
                               "   s_0_color = TEXEL_FETCH(s_0_coord)\n"
                               "   s_1_color = TEXEL_FETCH(s_1_coord)\n"
                               "   s_2_color = TEXEL_FETCH(s_2_coord)\n"
                               "   s_3_color = TEXEL_FETCH(s_3_coord)\n"
                               "#undef TEXEL_FETCH\n"
                               "\n"
                               "   /* Do bilinear filtering on sample colors. */\n"
                               "   x_0_color =  mix(s_0_color, s_1_color, interp.x);\n"
                               "   x_1_color =  mix(s_2_color, s_3_color, interp.x);\n"
                               "   out_color = mix(x_0_color, x_1_color, interp.y);\n"
                               "}\n",
                               sampler_array_suffix,
                               sample_map_expr,
                               y_scale,
                               1.0f / y_scale,
                               1.0f / samples,
                               texel_fetch_macro);

   _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
                                       &blit->msaa_shaders[shader_index]);
   loc_src_width =
      _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_width");
   loc_src_height =
      _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_height");
   _mesa_Uniform1f(loc_src_width, src_rb->Width);
   _mesa_Uniform1f(loc_src_height, src_rb->Height);

   ralloc_free(mem_ctx);
}