/** * Write shader and associated info to a file. */ void _mesa_write_shader_to_file(const struct gl_shader *shader) { const char *type = "????"; char filename[100]; FILE *f; switch (shader->Stage) { case MESA_SHADER_FRAGMENT: type = "frag"; break; case MESA_SHADER_TESS_CTRL: type = "tesc"; break; case MESA_SHADER_TESS_EVAL: type = "tese"; break; case MESA_SHADER_VERTEX: type = "vert"; break; case MESA_SHADER_GEOMETRY: type = "geom"; break; case MESA_SHADER_COMPUTE: type = "comp"; break; } _mesa_snprintf(filename, sizeof(filename), "shader_%u.%s", shader->Name, type); f = fopen(filename, "w"); if (!f) { fprintf(stderr, "Unable to open %s for writing\n", filename); return; } fprintf(f, "/* Shader %u source, checksum %u */\n", shader->Name, shader->SourceChecksum); fputs(shader->Source, f); fprintf(f, "\n"); fprintf(f, "/* Compile status: %s */\n", shader->CompileStatus ? "ok" : "fail"); fprintf(f, "/* Log Info: */\n"); if (shader->InfoLog) { fputs(shader->InfoLog, f); } if (shader->CompileStatus && shader->Program) { fprintf(f, "/* GPU code */\n"); fprintf(f, "/*\n"); _mesa_fprint_program_opt(f, shader->Program, PROG_PRINT_DEBUG, GL_TRUE); fprintf(f, "*/\n"); fprintf(f, "/* Parameters / constants */\n"); fprintf(f, "/*\n"); _mesa_fprint_parameter_list(f, shader->Program->Parameters); fprintf(f, "*/\n"); } fclose(f); }
static void PrintShaderInstructions(GLuint shader, FILE *f) { GET_CURRENT_CONTEXT(ctx); struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); struct gl_program *prog = sh->Program; _mesa_fprint_program_opt(stdout, prog, Options.Mode, Options.LineNumbers); if (Options.Params) _mesa_print_program_parameters(ctx, prog); }
/** * Write shader and associated info to a file. */ void _mesa_write_shader_to_file(const struct gl_shader *shader) { const char *type; char filename[100]; FILE *f; if (shader->Type == GL_FRAGMENT_SHADER) type = "frag"; else if (shader->Type == GL_VERTEX_SHADER) type = "vert"; else type = "geom"; _mesa_snprintf(filename, sizeof(filename), "shader_%u.%s", shader->Name, type); f = fopen(filename, "w"); if (!f) { fprintf(stderr, "Unable to open %s for writing\n", filename); return; } fprintf(f, "/* Shader %u source, checksum %u */\n", shader->Name, shader->SourceChecksum); fputs(shader->Source, f); fprintf(f, "\n"); fprintf(f, "/* Compile status: %s */\n", shader->CompileStatus ? "ok" : "fail"); fprintf(f, "/* Log Info: */\n"); if (shader->InfoLog) { fputs(shader->InfoLog, f); } if (shader->CompileStatus && shader->Program) { fprintf(f, "/* GPU code */\n"); fprintf(f, "/*\n"); _mesa_fprint_program_opt(f, shader->Program, PROG_PRINT_DEBUG, GL_TRUE); fprintf(f, "*/\n"); fprintf(f, "/* Parameters / constants */\n"); fprintf(f, "/*\n"); _mesa_fprint_parameter_list(f, shader->Program->Parameters); fprintf(f, "*/\n"); } fclose(f); }
bool brw_codegen_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { GLuint program_size; const GLuint *program; struct brw_vs_prog_data prog_data; struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base; void *mem_ctx; int i; struct brw_shader *vs = NULL; bool start_busy = false; double start_time = 0; if (prog) vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&prog_data, 0, sizeof(prog_data)); /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ if (!prog) stage_prog_data->use_alt_mode = true; mem_ctx = ralloc_context(NULL); brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX, brw->intelScreen->devinfo, prog, &vp->program.Base, &prog_data.base.base, 0); /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count = vp->program.Base.nir->num_uniforms; if (!brw->intelScreen->compiler->scalar_vs) param_count *= 4; if (vs) prog_data.base.base.nr_image_params = vs->base.NumImages; /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip * planes as uniforms. */ param_count += key->nr_userclip_plane_consts * 4; stage_prog_data->param = rzalloc_array(NULL, const gl_constant_value *, param_count); stage_prog_data->pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); stage_prog_data->image_param = rzalloc_array(NULL, struct brw_image_param, stage_prog_data->nr_image_params); stage_prog_data->nr_params = param_count; if (prog) { brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base, &prog_data.base.base, brw->intelScreen->compiler->scalar_vs); } else { brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base, &prog_data.base.base); } GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data.inputs_read = vp->program.Base.InputsRead; if (key->copy_edgeflag) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } if (brw->gen < 6) { /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (key->point_coord_replace & (1 << i)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); } /* if back colors are written, allocate slots for front colors too */ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); } /* In order for legacy clipping to work, we need to populate the clip * distance varying slots whenever clipping is enabled, even if the vertex * shader doesn't write to gl_ClipDistance. */ if (key->nr_userclip_plane_consts > 0) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); } brw_compute_vue_map(brw->intelScreen->devinfo, &prog_data.base.vue_map, outputs_written, prog ? prog->SeparateShader : false); if (0) { _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG, true); } if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo)); start_time = get_time(); } if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base); int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS); /* Emit GEN4 code. */ char *error_str; program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key, &prog_data, vp->program.Base.nir, brw_select_clip_planes(&brw->ctx), !_mesa_is_gles3(&brw->ctx), st_index, &program_size, &error_str); if (program == NULL) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, error_str); } _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str); ralloc_free(mem_ctx); return false; } if (unlikely(brw->perf_debug) && vs) { if (vs->compiled_once) { brw_vs_debug_recompile(brw, prog, key); } if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { perf_debug("VS compile took %.03f ms and stalled the GPU\n", (get_time() - start_time) * 1000); } vs->compiled_once = true; } /* Scratch space is used for register spilling */ if (prog_data.base.base.total_scratch) { brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo, prog_data.base.base.total_scratch * brw->max_vs_threads); } brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, key, sizeof(struct brw_vs_prog_key), program, program_size, &prog_data, sizeof(prog_data), &brw->vs.base.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); return true; }
/** * Print program to stderr, default options. */ void _mesa_print_program(const struct gl_program *prog) { _mesa_fprint_program_opt(stderr, prog, PROG_PRINT_DEBUG, GL_TRUE); }
static bool do_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { GLuint program_size; const GLuint *program; struct brw_vs_compile c; struct brw_vs_prog_data prog_data; void *mem_ctx; int i; struct gl_shader *vs = NULL; if (prog) vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); memset(&prog_data, 0, sizeof(prog_data)); mem_ctx = ralloc_context(NULL); c.vp = vp; /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count; if (vs) { /* We add padding around uniform values below vec4 size, with the worst * case being a float value that gets blown up to a vec4, so be * conservative here. */ param_count = vs->num_uniform_components * 4; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } /* We also upload clip plane data as uniforms */ param_count += MAX_CLIP_PLANES * 4; prog_data.base.param = rzalloc_array(NULL, const float *, param_count); prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count); GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } if (brw->gen < 6) { /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); } /* if back colors are written, allocate slots for front colors too */ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); } brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written, c.key.base.userclip_active); if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; } if (prog_data.base.nr_pull_params) prog_data.base.num_surfaces = 1; if (c.vp->program.Base.SamplersUsed) prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); if (prog && prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) { prog_data.base.num_surfaces = SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks); } /* Scratch space is used for register spilling */ if (c.base.last_scratch) { perf_debug("Vertex shader triggered register spilling. " "Try reducing the number of live vec4 values to " "improve performance.\n"); prog_data.base.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); brw_get_scratch_bo(brw, &brw->vs.scratch_bo, prog_data.base.total_scratch * brw->max_vs_threads); } brw_upload_cache(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), program, program_size, &prog_data, sizeof(prog_data), &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); return true; }
static bool brw_vs_do_compile(struct brw_context *brw, struct brw_vs_compile *c) { struct brw_stage_prog_data *stage_prog_data = &c->prog_data.base.base; struct gl_shader *vs = NULL; int i; if (c->base.shader_prog) vs = c->base.shader_prog->_LinkedShaders[MESA_SHADER_VERTEX]; /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count; if (vs) { /* We add padding around uniform values below vec4 size, with the worst * case being a float value that gets blown up to a vec4, so be * conservative here. */ param_count = vs->num_uniform_components * 4; } else { param_count = c->vp->program.Base.Parameters->NumParameters * 4; } /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip * planes as uniforms. */ param_count += c->key.base.nr_userclip_plane_consts * 4; stage_prog_data->param = rzalloc_array(NULL, const float *, param_count); stage_prog_data->pull_param = rzalloc_array(NULL, const float *, param_count); /* Setting nr_params here NOT to the size of the param and pull_param * arrays, but to the number of uniform components vec4_visitor * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. */ stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; if (vs) { stage_prog_data->nr_params += vs->num_samplers; } GLbitfield64 outputs_written = c->vp->program.Base.OutputsWritten; c->prog_data.inputs_read = c->vp->program.Base.InputsRead; if (c->key.copy_edgeflag) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); c->prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } if (brw->gen < 6) { /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c->key.point_coord_replace & (1 << i)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); } /* if back colors are written, allocate slots for front colors too */ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); } /* In order for legacy clipping to work, we need to populate the clip * distance varying slots whenever clipping is enabled, even if the vertex * shader doesn't write to gl_ClipDistance. */ if (c->key.base.userclip_active) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); } brw_compute_vue_map(brw, &c->prog_data.base.vue_map, outputs_written); if (0) { _mesa_fprint_program_opt(stderr, &c->vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ c->base.program = brw_vs_emit(brw, c->base.shader_prog, c, &c->prog_data, c->base.mem_ctx, &c->base.program_size); if (c->base.program == NULL) return false; if (c->base.last_scratch) { c->prog_data.base.total_scratch = brw_get_scratch_size(c->base.last_scratch*REG_SIZE); } return true; }
static bool do_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; void *mem_ctx; int aux_size; int i; struct gl_shader *vs = NULL; if (prog) vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); mem_ctx = ralloc_context(NULL); brw_init_compile(brw, &c.func, mem_ctx); c.vp = vp; /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count; if (vs) { /* We add padding around uniform values below vec4 size, with the worst * case being a float value that gets blown up to a vec4, so be * conservative here. */ param_count = vs->num_uniform_components * 4; /* We also upload clip plane data as uniforms */ param_count += MAX_CLIP_PLANES * 4; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } c.prog_data.param = rzalloc_array(NULL, const float *, param_count); c.prog_data.pull_param = rzalloc_array(NULL, const float *, param_count); c.prog_data.outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } brw_compute_vue_map(&c); if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ if (prog) { if (!brw_vs_emit(prog, &c)) { ralloc_free(mem_ctx); return false; } } else { brw_old_vs_emit(&c); } if (c.prog_data.nr_pull_params) c.prog_data.num_surfaces = 1; if (c.vp->program.Base.SamplersUsed) c.prog_data.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); if (prog && prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) { c.prog_data.num_surfaces = SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks); } /* Scratch space is used for register spilling */ if (c.last_scratch) { perf_debug("Vertex shader triggered register spilling. " "Try reducing the number of live vec4 values to " "improve performance.\n"); c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); brw_get_scratch_bo(intel, &brw->vs.scratch_bo, c.prog_data.total_scratch * brw->max_vs_threads); } /* get the program */ program = brw_get_program(&c.func, &program_size); /* We upload from &c.prog_data including the constant_map assuming * they're packed together. It would be nice to have a * compile-time assert macro here. */ assert(c.constant_map == (int8_t *)&c.prog_data + sizeof(c.prog_data)); assert(ctx->Const.VertexProgram.MaxNativeParameters == ARRAY_SIZE(c.constant_map)); (void) ctx; aux_size = sizeof(c.prog_data); /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; brw_upload_cache(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); return true; }
static bool do_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { GLuint program_size; const GLuint *program; struct brw_vs_compile c; struct brw_vs_prog_data prog_data; struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base; void *mem_ctx; int i; struct gl_shader *vs = NULL; if (prog) vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); memset(&prog_data, 0, sizeof(prog_data)); mem_ctx = ralloc_context(NULL); c.vp = vp; /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count; if (vs) { /* We add padding around uniform values below vec4 size, with the worst * case being a float value that gets blown up to a vec4, so be * conservative here. */ param_count = vs->num_uniform_components * 4; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip * planes as uniforms. */ param_count += c.key.base.nr_userclip_plane_consts * 4; stage_prog_data->param = rzalloc_array(NULL, const float *, param_count); stage_prog_data->pull_param = rzalloc_array(NULL, const float *, param_count); /* Setting nr_params here NOT to the size of the param and pull_param * arrays, but to the number of uniform components vec4_visitor * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. */ stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; if (vs) { stage_prog_data->nr_params += vs->num_samplers; } GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } if (brw->gen < 6) { /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); } /* if back colors are written, allocate slots for front colors too */ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); } /* In order for legacy clipping to work, we need to populate the clip * distance varying slots whenever clipping is enabled, even if the vertex * shader doesn't write to gl_ClipDistance. */ if (c.key.base.userclip_active) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); } brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written); if (0) { _mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; } /* Scratch space is used for register spilling */ if (c.base.last_scratch) { perf_debug("Vertex shader triggered register spilling. " "Try reducing the number of live vec4 values to " "improve performance.\n"); prog_data.base.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo, prog_data.base.total_scratch * brw->max_vs_threads); } brw_upload_cache(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), program, program_size, &prog_data, sizeof(prog_data), &brw->vs.base.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); return true; }
/** * Parse/compile the 'str' returning the compiled 'program'. * ctx->Program.ErrorPos will be -1 if successful. Otherwise, ErrorPos * indicates the position of the error in 'str'. */ void _mesa_parse_nv_fragment_program(struct gl_context *ctx, GLenum dstTarget, const GLubyte *str, GLsizei len, struct gl_fragment_program *program) { struct parse_state parseState; struct prog_instruction instBuffer[MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS]; struct prog_instruction *newInst; GLenum target; GLubyte *programString; /* Make a null-terminated copy of the program string */ programString = (GLubyte *) MALLOC(len + 1); if (!programString) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV"); return; } memcpy(programString, str, len); programString[len] = 0; /* Get ready to parse */ memset(&parseState, 0, sizeof(struct parse_state)); parseState.ctx = ctx; parseState.start = programString; parseState.program = program; parseState.numInst = 0; parseState.curLine = programString; parseState.parameters = _mesa_new_parameter_list(); /* Reset error state */ _mesa_set_program_error(ctx, -1, NULL); /* check the program header */ if (strncmp((const char *) programString, "!!FP1.0", 7) == 0) { target = GL_FRAGMENT_PROGRAM_NV; parseState.pos = programString + 7; } else if (strncmp((const char *) programString, "!!FCP1.0", 8) == 0) { /* fragment / register combiner program - not supported */ _mesa_set_program_error(ctx, 0, "Invalid fragment program header"); _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(bad header)"); return; } else { /* invalid header */ _mesa_set_program_error(ctx, 0, "Invalid fragment program header"); _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(bad header)"); return; } /* make sure target and header match */ if (target != dstTarget) { _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(target mismatch 0x%x != 0x%x)", target, dstTarget); return; } if (Parse_InstructionSequence(&parseState, instBuffer)) { GLuint u; /* successful parse! */ if (parseState.outputsWritten == 0) { /* must write at least one output! */ _mesa_error(ctx, GL_INVALID_OPERATION, "Invalid fragment program - no outputs written."); return; } /* copy the compiled instructions */ assert(parseState.numInst <= MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS); newInst = _mesa_alloc_instructions(parseState.numInst); if (!newInst) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV"); return; /* out of memory */ } _mesa_copy_instructions(newInst, instBuffer, parseState.numInst); /* install the program */ program->Base.Target = target; if (program->Base.String) { FREE(program->Base.String); } program->Base.String = programString; program->Base.Format = GL_PROGRAM_FORMAT_ASCII_ARB; if (program->Base.Instructions) { free(program->Base.Instructions); } program->Base.Instructions = newInst; program->Base.NumInstructions = parseState.numInst; program->Base.InputsRead = parseState.inputsRead; program->Base.OutputsWritten = parseState.outputsWritten; for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) program->Base.TexturesUsed[u] = parseState.texturesUsed[u]; /* save program parameters */ program->Base.Parameters = parseState.parameters; /* allocate registers for declared program parameters */ #if 00 _mesa_assign_program_registers(&(program->SymbolTable)); #endif #ifdef DEBUG_foo printf("--- glLoadProgramNV(%d) result ---\n", program->Base.Id); _mesa_fprint_program_opt(stdout, &program->Base, PROG_PRINT_NV, 0); printf("----------------------------------\n"); #endif } else { /* Error! */ _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV"); /* NOTE: _mesa_set_program_error would have been called already */ } }
static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; int aux_size; int i; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); brw_init_compile(brw, &c.func); c.vp = vp; c.prog_data.outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, GL_TRUE); } /* Emit GEN4 code. */ brw_vs_emit(&c); /* get the program */ program = brw_get_program(&c.func, &program_size); /* We upload from &c.prog_data including the constant_map assuming * they're packed together. It would be nice to have a * compile-time assert macro here. */ assert(c.constant_map == (int8_t *)&c.prog_data + sizeof(c.prog_data)); assert(ctx->Const.VertexProgram.MaxNativeParameters == ARRAY_SIZE(c.constant_map)); (void) ctx; aux_size = sizeof(c.prog_data); /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, aux_size, &brw->vs.prog_data); }