static bool brw_codegen_gs_prog(struct brw_context *brw, struct brw_program *gp, struct brw_gs_prog_key *key) { struct brw_compiler *compiler = brw->screen->compiler; const struct gen_device_info *devinfo = &brw->screen->devinfo; struct brw_stage_state *stage_state = &brw->gs.base; struct brw_gs_prog_data prog_data; bool start_busy = false; double start_time = 0; memset(&prog_data, 0, sizeof(prog_data)); assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. * * Note: param_count needs to be num_uniform_components * 4, since we add * padding around uniform values below vec4 size, so the worst case is that * every uniform is a float which gets padded to the size of a vec4. */ int param_count = gp->program.nir->num_uniforms / 4; prog_data.base.base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.base.image_param = rzalloc_array(NULL, struct brw_image_param, gp->program.info.num_images); prog_data.base.base.nr_params = param_count; prog_data.base.base.nr_image_params = gp->program.info.num_images; brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_GEOMETRY]); brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, prog_data.base.base.ubo_ranges); uint64_t outputs_written = gp->program.info.outputs_written; brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, gp->program.info.separate_shader); int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true); if (unlikely(brw->perf_debug)) { start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); start_time = get_time(); } void *mem_ctx = ralloc_context(NULL); unsigned program_size; char *error_str; const unsigned *program = brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, &prog_data, gp->program.nir, &gp->program, st_index, &program_size, &error_str); if (program == NULL) { ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); ralloc_free(mem_ctx); return false; } if (unlikely(brw->perf_debug)) { if (gp->compiled_once) { brw_gs_debug_recompile(brw, &gp->program, key); } if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { perf_debug("GS compile took %.03f ms and stalled the GPU\n", (get_time() - start_time) * 1000); } gp->compiled_once = true; } /* Scratch space is used for register spilling */ brw_alloc_stage_scratch(brw, stage_state, prog_data.base.base.total_scratch, devinfo->max_gs_threads); brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, key, sizeof(*key), program, program_size, &prog_data, sizeof(prog_data), &stage_state->prog_offset, &brw->gs.base.prog_data); ralloc_free(mem_ctx); return true; }
static bool brw_codegen_wm_prog(struct brw_context *brw, struct brw_program *fp, struct brw_wm_prog_key *key, struct brw_vue_map *vue_map) { const struct gen_device_info *devinfo = &brw->screen->devinfo; void *mem_ctx = ralloc_context(NULL); struct brw_wm_prog_data prog_data; const GLuint *program; bool start_busy = false; double start_time = 0; nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir); memset(&prog_data, 0, sizeof(prog_data)); /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ if (fp->program.is_arb_asm) prog_data.base.use_alt_mode = true; assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); if (!fp->program.is_arb_asm) { brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program, &prog_data.base, true); brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir, NULL, prog_data.base.ubo_ranges); } else { brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base); if (unlikely(INTEL_DEBUG & DEBUG_WM)) brw_dump_arb_asm("fragment", &fp->program); } if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo)); start_time = get_time(); } int st_index8 = -1, st_index16 = -1, st_index32 = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) { st_index8 = brw_get_shader_time_index(brw, &fp->program, ST_FS8, !fp->program.is_arb_asm); st_index16 = brw_get_shader_time_index(brw, &fp->program, ST_FS16, !fp->program.is_arb_asm); st_index32 = brw_get_shader_time_index(brw, &fp->program, ST_FS32, !fp->program.is_arb_asm); } char *error_str = NULL; program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx, key, &prog_data, nir, &fp->program, st_index8, st_index16, st_index32, true, false, vue_map, &error_str); if (program == NULL) { if (!fp->program.is_arb_asm) { fp->program.sh.data->LinkStatus = LINKING_FAILURE; ralloc_strcat(&fp->program.sh.data->InfoLog, error_str); } _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str); ralloc_free(mem_ctx); return false; } if (unlikely(brw->perf_debug)) { if (fp->compiled_once) { brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id, key->program_string_id, key); } fp->compiled_once = true; if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { perf_debug("FS compile took %.03f ms and stalled the GPU\n", (get_time() - start_time) * 1000); } } brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch); if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm)) fprintf(stderr, "\n"); /* The param and pull_param arrays will be freed by the shader cache. */ ralloc_steal(NULL, prog_data.base.param); ralloc_steal(NULL, prog_data.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, key, sizeof(struct brw_wm_prog_key), program, prog_data.base.program_size, &prog_data, sizeof(prog_data), &brw->wm.base.prog_offset, &brw->wm.base.prog_data); ralloc_free(mem_ctx); return true; }
static bool brw_codegen_tes_prog(struct brw_context *brw, struct brw_program *tep, struct brw_tes_prog_key *key) { const struct brw_compiler *compiler = brw->screen->compiler; const struct gen_device_info *devinfo = &brw->screen->devinfo; struct brw_stage_state *stage_state = &brw->tes.base; struct brw_tes_prog_data prog_data; bool start_busy = false; double start_time = 0; memset(&prog_data, 0, sizeof(prog_data)); void *mem_ctx = ralloc_context(NULL); nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir); brw_assign_common_binding_table_offsets(devinfo, &tep->program, &prog_data.base.base, 0); brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); int st_index = -1; if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME)) st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true); if (unlikely(brw->perf_debug)) { start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); start_time = get_time(); } struct brw_vue_map input_vue_map; brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, key->patch_inputs_read); char *error_str; const unsigned *program = brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data, nir, &tep->program, st_index, &error_str); if (program == NULL) { tep->program.sh.data->LinkStatus = LINKING_FAILURE; ralloc_strcat(&tep->program.sh.data->InfoLog, error_str); _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: " "%s\n", error_str); ralloc_free(mem_ctx); return false; } if (unlikely(brw->perf_debug)) { if (tep->compiled_once) { brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id, key->program_string_id, key); } if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { perf_debug("TES compile took %.03f ms and stalled the GPU\n", (get_time() - start_time) * 1000); } tep->compiled_once = true; } /* Scratch space is used for register spilling */ brw_alloc_stage_scratch(brw, stage_state, prog_data.base.base.total_scratch); /* The param and pull_param arrays will be freed by the shader cache. */ ralloc_steal(NULL, prog_data.base.base.param); ralloc_steal(NULL, prog_data.base.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, key, sizeof(*key), program, prog_data.base.base.program_size, &prog_data, sizeof(prog_data), &stage_state->prog_offset, &brw->tes.base.prog_data); ralloc_free(mem_ctx); return true; }