extern "C" const unsigned * brw_compile_tcs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_tcs_prog_key *key, struct brw_tcs_prog_data *prog_data, const nir_shader *src_shader, int shader_time_index, unsigned *final_assembly_size, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; struct brw_vue_prog_data *vue_prog_data = &prog_data->base; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info->outputs_written = key->outputs_written; nir->info->patch_outputs_written = key->patch_outputs_written; struct brw_vue_map input_vue_map; brw_compute_vue_map(devinfo, &input_vue_map, nir->info->inputs_read, nir->info->separate_shader); brw_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info->outputs_written, nir->info->patch_outputs_written); nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); if (key->quads_workaround) brw_nir_apply_tcs_quads_workaround(nir); nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); if (is_scalar) prog_data->instances = DIV_ROUND_UP(nir->info->tcs.vertices_out, 8); else prog_data->instances = DIV_ROUND_UP(nir->info->tcs.vertices_out, 2); /* Compute URB entry size. The maximum allowed URB entry size is 32k. * That divides up as follows: * * 32 bytes for the patch header (tessellation factors) * 480 bytes for per-patch varyings (a varying component is 4 bytes and * gl_MaxTessPatchComponents = 120) * 16384 bytes for per-vertex varyings (a varying component is 4 bytes, * gl_MaxPatchVertices = 32 and * gl_MaxTessControlOutputComponents = 128) * * 15808 bytes left for varying packing overhead */ const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots; const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots; unsigned output_size_bytes = 0; /* Note that the patch header is counted in num_per_patch_slots. */ output_size_bytes += num_per_patch_slots * 16; output_size_bytes += nir->info->tcs.vertices_out * num_per_vertex_slots * 16; assert(output_size_bytes >= 1); if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES) return NULL; /* URB entry sizes are stored as a multiple of 64 bytes. */ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; /* HS does not use the usual payload pushing from URB to GRFs, * because we don't have enough registers for a full-size payload, and * the hardware is broken on Haswell anyway. */ vue_prog_data->urb_read_length = 0; if (unlikely(INTEL_DEBUG & DEBUG_TCS)) { fprintf(stderr, "TCS Input "); brw_print_vue_map(stderr, &input_vue_map); fprintf(stderr, "TCS Output "); brw_print_vue_map(stderr, &vue_prog_data->vue_map); } if (is_scalar) { fs_visitor v(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, NULL, nir, 8, shader_time_index, &input_vue_map); if (!v.run_tcs_single_patch()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, v.promoted_constants, false, MESA_SHADER_TESS_CTRL); if (unlikely(INTEL_DEBUG & DEBUG_TCS)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s tessellation control shader %s", nir->info->label ? nir->info->label : "unnamed", nir->info->name)); } g.generate_code(v.cfg, 8); return g.get_assembly(final_assembly_size); } else { vec4_tcs_visitor v(compiler, log_data, key, prog_data, nir, mem_ctx, shader_time_index, &input_vue_map); if (!v.run()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } if (unlikely(INTEL_DEBUG & DEBUG_TCS)) v.dump_instructions(); return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, &prog_data->base, v.cfg, final_assembly_size); } }
static bool brw_codegen_tes_prog(struct brw_context *brw, struct brw_program *tep, struct brw_tes_prog_key *key) { const struct brw_compiler *compiler = brw->screen->compiler; const struct gen_device_info *devinfo = &brw->screen->devinfo; struct brw_stage_state *stage_state = &brw->tes.base; struct brw_tes_prog_data prog_data; bool start_busy = false; double start_time = 0; memset(&prog_data, 0, sizeof(prog_data)); void *mem_ctx = ralloc_context(NULL); nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir); brw_assign_common_binding_table_offsets(devinfo, &tep->program, &prog_data.base.base, 0); brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); int st_index = -1; if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME)) st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true); if (unlikely(brw->perf_debug)) { start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); start_time = get_time(); } struct brw_vue_map input_vue_map; brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, key->patch_inputs_read); char *error_str; const unsigned *program = brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data, nir, &tep->program, st_index, &error_str); if (program == NULL) { tep->program.sh.data->LinkStatus = LINKING_FAILURE; ralloc_strcat(&tep->program.sh.data->InfoLog, error_str); _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: " "%s\n", error_str); ralloc_free(mem_ctx); return false; } if (unlikely(brw->perf_debug)) { if (tep->compiled_once) { brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id, key->program_string_id, key); } if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { perf_debug("TES compile took %.03f ms and stalled the GPU\n", (get_time() - start_time) * 1000); } tep->compiled_once = true; } /* Scratch space is used for register spilling */ brw_alloc_stage_scratch(brw, stage_state, prog_data.base.base.total_scratch); /* The param and pull_param arrays will be freed by the shader cache. */ ralloc_steal(NULL, prog_data.base.base.param); ralloc_steal(NULL, prog_data.base.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, key, sizeof(*key), program, prog_data.base.base.program_size, &prog_data, sizeof(prog_data), &stage_state->prog_offset, &brw->tes.base.prog_data); ralloc_free(mem_ctx); return true; }