/**
 * Finish the program held in \p p and expose its instruction storage.
 *
 * Runs brw_compact_instructions() on \p p, then stores p->next_insn_offset
 * through \p sz and returns p->store viewed as an array of unsigned.  The
 * returned pointer aliases p->store; nothing is copied here.
 *
 * \param p   compile state whose instruction store is returned
 * \param sz  receives p->next_insn_offset as it stands after compaction
 * \return    p->store cast to const unsigned *
 */
const unsigned *
brw_get_program(struct brw_compile *p, unsigned *sz)
{
   brw_compact_instructions(p);

   /* The offset is read only after compaction has had its chance to run. */
   *sz = p->next_insn_offset;

   const unsigned *code = (const unsigned *)p->store;
   return code;
}
void brw_codegen_ff_gs_prog(struct brw_context *brw, struct brw_ff_gs_prog_key *key) { struct brw_ff_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); c.key = *key; c.vue_map = brw->vs.prog_data->base.vue_map; c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); if (brw->gen >= 6) { unsigned num_verts; bool check_edge_flag; /* On Sandybridge, we use the GS for implementing transform feedback * (called "Stream Out" in the PRM). */ switch (key->primitive) { case _3DPRIM_POINTLIST: num_verts = 1; check_edge_flag = false; break; case _3DPRIM_LINELIST: case _3DPRIM_LINESTRIP: case _3DPRIM_LINELOOP: num_verts = 2; check_edge_flag = false; break; case _3DPRIM_TRILIST: case _3DPRIM_TRIFAN: case _3DPRIM_TRISTRIP: case _3DPRIM_RECTLIST: num_verts = 3; check_edge_flag = false; break; case _3DPRIM_QUADLIST: case _3DPRIM_QUADSTRIP: case _3DPRIM_POLYGON: num_verts = 3; check_edge_flag = true; break; default: unreachable("Unexpected primitive type in Gen6 SOL program."); } gen6_sol_program(&c, key, num_verts, check_edge_flag); } else { /* On Gen4-5, we use the GS to decompose certain types of primitives. * Note that primitives which don't require a GS program have already * been weeded out by now. 
*/ switch (key->primitive) { case _3DPRIM_QUADLIST: brw_ff_gs_quads( &c, key ); break; case _3DPRIM_QUADSTRIP: brw_ff_gs_quad_strip( &c, key ); break; case _3DPRIM_LINELOOP: brw_ff_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { fprintf(stderr, "gs:\n"); brw_disassemble(brw->intelScreen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); ralloc_free(mem_ctx); }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct brw_clip_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(&brw->screen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; c.key = *key; c.vue_map = brw->vue_map_geom_out; /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of * the VUE (measured in pairs, since two slots are stored in each * register). */ c.nr_regs = (c.vue_map.num_slots + 1)/2; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: unreachable("not reached"); } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { fprintf(stderr, "clip:\n"); brw_disassemble(&brw->screen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); }
/**
 * Emit the constant-color (clear) program and return it.
 *
 * prog_data is zeroed, registers are allocated, the clear color is moved
 * into the message payload, and a single render target write with EOT set
 * is emitted.  When INTEL_DEBUG has DEBUG_BLORP set, the generated code is
 * disassembled to stderr before compaction.
 *
 * \param brw           driver context, forwarded to brw_disassemble()
 * \param program_size  forwarded to brw_get_program(), which fills it in
 * \return              the program as returned by brw_get_program()
 */
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
                                       GLuint *program_size)
{
   /* Set up prog_data */
   memset(&prog_data, 0, sizeof(prog_data));
   prog_data.persample_msaa_dispatch = false;

   alloc_regs();

   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);

   struct brw_reg mrf_rt_write =
      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);

   uint32_t mlen, msg_type;
   if (key->use_simd16_replicated_data) {
      /* The message payload is a single register with the low 4 floats/ints
       * filled with the constant clear color.
       */
      brw_set_mask_control(&func, BRW_MASK_DISABLE);
      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
      brw_set_mask_control(&func, BRW_MASK_ENABLE);

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
      mlen = 1;
   } else {
      for (int i = 0; i < 4; i++) {
         /* The message payload is pairs of registers for 16 pixels each of r,
          * g, b, and a.
          */
         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(&func,
                 brw_message_reg(base_mrf + i * 2),
                 brw_vec1_grf(clear_rgba.nr, i));
         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
      }

      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
      mlen = 8;
   }

   /* Now write to the render target and terminate the thread */
   brw_fb_WRITE(&func,
                16 /* dispatch_width */,
                base_mrf /* msg_reg_nr */,
                mrf_rt_write /* src0 */,
                msg_type,
                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                mlen,
                0 /* response_length */,
                true /* eot */,
                false /* header present */);

   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
      fprintf(stderr, "Native code for BLORP clear:\n");
      /* Hand the disassembler the instruction store itself, not the address
       * of the `store` member: the range [0, next_insn_offset) is relative
       * to the first instruction.
       */
      brw_disassemble(brw, func.store, 0, func.next_insn_offset, stderr);
      fprintf(stderr, "\n");
   }

   brw_compact_instructions(&func);
   return brw_get_program(&func, program_size);
}
void vec4_generator::generate_code(const cfg_t *cfg) { struct annotation_info annotation; memset(&annotation, 0, sizeof(annotation)); foreach_block_and_inst (block, vec4_instruction, inst, cfg) { struct brw_reg src[3], dst; if (unlikely(debug_flag)) annotate(brw, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(this->prog_data, i); } dst = inst->get_dst(); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); unsigned pre_emit_nr_insn = p->nr_insn; generate_vec4_instruction(inst, dst, src); if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { assert(p->nr_insn == pre_emit_nr_insn + 1 || !"conditional_mod, no_dd_check, or no_dd_clear set for IR " "emitting more than 1 instruction"); brw_inst *last = &p->store[pre_emit_nr_insn]; brw_inst_set_cond_modifier(brw, last, inst->conditional_mod); brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear); brw_inst_set_no_dd_check(brw, last, inst->no_dd_check); } } brw_set_uip_jip(p); annotation_finalize(&annotation, p->next_insn_offset); int before_size = p->next_insn_offset; brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann); int after_size = p->next_insn_offset; if (unlikely(debug_flag)) { if (shader_prog) { fprintf(stderr, "Native code for %s vertex shader %d:\n", shader_prog->Label ? shader_prog->Label : "unnamed", shader_prog->Name); } else { fprintf(stderr, "Native code for vertex program %d:\n", prog->Id); } fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d" " bytes (%.0f%%)\n", before_size / 16, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog); ralloc_free(annotation.ann); } }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.key = *key; c.vue_map = brw->vue_map_geom_out; if (c.key.do_point_coord) { /* * gl_PointCoord is a FS instead of VS builtin variable, thus it's * not included in c.vue_map generated in VS stage. Here we add * it manually to let SF shader generate the needed interpolation * coefficient for FS shader. */ c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; } c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); /* Which primitive? Or all three? 
*/ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, true ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, true ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, true ); else brw_emit_point_setup( &c, true ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: unreachable("not reached"); } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_SF)) { fprintf(stderr, "sf:\n"); brw_disassemble(brw, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); }