void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, struct brw_context *brw, const struct gl_program *prog) { const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; for (int i = 0; i < num_annotations; i++) { int start_offset = annotation[i].offset; int end_offset = annotation[i + 1].offset; if (annotation[i].block_start) { fprintf(stderr, " START B%d", annotation[i].block_start->num); foreach_list_typed(struct bblock_link, predecessor_link, link, &annotation[i].block_start->parents) { struct bblock_t *predecessor_block = predecessor_link->block; fprintf(stderr, " <-B%d", predecessor_block->num); } fprintf(stderr, "\n"); } if (last_annotation_ir != annotation[i].ir) { last_annotation_ir = annotation[i].ir; if (last_annotation_ir) { fprintf(stderr, " "); if (!prog->Instructions) fprint_ir(stderr, annotation[i].ir); else { const struct prog_instruction *pi = (const struct prog_instruction *)annotation[i].ir; fprintf(stderr, "%d: ", (int)(pi - prog->Instructions)); _mesa_fprint_instruction_opt(stderr, pi, 0, PROG_PRINT_DEBUG, NULL); } fprintf(stderr, "\n"); } } if (last_annotation_string != annotation[i].annotation) { last_annotation_string = annotation[i].annotation; if (last_annotation_string) fprintf(stderr, " %s\n", last_annotation_string); } brw_disassemble(brw, assembly, start_offset, end_offset, stderr); if (annotation[i].block_end) { fprintf(stderr, " END B%d", annotation[i].block_end->num); foreach_list_typed(struct bblock_link, successor_link, link, &annotation[i].block_end->children) { struct bblock_t *successor_block = successor_link->block; fprintf(stderr, " ->B%d", successor_block->num); } fprintf(stderr, "\n"); }
void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, const struct brw_device_info *devinfo) { const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; for (int i = 0; i < num_annotations; i++) { int start_offset = annotation[i].offset; int end_offset = annotation[i + 1].offset; if (annotation[i].block_start) { fprintf(stderr, " START B%d", annotation[i].block_start->num); foreach_list_typed(struct bblock_link, predecessor_link, link, &annotation[i].block_start->parents) { struct bblock_t *predecessor_block = predecessor_link->block; fprintf(stderr, " <-B%d", predecessor_block->num); } fprintf(stderr, "\n"); } if (last_annotation_ir != annotation[i].ir) { last_annotation_ir = annotation[i].ir; if (last_annotation_ir) { fprintf(stderr, " "); nir_print_instr(annotation[i].ir, stderr); fprintf(stderr, "\n"); } } if (last_annotation_string != annotation[i].annotation) { last_annotation_string = annotation[i].annotation; if (last_annotation_string) fprintf(stderr, " %s\n", last_annotation_string); } brw_disassemble(devinfo, assembly, start_offset, end_offset, stderr); if (annotation[i].error) { fputs(annotation[i].error, stderr); } if (annotation[i].block_end) { fprintf(stderr, " END B%d", annotation[i].block_end->num); foreach_list_typed(struct bblock_link, successor_link, link, &annotation[i].block_end->children) { struct bblock_t *successor_block = successor_link->block; fprintf(stderr, " ->B%d", successor_block->num); } fprintf(stderr, "\n"); }
void brw_codegen_ff_gs_prog(struct brw_context *brw, struct brw_ff_gs_prog_key *key) { struct brw_ff_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); c.key = *key; c.vue_map = brw->vs.prog_data->base.vue_map; c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); if (brw->gen >= 6) { unsigned num_verts; bool check_edge_flag; /* On Sandybridge, we use the GS for implementing transform feedback * (called "Stream Out" in the PRM). */ switch (key->primitive) { case _3DPRIM_POINTLIST: num_verts = 1; check_edge_flag = false; break; case _3DPRIM_LINELIST: case _3DPRIM_LINESTRIP: case _3DPRIM_LINELOOP: num_verts = 2; check_edge_flag = false; break; case _3DPRIM_TRILIST: case _3DPRIM_TRIFAN: case _3DPRIM_TRISTRIP: case _3DPRIM_RECTLIST: num_verts = 3; check_edge_flag = false; break; case _3DPRIM_QUADLIST: case _3DPRIM_QUADSTRIP: case _3DPRIM_POLYGON: num_verts = 3; check_edge_flag = true; break; default: unreachable("Unexpected primitive type in Gen6 SOL program."); } gen6_sol_program(&c, key, num_verts, check_edge_flag); } else { /* On Gen4-5, we use the GS to decompose certain types of primitives. * Note that primitives which don't require a GS program have already * been weeded out by now. */ switch (key->primitive) { case _3DPRIM_QUADLIST: brw_ff_gs_quads( &c, key ); break; case _3DPRIM_QUADSTRIP: brw_ff_gs_quad_strip( &c, key ); break; case _3DPRIM_LINELOOP: brw_ff_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { fprintf(stderr, "gs:\n"); brw_disassemble(brw->intelScreen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); ralloc_free(mem_ctx); }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct brw_clip_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(&brw->screen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; c.key = *key; c.vue_map = brw->vue_map_geom_out; /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of * the VUE (measured in pairs, since two slots are stored in each * register). */ c.nr_regs = (c.vue_map.num_slots + 1)/2; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: unreachable("not reached"); } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { fprintf(stderr, "clip:\n"); brw_disassemble(&brw->screen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); }
const GLuint * brw_blorp_const_color_program::compile(struct brw_context *brw, GLuint *program_size) { /* Set up prog_data */ memset(&prog_data, 0, sizeof(prog_data)); prog_data.persample_msaa_dispatch = false; alloc_regs(); brw_set_compression_control(&func, BRW_COMPRESSION_NONE); struct brw_reg mrf_rt_write = retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F); uint32_t mlen, msg_type; if (key->use_simd16_replicated_data) { /* The message payload is a single register with the low 4 floats/ints * filled with the constant clear color. */ brw_set_mask_control(&func, BRW_MASK_DISABLE); brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba); brw_set_mask_control(&func, BRW_MASK_ENABLE); msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; mlen = 1; } else { for (int i = 0; i < 4; i++) { /* The message payload is pairs of registers for 16 pixels each of r, * g, b, and a. */ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED); brw_MOV(&func, brw_message_reg(base_mrf + i * 2), brw_vec1_grf(clear_rgba.nr, i)); brw_set_compression_control(&func, BRW_COMPRESSION_NONE); } msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; mlen = 8; } /* Now write to the render target and terminate the thread */ brw_fb_WRITE(&func, 16 /* dispatch_width */, base_mrf /* msg_reg_nr */, mrf_rt_write /* src0 */, msg_type, BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, mlen, 0 /* response_length */, true /* eot */, false /* header present */); if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) { fprintf(stderr, "Native code for BLORP clear:\n"); brw_disassemble(brw, &func.store, 0, func.next_insn_offset, stderr); fprintf(stderr, "\n"); } brw_compact_instructions(&func); return brw_get_program(&func, program_size); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx); c.key = *key; c.vue_map = brw->vue_map_geom_out; if (c.key.do_point_coord) { /* * gl_PointCoord is a FS instead of VS builtin variable, thus it's * not included in c.vue_map generated in VS stage. Here we add * it manually to let SF shader generate the needed interpolation * coefficient for FS shader. */ c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; } c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, true ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, true ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, true ); else brw_emit_point_setup( &c, true ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: unreachable("not reached"); } /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register * source). Compacting would be difficult. */ /* brw_compact_instructions(&c.func, 0, 0, NULL); */ /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_SF)) { fprintf(stderr, "sf:\n"); brw_disassemble(brw->intelScreen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); }