static bool run_tests(struct brw_context *brw) { bool fail = false; for (int i = 0; i < ARRAY_SIZE(tests); i++) { for (int align_16 = 0; align_16 <= 1; align_16++) { struct brw_compile *p = rzalloc(NULL, struct brw_compile); brw_init_compile(brw, p, p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); if (align_16) brw_set_default_access_mode(p, BRW_ALIGN_16); else brw_set_default_access_mode(p, BRW_ALIGN_1); tests[i].func(p); assert(p->nr_insn == 1); if (!test_compact_instruction(p, p->store[0])) { fail = true; continue; } if (!test_fuzz_compact_instruction(p, p->store[0])) { fail = true; continue; } ralloc_free(p); } } return fail; }
/**
 * Construct a blorp clear program.
 *
 * Creates a new top-level ralloc context for this object, records the
 * context and key pointers, and initializes the embedded compile state
 * (func) against that ralloc context.
 */
brw_blorp_clear_program::brw_blorp_clear_program(
      struct brw_context *brw,
      const brw_blorp_clear_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key)
{
   brw_init_compile(brw, &this->func, this->mem_ctx);
}
/**
 * Construct a blorp constant-color program.
 *
 * Creates a new top-level ralloc context for this object, records the
 * context and key pointers, value-initializes the R0, R1 and clear_rgba
 * members, starts base_mrf at 0, and initializes the embedded compile
 * state (func) against the new ralloc context.
 */
brw_blorp_const_color_program::brw_blorp_const_color_program(
      struct brw_context *brw,
      const brw_blorp_const_color_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key),
     R0(),
     R1(),
     clear_rgba(),
     base_mrf(0)
{
   brw_init_compile(brw, &this->func, this->mem_ctx);
}
/**
 * Construct a blorp constant-color program.
 *
 * Creates a new top-level ralloc context for this object, records the
 * context and key pointers, value-initializes the R0, R1 and clear_rgba
 * members and starts base_mrf at 0.  The body then gives prog_data its
 * starting values and initializes the embedded compile state (func)
 * against the new ralloc context.
 */
brw_blorp_const_color_program::brw_blorp_const_color_program(
      struct brw_context *brw,
      const brw_blorp_const_color_prog_key *key)
   : mem_ctx(ralloc_context(NULL)),
     brw(brw),
     key(key),
     R0(),
     R1(),
     clear_rgba(),
     base_mrf(0)
{
   /* Program data defaults. */
   this->prog_data.persample_msaa_dispatch = false;
   this->prog_data.first_curbe_grf = 0;

   brw_init_compile(brw, &this->func, this->mem_ctx);
}
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; GLuint i; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.key = *key; c.vue_map = brw->vue_map_geom_out; if (c.key.do_point_coord) { /* * gl_PointCoord is a FS instead of VS builtin variable, thus it's * not included in c.vue_map generated in VS stage. Here we add * it manually to let SF shader generate the needed interpolation * coefficient for FS shader. */ c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; } c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); /* Which primitive? Or all three? 
*/ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, true ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, true ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, true ); else brw_emit_point_setup( &c, true ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_SF)) { printf("sf:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], brw->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; GLuint i, idx; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(brw, &c.func); c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; /* Construct map from attribute number to position in the vertex. */ for (i = idx = 0; i < VERT_RESULT_MAX; i++) if (c.key.attrs & (1<<i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { c.point_attrs[i].CoordReplace = ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; } else { c.point_attrs[i].CoordReplace = GL_FALSE; } idx++; } /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, GL_TRUE ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, GL_TRUE ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, GL_TRUE ); else brw_emit_point_setup( &c, GL_TRUE ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ dri_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, &brw->sf.prog_data ); }
static bool do_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; void *mem_ctx; int aux_size; int i; struct gl_shader *vs = NULL; if (prog) vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); mem_ctx = ralloc_context(NULL); brw_init_compile(brw, &c.func, mem_ctx); c.vp = vp; /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count; if (vs) { /* We add padding around uniform values below vec4 size, with the worst * case being a float value that gets blown up to a vec4, so be * conservative here. */ param_count = vs->num_uniform_components * 4; /* We also upload clip plane data as uniforms */ param_count += MAX_CLIP_PLANES * 4; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } c.prog_data.param = rzalloc_array(NULL, const float *, param_count); c.prog_data.pull_param = rzalloc_array(NULL, const float *, param_count); c.prog_data.outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. 
*/ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } brw_compute_vue_map(&c); if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ if (prog) { if (!brw_vs_emit(prog, &c)) { ralloc_free(mem_ctx); return false; } } else { brw_old_vs_emit(&c); } if (c.prog_data.nr_pull_params) c.prog_data.num_surfaces = 1; if (c.vp->program.Base.SamplersUsed) c.prog_data.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); if (prog && prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) { c.prog_data.num_surfaces = SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks); } /* Scratch space is used for register spilling */ if (c.last_scratch) { perf_debug("Vertex shader triggered register spilling. " "Try reducing the number of live vec4 values to " "improve performance.\n"); c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); brw_get_scratch_bo(intel, &brw->vs.scratch_bo, c.prog_data.total_scratch * brw->max_vs_threads); } /* get the program */ program = brw_get_program(&c.func, &program_size); /* We upload from &c.prog_data including the constant_map assuming * they're packed together. It would be nice to have a * compile-time assert macro here. */ assert(c.constant_map == (int8_t *)&c.prog_data + sizeof(c.prog_data)); assert(ctx->Const.VertexProgram.MaxNativeParameters == ARRAY_SIZE(c.constant_map)); (void) ctx; aux_size = sizeof(c.prog_data); /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; brw_upload_cache(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); return true; }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct brw_gs_compile c; const unsigned *program; unsigned program_size; memset(&c, 0, sizeof(c)); c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; /* Begin the compilation: */ brw_init_compile(&c.func); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ switch (key->primitive) { case PIPE_PRIM_QUADS: brw_gs_quads( &c ); break; case PIPE_PRIM_QUAD_STRIP: brw_gs_quad_strip( &c ); break; case PIPE_PRIM_LINE_LOOP: brw_gs_lines( &c ); break; case PIPE_PRIM_LINES: if (key->hint_gs_always) brw_gs_lines( &c ); else { return; } break; case PIPE_PRIM_TRIANGLES: if (key->hint_gs_always) brw_gs_tris( &c ); else { return; } break; case PIPE_PRIM_POINTS: if (key->hint_gs_always) brw_gs_points( &c ); else { return; } break; default: return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], &c.key, sizeof(c.key), program, program_size, &c.prog_data, &brw->gs.prog_data ); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const unsigned *program; unsigned program_size; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(&c.func); c.key = *key; c.nr_attrs = c.key.vp_output_count; c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c ); break; case SF_POINTS: c.nr_verts = 1; brw_emit_point_setup( &c ); break; case SF_UNFILLED_TRIS: default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], &c.key, sizeof(c.key), program, program_size, &c.prog_data, &brw->sf.prog_data ); }
static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) { struct brw_wm_compile *c; const GLuint *program; GLuint program_size; c = brw->wm.compile_data; if (c == NULL) { brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); c = brw->wm.compile_data; } else { memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; /* Augment fragment program. Add instructions for pre- and * post-fragment-program tasks such as interpolation and fogging. */ brw_wm_pass_fp(c); /* Translate to intermediate representation. Build register usage * chains. */ brw_wm_pass0(c); /* Dead code removal. */ brw_wm_pass1(c); /* Hal optimization */ brw_wm_pass_hal (c); /* Register allocation. */ c->grf_limit = BRW_WM_MAX_GRF/2; /* This is where we start emitting gen4 code: */ brw_init_compile(&c->func); brw_wm_pass2(c); c->prog_data.total_grf = c->max_wm_grf; if (c->last_scratch) { c->prog_data.total_scratch = c->last_scratch + 0x40; } else { c->prog_data.total_scratch = 0; } /* Emit GEN4 code. */ brw_wm_emit(c); /* get the program */ program = brw_get_program(&c->func, &program_size); /* */ brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], &c->key, sizeof(c->key), program, program_size, &c->prog_data, &brw->wm.prog_data ); }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct brw_clip_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; c.key = *key; c.vue_map = brw->vue_map_geom_out; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); c.has_noperspective_shading = brw_any_noperspective_varyings(&key->interpolation_mode); /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of * the VUE (measured in pairs, since two slots are stored in each * register). */ c.nr_regs = (c.vue_map.num_slots + 1)/2; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: assert(0); return; } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { fprintf(stderr, "clip:\n"); brw_disassemble(brw, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CLIP_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); }
static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; int aux_size; int i; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); brw_init_compile(brw, &c.func); c.vp = vp; c.prog_data.outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, GL_TRUE); } /* Emit GEN4 code. */ brw_vs_emit(&c); /* get the program */ program = brw_get_program(&c.func, &program_size); /* We upload from &c.prog_data including the constant_map assuming * they're packed together. It would be nice to have a * compile-time assert macro here. */ assert(c.constant_map == (int8_t *)&c.prog_data + sizeof(c.prog_data)); assert(ctx->Const.VertexProgram.MaxNativeParameters == ARRAY_SIZE(c.constant_map)); (void) ctx; aux_size = sizeof(c.prog_data); /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, aux_size, &brw->vs.prog_data); }
/** * All Mesa program -> GPU code generation goes through this function. * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ static enum pipe_error do_wm_prog( struct brw_context *brw, struct brw_fragment_shader *fp, struct brw_wm_prog_key *key, struct brw_winsys_buffer **bo_out) { enum pipe_error ret; struct brw_wm_compile *c; const GLuint *program; GLuint program_size; if (brw->wm.compile_data == NULL) { brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data)); if (!brw->wm.compile_data) return PIPE_ERROR_OUT_OF_MEMORY; } c = brw->wm.compile_data; memset(c, 0, sizeof *c); c->key = *key; c->fp = fp; c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/ brw_init_compile(brw, &c->func); /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ if (fp->has_flow_control) { c->dispatch_width = 8; /* XXX: GLSL support */ exit(1); /* brw_wm_branching_shader_emit(brw, c); */ } else { c->dispatch_width = 16; brw_wm_linear_shader_emit(brw, c); } if (BRW_DEBUG & DEBUG_WM) debug_printf("\n"); /* get the program */ ret = brw_get_program(&c->func, &program, &program_size); if (ret) return ret; ret = brw_upload_cache( &brw->cache, BRW_WM_PROG, &c->key, sizeof(c->key), NULL, 0, program, program_size, &c->prog_data, &brw->wm.prog_data, bo_out ); if (ret) return ret; return PIPE_OK; }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_clip_compile c; const GLuint *program; GLuint program_size; GLuint delta; GLuint i; GLuint header_regs; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(brw, &c.func); c.func.single_program_flow = 1; c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; if (intel->gen == 5) header_regs = 3; else header_regs = 1; delta = header_regs * REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.offset[i] = delta; delta += ATTR_SIZE; c.idx_to_attr[c.nr_attrs] = i; c.nr_attrs++; } } /* The vertex attributes start at a URB row-aligned offset after * the 8-20 dword vertex header, and continue for a URB row-aligned * length. nr_regs determines the urb_read_length from the start * of the header to the end of the vertex data. */ c.nr_regs = header_regs + (c.nr_attrs + 1) / 2; c.nr_bytes = c.nr_regs * REG_SIZE; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. 
*/ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (INTEL_DEBUG & DEBUG_CLIP) { printf("clip:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } /* Upload */ drm_intel_bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_CLIP_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_data); }
static void compile_ff_gs_prog(struct brw_context *brw, struct brw_ff_gs_prog_key *key) { struct brw_ff_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); c.key = *key; c.vue_map = brw->vs.prog_data->base.vue_map; c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); if (brw->gen >= 6) { unsigned num_verts; bool check_edge_flag; /* On Sandybridge, we use the GS for implementing transform feedback * (called "Stream Out" in the PRM). */ switch (key->primitive) { case _3DPRIM_POINTLIST: num_verts = 1; check_edge_flag = false; break; case _3DPRIM_LINELIST: case _3DPRIM_LINESTRIP: case _3DPRIM_LINELOOP: num_verts = 2; check_edge_flag = false; break; case _3DPRIM_TRILIST: case _3DPRIM_TRIFAN: case _3DPRIM_TRISTRIP: case _3DPRIM_RECTLIST: num_verts = 3; check_edge_flag = false; break; case _3DPRIM_QUADLIST: case _3DPRIM_QUADSTRIP: case _3DPRIM_POLYGON: num_verts = 3; check_edge_flag = true; break; default: assert(!"Unexpected primitive type in Gen6 SOL program."); return; } gen6_sol_program(&c, key, num_verts, check_edge_flag); } else { /* On Gen4-5, we use the GS to decompose certain types of primitives. * Note that primitives which don't require a GS program have already * been weeded out by now. 
*/ switch (key->primitive) { case _3DPRIM_QUADLIST: brw_ff_gs_quads( &c, key ); break; case _3DPRIM_QUADSTRIP: brw_ff_gs_quad_strip( &c, key ); break; case _3DPRIM_LINELOOP: brw_ff_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], brw->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_FF_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); ralloc_free(mem_ctx); }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; /* Gen6: VF has already converted into polygon, and LINELOOP is * converted to LINESTRIP at the beginning of the 3D pipeline. */ if (intel->gen >= 6) return; memset(&c, 0, sizeof(c)); c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); if (intel->gen >= 5) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; mem_ctx = NULL; /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ switch (key->primitive) { case GL_QUADS: brw_gs_quads( &c, key ); break; case GL_QUAD_STRIP: brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: brw_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); }
int main(int argc, char **argv) { char *output_file = NULL; char *entry_table_file = NULL; FILE *output = stdout; FILE *export_file; struct brw_program_instruction *entry, *entry1, *tmp_entry; int err, inst_offset; char o; void *mem_ctx; while ((o = getopt_long(argc, argv, "e:l:o:g:abW", longopts, NULL)) != -1) { switch (o) { case 'o': if (strcmp(optarg, "-") != 0) output_file = optarg; break; case 'g': { char *dec_ptr, *end_ptr; unsigned long decimal; gen_level = strtol(optarg, &dec_ptr, 10) * 10; if (*dec_ptr == '.') { decimal = strtoul(++dec_ptr, &end_ptr, 10); if (end_ptr != dec_ptr && *end_ptr == '\0') { if (decimal > 10) { fprintf(stderr, "Invalid Gen X decimal version\n"); exit(1); } gen_level += decimal; } } if (gen_level < 40 || gen_level > 90) { usage(); exit(1); } break; } case 'a': advanced_flag = 1; break; case 'b': binary_like_output = 1; break; case 'e': need_export = 1; if (strcmp(optarg, "-") != 0) export_filename = optarg; break; case 'l': if (strcmp(optarg, "-") != 0) entry_table_file = optarg; break; case 'W': warning_flags |= WARN_ALL; break; default: usage(); exit(1); } } argc -= optind; argv += optind; if (argc != 1) { usage(); exit(1); } if (strcmp(argv[0], "-") != 0) { input_filename = argv[0]; yyin = fopen(input_filename, "r"); if (yyin == NULL) { perror("Couldn't open input file"); exit(1); } } brw_init_context(&genasm_brw_context, gen_level); mem_ctx = ralloc_context(NULL); brw_init_compile(&genasm_brw_context, &genasm_compile, mem_ctx); err = yyparse(); if (strcmp(argv[0], "-")) fclose(yyin); yylex_destroy(); if (err || errors) exit (1); if (output_file) { output = fopen(output_file, "w"); if (output == NULL) { perror("Couldn't open output file"); exit(1); } } if (read_entry_file(entry_table_file)) { fprintf(stderr, "Read entry file error\n"); exit(1); } inst_offset = 0 ; for (entry = compiled_program.first; entry != NULL; entry = entry->next) { entry->inst_offset = inst_offset; entry1 = entry->next; if (entry1 && is_label(entry1) && 
is_entry_point(entry1)) { // insert NOP instructions until (inst_offset+1) % 4 == 0 while (((inst_offset+1) % 4) != 0) { tmp_entry = calloc(sizeof(*tmp_entry), 1); tmp_entry->insn.gen.header.opcode = BRW_OPCODE_NOP; entry->next = tmp_entry; tmp_entry->next = entry1; entry = tmp_entry; tmp_entry->inst_offset = ++inst_offset; } } if (!is_label(entry)) inst_offset++; } for (entry = compiled_program.first; entry; entry = entry->next) if (is_label(entry)) add_label(entry); if (need_export) { if (export_filename) { export_file = fopen(export_filename, "w"); } else { export_file = fopen("export.inc", "w"); } for (entry = compiled_program.first; entry != NULL; entry = entry->next) { if (is_label(entry)) fprintf(export_file, "#define %s_IP %d\n", label_name(entry), (IS_GENx(5) ? 2 : 1)*(entry->inst_offset)); } fclose(export_file); } for (entry = compiled_program.first; entry; entry = entry->next) { struct relocation *reloc = &entry->reloc; if (!is_relocatable(entry)) continue; if (reloc->first_reloc_target) reloc->first_reloc_offset = label_to_addr(reloc->first_reloc_target, entry->inst_offset) - entry->inst_offset; if (reloc->second_reloc_target) reloc->second_reloc_offset = label_to_addr(reloc->second_reloc_target, entry->inst_offset) - entry->inst_offset; if (reloc->second_reloc_offset) { // this is a branch instruction with two offset arguments set_branch_two_offsets(entry, reloc->first_reloc_offset, reloc->second_reloc_offset); } else if (reloc->first_reloc_offset) { set_branch_one_offset(entry, reloc->first_reloc_offset); } } if (binary_like_output) fprintf(output, "%s", binary_prepend); for (entry = compiled_program.first; entry != NULL; entry = entry1) { entry1 = entry->next; if (!is_label(entry)) print_instruction(output, &entry->insn.gen); else free(entry->insn.label.name); free(entry); } if (binary_like_output) fprintf(output, "};"); free_entry_point_table(entry_point_table); free_hash_table(declared_register_table); free_label_table(label_table); fflush 
(output); if (ferror (output)) { perror ("Could not flush output file"); if (output_file) unlink (output_file); err = 1; } return err; }