const unsigned * vec4_generator::generate_assembly(const cfg_t *cfg, unsigned *assembly_size) { brw_set_default_access_mode(p, BRW_ALIGN_16); generate_code(cfg); return brw_get_program(p, assembly_size); }
const unsigned * vec4_generator::generate_assembly(exec_list *instructions, unsigned *assembly_size) { brw_set_default_access_mode(p, BRW_ALIGN_16); generate_code(instructions); return brw_get_program(p, assembly_size); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; GLuint i; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.key = *key; c.vue_map = brw->vue_map_geom_out; if (c.key.do_point_coord) { /* * gl_PointCoord is a FS instead of VS builtin variable, thus it's * not included in c.vue_map generated in VS stage. Here we add * it manually to let SF shader generate the needed interpolation * coefficient for FS shader. */ c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; } c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, true ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, true ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, true ); else brw_emit_point_setup( &c, true ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_SF)) { printf("sf:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], brw->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; GLuint i, idx; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(brw, &c.func); c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; /* Construct map from attribute number to position in the vertex. */ for (i = idx = 0; i < VERT_RESULT_MAX; i++) if (c.key.attrs & (1<<i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { c.point_attrs[i].CoordReplace = ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; } else { c.point_attrs[i].CoordReplace = GL_FALSE; } idx++; } /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, GL_TRUE ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, GL_TRUE ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, GL_TRUE ); else brw_emit_point_setup( &c, GL_TRUE ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ dri_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, &brw->sf.prog_data ); }
static bool do_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; void *mem_ctx; int aux_size; int i; struct gl_shader *vs = NULL; if (prog) vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); mem_ctx = ralloc_context(NULL); brw_init_compile(brw, &c.func, mem_ctx); c.vp = vp; /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ int param_count; if (vs) { /* We add padding around uniform values below vec4 size, with the worst * case being a float value that gets blown up to a vec4, so be * conservative here. */ param_count = vs->num_uniform_components * 4; /* We also upload clip plane data as uniforms */ param_count += MAX_CLIP_PLANES * 4; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } c.prog_data.param = rzalloc_array(NULL, const float *, param_count); c.prog_data.pull_param = rzalloc_array(NULL, const float *, param_count); c.prog_data.outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } brw_compute_vue_map(&c); if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ if (prog) { if (!brw_vs_emit(prog, &c)) { ralloc_free(mem_ctx); return false; } } else { brw_old_vs_emit(&c); } if (c.prog_data.nr_pull_params) c.prog_data.num_surfaces = 1; if (c.vp->program.Base.SamplersUsed) c.prog_data.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); if (prog && prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) { c.prog_data.num_surfaces = SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks); } /* Scratch space is used for register spilling */ if (c.last_scratch) { perf_debug("Vertex shader triggered register spilling. " "Try reducing the number of live vec4 values to " "improve performance.\n"); c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); brw_get_scratch_bo(intel, &brw->vs.scratch_bo, c.prog_data.total_scratch * brw->max_vs_threads); } /* get the program */ program = brw_get_program(&c.func, &program_size); /* We upload from &c.prog_data including the constant_map assuming * they're packed together. It would be nice to have a * compile-time assert macro here. */ assert(c.constant_map == (int8_t *)&c.prog_data + sizeof(c.prog_data)); assert(ctx->Const.VertexProgram.MaxNativeParameters == ARRAY_SIZE(c.constant_map)); (void) ctx; aux_size = sizeof(c.prog_data); /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; brw_upload_cache(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); return true; }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct brw_gs_compile c; const unsigned *program; unsigned program_size; memset(&c, 0, sizeof(c)); c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; /* Begin the compilation: */ brw_init_compile(&c.func); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ switch (key->primitive) { case PIPE_PRIM_QUADS: brw_gs_quads( &c ); break; case PIPE_PRIM_QUAD_STRIP: brw_gs_quad_strip( &c ); break; case PIPE_PRIM_LINE_LOOP: brw_gs_lines( &c ); break; case PIPE_PRIM_LINES: if (key->hint_gs_always) brw_gs_lines( &c ); else { return; } break; case PIPE_PRIM_TRIANGLES: if (key->hint_gs_always) brw_gs_tris( &c ); else { return; } break; case PIPE_PRIM_POINTS: if (key->hint_gs_always) brw_gs_points( &c ); else { return; } break; default: return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], &c.key, sizeof(c.key), program, program_size, &c.prog_data, &brw->gs.prog_data ); }
const GLuint * brw_blorp_const_color_program::compile(struct brw_context *brw, GLuint *program_size) { /* Set up prog_data */ memset(&prog_data, 0, sizeof(prog_data)); prog_data.persample_msaa_dispatch = false; alloc_regs(); brw_set_compression_control(&func, BRW_COMPRESSION_NONE); struct brw_reg mrf_rt_write = retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F); uint32_t mlen, msg_type; if (key->use_simd16_replicated_data) { /* The message payload is a single register with the low 4 floats/ints * filled with the constant clear color. */ brw_set_mask_control(&func, BRW_MASK_DISABLE); brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba); brw_set_mask_control(&func, BRW_MASK_ENABLE); msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; mlen = 1; } else { for (int i = 0; i < 4; i++) { /* The message payload is pairs of registers for 16 pixels each of r, * g, b, and a. */ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED); brw_MOV(&func, brw_message_reg(base_mrf + i * 2), brw_vec1_grf(clear_rgba.nr, i)); brw_set_compression_control(&func, BRW_COMPRESSION_NONE); } msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; mlen = 8; } /* Now write to the render target and terminate the thread */ brw_fb_WRITE(&func, 16 /* dispatch_width */, base_mrf /* msg_reg_nr */, mrf_rt_write /* src0 */, msg_type, BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, mlen, 0 /* response_length */, true /* eot */, false /* header present */); if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) { fprintf(stderr, "Native code for BLORP clear:\n"); brw_dump_compile(&func, stderr, 0, func.next_insn_offset); fprintf(stderr, "\n"); } return brw_get_program(&func, program_size); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const unsigned *program; unsigned program_size; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(&c.func); c.key = *key; c.nr_attrs = c.key.vp_output_count; c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c ); break; case SF_POINTS: c.nr_verts = 1; brw_emit_point_setup( &c ); break; case SF_UNFILLED_TRIS: default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], &c.key, sizeof(c.key), program, program_size, &c.prog_data, &brw->sf.prog_data ); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { struct brw_sf_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx); c.key = *key; c.vue_map = brw->vue_map_geom_out; if (c.key.do_point_coord) { /* * gl_PointCoord is a FS instead of VS builtin variable, thus it's * not included in c.vue_map generated in VS stage. Here we add * it manually to let SF shader generate the needed interpolation * coefficient for FS shader. */ c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; } c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, true ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, true ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, true ); else brw_emit_point_setup( &c, true ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: unreachable("not reached"); } /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register * source). Compacting would be difficult. */ /* brw_compact_instructions(&c.func, 0, 0, NULL); */ /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_SF)) { fprintf(stderr, "sf:\n"); brw_disassemble(brw->intelScreen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); }
static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) { struct brw_wm_compile *c; const GLuint *program; GLuint program_size; c = brw->wm.compile_data; if (c == NULL) { brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); c = brw->wm.compile_data; } else { memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; /* Augment fragment program. Add instructions for pre- and * post-fragment-program tasks such as interpolation and fogging. */ brw_wm_pass_fp(c); /* Translate to intermediate representation. Build register usage * chains. */ brw_wm_pass0(c); /* Dead code removal. */ brw_wm_pass1(c); /* Hal optimization */ brw_wm_pass_hal (c); /* Register allocation. */ c->grf_limit = BRW_WM_MAX_GRF/2; /* This is where we start emitting gen4 code: */ brw_init_compile(&c->func); brw_wm_pass2(c); c->prog_data.total_grf = c->max_wm_grf; if (c->last_scratch) { c->prog_data.total_scratch = c->last_scratch + 0x40; } else { c->prog_data.total_scratch = 0; } /* Emit GEN4 code. */ brw_wm_emit(c); /* get the program */ program = brw_get_program(&c->func, &program_size); /* */ brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], &c->key, sizeof(c->key), program, program_size, &c->prog_data, &brw->wm.prog_data ); }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct brw_clip_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; GLuint i; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; c.key = *key; c.vue_map = brw->vue_map_geom_out; c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); c.has_noperspective_shading = brw_any_noperspective_varyings(&key->interpolation_mode); /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of * the VUE (measured in pairs, since two slots are stored in each * register). */ c.nr_regs = (c.vue_map.num_slots + 1)/2; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { printf("clip:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], brw->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_CLIP_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); }
static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; int aux_size; int i; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); brw_init_compile(brw, &c.func); c.vp = vp; c.prog_data.outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } /* Put dummy slots into the VUE for the SF to put the replaced * point sprite coords in. We shouldn't need these dummy slots, * which take up precious URB space, but it would mean that the SF * doesn't get nice aligned pairs of input coords into output * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { if (c.key.point_coord_replace & (1 << i)) c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); } if (0) { _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG, GL_TRUE); } /* Emit GEN4 code. */ brw_vs_emit(&c); /* get the program */ program = brw_get_program(&c.func, &program_size); /* We upload from &c.prog_data including the constant_map assuming * they're packed together. It would be nice to have a * compile-time assert macro here. */ assert(c.constant_map == (int8_t *)&c.prog_data + sizeof(c.prog_data)); assert(ctx->Const.VertexProgram.MaxNativeParameters == ARRAY_SIZE(c.constant_map)); (void) ctx; aux_size = sizeof(c.prog_data); /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, aux_size, &brw->vs.prog_data); }
/** * All Mesa program -> GPU code generation goes through this function. * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ static enum pipe_error do_wm_prog( struct brw_context *brw, struct brw_fragment_shader *fp, struct brw_wm_prog_key *key, struct brw_winsys_buffer **bo_out) { enum pipe_error ret; struct brw_wm_compile *c; const GLuint *program; GLuint program_size; if (brw->wm.compile_data == NULL) { brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data)); if (!brw->wm.compile_data) return PIPE_ERROR_OUT_OF_MEMORY; } c = brw->wm.compile_data; memset(c, 0, sizeof *c); c->key = *key; c->fp = fp; c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/ brw_init_compile(brw, &c->func); /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ if (fp->has_flow_control) { c->dispatch_width = 8; /* XXX: GLSL support */ exit(1); /* brw_wm_branching_shader_emit(brw, c); */ } else { c->dispatch_width = 16; brw_wm_linear_shader_emit(brw, c); } if (BRW_DEBUG & DEBUG_WM) debug_printf("\n"); /* get the program */ ret = brw_get_program(&c->func, &program, &program_size); if (ret) return ret; ret = brw_upload_cache( &brw->cache, BRW_WM_PROG, &c->key, sizeof(c->key), NULL, 0, program, program_size, &c->prog_data, &brw->wm.prog_data, bo_out ); if (ret) return ret; return PIPE_OK; }
static void compile_ff_gs_prog(struct brw_context *brw, struct brw_ff_gs_prog_key *key) { struct brw_ff_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); c.key = *key; c.vue_map = brw->vs.prog_data->base.vue_map; c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); if (brw->gen >= 6) { unsigned num_verts; bool check_edge_flag; /* On Sandybridge, we use the GS for implementing transform feedback * (called "Stream Out" in the PRM). */ switch (key->primitive) { case _3DPRIM_POINTLIST: num_verts = 1; check_edge_flag = false; break; case _3DPRIM_LINELIST: case _3DPRIM_LINESTRIP: case _3DPRIM_LINELOOP: num_verts = 2; check_edge_flag = false; break; case _3DPRIM_TRILIST: case _3DPRIM_TRIFAN: case _3DPRIM_TRISTRIP: case _3DPRIM_RECTLIST: num_verts = 3; check_edge_flag = false; break; case _3DPRIM_QUADLIST: case _3DPRIM_QUADSTRIP: case _3DPRIM_POLYGON: num_verts = 3; check_edge_flag = true; break; default: assert(!"Unexpected primitive type in Gen6 SOL program."); return; } gen6_sol_program(&c, key, num_verts, check_edge_flag); } else { /* On Gen4-5, we use the GS to decompose certain types of primitives. * Note that primitives which don't require a GS program have already * been weeded out by now. */ switch (key->primitive) { case _3DPRIM_QUADLIST: brw_ff_gs_quads( &c, key ); break; case _3DPRIM_QUADSTRIP: brw_ff_gs_quad_strip( &c, key ); break; case _3DPRIM_LINELOOP: brw_ff_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], brw->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_FF_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); ralloc_free(mem_ctx); }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct brw_clip_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ brw_init_codegen(&brw->screen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; c.key = *key; c.vue_map = brw->vue_map_geom_out; /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of * the VUE (measured in pairs, since two slots are stored in each * register). */ c.nr_regs = (c.vue_map.num_slots + 1)/2; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: unreachable("not reached"); } brw_compact_instructions(&c.func, 0, 0, NULL); /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { fprintf(stderr, "clip:\n"); brw_disassemble(&brw->screen->devinfo, c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } brw_upload_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; /* Gen6: VF has already converted into polygon, and LINELOOP is * converted to LINESTRIP at the beginning of the 3D pipeline. */ if (intel->gen >= 6) return; memset(&c, 0, sizeof(c)); c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); if (intel->gen >= 5) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; mem_ctx = NULL; /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ switch (key->primitive) { case GL_QUADS: brw_gs_quads( &c, key ); break; case GL_QUAD_STRIP: brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: brw_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_clip_compile c; const GLuint *program; GLuint program_size; GLuint delta; GLuint i; GLuint header_regs; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(brw, &c.func); c.func.single_program_flow = 1; c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; if (intel->gen == 5) header_regs = 3; else header_regs = 1; delta = header_regs * REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.offset[i] = delta; delta += ATTR_SIZE; c.idx_to_attr[c.nr_attrs] = i; c.nr_attrs++; } } /* The vertex attributes start at a URB row-aligned offset after * the 8-20 dword vertex header, and continue for a URB row-aligned * length. nr_regs determines the urb_read_length from the start * of the header to the end of the vertex data. */ c.nr_regs = header_regs + (c.nr_attrs + 1) / 2; c.nr_bytes = c.nr_regs * REG_SIZE; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (INTEL_DEBUG & DEBUG_CLIP) { printf("clip:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } /* Upload */ drm_intel_bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_CLIP_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_data); }