static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) return; /* Already done in clip program: */ if (c->key.primitive == SF_UNFILLED_TRIS) return; if (intel->gen == 5) jmpi = 2; brw_push_insn_state(p); brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); copy_colors(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); }
/* Need to use a computed jump to copy flatshaded attributes as the * vertices are ordered according to y-coordinate before reaching this * point, so the PV could be anywhere. */ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); if (!nr) return; /* Already done in clip program: */ if (c->key.primitive == SF_UNFILLED_TRIS) return; brw_push_insn_state(p); brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); copy_colors(c, c->vert[2], c->vert[0]); brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); copy_colors(c, c->vert[0], c->vert[1]); copy_colors(c, c->vert[2], c->vert[1]); brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); copy_colors(c, c->vert[0], c->vert[2]); copy_colors(c, c->vert[1], c->vert[2]); brw_pop_insn_state(p); }
static void upload_clip_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t depth_clamp = 0; uint32_t provoking, userclip; if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { provoking = (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } else { provoking = (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } /* _NEW_TRANSFORM */ userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1; BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); OUT_BATCH(GEN6_CLIP_ENABLE | GEN6_CLIP_API_OGL | GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); }
static void brw_upload_vs_prog(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; int i; memset(&key, 0, sizeof(key)); /* Just upload the program verbatim for now. Always send it all * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POINT */ if (ctx->Point.PointSprite) { for (i = 0; i < 8; i++) { if (ctx->Point.CoordReplace[i]) key.point_coord_replace |= (1 << i); } } /* Make an early check for the key. */ drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), NULL, 0, &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; /* Gen6: VF has already converted into polygon, and LINELOOP is * converted to LINESTRIP at the beginning of the 3D pipeline. */ if (intel->gen >= 6) return; memset(&c, 0, sizeof(c)); c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); if (intel->gen >= 5) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; mem_ctx = NULL; /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ switch (key->primitive) { case GL_QUADS: brw_gs_quads( &c, key ); break; case GL_QUAD_STRIP: brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: brw_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); }
static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; GLuint i, idx; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(brw, &c.func); c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; /* Construct map from attribute number to position in the vertex. */ for (i = idx = 0; i < VERT_RESULT_MAX; i++) if (c.key.attrs & (1<<i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { c.point_attrs[i].CoordReplace = ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; } else { c.point_attrs[i].CoordReplace = GL_FALSE; } idx++; } /* Which primitive? Or all three? */ switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; brw_emit_tri_setup( &c, GL_TRUE ); break; case SF_LINES: c.nr_verts = 2; brw_emit_line_setup( &c, GL_TRUE ); break; case SF_POINTS: c.nr_verts = 1; if (key->do_point_sprite) brw_emit_point_sprite_setup( &c, GL_TRUE ); else brw_emit_point_setup( &c, GL_TRUE ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; brw_emit_anyprim_setup( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ dri_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, &brw->sf.prog_data ); }
/* Partition the CURBE between the various users of constant values: */ static void calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; /* _NEW_TRANSFORM */ if (brw->attribs.Transform->ClipPlanesEnabled) { GLuint nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); nr_clip_regs = (nr_planes * 4 + 15) / 16; } total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; /* This can happen - what to do? Probably rather than falling * back, the best thing to do is emit programs which code the * constants as immediate values. Could do this either as a static * cap on WM and VS, or adaptively. * * Unfortunately, this is currently dependent on the results of the * program generation process (in the case of wm), so this would * introduce the need to re-generate programs in the event of a * curbe allocation failure. */ /* Max size is 32 - just large enough to * hold the 128 parameters allowed by * the fragment and vertex program * api's. It's not clear what happens * when both VP and FP want to use 128 * parameters, though. */ assert(total_regs <= 32); /* Lazy resize: */ if (nr_fp_regs > brw->curbe.wm_size || nr_vp_regs > brw->curbe.vs_size || nr_clip_regs > brw->curbe.clip_size || (total_regs < brw->curbe.total_size / 4 && brw->curbe.total_size > 16)) { GLuint reg = 0; /* Calculate a new layout: */ reg = 0; brw->curbe.wm_start = reg; brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; brw->curbe.clip_start = reg; brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; brw->curbe.vs_start = reg; brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; brw->curbe.total_size = reg; if (0) _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", brw->curbe.wm_start, brw->curbe.wm_size, brw->curbe.clip_start, brw->curbe.clip_size, brw->curbe.vs_start, brw->curbe.vs_size ); brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct brw_gs_compile c; const unsigned *program; unsigned program_size; memset(&c, 0, sizeof(c)); c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; /* Begin the compilation: */ brw_init_compile(&c.func); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ switch (key->primitive) { case PIPE_PRIM_QUADS: brw_gs_quads( &c ); break; case PIPE_PRIM_QUAD_STRIP: brw_gs_quad_strip( &c ); break; case PIPE_PRIM_LINE_LOOP: brw_gs_lines( &c ); break; case PIPE_PRIM_LINES: if (key->hint_gs_always) brw_gs_lines( &c ); else { return; } break; case PIPE_PRIM_TRIANGLES: if (key->hint_gs_always) brw_gs_tris( &c ); else { return; } break; case PIPE_PRIM_POINTS: if (key->hint_gs_always) brw_gs_points( &c ); else { return; } break; default: return; } /* get the program */ program = brw_get_program(&c.func, &program_size); /* Upload */ brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], &c.key, sizeof(c.key), program, program_size, &c.prog_data, &brw->gs.prog_data ); }
/* Calculate interpolants for triangle and line rasterization. */ static void upload_clip_prog(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; struct brw_clip_prog_key key; memset(&key, 0, sizeof(key)); /* Populate the key: */ /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->intel.reduced_primitive; /* CACHE_NEW_VS_PROG */ key.attrs = brw->vs.prog_data->outputs_written; /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); if (intel->gen == 5) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key.clip_mode = BRW_CLIPMODE_NORMAL; /* _NEW_POLYGON */ if (key.primitive == GL_TRIANGLES) { if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { GLuint fill_front = CLIP_CULL; GLuint fill_back = CLIP_CULL; GLuint offset_front = 0; GLuint offset_back = 0; if (!ctx->Polygon.CullFlag || ctx->Polygon.CullFaceMode != GL_FRONT) { switch (ctx->Polygon.FrontMode) { case GL_FILL: fill_front = CLIP_FILL; offset_front = 0; break; case GL_LINE: fill_front = CLIP_LINE; offset_front = ctx->Polygon.OffsetLine; break; case GL_POINT: fill_front = CLIP_POINT; offset_front = ctx->Polygon.OffsetPoint; break; } } if (!ctx->Polygon.CullFlag || ctx->Polygon.CullFaceMode != GL_BACK) { switch (ctx->Polygon.BackMode) { case GL_FILL: fill_back = CLIP_FILL; offset_back = 0; break; case GL_LINE: fill_back = CLIP_LINE; offset_back = ctx->Polygon.OffsetLine; break; case GL_POINT: fill_back = CLIP_POINT; offset_back = ctx->Polygon.OffsetPoint; break; } } if (ctx->Polygon.BackMode != GL_FILL || ctx->Polygon.FrontMode != GL_FILL) { key.do_unfilled = 1; /* Most cases the fixed function units will handle. Cases where * one or more polygon faces are unfilled will require help: */ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; if (offset_back || offset_front) { /* _NEW_POLYGON, _NEW_BUFFERS */ key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; } switch (ctx->Polygon.FrontFace) { case GL_CCW: key.fill_ccw = fill_front; key.fill_cw = fill_back; key.offset_ccw = offset_front; key.offset_cw = offset_back; if (ctx->Light.Model.TwoSide && key.fill_cw != CLIP_CULL) key.copy_bfc_cw = 1; break; case GL_CW: key.fill_cw = fill_front; key.fill_ccw = fill_back; key.offset_cw = offset_front; key.offset_ccw = offset_back; if (ctx->Light.Model.TwoSide && key.fill_ccw != CLIP_CULL) key.copy_bfc_ccw = 1; break; } } } } drm_intel_bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, &key, sizeof(key), NULL, 0, &brw->clip.prog_data); if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); }
static void upload_sf_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; /* CACHE_NEW_VS_PROG */ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw2, dw3, dw4, dw16; int i; /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; dw1 = num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | GEN6_SF_STATISTICS_ENABLE; dw3 = 0; dw4 = 0; dw16 = 0; /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) dw2 |= GEN6_SF_WINDING_CCW; if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.CullFlag) { switch (ctx->Polygon.CullFaceMode) { case GL_FRONT: dw3 |= GEN6_SF_CULL_FRONT; break; case GL_BACK: dw3 |= GEN6_SF_CULL_BACK; break; case GL_FRONT_AND_BACK: dw3 |= GEN6_SF_CULL_BOTH; break; default: assert(0); break; } } else { dw3 |= GEN6_SF_CULL_NONE; } /* _NEW_LINE */ dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << GEN6_SF_LINE_WIDTH_SHIFT; if (ctx->Line.SmoothFlag) { dw3 |= GEN6_SF_LINE_AA_ENABLE; dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; } /* _NEW_POINT */ if (ctx->Point._Attenuated) dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) << GEN6_SF_POINT_WIDTH_SHIFT; if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { dw4 |= (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | (1 << GEN6_SF_LINE_PROVOKE_SHIFT); } else { dw4 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); } if (ctx->Point.PointSprite) { for (i = 0; i < 8; i++) { if (ctx->Point.CoordReplace[i]) dw16 |= (1 << i); } } BEGIN_BATCH(20); OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); OUT_BATCH(dw3); OUT_BATCH(dw4); OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ for (i = 0; i < 8; i++) { uint32_t attr_overrides = 0; for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { attr_overrides |= get_attr_override(brw, attr); attr++; break; } } for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { attr_overrides |= get_attr_override(brw, attr) << 16; attr++; break; } } OUT_BATCH(attr_overrides); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ OUT_BATCH(0); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); }