static void brw_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); GLuint i, nr_elements; brw_prepare_vertices(brw); brw_emit_query_begin(brw); /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (intel->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); CACHED_BATCH(); return; } /* Now emit VB and VEP state packets. */ if (brw->vb.nr_buffers) { if (intel->gen >= 6) { assert(brw->vb.nr_buffers <= 33); } else { assert(brw->vb.nr_buffers <= 17); } BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0; if (intel->gen >= 6) { dw0 = buffer->step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA : GEN6_VB0_ACCESS_VERTEXDATA; dw0 |= i << GEN6_VB0_INDEX_SHIFT; } else { dw0 = buffer->step_rate ? BRW_VB0_ACCESS_INSTANCEDATA : BRW_VB0_ACCESS_VERTEXDATA; dw0 |= i << BRW_VB0_INDEX_SHIFT; } if (intel->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); if (intel->gen >= 5) { OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else OUT_BATCH(0); OUT_BATCH(buffer->step_rate); brw->vb.current_buffers[i].handle = buffer->bo->handle; brw->vb.current_buffers[i].offset = buffer->offset; brw->vb.current_buffers[i].stride = buffer->stride; brw->vb.current_buffers[i].step_rate = buffer->step_rate; } brw->vb.nr_current_buffers = i; ADVANCE_BATCH(); } nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. */ if (intel->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); } BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, input->glarray->Normalized, input->glarray->Integer); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } if (intel->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (intel->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); else OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } if (brw->vs.prog_data->uses_vertexid) { uint32_t dw0 = 0, dw1 = 0; dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); if (intel->gen >= 6) { dw0 |= GEN6_VE0_VALID; } else { dw0 |= BRW_VE0_VALID; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, * the format is ignored and the value is always int. */ OUT_BATCH(dw0); OUT_BATCH(dw1); } CACHED_BATCH(); }
static void brw_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; GLuint i, nr_elements; brw_prepare_vertices(brw); brw_emit_query_begin(brw); nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (nr_elements == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (brw->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); return; } /* Now emit VB and VEP state packets. */ if (brw->vb.nr_buffers) { if (brw->gen >= 6) { assert(brw->vb.nr_buffers <= 33); } else { assert(brw->vb.nr_buffers <= 17); } BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0; if (brw->gen >= 6) { dw0 = buffer->step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA : GEN6_VB0_ACCESS_VERTEXDATA; dw0 |= i << GEN6_VB0_INDEX_SHIFT; } else { dw0 = buffer->step_rate ? BRW_VB0_ACCESS_INSTANCEDATA : BRW_VB0_ACCESS_VERTEXDATA; dw0 |= i << BRW_VB0_INDEX_SHIFT; } if (brw->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; if (brw->gen == 7) dw0 |= GEN7_MOCS_L3 << 16; WARN_ONCE(buffer->stride >= (brw->gen >= 5 ? 2048 : 2047), "VBO stride %d too large, bad rendering may occur\n", buffer->stride); OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); if (brw->gen >= 5) { OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else OUT_BATCH(0); OUT_BATCH(buffer->step_rate); } ADVANCE_BATCH(); } /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. */ if (brw->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); } struct brw_vertex_element *gen6_edgeflag_input = NULL; BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ if (brw->gen >= 6) { gen6_edgeflag_input = input; continue; } } switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } if (brw->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (brw->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); else OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } if (brw->gen >= 6 && gen6_edgeflag_input) { uint32_t format = brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | GEN6_VE0_EDGE_FLAG_ENABLE | (format << BRW_VE0_FORMAT_SHIFT) | (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } if (brw->vs.prog_data->uses_vertexid) { uint32_t dw0 = 0, dw1 = 0; dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); if (brw->gen >= 6) { dw0 |= GEN6_VE0_VALID; } else { dw0 |= BRW_VE0_VALID; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, * the format is ignored and the value is always int. */ OUT_BATCH(dw0); OUT_BATCH(dw1); } ADVANCE_BATCH(); }
static void brw_emit_vertices(struct brw_context *brw) { GLuint i; brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); brw_emit_query_begin(brw); unsigned nr_elements = brw->vb.nr_enabled; if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) ++nr_elements; /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (nr_elements == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (brw->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); return; } /* Now emit VB and VEP state packets. */ unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid; if (nr_buffers) { if (brw->gen >= 6) { assert(nr_buffers <= 33); } else { assert(nr_buffers <= 17); } BEGIN_BATCH(1 + 4 * nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1, buffer->offset, buffer->stride, buffer->step_rate); } if (brw->vs.prog_data->uses_vertexid) { emit_vertex_buffer_state(brw, brw->vb.nr_buffers, brw->draw.draw_params_bo, brw->draw.draw_params_bo->size - 1, brw->draw.draw_params_offset, 0, /* stride */ 0); /* step rate */ } ADVANCE_BATCH(); } /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. */ if (brw->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); } struct brw_vertex_element *gen6_edgeflag_input = NULL; BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ if (brw->gen >= 6) { gen6_edgeflag_input = input; continue; } } switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } if (brw->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (brw->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); else OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } if (brw->gen >= 6 && gen6_edgeflag_input) { uint32_t format = brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | GEN6_VE0_EDGE_FLAG_ENABLE | (format << BRW_VE0_FORMAT_SHIFT) | (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { uint32_t dw0 = 0, dw1 = 0; uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0; if (brw->vs.prog_data->uses_vertexid) { comp0 = BRW_VE1_COMPONENT_STORE_SRC; comp2 = BRW_VE1_COMPONENT_STORE_VID; } if (brw->vs.prog_data->uses_instanceid) { comp3 = BRW_VE1_COMPONENT_STORE_IID; } dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT); if (brw->gen >= 6) { dw0 |= GEN6_VE0_VALID | brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT; } else { dw0 |= BRW_VE0_VALID | brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT | BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, * the format is ignored and the value is always int. */ OUT_BATCH(dw0); OUT_BATCH(dw1); } ADVANCE_BATCH(); }
static void brw_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); GLuint i, nr_elements; brw_prepare_vertices(brw); brw_emit_query_begin(brw); /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (intel->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); CACHED_BATCH(); return; } /* Now emit VB and VEP state packets. */ if (brw->vb.nr_buffers) { if (intel->gen >= 6) { assert(brw->vb.nr_buffers <= 33); } else { assert(brw->vb.nr_buffers <= 17); } BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0; if (intel->gen >= 6) { dw0 = buffer->step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA : GEN6_VB0_ACCESS_VERTEXDATA; dw0 |= i << GEN6_VB0_INDEX_SHIFT; } else { dw0 = buffer->step_rate ? BRW_VB0_ACCESS_INSTANCEDATA : BRW_VB0_ACCESS_VERTEXDATA; dw0 |= i << BRW_VB0_INDEX_SHIFT; } if (intel->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); if (intel->gen >= 5) { OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else OUT_BATCH(0); OUT_BATCH(buffer->step_rate); brw->vb.current_buffers[i].handle = buffer->bo->handle; brw->vb.current_buffers[i].offset = buffer->offset; brw->vb.current_buffers[i].stride = buffer->stride; brw->vb.current_buffers[i].step_rate = buffer->step_rate; } brw->vb.nr_current_buffers = i; ADVANCE_BATCH(); } nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. */ if (intel->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); } struct brw_vertex_element *gen6_edgeflag_input = NULL; BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, input->glarray->Normalized, input->glarray->Integer); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; /* The gen4 driver expects edgeflag to come in as a float, and passes * that float on to the tests in the clipper. Mesa's current vertex * attribute value for EdgeFlag is stored as a float, which works out. * glEdgeFlagPointer, on the other hand, gives us an unnormalized * integer ubyte. Just rewrite that to convert to a float. */ if (input->attrib == VERT_ATTRIB_EDGEFLAG) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ if (intel->gen >= 6) { gen6_edgeflag_input = input; continue; } if (format == BRW_SURFACEFORMAT_R8_UINT) format = BRW_SURFACEFORMAT_R8_SSCALED; } switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } if (intel->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (intel->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); else OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } if (intel->gen >= 6 && gen6_edgeflag_input) { uint32_t format = get_surface_type(gen6_edgeflag_input->glarray->Type, gen6_edgeflag_input->glarray->Size, gen6_edgeflag_input->glarray->Format, gen6_edgeflag_input->glarray->Normalized, gen6_edgeflag_input->glarray->Integer); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | GEN6_VE0_EDGE_FLAG_ENABLE | (format << BRW_VE0_FORMAT_SHIFT) | (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } if (brw->vs.prog_data->uses_vertexid) { uint32_t dw0 = 0, dw1 = 0; dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); if (intel->gen >= 6) { dw0 |= GEN6_VE0_VALID; } else { dw0 |= BRW_VE0_VALID; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, * the format is ignored and the value is always int. */ OUT_BATCH(dw0); OUT_BATCH(dw1); } CACHED_BATCH(); }
/* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ static GLboolean brw_try_draw_prims( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, GLuint min_index, GLuint max_index ) { struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; GLuint i; GLuint ib_offset; dri_bo *ib_bo; GLboolean force_flush = GL_FALSE; int ret; if (ctx->NewState) _mesa_update_state( ctx ); brw_validate_textures( brw ); /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); /* Have to validate state quite late. Will rebuild tnl_program, * which depends on varying information. * * Note this is where brw->vs->prog_data.inputs_read is calculated, * so can't access it earlier. */ LOCK_HARDWARE(intel); if (brw->intel.numClipRects == 0) { UNLOCK_HARDWARE(intel); return GL_TRUE; } { /* Flush the batch if it's approaching full, so that we don't wrap while * we've got validated state that needs to be in the same batch as the * primitives. This fraction is just a guess (minimal full state plus * a primitive is around 512 bytes), and would be better if we had * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ flush: if (force_flush) brw->no_batch_wrap = GL_FALSE; if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4 /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */ || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE)) intel_batchbuffer_flush(intel->batch); force_flush = GL_FALSE; brw->no_batch_wrap = GL_TRUE; /* Set the first primitive early, ahead of validate_state: */ brw_set_prim(brw, prim[0].mode, &force_flush); /* XXX: Need to separate validate and upload of state. */ ret = brw_validate_state( brw ); if (ret) { force_flush = GL_TRUE; goto flush; } /* Various fallback checks: */ if (brw->intel.Fallback) goto out; if (check_fallbacks( brw, prim, nr_prims )) goto out; /* need to account for index buffer and vertex buffer */ if (ib) { ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset); if (ret) { force_flush = GL_TRUE; goto flush; } } ret = brw_prepare_vertices( brw, min_index, max_index); if (ret < 0) goto out; if (ret > 0) { force_flush = GL_TRUE; goto flush; } /* Upload index, vertex data: */ if (ib) brw_emit_indices( brw, ib, ib_bo, ib_offset); brw_emit_vertices( brw, min_index, max_index); for (i = 0; i < nr_prims; i++) { brw_emit_prim(brw, &prim[i]); } retval = GL_TRUE; } out: brw->no_batch_wrap = GL_FALSE; UNLOCK_HARDWARE(intel); if (!retval) DBG("%s failed\n", __FUNCTION__); return retval; }
static void gen8_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; bool uses_edge_flag; brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { unsigned vue = brw->vb.nr_enabled; /* The element for the edge flags must always be last, so we have to * insert the SGVS before it in that case. */ if (uses_edge_flag) { assert(vue > 0); vue--; } WARN_ONCE(vue >= 33, "Trying to insert VID/IID past 33rd vertex element, " "need to reorder the vertex attrbutes."); unsigned dw1 = 0; if (brw->vs.prog_data->uses_vertexid) { dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); } if (brw->vs.prog_data->uses_instanceid) { dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); } BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); OUT_BATCH(dw1); ADVANCE_BATCH(); BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); OUT_BATCH(0); ADVANCE_BATCH(); } else { BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); OUT_BATCH(0); ADVANCE_BATCH(); } /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); return; } /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */ unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid; if (nr_buffers) { assert(nr_buffers <= 33); BEGIN_BATCH(1 + 4 * nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0 = 0; dw0 |= i << GEN6_VB0_INDEX_SHIFT; dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; dw0 |= buffer->stride << BRW_VB0_PITCH_SHIFT; dw0 |= mocs_wb << 16; OUT_BATCH(dw0); OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); OUT_BATCH(buffer->bo->size); } if (brw->vs.prog_data->uses_vertexid) { OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT | GEN7_VB0_ADDRESS_MODIFYENABLE | mocs_wb << 16); OUT_RELOC64(brw->draw.draw_params_bo, I915_GEM_DOMAIN_VERTEX, 0, brw->draw.draw_params_offset); OUT_BATCH(brw->draw.draw_params_bo->size); } ADVANCE_BATCH(); } /* Normally we don't need an element for the SGVS attribute because the * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the * vertex ID is used then it needs an element for the base vertex buffer. * Additionally if there is an edge flag element then the SGVS can't be * inserted past that so we need a dummy element to ensure that the edge * flag is the last one. */ bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid || (brw->vs.prog_data->uses_instanceid && uses_edge_flag)); unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element; /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, * presumably for VertexID/InstanceID. */ assert(nr_elements <= 34); struct brw_vertex_element *gen6_edgeflag_input = NULL; BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; /* The gen4 driver expects edgeflag to come in as a float, and passes * that float on to the tests in the clipper. Mesa's current vertex * attribute value for EdgeFlag is stored as a float, which works out. * glEdgeFlagPointer, on the other hand, gives us an unnormalized * integer ubyte. Just rewrite that to convert to a float. */ if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ gen6_edgeflag_input = input; continue; } switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); } if (needs_sgvs_element) { if (brw->vs.prog_data->uses_vertexid) { OUT_BATCH(GEN6_VE0_VALID | brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } else { OUT_BATCH(GEN6_VE0_VALID); OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } } if (gen6_edgeflag_input) { uint32_t format = brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | GEN6_VE0_EDGE_FLAG_ENABLE | (format << BRW_VE0_FORMAT_SHIFT) | (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } ADVANCE_BATCH(); for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { const struct brw_vertex_element *input = brw->vb.enabled[i]; const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; unsigned element_index; /* The edge flag element is reordered to be the last one in the code * above so we need to compensate for that in the element indices used * below. */ if (input == gen6_edgeflag_input) element_index = nr_elements - 1; else element_index = j++; BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); OUT_BATCH(element_index | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0)); OUT_BATCH(buffer->step_rate); ADVANCE_BATCH(); } }
static void gen8_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; bool uses_edge_flag; brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { unsigned vue = brw->vb.nr_enabled; /* The element for the edge flags must always be last, so we have to * insert the SGVS before it in that case. */ if (uses_edge_flag) { assert(vue > 0); vue--; } WARN_ONCE(vue >= 33, "Trying to insert VID/IID past 33rd vertex element, " "need to reorder the vertex attrbutes."); unsigned dw1 = 0; if (brw->vs.prog_data->uses_vertexid) { dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); } if (brw->vs.prog_data->uses_instanceid) { dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); } BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); OUT_BATCH(dw1); ADVANCE_BATCH(); BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); OUT_BATCH(0); ADVANCE_BATCH(); } else { BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); OUT_BATCH(0); ADVANCE_BATCH(); } /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); return; } /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */ const bool uses_draw_params = brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance; const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params + brw->vs.prog_data->uses_drawid; if (nr_buffers) { assert(nr_buffers <= 33); BEGIN_BATCH(1 + 4 * nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset, buffer->offset + buffer->size, buffer->stride, 0 /* unused */); } if (uses_draw_params) { EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, brw->draw.draw_params_bo, brw->draw.draw_params_offset, brw->draw.draw_params_bo->size, 0 /* stride */, 0 /* unused */); } if (brw->vs.prog_data->uses_drawid) { EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, brw->draw.draw_id_bo, brw->draw.draw_id_offset, brw->draw.draw_id_bo->size, 0 /* stride */, 0 /* unused */); } ADVANCE_BATCH(); } /* Normally we don't need an element for the SGVS attribute because the * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if * we're using draw parameters then we need an element for the those * values. Additionally if there is an edge flag element then the SGVS * can't be inserted past that so we need a dummy element to ensure that * the edge flag is the last one. */ const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance || ((brw->vs.prog_data->uses_instanceid || brw->vs.prog_data->uses_vertexid) && uses_edge_flag)); const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid; /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, * presumably for VertexID/InstanceID. */ assert(nr_elements <= 34); struct brw_vertex_element *gen6_edgeflag_input = NULL; BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an * element which has edge flag enabled." */ assert(!(is_passthru_format(format) && uses_edge_flag)); /* The gen4 driver expects edgeflag to come in as a float, and passes * that float on to the tests in the clipper. Mesa's current vertex * attribute value for EdgeFlag is stored as a float, which works out. * glEdgeFlagPointer, on the other hand, gives us an unnormalized * integer ubyte. Just rewrite that to convert to a float. */ if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ gen6_edgeflag_input = input; continue; } switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): * * "When SourceElementFormat is set to one of the *64*_PASSTHRU * formats, 64-bit components are stored in the URB without any * conversion. In this case, vertex elements must be written as 128 * or 256 bits, with VFCOMP_STORE_0 being used to pad the output * as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red * component into the URB, Component 1 must be specified as * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) * in order to output a 128-bit vertex element, or Components 1-3 must * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 * to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex * element." */ if (input->glarray->Doubles) { switch (input->glarray->Size) { case 0: case 1: case 2: /* Use 128-bits instead of 256-bits to write double and dvec2 * vertex elements. */ comp2 = BRW_VE1_COMPONENT_NOSTORE; comp3 = BRW_VE1_COMPONENT_NOSTORE; break; case 3: /* Pad the output using VFCOMP_STORE_0 as suggested * by the BDW PRM. */ comp3 = BRW_VE1_COMPONENT_STORE_0; break; } } OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); } if (needs_sgvs_element) { if (brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance) { OUT_BATCH(GEN6_VE0_VALID | brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } else { OUT_BATCH(GEN6_VE0_VALID); OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } } if (brw->vs.prog_data->uses_drawid) { OUT_BATCH(GEN6_VE0_VALID | ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } if (gen6_edgeflag_input) { uint32_t format = brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | GEN6_VE0_EDGE_FLAG_ENABLE | (format << BRW_VE0_FORMAT_SHIFT) | (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } ADVANCE_BATCH(); for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { const struct brw_vertex_element *input = brw->vb.enabled[i]; const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; unsigned element_index; /* The edge flag element is reordered to be the last one in the code * above so we need to compensate for that in the element indices used * below. */ if (input == gen6_edgeflag_input) element_index = nr_elements - 1; else element_index = j++; BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); OUT_BATCH(element_index | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0)); OUT_BATCH(buffer->step_rate); ADVANCE_BATCH(); } if (brw->vs.prog_data->uses_drawid) { const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); OUT_BATCH(element); OUT_BATCH(0); ADVANCE_BATCH(); } }