Beispiel #1
0
/**
 * Fold the primitives written since the last snapshot into the running
 * per-stream tally, emitting the arithmetic as command-streamer math.
 *
 * For every transform feedback stream the stored tally is loaded into
 * GPR0.  Unless the object is paused, the difference between the current
 * SO_NUM_PRIMS_WRITTEN register and the stashed "start" snapshot is added
 * on top of it.
 *
 * When \p finalize is false, the updated tally simply replaces the old one
 * in the buffer.  When \p finalize is true, the tally is scaled to a vertex
 * count (doubled for GL_LINES, tripled for GL_TRIANGLES, left as-is for any
 * other mode) and stored to the "final" slot at the start of the buffer;
 * the tally slot itself is not rewritten in that case.
 *
 * \param brw       context whose batch receives the commands
 * \param obj       transform feedback object that owns prim_count_bo
 * \param finalize  true to produce the final vertex count, false to only
 *                  refresh the running tally
 */
static void
tally_prims_written(struct brw_context *brw,
                    struct brw_transform_feedback_object *obj,
                    bool finalize)
{
   /* The counters only hold the right values once drawing is flushed. */
   brw_emit_mi_flush(brw);

   for (int stream = 0; stream < BRW_MAX_XFB_STREAMS; stream++) {
      /* Per-stream slots inside prim_count_bo. */
      const uint32_t final_offset = stream * sizeof(uint32_t);
      const uint32_t tally_offset = TALLY_OFFSET + stream * sizeof(uint32_t);

      /* GPR0 = Tally.  The dword at GPR0 + 4 is zeroed first so that only
       * the 32-bit value loaded from memory remains in the register.
       */
      brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
      brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
                            I915_GEM_DOMAIN_INSTRUCTION,
                            I915_GEM_DOMAIN_INSTRUCTION,
                            tally_offset);

      if (!obj->base.Paused) {
         const uint32_t start_offset =
            START_OFFSET + stream * sizeof(uint64_t);

         /* GPR1 = snapshot taken when counting started */
         brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
                                 I915_GEM_DOMAIN_INSTRUCTION,
                                 I915_GEM_DOMAIN_INSTRUCTION,
                                 start_offset);
         /* GPR2 = current value of the hardware counter */
         brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(stream),
                                 HSW_CS_GPR(2));

         BEGIN_BATCH(9);
         OUT_BATCH(HSW_MI_MATH | (9 - 2));
         /* GPR1 = GPR2 (current) - GPR1 (start) */
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
         OUT_BATCH(MI_MATH_ALU0(SUB));
         OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
         /* GPR0 = GPR0 (tally) + GPR1 (difference) */
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
         OUT_BATCH(MI_MATH_ALU0(ADD));
         OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
         ADVANCE_BATCH();
      }

      if (!finalize) {
         /* Not done yet: persist the refreshed tally and move on. */
         brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
                                  tally_offset);
         continue;
      }

      /* Finalizing: turn the primitive count in GPR0 into a vertex count. */
      if (obj->primitive_mode == GL_LINES) {
         /* Two vertices per line: GPR0 = GPR0 + GPR0 */
         BEGIN_BATCH(5);
         OUT_BATCH(HSW_MI_MATH | (5 - 2));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
         OUT_BATCH(MI_MATH_ALU0(ADD));
         OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
         ADVANCE_BATCH();
      } else if (obj->primitive_mode == GL_TRIANGLES) {
         /* Three vertices per triangle:
          * GPR1 = GPR0 + GPR0, then GPR0 = GPR0 + GPR1
          */
         BEGIN_BATCH(9);
         OUT_BATCH(HSW_MI_MATH | (9 - 2));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
         OUT_BATCH(MI_MATH_ALU0(ADD));
         OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
         OUT_BATCH(MI_MATH_ALU0(ADD));
         OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
         ADVANCE_BATCH();
      }

      /* Publish the vertex count in the "final" slot for this stream. */
      brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
                               final_offset);
   }
}
Beispiel #2
0
static void
brw_emit_prim(struct brw_context *brw,
              const struct _mesa_prim *prim,
              uint32_t hw_prim)
{
   int verts_per_instance;
   int vertex_access_type;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
       prim->start, prim->count);

   int start_vertex_location = prim->start;
   int base_vertex_location = prim->basevertex;

   if (prim->indexed) {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the besides the draw code.
    */
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
            intel_buffer_object(indirect_buffer),
            prim->indirect_offset, 5 * sizeof(GLuint));

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 0);
      brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 4);

      brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 8);
      if (prim->indexed) {
         brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 16);
      } else {
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   } else {
      indirect_flag = 0;
   }

   BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);

   if (brw->gen >= 7) {
      const int predicate_enable =
         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
         ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;

      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}