예제 #1
0
파일: intel_upload.c 프로젝트: Sheph/mesa
/**
 * Convenience wrapper that copies caller data into temporary GPU upload
 * memory in one call.
 *
 * A region of \c size bytes is requested from intel_upload_space() with the
 * given \c alignment; \c out_bo and \c out_offset are handed through to that
 * call untouched.  The \c size bytes at \c data are then copied into the
 * returned mapping.
 *
 * References to this memory should not be retained across batch flushes.
 */
void
intel_upload_data(struct brw_context *brw,
                  const void *data,
                  uint32_t size,
                  uint32_t alignment,
                  drm_intel_bo **out_bo,
                  uint32_t *out_offset)
{
   void *const staging =
      intel_upload_space(brw, size, alignment, out_bo, out_offset);

   memcpy(staging, data, size);
}
예제 #2
0
/**
 * Builds a temporary BO holding the pull constant data for a shader stage,
 * together with the SURFACE_STATE struct that points at it.
 *
 * Pull constants are GLSL uniforms (and other constant data) that did not
 * fit as push constants, or that are accessed with a variable array index
 * (pull constants are the easiest way to support that, and they avoid
 * filling register space with mostly-unused data).
 *
 * When the stage has no pull parameters, any previously recorded surface
 * offset is cleared instead and nothing is uploaded.  Whenever the surface
 * offset is touched, \c brw_new_constbuf is OR-ed into the context's
 * NewDriverState.
 *
 * Compare this path to brw_curbe.c for gen4/5 push constants, and
 * gen6_vs_state.c for gen6+ push constants.
 */
void
brw_upload_pull_constants(struct brw_context *brw,
                          GLbitfield brw_new_constbuf,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          const struct brw_stage_prog_data *prog_data,
                          bool dword_pitch)
{
   const uint32_t surf_index = prog_data->binding_table.pull_constants_start;

   if (prog_data->nr_pull_params == 0) {
      /* Nothing to pull: forget a stale surface, flagging the change only
       * if there actually was one.
       */
      if (stage_state->surf_offset[surf_index] != 0) {
         stage_state->surf_offset[surf_index] = 0;
         brw->ctx.NewDriverState |= brw_new_constbuf;
      }
      return;
   }

   /* The copy below treats each constant as one 4-byte slot. */
   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);

   /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */
   const uint32_t bytes = prog_data->nr_pull_params * 4;
   drm_intel_bo *upload_bo = NULL;
   uint32_t upload_offset;
   gl_constant_value *values =
      intel_upload_space(brw, bytes, 64, &upload_bo, &upload_offset);
   int idx;

   /* Each pull_param[] entry is a pointer to the live value; gather them
    * into the contiguous upload area.
    */
   for (idx = 0; idx < prog_data->nr_pull_params; idx++)
      values[idx] = *prog_data->pull_param[idx];

   /* Disabled debug dump of the uploaded data, one vec4 per line. */
   if (0) {
      for (idx = 0; idx < ALIGN(prog_data->nr_pull_params, 4) / 4; idx++) {
         const gl_constant_value *vec4 = &values[idx * 4];
         fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                 idx, vec4[0].f, vec4[1].f, vec4[2].f, vec4[3].f);
      }
   }

   brw_create_constant_surface(brw, upload_bo, upload_offset, bytes,
                               &stage_state->surf_offset[surf_index],
                               dword_pitch);

   /* Drop the local reference now that the surface has been created. */
   drm_intel_bo_unreference(upload_bo);

   brw->ctx.NewDriverState |= brw_new_constbuf;
}
예제 #3
0
/**
 * Copies elements \c min through \c max (inclusive) of a client vertex
 * array into temporary upload memory and describes the result in \c buffer.
 *
 * The destination is packed with \c dst_stride bytes per element.  If the
 * source array already uses that stride the whole range is copied in one
 * go; otherwise the elements are copied one at a time.
 *
 * A source stride of zero is special: the single current attribute value
 * is uploaded once and the buffer's stride is set to 0, since there is no
 * need to replicate it.
 */
static void
copy_array_to_vbo_array(struct brw_context *brw,
                        struct brw_vertex_element *element,
                        int min, int max,
                        struct brw_vertex_buffer *buffer,
                        GLuint dst_stride)
{
   const int src_stride = element->glarray->StrideB;

   if (src_stride != 0) {
      int remaining = max - min + 1;
      const GLuint total = remaining * dst_stride;
      const unsigned char *from = element->glarray->Ptr + min * src_stride;
      uint8_t *to = intel_upload_space(brw, total, dst_stride,
                                       &buffer->bo, &buffer->offset);

      if (dst_stride != src_stride) {
         /* Strides differ: repack element by element. */
         for (; remaining != 0; remaining--) {
            memcpy(to, from, dst_stride);
            from += src_stride;
            to += dst_stride;
         }
      } else {
         /* Layouts match, so one bulk copy covers the whole range. */
         memcpy(to, from, total);
      }

      buffer->stride = dst_stride;
      buffer->size = total;
      return;
   }

   /* Zero source stride: upload the one value, aligned to its own size. */
   intel_upload_data(brw, element->glarray->Ptr,
                     element->glarray->_ElementSize,
                     element->glarray->_ElementSize,
                     &buffer->bo, &buffer->offset);

   buffer->stride = 0;
   buffer->size = element->glarray->_ElementSize;
}
예제 #4
0
파일: brw_curbe.c 프로젝트: vsyrjala/mesa
/**
 * Gathers together all the uniform values into a block of memory to be
 * uploaded into the CURBE, then emits the state packet telling the hardware
 * the new location.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_CURBE_OFFSETS */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   gl_constant_value *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      goto emit;
   }

   buf = intel_upload_space(brw, bufsz, 64,
                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      _mesa_load_state_parameters(ctx, brw->fragment_program->Base.Parameters);

      /* BRW_NEW_CURBE_OFFSETS */
      GLuint offset = brw->curbe.wm_start * 16;

      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) {
	 buf[offset + i] = *brw->wm.prog_data->base.param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0].f = fixed_plane[i][0];
	 buf[offset + i * 4 + 1].f = fixed_plane[i][1];
	 buf[offset + i * 4 + 2].f = fixed_plane[i][2];
	 buf[offset + i * 4 + 3].f = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
	    buf[offset + i * 4 + 0].f = clip_planes[j][0];
	    buf[offset + i * 4 + 1].f = clip_planes[j][1];
	    buf[offset + i * 4 + 2].f = clip_planes[j][2];
	    buf[offset + i * 4 + 3].f = clip_planes[j][3];
	    i++;
	 }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      _mesa_load_state_parameters(ctx, brw->vertex_program->Base.Parameters);

      GLuint offset = brw->curbe.vs_start * 16;

      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) {
         buf[offset + i] = *brw->vs.prog_data->base.base.param[i];
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4)
	 fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   /* Work around mysterious 965 hangs that appear to happen if you do
    * two 3DPRIMITIVEs with only a CONSTANT_BUFFER inbetween.  If we
    * haven't already flushed for some other reason, explicitly do so.
    *
    * We've found no documented reason why this should be necessary.
    */
   if (brw->gen == 4 && !brw->is_g4x &&
       (brw->ctx.NewDriverState & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
예제 #5
0
파일: brw_curbe.c 프로젝트: Echelon9/mesa
/**
 * Collects every uniform value into one block of upload memory destined for
 * the CURBE, then emits the state packet that tells the hardware where the
 * block lives.
 *
 * The block holds, at the offsets recorded in brw->curbe, the fragment
 * shader parameters, the clipper's plane equations (six fixed planes
 * followed by each enabled user clip plane) and the vertex shader
 * parameters.  When the total CURBE size is zero nothing is uploaded and a
 * CONSTANT_BUFFER packet without the valid bit is emitted instead.  On
 * original gen4 parts an extra non-pipelined packet may follow; see the
 * comment at the end of the function.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *const ctx = &brw->ctx;
   /* BRW_NEW_CURBE_OFFSETS */
   const GLuint total = brw->curbe.total_size;

   if (total != 0) {
      /* The *_start and total_size fields are scaled by 16 floats here. */
      const GLuint bytes = total * 16 * sizeof(GLfloat);
      gl_constant_value *curbe =
         intel_upload_space(brw, bytes, 64,
                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
      GLuint n;

      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

      /* fragment shader constants */
      if (brw->curbe.wm_size) {
         _mesa_load_state_parameters(ctx, brw->fragment_program->Parameters);

         /* BRW_NEW_CURBE_OFFSETS */
         const GLuint base = brw->curbe.wm_start * 16;

         /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform
          * values
          */
         for (n = 0; n < brw->wm.base.prog_data->nr_params; n++) {
            curbe[base + n] = *brw->wm.base.prog_data->param[n];
         }
      }

      /* clipper constants */
      if (brw->curbe.clip_size) {
         const GLuint base = brw->curbe.clip_start * 16;
         GLuint slot;   /* index of the next vec4 to fill */
         GLuint comp;
         GLbitfield pending;

         /* If any planes are going this way, send them all this way:
          */
         for (slot = 0; slot < 6; slot++) {
            for (comp = 0; comp < 4; comp++) {
               curbe[base + slot * 4 + comp].f = fixed_plane[slot][comp];
            }
         }

         /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
          * clip-space.  Enabled user planes are packed right after the
          * fixed ones, so slot keeps counting from where it stopped.
          */
         gl_clip_plane *user_planes = brw_select_clip_planes(ctx);
         for (pending = ctx->Transform.ClipPlanesEnabled;
              pending != 0;
              slot++) {
            const int j = u_bit_scan(&pending);

            for (comp = 0; comp < 4; comp++) {
               curbe[base + slot * 4 + comp].f = user_planes[j][comp];
            }
         }
      }

      /* vertex shader constants */
      if (brw->curbe.vs_size) {
         _mesa_load_state_parameters(ctx, brw->vertex_program->Parameters);

         const GLuint base = brw->curbe.vs_start * 16;

         /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform
          * values
          */
         for (n = 0; n < brw->vs.base.prog_data->nr_params; n++) {
            curbe[base + n] = *brw->vs.base.prog_data->param[n];
         }
      }

      /* Disabled debug dump of the assembled buffer. */
      if (0) {
         for (n = 0; n < total*16; n+=4)
            fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", n/8, n&4,
                    curbe[n+0].f, curbe[n+1].f, curbe[n+2].f, curbe[n+3].f);
      }
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

   /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size != 0) {
      /* Bit 8 is set only on this path, where a buffer is supplied. */
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   }
   ADVANCE_BATCH();

   /* Work around a Broadwater/Crestline depth interpolator bug.  The
    * following sequence will cause GPU hangs:
    *
    * 1. Change state so that all depth related fields in CC_STATE are
    *    disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
    * 2. Emit a CONSTANT_BUFFER packet.
    * 3. Draw via 3DPRIMITIVE.
    *
    * The recommended workaround is to emit a non-pipelined state change after
    * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
    *
    * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
    * and always emit it when "PS Use Source Depth" is set.  We could be more
    * precise, but the additional complexity is probably not worth it.
    *
    * BRW_NEW_FRAGMENT_PROGRAM
    */
   if (brw->gen == 4 && !brw->is_g4x &&
       (brw->fragment_program->info.inputs_read & (1 << VARYING_SLOT_POS))) {
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
예제 #6
0
/**
 * Creates a streamed BO containing the push constants for the VS or GS on
 * gen6+.
 *
 * Push constants are constant values (such as GLSL uniforms) that are
 * pre-loaded into a shader stage's register space at thread spawn time.
 *
 * Not all GLSL uniforms will be uploaded as push constants: The hardware has
 * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
 * uploaded as push constants, while GL 4.4 requires at least 1024 components
 * to be usable for the VS.  Plus, currently we always use pull constants
 * instead of push constants when doing variable-index array access.
 *
 * See brw_curbe.c for the equivalent gen4/5 code.
 */
void
gen6_upload_push_constants(struct brw_context *brw,
                           const struct gl_program *prog,
                           const struct brw_stage_prog_data *prog_data,
                           struct brw_stage_state *stage_state)
{
   struct gl_context *ctx = &brw->ctx;

   if (prog_data->nr_params == 0) {
      stage_state->push_const_size = 0;
   } else {
      /* Updates the ParamaterValues[i] pointers for all parameters of the
       * basic type of PROGRAM_STATE_VAR.
       */
      /* XXX: Should this happen somewhere before to get our state flag set? */
      if (prog)
         _mesa_load_state_parameters(ctx, prog->Parameters);

      int i;
      const int size = prog_data->nr_params * sizeof(gl_constant_value);
      gl_constant_value *param;
      if (brw->gen >= 8 || brw->is_haswell) {
         param = intel_upload_space(brw, size, 32,
                                    &stage_state->push_const_bo,
                                    &stage_state->push_const_offset);
      } else {
         param = brw_state_batch(brw, size, 32,
                                 &stage_state->push_const_offset);
      }

      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

      /* _NEW_PROGRAM_CONSTANTS
       *
       * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
       * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
       * wouldn't be set for them.
       */
      for (i = 0; i < prog_data->nr_params; i++) {
         param[i] = *prog_data->param[i];
      }

      if (0) {
         fprintf(stderr, "%s constants:\n",
                 _mesa_shader_stage_to_string(stage_state->stage));
         for (i = 0; i < prog_data->nr_params; i++) {
            if ((i & 7) == 0)
               fprintf(stderr, "g%d: ",
                       prog_data->dispatch_grf_start_reg + i / 8);
            fprintf(stderr, "%8f ", param[i].f);
            if ((i & 7) == 7)
               fprintf(stderr, "\n");
         }
         if ((i & 7) != 0)
            fprintf(stderr, "\n");
         fprintf(stderr, "\n");
      }

      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
      /* We can only push 32 registers of constants at a time. */

      /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
       *
       *     "The sum of all four read length fields (each incremented to
       *      represent the actual read length) must be less than or equal to
       *      32"
       *
       * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
       *
       *     "The sum of all four read length fields must be less than or
       *      equal to the size of 64"
       *
       * The other shader stages all match the VS's limits.
       */
      assert(stage_state->push_const_size <= 32);
   }

   stage_state->push_constants_dirty = true;
}