Example #1
0
/* Setup all vertex pipeline state, rasterizer state, and fragment shader
 * constants, and issue the draw call for PBO upload/download.
 *
 * The caller is responsible for saving and restoring state, as well as for
 * setting other fragment shader state (fragment shader, samplers), and
 * framebuffer/viewport/DSA/blend state.
 */
bool
st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
            unsigned surface_width, unsigned surface_height)
{
   struct cso_context *cso = st->cso_context;

   /* Setup vertex and geometry shaders */
   if (!st->pbo.vs) {
      st->pbo.vs = st_pbo_create_vs(st);
      if (!st->pbo.vs)
         return false;
   }

   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
      st->pbo.gs = st_pbo_create_gs(st);
      if (!st->pbo.gs)
         return false;
   }

   cso_set_vertex_shader_handle(cso, st->pbo.vs);

   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);

   cso_set_tessctrl_shader_handle(cso, NULL);

   cso_set_tesseval_shader_handle(cso, NULL);

   /* Upload vertices */
   {
      struct pipe_vertex_buffer vbo;
      struct pipe_vertex_element velem;

      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;

      float *verts = NULL;

      vbo.user_buffer = NULL;
      vbo.buffer = NULL;
      vbo.stride = 2 * sizeof(float);

      u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
                     &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
      if (!verts)
         return false;

      verts[0] = x0;
      verts[1] = y0;
      verts[2] = x0;
      verts[3] = y1;
      verts[4] = x1;
      verts[5] = y0;
      verts[6] = x1;
      verts[7] = y1;

      u_upload_unmap(st->uploader);

      velem.src_offset = 0;
      velem.instance_divisor = 0;
      velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso);
      velem.src_format = PIPE_FORMAT_R32G32_FLOAT;

      cso_set_vertex_elements(cso, 1, &velem);

      cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);

      pipe_resource_reference(&vbo.buffer, NULL);
   }

   /* Upload constants */
   {
      struct pipe_constant_buffer cb;

      if (st->constbuf_uploader) {
         cb.buffer = NULL;
         cb.user_buffer = NULL;
         u_upload_data(st->constbuf_uploader, 0, sizeof(addr->constants),
                       st->ctx->Const.UniformBufferOffsetAlignment,
                       &addr->constants, &cb.buffer_offset, &cb.buffer);
         if (!cb.buffer)
            return false;

         u_upload_unmap(st->constbuf_uploader);
      } else {
         cb.buffer = NULL;
         cb.user_buffer = &addr->constants;
         cb.buffer_offset = 0;
      }
      cb.buffer_size = sizeof(addr->constants);

      cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);

      pipe_resource_reference(&cb.buffer, NULL);
   }

   /* Rasterizer state */
   cso_set_rasterizer(cso, &st->pbo.raster);

   /* Disable stream output */
   cso_set_stream_outputs(cso, 0, NULL, 0);

   if (addr->depth == 1) {
      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
   } else {
      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
                                0, 4, 0, addr->depth);
   }

   return true;
}
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
        struct r600_resource *rbuffer = r600_resource(resource);
        uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	/* If discarding the entire range, discard the whole resource instead. */
	if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
	    box->x == 0 && box->width == resource->width0) {
		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
		}
		/* At this point, the buffer is always idle. */
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
		 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			} else {
				return NULL; /* error, shouldn't occur though */
			}
		}
		/* At this point, the buffer is always idle (we checked it above). */
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}
	/* Using a staging buffer in GTT for larger reads is much faster. */
	else if ((usage & PIPE_TRANSFER_READ) &&
		 !(usage & PIPE_TRANSFER_WRITE) &&
		 rbuffer->domains == RADEON_DOMAIN_VRAM &&
		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
		struct r600_resource *staging;

		staging = (struct r600_resource*) pipe_buffer_create(
				ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
				box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
		if (staging) {
			/* Copy the VRAM buffer to the staging buffer. */
			rctx->dma_copy(ctx, &staging->b.b, 0,
				       box->x % R600_MAP_BUFFER_ALIGNMENT,
				       0, 0, resource, level, box);

			data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
			data += box->x % R600_MAP_BUFFER_ALIGNMENT;

			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging, 0);
		}
	}

	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
Example #3
0
static enum pipe_error
emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
{
   const struct pipe_constant_buffer *cbuf;
   struct pipe_resource *dst_buffer = NULL;
   enum pipe_error ret = PIPE_OK;
   struct pipe_transfer *src_transfer;
   struct svga_winsys_surface *dst_handle;
   float extras[MAX_EXTRA_CONSTS][4];
   unsigned extra_count, extra_size, extra_offset;
   unsigned new_buf_size;
   void *src_map = NULL, *dst_map;
   unsigned offset;
   const struct svga_shader_variant *variant;

   assert(shader == PIPE_SHADER_VERTEX ||
          shader == PIPE_SHADER_GEOMETRY ||
          shader == PIPE_SHADER_FRAGMENT);

   cbuf = &svga->curr.constbufs[shader][0];

   switch (shader) {
   case PIPE_SHADER_VERTEX:
      variant = svga->state.hw_draw.vs;
      extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
      break;
   case PIPE_SHADER_FRAGMENT:
      variant = svga->state.hw_draw.fs;
      extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = svga->state.hw_draw.gs;
      extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
      break;
   default:
      assert(!"Unexpected shader type");
      /* Don't return an error code since we don't want to keep re-trying
       * this function and getting stuck in an infinite loop.
       */
      return PIPE_OK;
   }

   assert(variant);

   /* Compute extra constants size and offset in bytes */
   extra_size = extra_count * 4 * sizeof(float);
   extra_offset = 4 * sizeof(float) * variant->extra_const_start;

   if (cbuf->buffer_size + extra_size == 0)
      return PIPE_OK;  /* nothing to do */

   /* Typically, the cbuf->buffer here is a user-space buffer so mapping
    * it is really cheap.  If we ever get real HW buffers for constants
    * we should void mapping and instead use a ResourceCopy command.
    */
   if (cbuf->buffer_size > 0) {
      src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
                                      cbuf->buffer_offset, cbuf->buffer_size,
                                      PIPE_TRANSFER_READ, &src_transfer);
      assert(src_map);
      if (!src_map) {
         return PIPE_ERROR_OUT_OF_MEMORY;
      }
   }

   /* The new/dest buffer's size must be large enough to hold the original,
    * user-specified constants, plus the extra constants.
    * The size of the original constant buffer _should_ agree with what the
    * shader is expecting, but it might not (it's not enforced anywhere by
    * gallium).
    */
   new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;

   /* According to the DX10 spec, the constant buffer size must be
    * in multiples of 16.
    */
   new_buf_size = align(new_buf_size, 16);

   u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
                  &dst_buffer, &dst_map);
   if (!dst_map) {
      if (src_map)
         pipe_buffer_unmap(&svga->pipe, src_transfer);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   if (src_map) {
      memcpy(dst_map, src_map, cbuf->buffer_size);
      pipe_buffer_unmap(&svga->pipe, src_transfer);
   }

   if (extra_size) {
      assert(extra_offset + extra_size <= new_buf_size);
      memcpy((char *) dst_map + extra_offset, extras, extra_size);
   }
   u_upload_unmap(svga->const0_upload);

   /* Issue the SetSingleConstantBuffer command */
   dst_handle = svga_buffer_handle(svga, dst_buffer);
   if (!dst_handle) {
      pipe_resource_reference(&dst_buffer, NULL);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   assert(new_buf_size % 16 == 0);
   ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                               0, /* index */
                                               svga_shader_type(shader),
                                               dst_handle,
                                               offset,
                                               new_buf_size);

   if (ret != PIPE_OK) {
      pipe_resource_reference(&dst_buffer, NULL);
      return ret;
   }

   /* Save this const buffer until it's replaced in the future.
    * Otherwise, all references to the buffer will go away after the
    * command buffer is submitted, it'll get recycled and we will have
    * incorrect constant buffer bindings.
    */
   pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);

   svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;

   pipe_resource_reference(&dst_buffer, NULL);

   return ret;
}
Example #4
0
static void
setup_bitmap_vertex_data(struct st_context *st, bool normalized,
                         int x, int y, int width, int height,
                         float z, const float color[4],
			 struct pipe_resource **vbuf,
			 unsigned *vbuf_offset)
{
   const GLfloat fb_width = (GLfloat)st->state.framebuffer.width;
   const GLfloat fb_height = (GLfloat)st->state.framebuffer.height;
   const GLfloat x0 = (GLfloat)x;
   const GLfloat x1 = (GLfloat)(x + width);
   const GLfloat y0 = (GLfloat)y;
   const GLfloat y1 = (GLfloat)(y + height);
   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
   GLuint i;
   float (*vertices)[3][4];  /**< vertex pos + color + texcoord */

   if(!normalized)
   {
      sRight = (GLfloat) width;
      tBot = (GLfloat) height;
   }

   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
                      vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {
      return;
   }

   /* Positions are in clip coords since we need to do clipping in case
    * the bitmap quad goes beyond the window bounds.
    */
   vertices[0][0][0] = clip_x0;
   vertices[0][0][1] = clip_y0;
   vertices[0][2][0] = sLeft;
   vertices[0][2][1] = tTop;

   vertices[1][0][0] = clip_x1;
   vertices[1][0][1] = clip_y0;
   vertices[1][2][0] = sRight;
   vertices[1][2][1] = tTop;
   
   vertices[2][0][0] = clip_x1;
   vertices[2][0][1] = clip_y1;
   vertices[2][2][0] = sRight;
   vertices[2][2][1] = tBot;
   
   vertices[3][0][0] = clip_x0;
   vertices[3][0][1] = clip_y1;
   vertices[3][2][0] = sLeft;
   vertices[3][2][1] = tBot;
   
   /* same for all verts: */
   for (i = 0; i < 4; i++) {
      vertices[i][0][2] = z;
      vertices[i][0][3] = 1.0f;
      vertices[i][1][0] = color[0];
      vertices[i][1][1] = color[1];
      vertices[i][1][2] = color[2];
      vertices[i][1][3] = color[3];
      vertices[i][2][2] = 0.0; /*R*/
      vertices[i][2][3] = 1.0; /*Q*/
   }

   u_upload_unmap(st->uploader);
}
Example #5
0
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
	struct si_descriptors *desc = &sctx->vertex_buffers;
	bool bound[SI_NUM_VERTEX_BUFFERS] = {};
	unsigned i, count = sctx->vertex_elements->count;
	uint64_t va;
	uint32_t *ptr;

	if (!sctx->vertex_buffers_dirty)
		return true;
	if (!count || !sctx->vertex_elements)
		return true;

	/* Vertex buffer descriptors are the only ones which are uploaded
	 * directly through a staging buffer and don't go through
	 * the fine-grained upload path.
	 */
	u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
		       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
	if (!desc->buffer)
		return false;

	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
			      desc->buffer, RADEON_USAGE_READ,
			      RADEON_PRIO_DESCRIPTORS);

	assert(count <= SI_NUM_VERTEX_BUFFERS);

	for (i = 0; i < count; i++) {
		struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
		struct pipe_vertex_buffer *vb;
		struct r600_resource *rbuffer;
		unsigned offset;
		uint32_t *desc = &ptr[i*4];

		if (ve->vertex_buffer_index >= Elements(sctx->vertex_buffer)) {
			memset(desc, 0, 16);
			continue;
		}

		vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
		rbuffer = (struct r600_resource*)vb->buffer;
		if (!rbuffer) {
			memset(desc, 0, 16);
			continue;
		}

		offset = vb->buffer_offset + ve->src_offset;
		va = rbuffer->gpu_address + offset;

		/* Fill in T# buffer resource description */
		desc[0] = va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
			  S_008F04_STRIDE(vb->stride);

		if (sctx->b.chip_class <= CIK && vb->stride)
			/* Round up by rounding down and adding 1 */
			desc[2] = (vb->buffer->width0 - offset -
				   sctx->vertex_elements->format_size[i]) /
				  vb->stride + 1;
		else
			desc[2] = vb->buffer->width0 - offset;

		desc[3] = sctx->vertex_elements->rsrc_word3[i];

		if (!bound[ve->vertex_buffer_index]) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					      (struct r600_resource*)vb->buffer,
					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
			bound[ve->vertex_buffer_index] = true;
		}
	}

	/* Don't flush the const cache. It would have a very negative effect
	 * on performance (confirmed by testing). New descriptors are always
	 * uploaded to a fresh new buffer, so I don't think flushing the const
	 * cache is needed. */
	desc->pointer_dirty = true;
	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
	sctx->vertex_buffers_dirty = false;
	return true;
}