Example #1
static void r600_buffer_destroy(struct pipe_screen *screen,
				struct pipe_resource *buf)
{
	struct r600_resource *rbuffer = r600_resource(buf);

	util_range_destroy(&rbuffer->valid_buffer_range);
	pb_reference(&rbuffer->buf, NULL);
	FREE(rbuffer);
}
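The teardown above relies on Gallium's pb_reference(&rbuffer->buf, NULL), which drops the last winsys-buffer reference before the CPU-side wrapper is freed. Below is a minimal, self-contained sketch of that pointer-reassignment refcounting idiom; the struct and helper names are hypothetical stand-ins, not the actual pb_buffer API.

#include <stdlib.h>

/* Hypothetical refcounted buffer used only to illustrate the idiom. */
struct ref_buf {
	int refcount;
};

static void ref_buf_release(struct ref_buf *b)
{
	if (b && --b->refcount == 0)
		free(b);
}

/* reference(&dst, src): retain src, release what dst held, repoint dst.
 * Passing src == NULL therefore just drops the old reference, which is
 * exactly how r600_buffer_destroy releases rbuffer->buf above. */
static void ref_buf_reference(struct ref_buf **dst, struct ref_buf *src)
{
	if (src)
		src->refcount++;
	ref_buf_release(*dst);
	*dst = src;
}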
Example #2
void r600_invalidate_resource(struct pipe_context *ctx,
			      struct pipe_resource *resource)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(resource);

	/* We currently only do anything here for buffers */
	if (resource->target == PIPE_BUFFER)
		(void)r600_invalidate_buffer(rctx, rbuffer);
}
Example #3
static void cik_sdma_do_copy_buffer(struct si_context *ctx,
				    struct pipe_resource *dst,
				    struct pipe_resource *src,
				    uint64_t dst_offset,
				    uint64_t src_offset,
				    uint64_t size)
{
	struct radeon_winsys_cs *cs = ctx->b.dma.cs;
	unsigned i, ncopy, csize;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
	r600_need_dma_space(&ctx->b, ncopy * 7);

	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
			      RADEON_PRIO_SDMA_BUFFER);
	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rdst, RADEON_USAGE_WRITE,
			      RADEON_PRIO_SDMA_BUFFER);

	for (i = 0; i < ncopy; i++) {
		csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
		cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
						     CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
						     0);
		cs->buf[cs->cdw++] = csize;
		cs->buf[cs->cdw++] = 0; /* src/dst endian swap */
		cs->buf[cs->cdw++] = src_offset;
		cs->buf[cs->cdw++] = src_offset >> 32;
		cs->buf[cs->cdw++] = dst_offset;
		cs->buf[cs->cdw++] = dst_offset >> 32;
		dst_offset += csize;
		src_offset += csize;
		size -= csize;
	}
}
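The packet budgeting above is a ceiling division: the copy is split into chunks of at most CIK_SDMA_COPY_MAX_SIZE bytes, and each loop iteration emits exactly 7 dwords, hence the ncopy * 7 reservation. A standalone sketch of that arithmetic, using an assumed maximum chunk size purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define MAX_CHUNK_EXAMPLE 0x1fffffu	/* assumption; the driver defines the real limit */

int main(void)
{
	uint64_t size = 5u * 1024 * 1024;	/* hypothetical 5 MiB copy */

	/* Ceiling division, same form as in cik_sdma_do_copy_buffer. */
	unsigned ncopy = (size + MAX_CHUNK_EXAMPLE - 1) / MAX_CHUNK_EXAMPLE;

	/* 7 dwords per COPY packet, as emitted in the loop above. */
	printf("%u packets, %u dwords reserved\n", ncopy, ncopy * 7);
	return 0;
}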
Example #4
static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
					struct pipe_transfer *transfer)
{
	struct r600_context *rctx = (struct r600_context*)pipe;
	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
	struct r600_resource *rbuffer = r600_resource(transfer->resource);

	if (rtransfer->staging) {
		struct pipe_resource *dst, *src;
		unsigned soffset, doffset, size;

		dst = transfer->resource;
		src = &rtransfer->staging->b.b;
		size = transfer->box.width;
		doffset = transfer->box.x;
		soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
		/* Copy the staging buffer into the original one. */
		if (rctx->b.rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) {
			if (rctx->screen->b.chip_class >= EVERGREEN) {
				evergreen_dma_copy(rctx, dst, src, doffset, soffset, size);
			} else {
				r600_dma_copy(rctx, dst, src, doffset, soffset, size);
			}
		} else {
			struct pipe_box box;

			u_box_1d(soffset, size, &box);
			r600_copy_buffer(pipe, dst, doffset, src, &box);
		}
		pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
	}

	if (transfer->usage & PIPE_TRANSFER_WRITE) {
		util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
			       transfer->box.x + transfer->box.width);
	}
	util_slab_free(&rctx->pool_transfers, transfer);
}
Example #5
static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
                                       struct pipe_transfer *transfer)
{
    struct r600_common_context *rctx = (struct r600_common_context*)ctx;
    struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
    struct r600_resource *rbuffer = r600_resource(transfer->resource);

    if (rtransfer->staging) {
        struct pipe_resource *dst, *src;
        unsigned soffset, doffset, size;
        struct pipe_box box;

        dst = transfer->resource;
        src = &rtransfer->staging->b.b;
        size = transfer->box.width;
        doffset = transfer->box.x;
        soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;

        u_box_1d(soffset, size, &box);

        /* Copy the staging buffer into the original one. */
        if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
                rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
            /* DONE. */
        } else {
            ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
        }
        pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
    }

    if (transfer->usage & PIPE_TRANSFER_WRITE) {
        util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
                       transfer->box.x + transfer->box.width);
    }
    util_slab_free(&rctx->pool_transfers, transfer);
}
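The branch that picks the copy path above hinges on dword alignment: the DMA engine is only attempted when the size and both offsets are multiples of 4, with resource_copy_region as the fallback. A sketch of that predicate, assuming nothing beyond standard C:

#include <stdbool.h>

/* True when the async DMA copy may be used; mirrors the check in
 * r600_buffer_transfer_unmap above. */
static bool dma_copy_allowed(unsigned size, unsigned doffset, unsigned soffset)
{
	return !(size % 4) && !(doffset % 4) && !(soffset % 4);
}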
Example #6
    for (i = 0; i < rctx->streamout.num_targets; i++) {
        if (!t[i])
            continue;

        t[i]->stride_in_dw = stride_in_dw[i];

        if (rctx->chip_class >= SI) {
            /* SI binds streamout buffers as shader resources.
             * VGT only counts primitives and tells the shader
             * through SGPRs what to do. */
            r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
            radeon_emit(cs, (t[i]->b.buffer_offset +
                             t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
            radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
        } else {
            uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;

            update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);

            r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
            radeon_emit(cs, (t[i]->b.buffer_offset +
                             t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
            radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
            radeon_emit(cs, va >> 8);			/* BUFFER_BASE */

            r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
                            RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);

            /* R7xx requires this packet after updating BUFFER_BASE.
             * Without this, R7xx locks up. */
            if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
Example #7
struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
{
	struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen);

	if (rscreen == NULL) {
		return NULL;
	}

	/* Set functions first. */
	rscreen->b.b.context_create = r600_create_context;
	rscreen->b.b.destroy = r600_destroy_screen;
	rscreen->b.b.get_param = r600_get_param;
	rscreen->b.b.get_shader_param = r600_get_shader_param;
	rscreen->b.b.resource_create = r600_resource_create;

	if (!r600_common_screen_init(&rscreen->b, ws)) {
		FREE(rscreen);
		return NULL;
	}

	if (rscreen->b.info.chip_class >= EVERGREEN) {
		rscreen->b.b.is_format_supported = evergreen_is_format_supported;
	} else {
		rscreen->b.b.is_format_supported = r600_is_format_supported;
	}

	rscreen->b.debug_flags |= debug_get_flags_option("R600_DEBUG", r600_debug_options, 0);
	if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE))
		rscreen->b.debug_flags |= DBG_COMPUTE;
	if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE))
		rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
	if (debug_get_bool_option("R600_HYPERZ", FALSE))
		rscreen->b.debug_flags |= DBG_HYPERZ;
	if (debug_get_bool_option("R600_LLVM", FALSE))
		rscreen->b.debug_flags |= DBG_LLVM;

	if (rscreen->b.family == CHIP_UNKNOWN) {
		fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id);
		FREE(rscreen);
		return NULL;
	}

	/* Figure out streamout kernel support. */
	switch (rscreen->b.chip_class) {
	case R600:
		if (rscreen->b.family < CHIP_RS780) {
			rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 14;
		} else {
			rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 23;
		}
		break;
	case R700:
		rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 17;
		break;
	case EVERGREEN:
	case CAYMAN:
		rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 14;
		break;
	default:
		rscreen->b.has_streamout = FALSE;
		break;
	}

	/* MSAA support. */
	switch (rscreen->b.chip_class) {
	case R600:
	case R700:
		rscreen->has_msaa = rscreen->b.info.drm_minor >= 22;
		rscreen->has_compressed_msaa_texturing = false;
		break;
	case EVERGREEN:
		rscreen->has_msaa = rscreen->b.info.drm_minor >= 19;
		rscreen->has_compressed_msaa_texturing = rscreen->b.info.drm_minor >= 24;
		break;
	case CAYMAN:
		rscreen->has_msaa = rscreen->b.info.drm_minor >= 19;
		rscreen->has_compressed_msaa_texturing = true;
		break;
	default:
		rscreen->has_msaa = false;
		rscreen->has_compressed_msaa_texturing = false;
	}

	rscreen->b.has_cp_dma = rscreen->b.info.drm_minor >= 27 &&
			      !(rscreen->b.debug_flags & DBG_NO_CP_DMA);

	rscreen->global_pool = compute_memory_pool_new(rscreen);

	/* Create the auxiliary context. This must be done last. */
	rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL);

#if 0 /* This is for testing whether aux_context and buffer clearing work correctly. */
	struct pipe_resource templ = {};

	templ.width0 = 4;
	templ.height0 = 2048;
	templ.depth0 = 1;
	templ.array_size = 1;
	templ.target = PIPE_TEXTURE_2D;
	templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
	templ.usage = PIPE_USAGE_DEFAULT;

	struct r600_resource *res = r600_resource(rscreen->b.b.resource_create(&rscreen->b.b, &templ));
	unsigned char *map = ws->buffer_map(res->cs_buf, NULL, PIPE_TRANSFER_WRITE);

	memset(map, 0, 256);

	r600_screen_clear_buffer(rscreen, &res->b.b, 4, 4, 0xCC);
	r600_screen_clear_buffer(rscreen, &res->b.b, 8, 4, 0xDD);
	r600_screen_clear_buffer(rscreen, &res->b.b, 12, 4, 0xEE);
	r600_screen_clear_buffer(rscreen, &res->b.b, 20, 4, 0xFF);
	r600_screen_clear_buffer(rscreen, &res->b.b, 32, 20, 0x87);

	ws->buffer_wait(res->buf, RADEON_USAGE_WRITE);

	int i;
	for (i = 0; i < 256; i++) {
		printf("%02X", map[i]);
		if (i % 16 == 15)
			printf("\n");
	}
#endif

	return &rscreen->b.b;
}
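Screen creation reads its debug switches from environment variables via Gallium's debug_get_bool_option() and debug_get_flags_option(). Below is a minimal sketch of that pattern using a plain getenv(); the helper is a hypothetical stand-in and the real Gallium parsers accept more spellings:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

static bool env_flag(const char *name, bool dfault)
{
	const char *v = getenv(name);
	if (!v)
		return dfault;
	return strcmp(v, "0") != 0 && strcmp(v, "false") != 0;
}

/* Usage mirroring the code above:
 *	if (env_flag("R600_DEBUG_COMPUTE", false))
 *		debug_flags |= DBG_COMPUTE;
 */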
Example #8
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
				      struct pipe_transfer *transfer)
{
	struct r600_resource *rbuffer = r600_resource(transfer->resource);
	struct r600_context *rctx = (struct r600_context*)pipe;
	uint8_t *data;

	if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(transfer->usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
					   rbuffer->b.b.bind, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->num_so_targets; i++) {
				if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
					r600_context_streamout_end(rctx);
					rctx->streamout_start = TRUE;
					rctx->streamout_append_bitmask = ~0;
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, &rctx->vs_constbuf_state, rbuffer);
			r600_set_constants_dirty_if_bound(rctx, &rctx->ps_constbuf_state, rbuffer);
		}
	}
#if 0 /* this is broken (see Bug 53130) */
	else if ((transfer->usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 rctx->screen->has_streamout &&
		 /* The buffer range must be aligned to 4. */
		 transfer->box.x % 4 == 0 && transfer->box.width % 4 == 0) {
		assert(transfer->usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;

			rtransfer->staging = (struct r600_resource*)
				pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
						   PIPE_USAGE_STAGING, transfer->box.width);
			return rctx->ws->buffer_map(rtransfer->staging->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
		}
	}
#endif

	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, transfer->usage);
	if (!data)
		return NULL;

	return (uint8_t*)data + transfer->box.x;
}
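The vertex-buffer rebind above walks enabled_mask with u_bit_scan(), which pops the lowest set bit and returns its index. A self-contained reimplementation of that idiom (using the GCC/Clang __builtin_ctz builtin; the real helper lives in Gallium's u_math.h):

#include <stdio.h>

static int bit_scan(unsigned *mask)
{
	int i = __builtin_ctz(*mask);	/* index of the lowest set bit */
	*mask &= *mask - 1;		/* clear that bit */
	return i;
}

int main(void)
{
	unsigned mask = 0x15;	/* slots 0, 2 and 4 enabled */
	while (mask)
		printf("slot %d is bound\n", bit_scan(&mask));
	return 0;
}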
Example #9
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
				      struct pipe_transfer **ptransfer)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
					   rbuffer->b.b.bind, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->num_so_targets; i++) {
				if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
					r600_context_streamout_end(rctx);
					rctx->streamout_start = TRUE;
					rctx->streamout_append_bitmask = ~0;
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 rctx->screen->has_streamout &&
		 /* The buffer range must be aligned to 4. */
		 box->x % 4 == 0 && box->width % 4 == 0) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			struct r600_resource *staging = (struct r600_resource*)
				pipe_buffer_create(ctx->screen, PIPE_BIND_VERTEX_BUFFER,
						   PIPE_USAGE_STAGING,
						   box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
			data = rctx->ws->buffer_map(staging->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);

			if (!data)
				return NULL;

			data += box->x % R600_MAP_BUFFER_ALIGNMENT;
			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging);
		}
	}

	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL);
}
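The staging path above over-allocates by box->x % R600_MAP_BUFFER_ALIGNMENT and advances the returned pointer by the same amount, so the data keeps the sub-alignment of the original offset and the later flush-back copy stays aligned. A sketch of that arithmetic with an assumed alignment value:

#include <stdio.h>

#define MAP_ALIGN_EXAMPLE 64	/* assumption; the driver defines R600_MAP_BUFFER_ALIGNMENT */

int main(void)
{
	unsigned x = 100, width = 256;
	unsigned pad = x % MAP_ALIGN_EXAMPLE;	/* 100 % 64 = 36 */

	/* Allocate width + pad bytes; hand out staging_base + pad, which is
	 * congruent to x modulo the alignment, just like the code above. */
	printf("alloc %u bytes, data at offset %u\n", width + pad, pad);
	return 0;
}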
Example #10
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	/* If discarding the entire range, discard the whole resource instead. */
	if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
	    box->x == 0 && box->width == resource->width0) {
		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		if (r600_invalidate_buffer(rctx, rbuffer)) {
			/* At this point, the buffer is always idle. */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
		 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       256, &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
		} else {
			/* At this point, the buffer is always idle (we checked it above). */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	/* Using a staging buffer in GTT for larger reads is much faster. */
	else if ((usage & PIPE_TRANSFER_READ) &&
		 !(usage & PIPE_TRANSFER_WRITE) &&
		 rbuffer->domains == RADEON_DOMAIN_VRAM &&
		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
		struct r600_resource *staging;

		staging = (struct r600_resource*) pipe_buffer_create(
				ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
				box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
		if (staging) {
			/* Copy the VRAM buffer to the staging buffer. */
			rctx->dma_copy(ctx, &staging->b.b, 0,
				       box->x % R600_MAP_BUFFER_ALIGNMENT,
				       0, 0, resource, level, box);

			data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
			data += box->x % R600_MAP_BUFFER_ALIGNMENT;

			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging, 0);
		}
	}

	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
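The two early checks above rest on valid_buffer_range bookkeeping: writes grow the initialized range, and a mapping that does not intersect it can skip GPU synchronization entirely. A minimal stand-in for that logic; the names are illustrative, not the Gallium util_range API:

#include <stdbool.h>

struct range { unsigned start, end; bool valid; };

/* Grow the initialized range to cover [start, end). */
static void range_add(struct range *r, unsigned start, unsigned end)
{
	if (!r->valid) {
		r->start = start;
		r->end = end;
		r->valid = true;
	} else {
		if (start < r->start) r->start = start;
		if (end > r->end)     r->end = end;
	}
}

/* True when [start, end) touches initialized data and the map must sync. */
static bool ranges_intersect(const struct range *r, unsigned start, unsigned end)
{
	return r->valid && start < r->end && end > r->start;
}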
Example #11
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	/* Flush the caches where the resources are bound. */
	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
			 R600_CONTEXT_INV_VERTEX_CACHE |
			 R600_CONTEXT_INV_TEX_CACHE |
			 R600_CONTEXT_FLUSH_AND_INV |
			 R600_CONTEXT_FLUSH_AND_INV_CB |
			 R600_CONTEXT_FLUSH_AND_INV_DB |
			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
			 R600_CONTEXT_FLUSH_AND_INV_DB_META |
			 R600_CONTEXT_STREAMOUT_FLUSH |
			 R600_CONTEXT_WAIT_3D_IDLE;

	/* There are differences between R700 and EG in CP DMA,
	 * but we only use the common bits here. */
	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned src_reloc, dst_reloc;

		r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}

		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src,
						  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst,
						  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, src_offset);	/* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, sync | ((src_offset >> 32) & 0xff));		/* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
		radeon_emit(cs, dst_offset);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32) & 0xff);		/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, src_reloc);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, dst_reloc);

		size -= byte_count;
		src_offset += byte_count;
		dst_offset += byte_count;
	}

	/* Invalidate the read caches. */
	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
			 R600_CONTEXT_INV_VERTEX_CACHE |
			 R600_CONTEXT_INV_TEX_CACHE;
}
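The loop above caps every packet at CP_DMA_MAX_BYTE_COUNT and sets the CP_SYNC bit only on the final chunk, so completion is signalled once, after all data has landed. A standalone sketch of that chunking, with an assumed maximum byte count:

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX_BYTES_EXAMPLE 0x1ffff8u	/* assumption; the driver defines CP_DMA_MAX_BYTE_COUNT */

int main(void)
{
	unsigned size = 3u * MAX_BYTES_EXAMPLE + 100;

	while (size) {
		unsigned byte_count = MIN2(size, MAX_BYTES_EXAMPLE);
		int last = (size == byte_count);	/* sync only on the last packet */

		printf("copy %u bytes%s\n", byte_count, last ? " (CP_SYNC)" : "");
		size -= byte_count;
	}
	return 0;
}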
Example #12
void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
			      struct pipe_resource *buffer)
{
	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
	struct r600_resource *rbuffer = r600_resource(buffer);
	struct r600_pipe_resource_state *rstate;
	uint32_t offset;

	/* Note that the state tracker can unbind constant buffers by
	 * passing NULL here.
	 */
	if (buffer == NULL) {
		return;
	}

	r600_upload_const_buffer(rctx, &rbuffer, &offset);

	switch (shader) {
	case PIPE_SHADER_VERTEX:
		rctx->vs_const_buffer.nregs = 0;
		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
					R_028180_ALU_CONST_BUFFER_SIZE_VS_0 + index * 4,
					ALIGN_DIVUP(buffer->width0 >> 4, 16),
					0xFFFFFFFF, NULL, 0);
		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
					R_028980_ALU_CONST_CACHE_VS_0 + index * 4,
					offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);

		rstate = &rctx->vs_const_buffer_resource[index];
		if (!rstate->id) {
			if (rctx->chip_class >= EVERGREEN) {
				evergreen_pipe_init_buffer_resource(rctx, rstate);
			} else {
				r600_pipe_init_buffer_resource(rctx, rstate);
			}
		}

		if (rctx->chip_class >= EVERGREEN) {
			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
		} else {
			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
			r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
		}
		break;
	case PIPE_SHADER_FRAGMENT:
		rctx->ps_const_buffer.nregs = 0;
		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
					R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
					ALIGN_DIVUP(buffer->width0 >> 4, 16),
					0xFFFFFFFF, NULL, 0);
		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
					R_028940_ALU_CONST_CACHE_PS_0,
					offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);

		rstate = &rctx->ps_const_buffer_resource[index];
		if (!rstate->id) {
			if (rctx->chip_class >= EVERGREEN) {
				evergreen_pipe_init_buffer_resource(rctx, rstate);
			} else {
				r600_pipe_init_buffer_resource(rctx, rstate);
			}
		}
		if (rctx->chip_class >= EVERGREEN) {
			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
		} else {
			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
			r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
		}
		break;
	default:
		R600_ERR("unsupported %d\n", shader);
		return;
	}

	if (buffer != &rbuffer->b.b.b)
		pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);
}
Example #13
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
		} else {
			uint64_t va = r600_resource_va(rctx->b.screen,
						       (void*)t[i]->b.buffer);

			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);

			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
			radeon_emit(cs, va >> 8);			/* BUFFER_BASE */

			r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
					RADEON_USAGE_WRITE);

			/* R7xx requires this packet after updating BUFFER_BASE.
			 * Without this, R7xx locks up. */
			if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
				radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
				radeon_emit(cs, i);
				radeon_emit(cs, va >> 8);

				r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
						RADEON_USAGE_WRITE);
			}
		}

		if (rctx->streamout.append_bitmask & (1 << i)) {
Example #14
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
					struct pipe_resource *resource,
					unsigned level,
					unsigned usage,
					const struct pipe_box *box,
					struct pipe_transfer **ptransfer)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(&rctx->screen->b, rbuffer, rbuffer->b.b.width0, 4096,
					   TRUE, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->b.streamout.num_targets; i++) {
				if (rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
					if (rctx->b.streamout.begin_emitted) {
						r600_emit_streamout_end(&rctx->b);
					}
					rctx->b.streamout.append_bitmask = rctx->b.streamout.enabled_mask;
					r600_streamout_buffers_dirty(&rctx->b);
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rctx->screen->b.debug_flags & DBG_NO_DISCARD_RANGE) &&
		 (rctx->screen->has_cp_dma ||
		  (rctx->screen->has_streamout &&
		   /* The buffer range must be aligned to 4 with streamout. */
		   box->x % 4 == 0 && box->width % 4 == 0))) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
		}
	}

	/* mmap and synchronize with rings */
	data = r600_buffer_map_sync_with_rings(&rctx->b, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
Example #15
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
    struct r600_common_context *rctx = (struct r600_common_context*)ctx;
    struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
    struct r600_resource *rbuffer = r600_resource(resource);
    uint8_t *data;

    assert(box->x + box->width <= resource->width0);

    /* See if the buffer range being mapped has never been initialized,
     * in which case it can be mapped unsynchronized. */
    if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
            usage & PIPE_TRANSFER_WRITE &&
            !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }

    /* If discarding the entire range, discard the whole resource instead. */
    if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
            box->x == 0 && box->width == resource->width0) {
        usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
    }

    if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
        assert(usage & PIPE_TRANSFER_WRITE);

        /* Check if mapping this buffer would cause waiting for the GPU. */
        if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
            rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
        }
        /* At this point, the buffer is always idle. */
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }
    else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
             !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
             !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
             (rscreen->has_cp_dma ||
              (rscreen->has_streamout &&
               /* The buffer range must be aligned to 4 with streamout. */
               box->x % 4 == 0 && box->width % 4 == 0))) {
        assert(usage & PIPE_TRANSFER_WRITE);

        /* Check if mapping this buffer would cause waiting for the GPU. */
        if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
            /* Do a wait-free write-only transfer using a temporary buffer. */
            unsigned offset;
            struct r600_resource *staging = NULL;

            u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
                           &offset, (struct pipe_resource**)&staging, (void**)&data);

            if (staging) {
                data += box->x % R600_MAP_BUFFER_ALIGNMENT;
                return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                                ptransfer, data, staging, offset);
            } else {
                return NULL; /* error, shouldn't occur though */
            }
        }
        /* At this point, the buffer is always idle (we checked it above). */
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }

    data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
    if (!data) {
        return NULL;
    }
    data += box->x;

    return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                    ptransfer, data, NULL, 0);
}
Example #16
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	/* Flush the caches where the resources are bound. */
	rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER) |
			 R600_CONTEXT_WAIT_3D_IDLE;

	/* There are differences between R700 and EG in CP DMA,
	 * but we only use the common bits here. */
	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned src_reloc, dst_reloc;

		r600_need_cs_space(rctx,
				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}

		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src,
						  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst,
						  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, src_offset);	/* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, sync | ((src_offset >> 32) & 0xff));		/* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
		radeon_emit(cs, dst_offset);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32) & 0xff);		/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, src_reloc);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, dst_reloc);

		size -= byte_count;
		src_offset += byte_count;
		dst_offset += byte_count;
	}

	/* CP_DMA_CP_SYNC doesn't wait for idle on R6xx, but this does. */
	if (rctx->b.chip_class == R600)
		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL,
				      S_008040_WAIT_CP_DMA_IDLE(1));

	/* CP DMA is executed in ME, but index buffers are read by PFP.
	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
	 * indices. If we wanted to execute CP DMA in PFP, this packet
	 * should precede it.
	 */
	r600_emit_pfp_sync_me(rctx);
}