static bool
r600_invalidate_buffer(struct r600_common_context *rctx,
		       struct r600_resource *rbuffer)
{
	/* In AMD_pinned_memory, the user pointer association only gets
	 * broken when the buffer is explicitly re-allocated.
	 */
	if (rctx->ws->buffer_is_user_ptr(rbuffer->buf))
		return false;

	/* Check if mapping this buffer would cause waiting for the GPU. */
	if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
	    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
		rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
	} else {
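		/* The buffer is idle: keep its storage and just forget which
		 * ranges hold valid data, so a later write map of an
		 * uninitialized range can go through unsynchronized. */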
		util_range_set_empty(&rbuffer->valid_buffer_range);
	}

	return true;
}

static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	/* If discarding the entire range, discard the whole resource instead. */
	if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
	    box->x == 0 && box->width == resource->width0) {
		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		if (r600_invalidate_buffer(rctx, rbuffer)) {
			/* At this point, the buffer is always idle. */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
		 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       256, &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
		} else {
			/* At this point, the buffer is always idle (we checked it above). */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	/* Using a staging buffer in GTT for larger reads is much faster. */
	else if ((usage & PIPE_TRANSFER_READ) &&
		 !(usage & PIPE_TRANSFER_WRITE) &&
		 rbuffer->domains == RADEON_DOMAIN_VRAM &&
		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
		struct r600_resource *staging;

		staging = (struct r600_resource*) pipe_buffer_create(
				ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
				box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
		if (staging) {
			/* Copy the VRAM buffer to the staging buffer. */
			rctx->dma_copy(ctx, &staging->b.b, 0,
				       box->x % R600_MAP_BUFFER_ALIGNMENT,
				       0, 0, resource, level, box);

			data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
			data += box->x % R600_MAP_BUFFER_ALIGNMENT;

			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging, 0);
		}
	}

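	/* mmap and synchronize with rings */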
	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
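
A minimal usage sketch (not part of the driver source) of how a gallium frontend typically reaches the wait-free DISCARD_RANGE path above; pipe_buffer_map_range()/pipe_buffer_unmap() are the stock helpers from util/u_inlines.h, and the function name and parameters here are illustrative only:

/* Sketch: update a sub-range of a possibly busy buffer without stalling.
 * Assumes "util/u_inlines.h" and <string.h> are included and that
 * offset + size <= buf->width0. */
static void update_buffer_range_example(struct pipe_context *ctx,
					struct pipe_resource *buf,
					unsigned offset, unsigned size,
					const void *src)
{
	struct pipe_transfer *transfer;
	void *map = pipe_buffer_map_range(ctx, buf, offset, size,
					  PIPE_TRANSFER_WRITE |
					  PIPE_TRANSFER_DISCARD_RANGE,
					  &transfer);

	if (!map)
		return;

	/* With DISCARD_RANGE set, r600_buffer_transfer_map() above may hand
	 * back a temporary upload buffer whose contents are copied into the
	 * real buffer when the transfer is unmapped. */
	memcpy(map, src, size);
	pipe_buffer_unmap(ctx, transfer);
}
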
Example #3
static void *r600_texture_transfer_map(struct pipe_context *ctx,
				       struct pipe_resource *texture,
				       unsigned level,
				       unsigned usage,
				       const struct pipe_box *box,
				       struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_texture *rtex = (struct r600_texture*)texture;
	struct r600_transfer *trans;
	boolean use_staging_texture = FALSE;
	struct r600_resource *buf;
	unsigned offset = 0;
	char *map;

	/* We cannot map a tiled texture directly because the data is
	 * in a different order, therefore we do detiling using a blit.
	 *
	 * Also, use a temporary in GTT memory for read transfers, as
	 * the CPU is much happier reading out of cached system memory
	 * than uncached VRAM.
	 */
	if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
		use_staging_texture = TRUE;
	} else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY) &&
	    (rtex->resource.domains == RADEON_DOMAIN_VRAM)) {
		/* Untiled buffers in VRAM, which is slow for CPU reads */
		use_staging_texture = TRUE;
	} else if (!(usage & PIPE_TRANSFER_READ) &&
	    (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
	     !rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
		/* Use a staging texture for uploads if the underlying BO is busy. */
		use_staging_texture = TRUE;
	}

	if (texture->flags & R600_RESOURCE_FLAG_TRANSFER) {
		use_staging_texture = FALSE;
	}

	if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
		return NULL;
	}

	trans = CALLOC_STRUCT(r600_transfer);
	if (trans == NULL)
		return NULL;
	trans->transfer.resource = texture;
	trans->transfer.level = level;
	trans->transfer.usage = usage;
	trans->transfer.box = *box;

	if (rtex->is_depth) {
		struct r600_texture *staging_depth;

		if (rtex->resource.b.b.nr_samples > 1) {
			/* MSAA depth buffers need to be converted to single sample buffers.
			 *
			 * Mapping MSAA depth buffers can occur if ReadPixels is called
			 * with a multisample GLX visual.
			 *
			 * First downsample the depth buffer to a temporary texture,
			 * then decompress the temporary one to staging.
			 *
			 * Only the region being mapped is transferred.
			 */
			struct pipe_resource resource;

			r600_init_temp_resource_from_box(&resource, texture, box, level, 0);

			if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
				R600_ERR("failed to create temporary texture to hold untiled copy\n");
				FREE(trans);
				return NULL;
			}

			if (usage & PIPE_TRANSFER_READ) {
				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);

				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
							    0, 0, 0, box->depth, 0, 0);
				pipe_resource_reference((struct pipe_resource**)&temp, NULL);
			}
		}
		else {
			/* XXX: only readback the rectangle which is being mapped? */
			/* XXX: when discard is true, no need to read back from depth texture */
			if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
				R600_ERR("failed to create temporary texture to hold untiled copy\n");
				FREE(trans);
				return NULL;
			}

			rctx->blit_decompress_depth(ctx, rtex, staging_depth,
						    level, level,
						    box->z, box->z + box->depth - 1,
						    0, 0);

			offset = r600_texture_get_offset(staging_depth, level, box);
		}

		trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
		trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
		trans->staging = (struct r600_resource*)staging_depth;
	} else if (use_staging_texture) {
		struct pipe_resource resource;
		struct r600_texture *staging;

		r600_init_temp_resource_from_box(&resource, texture, box, level,
						 R600_RESOURCE_FLAG_TRANSFER);
		resource.usage = (usage & PIPE_TRANSFER_READ) ?
			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;

		/* Create the temporary texture. */
		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
		if (staging == NULL) {
			R600_ERR("failed to create temporary texture to hold untiled copy\n");
			FREE(trans);
			return NULL;
		}
		trans->staging = &staging->resource;
		trans->transfer.stride = staging->surface.level[0].pitch_bytes;
		trans->transfer.layer_stride = staging->surface.level[0].slice_size;
		if (usage & PIPE_TRANSFER_READ) {
			r600_copy_to_staging_texture(ctx, trans);
		}
	} else {
		/* the resource is mapped directly */
		trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
		trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
		offset = r600_texture_get_offset(rtex, level, box);
	}

	if (trans->staging) {
		buf = trans->staging;
		if (!rtex->is_depth && !(usage & PIPE_TRANSFER_READ))
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	} else {
		buf = &rtex->resource;
	}

	if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
		pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
		FREE(trans);
		return NULL;
	}

	*ptransfer = &trans->transfer;
	return map + offset;
}
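
A usage sketch (again, not from the original source) showing how the returned map pointer together with transfer->stride and transfer->layer_stride is typically consumed; util_format_get_stride() and util_format_get_nblocksy() are gallium's u_format.h helpers, and the function name is made up:

/* Sketch: read a box of texels back into a tightly packed destination.
 * Assumes "util/u_format.h" and <string.h> are included. */
static void read_back_box_example(struct pipe_context *ctx,
				  struct pipe_resource *tex,
				  const struct pipe_box *box,
				  void *dst)
{
	struct pipe_transfer *transfer;
	unsigned rows = util_format_get_nblocksy(tex->format, box->height);
	unsigned row_bytes = util_format_get_stride(tex->format, box->width);
	uint8_t *out = dst;
	uint8_t *map;
	unsigned z, y;

	map = ctx->transfer_map(ctx, tex, 0, PIPE_TRANSFER_READ, box, &transfer);
	if (!map)
		return;

	/* Copy row by row: the mapped data is laid out with the driver's
	 * stride/layer_stride, not tightly packed. */
	for (z = 0; z < box->depth; z++) {
		for (y = 0; y < rows; y++) {
			memcpy(out,
			       map + z * transfer->layer_stride + y * transfer->stride,
			       row_bytes);
			out += row_bytes;
		}
	}
	ctx->transfer_unmap(ctx, transfer);
}
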
Example #4
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
					struct pipe_resource *resource,
					unsigned level,
					unsigned usage,
					const struct pipe_box *box,
					struct pipe_transfer **ptransfer)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(&rctx->screen->b, rbuffer, rbuffer->b.b.width0, 4096,
					   TRUE, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->b.streamout.num_targets; i++) {
				if (rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
					if (rctx->b.streamout.begin_emitted) {
						r600_emit_streamout_end(&rctx->b);
					}
					rctx->b.streamout.append_bitmask = rctx->b.streamout.enabled_mask;
					r600_streamout_buffers_dirty(&rctx->b);
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rctx->screen->b.debug_flags & DBG_NO_DISCARD_RANGE) &&
		 (rctx->screen->has_cp_dma ||
		  (rctx->screen->has_streamout &&
		   /* The buffer range must be aligned to 4 with streamout. */
		   box->x % 4 == 0 && box->width % 4 == 0))) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
		}
	}

	/* mmap and synchronize with rings */
	data = r600_buffer_map_sync_with_rings(&rctx->b, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
Example #5
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
    struct r600_common_context *rctx = (struct r600_common_context*)ctx;
    struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
    struct r600_resource *rbuffer = r600_resource(resource);
    uint8_t *data;

    assert(box->x + box->width <= resource->width0);

    /* See if the buffer range being mapped has never been initialized,
     * in which case it can be mapped unsynchronized. */
    if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
            usage & PIPE_TRANSFER_WRITE &&
            !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }

    /* If discarding the entire range, discard the whole resource instead. */
    if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
            box->x == 0 && box->width == resource->width0) {
        usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
    }

    if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
        assert(usage & PIPE_TRANSFER_WRITE);

        /* Check if mapping this buffer would cause waiting for the GPU. */
        if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
            rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
        }
        /* At this point, the buffer is always idle. */
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }
    else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
             !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
             !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
             (rscreen->has_cp_dma ||
              (rscreen->has_streamout &&
               /* The buffer range must be aligned to 4 with streamout. */
               box->x % 4 == 0 && box->width % 4 == 0))) {
        assert(usage & PIPE_TRANSFER_WRITE);

        /* Check if mapping this buffer would cause waiting for the GPU. */
        if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
            /* Do a wait-free write-only transfer using a temporary buffer. */
            unsigned offset;
            struct r600_resource *staging = NULL;

            u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
                           &offset, (struct pipe_resource**)&staging, (void**)&data);

            if (staging) {
                data += box->x % R600_MAP_BUFFER_ALIGNMENT;
                return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                                ptransfer, data, staging, offset);
            } else {
                return NULL; /* upload allocation failed; should not happen */
            }
        }
        /* At this point, the buffer is always idle (we checked it above). */
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    }

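    /* mmap and synchronize with rings */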
    data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
    if (!data) {
        return NULL;
    }
    data += box->x;

    return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                    ptransfer, data, NULL, 0);
}
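
Finally, a sketch (not from the source) of the whole-resource discard pattern that all three buffer variants above optimize for, e.g. re-filling a streaming buffer every frame; pipe_buffer_map() comes from util/u_inlines.h and the surrounding names are illustrative:

/* Sketch: replace the entire contents of a buffer without waiting for the
 * GPU.  The variants above avoid stalling by dropping or reallocating the
 * old storage when the buffer is still busy.  Assumes size <= buf->width0
 * and that "util/u_inlines.h" and <string.h> are included. */
static void stream_whole_buffer_example(struct pipe_context *ctx,
                                        struct pipe_resource *buf,
                                        const void *src, unsigned size)
{
    struct pipe_transfer *transfer;
    void *map = pipe_buffer_map(ctx, buf,
                                PIPE_TRANSFER_WRITE |
                                PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
                                &transfer);

    if (!map)
        return;

    memcpy(map, src, size);
    pipe_buffer_unmap(ctx, transfer);
}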