static void *r600_buffer_transfer_map(struct pipe_context *ctx,
				      struct pipe_resource *resource,
				      unsigned level,
				      unsigned usage,
				      const struct pipe_box *box,
				      struct pipe_transfer **ptransfer)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
					   rbuffer->b.b.bind, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->num_so_targets; i++) {
				if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
					r600_context_streamout_end(rctx);
					rctx->streamout_start = TRUE;
					rctx->streamout_append_bitmask = ~0;
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 rctx->screen->has_streamout &&
		 /* The buffer range must be aligned to 4. */
		 box->x % 4 == 0 && box->width % 4 == 0) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			struct r600_resource *staging = (struct r600_resource*)
				pipe_buffer_create(ctx->screen, PIPE_BIND_VERTEX_BUFFER,
						   PIPE_USAGE_STAGING,
						   box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));

			data = rctx->ws->buffer_map(staging->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
			if (!data)
				return NULL;

			data += box->x % R600_MAP_BUFFER_ALIGNMENT;
			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging);
		}
	}

	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL);
}
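
For context, this driver function is reached through the Gallium transfer interface; state trackers do not call it directly. A minimal caller-side sketch, using the stock util/u_inlines.h helpers from the same tree (the helper name upload_range below is hypothetical, and the exact helper set for this era is an assumption):

#include <string.h>
#include "util/u_inlines.h" /* pipe_buffer_map_range, pipe_buffer_unmap */

/* Hypothetical helper: overwrite a sub-range of a buffer. The usage
 * flags passed here are exactly what arrives in the `usage` parameter
 * of r600_buffer_transfer_map and selects its DISCARD_RANGE path. */
static void upload_range(struct pipe_context *pipe,
			 struct pipe_resource *buf,
			 unsigned offset, unsigned size,
			 const void *src)
{
	struct pipe_transfer *transfer;
	void *map = pipe_buffer_map_range(pipe, buf, offset, size,
					  PIPE_TRANSFER_WRITE |
					  PIPE_TRANSFER_DISCARD_RANGE,
					  &transfer);
	if (!map)
		return;
	memcpy(map, src, size);
	pipe_buffer_unmap(pipe, transfer);
}
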
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
				      struct pipe_resource *resource,
				      unsigned level,
				      unsigned usage,
				      const struct pipe_box *box,
				      struct pipe_transfer **ptransfer)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(&rctx->screen->b, rbuffer, rbuffer->b.b.width0,
					   4096, TRUE, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->b.streamout.num_targets; i++) {
				if (rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
					if (rctx->b.streamout.begin_emitted) {
						r600_emit_streamout_end(&rctx->b);
					}
					rctx->b.streamout.append_bitmask = rctx->b.streamout.enabled_mask;
					r600_streamout_buffers_dirty(&rctx->b);
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rctx->screen->b.debug_flags & DBG_NO_DISCARD_RANGE) &&
		 (rctx->screen->has_cp_dma ||
		  (rctx->screen->has_streamout &&
		   /* The buffer range must be aligned to 4 with streamout. */
		   box->x % 4 == 0 && box->width % 4 == 0))) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0,
				       box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
		}
	}

	/* mmap and synchronize with rings */
	data = r600_buffer_map_sync_with_rings(&rctx->b, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
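
The util_ranges_intersect() test at the top of this version only works because the write side keeps valid_buffer_range up to date. That bookkeeping lives in the unmap/flush path rather than here; a sketch under the util/u_range.h API of this era (the helper name note_buffer_write is hypothetical, the util_range calls are real):

#include "util/u_range.h" /* util_range_add, util_ranges_intersect */

/* Sketch of the write-back bookkeeping (the unmap side, not shown
 * above): every completed write transfer marks the bytes it touched
 * as initialized. A later map that falls entirely outside this range
 * cannot observe GPU-visible data, so transfer_map may set
 * PIPE_TRANSFER_UNSYNCHRONIZED and skip waiting. */
static void note_buffer_write(struct r600_resource *rbuffer,
			      const struct pipe_box *box)
{
	util_range_add(&rbuffer->valid_buffer_range,
		       box->x, box->x + box->width);
}
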
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
				      struct pipe_resource *resource,
				      unsigned level,
				      unsigned usage,
				      const struct pipe_box *box,
				      struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	/* If discarding the entire range, discard the whole resource instead. */
	if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
	    box->x == 0 && box->width == resource->width0) {
		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		if (r600_invalidate_buffer(rctx, rbuffer)) {
			/* At this point, the buffer is always idle. */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
		 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0,
				       box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       256, &offset, (struct pipe_resource**)&staging,
				       (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
		} else {
			/* At this point, the buffer is always idle (we checked it above). */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	/* Using a staging buffer in GTT for larger reads is much faster. */
	else if ((usage & PIPE_TRANSFER_READ) &&
		 !(usage & PIPE_TRANSFER_WRITE) &&
		 rbuffer->domains == RADEON_DOMAIN_VRAM &&
		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
		struct r600_resource *staging;

		staging = (struct r600_resource*) pipe_buffer_create(
				ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
				box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
		if (staging) {
			/* Copy the VRAM buffer to the staging buffer. */
			rctx->dma_copy(ctx, &staging->b.b, 0,
				       box->x % R600_MAP_BUFFER_ALIGNMENT,
				       0, 0, resource, level, box);

			data = r600_buffer_map_sync_with_rings(rctx, staging,
							       PIPE_TRANSFER_READ);
			if (!data) {
				/* Don't leak the staging buffer if the map fails. */
				pipe_resource_reference((struct pipe_resource**)&staging, NULL);
				return NULL;
			}
			data += box->x % R600_MAP_BUFFER_ALIGNMENT;

			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging, 0);
		}
	}

	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
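
A note on the recurring box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT) arithmetic: the staging buffer is over-allocated by the sub-alignment of box->x so that the pointer handed back to the caller keeps the alignment a direct mapping at that offset would have had. Worked through, assuming a value of 64 for the constant (treat the exact value as an assumption):

/* Illustration only, assuming R600_MAP_BUFFER_ALIGNMENT == 64 and a
 * map request of box->x = 100, box->width = 40:
 *
 *   pad          = 100 % 64 = 36
 *   staging size = 40 + 36  = 76
 *   user pointer = staging_map + 36
 *
 * staging_map is at least 64-byte aligned, so staging_map + 36 has
 * the same address modulo 64 as a direct map at offset 100, and the
 * copy back into the real buffer on unmap starts at an offset with
 * the same alignment as the destination range. */
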
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
				      struct pipe_resource *resource,
				      unsigned level,
				      unsigned usage,
				      const struct pipe_box *box,
				      struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	/* If discarding the entire range, discard the whole resource instead. */
	if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
	    box->x == 0 && box->width == resource->width0) {
		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
		}
		/* At this point, the buffer is always idle. */
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}
	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
		 (rscreen->has_cp_dma ||
		  (rscreen->has_streamout &&
		   /* The buffer range must be aligned to 4 with streamout. */
		   box->x % 4 == 0 && box->width % 4 == 0))) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			u_upload_alloc(rctx->uploader, 0,
				       box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			} else {
				return NULL; /* error, shouldn't occur though */
			}
		}
		/* At this point, the buffer is always idle (we checked it above). */
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}