struct pipe_resource * nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr, unsigned bytes, unsigned bind) { struct nv04_resource *buffer; buffer = CALLOC_STRUCT(nv04_resource); if (!buffer) return NULL; pipe_reference_init(&buffer->base.reference, 1); buffer->vtbl = &nouveau_buffer_vtbl; buffer->base.screen = pscreen; buffer->base.format = PIPE_FORMAT_R8_UNORM; buffer->base.usage = PIPE_USAGE_IMMUTABLE; buffer->base.bind = bind; buffer->base.width0 = bytes; buffer->base.height0 = 1; buffer->base.depth0 = 1; buffer->data = ptr; buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY; util_range_init(&buffer->valid_buffer_range); util_range_add(&buffer->valid_buffer_range, 0, bytes); return &buffer->base; }
/* Unmap stage of the transfer. If it was a WRITE transfer and the map that * was returned was not the real resource's data, this needs to transfer the * data back to the resource. * * Also marks vbo dirty based on the buffer's binding */ static void nouveau_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct nouveau_context *nv = nouveau_context(pipe); struct nouveau_transfer *tx = nouveau_transfer(transfer); struct nv04_resource *buf = nv04_resource(transfer->resource); if (tx->base.usage & PIPE_TRANSFER_WRITE) { if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map) nouveau_transfer_write(nv, tx, 0, tx->base.box.width); if (likely(buf->domain)) { const uint8_t bind = buf->base.bind; /* make sure we invalidate dedicated caches */ if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) nv->vbo_dirty = TRUE; } util_range_add(&buf->valid_buffer_range, tx->base.box.x, tx->base.box.x + tx->base.box.width); } if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE)) NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width); nouveau_buffer_transfer_del(nv, tx); FREE(tx); }
void nouveau_copy_buffer(struct nouveau_context *nv, struct nv04_resource *dst, unsigned dstx, struct nv04_resource *src, unsigned srcx, unsigned size) { assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER); if (likely(dst->domain) && likely(src->domain)) { nv->copy_data(nv, dst->bo, dst->offset + dstx, dst->domain, src->bo, src->offset + srcx, src->domain, size); dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; nouveau_fence_ref(nv->screen->fence.current, &dst->fence); nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr); src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; nouveau_fence_ref(nv->screen->fence.current, &src->fence); } else { struct pipe_box src_box; src_box.x = srcx; src_box.y = 0; src_box.z = 0; src_box.width = size; src_box.height = 1; src_box.depth = 1; util_resource_copy_region(&nv->pipe, &dst->base, 0, dstx, 0, 0, &src->base, 0, &src_box); } util_range_add(&dst->valid_buffer_range, dstx, dstx + size); }
static void r600_buffer_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_common_context *rctx = (struct r600_common_context*)ctx; struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_resource *rbuffer = r600_resource(transfer->resource); if (rtransfer->staging) { if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) { struct pipe_resource *dst, *src; unsigned soffset, doffset, size; struct pipe_box box; dst = transfer->resource; src = &rtransfer->staging->b.b; size = transfer->box.width; doffset = transfer->box.x; soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT; u_box_1d(soffset, size, &box); /* Copy the staging buffer into the original one. */ rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box); } pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); } if (transfer->usage & PIPE_TRANSFER_WRITE) { util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, transfer->box.x + transfer->box.width); } util_slab_free(&rctx->pool_transfers, transfer); }
static struct pipe_stream_output_target * r600_create_so_target(struct pipe_context *ctx, struct pipe_resource *buffer, unsigned buffer_offset, unsigned buffer_size) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_so_target *t; struct r600_resource *rbuffer = (struct r600_resource*)buffer; t = CALLOC_STRUCT(r600_so_target); if (!t) { return NULL; } u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4, &t->buf_filled_size_offset, (struct pipe_resource**)&t->buf_filled_size); if (!t->buf_filled_size) { FREE(t); return NULL; } t->b.reference.count = 1; t->b.context = ctx; pipe_resource_reference(&t->b.buffer, buffer); t->b.buffer_offset = buffer_offset; t->b.buffer_size = buffer_size; util_range_add(&rbuffer->valid_buffer_range, buffer_offset, buffer_offset + buffer_size); return &t->b; }
struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, void *user_memory) { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_winsys *ws = rscreen->ws; struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); rbuffer->domains = RADEON_DOMAIN_GTT; util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); /* Convert a user pointer to a buffer. */ rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); if (!rbuffer->buf) { FREE(rbuffer); return NULL; } if (rscreen->info.has_virtual_memory) rbuffer->gpu_address = ws->buffer_get_virtual_address(rbuffer->buf); else rbuffer->gpu_address = 0; return &rbuffer->b.b; }
static void r600_buffer_do_flush_region(struct pipe_context *ctx, struct pipe_transfer *transfer, const struct pipe_box *box) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_resource *rbuffer = r600_resource(transfer->resource); if (rtransfer->staging) { struct pipe_resource *dst, *src; unsigned soffset; struct pipe_box dma_box; dst = transfer->resource; src = &rtransfer->staging->b.b; soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT; u_box_1d(soffset, box->width, &dma_box); /* Copy the staging buffer into the original one. */ ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box); } util_range_add(&rbuffer->valid_buffer_range, box->x, box->x + box->width); }
void evergreen_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, uint64_t size) { struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs; unsigned i, ncopy, csize, sub_cmd, shift; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); dst_offset += rdst->gpu_address; src_offset += rsrc->gpu_address; /* see if we use dword or byte copy */ if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { size >>= 2; sub_cmd = EG_DMA_COPY_DWORD_ALIGNED; shift = 2; } else {
static struct pipe_stream_output_target * nv50_so_target_create(struct pipe_context *pipe, struct pipe_resource *res, unsigned offset, unsigned size) { struct nv04_resource *buf = (struct nv04_resource *)res; struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target); if (!targ) return NULL; if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { targ->pq = pipe->create_query(pipe, NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); if (!targ->pq) { FREE(targ); return NULL; } } else { targ->pq = NULL; } targ->clean = TRUE; targ->pipe.buffer_size = size; targ->pipe.buffer_offset = offset; targ->pipe.context = pipe; targ->pipe.buffer = NULL; pipe_resource_reference(&targ->pipe.buffer, res); pipe_reference_init(&targ->pipe.reference, 1); assert(buf->base.target == PIPE_BUFFER); util_range_add(&buf->valid_buffer_range, offset, offset + size); return &targ->pipe; }
static struct pipe_stream_output_target * nvc0_so_target_create(struct pipe_context *pipe, struct pipe_resource *res, unsigned offset, unsigned size) { struct nv04_resource *buf = (struct nv04_resource *)res; struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target); if (!targ) return NULL; targ->pq = pipe->create_query(pipe, NVC0_HW_QUERY_TFB_BUFFER_OFFSET, 0); if (!targ->pq) { FREE(targ); return NULL; } targ->clean = true; targ->pipe.buffer_size = size; targ->pipe.buffer_offset = offset; targ->pipe.context = pipe; targ->pipe.buffer = NULL; pipe_resource_reference(&targ->pipe.buffer, res); pipe_reference_init(&targ->pipe.reference, 1); assert(buf->base.target == PIPE_BUFFER); util_range_add(&buf->valid_buffer_range, offset, offset + size); return &targ->pipe; }
static void fd_resource_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans) { struct fd_context *ctx = fd_context(pctx); struct fd_resource *rsc = fd_resource(ptrans->resource); struct fd_transfer *trans = fd_transfer(ptrans); if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { struct pipe_box box; u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box); fd_resource_flush(trans, &box); } if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { fd_bo_cpu_fini(rsc->bo); if (rsc->stencil) fd_bo_cpu_fini(rsc->stencil->bo); } util_range_add(&rsc->valid_buffer_range, ptrans->box.x, ptrans->box.x + ptrans->box.width); pipe_resource_reference(&ptrans->resource, NULL); util_slab_free(&ctx->transfer_pool, ptrans); free(trans->staging); }
static struct pipe_stream_output_target * fd_create_stream_output_target(struct pipe_context *pctx, struct pipe_resource *prsc, unsigned buffer_offset, unsigned buffer_size) { struct pipe_stream_output_target *target; struct fd_resource *rsc = fd_resource(prsc); target = CALLOC_STRUCT(pipe_stream_output_target); if (!target) return NULL; pipe_reference_init(&target->reference, 1); pipe_resource_reference(&target->buffer, prsc); target->context = pctx; target->buffer_offset = buffer_offset; target->buffer_size = buffer_size; assert(rsc->base.target == PIPE_BUFFER); util_range_add(&rsc->valid_buffer_range, buffer_offset, buffer_offset + buffer_size); return target; }
static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) { struct fd_resource *rsc = fd_resource(ptrans->resource); if (ptrans->resource->target == PIPE_BUFFER) util_range_add(&rsc->valid_buffer_range, ptrans->box.x + box->x, ptrans->box.x + box->x + box->width); }
static void nouveau_buffer_transfer_flush_region(struct pipe_context *pipe, struct pipe_transfer *transfer, const struct pipe_box *box) { struct nouveau_transfer *tx = nouveau_transfer(transfer); struct nv04_resource *buf = nv04_resource(transfer->resource); if (tx->map) nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width); util_range_add(&buf->valid_buffer_range, tx->base.box.x + box->x, tx->base.box.x + box->x + box->width); }
static void fd_resource_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans) { struct fd_context *ctx = fd_context(pctx); struct fd_resource *rsc = fd_resource(ptrans->resource); if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) fd_bo_cpu_fini(rsc->bo); util_range_add(&rsc->valid_buffer_range, ptrans->box.x, ptrans->box.x + ptrans->box.width); pipe_resource_reference(&ptrans->resource, NULL); util_slab_free(&ctx->transfer_pool, ptrans); }
static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx, struct pipe_transfer *transfer, const struct pipe_box *box) { struct virgl_transfer *trans = virgl_transfer(transfer); /* * FIXME: This is not optimal. For example, * * glMapBufferRange(.., 0, 100, GL_MAP_FLUSH_EXPLICIT_BIT) * glFlushMappedBufferRange(.., 25, 30) * glFlushMappedBufferRange(.., 65, 70) * * We'll end up flushing 25 --> 70. */ util_range_add(&trans->range, box->x, box->x + box->width); }
static void cik_sdma_copy_buffer(struct si_context *ctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, uint64_t size) { struct r600_resource *rdst = (struct r600_resource*)dst; /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size); }
static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx, struct pipe_transfer *transfer, const struct pipe_box *box) { struct virgl_context *vctx = virgl_context(ctx); struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); if (!vbuf->on_list) { struct pipe_resource *res = NULL; list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); vbuf->on_list = TRUE; pipe_resource_reference(&res, &vbuf->base.u.b); } util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, transfer->box.x + box->x + box->width); vbuf->base.clean = FALSE; }
void r600_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, uint64_t size) { struct radeon_winsys_cs *cs = rctx->b.dma.cs; unsigned i, ncopy, csize; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); size >>= 2; /* convert to dwords */ ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW); r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc); for (i = 0; i < ncopy; i++) { csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_BUFFER); radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize)); radeon_emit(cs, dst_offset & 0xfffffffc); radeon_emit(cs, src_offset & 0xfffffffc); radeon_emit(cs, (dst_offset >> 32UL) & 0xff); radeon_emit(cs, (src_offset >> 32UL) & 0xff); dst_offset += csize << 2; src_offset += csize << 2; size -= csize; } r600_dma_emit_wait_idle(&rctx->b); }
static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_context *rctx = (struct r600_context*)pipe; struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_resource *rbuffer = r600_resource(transfer->resource); if (rtransfer->staging) { struct pipe_resource *dst, *src; unsigned soffset, doffset, size; dst = transfer->resource; src = &rtransfer->staging->b.b; size = transfer->box.width; doffset = transfer->box.x; soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT; /* Copy the staging buffer into the original one. */ if (rctx->b.rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) { if (rctx->screen->b.chip_class >= EVERGREEN) { evergreen_dma_copy(rctx, dst, src, doffset, soffset, size); } else { r600_dma_copy(rctx, dst, src, doffset, soffset, size); } } else { struct pipe_box box; u_box_1d(soffset, size, &box); r600_copy_buffer(pipe, dst, doffset, src, &box); } pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); } if (transfer->usage & PIPE_TRANSFER_WRITE) { util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, transfer->box.x + transfer->box.width); } util_slab_free(&rctx->pool_transfers, transfer); }
static void *virgl_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **transfer) { struct virgl_context *vctx = virgl_context(ctx); struct virgl_screen *vs = virgl_screen(ctx->screen); struct virgl_resource *vbuf = virgl_resource(resource); struct virgl_transfer *trans; enum virgl_transfer_map_type map_type; trans = virgl_resource_create_transfer(&vctx->transfer_pool, resource, &vbuf->metadata, level, usage, box); map_type = virgl_resource_transfer_prepare(vctx, trans); switch (map_type) { case VIRGL_TRANSFER_MAP_HW_RES: trans->hw_res_map = vs->vws->resource_map(vs->vws, vbuf->hw_res); break; case VIRGL_TRANSFER_MAP_ERROR: default: trans->hw_res_map = NULL; break; } if (!trans->hw_res_map) { virgl_resource_destroy_transfer(&vctx->transfer_pool, trans); return NULL; } if (usage & PIPE_TRANSFER_WRITE) util_range_add(&vbuf->valid_buffer_range, box->x, box->x + box->width); *transfer = &trans->base; return trans->hw_res_map + trans->offset; }
static void nvc0_compute_validate_buffers(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; const int s = 5; int i; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { if (nvc0->buffers[s][i].buffer) { struct nv04_resource *res = nv04_resource(nvc0->buffers[s][i].buffer); PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); PUSH_DATA (push, 0); BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); util_range_add(&res->valid_buffer_range, nvc0->buffers[s][i].buffer_offset, nvc0->buffers[s][i].buffer_offset + nvc0->buffers[s][i].buffer_size); } else { PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); } } }
void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, uint64_t dst_offset, struct pipe_resource *src, uint64_t src_offset, unsigned size) { struct radeon_winsys_cs *cs = rctx->b.gfx.cs; assert(size); assert(rctx->screen->b.has_cp_dma); /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); dst_offset += r600_resource(dst)->gpu_address; src_offset += r600_resource(src)->gpu_address; /* Flush the caches where the resources are bound. */ rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | R600_CONTEXT_INV_VERTEX_CACHE | R600_CONTEXT_INV_TEX_CACHE | R600_CONTEXT_FLUSH_AND_INV | R600_CONTEXT_FLUSH_AND_INV_CB | R600_CONTEXT_FLUSH_AND_INV_DB | R600_CONTEXT_FLUSH_AND_INV_CB_META | R600_CONTEXT_FLUSH_AND_INV_DB_META | R600_CONTEXT_STREAMOUT_FLUSH | R600_CONTEXT_WAIT_3D_IDLE; /* There are differences between R700 and EG in CP DMA, * but we only use the common bits here. */ while (size) { unsigned sync = 0; unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); unsigned src_reloc, dst_reloc; r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { r600_flush_emit(rctx); } /* Do the synchronization after the last copy, so that all data is written to memory. */ if (size == byte_count) { sync = PKT3_CP_DMA_CP_SYNC; } /* This must be done after r600_need_cs_space. */ src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src, RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */ radeon_emit(cs, sync | ((src_offset >> 32) & 0xff)); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */ radeon_emit(cs, dst_offset); /* DST_ADDR_LO [31:0] */ radeon_emit(cs, (dst_offset >> 32) & 0xff); /* DST_ADDR_HI [7:0] */ radeon_emit(cs, byte_count); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, src_reloc); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, dst_reloc); size -= byte_count; src_offset += byte_count; dst_offset += byte_count; } /* Invalidate the read caches. */ rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | R600_CONTEXT_INV_VERTEX_CACHE | R600_CONTEXT_INV_TEX_CACHE; }
void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, uint64_t dst_offset, struct pipe_resource *src, uint64_t src_offset, unsigned size) { struct radeon_winsys_cs *cs = rctx->b.gfx.cs; assert(size); assert(rctx->screen->b.has_cp_dma); /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); dst_offset += r600_resource(dst)->gpu_address; src_offset += r600_resource(src)->gpu_address; /* Flush the caches where the resources are bound. */ rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER) | R600_CONTEXT_WAIT_3D_IDLE; /* There are differences between R700 and EG in CP DMA, * but we only use the common bits here. */ while (size) { unsigned sync = 0; unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); unsigned src_reloc, dst_reloc; r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { r600_flush_emit(rctx); } /* Do the synchronization after the last copy, so that all data is written to memory. */ if (size == byte_count) { sync = PKT3_CP_DMA_CP_SYNC; } /* This must be done after r600_need_cs_space. */ src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src, RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */ radeon_emit(cs, sync | ((src_offset >> 32) & 0xff)); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */ radeon_emit(cs, dst_offset); /* DST_ADDR_LO [31:0] */ radeon_emit(cs, (dst_offset >> 32) & 0xff); /* DST_ADDR_HI [7:0] */ radeon_emit(cs, byte_count); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, src_reloc); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, dst_reloc); size -= byte_count; src_offset += byte_count; dst_offset += byte_count; } /* CP_DMA_CP_SYNC doesn't wait for idle on R6xx, but this does. */ if (rctx->b.chip_class == R600) radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_CP_DMA_IDLE(1)); /* CP DMA is executed in ME, but index buffers are read by PFP. * This ensures that ME (CP DMA) is idle before PFP starts fetching * indices. If we wanted to execute CP DMA in PFP, this packet * should precede it. */ r600_emit_pfp_sync_me(rctx); }