static void tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, struct tu_bo *src_bo, uint64_t src_offset, struct tu_bo *dst_bo, uint64_t dst_offset, uint64_t size) { const unsigned max_size_per_iter = 0x4000 - 0x40; const unsigned max_iterations = (size + max_size_per_iter) / max_size_per_iter; tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ); tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE); tu_dma_prepare(cmdbuf); tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations); /* buffer copy setup */ tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); tu_cs_emit(&cmdbuf->cs, blit_cntl); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); tu_cs_emit(&cmdbuf->cs, blit_cntl); for (; size;) { uint64_t src_va = src_bo->iova + src_offset; uint64_t dst_va = dst_bo->iova + dst_offset; unsigned src_shift = src_va & 0x3f; unsigned dst_shift = dst_va & 0x3f; unsigned max_shift = MAX2(src_shift, dst_shift); src_va -= src_shift; dst_va -= dst_shift; uint32_t size_todo = MIN2(0x4000 - max_shift, size); unsigned pitch = (size_todo + max_shift + 63) & ~63; /* * Emit source: */ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) | A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) | A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */ tu_cs_emit_qw(&cmdbuf->cs, src_va); tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch)); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 
0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); /* * Emit destination: */ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) | A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); tu_cs_emit_qw(&cmdbuf->cs, dst_va); tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch)); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); /* * Blit command: */ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) | A6XX_GRAS_2D_DST_BR_Y(0)); tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); tu_cs_emit(&cmdbuf->cs, 0x3f); tu_cs_emit_wfi(&cmdbuf->cs); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); tu_cs_emit(&cmdbuf->cs, 0); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); tu_cs_emit(&cmdbuf->cs, 0xf180); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); tu_cs_emit(&cmdbuf->cs, 0x01000000); tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); tu_cs_emit_wfi(&cmdbuf->cs); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); tu_cs_emit(&cmdbuf->cs, 0); src_offset += size_todo; dst_offset += size_todo; size -= size_todo; } tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); tu6_emit_event_write(cmdbuf, 
&cmdbuf->cs, CACHE_FLUSH_TS, true); }
static void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) { struct fd_ringbuffer *ring; // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth // splitting both clear and lrz clear out into their own rb's. And // just throw away any draws prior to clear. (Anything not fullscreen // clear, just fallback to generic path that treats it as a normal // draw if (!batch->lrz_clear) { batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000); fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); } ring = batch->lrz_clear; emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BYPASS)); emit_marker6(ring, 7); OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); OUT_RING(ring, 0x10000000); OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x7ffff); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0xc)); emit_marker6(ring, 7); OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); OUT_RING(ring, 0x0); OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_ACC0, 1); OUT_RING(ring, 0x0000f410); OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); OUT_RING(ring, A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) | 0x4f00080); OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1); OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) | 0x4f00080); fd6_event_write(batch, ring, UNK_1D, true); fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false); OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); OUT_RING(ring, fui(depth)); OUT_RING(ring, 0x00000000); 
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9); OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R16_UNORM) | A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); OUT_RELOCW(ring, zsbuf->lrz, 0, 0, 0); OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(zsbuf->lrz_pitch * 2)); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4); OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(0)); OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(0)); OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2); OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0)); OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) | A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1)); fd6_event_write(batch, ring, 0x3f, false); OUT_WFI5(ring); OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x1000000); OUT_PKT7(ring, CP_BLIT, 1); OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE)); OUT_WFI5(ring); OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x0); fd6_event_write(batch, ring, UNK_1D, true); fd6_event_write(batch, ring, FACENESS_FLUSH, true); fd6_event_write(batch, ring, CACHE_FLUSH_TS, true); fd6_cache_flush(batch, ring); }
/**
 * Emit one 2D blit that copies a rectangle of `src_image` to linear memory
 * at `dst_va`.
 *
 * The source rectangle is copy_info->imageOffset / imageExtent (x, y, width,
 * height) within array layer `layer` and mip level
 * copy_info->imageSubresource.mipLevel.  The destination is described as a
 * linear surface of the same native format.
 *
 * Two destination layouts are handled:
 *  - multi-row copies use bufferRowLength (or the copy width when it is 0)
 *    as the row stride and assert that both the resulting pitch and dst_va
 *    are 64-byte aligned;
 *  - single-row copies round dst_va down to 64 bytes and express the
 *    remainder as a destination X offset measured in format blocks.
 *
 * `dst_buffer` is accepted for interface symmetry but is not referenced
 * here.  Space for 48 entries is reserved in cmdbuf->cs before emitting.
 */
static void
tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
                             struct tu_image *src_image,
                             struct tu_buffer *dst_buffer,
                             const VkBufferImageCopy *copy_info,
                             VkFormat format,
                             uint32_t layer,
                             uint64_t dst_va)
{
   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
   const uint32_t mip = copy_info->imageSubresource.mipLevel;

   /* Address of the selected layer + mip level inside the image's BO. */
   const uint64_t src_addr = src_image->bo->iova + src_image->bo_offset +
                             src_image->layer_size * layer +
                             src_image->levels[mip].offset;
   const unsigned src_row_bytes =
      src_image->levels[mip].pitch * vk_format_get_blocksize(format);

   unsigned dst_row_bytes;
   unsigned dst_x = 0;

   if (copy_info->imageExtent.height != 1) {
      /* Multi-row: the row stride comes from the copy description. */
      unsigned stride_px;
      if (copy_info->bufferRowLength) {
         stride_px = copy_info->bufferRowLength;
      } else {
         stride_px = copy_info->imageExtent.width;
      }
      dst_row_bytes = stride_px * vk_format_get_blocksize(format);

      assert(!(dst_row_bytes & 63));
      assert(!(dst_va & 63));
   } else {
      /* Can't find this in the spec, but not having it is sort of insane? */
      assert(dst_va % vk_format_get_blocksize(format) == 0);

      /* Single row: fold the sub-64-byte part of the address into an X
       * offset and pad the pitch up to the next multiple of 64. */
      dst_x = (dst_va & 63) / vk_format_get_blocksize(format);
      dst_va &= ~63;
      dst_row_bytes = align((dst_x + copy_info->imageExtent.width) *
                               vk_format_get_blocksize(format),
                            64);
   }

   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);

   /* Source surface: 13 register values starting at SP_PS_2D_SRC_INFO. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
   tu_cs_emit(&cmdbuf->cs,
              A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
                 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
   /* SP_PS_2D_SRC_SIZE: uses the image's base extent. */
   tu_cs_emit(&cmdbuf->cs,
              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(src_image->extent.height));
   tu_cs_emit_qw(&cmdbuf->cs, src_addr);
   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_row_bytes));
   /* Remaining eight values of the 13-value packet are written as zero. */
   for (unsigned i = 0; i < 8; i++) {
      tu_cs_emit(&cmdbuf->cs, 0x00000000);
   }

   /* Destination surface: 9 register values starting at RB_2D_DST_INFO. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
   tu_cs_emit(&cmdbuf->cs,
              A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
                 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_row_bytes));
   /* Remaining five values of the 9-value packet are written as zero. */
   for (unsigned i = 0; i < 5; i++) {
      tu_cs_emit(&cmdbuf->cs, 0x00000000);
   }

   /* Source rectangle inside the image (inclusive bottom-right corner). */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
                                      copy_info->imageExtent.width - 1));
   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
                                      copy_info->imageExtent.height - 1));

   /* Destination rectangle: starts at row 0, shifted right by dst_x. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_DST_TL_X(dst_x) | A6XX_GRAS_2D_DST_TL_Y(0));
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_DST_BR_X(dst_x + copy_info->imageExtent.width - 1) |
                 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));

   /* Event 0x3f followed by a wait-for-idle before the remaining state. */
   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
   tu_cs_emit(&cmdbuf->cs, 0x3f);
   tu_cs_emit_wfi(&cmdbuf->cs);

   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
   tu_cs_emit(&cmdbuf->cs, 0);

   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));

   /* RB_UNKNOWN_8E04 is set to 0x01000000 around the blit and cleared
    * again afterwards. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
   tu_cs_emit(&cmdbuf->cs, 0x01000000);

   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   tu_cs_emit_wfi(&cmdbuf->cs);

   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
   tu_cs_emit(&cmdbuf->cs, 0);
}