static void tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, struct tu_bo *src_bo, uint64_t src_offset, struct tu_bo *dst_bo, uint64_t dst_offset, uint64_t size) { const unsigned max_size_per_iter = 0x4000 - 0x40; const unsigned max_iterations = (size + max_size_per_iter) / max_size_per_iter; tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ); tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE); tu_dma_prepare(cmdbuf); tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations); /* buffer copy setup */ tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); tu_cs_emit(&cmdbuf->cs, blit_cntl); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); tu_cs_emit(&cmdbuf->cs, blit_cntl); for (; size;) { uint64_t src_va = src_bo->iova + src_offset; uint64_t dst_va = dst_bo->iova + dst_offset; unsigned src_shift = src_va & 0x3f; unsigned dst_shift = dst_va & 0x3f; unsigned max_shift = MAX2(src_shift, dst_shift); src_va -= src_shift; dst_va -= dst_shift; uint32_t size_todo = MIN2(0x4000 - max_shift, size); unsigned pitch = (size_todo + max_shift + 63) & ~63; /* * Emit source: */ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) | A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) | A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */ tu_cs_emit_qw(&cmdbuf->cs, src_va); tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch)); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); /* * Emit destination: */ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) | A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); tu_cs_emit_qw(&cmdbuf->cs, dst_va); tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch)); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); tu_cs_emit(&cmdbuf->cs, 0x00000000); /* * Blit command: */ tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0)); tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) | A6XX_GRAS_2D_DST_BR_Y(0)); tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); tu_cs_emit(&cmdbuf->cs, 0x3f); tu_cs_emit_wfi(&cmdbuf->cs); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); tu_cs_emit(&cmdbuf->cs, 0); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); tu_cs_emit(&cmdbuf->cs, 0xf180); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); tu_cs_emit(&cmdbuf->cs, 0x01000000); tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); tu_cs_emit_wfi(&cmdbuf->cs); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); tu_cs_emit(&cmdbuf->cs, 0); src_offset += size_todo; dst_offset += size_todo; size -= size_todo; } tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); }
static void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) { struct fd_ringbuffer *ring; // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth // splitting both clear and lrz clear out into their own rb's. And // just throw away any draws prior to clear. (Anything not fullscreen // clear, just fallback to generic path that treats it as a normal // draw if (!batch->lrz_clear) { batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000); fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); } ring = batch->lrz_clear; emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BYPASS)); emit_marker6(ring, 7); OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); OUT_RING(ring, 0x10000000); OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x7ffff); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0xc)); emit_marker6(ring, 7); OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); OUT_RING(ring, 0x0); OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_ACC0, 1); OUT_RING(ring, 0x0000f410); OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); OUT_RING(ring, A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) | 0x4f00080); OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1); OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) | 0x4f00080); fd6_event_write(batch, ring, UNK_1D, true); fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false); OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); OUT_RING(ring, fui(depth)); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9); OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R16_UNORM) | A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); OUT_RELOCW(ring, zsbuf->lrz, 0, 0, 0); OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(zsbuf->lrz_pitch * 2)); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4); OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(0)); OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(0)); OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2); OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0)); OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) | A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1)); fd6_event_write(batch, ring, 0x3f, false); OUT_WFI5(ring); OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x1000000); OUT_PKT7(ring, CP_BLIT, 1); OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE)); OUT_WFI5(ring); OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x0); fd6_event_write(batch, ring, UNK_1D, true); fd6_event_write(batch, ring, FACENESS_FLUSH, true); fd6_event_write(batch, ring, CACHE_FLUSH_TS, true); fd6_cache_flush(batch, ring); }
static void tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf, struct tu_image *src_image, struct tu_buffer *dst_buffer, const VkBufferImageCopy *copy_info) { tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); /* general setup */ tu_dma_prepare(cmdbuf); tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); /* buffer copy setup */ tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); VkFormat format = src_image->vk_format; const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; unsigned dst_pixel_stride = copy_info->bufferRowLength ? copy_info->bufferRowLength : copy_info->imageExtent.width; unsigned cpp = vk_format_get_blocksize(format); unsigned dst_pitch = dst_pixel_stride * cpp; const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); tu_cs_emit(&cmdbuf->cs, blit_cntl); tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); tu_cs_emit(&cmdbuf->cs, blit_cntl); for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) { unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset; uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch; if ((dst_pitch & 63) || (dst_va & 63)) { /* Do a per line copy */ VkBufferImageCopy line_copy_info = *copy_info; line_copy_info.imageExtent.height = 1; for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) { /* * if dst_va is not aligned the line copy will need to adjust. Give it * room to do so. */ unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0; line_copy_info.imageOffset.x = copy_info->imageOffset.x; line_copy_info.imageExtent.width = copy_info->imageExtent.width; for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) { tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp); line_copy_info.imageOffset.x += max_width; line_copy_info.imageExtent.width -= max_width; } line_copy_info.imageOffset.y++; dst_va += dst_pitch; } } else { tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va); } } tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); }