/* Create a compute shader implementing clear_buffer or copy_buffer. */ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy) { assert(util_is_power_of_two_nonzero(num_dwords_per_thread)); unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT; if (dst_stream_cache_policy) store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY; /* Don't cache loads, because there is no reuse. */ unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY; unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4); unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned)); for (unsigned i = 0; i < num_mem_ops; i++) { if (i*4 < num_dwords_per_thread) inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4); } struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE); if (!ureg) return NULL; ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1); struct ureg_src value; if (!is_copy) { ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]); value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0); } struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0); struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0); struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false)); struct ureg_src srcbuf; struct ureg_src *values = NULL; if (is_copy) { srcbuf = ureg_DECL_buffer(ureg, 1, false); values = malloc(num_mem_ops * sizeof(struct ureg_src)); } /* If there are multiple stores, the first store writes into 0+tid, * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc. */ ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid); /* Convert from a "store size unit" into bytes. */ ureg_UMUL(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[0])); ureg_MOV(ureg, load_addr, ureg_src(store_addr)); /* Distance between a load and a store for latency hiding. */ unsigned load_store_distance = is_copy ? 8 : 0; for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) { int d = i - load_store_distance; if (is_copy && i < num_mem_ops) { if (i) { ureg_UADD(ureg, load_addr, ureg_src(load_addr), ureg_imm1u(ureg, 4 * inst_dwords[i] * 64)); } values[i] = ureg_src(ureg_DECL_temporary(ureg)); struct ureg_dst dst = ureg_writemask(ureg_dst(values[i]), u_bit_consecutive(0, inst_dwords[i])); struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)}; ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2, load_qualifier, TGSI_TEXTURE_BUFFER, 0); } if (d >= 0) { if (d) { ureg_UADD(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[d] * 64)); } struct ureg_dst dst = ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d])); struct ureg_src srcs[] = {ureg_src(store_addr), is_copy ? values[d] : value}; ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2, store_qualifier, TGSI_TEXTURE_BUFFER, 0); } } ureg_END(ureg); struct pipe_compute_state state = {}; state.ir_type = PIPE_SHADER_IR_TGSI; state.prog = ureg_get_tokens(ureg, NULL); void *cs = ctx->create_compute_state(ctx, &state); ureg_destroy(ureg); free(values); return cs; }
static void * create_fs(struct st_context *st, bool download, enum pipe_texture_target target) { struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; struct ureg_program *ureg; bool have_layer; struct ureg_dst out; struct ureg_src sampler; struct ureg_src pos; struct ureg_src layer; struct ureg_src const0; struct ureg_src const1; struct ureg_dst temp0; have_layer = st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY); ureg = ureg_create(PIPE_SHADER_FRAGMENT); if (!ureg) return NULL; if (!download) { out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); } else { struct ureg_src image; /* writeonly images do not require an explicitly given format. */ image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE, true, false); out = ureg_dst(image); } sampler = ureg_DECL_sampler(ureg, 0); if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); } else { pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_LINEAR); } if (have_layer) { layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0, TGSI_INTERPOLATE_CONSTANT); } const0 = ureg_DECL_constant(ureg, 0); const1 = ureg_DECL_constant(ureg, 1); temp0 = ureg_DECL_temporary(ureg); /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */ /* temp0.xy = f2i(temp0.xy) */ ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), ureg_swizzle(pos, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y)); /* temp0.xy = temp0.xy + const0.xy */ ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), ureg_swizzle(ureg_src(temp0), TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y), ureg_swizzle(const0, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y)); /* temp0.x = const0.z * temp0.y + temp0.x */ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X), ureg_scalar(const0, TGSI_SWIZZLE_Z), ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); if (have_layer) { /* temp0.x = const0.w * layer + temp0.x */ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X), ureg_scalar(const0, TGSI_SWIZZLE_W), ureg_scalar(layer, TGSI_SWIZZLE_X), ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); } /* temp0.w = 0 */ ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0)); if (download) { struct ureg_dst temp1; struct ureg_src op[2]; temp1 = ureg_DECL_temporary(ureg); /* temp1.xy = pos.xy */ ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos); /* temp1.zw = 0 */ ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0)); if (have_layer) { struct ureg_dst temp1_layer = ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_Z); /* temp1.y/z = layer */ ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X)); if (target == PIPE_TEXTURE_3D) { /* temp1.z += layer_offset */ ureg_UADD(ureg, temp1_layer, ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z), ureg_scalar(const1, TGSI_SWIZZLE_X)); } } /* temp1 = txf(sampler, temp1) */ ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1), ureg_src(temp1), sampler); /* store(out, temp0, temp1) */ op[0] = ureg_src(temp0); op[1] = ureg_src(temp1); ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE); ureg_release_temporary(ureg, temp1); } else { /* out = txf(sampler, temp0.x) */ ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler); } ureg_release_temporary(ureg, temp0); ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); }