void * util_make_fs_msaa_resolve(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex, unsigned nr_samples, enum tgsi_return_type stype) { struct ureg_program *ureg; struct ureg_src sampler, coord; struct ureg_dst out, tmp_sum, tmp_coord, tmp; unsigned i; ureg = ureg_create(PIPE_SHADER_FRAGMENT); if (!ureg) return NULL; /* Declarations. */ sampler = ureg_DECL_sampler(ureg, 0); ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype); coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); tmp_sum = ureg_DECL_temporary(ureg); tmp_coord = ureg_DECL_temporary(ureg); tmp = ureg_DECL_temporary(ureg); /* Instructions. */ ureg_MOV(ureg, tmp_sum, ureg_imm1f(ureg, 0)); ureg_F2U(ureg, tmp_coord, coord); for (i = 0; i < nr_samples; i++) { /* Read one sample. */ ureg_MOV(ureg, ureg_writemask(tmp_coord, TGSI_WRITEMASK_W), ureg_imm1u(ureg, i)); ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord), sampler); if (stype == TGSI_RETURN_TYPE_UINT) ureg_U2F(ureg, tmp, ureg_src(tmp)); else if (stype == TGSI_RETURN_TYPE_SINT) ureg_I2F(ureg, tmp, ureg_src(tmp)); /* Add it to the sum.*/ ureg_ADD(ureg, tmp_sum, ureg_src(tmp_sum), ureg_src(tmp)); } /* Calculate the average and return. */ ureg_MUL(ureg, tmp_sum, ureg_src(tmp_sum), ureg_imm1f(ureg, 1.0 / nr_samples)); if (stype == TGSI_RETURN_TYPE_UINT) ureg_F2U(ureg, out, ureg_src(tmp_sum)); else if (stype == TGSI_RETURN_TYPE_SINT) ureg_F2I(ureg, out, ureg_src(tmp_sum)); else ureg_MOV(ureg, out, ureg_src(tmp_sum)); ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); }
static struct ureg_src prepare_argument(struct st_translate *t, const unsigned argId, const struct atifragshader_src_register *srcReg) { struct ureg_src src = get_source(t, srcReg->Index); struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId); switch (srcReg->argRep) { case GL_NONE: break; case GL_RED: src = ureg_scalar(src, TGSI_SWIZZLE_X); break; case GL_GREEN: src = ureg_scalar(src, TGSI_SWIZZLE_Y); break; case GL_BLUE: src = ureg_scalar(src, TGSI_SWIZZLE_Z); break; case GL_ALPHA: src = ureg_scalar(src, TGSI_SWIZZLE_W); break; } ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1); if (srcReg->argMod & GL_COMP_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_imm1f(t->ureg, 1.0f); modsrc[1] = ureg_src(arg); ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_BIAS_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_src(arg); modsrc[1] = ureg_imm1f(t->ureg, 0.5f); ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_2X_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_src(arg); modsrc[1] = ureg_src(arg); ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_NEGATE_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_src(arg); modsrc[1] = ureg_imm1f(t->ureg, -1.0f); ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2); } return ureg_src(arg); }
static void * create_frag_shader(struct vl_matrix_filter *filter, unsigned num_offsets, struct vertex2f *offsets, const float *matrix_values) { struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler; struct ureg_dst tmp; struct ureg_dst t_sum; struct ureg_dst o_fragment; unsigned i; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { return NULL; } i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); tmp = ureg_DECL_temporary(shader); t_sum = ureg_DECL_temporary(shader); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(shader, t_sum, ureg_imm1f(shader, 0.0f)); for (i = 0; i < num_offsets; ++i) { if (matrix_values[i] == 0.0f) continue; if (!is_vec_zero(offsets[i])) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y)); ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0.0f)); ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, ureg_src(tmp), sampler); } else { ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, i_vtex, sampler); } ureg_MAD(shader, t_sum, ureg_src(tmp), ureg_imm1f(shader, matrix_values[i]), ureg_src(t_sum)); } ureg_MOV(shader, o_fragment, ureg_src(t_sum)); ureg_END(shader); return ureg_create_shader_and_destroy(shader, filter->pipe); }
static struct ureg_dst calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale) { struct ureg_src vrect, vpos; struct ureg_dst t_vpos; struct ureg_dst o_vpos; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_vpos = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); /* * block_scale = (VL_MACROBLOCK_WIDTH, VL_MACROBLOCK_HEIGHT) / (dst.width, dst.height) * * t_vpos = (vpos + vrect) * block_scale * o_vpos.xy = t_vpos * o_vpos.zw = vpos */ ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); return t_vpos; }
/* These instructions need special treatment */ static void emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, struct ureg_dst *dst, struct ureg_src *args, unsigned argcount) { struct ureg_dst tmp[1]; struct ureg_src src[3]; if (!strcmp(desc->name, "CND")) { tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */ src[0] = ureg_imm1f(t->ureg, 0.5f); src[1] = args[2]; ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2); src[0] = ureg_src(tmp[0]); src[1] = args[0]; src[2] = args[1]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); } else if (!strcmp(desc->name, "CND0")) { src[0] = args[2]; src[1] = args[1]; src[2] = args[0]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); } else if (!strcmp(desc->name, "DOT2_ADD")) { /* note: DP2A is not implemented in most pipe drivers */ tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */ src[0] = args[0]; src[1] = args[1]; ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2); src[0] = ureg_src(tmp[0]); src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z); ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2); } }
static void * create_copy_frag_shader(struct vl_deint_filter *filter, unsigned field) { struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler; struct ureg_dst o_fragment; struct ureg_dst t_tex; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { return NULL; } t_tex = ureg_DECL_temporary(shader); i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 2); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(shader, t_tex, i_vtex); if (field) { ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW), ureg_imm4f(shader, 0, 0, 1.0f, 0)); } else { ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0)); } ureg_TEX(shader, o_fragment, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler); ureg_release_temporary(shader, t_tex); ureg_END(shader); return ureg_create_shader_and_destroy(shader, filter->pipe); }
static void increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], struct ureg_src saddr[2], bool right_side, bool transposed, int pos, float size) { unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; /* * daddr[0..1].(start) = saddr[0..1].(start) * daddr[0..1].(tc) = saddr[0..1].(tc) */ ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); }
static struct ureg_dst calc_line(struct ureg_program *shader) { struct ureg_dst tmp; struct ureg_src pos; tmp = ureg_DECL_temporary(shader); pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR); /* * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0 */ ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f)); ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp)); ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); return tmp; }
static void * create_frag_shader_video_buffer(struct vl_compositor *c) { struct ureg_program *shader; struct ureg_src tc; struct ureg_src csc[3]; struct ureg_src sampler[3]; struct ureg_dst texel; struct ureg_dst fragment; unsigned i; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return false; tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); for (i = 0; i < 3; ++i) { csc[i] = ureg_DECL_constant(shader, i); sampler[i] = ureg_DECL_sampler(shader, i); } texel = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * texel.xyz = tex(tc, sampler[i]) * fragment = csc * texel */ for (i = 0; i < 3; ++i) ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc, sampler[i]); ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); for (i = 0; i < 3; ++i) ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); ureg_release_temporary(shader, texel); ureg_END(shader); return ureg_create_shader_and_destroy(shader, c->pipe); }
/** * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions. */ static void compile_instruction(struct st_translate *t, const struct atifs_instruction *inst) { unsigned optype; for (optype = 0; optype < 2; optype++) { /* color, alpha */ const struct instruction_desc *desc; struct ureg_dst dst[1]; struct ureg_src args[3]; /* arguments for the main operation */ unsigned arg; unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI; if (!inst->Opcode[optype]) continue; desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI]; /* prepare the arguments */ for (arg = 0; arg < desc->arg_count; arg++) { if (arg >= inst->ArgCount[optype]) { _mesa_warning(0, "Using 0 for missing argument %d of %s\n", arg, desc->name); args[arg] = ureg_imm1f(t->ureg, 0.0f); } else { args[arg] = prepare_argument(t, arg, &inst->SrcReg[optype][arg]); } } /* prepare dst */ dst[0] = get_temp(t, dstreg); if (optype) { dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W); } else { GLuint dstMask = inst->DstReg[optype].dstMask; if (dstMask == GL_NONE) { dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ); } else { dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */ } } /* emit the main instruction */ emit_arith_inst(t, desc, dst, args, arg); emit_dstmod(t, *dst, inst->DstReg[optype].dstMod); t->regs_written[t->current_pass][dstreg] = true; } }
void vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, unsigned first_output, struct ureg_dst tex) { struct ureg_src vrect, vpos; struct ureg_src scale; struct ureg_dst t_start; struct ureg_dst o_l_addr[2], o_r_addr[2]; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_start = ureg_DECL_temporary(shader); --first_output; o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / idct->buffer_width, (float)VL_BLOCK_HEIGHT / idct->buffer_height); ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), ureg_scalar(vrect, TGSI_SWIZZLE_X), ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets)); ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4); calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); }
static struct ureg_src get_source(struct st_translate *t, GLuint src_type) { if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) { if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) { return ureg_src(get_temp(t, src_type - GL_REG_0_ATI)); } else { return ureg_imm1f(t->ureg, 0.0f); } } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) { return t->constants[src_type - GL_CON_0_ATI]; } else if (src_type == GL_ZERO) { return ureg_imm1f(t->ureg, 0.0f); } else if (src_type == GL_ONE) { return ureg_imm1f(t->ureg, 1.0f); } else if (src_type == GL_PRIMARY_COLOR_ARB) { return t->inputs[t->inputMapping[VARYING_SLOT_COL0]]; } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) { return t->inputs[t->inputMapping[VARYING_SLOT_COL1]]; } else { /* frontend prevents this */ unreachable("unknown source"); } }
static void * create_mismatch_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src vpos; struct ureg_src scale; struct ureg_dst t_tex; struct ureg_dst o_vpos, o_addr[2]; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_tex = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); /* * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) * * t_vpos = vpos + 7 / VL_BLOCK_WIDTH * o_vpos.xy = t_vpos * scale * * o_addr = calc_addr(...) * */ scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / idct->buffer_width, (float)VL_BLOCK_HEIGHT / idct->buffer_height); ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); ureg_release_temporary(shader, t_tex); ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); }
/** * Compile one setup instruction to TGSI instructions. */ static void compile_setupinst(struct st_translate *t, const unsigned r, const struct atifs_setupinst *texinst) { struct ureg_dst dst[1]; struct ureg_src src[2]; if (!texinst->Opcode) return; dst[0] = get_temp(t, r); GLuint pass_tex = texinst->src; if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0; src[0] = t->inputs[t->inputMapping[attr]]; } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { unsigned reg = pass_tex - GL_REG_0_ATI; /* the frontend already validated that REG is only allowed in second pass */ if (t->regs_written[0][reg]) { src[0] = ureg_src(t->temps[reg]); } else { src[0] = ureg_imm1f(t->ureg, 0.0f); } } src[0] = apply_swizzle(t, src[0], texinst->swizzle); if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) { /* by default texture and sampler indexes are the same */ src[1] = t->samplers[r]; /* the texture target is still unknown, it will be fixed in the draw call */ ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D, NULL, 0, src, 2); } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); } t->regs_written[t->current_pass][r] = true; }
static void emit_dstmod(struct st_translate *t, struct ureg_dst dst, GLuint dstMod) { float imm; struct ureg_src src[3]; GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI; if (dstMod == GL_NONE) { return; } switch (scale) { case GL_2X_BIT_ATI: imm = 2.0f; break; case GL_4X_BIT_ATI: imm = 4.0f; break; case GL_8X_BIT_ATI: imm = 8.0f; break; case GL_HALF_BIT_ATI: imm = 0.5f; break; case GL_QUARTER_BIT_ATI: imm = 0.25f; break; case GL_EIGHTH_BIT_ATI: imm = 0.125f; break; default: imm = 1.0f; } src[0] = ureg_src(dst); src[1] = ureg_imm1f(t->ureg, imm); if (dstMod & GL_SATURATE_BIT_ATI) { dst = ureg_saturate(dst); } ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2); }
static void calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], struct ureg_src tc, struct ureg_src start, bool right_side, bool transposed, float size) { unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; /* * addr[0..1].(start) = right_side ? start.x : tc.x * addr[0..1].(tc) = right_side ? tc.y : start.y * addr[0..1].z = tc.z * addr[1].(start) += 1.0f / scale */ ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); }
static void compile_instruction( struct gl_context *ctx, struct st_translate *t, const struct prog_instruction *inst, boolean clamp_dst_color_output) { struct ureg_program *ureg = t->ureg; GLuint i; struct ureg_dst dst[1] = { { 0 } }; struct ureg_src src[4]; unsigned num_dst; unsigned num_src; num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); num_src = _mesa_num_inst_src_regs( inst->Opcode ); if (num_dst) dst[0] = translate_dst( t, &inst->DstReg, inst->Saturate, clamp_dst_color_output); for (i = 0; i < num_src; i++) src[i] = translate_src( t, &inst->SrcReg[i] ); switch( inst->Opcode ) { case OPCODE_SWZ: emit_swz( t, dst[0], &inst->SrcReg[0] ); return; case OPCODE_BGNLOOP: case OPCODE_CAL: case OPCODE_ELSE: case OPCODE_ENDLOOP: debug_assert(num_dst == 0); ureg_label_insn( ureg, translate_opcode( inst->Opcode ), src, num_src, get_label( t, inst->BranchTarget )); return; case OPCODE_IF: debug_assert(num_dst == 0); ureg_label_insn( ureg, ctx->Const.NativeIntegers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF, src, num_src, get_label( t, inst->BranchTarget )); return; case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXD: case OPCODE_TXL: case OPCODE_TXP: src[num_src++] = t->samplers[inst->TexSrcUnit]; ureg_tex_insn( ureg, translate_opcode( inst->Opcode ), dst, num_dst, st_translate_texture_target( inst->TexSrcTarget, inst->TexShadow ), NULL, 0, src, num_src ); return; case OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); ureg_insn( ureg, translate_opcode( inst->Opcode ), dst, num_dst, src, num_src ); break; case OPCODE_XPD: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); ureg_insn( ureg, translate_opcode( inst->Opcode ), dst, num_dst, src, num_src ); break; case OPCODE_NOISE1: case OPCODE_NOISE2: case OPCODE_NOISE3: case OPCODE_NOISE4: /* At some point, a motivated person could add a better * implementation of noise. Currently not even the nvidia * binary drivers do anything more than this. In any case, the * place to do this is in the GL state tracker, not the poor * driver. */ ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); break; case OPCODE_DDY: emit_ddy( t, dst[0], &inst->SrcReg[0] ); break; case OPCODE_RSQ: ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) ); break; default: ureg_insn( ureg, translate_opcode( inst->Opcode ), dst, num_dst, src, num_src ); break; } }
static void * create_frag_shader_weave(struct vl_compositor *c) { struct ureg_program *shader; struct ureg_src i_tc[2]; struct ureg_src csc[3]; struct ureg_src sampler[3]; struct ureg_dst t_tc[2]; struct ureg_dst t_texel[2]; struct ureg_dst o_fragment; unsigned i, j; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return false; i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); for (i = 0; i < 3; ++i) { csc[i] = ureg_DECL_constant(shader, i); sampler[i] = ureg_DECL_sampler(shader, i); } for (i = 0; i < 2; ++i) { t_tc[i] = ureg_DECL_temporary(shader); t_texel[i] = ureg_DECL_temporary(shader); } o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* calculate the texture offsets * t_tc.x = i_tc.x * t_tc.y = (round(i_tc.y) + 0.5) / height * 2 */ for (i = 0; i < 2; ++i) { ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]); ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), i_tc[i]); ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W), ureg_imm1f(shader, i ? 0.75f : 0.25f)); ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y), ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z), ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W)); } /* fetch the texels * texel[0..1].x = tex(t_tc[0..1][0]) * texel[0..1].y = tex(t_tc[0..1][1]) * texel[0..1].z = tex(t_tc[0..1][2]) */ for (i = 0; i < 2; ++i) for (j = 0; j < 3; ++j) { struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]), TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j), TGSI_TEXTURE_3D, src, sampler[j]); } /* calculate linear interpolation factor * factor = |round(i_tc.y) - i_tc.y| * 2 */ ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]); ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), ureg_src(t_tc[0]), ureg_negate(i_tc[0])); ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_XY), ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f)); ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]), TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z), ureg_src(t_texel[1]), ureg_src(t_texel[0])); /* and finally do colour space transformation * fragment = csc * texel */ ureg_MOV(shader, ureg_writemask(t_texel[0], TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); for (i = 0; i < 3; ++i) ureg_DP4(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(t_texel[0])); ureg_MOV(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); for (i = 0; i < 2; ++i) { ureg_release_temporary(shader, t_texel[i]); ureg_release_temporary(shader, t_tc[i]); } ureg_END(shader); return ureg_create_shader_and_destroy(shader, c->pipe); }
static void * create_vert_shader(struct vl_compositor *c) { struct ureg_program *shader; struct ureg_src vpos, vtex, color; struct ureg_dst tmp; struct ureg_dst o_vpos, o_vtex, o_color; struct ureg_dst o_vtop, o_vbottom; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return false; vpos = ureg_DECL_vs_input(shader, 0); vtex = ureg_DECL_vs_input(shader, 1); color = ureg_DECL_vs_input(shader, 2); tmp = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_color = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR); o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX); o_vtop = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vbottom = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); /* * o_vpos = vpos * o_vtex = vtex * o_color = color */ ureg_MOV(shader, o_vpos, vpos); ureg_MOV(shader, o_vtex, vtex); ureg_MOV(shader, o_color, color); /* * tmp.x = vtex.w / 2 * tmp.y = vtex.w / 4 * * o_vtop.x = vtex.x * o_vtop.y = vtex.y * tmp.x + 0.25f * o_vtop.z = vtex.y * tmp.y + 0.25f * o_vtop.w = 1 / tmp.x * * o_vbottom.x = vtex.x * o_vbottom.y = vtex.y * tmp.x - 0.25f * o_vbottom.z = vtex.y * tmp.y - 0.25f * o_vbottom.w = 1 / tmp.y */ ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f)); ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.25f)); ureg_MOV(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_X), vtex); ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.25f)); ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.25f)); ureg_RCP(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_W), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); ureg_MOV(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_X), vtex); ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, -0.25f)); ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, -0.25f)); ureg_RCP(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_W), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); ureg_END(shader); return ureg_create_shader_and_destroy(shader, c->pipe); }
static void * create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert, vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv) { struct ureg_program *shader; struct ureg_src flags; struct ureg_dst tmp; struct ureg_dst fragment; unsigned label; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); tmp = calc_line(shader); /* * if (field == tc.w) * kill(); * else { * fragment.xyz = tex(tc, sampler) * scale + tc.z * fragment.w = 1.0f * } */ ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp)); ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label); ureg_KILP(shader); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ELSE(shader, &label); fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp); if (scale != 1.0f) ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, scale), ureg_scalar(flags, TGSI_SWIZZLE_Z)); else ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z)); ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); ureg_release_temporary(shader, tmp); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); }
static void * create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv) { struct ureg_program *shader; struct ureg_src vrect, vpos; struct ureg_dst t_vpos, t_vtex; struct ureg_dst o_vpos, o_flags; struct vertex2f scale = { (float)VL_BLOCK_WIDTH / r->buffer_width * VL_MACROBLOCK_WIDTH / r->macroblock_size, (float)VL_BLOCK_HEIGHT / r->buffer_height * VL_MACROBLOCK_HEIGHT / r->macroblock_size }; unsigned label; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y)); t_vtex = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS); /* * o_vtex.xy = t_vpos * o_flags.z = intra * 0.5 * * if(interlaced) { * t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 } * t_vtex.z = vpos.y % 2 * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y * o_vpos.y = t_vtex.y + t_vpos.y * * o_flags.w = t_vtex.z ? 0 : 1 * } * */ vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos); ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z), ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f)); ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); if (r->macroblock_size == VL_MACROBLOCK_HEIGHT) { //TODO ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label); ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)), ureg_imm2f(shader, 0.0f, scale.y), ureg_imm2f(shader, -scale.y, 0.0f)); ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f)); ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex)); ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y)); ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y), ureg_src(t_vpos), ureg_src(t_vtex)); ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f)); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); } ureg_release_temporary(shader, t_vtex); ureg_release_temporary(shader, t_vpos); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); }
static void * create_mismatch_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src addr[2]; struct ureg_dst m[8][2]; struct ureg_dst fragment; unsigned i; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); for (i = 0; i < 8; ++i) { m[i][0] = ureg_DECL_temporary(shader); m[i][1] = ureg_DECL_temporary(shader); } for (i = 0; i < 8; ++i) { increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); } for (i = 0; i < 8; ++i) { struct ureg_src s_addr[2]; s_addr[0] = ureg_src(m[i][0]); s_addr[1] = ureg_src(m[i][1]); fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); } for (i = 1; i < 8; ++i) { ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); } ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); for (i = 0; i < 8; ++i) { ureg_release_temporary(shader, m[i][0]); ureg_release_temporary(shader, m[i][1]); } ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); }
static void * create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field, struct vertex2f *sizes, bool spatial_filter) { struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler_cur; struct ureg_src sampler_prevprev; struct ureg_src sampler_prev; struct ureg_src sampler_next; struct ureg_dst o_fragment; struct ureg_dst t_tex; struct ureg_dst t_comp_top, t_comp_bot; struct ureg_dst t_diff; struct ureg_dst t_a, t_b; struct ureg_dst t_weave, t_linear; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { return NULL; } t_tex = ureg_DECL_temporary(shader); t_comp_top = ureg_DECL_temporary(shader); t_comp_bot = ureg_DECL_temporary(shader); t_diff = ureg_DECL_temporary(shader); t_a = ureg_DECL_temporary(shader); t_b = ureg_DECL_temporary(shader); t_weave = ureg_DECL_temporary(shader); t_linear = ureg_DECL_temporary(shader); i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler_prevprev = ureg_DECL_sampler(shader, 0); sampler_prev = ureg_DECL_sampler(shader, 1); sampler_cur = ureg_DECL_sampler(shader, 2); sampler_next = ureg_DECL_sampler(shader, 3); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); // we don't care about ZW interpolation (allows better optimization) ureg_MOV(shader, t_tex, i_vtex); ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0)); // sample between texels for cheap lowpass ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, sizes->x * 0.5f, sizes->y * -0.5f, 0, 0)); ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, sizes->x * -0.5f, sizes->y * 0.5f, 1.0f, 0)); if (field == 0) { /* interpolating top field -> current field is a bottom field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev); ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b))); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next); ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b))); } else { /* interpolating bottom field -> current field is a top field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev); ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b))); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next); ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b))); } // absolute maximum of differences ureg_MAX(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_abs(ureg_src(t_diff)), ureg_scalar(ureg_abs(ureg_src(t_diff)), TGSI_SWIZZLE_Y)); if (field == 0) { /* weave with prev top field */ ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler_prev); /* get linear interpolation from current bottom field */ ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * -1.0f, 1.0f, 0)); ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur); } else { /* weave with prev bottom field */ ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, 0, 1.0f, 0)); ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev); /* get linear interpolation from current top field */ ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * 1.0f, 0, 0)); ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur); } // mix between weave and linear // fully weave if diff < 6 (0.02353), fully interpolate if diff > 14 (0.05490) ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_diff), ureg_imm4f(shader, -0.02353f, 0, 0, 0)); ureg_MUL(shader, ureg_saturate(ureg_writemask(t_diff, TGSI_WRITEMASK_X)), ureg_src(t_diff), ureg_imm4f(shader, 31.8750f, 0, 0, 0)); ureg_LRP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), ureg_src(t_diff), ureg_src(t_linear), ureg_src(t_weave)); ureg_MOV(shader, o_fragment, ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X)); ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_comp_top); ureg_release_temporary(shader, t_comp_bot); ureg_release_temporary(shader, t_diff); ureg_release_temporary(shader, t_a); ureg_release_temporary(shader, t_b); ureg_release_temporary(shader, t_weave); ureg_release_temporary(shader, t_linear); ureg_END(shader); return ureg_create_shader_and_destroy(shader, filter->pipe); }
void * util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex, unsigned nr_samples, enum tgsi_return_type stype) { struct ureg_program *ureg; struct ureg_src sampler, coord; struct ureg_dst out, tmp, top, bottom; struct ureg_dst tmp_coord[4], tmp_sum[4]; unsigned i, c; ureg = ureg_create(PIPE_SHADER_FRAGMENT); if (!ureg) return NULL; /* Declarations. */ sampler = ureg_DECL_sampler(ureg, 0); ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype); coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); for (c = 0; c < 4; c++) tmp_sum[c] = ureg_DECL_temporary(ureg); for (c = 0; c < 4; c++) tmp_coord[c] = ureg_DECL_temporary(ureg); tmp = ureg_DECL_temporary(ureg); top = ureg_DECL_temporary(ureg); bottom = ureg_DECL_temporary(ureg); /* Instructions. */ for (c = 0; c < 4; c++) ureg_MOV(ureg, tmp_sum[c], ureg_imm1f(ureg, 0)); /* Get 4 texture coordinates for the bilinear filter. */ ureg_F2U(ureg, tmp_coord[0], coord); /* top-left */ ureg_UADD(ureg, tmp_coord[1], ureg_src(tmp_coord[0]), ureg_imm4u(ureg, 1, 0, 0, 0)); /* top-right */ ureg_UADD(ureg, tmp_coord[2], ureg_src(tmp_coord[0]), ureg_imm4u(ureg, 0, 1, 0, 0)); /* bottom-left */ ureg_UADD(ureg, tmp_coord[3], ureg_src(tmp_coord[0]), ureg_imm4u(ureg, 1, 1, 0, 0)); /* bottom-right */ for (i = 0; i < nr_samples; i++) { for (c = 0; c < 4; c++) { /* Read one sample. */ ureg_MOV(ureg, ureg_writemask(tmp_coord[c], TGSI_WRITEMASK_W), ureg_imm1u(ureg, i)); ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler); if (stype == TGSI_RETURN_TYPE_UINT) ureg_U2F(ureg, tmp, ureg_src(tmp)); else if (stype == TGSI_RETURN_TYPE_SINT) ureg_I2F(ureg, tmp, ureg_src(tmp)); /* Add it to the sum.*/ ureg_ADD(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_src(tmp)); } } /* Calculate the average. */ for (c = 0; c < 4; c++) ureg_MUL(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_imm1f(ureg, 1.0 / nr_samples)); /* Take the 4 average values and apply a standard bilinear filter. */ ureg_FRC(ureg, tmp, coord); ureg_LRP(ureg, top, ureg_scalar(ureg_src(tmp), 0), ureg_src(tmp_sum[1]), ureg_src(tmp_sum[0])); ureg_LRP(ureg, bottom, ureg_scalar(ureg_src(tmp), 0), ureg_src(tmp_sum[3]), ureg_src(tmp_sum[2])); ureg_LRP(ureg, tmp, ureg_scalar(ureg_src(tmp), 1), ureg_src(bottom), ureg_src(top)); /* Convert to the texture format and return. */ if (stype == TGSI_RETURN_TYPE_UINT) ureg_F2U(ureg, out, ureg_src(tmp)); else if (stype == TGSI_RETURN_TYPE_SINT) ureg_F2I(ureg, out, ureg_src(tmp)); else ureg_MOV(ureg, out, ureg_src(tmp)); ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); }
static void * create_ref_frag_shader(struct vl_mc *r) { const float y_scale = r->buffer_height / 2 * r->macroblock_size / VL_MACROBLOCK_HEIGHT; struct ureg_program *shader; struct ureg_src tc[2], sampler; struct ureg_dst ref, field; struct ureg_dst fragment; unsigned label; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ref = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); field = calc_line(shader); /* * ref = field.z ? tc[1] : tc[0] * * // Adjust tc acording to top/bottom field selection * if (|ref.z|) { * ref.y *= y_scale * ref.y = floor(ref.y) * ref.y += ref.z * ref.y /= y_scale * } * fragment.xyz = tex(ref, sampler[0]) */ ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]); ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]); ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label); ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref), ureg_imm1f(shader, y_scale)); ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref)); ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z)); ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale)); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler); ureg_release_temporary(shader, ref); ureg_release_temporary(shader, field); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); }
static void * create_vert_shader(struct vl_zscan *zscan) { struct ureg_program *shader; struct ureg_src scale; struct ureg_src vrect, vpos, block_num; struct ureg_dst tmp; struct ureg_dst o_vpos; struct ureg_dst *o_vtex; unsigned i; shader = ureg_create(PIPE_SHADER_VERTEX); if (!shader) return NULL; o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / zscan->buffer_width, (float)VL_BLOCK_HEIGHT / zscan->buffer_height); vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); tmp = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); for (i = 0; i < zscan->num_channels; ++i) o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); /* * o_vpos.xy = (vpos + vrect) * scale * o_vpos.zw = 1.0f * * tmp.xy = InstanceID / blocks_per_line * tmp.x = frac(tmp.x) * tmp.y = floor(tmp.y) * * o_vtex.x = vrect.x / blocks_per_line + tmp.x * o_vtex.y = vrect.y * o_vtex.z = tmp.z * blocks_per_line / blocks_total */ ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); for (i = 0; i < zscan->num_channels; ++i) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2))); ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); } ureg_release_temporary(shader, tmp); ureg_END(shader); FREE(o_vtex); return ureg_create_shader_and_destroy(shader, zscan->pipe); }
static void * create_frag_shader(struct vl_zscan *zscan) { struct ureg_program *shader; struct ureg_src *vtex; struct ureg_src samp_src, samp_scan, samp_quant; struct ureg_dst *tmp; struct ureg_dst quant, fragment; unsigned i; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) return NULL; vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src)); tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); for (i = 0; i < zscan->num_channels; ++i) vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); samp_src = ureg_DECL_sampler(shader, 0); samp_scan = ureg_DECL_sampler(shader, 1); samp_quant = ureg_DECL_sampler(shader, 2); for (i = 0; i < zscan->num_channels; ++i) tmp[i] = ureg_DECL_temporary(shader); quant = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * tmp.x = tex(vtex, 1) * tmp.y = vtex.z * fragment = tex(tmp, 0) * quant */ for (i = 0; i < zscan->num_channels; ++i) ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); for (i = 0; i < zscan->num_channels; ++i) ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); for (i = 0; i < zscan->num_channels; ++i) { ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); } ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); for (i = 0; i < zscan->num_channels; ++i) ureg_release_temporary(shader, tmp[i]); ureg_END(shader); FREE(vtex); FREE(tmp); return ureg_create_shader_and_destroy(shader, zscan->pipe); }
static void * create_frag_shader(struct vl_median_filter *filter, struct vertex2f *offsets, unsigned num_offsets) { struct pipe_screen *screen = filter->pipe->screen; struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler; struct ureg_dst *t_array = MALLOC(sizeof(struct ureg_dst) * num_offsets); struct ureg_dst o_fragment; const unsigned median = num_offsets >> 1; unsigned i, j; assert(num_offsets & 1); /* we need an odd number of offsets */ if (!(num_offsets & 1)) { /* yeah, we REALLY need an odd number of offsets!!! */ FREE(t_array); return NULL; } if (num_offsets > screen->get_shader_param( screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS)) { FREE(t_array); return NULL; } shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { FREE(t_array); return NULL; } i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); for (i = 0; i < num_offsets; ++i) t_array[i] = ureg_DECL_temporary(shader); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * t_array[0..*] = vtex + offset[0..*] * t_array[0..*] = tex(t_array[0..*], sampler) * result = partial_bubblesort(t_array)[mid] */ for (i = 0; i < num_offsets; ++i) { if (!is_vec_zero(offsets[i])) { ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY), i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y)); ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0.0f)); } } for (i = 0; i < num_offsets; ++i) { struct ureg_src src = is_vec_zero(offsets[i]) ? i_vtex : ureg_src(t_array[i]); ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, src, sampler); } // TODO: Couldn't this be improved even more? for (i = 0; i <= median; ++i) { for (j = 1; j < (num_offsets - i - 1); ++j) { struct ureg_dst tmp = ureg_DECL_temporary(shader); ureg_MOV(shader, tmp, ureg_src(t_array[j])); ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); ureg_MIN(shader, t_array[j - 1], ureg_src(tmp), ureg_src(t_array[j - 1])); ureg_release_temporary(shader, tmp); } if (i == median) ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); else ureg_MIN(shader, t_array[j - 1], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); } ureg_MOV(shader, o_fragment, ureg_src(t_array[median])); ureg_END(shader); FREE(t_array); return ureg_create_shader_and_destroy(shader, filter->pipe); }