예제 #1
0
void *
util_make_fs_msaa_resolve(struct pipe_context *pipe,
                          enum tgsi_texture_type tgsi_tex, unsigned nr_samples,
                          enum tgsi_return_type stype)
{
   struct ureg_program *ureg;
   struct ureg_src sampler, coord;
   struct ureg_dst out, tmp_sum, tmp_coord, tmp;
   unsigned i;

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   /* Declarations. */
   sampler = ureg_DECL_sampler(ureg, 0);
   ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype);
   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                              TGSI_INTERPOLATE_LINEAR);
   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   tmp_sum = ureg_DECL_temporary(ureg);
   tmp_coord = ureg_DECL_temporary(ureg);
   tmp = ureg_DECL_temporary(ureg);

   /* Instructions. */
   ureg_MOV(ureg, tmp_sum, ureg_imm1f(ureg, 0));
   ureg_F2U(ureg, tmp_coord, coord);

   for (i = 0; i < nr_samples; i++) {
      /* Read one sample. */
      ureg_MOV(ureg, ureg_writemask(tmp_coord, TGSI_WRITEMASK_W),
               ureg_imm1u(ureg, i));
      ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord), sampler);

      if (stype == TGSI_RETURN_TYPE_UINT)
         ureg_U2F(ureg, tmp, ureg_src(tmp));
      else if (stype == TGSI_RETURN_TYPE_SINT)
         ureg_I2F(ureg, tmp, ureg_src(tmp));

      /* Add it to the sum.*/
      ureg_ADD(ureg, tmp_sum, ureg_src(tmp_sum), ureg_src(tmp));
   }

   /* Calculate the average and return. */
   ureg_MUL(ureg, tmp_sum, ureg_src(tmp_sum),
            ureg_imm1f(ureg, 1.0 / nr_samples));

   if (stype == TGSI_RETURN_TYPE_UINT)
      ureg_F2U(ureg, out, ureg_src(tmp_sum));
   else if (stype == TGSI_RETURN_TYPE_SINT)
      ureg_F2I(ureg, out, ureg_src(tmp_sum));
   else
      ureg_MOV(ureg, out, ureg_src(tmp_sum));

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
예제 #2
0
static struct ureg_src
prepare_argument(struct st_translate *t, const unsigned argId,
                 const struct atifragshader_src_register *srcReg)
{
   struct ureg_src src = get_source(t, srcReg->Index);
   struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);

   switch (srcReg->argRep) {
   case GL_NONE:
      break;
   case GL_RED:
      src = ureg_scalar(src, TGSI_SWIZZLE_X);
      break;
   case GL_GREEN:
      src = ureg_scalar(src, TGSI_SWIZZLE_Y);
      break;
   case GL_BLUE:
      src = ureg_scalar(src, TGSI_SWIZZLE_Z);
      break;
   case GL_ALPHA:
      src = ureg_scalar(src, TGSI_SWIZZLE_W);
      break;
   }
   ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1);

   if (srcReg->argMod & GL_COMP_BIT_ATI) {
      struct ureg_src modsrc[2];
      modsrc[0] = ureg_imm1f(t->ureg, 1.0f);
      modsrc[1] = ureg_src(arg);

      ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
   }
   if (srcReg->argMod & GL_BIAS_BIT_ATI) {
      struct ureg_src modsrc[2];
      modsrc[0] = ureg_src(arg);
      modsrc[1] = ureg_imm1f(t->ureg, 0.5f);

      ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
   }
   if (srcReg->argMod & GL_2X_BIT_ATI) {
      struct ureg_src modsrc[2];
      modsrc[0] = ureg_src(arg);
      modsrc[1] = ureg_src(arg);

      ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
   }
   if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
      struct ureg_src modsrc[2];
      modsrc[0] = ureg_src(arg);
      modsrc[1] = ureg_imm1f(t->ureg, -1.0f);

      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
   }
   return  ureg_src(arg);
}
예제 #3
0
static void *
create_frag_shader(struct vl_matrix_filter *filter, unsigned num_offsets,
                   struct vertex2f *offsets, const float *matrix_values)
{
   struct ureg_program *shader;
   struct ureg_src i_vtex;
   struct ureg_src sampler;
   struct ureg_dst tmp;
   struct ureg_dst t_sum;
   struct ureg_dst o_fragment;
   unsigned i;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader) {
      return NULL;
   }

   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 0);
   ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT);

   tmp = ureg_DECL_temporary(shader);
   t_sum = ureg_DECL_temporary(shader);
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   ureg_MOV(shader, t_sum, ureg_imm1f(shader, 0.0f));
   for (i = 0; i < num_offsets; ++i) {
      if (matrix_values[i] == 0.0f)
         continue;

      if (!is_vec_zero(offsets[i])) {
         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
                  i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y));
         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_ZW),
                  ureg_imm1f(shader, 0.0f));
         ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, ureg_src(tmp), sampler);
      } else {
         ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, i_vtex, sampler);
      }
      ureg_MAD(shader, t_sum, ureg_src(tmp), ureg_imm1f(shader, matrix_values[i]),
               ureg_src(t_sum));
   }

   ureg_MOV(shader, o_fragment, ureg_src(t_sum));

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, filter->pipe);
}
static struct ureg_dst
calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale)
{
   struct ureg_src vrect, vpos;
   struct ureg_dst t_vpos;
   struct ureg_dst o_vpos;

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_vpos = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   /*
    * block_scale = (VL_MACROBLOCK_WIDTH, VL_MACROBLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = (vpos + vrect) * block_scale
    * o_vpos.xy = t_vpos
    * o_vpos.zw = vpos
    */
   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   return t_vpos;
}
예제 #5
0
/* These instructions need special treatment */
static void
emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
                  struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
{
   struct ureg_dst tmp[1];
   struct ureg_src src[3];

   if (!strcmp(desc->name, "CND")) {
      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
      src[0] = ureg_imm1f(t->ureg, 0.5f);
      src[1] = args[2];
      ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2);
      src[0] = ureg_src(tmp[0]);
      src[1] = args[0];
      src[2] = args[1];
      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
   } else if (!strcmp(desc->name, "CND0")) {
      src[0] = args[2];
      src[1] = args[1];
      src[2] = args[0];
      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
   } else if (!strcmp(desc->name, "DOT2_ADD")) {
      /* note: DP2A is not implemented in most pipe drivers */
      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
      src[0] = args[0];
      src[1] = args[1];
      ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2);
      src[0] = ureg_src(tmp[0]);
      src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
      ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2);
   }
}
예제 #6
0
static void *
create_copy_frag_shader(struct vl_deint_filter *filter, unsigned field)
{
   struct ureg_program *shader;
   struct ureg_src i_vtex;
   struct ureg_src sampler;
   struct ureg_dst o_fragment;
   struct ureg_dst t_tex;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader) {
      return NULL;
   }
   t_tex = ureg_DECL_temporary(shader);

   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 2);
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   ureg_MOV(shader, t_tex, i_vtex);
   if (field) {
      ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
               ureg_imm4f(shader, 0, 0, 1.0f, 0));
   } else {
      ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
               ureg_imm1f(shader, 0));
   }

   ureg_TEX(shader, o_fragment, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler);

   ureg_release_temporary(shader, t_tex);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, filter->pipe);
}
예제 #7
0
파일: vl_idct.c 프로젝트: kallisti5/mesa
/**
 * Derive a pair of addresses from a pair of source addresses, advancing the
 * texture-coordinate component by pos / size.
 *
 *    daddr[0..1].(start) = saddr[0..1].(start)
 *    daddr[0..1].(tc)    = saddr[0..1].(tc) + pos / size
 *
 * (start) is .x and (tc) is .y when right_side == transposed, otherwise
 * the two components are exchanged.
 */
static void
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
               struct ureg_src saddr[2], bool right_side, bool transposed,
               int pos, float size)
{
    const bool same = (right_side == transposed);
    const unsigned start_mask = same ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
    const unsigned tc_mask = same ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
    const float step = pos / size;
    unsigned n;

    for (n = 0; n < 2; n++) {
        /* copy the start component, offset the tc component */
        ureg_MOV(shader, ureg_writemask(daddr[n], start_mask), saddr[n]);
        ureg_ADD(shader, ureg_writemask(daddr[n], tc_mask), saddr[n],
                 ureg_imm1f(shader, step));
    }
}
static struct ureg_dst
calc_line(struct ureg_program *shader)
{
   struct ureg_dst tmp;
   struct ureg_src pos;

   tmp = ureg_DECL_temporary(shader);

   pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR);

   /*
    * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
    */
   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f));
   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));

   return tmp;
}
예제 #9
0
/**
 * Build the fragment shader that samples three planes and applies a colour
 * space conversion.
 *
 *    texel.x/y/z = tex(tc, sampler[0/1/2])
 *    texel.w     = 1.0
 *    fragment.x/y/z = dot4(csc[0/1/2], texel)
 *    fragment.w  = 1.0
 *
 * csc[i] is constant register i and sampler[i] is sampler i.
 *
 * \param c  compositor whose pipe context creates the shader
 * \return result of ureg_create_shader_and_destroy(), or NULL if the ureg
 *         program could not be created
 */
static void *
create_frag_shader_video_buffer(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src tc;
   struct ureg_src csc[3];
   struct ureg_src sampler[3];
   struct ureg_dst texel;
   struct ureg_dst fragment;
   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL; /* pointer-returning function: report failure as NULL */

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   for (i = 0; i < 3; ++i) {
      csc[i] = ureg_DECL_constant(shader, i);
      sampler[i] = ureg_DECL_sampler(shader, i);
   }
   texel = ureg_DECL_temporary(shader);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * texel.xyz = tex(tc, sampler[i])
    * fragment = csc * texel
    */
   /* TGSI_WRITEMASK_X << i selects component x, y, z for i = 0, 1, 2 */
   for (i = 0; i < 3; ++i)
      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc, sampler[i]);

   /* w = 1 so the fourth component of each csc row acts as an additive term */
   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));

   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   ureg_release_temporary(shader, texel);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
예제 #10
0
/**
 * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
 *
 * For each of the two halves (0 = color, 1 = alpha) that has an opcode set:
 * the arguments are prepared with prepare_argument() (missing ones are
 * replaced by 0.0 with a warning), the destination writemask is selected,
 * the main instruction and the destination modifier are emitted, and the
 * destination register is marked as written for the current pass.
 *
 * \param t     translation state
 * \param inst  the instruction pair to compile
 */
static void
compile_instruction(struct st_translate *t,
                    const struct atifs_instruction *inst)
{
   unsigned optype;

   for (optype = 0; optype < 2; optype++) { /* color, alpha */
      const struct instruction_desc *desc;
      struct ureg_dst dst[1];
      struct ureg_src args[3]; /* arguments for the main operation */
      unsigned arg;
      unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;

      /* nothing to do for a half without an opcode */
      if (!inst->Opcode[optype])
         continue;

      desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];

      /* prepare the arguments */
      for (arg = 0; arg < desc->arg_count; arg++) {
         if (arg >= inst->ArgCount[optype]) {
            /* arg is unsigned, so it is printed with %u */
            _mesa_warning(NULL, "Using 0 for missing argument %u of %s\n",
                          arg, desc->name);
            args[arg] = ureg_imm1f(t->ureg, 0.0f);
         } else {
            args[arg] = prepare_argument(t, arg,
                                         &inst->SrcReg[optype][arg]);
         }
      }

      /* prepare dst */
      dst[0] = get_temp(t, dstreg);

      if (optype) {
         /* the alpha half only writes W */
         dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
      } else {
         GLuint dstMask = inst->DstReg[optype].dstMask;
         if (dstMask == GL_NONE) {
            dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
         } else {
            dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
         }
      }

      /* emit the main instruction; arg equals desc->arg_count here */
      emit_arith_inst(t, desc, dst, args, arg);

      emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);

      t->regs_written[t->current_pass][dstreg] = true;
   }
}
예제 #11
0
파일: vl_idct.c 프로젝트: kallisti5/mesa
void
vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
                           unsigned first_output, struct ureg_dst tex)
{
    struct ureg_src vrect, vpos;
    struct ureg_src scale;
    struct ureg_dst t_start;
    struct ureg_dst o_l_addr[2], o_r_addr[2];

    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

    t_start = ureg_DECL_temporary(shader);

    --first_output;

    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
    o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);

    o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
    o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);

    scale = ureg_imm2f(shader,
                       (float)VL_BLOCK_WIDTH / idct->buffer_width,
                       (float)VL_BLOCK_HEIGHT / idct->buffer_height);

    ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
             ureg_scalar(vrect, TGSI_SWIZZLE_X),
             ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
    ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);

    calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
    calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);

    ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
    ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
}
예제 #12
0
/**
 * Map a source register enum to the TGSI source operand that holds it.
 *
 *  - GL_REG_0_ATI..GL_REG_5_ATI: the matching temporary if it was written
 *    in the current pass, otherwise the immediate 0.0
 *  - GL_CON_0_ATI..GL_CON_7_ATI: the matching entry of t->constants
 *  - GL_ZERO / GL_ONE: the immediates 0.0 / 1.0
 *  - GL_PRIMARY_COLOR_ARB / GL_SECONDARY_INTERPOLATOR_ATI: the inputs
 *    mapped from VARYING_SLOT_COL0 / VARYING_SLOT_COL1
 *
 * Any other value hits unreachable().
 */
static struct ureg_src
get_source(struct st_translate *t, GLuint src_type)
{
   /* temporary registers */
   if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
      if (!t->regs_written[t->current_pass][src_type - GL_REG_0_ATI])
         return ureg_imm1f(t->ureg, 0.0f);

      return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
   }

   /* constant registers */
   if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI)
      return t->constants[src_type - GL_CON_0_ATI];

   switch (src_type) {
   case GL_ZERO:
      return ureg_imm1f(t->ureg, 0.0f);
   case GL_ONE:
      return ureg_imm1f(t->ureg, 1.0f);
   case GL_PRIMARY_COLOR_ARB:
      return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
   case GL_SECONDARY_INTERPOLATOR_ATI:
      return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
   default:
      /* frontend prevents this */
      unreachable("unknown source");
   }
}
예제 #13
0
파일: vl_idct.c 프로젝트: kallisti5/mesa
static void *
create_mismatch_vert_shader(struct vl_idct *idct)
{
    struct ureg_program *shader;
    struct ureg_src vpos;
    struct ureg_src scale;
    struct ureg_dst t_tex;
    struct ureg_dst o_vpos, o_addr[2];

    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
        return NULL;

    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

    t_tex = ureg_DECL_temporary(shader);

    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

    o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
    o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

    /*
     * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
     *
     * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
     * o_vpos.xy = t_vpos * scale
     *
     * o_addr = calc_addr(...)
     *
     */

    scale = ureg_imm2f(shader,
                       (float)VL_BLOCK_WIDTH / idct->buffer_width,
                       (float)VL_BLOCK_HEIGHT / idct->buffer_height);

    ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

    ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
    calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);

    ureg_release_temporary(shader, t_tex);

    ureg_END(shader);

    return ureg_create_shader_and_destroy(shader, idct->pipe);
}
예제 #14
0
/**
 * Compile one setup instruction to TGSI instructions.
 *
 * A setup instruction either samples a texture (SAMPLE_OP) or passes a
 * coordinate through (PASS_OP) into register r.  The source coordinate is
 * a texture coordinate input, or a register written in the first pass
 * (0.0 if that register was never written), with texinst->swizzle applied.
 *
 * \param t        translation state
 * \param r        index of the destination register / sampler
 * \param texinst  the setup instruction; nothing is emitted if its Opcode
 *                 is zero
 */
static void
compile_setupinst(struct st_translate *t,
                  const unsigned r,
                  const struct atifs_setupinst *texinst)
{
   struct ureg_dst dst[1];
   struct ureg_src src[2];

   if (!texinst->Opcode)
      return;

   dst[0] = get_temp(t, r);

   GLuint pass_tex = texinst->src;

   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
      unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;

      src[0] = t->inputs[t->inputMapping[attr]];
   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
      unsigned reg = pass_tex - GL_REG_0_ATI;

      /* the frontend already validated that REG is only allowed in second pass */
      if (t->regs_written[0][reg]) {
         src[0] = ureg_src(t->temps[reg]);
      } else {
         src[0] = ureg_imm1f(t->ureg, 0.0f);
      }
   } else {
      /* Not expected for input validated by the frontend.  Give src[0] a
       * defined value anyway, since it is read unconditionally below;
       * 0.0 matches what is used for registers that were never written.
       */
      src[0] = ureg_imm1f(t->ureg, 0.0f);
   }
   src[0] = apply_swizzle(t, src[0], texinst->swizzle);

   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
      /* by default texture and sampler indexes are the same */
      src[1] = t->samplers[r];
      /* the texture target is still unknown, it will be fixed in the draw call */
      ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D,
                    NULL, 0, src, 2);
   } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
   }

   t->regs_written[t->current_pass][r] = true;
}
예제 #15
0
/**
 * Apply a destination modifier to a register that was just written.
 *
 * Emits dst = dst * factor, where factor is chosen by the scale bits of
 * dstMod (2, 4, 8, 0.5, 0.25, 0.125, or 1 when no single scale bit
 * matches).  If GL_SATURATE_BIT_ATI is set the multiply is emitted with
 * saturation.  Nothing is emitted when dstMod is GL_NONE.
 *
 * \param t       translation state
 * \param dst     register to modify in place
 * \param dstMod  modifier bits
 */
static void
emit_dstmod(struct st_translate *t,
            struct ureg_dst dst, GLuint dstMod)
{
   const GLuint scale_bits = dstMod & ~GL_SATURATE_BIT_ATI;
   struct ureg_src ops[2];
   float factor = 1.0f;

   if (dstMod == GL_NONE)
      return;

   if (scale_bits == GL_2X_BIT_ATI)
      factor = 2.0f;
   else if (scale_bits == GL_4X_BIT_ATI)
      factor = 4.0f;
   else if (scale_bits == GL_8X_BIT_ATI)
      factor = 8.0f;
   else if (scale_bits == GL_HALF_BIT_ATI)
      factor = 0.5f;
   else if (scale_bits == GL_QUARTER_BIT_ATI)
      factor = 0.25f;
   else if (scale_bits == GL_EIGHTH_BIT_ATI)
      factor = 0.125f;

   /* The source operand is taken from dst before saturation is added. */
   ops[0] = ureg_src(dst);
   ops[1] = ureg_imm1f(t->ureg, factor);

   if (dstMod & GL_SATURATE_BIT_ATI)
      dst = ureg_saturate(dst);

   ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, ops, 2);
}
예제 #16
0
static void
calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
          struct ureg_src tc, struct ureg_src start, bool right_side,
          bool transposed, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;

   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;

   /*
    * addr[0..1].(start) = right_side ? start.x : tc.x
    * addr[0..1].(tc) = right_side ? tc.y : start.y
    * addr[0..1].z = tc.z
    * addr[1].(start) += 1.0f / scale
    */
   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));

   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
}
예제 #17
0
/**
 * Translate one Mesa program instruction into TGSI.
 *
 * The destination (if the opcode has one) and all sources are translated
 * first; the opcode then selects how the instruction is emitted:
 *  - SWZ and DDY are delegated to emit_swz() / emit_ddy()
 *  - BGNLOOP, CAL, ELSE, ENDLOOP and IF are emitted as label instructions
 *  - texture opcodes get the sampler appended as an extra source
 *  - SCS and XPD restrict the destination writemask to XY / XYZ
 *  - NOISE1..4 write the constant 0.5
 *  - RSQ takes the absolute value of its source
 *  - everything else is emitted with the opcode from translate_opcode()
 *
 * \param ctx                     GL context; Const.NativeIntegers selects
 *                                UIF over IF
 * \param t                       translation state
 * \param inst                    instruction to translate
 * \param clamp_dst_color_output  forwarded to translate_dst()
 */
static void
compile_instruction(
   struct gl_context *ctx,
   struct st_translate *t,
   const struct prog_instruction *inst,
   boolean clamp_dst_color_output)
{
   struct ureg_program *prog = t->ureg;
   struct ureg_dst dst[1] = { { 0 } };
   struct ureg_src src[4];
   unsigned dst_count;
   unsigned src_count;
   GLuint n;

   dst_count = _mesa_num_inst_dst_regs(inst->Opcode);
   src_count = _mesa_num_inst_src_regs(inst->Opcode);

   if (dst_count != 0) {
      dst[0] = translate_dst(t,
                             &inst->DstReg,
                             inst->Saturate,
                             clamp_dst_color_output);
   }

   for (n = 0; n < src_count; n++)
      src[n] = translate_src(t, &inst->SrcReg[n]);

   switch (inst->Opcode) {
   case OPCODE_SWZ:
      emit_swz(t, dst[0], &inst->SrcReg[0]);
      return;

   case OPCODE_BGNLOOP:
   case OPCODE_CAL:
   case OPCODE_ELSE:
   case OPCODE_ENDLOOP:
      /* flow control with a branch target and no destination */
      debug_assert(dst_count == 0);
      ureg_label_insn(prog,
                      translate_opcode(inst->Opcode),
                      src, src_count,
                      get_label(t, inst->BranchTarget));
      return;

   case OPCODE_IF:
      debug_assert(dst_count == 0);
      ureg_label_insn(prog,
                      ctx->Const.NativeIntegers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF,
                      src, src_count,
                      get_label(t, inst->BranchTarget));
      return;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      /* the sampler is passed as one more source operand */
      src[src_count] = t->samplers[inst->TexSrcUnit];
      src_count++;
      ureg_tex_insn(prog,
                    translate_opcode(inst->Opcode),
                    dst, dst_count,
                    st_translate_texture_target(inst->TexSrcTarget,
                                                inst->TexShadow),
                    NULL, 0,
                    src, src_count);
      return;

   case OPCODE_SCS:
   case OPCODE_XPD:
      /* SCS only writes XY, XPD only writes XYZ */
      dst[0] = ureg_writemask(dst[0],
                              inst->Opcode == OPCODE_SCS ? TGSI_WRITEMASK_XY
                                                         : TGSI_WRITEMASK_XYZ);
      ureg_insn(prog,
                translate_opcode(inst->Opcode),
                dst, dst_count,
                src, src_count);
      break;

   case OPCODE_NOISE1:
   case OPCODE_NOISE2:
   case OPCODE_NOISE3:
   case OPCODE_NOISE4:
      /* At some point, a motivated person could add a better
       * implementation of noise.  Currently not even the nvidia
       * binary drivers do anything more than this.  In any case, the
       * place to do this is in the GL state tracker, not the poor
       * driver.
       */
      ureg_MOV(prog, dst[0], ureg_imm1f(prog, 0.5));
      break;

   case OPCODE_DDY:
      emit_ddy(t, dst[0], &inst->SrcReg[0]);
      break;

   case OPCODE_RSQ:
      ureg_RSQ(prog, dst[0], ureg_abs(src[0]));
      break;

   default:
      ureg_insn(prog,
                translate_opcode(inst->Opcode),
                dst, dst_count,
                src, src_count);
      break;
   }
}
예제 #18
0
static void *
create_frag_shader_weave(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src i_tc[2];
   struct ureg_src csc[3];
   struct ureg_src sampler[3];
   struct ureg_dst t_tc[2];
   struct ureg_dst t_texel[2];
   struct ureg_dst o_fragment;
   unsigned i, j;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
   i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < 3; ++i) {
      csc[i] = ureg_DECL_constant(shader, i);
      sampler[i] = ureg_DECL_sampler(shader, i);
   }

   for (i = 0; i < 2; ++i) {
      t_tc[i] = ureg_DECL_temporary(shader);
      t_texel[i] = ureg_DECL_temporary(shader);
   }
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /* calculate the texture offsets
    * t_tc.x = i_tc.x
    * t_tc.y = (round(i_tc.y) + 0.5) / height * 2
    */
   for (i = 0; i < 2; ++i) {
      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]);
      ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), i_tc[i]);
      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W),
               ureg_imm1f(shader, i ? 0.75f : 0.25f));
      ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
               ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f));
      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y),
               ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W));
      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z),
               ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W));
   }

   /* fetch the texels
    * texel[0..1].x = tex(t_tc[0..1][0])
    * texel[0..1].y = tex(t_tc[0..1][1])
    * texel[0..1].z = tex(t_tc[0..1][2])
    */
   for (i = 0; i < 2; ++i)
      for (j = 0; j < 3; ++j) {
         struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]),
            TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

         ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j),
                  TGSI_TEXTURE_3D, src, sampler[j]);
      }

   /* calculate linear interpolation factor
    * factor = |round(i_tc.y) - i_tc.y| * 2
    */
   ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]);
   ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
            ureg_src(t_tc[0]), ureg_negate(i_tc[0]));
   ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_XY),
            ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f));
   ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]),
            TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z),
            ureg_src(t_texel[1]), ureg_src(t_texel[0]));

   /* and finally do colour space transformation
    * fragment = csc * texel
    */
   ureg_MOV(shader, ureg_writemask(t_texel[0], TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(t_texel[0]));

   ureg_MOV(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < 2; ++i) {
      ureg_release_temporary(shader, t_texel[i]);
      ureg_release_temporary(shader, t_tc[i]);
   }

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
예제 #19
0
static void *
create_vert_shader(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src vpos, vtex, color;
   struct ureg_dst tmp;
   struct ureg_dst o_vpos, o_vtex, o_color;
   struct ureg_dst o_vtop, o_vbottom;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return false;

   vpos = ureg_DECL_vs_input(shader, 0);
   vtex = ureg_DECL_vs_input(shader, 1);
   color = ureg_DECL_vs_input(shader, 2);
   tmp = ureg_DECL_temporary(shader);
   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
   o_color = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR);
   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
   o_vtop = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
   o_vbottom = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);

   /*
    * o_vpos = vpos
    * o_vtex = vtex
    * o_color = color
    */
   ureg_MOV(shader, o_vpos, vpos);
   ureg_MOV(shader, o_vtex, vtex);
   ureg_MOV(shader, o_color, color);

   /*
    * tmp.x = vtex.w / 2
    * tmp.y = vtex.w / 4
    *
    * o_vtop.x = vtex.x
    * o_vtop.y = vtex.y * tmp.x + 0.25f
    * o_vtop.z = vtex.y * tmp.y + 0.25f
    * o_vtop.w = 1 / tmp.x
    *
    * o_vbottom.x = vtex.x
    * o_vbottom.y = vtex.y * tmp.x - 0.25f
    * o_vbottom.z = vtex.y * tmp.y - 0.25f
    * o_vbottom.w = 1 / tmp.y
    */
   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X),
            ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f));
   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
            ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.25f));

   ureg_MOV(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_X), vtex);
   ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.25f));
   ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.25f));
   ureg_RCP(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_W),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));

   ureg_MOV(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_X), vtex);
   ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, -0.25f));
   ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, -0.25f));
   ureg_RCP(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_W),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
/**
 * Build a YCbCr fragment shader around a caller-supplied callback.
 *
 *    line.y = calc_line()
 *    if (flags.w == line.y)
 *       kill
 *    else {
 *       fs_callback(...)          -- presumably fetches the texel into the
 *                                    temporary; confirm against the callers
 *       tmp.xyz      = tmp * scale + flags.z
 *       fragment.xyz = tmp * (invert ? -1 : 1)
 *       fragment.w   = 1.0
 *    }
 *
 * \param r              supplies the pipe context; forwarded to the callback
 * \param scale          factor applied to the temporary; when exactly 1.0
 *                       an ADD is emitted instead of a MAD
 * \param invert         negates the result when true
 * \param fs_callback    emits the part between ELSE and the scaling
 * \param callback_priv  opaque pointer handed to fs_callback
 * \return result of ureg_create_shader_and_destroy(), or NULL if the ureg
 *         program could not be created
 */
static void *
create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
                         vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
{
   struct ureg_program *prog;
   struct ureg_src in_flags, offset;
   struct ureg_dst work, work_xyz, color_out;
   unsigned branch;

   prog = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (prog == NULL)
      return NULL;

   in_flags = ureg_DECL_fs_input(prog, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS,
                                 TGSI_INTERPOLATE_LINEAR);

   color_out = ureg_DECL_output(prog, TGSI_SEMANTIC_COLOR, 0);

   work = calc_line(prog);
   work_xyz = ureg_writemask(work, TGSI_WRITEMASK_XYZ);
   offset = ureg_scalar(in_flags, TGSI_SWIZZLE_Z);

   /* work.y = (flags.w == work.y) */
   ureg_SEQ(prog, ureg_writemask(work, TGSI_WRITEMASK_Y),
            ureg_scalar(in_flags, TGSI_SWIZZLE_W), ureg_src(work));

   ureg_IF(prog, ureg_scalar(ureg_src(work), TGSI_SWIZZLE_Y), &branch);

      ureg_KILP(prog);

   ureg_fixup_label(prog, branch, ureg_get_instruction_number(prog));
   ureg_ELSE(prog, &branch);

      fs_callback(callback_priv, r, prog, VS_O_VTEX, work);

      /* work.xyz = work * scale + flags.z (the multiply is skipped for 1.0) */
      if (scale == 1.0f)
         ureg_ADD(prog, work_xyz, ureg_src(work), offset);
      else
         ureg_MAD(prog, work_xyz, ureg_src(work),
                  ureg_imm1f(prog, scale), offset);

      ureg_MUL(prog, ureg_writemask(color_out, TGSI_WRITEMASK_XYZ),
               ureg_src(work), ureg_imm1f(prog, invert ? -1.0f : 1.0f));
      ureg_MOV(prog, ureg_writemask(color_out, TGSI_WRITEMASK_W),
               ureg_imm1f(prog, 1.0f));

   ureg_fixup_label(prog, branch, ureg_get_instruction_number(prog));
   ureg_ENDIF(prog);

   ureg_release_temporary(prog, work);

   ureg_END(prog);

   return ureg_create_shader_and_destroy(prog, r->pipe);
}
/**
 * Build the vertex shader used to write YCbCr data.
 *
 * Emitted program, in pseudo code (pos is the temporary returned by
 * calc_position(), shift is a scratch temporary):
 *
 *    vs_callback(...)                     // emits the VS_O_VTEX output
 *    o_flags.z = vpos.z * 0.5
 *    o_flags.w = -1
 *
 *    // only emitted when macroblock_size == VL_MACROBLOCK_HEIGHT
 *    if (vpos.w) {
 *       shift.xy = vrect.y ? { 0, scale.y } : { -scale.y, 0 }
 *       shift.z  = frac(vpos.y * 0.5)
 *       shift.y  = shift.z ? shift.x : shift.y
 *       o_vpos.y = pos.y + shift.y
 *       o_flags.w = shift.z ? 0 : 1
 *    }
 *
 * \param r              motion compensation state (buffer and macroblock
 *                       sizes, pipe context)
 * \param vs_callback    hook that emits the texture coordinate output
 * \param callback_priv  opaque pointer handed to vs_callback
 * \return result of ureg_create_shader_and_destroy(), or NULL when the
 *         ureg program could not be created
 */
static void *
create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv)
{
   struct vertex2f mb_scale = {
      (float)VL_BLOCK_WIDTH / r->buffer_width * VL_MACROBLOCK_WIDTH / r->macroblock_size,
      (float)VL_BLOCK_HEIGHT / r->buffer_height * VL_MACROBLOCK_HEIGHT / r->macroblock_size
   };
   struct ureg_program *ureg;
   struct ureg_src in_rect, in_pos;
   struct ureg_dst pos, shift;
   struct ureg_dst out_pos, out_flags;
   unsigned endif_label;

   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (ureg == NULL)
      return NULL;

   in_rect = ureg_DECL_vs_input(ureg, VS_I_RECT);
   in_pos = ureg_DECL_vs_input(ureg, VS_I_VPOS);

   pos = calc_position(r, ureg, ureg_imm2f(ureg, mb_scale.x, mb_scale.y));
   shift = ureg_DECL_temporary(ureg);

   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
   out_flags = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS);

   vs_callback(callback_priv, r, ureg, VS_O_VTEX, pos);

   ureg_MUL(ureg, ureg_writemask(out_flags, TGSI_WRITEMASK_Z),
            ureg_scalar(in_pos, TGSI_SWIZZLE_Z), ureg_imm1f(ureg, 0.5f));
   /* Default for flags.w; replaced by 0/1 inside the branch below. */
   ureg_MOV(ureg, ureg_writemask(out_flags, TGSI_WRITEMASK_W),
            ureg_imm1f(ureg, -1.0f));

   /* TODO carried over from the original condition. */
   if (r->macroblock_size == VL_MACROBLOCK_HEIGHT) {
      /* CMP picks its first choice when the selector is negative, so the
       * negated shift.z selects on "shift.z > 0". */
      const struct ureg_src shift_src = ureg_src(shift);
      const struct ureg_src odd_line =
         ureg_negate(ureg_scalar(shift_src, TGSI_SWIZZLE_Z));

      ureg_IF(ureg, ureg_scalar(in_pos, TGSI_SWIZZLE_W), &endif_label);
      {
         ureg_CMP(ureg, ureg_writemask(shift, TGSI_WRITEMASK_XY),
                  ureg_negate(ureg_scalar(in_rect, TGSI_SWIZZLE_Y)),
                  ureg_imm2f(ureg, 0.0f, mb_scale.y),
                  ureg_imm2f(ureg, -mb_scale.y, 0.0f));

         /* shift.z = frac(vpos.y / 2): non-zero for odd vpos.y. */
         ureg_MUL(ureg, ureg_writemask(shift, TGSI_WRITEMASK_Z),
                  ureg_scalar(in_pos, TGSI_SWIZZLE_Y), ureg_imm1f(ureg, 0.5f));
         ureg_FRC(ureg, ureg_writemask(shift, TGSI_WRITEMASK_Z), shift_src);

         ureg_CMP(ureg, ureg_writemask(shift, TGSI_WRITEMASK_Y),
                  odd_line,
                  ureg_scalar(shift_src, TGSI_SWIZZLE_X),
                  ureg_scalar(shift_src, TGSI_SWIZZLE_Y));
         ureg_ADD(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Y),
                  ureg_src(pos), shift_src);

         ureg_CMP(ureg, ureg_writemask(out_flags, TGSI_WRITEMASK_W),
                  odd_line,
                  ureg_imm1f(ureg, 0.0f), ureg_imm1f(ureg, 1.0f));
      }
      ureg_fixup_label(ureg, endif_label, ureg_get_instruction_number(ureg));
      ureg_ENDIF(ureg);
   }

   ureg_release_temporary(ureg, shift);
   ureg_release_temporary(ureg, pos);

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, r->pipe);
}
예제 #22
0
파일: vl_idct.c 프로젝트: kallisti5/mesa
static void *
create_mismatch_frag_shader(struct vl_idct *idct)
{
    struct ureg_program *shader;

    struct ureg_src addr[2];

    struct ureg_dst m[8][2];
    struct ureg_dst fragment;

    unsigned i;

    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
        return NULL;

    addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
    addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

    for (i = 0; i < 8; ++i) {
        m[i][0] = ureg_DECL_temporary(shader);
        m[i][1] = ureg_DECL_temporary(shader);
    }

    for (i = 0; i < 8; ++i) {
        increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
    }

    for (i = 0; i < 8; ++i) {
        struct ureg_src s_addr[2];
        s_addr[0] = ureg_src(m[i][0]);
        s_addr[1] = ureg_src(m[i][1]);
        fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
    }

    for (i = 1; i < 8; ++i) {
        ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
        ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
    }

    ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
    ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));

    ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
    ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
    ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));

    ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
             ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
    ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
             ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));

    ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
    ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));

    for (i = 0; i < 8; ++i) {
        ureg_release_temporary(shader, m[i][0]);
        ureg_release_temporary(shader, m[i][1]);
    }

    ureg_END(shader);

    return ureg_create_shader_and_destroy(shader, idct->pipe);
}
예제 #23
0
static void *
create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field,
                         struct vertex2f *sizes, bool spatial_filter)
{
   struct ureg_program *shader;
   struct ureg_src i_vtex;
   struct ureg_src sampler_cur;
   struct ureg_src sampler_prevprev;
   struct ureg_src sampler_prev;
   struct ureg_src sampler_next;
   struct ureg_dst o_fragment;
   struct ureg_dst t_tex;
   struct ureg_dst t_comp_top, t_comp_bot;
   struct ureg_dst t_diff;
   struct ureg_dst t_a, t_b;
   struct ureg_dst t_weave, t_linear;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader) {
      return NULL;
   }

   t_tex = ureg_DECL_temporary(shader);
   t_comp_top = ureg_DECL_temporary(shader);
   t_comp_bot = ureg_DECL_temporary(shader);
   t_diff = ureg_DECL_temporary(shader);
   t_a = ureg_DECL_temporary(shader);
   t_b = ureg_DECL_temporary(shader);
   t_weave = ureg_DECL_temporary(shader);
   t_linear = ureg_DECL_temporary(shader);

   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler_prevprev = ureg_DECL_sampler(shader, 0);
   sampler_prev = ureg_DECL_sampler(shader, 1);
   sampler_cur = ureg_DECL_sampler(shader, 2);
   sampler_next = ureg_DECL_sampler(shader, 3);
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   // we don't care about ZW interpolation (allows better optimization)
   ureg_MOV(shader, t_tex, i_vtex);
   ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
            ureg_imm1f(shader, 0));

   // sample between texels for cheap lowpass
   ureg_ADD(shader, t_comp_top, ureg_src(t_tex),
            ureg_imm4f(shader, sizes->x * 0.5f, sizes->y * -0.5f, 0, 0));
   ureg_ADD(shader, t_comp_bot, ureg_src(t_tex),
            ureg_imm4f(shader, sizes->x * -0.5f, sizes->y * 0.5f, 1.0f, 0));

   if (field == 0) {
      /* interpolating top field -> current field is a bottom field */
      // cur vs prev2
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev);
      ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b)));
      // prev vs next
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next);
      ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b)));
   } else {
      /* interpolating bottom field -> current field is a top field */
      // cur vs prev2
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev);
      ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b)));
      // prev vs next
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next);
      ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b)));
   }

   // absolute maximum of differences
   ureg_MAX(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_abs(ureg_src(t_diff)),
            ureg_scalar(ureg_abs(ureg_src(t_diff)), TGSI_SWIZZLE_Y));

   if (field == 0) {
      /* weave with prev top field */
      ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler_prev);
      /* get linear interpolation from current bottom field */
      ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * -1.0f, 1.0f, 0));
      ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur);
   } else {
      /* weave with prev bottom field */
      ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, 0, 1.0f, 0));
      ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev);
      /* get linear interpolation from current top field */
      ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * 1.0f, 0, 0));
      ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur);
   }

   // mix between weave and linear
   // fully weave if diff < 6 (0.02353), fully interpolate if diff > 14 (0.05490)
   ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_diff),
            ureg_imm4f(shader, -0.02353f, 0, 0, 0));
   ureg_MUL(shader, ureg_saturate(ureg_writemask(t_diff, TGSI_WRITEMASK_X)),
            ureg_src(t_diff), ureg_imm4f(shader, 31.8750f, 0, 0, 0));
   ureg_LRP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), ureg_src(t_diff),
            ureg_src(t_linear), ureg_src(t_weave));
   ureg_MOV(shader, o_fragment, ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_comp_top);
   ureg_release_temporary(shader, t_comp_bot);
   ureg_release_temporary(shader, t_diff);
   ureg_release_temporary(shader, t_a);
   ureg_release_temporary(shader, t_b);
   ureg_release_temporary(shader, t_weave);
   ureg_release_temporary(shader, t_linear);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, filter->pipe);
}
예제 #24
0
/**
 * Create a fragment shader that resolves a multisampled texture and
 * applies a bilinear filter to the result.
 *
 * For each of the four texels around the incoming coordinate the shader
 * fetches all nr_samples samples with TXF, averages them in float, and
 * then blends the four averages using the fractional part of the
 * coordinate.  Integer return types are converted to float for the
 * arithmetic and back to the integer type for the output.
 *
 * \param pipe        context the shader is created on
 * \param tgsi_tex    texture target used for the sampler view and TXF
 * \param nr_samples  number of samples read per texel
 * \param stype       return type of the sampler view
 * \return result of ureg_create_shader_and_destroy(), or NULL when the
 *         ureg program could not be created
 */
void *
util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                   enum tgsi_texture_type tgsi_tex,
                                   unsigned nr_samples,
                                   enum tgsi_return_type stype)
{
   enum { NUM_TAPS = 4 };

   struct ureg_program *ureg;
   struct ureg_src sampler, coord;
   struct ureg_dst out, tmp, top, bottom;
   struct ureg_dst tap_coord[NUM_TAPS], tap_sum[NUM_TAPS];
   unsigned s, t;

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (ureg == NULL)
      return NULL;

   /* Declarations. */
   sampler = ureg_DECL_sampler(ureg, 0);
   ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype);
   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                              TGSI_INTERPOLATE_LINEAR);
   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   for (t = 0; t < NUM_TAPS; t++)
      tap_sum[t] = ureg_DECL_temporary(ureg);
   for (t = 0; t < NUM_TAPS; t++)
      tap_coord[t] = ureg_DECL_temporary(ureg);
   tmp = ureg_DECL_temporary(ureg);
   top = ureg_DECL_temporary(ureg);
   bottom = ureg_DECL_temporary(ureg);

   /* Clear the accumulators. */
   for (t = 0; t < NUM_TAPS; t++)
      ureg_MOV(ureg, tap_sum[t], ureg_imm1f(ureg, 0));

   /* Integer coordinates of the 2x2 footprint.  Tap 0 is top-left; bit 0
    * of the tap index steps one texel right, bit 1 one texel down. */
   ureg_F2U(ureg, tap_coord[0], coord);
   for (t = 1; t < NUM_TAPS; t++) {
      ureg_UADD(ureg, tap_coord[t], ureg_src(tap_coord[0]),
                ureg_imm4u(ureg, t & 1, t >> 1, 0, 0));
   }

   /* Accumulate every sample of every tap. */
   for (s = 0; s < nr_samples; s++) {
      for (t = 0; t < NUM_TAPS; t++) {
         /* The sample index travels in the w coordinate. */
         ureg_MOV(ureg, ureg_writemask(tap_coord[t], TGSI_WRITEMASK_W),
                  ureg_imm1u(ureg, s));
         ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tap_coord[t]), sampler);

         switch (stype) {
         case TGSI_RETURN_TYPE_UINT:
            ureg_U2F(ureg, tmp, ureg_src(tmp));
            break;
         case TGSI_RETURN_TYPE_SINT:
            ureg_I2F(ureg, tmp, ureg_src(tmp));
            break;
         default:
            break;
         }

         ureg_ADD(ureg, tap_sum[t], ureg_src(tap_sum[t]), ureg_src(tmp));
      }
   }

   /* Turn the sums into averages. */
   for (t = 0; t < NUM_TAPS; t++) {
      ureg_MUL(ureg, tap_sum[t], ureg_src(tap_sum[t]),
               ureg_imm1f(ureg, 1.0 / nr_samples));
   }

   /* Standard bilinear blend: horizontally with frac.x (component 0),
    * then vertically with frac.y (component 1). */
   ureg_FRC(ureg, tmp, coord);

   ureg_LRP(ureg, top,
            ureg_scalar(ureg_src(tmp), 0),
            ureg_src(tap_sum[1]),
            ureg_src(tap_sum[0]));

   ureg_LRP(ureg, bottom,
            ureg_scalar(ureg_src(tmp), 0),
            ureg_src(tap_sum[3]),
            ureg_src(tap_sum[2]));

   ureg_LRP(ureg, tmp,
            ureg_scalar(ureg_src(tmp), 1),
            ureg_src(bottom),
            ureg_src(top));

   /* Convert back to the texture's return type and write the result. */
   switch (stype) {
   case TGSI_RETURN_TYPE_UINT:
      ureg_F2U(ureg, out, ureg_src(tmp));
      break;
   case TGSI_RETURN_TYPE_SINT:
      ureg_F2I(ureg, out, ureg_src(tmp));
      break;
   default:
      ureg_MOV(ureg, out, ureg_src(tmp));
      break;
   }

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
static void *
create_ref_frag_shader(struct vl_mc *r)
{
   const float y_scale =
      r->buffer_height / 2 *
      r->macroblock_size / VL_MACROBLOCK_HEIGHT;

   struct ureg_program *shader;
   struct ureg_src tc[2], sampler;
   struct ureg_dst ref, field;
   struct ureg_dst fragment;
   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);

   sampler = ureg_DECL_sampler(shader, 0);
   ref = ureg_DECL_temporary(shader);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   field = calc_line(shader);

   /*
    * ref = field.z ? tc[1] : tc[0]
    *
    * // Adjust tc acording to top/bottom field selection
    * if (|ref.z|) {
    *    ref.y *= y_scale
    *    ref.y = floor(ref.y)
    *    ref.y += ref.z
    *    ref.y /= y_scale
    * }
    * fragment.xyz = tex(ref, sampler[0])
    */
   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
            tc[1], tc[0]);
   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
            tc[1], tc[0]);

   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);

      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
               ureg_src(ref), ureg_imm1f(shader, y_scale));
      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ENDIF(shader);

   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);

   ureg_release_temporary(shader, ref);

   ureg_release_temporary(shader, field);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, r->pipe);
}
예제 #26
0
파일: vl_zscan.c 프로젝트: airlied/mesa
/**
 * Build the zscan vertex shader.
 *
 * One VS_O_VTEX + i output is produced per channel.  Emitted program, in
 * pseudo code (bpl = blocks_per_line):
 *
 *    o_vpos.xy = (vpos + vrect) * scale
 *    o_vpos.zw = 1.0f
 *
 *    tmp.xw = block_num.x / bpl
 *    tmp.y  = frac(tmp.x)
 *    tmp.w  = floor(tmp.w)
 *
 *    for each channel i:
 *       tmp.x       = tmp.y + (i - num_channels / 2) / (bpl * VL_BLOCK_WIDTH)
 *       o_vtex[i].x = vrect.x / bpl + tmp.x
 *       o_vtex[i].y = vrect.y
 *       o_vtex[i].z = vpos.z
 *       o_vtex[i].w = tmp.w * bpl / blocks_total
 *
 * \param zscan  zscan state (num_channels, buffer size, block counts, pipe)
 * \return result of ureg_create_shader_and_destroy(), or NULL when the
 *         output array or the ureg program could not be allocated
 */
static void *
create_vert_shader(struct vl_zscan *zscan)
{
   struct ureg_program *shader;
   struct ureg_src scale;
   struct ureg_src vrect, vpos, block_num;
   struct ureg_dst tmp;
   struct ureg_dst o_vpos;
   struct ureg_dst *o_vtex;
   unsigned i;

   /* Allocate before creating the program so that a failed allocation
    * leaves nothing to tear down. */
   o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));
   if (!o_vtex)
      return NULL;

   shader = ureg_create(PIPE_SHADER_VERTEX);
   if (!shader) {
      FREE(o_vtex);
      return NULL;
   }

   scale = ureg_imm2f(shader,
      (float)VL_BLOCK_WIDTH / zscan->buffer_width,
      (float)VL_BLOCK_HEIGHT / zscan->buffer_height);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
   block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM);

   tmp = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   for (i = 0; i < zscan->num_channels; ++i)
      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);

   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X),
            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));

   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));

   for (i = 0; i < zscan->num_channels; ++i) {
      /* Both operands must be signed: with an unsigned i the subtraction
       * wraps to a huge positive value for the channels left of centre
       * instead of yielding a negative texel offset. */
      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
               ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
                * ((signed)i - (signed)zscan->num_channels / 2)));

      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
   }

   ureg_release_temporary(shader, tmp);
   ureg_END(shader);

   FREE(o_vtex);

   return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
예제 #27
0
파일: vl_zscan.c 프로젝트: airlied/mesa
static void *
create_frag_shader(struct vl_zscan *zscan)
{
   struct ureg_program *shader;
   struct ureg_src *vtex;

   struct ureg_src samp_src, samp_scan, samp_quant;

   struct ureg_dst *tmp;
   struct ureg_dst quant, fragment;

   unsigned i;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader)
      return NULL;

   vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src));
   tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));

   for (i = 0; i < zscan->num_channels; ++i)
      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);

   samp_src = ureg_DECL_sampler(shader, 0);
   samp_scan = ureg_DECL_sampler(shader, 1);
   samp_quant = ureg_DECL_sampler(shader, 2);

   for (i = 0; i < zscan->num_channels; ++i)
      tmp[i] = ureg_DECL_temporary(shader);
   quant = ureg_DECL_temporary(shader);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * tmp.x = tex(vtex, 1)
    * tmp.y = vtex.z
    * fragment = tex(tmp, 0) * quant
    */
   for (i = 0; i < zscan->num_channels; ++i)
      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);

   for (i = 0; i < zscan->num_channels; ++i)
      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));

   for (i = 0; i < zscan->num_channels; ++i) {
      ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
      ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
   }

   ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
   ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));

   for (i = 0; i < zscan->num_channels; ++i)
      ureg_release_temporary(shader, tmp[i]);
   ureg_END(shader);

   FREE(vtex);
   FREE(tmp);

   return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
예제 #28
0
/**
 * Build the median filter fragment shader.
 *
 * Emitted program, in pseudo code:
 *
 *    t_array[0..*] = vtex + offset[0..*]      (skipped for zero offsets)
 *    t_array[0..*] = tex(t_array[0..*], sampler)
 *    result = partial_bubblesort(t_array)[mid]
 *
 * \param filter       filter state, provides the pipe context and screen
 * \param offsets      num_offsets texture coordinate offsets to sample at
 * \param num_offsets  number of offsets; must be odd and must not exceed
 *                     the screen's PIPE_SHADER_CAP_MAX_TEMPS for fragment
 *                     shaders
 * \return result of ureg_create_shader_and_destroy(), or NULL when an
 *         allocation fails or num_offsets is rejected
 */
static void *
create_frag_shader(struct vl_median_filter *filter,
                   struct vertex2f *offsets,
                   unsigned num_offsets)
{
   struct pipe_screen *screen = filter->pipe->screen;
   struct ureg_program *shader;
   struct ureg_src i_vtex;
   struct ureg_src sampler;
   struct ureg_dst *t_array = MALLOC(sizeof(struct ureg_dst) * num_offsets);
   struct ureg_dst o_fragment;
   const unsigned median = num_offsets >> 1;
   unsigned i, j;

   assert(num_offsets & 1); /* we need an odd number of offsets */

   /* The array is indexed unconditionally below, so bail out on a failed
    * allocation. */
   if (!t_array)
      return NULL;

   if (!(num_offsets & 1)) { /* release builds: reject even counts as well */
      FREE(t_array);
      return NULL;
   }

   if (num_offsets > screen->get_shader_param(
      screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS)) {

      FREE(t_array);
      return NULL;
   }

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader) {
      FREE(t_array);
      return NULL;
   }

   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 0);
   ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT);

   for (i = 0; i < num_offsets; ++i)
      t_array[i] = ureg_DECL_temporary(shader);
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /* Compute the shifted coordinates; a zero offset samples at i_vtex
    * directly and needs no extra instructions. */
   for (i = 0; i < num_offsets; ++i) {
      if (!is_vec_zero(offsets[i])) {
         ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
                  i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y));
         ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
                  ureg_imm1f(shader, 0.0f));
      }
   }

   for (i = 0; i < num_offsets; ++i) {
      struct ureg_src src = is_vec_zero(offsets[i]) ? i_vtex : ureg_src(t_array[i]);
      ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, src, sampler);
   }

   /* Partial bubble sort: only as many passes as are needed to settle the
    * median slot.  With a single sample there is nothing to sort, and the
    * trailing MAX/MIN would touch t_array[1], which does not exist. */
   // TODO: Couldn't this be improved even more?
   if (num_offsets > 1) {
      for (i = 0; i <= median; ++i) {
         for (j = 1; j < (num_offsets - i - 1); ++j) {
            /* Compare-exchange of neighbours: max goes up, min goes down. */
            struct ureg_dst tmp = ureg_DECL_temporary(shader);
            ureg_MOV(shader, tmp, ureg_src(t_array[j]));
            ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1]));
            ureg_MIN(shader, t_array[j - 1], ureg_src(tmp), ureg_src(t_array[j - 1]));
            ureg_release_temporary(shader, tmp);
         }
         /* The last pair of each pass needs only one half of the exchange. */
         if (i == median)
            ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1]));
         else
            ureg_MIN(shader, t_array[j - 1], ureg_src(t_array[j]), ureg_src(t_array[j - 1]));
      }
   }
   ureg_MOV(shader, o_fragment, ureg_src(t_array[median]));

   ureg_END(shader);

   FREE(t_array);
   return ureg_create_shader_and_destroy(shader, filter->pipe);
}