Beispiel #1
0
static void *
create_yuv_shader(struct pipe_context *pipe, struct ureg_program *ureg)
{
    struct ureg_src y_sampler, u_sampler, v_sampler;
    struct ureg_src pos;
    struct ureg_src matrow0, matrow1, matrow2;
    struct ureg_dst y, u, v, rgb;
    struct ureg_dst out = ureg_DECL_output(ureg,
					   TGSI_SEMANTIC_COLOR,
					   0);

    pos = ureg_DECL_fs_input(ureg,
			     TGSI_SEMANTIC_GENERIC, 0,
			     TGSI_INTERPOLATE_PERSPECTIVE);

    rgb = ureg_DECL_temporary(ureg);
    y = ureg_DECL_temporary(ureg);
    u = ureg_DECL_temporary(ureg);
    v = ureg_DECL_temporary(ureg);

    y_sampler = ureg_DECL_sampler(ureg, 0);
    u_sampler = ureg_DECL_sampler(ureg, 1);
    v_sampler = ureg_DECL_sampler(ureg, 2);

    matrow0 = ureg_DECL_constant(ureg, 0);
    matrow1 = ureg_DECL_constant(ureg, 1);
    matrow2 = ureg_DECL_constant(ureg, 2);

    ureg_TEX(ureg, y, TGSI_TEXTURE_2D, pos, y_sampler);
    ureg_TEX(ureg, u, TGSI_TEXTURE_2D, pos, u_sampler);
    ureg_TEX(ureg, v, TGSI_TEXTURE_2D, pos, v_sampler);

    ureg_SUB(ureg, u, ureg_src(u), ureg_scalar(matrow0, TGSI_SWIZZLE_W));
    ureg_SUB(ureg, v, ureg_src(v), ureg_scalar(matrow0, TGSI_SWIZZLE_W));

    ureg_MUL(ureg, rgb, ureg_scalar(ureg_src(y), TGSI_SWIZZLE_X), matrow0);
    ureg_MAD(ureg, rgb,
	     ureg_scalar(ureg_src(u), TGSI_SWIZZLE_X), matrow1, ureg_src(rgb));
    ureg_MAD(ureg, rgb,
	     ureg_scalar(ureg_src(v), TGSI_SWIZZLE_X), matrow2, ureg_src(rgb));

    /* rgb.a = 1; */
    ureg_MOV(ureg, ureg_writemask(rgb, TGSI_WRITEMASK_W),
	     ureg_scalar(matrow0, TGSI_SWIZZLE_X));

    ureg_MOV(ureg, out, ureg_src(rgb));

    ureg_release_temporary(ureg, rgb);
    ureg_release_temporary(ureg, y);
    ureg_release_temporary(ureg, u);
    ureg_release_temporary(ureg, v);

    ureg_END(ureg);

    return ureg_create_shader_and_destroy(ureg, pipe);
}
Beispiel #2
0
static void *
create_vs(struct pipe_context *pipe, unsigned vs_traits)
{
    struct ureg_program *ureg;
    struct ureg_src src;
    struct ureg_dst dst;
    struct ureg_src const0, const1;
    boolean is_fill = (vs_traits & VS_FILL) != 0;
    boolean is_composite = (vs_traits & VS_COMPOSITE) != 0;
    boolean has_mask = (vs_traits & VS_MASK) != 0;
    boolean is_yuv = (vs_traits & VS_YUV) != 0;
    unsigned input_slot = 0;

    ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (ureg == NULL)
	return 0;

    const0 = ureg_DECL_constant(ureg, 0);
    const1 = ureg_DECL_constant(ureg, 1);

    /* it has to be either a fill or a composite op */
    debug_assert((is_fill ^ is_composite) ^ is_yuv);

    src = ureg_DECL_vs_input(ureg, input_slot++);
    dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
    src = vs_normalize_coords(ureg, src, const0, const1);
    ureg_MOV(ureg, dst, src);

    if (is_yuv) {
	src = ureg_DECL_vs_input(ureg, input_slot++);
	dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
	ureg_MOV(ureg, dst, src);
    }

    if (is_composite) {
	src = ureg_DECL_vs_input(ureg, input_slot++);
	dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
	ureg_MOV(ureg, dst, src);
    }

    if (is_fill) {
	src = ureg_DECL_vs_input(ureg, input_slot++);
	dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
	ureg_MOV(ureg, dst, src);
    }

    if (has_mask) {
	src = ureg_DECL_vs_input(ureg, input_slot++);
	dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
	ureg_MOV(ureg, dst, src);
    }

    ureg_END(ureg);

    return ureg_create_shader_and_destroy(ureg, pipe);
}
Beispiel #3
0
/**
 * Create a simple vertex shader that passes through position and the given
 * attribute.
 */
static void *create_passthrough_vs(struct pipe_context *pipe, int semantic_name)
{
   struct ureg_program *ureg;
   struct ureg_src src[2], constants[3];
   struct ureg_dst dst[2], tmp;
   int i;

   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!ureg)
      return NULL;

   /* position is in user coordinates */
   src[0] = ureg_DECL_vs_input(ureg, 0);
   dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
   tmp = ureg_DECL_temporary(ureg);
   for (i = 0; i < Elements(constants); i++)
      constants[i] = ureg_DECL_constant(ureg, i);

   /* transform to clipped coordinates */
   ureg_DP4(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), src[0], constants[0]);
   ureg_DP4(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), src[0], constants[1]);
   ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), src[0]);
   ureg_DP4(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), src[0], constants[2]);
   ureg_MOV(ureg, dst[0], ureg_src(tmp));

   if (semantic_name >= 0) {
      src[1] = ureg_DECL_vs_input(ureg, 1);
      dst[1] = ureg_DECL_output(ureg, semantic_name, 0);
      ureg_MOV(ureg, dst[1], src[1]);
   }

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
Beispiel #4
0
/**
 * Map a Mesa src register to a TGSI ureg_src register.
 */
static struct ureg_src
src_register( struct st_translate *t,
              gl_register_file file,
              GLint index )
{
   switch( file ) {
   case PROGRAM_UNDEFINED:
      return ureg_src_undef();

   case PROGRAM_TEMPORARY:
      assert(index >= 0);
      assert(index < Elements(t->temps));
      if (ureg_dst_is_undef(t->temps[index]))
         t->temps[index] = ureg_DECL_temporary( t->ureg );
      return ureg_src(t->temps[index]);

   case PROGRAM_ENV_PARAM:
   case PROGRAM_LOCAL_PARAM:
   case PROGRAM_UNIFORM:
      assert(index >= 0);
      return t->constants[index];
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:       /* ie, immediate */
      if (index < 0)
         return ureg_DECL_constant( t->ureg, 0 );
      else
         return t->constants[index];

   case PROGRAM_INPUT:
      assert(t->inputMapping[index] < Elements(t->inputs));
      return t->inputs[t->inputMapping[index]];

   case PROGRAM_OUTPUT:
      assert(t->outputMapping[index] < Elements(t->outputs));
      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[index]);

   case PROGRAM_SYSTEM_VALUE:
      assert(index < Elements(t->systemValues));
      return t->systemValues[index];

   default:
      debug_assert( 0 );
      return ureg_src_undef();
   }
}
Beispiel #5
0
static void *
create_frag_shader_palette(struct vl_compositor *c, bool include_cc)
{
   struct ureg_program *shader;
   struct ureg_src csc[3];
   struct ureg_src tc;
   struct ureg_src sampler;
   struct ureg_src palette;
   struct ureg_dst texel;
   struct ureg_dst fragment;
   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   for (i = 0; include_cc && i < 3; ++i)
      csc[i] = ureg_DECL_constant(shader, i);

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 0);
   palette = ureg_DECL_sampler(shader, 1);

   texel = ureg_DECL_temporary(shader);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * texel = tex(tc, sampler)
    * fragment.xyz = tex(texel, palette) * csc
    * fragment.a = texel.a
    */
   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel));

   if (include_cc) {
      ureg_TEX(shader, texel, TGSI_TEXTURE_1D, ureg_src(texel), palette);
      for (i = 0; i < 3; ++i)
         ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
   } else {
      ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
               TGSI_TEXTURE_1D, ureg_src(texel), palette);
   }

   ureg_release_temporary(shader, texel);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
Beispiel #6
0
static void *
create_frag_shader_video_buffer(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src tc;
   struct ureg_src csc[3];
   struct ureg_src sampler[3];
   struct ureg_dst texel;
   struct ureg_dst fragment;
   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   for (i = 0; i < 3; ++i) {
      csc[i] = ureg_DECL_constant(shader, i);
      sampler[i] = ureg_DECL_sampler(shader, i);
   }
   texel = ureg_DECL_temporary(shader);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * texel.xyz = tex(tc, sampler[i])
    * fragment = csc * texel
    */
   for (i = 0; i < 3; ++i)
      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc, sampler[i]);

   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));

   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   ureg_release_temporary(shader, texel);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
Beispiel #7
0
/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 */
static void
emit_wpos_adjustment( struct st_translate *t,
                      const struct gl_program *program,
                      boolean invert,
                      GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] by y is inverted.
    */
   static const gl_state_index wposTransformState[STATE_LENGTH]
      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
   
   /* XXX: note we are modifying the incoming shader here!  Need to
    * do this before emitting the constant decls below, or this
    * will be missed:
    */
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                       wposTransformState);

   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
}
Beispiel #8
0
/**
 * Translate Mesa program to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
 *                            each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
 *                             each output
 *
 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 */
enum pipe_error
st_translate_mesa_program(
   struct gl_context *ctx,
   uint procType,
   struct ureg_program *ureg,
   const struct gl_program *program,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[],
   boolean passthrough_edgeflags,
   boolean clamp_color)
{
   struct st_translate translate, *t;
   unsigned i;
   enum pipe_error ret = PIPE_OK;

   assert(numInputs <= ARRAY_SIZE(t->inputs));
   assert(numOutputs <= ARRAY_SIZE(t->outputs));

   t = &translate;
   memset(t, 0, sizeof *t);

   t->procType = procType;
   t->inputMapping = inputMapping;
   t->outputMapping = outputMapping;
   t->ureg = ureg;

   /*_mesa_print_program(program);*/

   /*
    * Declare input attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_fs_input(ureg,
                                           inputSemanticName[i],
                                           inputSemanticIndex[i],
                                           interpMode[i]);
      }

      if (program->InputsRead & VARYING_BIT_POS) {
         /* Must do this after setting up t->inputs, and before
          * emitting constant references, below:
          */
         emit_wpos(st_context(ctx), t, program, ureg);
      }

      if (program->InputsRead & VARYING_BIT_FACE) {
         emit_face_var( t, program );
      }

      /*
       * Declare output attributes.
       */
      for (i = 0; i < numOutputs; i++) {
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            t->outputs[i] = ureg_DECL_output( ureg,
                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
                                              outputSemanticIndex[i] );

            t->outputs[i] = ureg_writemask( t->outputs[i],
                                            TGSI_WRITEMASK_Z );
            break;
         case TGSI_SEMANTIC_STENCIL:
            t->outputs[i] = ureg_DECL_output( ureg,
                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
                                              outputSemanticIndex[i] );
            t->outputs[i] = ureg_writemask( t->outputs[i],
                                            TGSI_WRITEMASK_Y );
            break;
         case TGSI_SEMANTIC_COLOR:
            t->outputs[i] = ureg_DECL_output( ureg,
                                              TGSI_SEMANTIC_COLOR,
                                              outputSemanticIndex[i] );
            break;
         default:
            debug_assert(0);
            return 0;
         }
      }
   }
   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_input(ureg,
                                        inputSemanticName[i],
                                        inputSemanticIndex[i], 0, 1);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output( ureg,
                                           outputSemanticName[i],
                                           outputSemanticIndex[i] );
      }
   }
   else {
      assert(procType == TGSI_PROCESSOR_VERTEX);

      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output( ureg,
                                           outputSemanticName[i],
                                           outputSemanticIndex[i] );
         if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
            /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
            ureg_MOV(ureg,
                     ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
                     ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
	 }
      }
      if (passthrough_edgeflags)
         emit_edgeflags( t, program );
   }

   /* Declare address register.
    */
   if (program->NumAddressRegs > 0) {
      debug_assert( program->NumAddressRegs == 1 );
      t->address[0] = ureg_DECL_address( ureg );
   }

   /* Declare misc input registers
    */
   {
      GLbitfield sysInputs = program->SystemValuesRead;
      unsigned numSys = 0;
      for (i = 0; sysInputs; i++) {
         if (sysInputs & (1 << i)) {
            unsigned semName = _mesa_sysval_to_semantic[i];
            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
            if (semName == TGSI_SEMANTIC_INSTANCEID ||
                semName == TGSI_SEMANTIC_VERTEXID) {
               /* From Gallium perspective, these system values are always
                * integer, and require native integer support.  However, if
                * native integer is supported on the vertex stage but not the
                * pixel stage (e.g, i915g + draw), Mesa will generate IR that
                * assumes these system values are floats. To resolve the
                * inconsistency, we insert a U2F.
                */
               struct st_context *st = st_context(ctx);
               struct pipe_screen *pscreen = st->pipe->screen;
               assert(procType == TGSI_PROCESSOR_VERTEX);
               assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
               (void) pscreen;  /* silence non-debug build warnings */
               if (!ctx->Const.NativeIntegers) {
                  struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
                  ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
                  t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
               }
            }
            numSys++;
            sysInputs &= ~(1 << i);
         }
      }
   }

   if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
      /* If temps are accessed with indirect addressing, declare temporaries
       * in sequential order.  Else, we declare them on demand elsewhere.
       */
      for (i = 0; i < program->NumTemporaries; i++) {
         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
         t->temps[i] = ureg_DECL_temporary( t->ureg );
      }
   }

   /* Emit constants and immediates.  Mesa uses a single index space
    * for these, so we put all the translated regs in t->constants.
    */
   if (program->Parameters) {
      t->constants = calloc( program->Parameters->NumParameters,
                             sizeof t->constants[0] );
      if (t->constants == NULL) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto out;
      }

      for (i = 0; i < program->Parameters->NumParameters; i++) {
         switch (program->Parameters->Parameters[i].Type) {
         case PROGRAM_STATE_VAR:
         case PROGRAM_UNIFORM:
            t->constants[i] = ureg_DECL_constant( ureg, i );
            break;

            /* Emit immediates only when there's no indirect addressing of
             * the const buffer.
             * FIXME: Be smarter and recognize param arrays:
             * indirect addressing is only valid within the referenced
             * array.
             */
         case PROGRAM_CONSTANT:
            if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
               t->constants[i] = ureg_DECL_constant( ureg, i );
            else
               t->constants[i] = 
                  ureg_DECL_immediate( ureg,
                                       (const float*) program->Parameters->ParameterValues[i],
                                       4 );
            break;
         default:
            break;
         }
      }
   }

   /* texture samplers */
   for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
      if (program->SamplersUsed & (1 << i)) {
         t->samplers[i] = ureg_DECL_sampler( ureg, i );
      }
   }

   /* Emit each instruction in turn:
    */
   for (i = 0; i < program->NumInstructions; i++) {
      set_insn_start( t, ureg_get_instruction_number( ureg ));
      compile_instruction( ctx, t, &program->Instructions[i], clamp_color );
   }

   /* Fix up all emitted labels:
    */
   for (i = 0; i < t->labels_count; i++) {
      ureg_fixup_label( ureg,
                        t->labels[i].token,
                        t->insn[t->labels[i].branch_target] );
   }

out:
   free(t->insn);
   free(t->labels);
   free(t->constants);

   if (t->error) {
      debug_printf("%s: translate error flag set\n", __func__);
   }

   return ret;
}
Beispiel #9
0
static void *
create_fs(struct pipe_context *pipe, unsigned fs_traits)
{
    struct ureg_program *ureg;
    struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler;
    struct ureg_src /*dst_pos, */ src_input, mask_pos;
    struct ureg_dst src, mask;
    struct ureg_dst out;
    struct ureg_src imm0 = { 0 };
    unsigned has_mask = (fs_traits & FS_MASK) != 0;
    unsigned is_fill = (fs_traits & FS_FILL) != 0;
    unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0;
    unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0;
    unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0;
    unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0;
    unsigned comp_alpha_mask = fs_traits & FS_COMPONENT_ALPHA;
    unsigned is_yuv = (fs_traits & FS_YUV) != 0;
    unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0;
    unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0;
    unsigned src_swizzle = (fs_traits & FS_SRC_SWIZZLE_RGB) != 0;
    unsigned mask_swizzle = (fs_traits & FS_MASK_SWIZZLE_RGB) != 0;
    unsigned src_set_alpha = (fs_traits & FS_SRC_SET_ALPHA) != 0;
    unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0;
    unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0;
    unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0;
    unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0;

#if 0
    print_fs_traits(fs_traits);
#else
    (void)print_fs_traits;
#endif

    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (ureg == NULL)
	return 0;

    /* it has to be either a fill, a composite op or a yuv conversion */
    debug_assert((is_fill ^ is_composite) ^ is_yuv);
    (void)is_yuv;

    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);

    if (src_repeat_none || mask_repeat_none ||
	src_set_alpha || mask_set_alpha || src_luminance) {
	imm0 = ureg_imm4f(ureg, 0, 0, 0, 1);
    }
    if (is_composite) {
	src_sampler = ureg_DECL_sampler(ureg, 0);
	src_input = ureg_DECL_fs_input(ureg,
				       TGSI_SEMANTIC_GENERIC, 0,
				       TGSI_INTERPOLATE_PERSPECTIVE);
    } else if (is_fill) {
	if (is_solid)
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_COLOR, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
	else
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_POSITION, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
    } else {
	debug_assert(is_yuv);
	return create_yuv_shader(pipe, ureg);
    }

    if (has_mask) {
	mask_sampler = ureg_DECL_sampler(ureg, 1);
	mask_pos = ureg_DECL_fs_input(ureg,
				      TGSI_SEMANTIC_GENERIC, 1,
				      TGSI_INTERPOLATE_PERSPECTIVE);
    }
#if 0				/* unused right now */
    dst_sampler = ureg_DECL_sampler(ureg, 2);
    dst_pos = ureg_DECL_fs_input(ureg,
				 TGSI_SEMANTIC_POSITION, 2,
				 TGSI_INTERPOLATE_PERSPECTIVE);
#endif

    if (is_composite) {
	if (has_mask || src_luminance || dst_luminance)
	    src = ureg_DECL_temporary(ureg);
	else
	    src = out;
	xrender_tex(ureg, src, src_input, src_sampler, imm0,
		    src_repeat_none, src_swizzle, src_set_alpha);
    } else if (is_fill) {
	if (is_solid) {
	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_dst(src_input);
	    else
		ureg_MOV(ureg, out, src_input);
	} else if (is_lingrad || is_radgrad) {
	    struct ureg_src coords, const0124, matrow0, matrow1, matrow2;

	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_DECL_temporary(ureg);
	    else
		src = out;

	    coords = ureg_DECL_constant(ureg, 0);
	    const0124 = ureg_DECL_constant(ureg, 1);
	    matrow0 = ureg_DECL_constant(ureg, 2);
	    matrow1 = ureg_DECL_constant(ureg, 3);
	    matrow2 = ureg_DECL_constant(ureg, 4);

	    if (is_lingrad) {
		linear_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    } else if (is_radgrad) {
		radial_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    }
	} else
	    debug_assert(!"Unknown fill type!");
    }
    if (src_luminance) {
	ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X));
	ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ),
		 ureg_scalar(imm0, TGSI_SWIZZLE_X));
	if (!has_mask && !dst_luminance)
	    ureg_MOV(ureg, out, ureg_src(src));
    }

    if (has_mask) {
	mask = ureg_DECL_temporary(ureg);
	xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0,
		    mask_repeat_none, mask_swizzle, mask_set_alpha);
	/* src IN mask */

	src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src),
		    ureg_src(mask),
		    comp_alpha_mask, mask_luminance);

	ureg_release_temporary(ureg, mask);
    }

    if (dst_luminance) {
	/*
	 * Make sure the alpha channel goes into the output L8 surface.
	 */
	ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W));
    }

    ureg_END(ureg);

    return ureg_create_shader_and_destroy(ureg, pipe);
}
Beispiel #10
0
static void *
create_frag_shader_weave(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src i_tc[2];
   struct ureg_src csc[3];
   struct ureg_src sampler[3];
   struct ureg_dst t_tc[2];
   struct ureg_dst t_texel[2];
   struct ureg_dst o_fragment;
   unsigned i, j;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
   i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < 3; ++i) {
      csc[i] = ureg_DECL_constant(shader, i);
      sampler[i] = ureg_DECL_sampler(shader, i);
   }

   for (i = 0; i < 2; ++i) {
      t_tc[i] = ureg_DECL_temporary(shader);
      t_texel[i] = ureg_DECL_temporary(shader);
   }
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /* calculate the texture offsets
    * t_tc.x = i_tc.x
    * t_tc.y = (round(i_tc.y) + 0.5) / height * 2
    */
   for (i = 0; i < 2; ++i) {
      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]);
      ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), i_tc[i]);
      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W),
               ureg_imm1f(shader, i ? 0.75f : 0.25f));
      ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
               ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f));
      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y),
               ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W));
      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z),
               ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W));
   }

   /* fetch the texels
    * texel[0..1].x = tex(t_tc[0..1][0])
    * texel[0..1].y = tex(t_tc[0..1][1])
    * texel[0..1].z = tex(t_tc[0..1][2])
    */
   for (i = 0; i < 2; ++i)
      for (j = 0; j < 3; ++j) {
         struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]),
            TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

         ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j),
                  TGSI_TEXTURE_3D, src, sampler[j]);
      }

   /* calculate linear interpolation factor
    * factor = |round(i_tc.y) - i_tc.y| * 2
    */
   ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]);
   ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
            ureg_src(t_tc[0]), ureg_negate(i_tc[0]));
   ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_XY),
            ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f));
   ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]),
            TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z),
            ureg_src(t_texel[1]), ureg_src(t_texel[0]));

   /* and finally do colour space transformation
    * fragment = csc * texel
    */
   ureg_MOV(shader, ureg_writemask(t_texel[0], TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(t_texel[0]));

   ureg_MOV(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < 2; ++i) {
      ureg_release_temporary(shader, t_texel[i]);
      ureg_release_temporary(shader, t_tc[i]);
   }

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
Beispiel #11
0
/**
 * Called when a new variant is needed, we need to translate
 * the ATI fragment shader to TGSI
 */
enum pipe_error
st_translate_atifs_program(
   struct ureg_program *ureg,
   struct ati_fragment_shader *atifs,
   struct gl_program *program,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[])
{
   enum pipe_error ret = PIPE_OK;

   unsigned pass, i, r;

   struct st_translate translate, *t;
   t = &translate;
   memset(t, 0, sizeof *t);

   t->inputMapping = inputMapping;
   t->outputMapping = outputMapping;
   t->ureg = ureg;
   t->atifs = atifs;

   /*
    * Declare input attributes.
    */
   for (i = 0; i < numInputs; i++) {
      t->inputs[i] = ureg_DECL_fs_input(ureg,
                                        inputSemanticName[i],
                                        inputSemanticIndex[i],
                                        interpMode[i]);
   }

   /*
    * Declare output attributes:
    *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
    */
   t->outputs[0] = ureg_DECL_output(ureg,
                                    TGSI_SEMANTIC_COLOR,
                                    outputSemanticIndex[0]);

   /* Emit constants and immediates.  Mesa uses a single index space
    * for these, so we put all the translated regs in t->constants.
    */
   if (program->Parameters) {
      t->constants = calloc(program->Parameters->NumParameters,
                            sizeof t->constants[0]);
      if (t->constants == NULL) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto out;
      }

      for (i = 0; i < program->Parameters->NumParameters; i++) {
         switch (program->Parameters->Parameters[i].Type) {
         case PROGRAM_STATE_VAR:
         case PROGRAM_UNIFORM:
            t->constants[i] = ureg_DECL_constant(ureg, i);
            break;
         case PROGRAM_CONSTANT:
            t->constants[i] =
               ureg_DECL_immediate(ureg,
                                   (const float*)program->Parameters->ParameterValues[i],
                                   4);
            break;
         default:
            break;
         }
      }
   }

   /* texture samplers */
   for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
      if (program->SamplersUsed & (1 << i)) {
         t->samplers[i] = ureg_DECL_sampler(ureg, i);
         /* the texture target is still unknown, it will be fixed in the draw call */
         ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D,
                                TGSI_RETURN_TYPE_FLOAT,
                                TGSI_RETURN_TYPE_FLOAT,
                                TGSI_RETURN_TYPE_FLOAT,
                                TGSI_RETURN_TYPE_FLOAT);
      }
   }

   /* emit instructions */
   for (pass = 0; pass < atifs->NumPasses; pass++) {
      t->current_pass = pass;
      for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
         compile_setupinst(t, r, texinst);
      }
      for (i = 0; i < atifs->numArithInstr[pass]; i++) {
         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
         compile_instruction(t, inst);
      }
   }

   finalize_shader(t, atifs->NumPasses);

out:
   free(t->constants);

   if (t->error) {
      debug_printf("%s: translate error flag set\n", __func__);
   }

   return ret;
}
static void *
combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders,
                struct pipe_context *pipe,
                struct pipe_shader_state *shader)
{
   VGboolean declare_input = VG_FALSE;
   VGint start_const   = -1, end_const   = 0;
   VGint start_temp    = -1, end_temp    = 0;
   VGint start_sampler = -1, end_sampler = 0;
   VGint i, current_shader = 0;
   VGint num_consts, num_temps, num_samplers;
   struct ureg_program *ureg;
   struct ureg_src in[2];
   struct ureg_src *sampler = NULL;
   struct ureg_src *constant = NULL;
   struct ureg_dst out, *temp = NULL;
   void *p = NULL;

   for (i = 0; i < num_shaders; ++i) {
      if (shaders[i]->num_consts)
         start_const = range_min(start_const, shaders[i]->start_const);
      if (shaders[i]->num_temps)
         start_temp = range_min(start_temp, shaders[i]->start_temp);
      if (shaders[i]->num_samplers)
         start_sampler = range_min(start_sampler, shaders[i]->start_sampler);

      end_const = range_max(end_const, shaders[i]->start_const +
                            shaders[i]->num_consts);
      end_temp = range_max(end_temp, shaders[i]->start_temp +
                            shaders[i]->num_temps);
      end_sampler = range_max(end_sampler, shaders[i]->start_sampler +
                            shaders[i]->num_samplers);
      if (shaders[i]->needs_position)
         declare_input = VG_TRUE;
   }
   /* if they're still unitialized, initialize them */
   if (start_const < 0)
      start_const = 0;
   if (start_temp < 0)
      start_temp = 0;
   if (start_sampler < 0)
       start_sampler = 0;

   num_consts   = end_const   - start_const;
   num_temps    = end_temp    - start_temp;
   num_samplers = end_sampler - start_sampler;

   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!ureg)
       return NULL;

   if (declare_input) {
      in[0] = ureg_DECL_fs_input(ureg,
                                 TGSI_SEMANTIC_POSITION,
                                 0,
                                 TGSI_INTERPOLATE_LINEAR);
      in[1] = ureg_DECL_fs_input(ureg,
                                 TGSI_SEMANTIC_GENERIC,
                                 0,
                                 TGSI_INTERPOLATE_PERSPECTIVE);
   }

   /* we always have a color output */
   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);

   if (num_consts >= 1) {
      constant = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_const);
      for (i = start_const; i < end_const; i++) {
         constant[i] = ureg_DECL_constant(ureg, i);
      }

   }

   if (num_temps >= 1) {
      temp = (struct ureg_dst *) malloc(sizeof(struct ureg_dst) * end_temp);
      for (i = start_temp; i < end_temp; i++) {
         temp[i] = ureg_DECL_temporary(ureg);
      }
   }

   if (num_samplers >= 1) {
      sampler = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_sampler);
      for (i = start_sampler; i < end_sampler; i++) {
         sampler[i] = ureg_DECL_sampler(ureg, i);
      }
   }

   while (current_shader < num_shaders) {
      if ((current_shader + 1) == num_shaders) {
         shaders[current_shader]->func(ureg,
                                       &out,
                                       in,
                                       sampler,
                                       temp,
                                       constant);
      } else {
         shaders[current_shader]->func(ureg,
                                      &temp[0],
                                      in,
                                      sampler,
                                      temp,
                                      constant);
      }
      current_shader++;
   }

   ureg_END(ureg);

   shader->tokens = ureg_finalize(ureg);
   if(!shader->tokens)
      return NULL;

   p = pipe->create_fs_state(pipe, shader);
   ureg_destroy(ureg);

   if (num_temps >= 1) {
      for (i = start_temp; i < end_temp; i++) {
         ureg_release_temporary(ureg, temp[i]);
      }
   }

   if (temp)
      free(temp);
   if (constant)
      free(constant);
   if (sampler)
      free(sampler);

   return p;
}
Beispiel #13
0
static void *
create_fs(struct st_context *st, bool download, enum pipe_texture_target target)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct ureg_program *ureg;
   bool have_layer;
   struct ureg_dst out;
   struct ureg_src sampler;
   struct ureg_src pos;
   struct ureg_src layer;
   struct ureg_src const0;
   struct ureg_src const1;
   struct ureg_dst temp0;

   have_layer =
      st->pbo.layers &&
      (!download || target == PIPE_TEXTURE_1D_ARRAY
                 || target == PIPE_TEXTURE_2D_ARRAY
                 || target == PIPE_TEXTURE_3D
                 || target == PIPE_TEXTURE_CUBE
                 || target == PIPE_TEXTURE_CUBE_ARRAY);

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   if (!download) {
      out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   } else {
      struct ureg_src image;

      /* writeonly images do not require an explicitly given format. */
      image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE,
                                    true, false);
      out = ureg_dst(image);
   }

   sampler = ureg_DECL_sampler(ureg, 0);
   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
   } else {
      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
                               TGSI_INTERPOLATE_LINEAR);
   }
   if (have_layer) {
      layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
                                       TGSI_INTERPOLATE_CONSTANT);
   }
   const0  = ureg_DECL_constant(ureg, 0);
   const1  = ureg_DECL_constant(ureg, 1);
   temp0   = ureg_DECL_temporary(ureg);

   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */

   /* temp0.xy = f2i(temp0.xy) */
   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                  ureg_swizzle(pos,
                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.xy = temp0.xy + const0.xy */
   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                   ureg_swizzle(ureg_src(temp0),
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
                   ureg_swizzle(const0,
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.x = const0.z * temp0.y + temp0.x */
   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));

   if (have_layer) {
      /* temp0.x = const0.w * layer + temp0.x */
      ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                      ureg_scalar(const0, TGSI_SWIZZLE_W),
                      ureg_scalar(layer, TGSI_SWIZZLE_X),
                      ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
   }

   /* temp0.w = 0 */
   ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));

   if (download) {
      struct ureg_dst temp1;
      struct ureg_src op[2];

      temp1 = ureg_DECL_temporary(ureg);

      /* temp1.xy = pos.xy */
      ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos);

      /* temp1.zw = 0 */
      ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0));

      if (have_layer) {
         struct ureg_dst temp1_layer =
            ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y
                                                                  : TGSI_WRITEMASK_Z);

         /* temp1.y/z = layer */
         ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X));

         if (target == PIPE_TEXTURE_3D) {
            /* temp1.z += layer_offset */
            ureg_UADD(ureg, temp1_layer,
                            ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z),
                            ureg_scalar(const1, TGSI_SWIZZLE_X));
         }
      }

      /* temp1 = txf(sampler, temp1) */
      ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1),
                     ureg_src(temp1), sampler);

      /* store(out, temp0, temp1) */
      op[0] = ureg_src(temp0);
      op[1] = ureg_src(temp1);
      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0,
                             TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE);

      ureg_release_temporary(ureg, temp1);
   } else {
      /* out = txf(sampler, temp0.x) */
      ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);
   }

   ureg_release_temporary(ureg, temp0);

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}