예제 #1
0
/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 */
static void
emit_wpos_adjustment(struct gl_context *ctx,
                     struct st_translate *t,
                     const struct gl_program *program,
                     boolean invert,
                     GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] by y is inverted.
    */
   static const gl_state_index wposTransformState[STATE_LENGTH]
      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
   
   /* XXX: note we are modifying the incoming shader here!  Need to
    * do this before emitting the constant decls below, or this
    * will be missed:
    */
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                       wposTransformState);

   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src *wpos =
      ctx->Const.GLSLFragCoordIsSysVal ?
         &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
         &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
   struct ureg_src wpos_input = *wpos;

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   *wpos = ureg_src(wpos_temp);
}
예제 #2
0
/**
 * Translate Mesa program to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
 *                            each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
 *                             each output
 *
 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 */
enum pipe_error
st_translate_mesa_program(
   struct gl_context *ctx,
   uint procType,
   struct ureg_program *ureg,
   const struct gl_program *program,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[])
{
   struct st_translate translate, *t;
   unsigned i;
   enum pipe_error ret = PIPE_OK;

   assert(numInputs <= ARRAY_SIZE(t->inputs));
   assert(numOutputs <= ARRAY_SIZE(t->outputs));

   t = &translate;
   memset(t, 0, sizeof *t);

   t->procType = procType;
   t->inputMapping = inputMapping;
   t->outputMapping = outputMapping;
   t->ureg = ureg;

   /*_mesa_print_program(program);*/

   /*
    * Declare input attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_fs_input(ureg,
                                           inputSemanticName[i],
                                           inputSemanticIndex[i],
                                           interpMode[i]);
      }

      if (program->InputsRead & VARYING_BIT_POS) {
         /* Must do this after setting up t->inputs, and before
          * emitting constant references, below:
          */
         emit_wpos(st_context(ctx), t, program, ureg);
      }

      /*
       * Declare output attributes.
       */
      for (i = 0; i < numOutputs; i++) {
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            t->outputs[i] = ureg_DECL_output( ureg,
                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
                                              outputSemanticIndex[i] );

            t->outputs[i] = ureg_writemask( t->outputs[i],
                                            TGSI_WRITEMASK_Z );
            break;
         case TGSI_SEMANTIC_STENCIL:
            t->outputs[i] = ureg_DECL_output( ureg,
                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
                                              outputSemanticIndex[i] );
            t->outputs[i] = ureg_writemask( t->outputs[i],
                                            TGSI_WRITEMASK_Y );
            break;
         case TGSI_SEMANTIC_COLOR:
            t->outputs[i] = ureg_DECL_output( ureg,
                                              TGSI_SEMANTIC_COLOR,
                                              outputSemanticIndex[i] );
            break;
         default:
            debug_assert(0);
            return 0;
         }
      }
   }
   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_input(ureg,
                                        inputSemanticName[i],
                                        inputSemanticIndex[i], 0, 1);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output( ureg,
                                           outputSemanticName[i],
                                           outputSemanticIndex[i] );
      }
   }
   else {
      assert(procType == TGSI_PROCESSOR_VERTEX);

      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output( ureg,
                                           outputSemanticName[i],
                                           outputSemanticIndex[i] );
         if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
            /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
            ureg_MOV(ureg,
                     ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
                     ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
	 }
      }
   }

   /* Declare address register.
    */
   if (program->NumAddressRegs > 0) {
      debug_assert( program->NumAddressRegs == 1 );
      t->address[0] = ureg_DECL_address( ureg );
   }

   /* Declare misc input registers
    */
   {
      GLbitfield sysInputs = program->SystemValuesRead;

      for (i = 0; sysInputs; i++) {
         if (sysInputs & (1 << i)) {
            unsigned semName = _mesa_sysval_to_semantic[i];

            t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);

            if (semName == TGSI_SEMANTIC_INSTANCEID ||
                semName == TGSI_SEMANTIC_VERTEXID) {
               /* From Gallium perspective, these system values are always
                * integer, and require native integer support.  However, if
                * native integer is supported on the vertex stage but not the
                * pixel stage (e.g, i915g + draw), Mesa will generate IR that
                * assumes these system values are floats. To resolve the
                * inconsistency, we insert a U2F.
                */
               struct st_context *st = st_context(ctx);
               struct pipe_screen *pscreen = st->pipe->screen;
               assert(procType == TGSI_PROCESSOR_VERTEX);
               assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
               (void) pscreen;  /* silence non-debug build warnings */
               if (!ctx->Const.NativeIntegers) {
                  struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
                  ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
                  t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
               }
            }

            if (procType == TGSI_PROCESSOR_FRAGMENT &&
                semName == TGSI_SEMANTIC_POSITION)
               emit_wpos(st_context(ctx), t, program, ureg);

            sysInputs &= ~(1 << i);
         }
      }
   }

   if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
      /* If temps are accessed with indirect addressing, declare temporaries
       * in sequential order.  Else, we declare them on demand elsewhere.
       */
      for (i = 0; i < program->NumTemporaries; i++) {
         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
         t->temps[i] = ureg_DECL_temporary( t->ureg );
      }
   }

   /* Emit constants and immediates.  Mesa uses a single index space
    * for these, so we put all the translated regs in t->constants.
    */
   if (program->Parameters) {
      t->constants = calloc( program->Parameters->NumParameters,
                             sizeof t->constants[0] );
      if (t->constants == NULL) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto out;
      }

      for (i = 0; i < program->Parameters->NumParameters; i++) {
         switch (program->Parameters->Parameters[i].Type) {
         case PROGRAM_STATE_VAR:
         case PROGRAM_UNIFORM:
            t->constants[i] = ureg_DECL_constant( ureg, i );
            break;

            /* Emit immediates only when there's no indirect addressing of
             * the const buffer.
             * FIXME: Be smarter and recognize param arrays:
             * indirect addressing is only valid within the referenced
             * array.
             */
         case PROGRAM_CONSTANT:
            if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
               t->constants[i] = ureg_DECL_constant( ureg, i );
            else
               t->constants[i] = 
                  ureg_DECL_immediate( ureg,
                                       (const float*) program->Parameters->ParameterValues[i],
                                       4 );
            break;
         default:
            break;
         }
      }
   }

   /* texture samplers */
   for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
      if (program->SamplersUsed & (1 << i)) {
         t->samplers[i] = ureg_DECL_sampler( ureg, i );
      }
   }

   /* Emit each instruction in turn:
    */
   for (i = 0; i < program->NumInstructions; i++) {
      set_insn_start( t, ureg_get_instruction_number( ureg ));
      compile_instruction(ctx, t, &program->Instructions[i]);
   }

   /* Fix up all emitted labels:
    */
   for (i = 0; i < t->labels_count; i++) {
      ureg_fixup_label( ureg,
                        t->labels[i].token,
                        t->insn[t->labels[i].branch_target] );
   }

out:
   free(t->insn);
   free(t->labels);
   free(t->constants);

   if (t->error) {
      debug_printf("%s: translate error flag set\n", __func__);
   }

   return ret;
}
예제 #3
0
static void *
create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field,
                         struct vertex2f *sizes, bool spatial_filter)
{
   struct ureg_program *shader;
   struct ureg_src i_vtex;
   struct ureg_src sampler_cur;
   struct ureg_src sampler_prevprev;
   struct ureg_src sampler_prev;
   struct ureg_src sampler_next;
   struct ureg_dst o_fragment;
   struct ureg_dst t_tex;
   struct ureg_dst t_comp_top, t_comp_bot;
   struct ureg_dst t_diff;
   struct ureg_dst t_a, t_b;
   struct ureg_dst t_weave, t_linear;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader) {
      return NULL;
   }

   t_tex = ureg_DECL_temporary(shader);
   t_comp_top = ureg_DECL_temporary(shader);
   t_comp_bot = ureg_DECL_temporary(shader);
   t_diff = ureg_DECL_temporary(shader);
   t_a = ureg_DECL_temporary(shader);
   t_b = ureg_DECL_temporary(shader);
   t_weave = ureg_DECL_temporary(shader);
   t_linear = ureg_DECL_temporary(shader);

   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler_prevprev = ureg_DECL_sampler(shader, 0);
   sampler_prev = ureg_DECL_sampler(shader, 1);
   sampler_cur = ureg_DECL_sampler(shader, 2);
   sampler_next = ureg_DECL_sampler(shader, 3);
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   // we don't care about ZW interpolation (allows better optimization)
   ureg_MOV(shader, t_tex, i_vtex);
   ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
            ureg_imm1f(shader, 0));

   // sample between texels for cheap lowpass
   ureg_ADD(shader, t_comp_top, ureg_src(t_tex),
            ureg_imm4f(shader, sizes->x * 0.5f, sizes->y * -0.5f, 0, 0));
   ureg_ADD(shader, t_comp_bot, ureg_src(t_tex),
            ureg_imm4f(shader, sizes->x * -0.5f, sizes->y * 0.5f, 1.0f, 0));

   if (field == 0) {
      /* interpolating top field -> current field is a bottom field */
      // cur vs prev2
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev);
      ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b));
      // prev vs next
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next);
      ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b));
   } else {
      /* interpolating bottom field -> current field is a top field */
      // cur vs prev2
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev);
      ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b));
      // prev vs next
      ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev);
      ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next);
      ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b));
   }

   // absolute maximum of differences
   ureg_MAX(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_abs(ureg_src(t_diff)),
            ureg_scalar(ureg_abs(ureg_src(t_diff)), TGSI_SWIZZLE_Y));

   if (field == 0) {
      /* weave with prev top field */
      ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler_prev);
      /* get linear interpolation from current bottom field */
      ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * -1.0f, 1.0f, 0));
      ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur);
   } else {
      /* weave with prev bottom field */
      ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, 0, 1.0f, 0));
      ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev);
      /* get linear interpolation from current top field */
      ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * 1.0f, 0, 0));
      ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur);
   }

   // mix between weave and linear
   // fully weave if diff < 6 (0.02353), fully interpolate if diff > 14 (0.05490)
   ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_diff),
            ureg_imm4f(shader, -0.02353f, 0, 0, 0));
   ureg_MUL(shader, ureg_saturate(ureg_writemask(t_diff, TGSI_WRITEMASK_X)),
            ureg_src(t_diff), ureg_imm4f(shader, 31.8750f, 0, 0, 0));
   ureg_LRP(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X), ureg_src(t_diff),
            ureg_src(t_linear), ureg_src(t_weave));

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_comp_top);
   ureg_release_temporary(shader, t_comp_bot);
   ureg_release_temporary(shader, t_diff);
   ureg_release_temporary(shader, t_a);
   ureg_release_temporary(shader, t_b);
   ureg_release_temporary(shader, t_weave);
   ureg_release_temporary(shader, t_linear);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, filter->pipe);
}
예제 #4
0
/* Create a compute shader implementing clear_buffer or copy_buffer. */
void *si_create_dma_compute_shader(struct pipe_context *ctx,
				   unsigned num_dwords_per_thread,
				   bool dst_stream_cache_policy, bool is_copy)
{
	assert(util_is_power_of_two_nonzero(num_dwords_per_thread));

	unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT;
	if (dst_stream_cache_policy)
		store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY;

	/* Don't cache loads, because there is no reuse. */
	unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY;

	unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4);
	unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned));

	for (unsigned i = 0; i < num_mem_ops; i++) {
		if (i*4 < num_dwords_per_thread)
			inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4);
	}

	struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
	if (!ureg)
		return NULL;

	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1);
	ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

	struct ureg_src value;
	if (!is_copy) {
		ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]);
		value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0);
	}

	struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
	struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
	struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
	struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
	struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false));
	struct ureg_src srcbuf;
	struct ureg_src *values = NULL;

	if (is_copy) {
		srcbuf = ureg_DECL_buffer(ureg, 1, false);
		values = malloc(num_mem_ops * sizeof(struct ureg_src));
	}

	/* If there are multiple stores, the first store writes into 0+tid,
	 * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc.
	 */
	ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid);
	/* Convert from a "store size unit" into bytes. */
	ureg_UMUL(ureg, store_addr, ureg_src(store_addr),
		  ureg_imm1u(ureg, 4 * inst_dwords[0]));
	ureg_MOV(ureg, load_addr, ureg_src(store_addr));

	/* Distance between a load and a store for latency hiding. */
	unsigned load_store_distance = is_copy ? 8 : 0;

	for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) {
		int d = i - load_store_distance;

		if (is_copy && i < num_mem_ops) {
			if (i) {
				ureg_UADD(ureg, load_addr, ureg_src(load_addr),
					  ureg_imm1u(ureg, 4 * inst_dwords[i] * 64));
			}

			values[i] = ureg_src(ureg_DECL_temporary(ureg));
			struct ureg_dst dst =
				ureg_writemask(ureg_dst(values[i]),
					       u_bit_consecutive(0, inst_dwords[i]));
			struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)};
			ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2,
					 load_qualifier, TGSI_TEXTURE_BUFFER, 0);
		}

		if (d >= 0) {
			if (d) {
				ureg_UADD(ureg, store_addr, ureg_src(store_addr),
					  ureg_imm1u(ureg, 4 * inst_dwords[d] * 64));
			}

			struct ureg_dst dst =
				ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d]));
			struct ureg_src srcs[] =
				{ureg_src(store_addr), is_copy ? values[d] : value};
			ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2,
					 store_qualifier, TGSI_TEXTURE_BUFFER, 0);
		}
	}
	ureg_END(ureg);

	struct pipe_compute_state state = {};
	state.ir_type = PIPE_SHADER_IR_TGSI;
	state.prog = ureg_get_tokens(ureg, NULL);

	void *cs = ctx->create_compute_state(ctx, &state);
	ureg_destroy(ureg);
	free(values);
	return cs;
}
예제 #5
0
static void *
create_mismatch_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src addr[2];

   struct ureg_dst m[8][2];
   struct ureg_dst fragment;

   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   for (i = 0; i < 8; ++i) {
      m[i][0] = ureg_DECL_temporary(shader);
      m[i][1] = ureg_DECL_temporary(shader);
   }

   for (i = 0; i < 8; ++i) {
      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
   }

   for (i = 0; i < 8; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = ureg_src(m[i][0]);
      s_addr[1] = ureg_src(m[i][1]);
      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
   }

   for (i = 1; i < 8; ++i) {
      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
   }

   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));

   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));

   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));

   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));

   for (i = 0; i < 8; ++i) {
      ureg_release_temporary(shader, m[i][0]);
      ureg_release_temporary(shader, m[i][1]);
   }

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
예제 #6
0
파일: xa_tgsi.c 프로젝트: nikai3d/mesa
static void
radial_gradient(struct ureg_program *ureg,
		struct ureg_dst out,
		struct ureg_src pos,
		struct ureg_src sampler,
		struct ureg_src coords,
		struct ureg_src const0124,
		struct ureg_src matrow0,
		struct ureg_src matrow1, struct ureg_src matrow2)
{
    struct ureg_dst temp0 = ureg_DECL_temporary(ureg);
    struct ureg_dst temp1 = ureg_DECL_temporary(ureg);
    struct ureg_dst temp2 = ureg_DECL_temporary(ureg);
    struct ureg_dst temp3 = ureg_DECL_temporary(ureg);
    struct ureg_dst temp4 = ureg_DECL_temporary(ureg);
    struct ureg_dst temp5 = ureg_DECL_temporary(ureg);

    ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos);
    ureg_MOV(ureg,
	     ureg_writemask(temp0, TGSI_WRITEMASK_Z),
	     ureg_scalar(const0124, TGSI_SWIZZLE_Y));

    ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0));
    ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0));
    ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0));
    ureg_RCP(ureg, temp3, ureg_src(temp3));
    ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3));
    ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3));

    ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X), ureg_src(temp1));
    ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y), ureg_src(temp2));

    ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y),
	     ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
    ureg_MAD(ureg, temp1,
	     ureg_scalar(coords, TGSI_SWIZZLE_X),
	     ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp0));
    ureg_ADD(ureg, temp1, ureg_src(temp1), ureg_src(temp1));
    ureg_MUL(ureg, temp3,
	     ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y),
	     ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y));
    ureg_MAD(ureg, temp4,
	     ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X),
	     ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp3));
    ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4)));
    ureg_MUL(ureg, temp2, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_src(temp4));
    ureg_MUL(ureg, temp0,
	     ureg_scalar(const0124, TGSI_SWIZZLE_W), ureg_src(temp2));
    ureg_MUL(ureg, temp3, ureg_src(temp1), ureg_src(temp1));
    ureg_SUB(ureg, temp2, ureg_src(temp3), ureg_src(temp0));
    ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2)));
    ureg_RCP(ureg, temp2, ureg_src(temp2));
    ureg_SUB(ureg, temp1, ureg_src(temp2), ureg_src(temp1));
    ureg_ADD(ureg, temp0,
	     ureg_scalar(coords, TGSI_SWIZZLE_Z),
	     ureg_scalar(coords, TGSI_SWIZZLE_Z));
    ureg_RCP(ureg, temp0, ureg_src(temp0));
    ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_src(temp0));
    ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler);

    ureg_release_temporary(ureg, temp0);
    ureg_release_temporary(ureg, temp1);
    ureg_release_temporary(ureg, temp2);
    ureg_release_temporary(ureg, temp3);
    ureg_release_temporary(ureg, temp4);
    ureg_release_temporary(ureg, temp5);
}
예제 #7
0
/**
 * Make simple fragment texture shader:
 *  IMM {0,0,0,1}                         // (if writemask != 0xf)
 *  MOV TEMP[0], IMM[0]                   // (if writemask != 0xf)
 *  TEX TEMP[0].writemask, IN[0], SAMP[0], 2D;
 *   .. optional SINT <-> UINT clamping ..
 *  MOV OUT[0], TEMP[0]
 *  END;
 *
 * \param tex_target  one of PIPE_TEXTURE_x
 * \parma interp_mode  either TGSI_INTERPOLATE_LINEAR or PERSPECTIVE
 * \param writemask  mask of TGSI_WRITEMASK_x
 */
void *
util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
                                        unsigned tex_target,
                                        unsigned interp_mode,
                                        unsigned writemask,
                                        enum tgsi_return_type stype,
                                        enum tgsi_return_type dtype,
                                        bool load_level_zero,
                                        bool use_txf)
{
   struct ureg_program *ureg;
   struct ureg_src sampler;
   struct ureg_src tex;
   struct ureg_dst temp;
   struct ureg_dst out;

   assert((stype == TGSI_RETURN_TYPE_FLOAT) == (dtype == TGSI_RETURN_TYPE_FLOAT));
   assert(interp_mode == TGSI_INTERPOLATE_LINEAR ||
          interp_mode == TGSI_INTERPOLATE_PERSPECTIVE);

   ureg = ureg_create( PIPE_SHADER_FRAGMENT );
   if (!ureg)
      return NULL;
   
   sampler = ureg_DECL_sampler( ureg, 0 );

   ureg_DECL_sampler_view(ureg, 0, tex_target, stype, stype, stype, stype);

   tex = ureg_DECL_fs_input( ureg, 
                             TGSI_SEMANTIC_GENERIC, 0, 
                             interp_mode );

   out = ureg_DECL_output( ureg, 
                           TGSI_SEMANTIC_COLOR,
                           0 );

   temp = ureg_DECL_temporary(ureg);

   if (writemask != TGSI_WRITEMASK_XYZW) {
      struct ureg_src imm = ureg_imm4f( ureg, 0, 0, 0, 1 );

      ureg_MOV( ureg, out, imm );
   }

   if (tex_target == TGSI_TEXTURE_BUFFER)
      ureg_TXF(ureg,
               ureg_writemask(temp, writemask),
               tex_target, tex, sampler);
   else
      ureg_load_tex(ureg, ureg_writemask(temp, writemask), tex, sampler,
                    tex_target, load_level_zero, use_txf);

   if (stype != dtype) {
      if (stype == TGSI_RETURN_TYPE_SINT) {
         assert(dtype == TGSI_RETURN_TYPE_UINT);

         ureg_IMAX(ureg, temp, ureg_src(temp), ureg_imm1i(ureg, 0));
      } else {
         assert(stype == TGSI_RETURN_TYPE_UINT);
         assert(dtype == TGSI_RETURN_TYPE_SINT);

         ureg_UMIN(ureg, temp, ureg_src(temp), ureg_imm1u(ureg, (1u << 31) - 1));
      }
   }

   ureg_MOV(ureg, out, ureg_src(temp));

   ureg_END( ureg );

   return ureg_create_shader_and_destroy( ureg, pipe );
}
예제 #8
0
static void *
create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv)
{
   struct ureg_program *shader;

   struct ureg_src vrect, vpos;
   struct ureg_dst t_vpos, t_vtex;
   struct ureg_dst o_vpos, o_flags;

   struct vertex2f scale = {
      (float)VL_BLOCK_WIDTH / r->buffer_width * VL_MACROBLOCK_WIDTH / r->macroblock_size,
      (float)VL_BLOCK_HEIGHT / r->buffer_height * VL_MACROBLOCK_HEIGHT / r->macroblock_size
   };

   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y));
   t_vtex = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
   o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS);

   /*
    * o_vtex.xy = t_vpos
    * o_flags.z = intra * 0.5
    *
    * if(interlaced) {
    *    t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 }
    *    t_vtex.z = vpos.y % 2
    *    t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
    *    o_vpos.y = t_vtex.y + t_vpos.y
    *
    *    o_flags.w = t_vtex.z ? 0 : 1
    * }
    *
    */

   vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos);

   ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z),
            ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f));
   ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));

   if (r->macroblock_size == VL_MACROBLOCK_HEIGHT) { //TODO
      ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label);

         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY),
                  ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)),
                  ureg_imm2f(shader, 0.0f, scale.y),
                  ureg_imm2f(shader, -scale.y, 0.0f));
         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z),
                  ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f));

         ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex));

         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y),
                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X),
                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y));
         ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y),
                  ureg_src(t_vpos), ureg_src(t_vtex));

         ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W),
                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
                  ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f));

      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
      ureg_ENDIF(shader);
   }

   ureg_release_temporary(shader, t_vtex);
   ureg_release_temporary(shader, t_vpos);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, r->pipe);
}
예제 #9
0
파일: vl_zscan.c 프로젝트: Distrotech/Mesa
static void *
create_frag_shader(struct vl_zscan *zscan)
{
   struct ureg_program *shader;
   struct ureg_src *vtex;

   struct ureg_src samp_src, samp_scan, samp_quant;

   struct ureg_dst *tmp;
   struct ureg_dst quant, fragment;

   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src));
   tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));

   for (i = 0; i < zscan->num_channels; ++i)
      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);

   samp_src = ureg_DECL_sampler(shader, 0);
   samp_scan = ureg_DECL_sampler(shader, 1);
   samp_quant = ureg_DECL_sampler(shader, 2);

   for (i = 0; i < zscan->num_channels; ++i)
      tmp[i] = ureg_DECL_temporary(shader);
   quant = ureg_DECL_temporary(shader);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * tmp.x = tex(vtex, 1)
    * tmp.y = vtex.z
    * fragment = tex(tmp, 0) * quant
    */
   for (i = 0; i < zscan->num_channels; ++i)
      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);

   for (i = 0; i < zscan->num_channels; ++i)
      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));

   for (i = 0; i < zscan->num_channels; ++i) {
      ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
      ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
   }

   ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
   ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));

   for (i = 0; i < zscan->num_channels; ++i)
      ureg_release_temporary(shader, tmp[i]);
   ureg_END(shader);

   FREE(vtex);
   FREE(tmp);

   return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
예제 #10
0
파일: vl_zscan.c 프로젝트: Distrotech/Mesa
static void *
create_vert_shader(struct vl_zscan *zscan)
{
   struct ureg_program *shader;

   struct ureg_src scale;
   struct ureg_src vrect, vpos, block_num;

   struct ureg_dst tmp;
   struct ureg_dst o_vpos;
   struct ureg_dst *o_vtex;

   signed i;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));

   scale = ureg_imm2f(shader,
      (float)VL_BLOCK_WIDTH / zscan->buffer_width,
      (float)VL_BLOCK_HEIGHT / zscan->buffer_height);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
   block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM);

   tmp = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   for (i = 0; i < zscan->num_channels; ++i)
      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);

   /*
    * o_vpos.xy = (vpos + vrect) * scale
    * o_vpos.zw = 1.0f
    *
    * tmp.xy = InstanceID / blocks_per_line
    * tmp.x = frac(tmp.x)
    * tmp.y = floor(tmp.y)
    *
    * o_vtex.x = vrect.x / blocks_per_line + tmp.x
    * o_vtex.y = vrect.y
    * o_vtex.z = tmp.z * blocks_per_line / blocks_total
    */
   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X),
            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));

   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));

   for (i = 0; i < zscan->num_channels; ++i) {
      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
               ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
                * (i - (signed)zscan->num_channels / 2)));

      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
   }

   ureg_release_temporary(shader, tmp);
   ureg_END(shader);

   FREE(o_vtex);

   return ureg_create_shader_and_destroy(shader, zscan->pipe);
}
예제 #11
0
static void *
create_frag_shader(struct vl_median_filter *filter,
                   struct vertex2f *offsets,
                   unsigned num_offsets)
{
   struct pipe_screen *screen = filter->pipe->screen;
   struct ureg_program *shader;
   struct ureg_src i_vtex;
   struct ureg_src sampler;
   struct ureg_dst *t_array = MALLOC(sizeof(struct ureg_dst) * num_offsets);
   struct ureg_dst o_fragment;
   const unsigned median = num_offsets >> 1;
   unsigned i, j;

   assert(num_offsets & 1); /* we need an odd number of offsets */
   if (!(num_offsets & 1)) { /* yeah, we REALLY need an odd number of offsets!!! */
      FREE(t_array);
      return NULL;
   }

   if (num_offsets > screen->get_shader_param(
      screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS)) {

      FREE(t_array);
      return NULL;
   }

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader) {
      FREE(t_array);
      return NULL;
   }

   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 0);
   ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT,
                          TGSI_RETURN_TYPE_FLOAT);

   for (i = 0; i < num_offsets; ++i)
      t_array[i] = ureg_DECL_temporary(shader);
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * t_array[0..*] = vtex + offset[0..*]
    * t_array[0..*] = tex(t_array[0..*], sampler)
    * result = partial_bubblesort(t_array)[mid]
    */

   for (i = 0; i < num_offsets; ++i) {
      if (!is_vec_zero(offsets[i])) {
         ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
                  i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y));
         ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
                  ureg_imm1f(shader, 0.0f));
      }
   }

   for (i = 0; i < num_offsets; ++i) {
      struct ureg_src src = is_vec_zero(offsets[i]) ? i_vtex : ureg_src(t_array[i]);
      ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, src, sampler);
   }

   // TODO: Couldn't this be improved even more?
   for (i = 0; i <= median; ++i) {
      for (j = 1; j < (num_offsets - i - 1); ++j) {
         struct ureg_dst tmp = ureg_DECL_temporary(shader);
         ureg_MOV(shader, tmp, ureg_src(t_array[j]));
         ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1]));
         ureg_MIN(shader, t_array[j - 1], ureg_src(tmp), ureg_src(t_array[j - 1]));
         ureg_release_temporary(shader, tmp);
      }
      if (i == median)
         ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1]));
      else
         ureg_MIN(shader, t_array[j - 1], ureg_src(t_array[j]), ureg_src(t_array[j - 1]));
   }
   ureg_MOV(shader, o_fragment, ureg_src(t_array[median]));

   ureg_END(shader);

   FREE(t_array);
   return ureg_create_shader_and_destroy(shader, filter->pipe);
}
예제 #12
0
파일: st_pbo.c 프로젝트: airlied/mesa
static void *
create_fs(struct st_context *st, bool download, enum pipe_texture_target target)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct ureg_program *ureg;
   bool have_layer;
   struct ureg_dst out;
   struct ureg_src sampler;
   struct ureg_src pos;
   struct ureg_src layer;
   struct ureg_src const0;
   struct ureg_src const1;
   struct ureg_dst temp0;

   have_layer =
      st->pbo.layers &&
      (!download || target == PIPE_TEXTURE_1D_ARRAY
                 || target == PIPE_TEXTURE_2D_ARRAY
                 || target == PIPE_TEXTURE_3D
                 || target == PIPE_TEXTURE_CUBE
                 || target == PIPE_TEXTURE_CUBE_ARRAY);

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   if (!download) {
      out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   } else {
      struct ureg_src image;

      /* writeonly images do not require an explicitly given format. */
      image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE,
                                    true, false);
      out = ureg_dst(image);
   }

   sampler = ureg_DECL_sampler(ureg, 0);
   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
   } else {
      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
                               TGSI_INTERPOLATE_LINEAR);
   }
   if (have_layer) {
      layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
                                       TGSI_INTERPOLATE_CONSTANT);
   }
   const0  = ureg_DECL_constant(ureg, 0);
   const1  = ureg_DECL_constant(ureg, 1);
   temp0   = ureg_DECL_temporary(ureg);

   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */

   /* temp0.xy = f2i(temp0.xy) */
   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                  ureg_swizzle(pos,
                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.xy = temp0.xy + const0.xy */
   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                   ureg_swizzle(ureg_src(temp0),
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
                   ureg_swizzle(const0,
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.x = const0.z * temp0.y + temp0.x */
   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));

   if (have_layer) {
      /* temp0.x = const0.w * layer + temp0.x */
      ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                      ureg_scalar(const0, TGSI_SWIZZLE_W),
                      ureg_scalar(layer, TGSI_SWIZZLE_X),
                      ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
   }

   /* temp0.w = 0 */
   ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));

   if (download) {
      struct ureg_dst temp1;
      struct ureg_src op[2];

      temp1 = ureg_DECL_temporary(ureg);

      /* temp1.xy = pos.xy */
      ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos);

      /* temp1.zw = 0 */
      ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0));

      if (have_layer) {
         struct ureg_dst temp1_layer =
            ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y
                                                                  : TGSI_WRITEMASK_Z);

         /* temp1.y/z = layer */
         ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X));

         if (target == PIPE_TEXTURE_3D) {
            /* temp1.z += layer_offset */
            ureg_UADD(ureg, temp1_layer,
                            ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z),
                            ureg_scalar(const1, TGSI_SWIZZLE_X));
         }
      }

      /* temp1 = txf(sampler, temp1) */
      ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1),
                     ureg_src(temp1), sampler);

      /* store(out, temp0, temp1) */
      op[0] = ureg_src(temp0);
      op[1] = ureg_src(temp1);
      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0,
                             TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE);

      ureg_release_temporary(ureg, temp1);
   } else {
      /* out = txf(sampler, temp0.x) */
      ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);
   }

   ureg_release_temporary(ureg, temp0);

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
예제 #13
0
파일: xa_tgsi.c 프로젝트: nikai3d/mesa
static void *
create_fs(struct pipe_context *pipe, unsigned fs_traits)
{
    struct ureg_program *ureg;
    struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler;
    struct ureg_src /*dst_pos, */ src_input, mask_pos;
    struct ureg_dst src, mask;
    struct ureg_dst out;
    struct ureg_src imm0 = { 0 };
    unsigned has_mask = (fs_traits & FS_MASK) != 0;
    unsigned is_fill = (fs_traits & FS_FILL) != 0;
    unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0;
    unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0;
    unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0;
    unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0;
    unsigned comp_alpha_mask = fs_traits & FS_COMPONENT_ALPHA;
    unsigned is_yuv = (fs_traits & FS_YUV) != 0;
    unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0;
    unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0;
    unsigned src_swizzle = (fs_traits & FS_SRC_SWIZZLE_RGB) != 0;
    unsigned mask_swizzle = (fs_traits & FS_MASK_SWIZZLE_RGB) != 0;
    unsigned src_set_alpha = (fs_traits & FS_SRC_SET_ALPHA) != 0;
    unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0;
    unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0;
    unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0;
    unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0;

#if 0
    print_fs_traits(fs_traits);
#else
    (void)print_fs_traits;
#endif

    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (ureg == NULL)
	return 0;

    /* it has to be either a fill, a composite op or a yuv conversion */
    debug_assert((is_fill ^ is_composite) ^ is_yuv);
    (void)is_yuv;

    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);

    if (src_repeat_none || mask_repeat_none ||
	src_set_alpha || mask_set_alpha || src_luminance) {
	imm0 = ureg_imm4f(ureg, 0, 0, 0, 1);
    }
    if (is_composite) {
	src_sampler = ureg_DECL_sampler(ureg, 0);
	src_input = ureg_DECL_fs_input(ureg,
				       TGSI_SEMANTIC_GENERIC, 0,
				       TGSI_INTERPOLATE_PERSPECTIVE);
    } else if (is_fill) {
	if (is_solid)
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_COLOR, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
	else
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_POSITION, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
    } else {
	debug_assert(is_yuv);
	return create_yuv_shader(pipe, ureg);
    }

    if (has_mask) {
	mask_sampler = ureg_DECL_sampler(ureg, 1);
	mask_pos = ureg_DECL_fs_input(ureg,
				      TGSI_SEMANTIC_GENERIC, 1,
				      TGSI_INTERPOLATE_PERSPECTIVE);
    }
#if 0				/* unused right now */
    dst_sampler = ureg_DECL_sampler(ureg, 2);
    dst_pos = ureg_DECL_fs_input(ureg,
				 TGSI_SEMANTIC_POSITION, 2,
				 TGSI_INTERPOLATE_PERSPECTIVE);
#endif

    if (is_composite) {
	if (has_mask || src_luminance || dst_luminance)
	    src = ureg_DECL_temporary(ureg);
	else
	    src = out;
	xrender_tex(ureg, src, src_input, src_sampler, imm0,
		    src_repeat_none, src_swizzle, src_set_alpha);
    } else if (is_fill) {
	if (is_solid) {
	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_dst(src_input);
	    else
		ureg_MOV(ureg, out, src_input);
	} else if (is_lingrad || is_radgrad) {
	    struct ureg_src coords, const0124, matrow0, matrow1, matrow2;

	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_DECL_temporary(ureg);
	    else
		src = out;

	    coords = ureg_DECL_constant(ureg, 0);
	    const0124 = ureg_DECL_constant(ureg, 1);
	    matrow0 = ureg_DECL_constant(ureg, 2);
	    matrow1 = ureg_DECL_constant(ureg, 3);
	    matrow2 = ureg_DECL_constant(ureg, 4);

	    if (is_lingrad) {
		linear_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    } else if (is_radgrad) {
		radial_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    }
	} else
	    debug_assert(!"Unknown fill type!");
    }
    if (src_luminance) {
	ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X));
	ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ),
		 ureg_scalar(imm0, TGSI_SWIZZLE_X));
	if (!has_mask && !dst_luminance)
	    ureg_MOV(ureg, out, ureg_src(src));
    }

    if (has_mask) {
	mask = ureg_DECL_temporary(ureg);
	xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0,
		    mask_repeat_none, mask_swizzle, mask_set_alpha);
	/* src IN mask */

	src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src),
		    ureg_src(mask),
		    comp_alpha_mask, mask_luminance);

	ureg_release_temporary(ureg, mask);
    }

    if (dst_luminance) {
	/*
	 * Make sure the alpha channel goes into the output L8 surface.
	 */
	ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W));
    }

    ureg_END(ureg);

    return ureg_create_shader_and_destroy(ureg, pipe);
}
예제 #14
0
static void *
create_ref_vert_shader(struct vl_mc *r)
{
   struct ureg_program *shader;
   struct ureg_src mv_scale;
   struct ureg_src vmv[2];
   struct ureg_dst t_vpos;
   struct ureg_dst o_vmv[2];
   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);

   t_vpos = calc_position(r, shader, ureg_imm2f(shader,
      (float)VL_MACROBLOCK_WIDTH / r->buffer_width,
      (float)VL_MACROBLOCK_HEIGHT / r->buffer_height)
   );

   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);

   /*
    * mv_scale.xy = 0.5 / (dst.width, dst.height);
    * mv_scale.z = 1.0f / 4.0f
    * mv_scale.w = 1.0f / 255.0f
    *
    * // Apply motion vectors
    * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos
    * o_vmv[0..1].zw = vmv[0..1] * mv_scale
    *
    */

   mv_scale = ureg_imm4f(shader,
      0.5f / r->buffer_width,
      0.5f / r->buffer_height,
      1.0f / 4.0f,
      1.0f / PIPE_VIDEO_MV_WEIGHT_MAX);

   for (i = 0; i < 2; ++i) {
      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]);
   }

   ureg_release_temporary(shader, t_vpos);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, r->pipe);
}
예제 #15
0
void *
util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                   enum tgsi_texture_type tgsi_tex,
                                   unsigned nr_samples,
                                   enum tgsi_return_type stype)
{
   struct ureg_program *ureg;
   struct ureg_src sampler, coord;
   struct ureg_dst out, tmp, top, bottom;
   struct ureg_dst tmp_coord[4], tmp_sum[4];
   unsigned i, c;

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   /* Declarations. */
   sampler = ureg_DECL_sampler(ureg, 0);
   ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype);
   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                              TGSI_INTERPOLATE_LINEAR);
   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   for (c = 0; c < 4; c++)
      tmp_sum[c] = ureg_DECL_temporary(ureg);
   for (c = 0; c < 4; c++)
      tmp_coord[c] = ureg_DECL_temporary(ureg);
   tmp = ureg_DECL_temporary(ureg);
   top = ureg_DECL_temporary(ureg);
   bottom = ureg_DECL_temporary(ureg);

   /* Instructions. */
   for (c = 0; c < 4; c++)
      ureg_MOV(ureg, tmp_sum[c], ureg_imm1f(ureg, 0));

   /* Get 4 texture coordinates for the bilinear filter. */
   ureg_F2U(ureg, tmp_coord[0], coord); /* top-left */
   ureg_UADD(ureg, tmp_coord[1], ureg_src(tmp_coord[0]),
             ureg_imm4u(ureg, 1, 0, 0, 0)); /* top-right */
   ureg_UADD(ureg, tmp_coord[2], ureg_src(tmp_coord[0]),
             ureg_imm4u(ureg, 0, 1, 0, 0)); /* bottom-left */
   ureg_UADD(ureg, tmp_coord[3], ureg_src(tmp_coord[0]),
             ureg_imm4u(ureg, 1, 1, 0, 0)); /* bottom-right */

   for (i = 0; i < nr_samples; i++) {
      for (c = 0; c < 4; c++) {
         /* Read one sample. */
         ureg_MOV(ureg, ureg_writemask(tmp_coord[c], TGSI_WRITEMASK_W),
                  ureg_imm1u(ureg, i));
         ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler);

         if (stype == TGSI_RETURN_TYPE_UINT)
            ureg_U2F(ureg, tmp, ureg_src(tmp));
         else if (stype == TGSI_RETURN_TYPE_SINT)
            ureg_I2F(ureg, tmp, ureg_src(tmp));

         /* Add it to the sum.*/
         ureg_ADD(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_src(tmp));
      }
   }

   /* Calculate the average. */
   for (c = 0; c < 4; c++)
      ureg_MUL(ureg, tmp_sum[c], ureg_src(tmp_sum[c]),
               ureg_imm1f(ureg, 1.0 / nr_samples));

   /* Take the 4 average values and apply a standard bilinear filter. */
   ureg_FRC(ureg, tmp, coord);

   ureg_LRP(ureg, top,
            ureg_scalar(ureg_src(tmp), 0),
            ureg_src(tmp_sum[1]),
            ureg_src(tmp_sum[0]));

   ureg_LRP(ureg, bottom,
            ureg_scalar(ureg_src(tmp), 0),
            ureg_src(tmp_sum[3]),
            ureg_src(tmp_sum[2]));

   ureg_LRP(ureg, tmp,
            ureg_scalar(ureg_src(tmp), 1),
            ureg_src(bottom),
            ureg_src(top));

   /* Convert to the texture format and return. */
   if (stype == TGSI_RETURN_TYPE_UINT)
      ureg_F2U(ureg, out, ureg_src(tmp));
   else if (stype == TGSI_RETURN_TYPE_SINT)
      ureg_F2I(ureg, out, ureg_src(tmp));
   else
      ureg_MOV(ureg, out, ureg_src(tmp));

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
예제 #16
0
static void *
create_ref_frag_shader(struct vl_mc *r)
{
   const float y_scale =
      r->buffer_height / 2 *
      r->macroblock_size / VL_MACROBLOCK_HEIGHT;

   struct ureg_program *shader;
   struct ureg_src tc[2], sampler;
   struct ureg_dst ref, field;
   struct ureg_dst fragment;
   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);

   sampler = ureg_DECL_sampler(shader, 0);
   ref = ureg_DECL_temporary(shader);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   field = calc_line(shader);

   /*
    * ref = field.z ? tc[1] : tc[0]
    *
    * // Adjust tc acording to top/bottom field selection
    * if (|ref.z|) {
    *    ref.y *= y_scale
    *    ref.y = floor(ref.y)
    *    ref.y += ref.z
    *    ref.y /= y_scale
    * }
    * fragment.xyz = tex(ref, sampler[0])
    */
   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
            tc[1], tc[0]);
   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
            tc[1], tc[0]);

   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);

      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
               ureg_src(ref), ureg_imm1f(shader, y_scale));
      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ENDIF(shader);

   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);

   ureg_release_temporary(shader, ref);

   ureg_release_temporary(shader, field);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, r->pipe);
}
예제 #17
0
static void *
create_frag_shader_weave(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src i_tc[2];
   struct ureg_src csc[3];
   struct ureg_src sampler[3];
   struct ureg_dst t_tc[2];
   struct ureg_dst t_texel[2];
   struct ureg_dst o_fragment;
   unsigned i, j;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
   i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < 3; ++i) {
      csc[i] = ureg_DECL_constant(shader, i);
      sampler[i] = ureg_DECL_sampler(shader, i);
   }

   for (i = 0; i < 2; ++i) {
      t_tc[i] = ureg_DECL_temporary(shader);
      t_texel[i] = ureg_DECL_temporary(shader);
   }
   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /* calculate the texture offsets
    * t_tc.x = i_tc.x
    * t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2
    */
   for (i = 0; i < 2; ++i) {
      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]);
      ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
               i_tc[i], ureg_imm1f(shader, 0.5f));
      ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]));
      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W),
               ureg_imm1f(shader, i ? 1.0f : 0.0f));
      ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
               ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f));
      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y),
               ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W));
      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z),
               ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W));
   }

   /* fetch the texels
    * texel[0..1].x = tex(t_tc[0..1][0])
    * texel[0..1].y = tex(t_tc[0..1][1])
    * texel[0..1].z = tex(t_tc[0..1][2])
    */
   for (i = 0; i < 2; ++i)
      for (j = 0; j < 3; ++j) {
         struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]),
            TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

         ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j),
                  TGSI_TEXTURE_2D_ARRAY, src, sampler[j]);
      }

   /* calculate linear interpolation factor
    * factor = |round(i_tc.y) - i_tc.y| * 2
    */
   ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]);
   ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
            ureg_src(t_tc[0]), ureg_negate(i_tc[0]));
   ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
            ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f));
   ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]),
            TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z),
            ureg_src(t_texel[0]), ureg_src(t_texel[1]));

   /* and finally do colour space transformation
    * fragment = csc * texel
    */
   ureg_MOV(shader, ureg_writemask(t_texel[0], TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(t_texel[0]));

   ureg_MOV(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < 2; ++i) {
      ureg_release_temporary(shader, t_texel[i]);
      ureg_release_temporary(shader, t_tc[i]);
   }

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
예제 #18
0
static void *
create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
                         vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
{
   struct ureg_program *shader;
   struct ureg_src flags;
   struct ureg_dst tmp;
   struct ureg_dst fragment;
   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   tmp = calc_line(shader);

   /*
    * if (field == tc.w)
    *    kill();
    * else {
    *    fragment.xyz  = tex(tc, sampler) * scale + tc.z
    *    fragment.w = 1.0f
    * }
    */

   ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
            ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp));

   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);

      ureg_KILL(shader);

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ELSE(shader, &label);

      fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp);

      if (scale != 1.0f)
         ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
                  ureg_src(tmp), ureg_imm1f(shader, scale),
                  ureg_scalar(flags, TGSI_SWIZZLE_Z));
      else
         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
                  ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z));
                  
      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f));
      ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ENDIF(shader);

   ureg_release_temporary(shader, tmp);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, r->pipe);
}
예제 #19
0
static void *
create_vert_shader(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src vpos, vtex, color;
   struct ureg_dst tmp;
   struct ureg_dst o_vpos, o_vtex, o_color;
   struct ureg_dst o_vtop, o_vbottom;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return false;

   vpos = ureg_DECL_vs_input(shader, 0);
   vtex = ureg_DECL_vs_input(shader, 1);
   color = ureg_DECL_vs_input(shader, 2);
   tmp = ureg_DECL_temporary(shader);
   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
   o_color = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR);
   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
   o_vtop = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
   o_vbottom = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);

   /*
    * o_vpos = vpos
    * o_vtex = vtex
    * o_color = color
    */
   ureg_MOV(shader, o_vpos, vpos);
   ureg_MOV(shader, o_vtex, vtex);
   ureg_MOV(shader, o_color, color);

   /*
    * tmp.x = vtex.w / 2
    * tmp.y = vtex.w / 4
    *
    * o_vtop.x = vtex.x
    * o_vtop.y = vtex.y * tmp.x + 0.25f
    * o_vtop.z = vtex.y * tmp.y + 0.25f
    * o_vtop.w = 1 / tmp.x
    *
    * o_vbottom.x = vtex.x
    * o_vbottom.y = vtex.y * tmp.x - 0.25f
    * o_vbottom.z = vtex.y * tmp.y - 0.25f
    * o_vbottom.w = 1 / tmp.y
    */
   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X),
            ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f));
   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
            ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.25f));

   ureg_MOV(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_X), vtex);
   ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.25f));
   ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.25f));
   ureg_RCP(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_W),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));

   ureg_MOV(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_X), vtex);
   ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, -0.25f));
   ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, -0.25f));
   ureg_RCP(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_W),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
예제 #20
0
static void *
create_stage1_vert_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_src vrect, vpos;
   struct ureg_src scale;
   struct ureg_dst t_tex, t_start;
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_tex = ureg_DECL_temporary(shader);
   t_start = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);

   /*
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + vrect
    * o_vpos.xy = t_vpos * scale
    * o_vpos.zw = vpos
    *
    * o_l_addr = calc_addr(...)
    * o_r_addr = calc_addr(...)
    *
    */

   scale = ureg_imm2f(shader,
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);

   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);

   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_start);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
예제 #21
0
static void *
create_stage1_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[4][2], r[2];
   struct ureg_dst *fragment;

   int i, j;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader)
      return NULL;

   fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < idct->nr_of_render_targets; ++i)
       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);

   for (i = 0; i < 4; ++i) {
      l[i][0] = ureg_DECL_temporary(shader);
      l[i][1] = ureg_DECL_temporary(shader);
   }

   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   for (i = 0; i < 4; ++i) {
      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
   }

   for (i = 0; i < 4; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = ureg_src(l[i][0]);
      s_addr[1] = ureg_src(l[i][1]);
      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
   }

   for (i = 0; i < idct->nr_of_render_targets; ++i) {
      struct ureg_src s_addr[2];

      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);

      s_addr[0] = ureg_src(r[0]);
      s_addr[1] = ureg_src(r[1]);
      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);

      for (j = 0; j < 4; ++j) {
         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
      }
   }

   for (i = 0; i < 4; ++i) {
      ureg_release_temporary(shader, l[i][0]);
      ureg_release_temporary(shader, l[i][1]);
   }
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   FREE(fragment);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}