Ejemplo n.º 1
0
static void precalc_txp( struct brw_wm_compile *c,
			       const struct prog_instruction *inst )
{
   struct prog_src_register src0 = inst->SrcReg[0];

   if (projtex(c, inst)) {
      struct prog_dst_register tmp = get_temp(c);
      struct prog_instruction tmp_inst;

      /* tmp0.w = RCP inst.arg[0][3]
       */
      emit_op(c,
	      OPCODE_RCP,
	      dst_mask(tmp, WRITEMASK_W),
	      0, 0, 0,
	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
	      src_undef(),
	      src_undef());

      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
       */
      emit_op(c,
	      OPCODE_MUL,
	      dst_mask(tmp, WRITEMASK_XYZ),
	      0, 0, 0,
	      src0,
	      src_swizzle1(src_reg_from_dst(tmp), W),
	      src_undef());

      /* dst = precalc(TEX tmp0)
       */
      tmp_inst = *inst;
      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
      precalc_tex(c, &tmp_inst);

      release_temp(c, tmp);
   }
   else
   {
      /* dst = precalc(TEX src0)
       */
      precalc_tex(c, inst);
   }
}
Ejemplo n.º 2
0
/**
 * Encode a LIT instruction into four hardware words.
 *
 * inst[0] is the destination operand (ME_LIGHT_COEFF_DX); inst[1..3] are
 * three views of source register 0 with the component orders
 * {X W 0 Y}, {Y W 0 X} and {Y X 0 W}, where the third slot is always
 * forced to zero.
 */
static void ei_lit(struct r300_vertex_program_code *vp,
				      struct rc_sub_instruction *vpi,
				      unsigned int * inst)
{
	/* LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} */

	/* Swizzle selectors of source 0 for the X, Y and W channels. */
	const unsigned int swz_x = GET_SWZ(vpi->SrcReg[0].Swizzle, 0);
	const unsigned int swz_y = GET_SWZ(vpi->SrcReg[0].Swizzle, 1);
	const unsigned int swz_w = GET_SWZ(vpi->SrcReg[0].Swizzle, 3);
	/* Relative-addressing flag, OR'ed into every source word. */
	const unsigned int rel_addr_bits = vpi->SrcReg[0].RelAddr << 4;

	inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
				     1,
				     0,
				     t_dst_index(vp, &vpi->DstReg),
				     t_dst_mask(vpi->DstReg.WriteMask),
				     t_dst_class(vpi->DstReg.File),
				     vpi->SaturateMode == RC_SATURATE_ZERO_ONE);

	/* NOTE: Users swizzling might not work. */

	/* Source view {X, W, 0, Y} */
	inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]),
				  t_swizzle(swz_x),
				  t_swizzle(swz_w),
				  PVS_SRC_SELECT_FORCE_0,
				  t_swizzle(swz_y),
				  t_src_class(vpi->SrcReg[0].File),
				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
	    rel_addr_bits;

	/* Source view {Y, W, 0, X} */
	inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]),
				  t_swizzle(swz_y),
				  t_swizzle(swz_w),
				  PVS_SRC_SELECT_FORCE_0,
				  t_swizzle(swz_x),
				  t_src_class(vpi->SrcReg[0].File),
				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
	    rel_addr_bits;

	/* Source view {Y, X, 0, W} */
	inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]),
				  t_swizzle(swz_y),
				  t_swizzle(swz_x),
				  PVS_SRC_SELECT_FORCE_0,
				  t_swizzle(swz_w),
				  t_src_class(vpi->SrcReg[0].File),
				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
	    rel_addr_bits;
}
Ejemplo n.º 3
0
/**
 * Resolve channel \p i of a fragment-program source register to a
 * brw_wm_ref.
 *
 * The swizzle selector for channel \p i decides the result: SWIZZLE_ZERO
 * and SWIZZLE_ONE map to constant references for 0.0 and 1.0, anything
 * else is looked up with pass0_get_reg() using the register's file, index
 * and the selected component.
 */
static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
						    struct prog_src_register src,
						    GLuint i )
{
   static const GLfloat const_zero = 0.0;
   static const GLfloat const_one = 1.0;
   const GLuint selector = GET_SWZ(src.Swizzle,i);

   if (selector == SWIZZLE_ZERO)
      return get_const_ref(c, &const_zero);

   if (selector == SWIZZLE_ONE)
      return get_const_ref(c, &const_one);

   return pass0_get_reg(c, src.File, src.Index, selector);
}
Ejemplo n.º 4
0
/**
 * Write-tracking callback: record the constant stored by a MOV.
 *
 * \p data is a struct const_value.  The callback only reacts when the
 * written register (\p file, \p index) matches the tracked source and
 * \p mask covers the channel selected by the first swizzle component of
 * that source.  For a MOV whose source 0 is an immediate, HasValue is set
 * and Value is filled from get_constant_value(); every other case leaves
 * the tracked state untouched.
 */
static void update_const_value(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct const_value * tracked = data;

	/* Different register: not our write. */
	if (tracked->Src->File != file || tracked->Src->Index != index)
		return;

	/* Same register, but the channel we read is not written. */
	if (!(1 << GET_SWZ(tracked->Src->Swizzle, 0) & mask))
		return;

	/* Only MOV from an immediate yields a known value. */
	if (inst->U.I.Opcode != RC_OPCODE_MOV)
		return;
	if (!src_reg_is_immediate(&inst->U.I.SrcReg[0], tracked->C))
		return;

	tracked->HasValue = 1;
	tracked->Value =
		get_constant_value(tracked->C, &inst->U.I.SrcReg[0], 0);
}
Ejemplo n.º 5
0
/**
 * Compute the effective write mask of a MOV when only some components of
 * its source register are of interest.
 *
 * A destination channel is kept when it is enabled in the MOV's write mask
 * and the source component its swizzle selects is present in \p src_mask.
 *
 * \param mov       the MOV instruction (asserted)
 * \param src_mask  mask of relevant source components
 * \return mask of destination channels that survive
 */
static GLuint
get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
{
   const GLuint write_mask = mov->DstReg.WriteMask;
   GLuint result = 0x0;
   GLuint chan;

   ASSERT(mov->Opcode == OPCODE_MOV);

   for (chan = 0; chan < 4; ++chan) {
      if ((write_mask & (1 << chan)) != 0) {
         const GLuint selected = GET_SWZ(mov->SrcReg[0].Swizzle, chan);

         if ((src_mask & (1 << selected)) != 0)
            result |= 1 << chan;
      }
   }

   return result;
}
Ejemplo n.º 6
0
static GLboolean projtex( struct brw_wm_compile *c,
			  const struct prog_instruction *inst )
{
   struct prog_src_register src = inst->SrcReg[0];

   /* Only try to detect the simplest cases.  Could detect (later)
    * cases where we are trying to emit code like RCP {1.0}, MUL x,
    * {1.0}, and so on.
    *
    * More complex cases than this typically only arise from
    * user-provided fragment programs anyway:
    */
   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
      return 0;  /* ut2004 gun rendering !?! */
   else if (src.File == PROGRAM_INPUT && 
	    GET_SWZ(src.Swizzle, W) == W &&
	    (c->key.projtex_mask & (1<<src.Index)) == 0)
      return 0;
   else
      return 1;
}
Ejemplo n.º 7
0
/**
 * Fill stage_prog_data->param[] with pointers to the parameter values that
 * back a GLSL builtin uniform.
 *
 * For each state slot of \p var the state reference is (re)added to
 * prog->Parameters to obtain its index, then one param entry is written
 * per swizzle component, starting at var->data.driver_location / 4.
 *
 * \param is_scalar  when true, stop at the first repeated swizzle of a
 *                   slot; when false, all four components are written.
 */
static void
brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
                                   const struct gl_program *prog,
                                   struct brw_stage_prog_data *stage_prog_data,
                                   bool is_scalar)
{
   const nir_state_slot *const slots = var->state_slots;
   assert(var->state_slots != NULL);

   unsigned next_param = var->data.driver_location / 4;

   for (unsigned int slot = 0; slot < var->num_state_slots; slot++) {
      /* ir_to_mesa already set this state reference up; adding it again
       * simply returns the same index.
       */
      const int param_index =
         _mesa_add_state_reference(prog->Parameters,
                                   (gl_state_index *)slots[slot].tokens);

      /* Emit one parameter per unique swizzle of the element, which ends
       * up matching the layout of the array/matrix/structure being filled.
       */
      int prev_swz = -1;
      unsigned comp = 0;

      while (comp < 4) {
         const int swz = GET_SWZ(slots[slot].swizzle, comp);

         /* Two identical swizzles in a row mark the end of the builtin.
          * Scalar mode moves on to the next slot; vec4 mode keeps going
          * so the slot is padded out to four components.
          */
         if (is_scalar && swz == prev_swz)
            break;

         prev_swz = swz;

         stage_prog_data->param[next_param] =
            &prog->Parameters->ParameterValues[param_index][swz];
         next_param++;
         comp++;
      }
   }
}
Ejemplo n.º 8
0
/**
 * Report every source channel read by a paired RGB/Alpha instruction.
 *
 * First a per-source mask of referenced channels is built from the
 * argument swizzles of the RGB and Alpha opcodes (selectors >= 4 are
 * ignored).  Then \p cb is invoked once per referenced channel: channels
 * 0..2 against the RGB source slots, channel 3 against the Alpha source
 * slots, and only for slots marked as Used.
 */
static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	unsigned int used[3] = { 0, 0, 0 };
	unsigned int arg, chan, src;

	/* Channels referenced by the RGB opcode's arguments. */
	if (pair->RGB.Opcode != RC_OPCODE_NOP) {
		const struct rc_opcode_info * info = rc_get_opcode_info(pair->RGB.Opcode);
		const unsigned int num_args = info->NumSrcRegs;

		for (arg = 0; arg < num_args; ++arg) {
			for (chan = 0; chan < 3; ++chan) {
				const unsigned int swz = GET_SWZ(pair->RGB.Arg[arg].Swizzle, chan);

				if (swz >= 4)
					continue;
				used[pair->RGB.Arg[arg].Source] |= 1 << swz;
			}
		}
	}

	/* Channels referenced by the Alpha opcode's arguments. */
	if (pair->Alpha.Opcode != RC_OPCODE_NOP) {
		const struct rc_opcode_info * info = rc_get_opcode_info(pair->Alpha.Opcode);
		const unsigned int num_args = info->NumSrcRegs;

		for (arg = 0; arg < num_args; ++arg) {
			if (pair->Alpha.Arg[arg].Swizzle >= 4)
				continue;
			used[pair->Alpha.Arg[arg].Source] |= 1 << pair->Alpha.Arg[arg].Swizzle;
		}
	}

	/* Hand each referenced channel of each used source slot to the callback. */
	for (src = 0; src < 3; ++src) {
		if (pair->RGB.Src[src].Used) {
			for (chan = 0; chan < 3; ++chan) {
				if (!GET_BIT(used[src], chan))
					continue;
				cb(userdata, fullinst, pair->RGB.Src[src].File, pair->RGB.Src[src].Index, chan);
			}
		}

		if (pair->Alpha.Src[src].Used && GET_BIT(used[src], 3))
			cb(userdata, fullinst, pair->Alpha.Src[src].File, pair->Alpha.Src[src].Index, 3);
	}
}
Ejemplo n.º 9
0
/**
 * Per-source callback for normal instructions: forward the set of channels
 * a source register reads to the user's read/write callback.
 *
 * The mask is the union of the four swizzle selectors, restricted to
 * RC_MASK_XYZW.  Nothing is reported when the mask is empty.  When the
 * source uses relative addressing, a read of the X channel of address
 * register 0 is reported as well.
 */
static void reads_normal_callback(
	void * userdata,
	struct rc_instruction * fullinst,
	struct rc_src_register * src)
{
	struct read_write_mask_data * rw = userdata;
	unsigned int channels = 0;
	unsigned int comp = 0;

	while (comp < 4) {
		channels |= 1 << GET_SWZ(src->Swizzle, comp);
		comp++;
	}
	channels &= RC_MASK_XYZW;

	/* No real channel referenced: nothing to report. */
	if (!channels)
		return;

	rw->Cb(rw->UserData, fullinst, src->File, src->Index, channels);

	if (src->RelAddr)
		rw->Cb(rw->UserData, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
/**
 * Run the pair translate / schedule / dead-source removal / register
 * allocation passes on the program in \p filename and check the result.
 *
 * The test passes when, after the passes, every normal instruction other
 * than BEGIN_TEX selects RC_SWIZZLE_X in the first swizzle component of
 * source register 0.
 *
 * If the program cannot be loaded the test is recorded as failed and the
 * passes are not run; otherwise an empty program would be reported as a
 * pass.
 *
 * \param result    test bookkeeping, updated via test_begin()/test_check()
 * \param c         compiler instance the program is loaded into
 * \param filename  path of the test program
 */
static void test_runner_rc_regalloc(
	struct test_result *result,
	struct radeon_compiler *c,
	const char *filename)
{
	struct rc_test_file test_file;
	unsigned optimizations = 1;
	unsigned do_full_regalloc = 1;
	struct rc_instruction *inst;
	unsigned pass = 1;

	test_begin(result);

	if (!load_program(c, &test_file, filename)) {
		fprintf(stderr, "Failed to load program\n");
		/* Nothing meaningful to check without a program. */
		test_check(result, 0);
		return;
	}

	rc_pair_translate(c, NULL);
	rc_pair_schedule(c, &optimizations);
	rc_pair_remove_dead_sources(c, NULL);
	rc_pair_regalloc(c, &do_full_regalloc);

	for(inst = c->Program.Instructions.Next;
				inst != &c->Program.Instructions;
				inst = inst->Next) {
		if (inst->Type == RC_INSTRUCTION_NORMAL &&
				inst->U.I.Opcode != RC_OPCODE_BEGIN_TEX) {
			if (GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 0)
							!= RC_SWIZZLE_X) {
				pass = 0;
			}
		}
	}

	test_check(result, pass);
}
Ejemplo n.º 11
0
/**
 * Check if there's a potential src/dst register data dependency when
 * using SOA execution.
 * Example:
 *   MOV T, T.yxwz;
 * This would expand into:
 *   MOV t0, t1;
 *   MOV t1, t0;
 *   MOV t2, t3;
 *   MOV t3, t2;
 * The second instruction will have the wrong value for t0 if executed as-is.
 */
GLboolean
_mesa_check_soa_dependencies(const struct prog_instruction *inst)
{
   GLuint i, chan;

   if (inst->DstReg.WriteMask == WRITEMASK_X ||
       inst->DstReg.WriteMask == WRITEMASK_Y ||
       inst->DstReg.WriteMask == WRITEMASK_Z ||
       inst->DstReg.WriteMask == WRITEMASK_W ||
       inst->DstReg.WriteMask == 0x0) {
      /* no chance of data dependency */
      return GL_FALSE;
   }

   /* loop over src regs */
   for (i = 0; i < 3; i++) {
      if (inst->SrcReg[i].File == inst->DstReg.File &&
          inst->SrcReg[i].Index == inst->DstReg.Index) {
         /* loop over dest channels */
         GLuint channelsWritten = 0x0;
         for (chan = 0; chan < 4; chan++) {
            if (inst->DstReg.WriteMask & (1 << chan)) {
               /* check if we're reading a channel that's been written */
               GLuint swizzle = GET_SWZ(inst->SrcReg[i].Swizzle, chan);
               if (swizzle <= SWIZZLE_W &&
                   (channelsWritten & (1 << swizzle))) {
                  return GL_TRUE;
               }

               channelsWritten |= (1 << chan);
            }
         }
      }
   }
   return GL_FALSE;
}
Ejemplo n.º 12
0
/**
 * Remove dead instructions from the given program.
 * This is very primitive for now.  Basically look for temp registers
 * that are written to but never read.  Remove any instructions that
 * write to such registers.  Be careful with condition code setters.
 */
static GLboolean
_mesa_remove_dead_code_global(struct gl_program *prog)
{
   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
   GLboolean *removeInst; /* per-instruction removal flag */
   GLuint i, rem = 0, comp;

   memset(tempRead, 0, sizeof(tempRead));

   if (dbg) {
      printf("Optimize: Begin dead code removal\n");
      /*_mesa_print_program(prog);*/
   }

   removeInst = (GLboolean *)
      calloc(1, prog->NumInstructions * sizeof(GLboolean));

   /* Determine which temps are read and written */
   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
      GLuint j;

      /* check src regs */
      for (j = 0; j < numSrc; j++) {
         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
            const GLuint index = inst->SrcReg[j].Index;
            GLuint read_mask;
            ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
	    read_mask = get_src_arg_mask(inst, j, NO_MASK);

            if (inst->SrcReg[j].RelAddr) {
               if (dbg)
                  printf("abort remove dead code (indirect temp)\n");
               goto done;
            }

	    for (comp = 0; comp < 4; comp++) {
	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
	       ASSERT(swz < 4);
               if ((read_mask & (1 << swz)) == 0)
		  continue;
               if (swz <= SWIZZLE_W)
                  tempRead[index][swz] = GL_TRUE;
	    }
         }
      }

      /* check dst reg */
      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
         const GLuint index = inst->DstReg.Index;
         ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);

         if (inst->DstReg.RelAddr) {
            if (dbg)
               printf("abort remove dead code (indirect temp)\n");
            goto done;
         }

         if (inst->CondUpdate) {
            /* If we're writing to this register and setting condition
             * codes we cannot remove the instruction.  Prevent removal
             * by setting the 'read' flag.
             */
            tempRead[index][0] = GL_TRUE;
            tempRead[index][1] = GL_TRUE;
            tempRead[index][2] = GL_TRUE;
            tempRead[index][3] = GL_TRUE;
         }
      }
   }

   /* find instructions that write to dead registers, flag for removal */
   for (i = 0; i < prog->NumInstructions; i++) {
      struct prog_instruction *inst = prog->Instructions + i;
      const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);

      if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
         GLint chan, index = inst->DstReg.Index;

	 for (chan = 0; chan < 4; chan++) {
	    if (!tempRead[index][chan] &&
		inst->DstReg.WriteMask & (1 << chan)) {
	       if (dbg) {
		  printf("Remove writemask on %u.%c\n", i,
			       chan == 3 ? 'w' : 'x' + chan);
	       }
	       inst->DstReg.WriteMask &= ~(1 << chan);
	       rem++;
	    }
	 }

	 if (inst->DstReg.WriteMask == 0) {
	    /* If we cleared all writes, the instruction can be removed. */
	    if (dbg)
	       printf("Remove instruction %u: \n", i);
	    removeInst[i] = GL_TRUE;
	 }
      }
   }

   /* now remove the instructions which aren't needed */
   rem = remove_instructions(prog, removeInst);

   if (dbg) {
      printf("Optimize: End dead code removal.\n");
      printf("  %u channel writes removed\n", rem);
      printf("  %u instructions removed\n", rem);
      /*_mesa_print_program(prog);*/
   }

done:
   free(removeInst);
   return rem != 0;
}
Ejemplo n.º 13
0
/**
 * Emit the surface states for a blorp operation and the binding table
 * that references them.
 *
 * The destination surface is always emitted as a render target.  A source
 * texture surface state is emitted only when params->src.mt is set;
 * otherwise its offset stays 0.
 *
 * \return the value returned by gen6_blorp_emit_binding_table()
 */
static uint32_t
gen8_blorp_emit_surface_states(struct brw_context *brw,
                               const struct brw_blorp_params *params)
{
   uint32_t texture_offset = 0;
   uint32_t renderbuffer_offset;

   intel_miptree_used_for_rendering(params->dst.mt);

   renderbuffer_offset =
      brw_blorp_emit_surface_state(brw, &params->dst,
                                   I915_GEM_DOMAIN_RENDER,
                                   I915_GEM_DOMAIN_RENDER,
                                   true /* is_render_target */);

   if (params->src.mt) {
      const struct brw_blorp_surface_info *src = &params->src;
      struct intel_mipmap_tree *mt = src->mt;

      /* For a 2D multisample array texture on Gen7+ with
       * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, the src layer is
       * the physical layer holding sample 0: with mt->num_samples == 4,
       * logical layer n lives at layer 4*n.
       *
       * Multisampled depth and stencil surfaces interleave their samples
       * (INTEL_MSAA_LAYOUT_IMS), so their layer needs no adjustment.
       */
      unsigned samples_per_layer = 1;
      if (mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
          mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
         samples_per_layer = MAX2(mt->num_samples, 1);
      }

      const bool is_cube = mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
                           mt->target == GL_TEXTURE_CUBE_MAP;
      const unsigned faces = is_cube ? 6 : 1;
      const unsigned total_layers = faces * mt->logical_depth0;

      /* 3D textures always start at layer 0. */
      unsigned first_layer = 0;
      if (mt->target != GL_TEXTURE_3D)
         first_layer = src->layer / samples_per_layer;

      struct isl_view view = {
         .format = src->brw_surfaceformat,
         .base_level = src->level,
         .levels = mt->last_level - src->level + 1,
         .base_array_layer = first_layer,
         .array_len = total_layers - first_layer,
         .channel_select = {
            swizzle_to_scs(GET_SWZ(src->swizzle, 0)),
            swizzle_to_scs(GET_SWZ(src->swizzle, 1)),
            swizzle_to_scs(GET_SWZ(src->swizzle, 2)),
            swizzle_to_scs(GET_SWZ(src->swizzle, 3)),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      const uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;

      brw_emit_surface_state(brw, mt, &view, mocs,
                             false, &texture_offset, -1,
                             I915_GEM_DOMAIN_SAMPLER, 0);
   }

   return gen6_blorp_emit_binding_table(brw,
                                        renderbuffer_offset,
                                        texture_offset);
}

/**
 * \copydoc gen6_blorp_exec()
 *
 * Gen8 variant.  Emits, in a fixed order, the state needed for one blorp
 * operation described by \p params and finishes by emitting the primitive.
 * NOTE(review): the calls are order-dependent state emission; the exact
 * hardware ordering requirements are not visible here - confirm against
 * the PRM before reordering anything.
 *
 * \param brw     context the state is emitted through
 * \param params  parameters of the blorp operation (source and destination
 *                surfaces, sample counts, ...)
 */
void
gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params)
{
   uint32_t wm_bind_bo_offset = 0;

   brw_upload_state_base_address(brw);

   gen7_blorp_emit_cc_viewport(brw);
   gen7_l3_state.emit(brw);

   gen7_blorp_emit_urb_config(brw, params);

   /* Blend state, then the pointer that references it. */
   const uint32_t cc_blend_state_offset =
      gen8_blorp_emit_blend_state(brw, params);
   gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset);

   /* Color-calculator state, then the pointer that references it. */
   const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw);
   gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset);

   /* Constant state is disabled for every shader stage. */
   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_VS);
   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_HS);
   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_DS);
   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_GS);
   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_PS);

   /* Surface states plus binding table; the returned offset is handed to
    * the PS binding table pointer below.
    */
   wm_bind_bo_offset = gen8_blorp_emit_surface_states(brw, params);

   gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset);

   /* A sampler is only set up when there is a source surface. */
   if (params->src.mt) {
      const uint32_t sampler_offset =
         gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
      gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset);
   }

   /* Sample mask: the low num_samples bits for a multisampled destination,
    * otherwise just bit 0.
    */
   gen8_emit_3dstate_multisample(brw, params->dst.num_samples);
   gen6_emit_3dstate_sample_mask(brw,
                                 params->dst.num_samples > 1 ?
                                    (1 << params->dst.num_samples) - 1 : 1);

   /* Disable the VS, HS, TE, DS and GS stages. */
   gen8_disable_stages.emit(brw);
   gen8_blorp_emit_vs_disable(brw);
   gen8_blorp_emit_hs_disable(brw);
   gen7_blorp_emit_te_disable(brw);
   gen8_blorp_emit_ds_disable(brw);
   gen8_blorp_emit_gs_disable(brw);

   gen8_blorp_emit_streamout_disable(brw);
   gen6_blorp_emit_clip_disable(brw);
   gen8_blorp_emit_raster_state(brw);
   gen8_blorp_emit_sbe_state(brw, params);
   gen8_blorp_emit_sf_config(brw);

   /* Pixel shader state. */
   gen8_blorp_emit_ps_blend(brw);
   gen8_blorp_emit_ps_extra(brw, params);

   gen8_blorp_emit_ps_config(brw, params);

   gen8_blorp_emit_depth_stencil_state(brw, params);
   gen8_blorp_emit_wm_state(brw);

   /* Depth, clear params, drawing rectangle and vertex fetch setup,
    * followed by the primitive itself.
    */
   gen8_blorp_emit_depth_disable(brw);
   gen7_blorp_emit_clear_params(brw, params);
   gen6_blorp_emit_drawing_rectangle(brw, params);
   gen8_blorp_emit_vf_topology(brw);
   gen8_blorp_emit_vf_sys_gen_vals_state(brw);
   gen6_blorp_emit_vertices(brw, params);
   gen8_blorp_emit_vf_instancing_state(brw, params);
   gen8_blorp_emit_vf_state(brw);
   gen7_blorp_emit_primitive(brw, params);

   /* Only hardware generations before 9 get the PMA stall bits written
    * (with 0) after the primitive.
    */
   if (brw->gen < 9)
      gen8_write_pma_stall_bits(brw, 0);
}
Ejemplo n.º 14
0
/**
 * Generate an R200 vertex program from Mesa's internal representation.
 *
 * \return  GL_TRUE for success, GL_FALSE for failure.
 */
static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
{
   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
   struct prog_instruction *vpi;
   int i;
   VERTEX_SHADER_INSTRUCTION *o_inst;
   unsigned long operands;
   int are_srcs_scalar;
   unsigned long hw_op;
   int dofogfix = 0;
   int fog_temp_i = 0;
   int free_inputs;
   int array_count = 0;
   int u_temp_used;

   vp->native = GL_FALSE;
   vp->translated = GL_TRUE;
   vp->fogmode = ctx->Fog.Mode;

   if (mesa_vp->Base.NumInstructions == 0)
      return GL_FALSE;

#if 0
   if ((mesa_vp->Base.InputsRead &
      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
	    mesa_vp->Base.InputsRead);
      }
      return GL_FALSE;
   }
#endif

   if ((mesa_vp->Base.OutputsWritten &
      ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
      (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
      (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
      (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
                 (unsigned long long) mesa_vp->Base.OutputsWritten);
      }
      return GL_FALSE;
   }

   /* Initial value should be last tmp reg that hw supports.
      Strangely enough r300 doesnt mind even though these would be out of range.
      Smart enough to realize that it doesnt need it? */
   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
   struct prog_src_register src[3];
   struct prog_dst_register dst;

/* FIXME: is changing the prog safe to do here? */
   if (mesa_vp->IsPositionInvariant &&
      /* make sure we only do this once */
       !(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
	 _mesa_insert_mvp_code(ctx, mesa_vp);
      }

   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
      base e isn't directly available neither. */
   if ((mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_FOGC)) && !vp->fogpidx) {
      struct gl_program_parameter_list *paramList;
      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
      paramList = mesa_vp->Base.Parameters;
      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
   }

   vp->pos_end = 0;
   mesa_vp->Base.NumNativeInstructions = 0;
   if (mesa_vp->Base.Parameters)
      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
   else
      mesa_vp->Base.NumNativeParameters = 0;

   for(i = 0; i < VERT_ATTRIB_MAX; i++)
      vp->inputs[i] = -1;
   for(i = 0; i < 15; i++)
      vp->inputmap_rev[i] = 255;
   free_inputs = 0x2ffd;

/* fglrx uses fixed inputs as follows for conventional attribs.
   generic attribs use non-fixed assignment, fglrx will always use the
   lowest attrib values available. We'll just do the same.
   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
   and 13 in a hw vertex prog.
   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
   Additionally, not more than 12 arrays in total are possible I think.
   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
*/

/* attr 4,5 and 13 are only used with generic attribs.
   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
   not possibe to use with vertex progs as it is lacking in vert prog specification) */
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
   if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
      vp->inputs[VERT_ATTRIB_POS] = 0;
      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
      free_inputs &= ~(1 << 0);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
      free_inputs &= ~(1 << 2);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
      free_inputs &= ~(1 << 3);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
      vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
      array_count++;
   }
   /* VERT_ATTRIB_TEX0-5 */
   for (i = 0; i <= 5; i++) {
      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
	 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
	 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
	 free_inputs &= ~(1 << (i + 6));
	 array_count++;
      }
   }
   /* using VERT_ATTRIB_TEX6/7 would be illegal */
   for (; i < VERT_ATTRIB_TEX_MAX; i++) {
      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
          if (R200_DEBUG & RADEON_FALLBACKS) {
              fprintf(stderr, "texture attribute %d in vert prog\n", i);
          }
          return GL_FALSE;
      }
   }
   /* completely ignore aliasing? */
   for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
      int j;
   /* completely ignore aliasing? */
      if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
	 array_count++;
	 if (array_count > 12) {
	    if (R200_DEBUG & RADEON_FALLBACKS) {
	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
	    }
	    return GL_FALSE;
	 }
	 for (j = 0; j < 14; j++) {
	    /* will always find one due to limited array_count */
	    if (free_inputs & (1 << j)) {
	       free_inputs &= ~(1 << j);
	       vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
	       if (j == 0) {
                  /* mapped to pos */
                  vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
	       } else if (j < 12) {
                  /* mapped to col/tex */
                  vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
	       } else {
                  /* mapped to pos1 */
                  vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
               }
	       break;
	    }
	 }
      }
   }

   if (!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog without position output\n");
      }
      return GL_FALSE;
   }
   if (free_inputs & 1) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog without position input\n");
      }
      return GL_FALSE;
   }

   o_inst = vp->instr;
   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
      operands = op_operands(vpi->Opcode);
      are_srcs_scalar = operands & SCALAR_FLAG;
      operands &= OP_MASK;

      for(i = 0; i < operands; i++) {
	 src[i] = vpi->SrcReg[i];
	 /* hack up default attrib values as per spec as swizzling.
	    normal, fog, secondary color. Crazy?
	    May need more if we don't submit vec4 elements? */
	 if (src[i].File == PROGRAM_INPUT) {
	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
		  }
	       }
	    }
	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
		  }
	       }
	    }
	    else if (src[i].Index == VERT_ATTRIB_FOG) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
		  }
		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
		  }
	       }
	    }
	 }
      }

      if(operands == 3){
	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_ALL);

	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
		  SWIZZLE_X, SWIZZLE_Y,
		  SWIZZLE_Z, SWIZZLE_W,
		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);

	    o_inst->src1 = ZERO_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    src[2].File = PROGRAM_TEMPORARY;
	    src[2].Index = u_temp_i;
	    src[2].RelAddr = 0;
	    u_temp_i--;
	 }
      }

      if(operands >= 2){
	 if( CMP_SRCS(src[1], src[0]) ){
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_ALL);

	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		  SWIZZLE_X, SWIZZLE_Y,
		  SWIZZLE_Z, SWIZZLE_W,
		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);

	    o_inst->src1 = ZERO_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    src[0].File = PROGRAM_TEMPORARY;
	    src[0].Index = u_temp_i;
	    src[0].RelAddr = 0;
	    u_temp_i--;
	 }
      }

      dst = vpi->DstReg;
      if (dst.File == PROGRAM_OUTPUT &&
	  dst.Index == VARYING_SLOT_FOGC &&
	  dst.WriteMask & WRITEMASK_X) {
	  fog_temp_i = u_temp_i;
	  dst.File = PROGRAM_TEMPORARY;
	  dst.Index = fog_temp_i;
	  dofogfix = 1;
	  u_temp_i--;
      }

      /* These ops need special handling. */
      switch(vpi->Opcode){
      case OPCODE_POW:
/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
   So may need to insert additional instruction */
	 if ((src[0].File == src[1].File) &&
	     (src[0].Index == src[1].Index)) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
		   t_dst_mask(dst.WriteMask));
	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		   SWIZZLE_ZERO,
		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		   SWIZZLE_ZERO,
		   t_src_class(src[0].File),
		   src[0].Negate) | (src[0].RelAddr << 4);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_0;
	 }
	 else {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		   VSF_FLAG_ALL);
	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
		   t_src_class(src[0].File),
		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		   SWIZZLE_ZERO, SWIZZLE_ZERO,
		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
		   t_src_class(src[1].File),
		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
		   t_dst_mask(dst.WriteMask));
	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
		   VSF_IN_COMPONENT_X,
		   VSF_IN_COMPONENT_Y,
		   VSF_IN_COMPONENT_Z,
		   VSF_IN_COMPONENT_W,
		   VSF_IN_CLASS_TMP,
		   VSF_FLAG_NONE);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_0;
	    u_temp_i--;
	 }
	 goto next;

      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} 
      case OPCODE_SWZ:
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));
	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = ZERO_SRC_0;
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_MAD:
	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
	    instead (requiring 2 clocks) if all inputs are in temp memory
	    (and, only if they actually reference 3 distinct temps) */
	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
	    src[1].File == PROGRAM_TEMPORARY &&
	    src[2].File == PROGRAM_TEMPORARY &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;

	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
	    t_dst_mask(dst.WriteMask));
	 o_inst->src0 = t_src(vp, &src[0]);
#if 0
if ((o_inst - vp->instr) == 31) {
/* fix up the broken vertex program of quake4 demo... */
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
			t_src_class(src[1].File),
			src[1].Negate) | (src[1].RelAddr << 4);
o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
			t_src_class(src[1].File),
			src[1].Negate) | (src[1].RelAddr << 4);
}
else {
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = t_src(vp, &src[2]);
}
#else
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = t_src(vp, &src[2]);
#endif
	 goto next;

      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} 
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		SWIZZLE_ZERO,
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
		SWIZZLE_ZERO,
		t_src_class(src[1].File),
		src[1].Negate) | (src[1].RelAddr << 4);

	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} 
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		VSF_IN_COMPONENT_ONE,
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
		t_src_class(src[1].File),
		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0=t_src(vp, &src[0]);
	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
		t_src_class(src[0].File),
		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_FLR:
      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} 
         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
	    t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = UNUSED_SRC_0;
	 o_inst->src2 = UNUSED_SRC_1;
	 o_inst++;

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
		VSF_IN_COMPONENT_X,
		VSF_IN_COMPONENT_Y,
		VSF_IN_COMPONENT_Z,
		VSF_IN_COMPONENT_W,
		VSF_IN_CLASS_TMP,
		/* Not 100% sure about this */
		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);

	 o_inst->src2 = UNUSED_SRC_0;
	 u_temp_i--;
	 goto next;

      case OPCODE_XPD:
	 /* mul r0, r1.yzxw, r2.zxyw
	    mad r0, -r2.yzxw, r1.zxyw, r0
	  */
	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
	    src[1].File == PROGRAM_TEMPORARY &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
	    t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
		t_src_class(src[1].File),
		src[1].Negate) | (src[1].RelAddr << 4);

	 o_inst->src2 = UNUSED_SRC_1;
	 o_inst++;
	 u_temp_i--;

	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
		t_src_class(src[1].File),
		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
		VSF_IN_COMPONENT_X,
		VSF_IN_COMPONENT_Y,
		VSF_IN_COMPONENT_Z,
		VSF_IN_COMPONENT_W,
		VSF_IN_CLASS_TMP,
		VSF_FLAG_NONE);
	 goto next;

      case OPCODE_END:
	 assert(0);
      default:
	 break;
      }

      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
	    t_dst_mask(dst.WriteMask));

      if(are_srcs_scalar){
	 switch(operands){
	    case 1:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = UNUSED_SRC_0;
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 2:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = t_src_scalar(vp, &src[1]);
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 3:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = t_src_scalar(vp, &src[1]);
		o_inst->src2 = t_src_scalar(vp, &src[2]);
	    break;

	    default:
		fprintf(stderr, "illegal number of operands %lu\n", operands);
		exit(-1);
	    break;
	 }
      } else {
	 switch(operands){
	    case 1:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = UNUSED_SRC_0;
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 2:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = t_src(vp, &src[1]);
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 3:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = t_src(vp, &src[1]);
		o_inst->src2 = t_src(vp, &src[2]);
	    break;

	    default:
		fprintf(stderr, "illegal number of operands %lu\n", operands);
		exit(-1);
	    break;
	 }
      }
      next:

      if (dofogfix) {
	 o_inst++;
	 if (vp->fogmode == GL_EXP) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	 }
	 else if (vp->fogmode == GL_EXP2) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	 }
	 else { /* fogmode == GL_LINEAR */
		/* could do that with single op (dot) if using params like
		   with fixed function pipeline fog */
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;

	 }
         dofogfix = 0;
      }

      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
      if (mesa_vp->Base.NumNativeTemporaries <
	 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
	 mesa_vp->Base.NumNativeTemporaries =
	    mesa_vp->Base.NumTemporaries + u_temp_used;
      }
      if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
	 if (R200_DEBUG & RADEON_FALLBACKS) {
	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
	 }
	 return GL_FALSE;
      }
      u_temp_i = R200_VSF_MAX_TEMPS - 1;
      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
	 mesa_vp->Base.NumNativeInstructions = 129;
	 if (R200_DEBUG & RADEON_FALLBACKS) {
	    fprintf(stderr, "more than 128 native instructions\n");
	 }
	 return GL_FALSE;
      }
      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
	 vp->pos_end = (o_inst - vp->instr);
      }
   }

   vp->native = GL_TRUE;
   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
#if 0
   fprintf(stderr, "hw program:\n");
   for(i=0; i < vp->program.length; i++)
      fprintf(stderr, "%08x\n", vp->instr[i]);
#endif
   return GL_TRUE;
}
Ejemplo n.º 15
0
/**
 * Attach the gallium resource that backs a VDPAU surface to a GL texture.
 *
 * The get-proc-address callback stored in the context
 * (ctx->vdpGetProcAddress) is used to fetch the gallium interop entry point,
 * which maps \p vdpSurface to a pipe resource:
 *  - output surfaces: the entry point returns the resource directly;
 *  - video surfaces: the entry point returns a video buffer, and the
 *    resource is the texture of its sampler-view plane number (index >> 1).
 *
 * The texture object is then switched to surface-based mode, the image
 * fields are initialised from the resource's size and format, and a new
 * sampler view restricted to layer (index & 1) is created.
 *
 * Every failure (lookup error, missing surface/plane/resource, or a resource
 * that belongs to a different screen) records GL_INVALID_OPERATION and
 * returns before the texture object has been modified.
 *
 * \param ctx         GL context; supplies the VDPAU device and callback
 * \param target      not used by this function
 * \param access      not used by this function
 * \param output      GL_TRUE for an output surface, GL_FALSE for a video surface
 * \param texObj      texture object to bind the resource to
 * \param texImage    texture image whose fields are initialised
 * \param vdpSurface  VDPAU surface handle (passed to the interop entry point)
 * \param index       plane/layer selector, see above
 */
static void
st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
                     GLboolean output, struct gl_texture_object *texObj,
                     struct gl_texture_image *texImage,
                     const GLvoid *vdpSurface, GLuint index)
{
   struct st_context *st = st_context(ctx);
   struct st_texture_object *stObj = st_texture_object(texObj);
   struct st_texture_image *stImage = st_texture_image(texImage);

   int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr);
   uint32_t device = (uintptr_t)ctx->vdpDevice;
   const GLuint layer = index & 1;

   struct pipe_resource *res;
   struct pipe_sampler_view templ;

   getProcAddr = ctx->vdpGetProcAddress;

   if (output) {
      VdpOutputSurfaceGallium *lookup;

      /* A non-zero return from the callback means the lookup failed. */
      if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM,
                      (void**)&lookup))
         goto invalid_operation;

      res = lookup((uintptr_t)vdpSurface);
      if (!res)
         goto invalid_operation;
   } else {
      VdpVideoSurfaceGallium *lookup;
      struct pipe_video_buffer *buffer;
      struct pipe_sampler_view **planes;
      struct pipe_sampler_view *plane;

      if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM,
                      (void**)&lookup))
         goto invalid_operation;

      buffer = lookup((uintptr_t)vdpSurface);
      if (!buffer)
         goto invalid_operation;

      planes = buffer->get_sampler_view_planes(buffer);
      if (!planes)
         goto invalid_operation;

      /* Two consecutive indices share one plane; the low bit picks the
       * layer further down. */
      plane = planes[index >> 1];
      if (!plane)
         goto invalid_operation;

      res = plane->texture;
   }

   /* Reject a missing resource, or one created on a different screen. */
   if (!res || res->screen != st->pipe->screen)
      goto invalid_operation;

   /* switch to surface based */
   if (!stObj->surface_based) {
      _mesa_clear_texture_object(ctx, texObj);
      stObj->surface_based = GL_TRUE;
   }

   _mesa_init_teximage_fields(ctx, texImage,
                              res->width0, res->height0, 1, 0, GL_RGBA,
                              st_pipe_format_to_mesa_format(res->format));

   /* Point both the object and the image at the resource, dropping any
    * previously cached sampler view. */
   pipe_resource_reference(&stObj->pt, res);
   pipe_sampler_view_reference(&stObj->sampler_view, NULL);
   pipe_resource_reference(&stImage->pt, res);

   /* Build a view of the single selected layer using the object's swizzle. */
   u_sampler_view_default_template(&templ, res, res->format);
   templ.u.tex.first_layer = layer;
   templ.u.tex.last_layer = layer;
   templ.swizzle_r = GET_SWZ(stObj->base._Swizzle, 0);
   templ.swizzle_g = GET_SWZ(stObj->base._Swizzle, 1);
   templ.swizzle_b = GET_SWZ(stObj->base._Swizzle, 2);
   templ.swizzle_a = GET_SWZ(stObj->base._Swizzle, 3);
   stObj->sampler_view = st->pipe->create_sampler_view(st->pipe, res, &templ);

   stObj->width0 = res->width0;
   stObj->height0 = res->height0;
   stObj->depth0 = 1;
   stObj->surface_format = res->format;

   _mesa_dirty_texobj(ctx, texObj);
   return;

invalid_operation:
   _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV");
}
/**
 * Fill the given ALU instruction's opcodes and source operands into the given pair,
 * if possible.
 */
static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
{
	struct pair_state_instruction *pairinst = s->Instructions + ip;
	struct prog_instruction *inst = s->Program->Instructions + ip;

	ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
	ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);

	if (pairinst->NeedRGB) {
		if (pairinst->IsTranscendent)
			pair->RGB.Opcode = OPCODE_REPL_ALPHA;
		else
			pair->RGB.Opcode = inst->Opcode;
		if (inst->SaturateMode == SATURATE_ZERO_ONE)
			pair->RGB.Saturate = 1;
	}
	if (pairinst->NeedAlpha) {
		pair->Alpha.Opcode = inst->Opcode;
		if (inst->SaturateMode == SATURATE_ZERO_ONE)
			pair->Alpha.Saturate = 1;
	}

	int nargs = _mesa_num_inst_src_regs(inst->Opcode);
	int i;

	/* Special case for DDX/DDY (MDH/MDV). */
	if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
		if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
			return GL_FALSE;
		else
			nargs++;
	}

	for(i = 0; i < nargs; ++i) {
		int source;
		if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
			GLboolean srcrgb = GL_FALSE;
			GLboolean srcalpha = GL_FALSE;
			GLuint negatebase = 0;
			int j;
			for(j = 0; j < 3; ++j) {
				GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
				if (swz < 3)
					srcrgb = GL_TRUE;
				else if (swz < 4)
					srcalpha = GL_TRUE;
				if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j))
					negatebase = 1;
			}
			source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
			if (source < 0)
				return GL_FALSE;
			pair->RGB.Arg[i].Source = source;
			pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
			pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs;
		}
		if (pairinst->NeedAlpha) {
			GLboolean srcrgb = GL_FALSE;
			GLboolean srcalpha = GL_FALSE;
			GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3);
			GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
			if (swz < 3)
				srcrgb = GL_TRUE;
			else if (swz < 4)
				srcalpha = GL_TRUE;
			source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
			if (source < 0)
				return GL_FALSE;
			pair->Alpha.Arg[i].Source = source;
			pair->Alpha.Arg[i].Swizzle = swz;
			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
			pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs;
		}
	}

	return GL_TRUE;
}
Ejemplo n.º 17
0
/**
 * Emit a single TEX instruction
 *
 * Appends one texture instruction to the R500 program code, encoding the
 * destination write mask, texture unit, texture operation, source and
 * destination register addresses and swizzles.  For TXD the derivative
 * registers (SrcReg[1] and SrcReg[2]) are encoded as well.
 *
 * \return 1 once the instruction has been written, 0 if there was no room
 *         for another instruction (an error is reported in that case).
 *         An unsupported opcode is reported through error() but the
 *         instruction is still emitted, without an operation bit, and 1 is
 *         returned.
 */
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
	int ip;
	int chan;
	unsigned int word1;
	unsigned int word2;
	PROG_CODE;

	if (code->inst_end >= c->Base.max_alu_insts-1) {
		error("emit_tex: Too many instructions");
		return 0;
	}

	/* Claim the next instruction slot. */
	code->inst_end = code->inst_end + 1;
	ip = code->inst_end;

	code->inst[ip].inst0 = R500_INST_TYPE_TEX
		| (inst->DstReg.WriteMask << 11)
		| R500_INST_TEX_SEM_WAIT;

	word1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE;

	/* Rectangle targets are flagged as unscaled. */
	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
		word1 |= R500_TEX_UNSCALED;

	/* Select the hardware texture operation for this opcode. */
	if (inst->Opcode == RC_OPCODE_KIL)
		word1 |= R500_TEX_INST_TEXKILL;
	else if (inst->Opcode == RC_OPCODE_TEX)
		word1 |= R500_TEX_INST_LD;
	else if (inst->Opcode == RC_OPCODE_TXB)
		word1 |= R500_TEX_INST_LODBIAS;
	else if (inst->Opcode == RC_OPCODE_TXP)
		word1 |= R500_TEX_INST_PROJ;
	else if (inst->Opcode == RC_OPCODE_TXD)
		word1 |= R500_TEX_INST_DXDY;
	else if (inst->Opcode == RC_OPCODE_TXL)
		word1 |= R500_TEX_INST_LOD;
	else
		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);

	code->inst[ip].inst1 = word1;

	/* KIL has no destination register to account for. */
	use_temporary(code, inst->SrcReg[0].Index);
	if (inst->Opcode != RC_OPCODE_KIL)
		use_temporary(code, inst->DstReg.Index);

	word2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
		| R500_TEX_DST_ADDR(inst->DstReg.Index);

	/* The four result swizzle selectors go into two-bit fields that start
	 * at bit 24. */
	for (chan = 0; chan < 4; chan++)
		word2 |= (GET_SWZ(inst->TexSwizzle, chan) << (24 + 2 * chan));

	code->inst[ip].inst2 = word2;

	if (inst->Opcode == RC_OPCODE_TXD) {
		use_temporary(code, inst->SrcReg[1].Index);
		use_temporary(code, inst->SrcReg[2].Index);

		/* DX and DY parameters are specified in a separate register. */
		code->inst[ip].inst3 =
			R500_DX_ADDR(inst->SrcReg[1].Index) |
			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
			R500_DY_ADDR(inst->SrcReg[2].Index) |
			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
	}

	return 1;
}
Ejemplo n.º 18
0
/**
 * Try to inject the destination of mov as the destination of inst and recompute
 * the swizzles operators for the sources of inst if required. Return GL_TRUE
 * of the substitution was possible, GL_FALSE otherwise
 */
static GLboolean
_mesa_merge_mov_into_inst(struct prog_instruction *inst,
                          const struct prog_instruction *mov)
{
   /* Indirection table which associates destination and source components for
    * the mov instruction
    */
   const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);

   /* Some components are not written by inst. We cannot remove the mov */
   if (mask != (inst->DstReg.WriteMask & mask))
      return GL_FALSE;

   inst->SaturateMode |= mov->SaturateMode;

   /* Depending on the instruction, we may need to recompute the swizzles.
    * Also, some other instructions (like TEX) are not linear. We will only
    * consider completely active sources and destinations
    */
   switch (inst->Opcode) {

   /* Carstesian instructions: we compute the swizzle */
   case OPCODE_MOV:
   case OPCODE_MIN:
   case OPCODE_MAX:
   case OPCODE_ABS:
   case OPCODE_ADD:
   case OPCODE_MAD:
   case OPCODE_MUL:
   case OPCODE_SUB:
   {
      GLuint dst_to_src_comp[4] = {0,0,0,0};
      GLuint dst_comp, arg;
      for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
         if (mov->DstReg.WriteMask & (1 << dst_comp)) {
            const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
            ASSERT(src_comp < 4);
            dst_to_src_comp[dst_comp] = src_comp;
         }
      }

      /* Patch each source of the instruction */
      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
         const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
         inst->SrcReg[arg].Swizzle = 0;

         /* Reset each active component of the swizzle */
         for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
            GLuint src_comp, arg_comp;
            if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
               continue;
            src_comp = dst_to_src_comp[dst_comp];
            ASSERT(src_comp < 4);
            arg_comp = GET_SWZ(arg_swz, src_comp);
            ASSERT(arg_comp < 4);
            inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
         }
      }
      inst->DstReg = mov->DstReg;
      return GL_TRUE;
   }

   /* Dot products and scalar instructions: we only change the destination */
   case OPCODE_RCP:
   case OPCODE_SIN:
   case OPCODE_COS:
   case OPCODE_RSQ:
   case OPCODE_POW:
   case OPCODE_EX2:
   case OPCODE_LOG:
   case OPCODE_DP2:
   case OPCODE_DP3:
   case OPCODE_DP4:
      inst->DstReg = mov->DstReg;
      return GL_TRUE;

   /* All other instructions require fully active components with no swizzle */
   default:
      if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
          inst->DstReg.WriteMask != WRITEMASK_XYZW)
         return GL_FALSE;
      inst->DstReg = mov->DstReg;
      return GL_TRUE;
   }
}
Ejemplo n.º 19
0
/**
 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
 *  - implement texture compare (shadow extensions)
 *  - extract non-native source / destination operands
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;
	rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
	int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
		      compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;

	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
		inst->U.I.Opcode != RC_OPCODE_TXB &&
		inst->U.I.Opcode != RC_OPCODE_TXP &&
		inst->U.I.Opcode != RC_OPCODE_TXD &&
		inst->U.I.Opcode != RC_OPCODE_TXL &&
		inst->U.I.Opcode != RC_OPCODE_KIL)
		return 0;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;

		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
			inst->U.I.Opcode = RC_OPCODE_MOV;

			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
				inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			} else {
				inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
			}

			return 1;
		} else {
			struct rc_instruction * inst_rcp = NULL;
			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
			unsigned tmp_texsample;
			unsigned tmp_sum;
			int pass, fail;

			/* Save the output register. */
			struct rc_dst_register output_reg = inst->U.I.DstReg;
			unsigned saturate_mode = inst->U.I.SaturateMode;

			/* Redirect TEX to a new temp. */
			tmp_texsample = rc_find_free_temporary(c);
			inst->U.I.SaturateMode = 0;
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tmp_texsample;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

			tmp_sum = rc_find_free_temporary(c);

			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				/* Compute 1/W. */
				inst_rcp = rc_insert_new_instruction(c, inst);
				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_rcp->U.I.DstReg.Index = tmp_sum;
				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_rcp->U.I.SrcReg[0].Swizzle =
					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
			}

			/* Divide Z by W (if it's TXP) and saturate. */
			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = tmp_sum;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mul->U.I.SrcReg[0].Swizzle =
				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_mul->U.I.SrcReg[1].Index = tmp_sum;
				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
			}

			/* Add the depth texture value. */
			inst_add = rc_insert_new_instruction(c, inst_mul);
			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = tmp_sum;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[0].Index = tmp_sum;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = tmp_texsample;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;

			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
			 *   LESS:    r  < tex  <=>      -tex+r < 0
			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
			 *   GREATER: r  > tex  <=>       tex-r < 0
			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
			 *   EQUAL:   GEQUAL
			 *   NOTEQUAL:LESS
			 */

			/* This negates either r or tex: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
			else
				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;

			/* This negates the whole expresion: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			inst_cmp = rc_insert_new_instruction(c, inst_add);
			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
			inst_cmp->U.I.SaturateMode = saturate_mode;
			inst_cmp->U.I.DstReg = output_reg;
			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
			inst_cmp->U.I.SrcReg[0].Swizzle =
					combine_swizzles(RC_SWIZZLE_WWWW,
							 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
			inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);

			assert(tmp_texsample != tmp_sum);
		}
	}

	/* R300 cannot sample from rectangles and the wrap mode fallback needs
	 * normalized coordinates anyway. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
		scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
		inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
	}

	/* Divide by W if needed. */
	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
	    (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
		projective_divide(compiler, inst);
	}

	/* Texture wrap modes don't work on NPOT textures.
	 *
	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
	 * mirroring are not. If we need to repeat, we do:
	 *
	 * MUL temp, texcoord, <scaling factor constant>
	 * FRC temp, temp ; Discard integer portion of coords
	 *
	 * This gives us coords in [0, 1].
	 *
	 * Mirroring is trickier. We're going to start out like repeat:
	 *
	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
	 *                            ; so scale to [0, 1]
	 * FRC temp, temp ; Make the pattern repeat
	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
	 *				; The pattern is backwards, so reverse it (1-x).
	 *
	 * This gives us coords in [0, 1].
	 *
	 * ~ C & M. ;)
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    wrapmode != RC_WRAP_NONE) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		if (wrapmode == RC_WRAP_REPEAT) {
			/* Both instructions will be paired up. */
			struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
		} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
			/*
			 * Function:
			 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
			 *
			 * Code:
			 *   MUL temp, src0, 0.5
			 *   FRC temp, temp
			 *   MAD temp, temp, 2, -1
			 *   ADD temp, 1, -abs(temp)
			 */

			struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
			unsigned two, two_swizzle;

			inst_mul = rc_insert_new_instruction(c, inst->Prev);

			inst_mul->U.I.Opcode = RC_OPCODE_MUL;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = temp;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;

			inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_frc->U.I.SrcReg[0].Index = temp;
			inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

			two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
			inst_mad = rc_insert_new_instruction(c, inst->Prev);

			inst_mad->U.I.Opcode = RC_OPCODE_MAD;
			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mad->U.I.DstReg.Index = temp;
			inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_mad->U.I.SrcReg[0].Index = temp;
			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
			inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
			inst_mad->U.I.SrcReg[1].Index = two;
			inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
			inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
			inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;

			inst_add = rc_insert_new_instruction(c, inst->Prev);

			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = temp;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = temp;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
			inst_add->U.I.SrcReg[1].Abs = 1;
			inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
		} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
			/*
			 * Mirrored clamp modes are bloody simple, we just use abs
			 * to mirror [0, 1] into [-1, 0]. This works for
			 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
			 */
			struct rc_instruction *inst_mov;

			inst_mov = rc_insert_new_instruction(c, inst->Prev);

			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = temp;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mov->U.I.SrcReg[0].Abs = 1;
		}

		/* Preserve W for TXP/TXB. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;
	}

	/* NPOT -> POT conversion for 3D textures. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		/* Saturate XYZ. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		/* Copy W. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;

		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
	}

	/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
	 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
		unsigned two, two_swizzle;
		struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;

		two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);

		inst_mul = rc_insert_new_instruction(c, inst);
		inst_mul->U.I.Opcode = RC_OPCODE_MUL;
		inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
		inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
		inst_mul->U.I.SrcReg[1].Index = two;
		inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;

		inst_mad = rc_insert_new_instruction(c, inst_mul);
		inst_mad->U.I.Opcode = RC_OPCODE_MAD;
		inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
		inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;

		inst_cnd = rc_insert_new_instruction(c, inst_mad);
		inst_cnd->U.I.Opcode = RC_OPCODE_CND;
		inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_cnd->U.I.DstReg = inst->U.I.DstReg;
		inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */

		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot write texture to output registers or with saturate (all chips),
	 * or with masks (non-r500). */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
		 inst->U.I.SaturateMode ||
		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_mov->U.I.DstReg = inst->U.I.DstReg;
		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot read texture coordinate from constants file */
	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
	}

	return 1;
}
Ejemplo n.º 20
0
/**
 * Write \p count single blanks to \p f; used to indent every line of a
 * pair-instruction dump.
 */
static void pair_inst_indent(FILE * f, unsigned count)
{
    while (count--)
        fprintf(f, " ");
}

/**
 * Print a paired (RGB + alpha) instruction to \p f.
 *
 * The dump consists of up to four lines, all indented by the amount
 * returned from update_branch_depth():
 *   1. the source register bindings (src0..src2, presubtract sources) and
 *      the SEM_WAIT flag,
 *   2. the RGB operation, if its opcode is not NOP,
 *   3. the alpha operation, if its opcode is not NOP,
 *   4. the ALU result comparison, if WriteALUResult is set.
 *
 * \param f             stream to print to
 * \param fullinst      instruction whose U.P member is printed
 * \param branch_depth  forwarded to update_branch_depth()
 */
static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
{
    struct rc_pair_instruction * inst = &fullinst->U.P;
    unsigned indent;
    unsigned idx;
    int need_comma = 0;

    /* The indentation is driven by the RGB opcode unless that half is a
     * NOP, in which case the alpha opcode is used. */
    if (inst->RGB.Opcode != RC_OPCODE_NOP)
        indent = update_branch_depth(inst->RGB.Opcode, branch_depth);
    else
        indent = update_branch_depth(inst->Alpha.Opcode, branch_depth);

    /* Line 1: source bindings. */
    pair_inst_indent(f, indent);

    for (idx = 0; idx < 3; ++idx) {
        if (inst->RGB.Src[idx].Used) {
            if (need_comma)
                fprintf(f, ", ");
            fprintf(f, "src%i.xyz = ", idx);
            rc_print_register(f, inst->RGB.Src[idx].File, inst->RGB.Src[idx].Index, 0);
            need_comma = 1;
        }
        if (inst->Alpha.Src[idx].Used) {
            if (need_comma)
                fprintf(f, ", ");
            fprintf(f, "src%i.w = ", idx);
            rc_print_register(f, inst->Alpha.Src[idx].File, inst->Alpha.Src[idx].Index, 0);
            need_comma = 1;
        }
    }

    if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used)
        fprintf(f, ", srcp.xyz = %s",
                presubtract_op_to_string(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));

    if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
        fprintf(f, ", srcp.w = %s",
                presubtract_op_to_string(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));

    if (inst->SemWait)
        fprintf(f, " SEM_WAIT");

    fprintf(f, "\n");

    /* Line 2: the RGB half. */
    if (inst->RGB.Opcode != RC_OPCODE_NOP) {
        const struct rc_opcode_info * info = rc_get_opcode_info(inst->RGB.Opcode);
        const char * sat_suffix = inst->RGB.Saturate ? "_SAT" : "";

        pair_inst_indent(f, indent);
        fprintf(f, "     %s%s", info->Name, sat_suffix);

        if (inst->RGB.WriteMask) {
            const char * wx = (inst->RGB.WriteMask & 1) ? "x" : "";
            const char * wy = (inst->RGB.WriteMask & 2) ? "y" : "";
            const char * wz = (inst->RGB.WriteMask & 4) ? "z" : "";
            fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, wx, wy, wz);
        }
        if (inst->RGB.OutputWriteMask) {
            const char * ox = (inst->RGB.OutputWriteMask & 1) ? "x" : "";
            const char * oy = (inst->RGB.OutputWriteMask & 2) ? "y" : "";
            const char * oz = (inst->RGB.OutputWriteMask & 4) ? "z" : "";
            fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, ox, oy, oz);
        }
        if (inst->WriteALUResult == RC_ALURESULT_X)
            fprintf(f, " aluresult");

        print_omod_op(f, inst->RGB.Omod);

        for (idx = 0; idx < info->NumSrcRegs; ++idx) {
            const char * abs_mark = inst->RGB.Arg[idx].Abs ? "|" : "";
            const char * neg_mark = inst->RGB.Arg[idx].Negate ? "-" : "";

            fprintf(f, ", %s%ssrc", neg_mark, abs_mark);
            if (inst->RGB.Arg[idx].Source != RC_PAIR_PRESUB_SRC)
                fprintf(f,"%d", inst->RGB.Arg[idx].Source);
            else
                fprintf(f,"p");
            fprintf(f,".%c%c%c%s",
                    rc_swizzle_char(GET_SWZ(inst->RGB.Arg[idx].Swizzle, 0)),
                    rc_swizzle_char(GET_SWZ(inst->RGB.Arg[idx].Swizzle, 1)),
                    rc_swizzle_char(GET_SWZ(inst->RGB.Arg[idx].Swizzle, 2)),
                    abs_mark);
        }
        fprintf(f, "\n");
    }

    /* Line 3: the alpha half. */
    if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
        const struct rc_opcode_info * info = rc_get_opcode_info(inst->Alpha.Opcode);
        const char * sat_suffix = inst->Alpha.Saturate ? "_SAT" : "";

        pair_inst_indent(f, indent);
        fprintf(f, "     %s%s", info->Name, sat_suffix);

        if (inst->Alpha.WriteMask)
            fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
        if (inst->Alpha.OutputWriteMask)
            fprintf(f, " color[%i].w", inst->Alpha.Target);
        if (inst->Alpha.DepthWriteMask)
            fprintf(f, " depth.w");
        if (inst->WriteALUResult == RC_ALURESULT_W)
            fprintf(f, " aluresult");

        print_omod_op(f, inst->Alpha.Omod);

        for (idx = 0; idx < info->NumSrcRegs; ++idx) {
            const char * abs_mark = inst->Alpha.Arg[idx].Abs ? "|" : "";
            const char * neg_mark = inst->Alpha.Arg[idx].Negate ? "-" : "";

            fprintf(f, ", %s%ssrc", neg_mark, abs_mark);
            if (inst->Alpha.Arg[idx].Source != RC_PAIR_PRESUB_SRC)
                fprintf(f,"%d", inst->Alpha.Arg[idx].Source);
            else
                fprintf(f,"p");
            fprintf(f,".%c%s",
                    rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[idx].Swizzle, 0)), abs_mark);
        }
        fprintf(f, "\n");
    }

    /* Line 4: ALU result comparison, when one is written. */
    if (inst->WriteALUResult) {
        pair_inst_indent(f, indent);

        fprintf(f, "      [aluresult = (");
        rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
        fprintf(f, ")]\n");
    }
}
/**
 * Print a source register operand via _mesa_printf().
 *
 * Output order: an optional leading "-" when NegateBase is set, the register
 * name (relative-addressed constant, output, input, env parameter or
 * temporary), then a swizzle suffix.  A swizzle whose four selectors are
 * equal is printed as a single component; SWIZZLE_NOOP prints no suffix;
 * anything else prints all four components.
 */
static void
PrintSrcReg(const struct prog_src_register *src)
{
   static const char comps[5] = "xyzw";
   const unsigned sel_x = GET_SWZ(src->Swizzle, 0);
   const unsigned sel_y = GET_SWZ(src->Swizzle, 1);
   const unsigned sel_z = GET_SWZ(src->Swizzle, 2);
   const unsigned sel_w = GET_SWZ(src->Swizzle, 3);

   if (src->NegateBase)
      _mesa_printf("-");

   if (src->RelAddr) {
      /* Address-register relative access: print the signed offset. */
      if (src->Index == 0)
         _mesa_printf("c[A0.x]");
      else if (src->Index > 0)
         _mesa_printf("c[A0.x + %d]", src->Index);
      else
         _mesa_printf("c[A0.x - %d]", -src->Index);
   }
   else {
      switch (src->File) {
      case PROGRAM_OUTPUT:
         _mesa_printf("o[%s]", OutputRegisters[src->Index]);
         break;
      case PROGRAM_INPUT:
         _mesa_printf("v[%s]", InputRegisters[src->Index]);
         break;
      case PROGRAM_ENV_PARAM:
         _mesa_printf("c[%d]", src->Index);
         break;
      default:
         /* Anything else is expected to be a temporary. */
         ASSERT(src->File == PROGRAM_TEMPORARY);
         _mesa_printf("R%d", src->Index);
         break;
      }
   }

   if (sel_x == sel_y && sel_x == sel_z && sel_x == sel_w) {
      /* Scalar (replicated) swizzle. */
      _mesa_printf(".%c", comps[sel_x]);
   }
   else if (src->Swizzle != SWIZZLE_NOOP) {
      _mesa_printf(".%c%c%c%c",
             comps[sel_x],
             comps[sel_y],
             comps[sel_z],
             comps[sel_w]);
   }
}
Ejemplo n.º 22
0
/**
 * Try to remove use of extraneous MOV instructions, to free them up for dead
 * code removal.
 *
 * For every instruction accepted by can_upward_mov_be_modifed(), later
 * instructions that read the MOV's destination are rewritten in place to
 * read the MOV's source instead.  No instruction is inserted or deleted
 * here; only the source operands (File, Index, Swizzle, Negate) of later
 * instructions in \p prog are modified.
 *
 * \param prog  program whose Instructions array is scanned and patched
 */
static void
_mesa_remove_extra_move_use(struct gl_program *prog)
{
   GLuint i, j;

   if (dbg) {
      printf("Optimize: Begin remove extra move use\n");
      _mesa_print_program(prog);
   }

   /*
    * Look for sequences such as this:
    *    MOV tmpX, arg0;
    *    ...
    *    FOO tmpY, tmpX, arg1;
    * and convert into:
    *    MOV tmpX, arg0;
    *    ...
    *    FOO tmpY, arg0, arg1;
    */

   /* "i + 1 <" because a MOV in the last slot has no following reader. */
   for (i = 0; i + 1 < prog->NumInstructions; i++) {
      const struct prog_instruction *mov = prog->Instructions + i;
      GLuint dst_mask, src_mask;
      if (can_upward_mov_be_modifed(mov) == GL_FALSE)
         continue;

      /* Scanning the code, we maintain the components which are still active in
       * these two masks: dst_mask holds the channels of the MOV destination
       * that still carry the MOV's value, src_mask the channels of the MOV
       * source that are still unmodified.
       */
      dst_mask = mov->DstReg.WriteMask;
      src_mask = get_src_arg_mask(mov, 0, NO_MASK);

      /* Walk through remaining instructions until the dst or src reg gets
       * rewritten or we get into some flow-control, eliminating the use of
       * this MOV.
       */
      for (j = i + 1; j < prog->NumInstructions; j++) {
	 struct prog_instruction *inst2 = prog->Instructions + j;
         GLuint arg;

	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
	     break;

	 /* First rewrite this instruction's args if appropriate. */
	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
	    GLuint comp, read_mask;

	    /* Only plain (non-relative, non-Abs) reads of the MOV's
	     * destination register are candidates.
	     */
	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
		inst2->SrcReg[arg].RelAddr ||
		inst2->SrcReg[arg].Abs)
	       continue;
            read_mask = get_src_arg_mask(inst2, arg, NO_MASK);

	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
             * components read still come from the mov instructions
             */
            if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
               (read_mask & dst_mask) == read_mask) {
               for (comp = 0; comp < 4; comp++) {
                  /* Compose the two swizzles: the channel inst2 selects is
                   * looked up through the MOV's own source swizzle, and the
                   * MOV's per-channel negate bit is folded in with XOR.
                   */
                  const GLuint inst2_swz =
                     GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
                  const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
                  inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
                  inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
                  inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
                                                  inst2_swz) & 0x1) << comp);
               }
               inst2->SrcReg[arg].File = mov->SrcReg[0].File;
               inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
            }
	 }

	 /* The destination of MOV is written. This potentially deactivates
          * some components from the src and dst of the MOV instruction
          */
	 if (inst2->DstReg.File == mov->DstReg.File &&
	     (inst2->DstReg.RelAddr ||
	      inst2->DstReg.Index == mov->DstReg.Index)) {
            dst_mask &= ~inst2->DstReg.WriteMask;
            src_mask = get_src_arg_mask(mov, 0, dst_mask);
         }

         /* Idem when the source of mov is written */
	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
	     (inst2->DstReg.RelAddr ||
	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
            src_mask &= ~inst2->DstReg.WriteMask;
            dst_mask &= get_dst_mask_for_mov(mov, src_mask);
         }
         /* Nothing of the MOV's value is live any more: stop scanning. */
         if (dst_mask == 0)
            break;
      }
   }

   if (dbg) {
      printf("Optimize: End remove extra move use.\n");
      /*_mesa_print_program(prog);*/
   }
}
Ejemplo n.º 23
0
/**
 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
 * are read from the given src in this instruction.
 *
 * The result is derived in two steps: first the set of destination channels
 * the instruction produces is determined (from the opcode, the write mask
 * and \p dst_mask), then the source swizzle is used to map each produced
 * channel back to the source component it reads.
 *
 * Swizzle selectors above SWIZZLE_W (such as SWIZZLE_ZERO / SWIZZLE_ONE) do
 * not read any register component and therefore contribute nothing to the
 * returned mask.
 *
 * \param inst      instruction to inspect
 * \param arg       index of the source operand; must be less than
 *                  _mesa_num_inst_src_regs(inst->Opcode)
 * \param dst_mask  additional mask ANDed with the write mask for
 *                  component-wise opcodes; callers pass NO_MASK when no
 *                  extra restriction is wanted
 * \return bitmask of the source components that are read
 */
static GLuint
get_src_arg_mask(const struct prog_instruction *inst,
                 GLuint arg, GLuint dst_mask)
{
   GLuint read_mask, channel_mask;
   GLuint comp;

   ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));

   /* Form the dst register, find the written channels */
   if (inst->CondUpdate) {
      channel_mask = WRITEMASK_XYZW;
   }
   else {
      switch (inst->Opcode) {
      /* Component-wise opcodes: channel N of the result only depends on
       * channel N of the (swizzled) sources.
       */
      case OPCODE_MOV:
      case OPCODE_MIN:
      case OPCODE_MAX:
      case OPCODE_ABS:
      case OPCODE_ADD:
      case OPCODE_MAD:
      case OPCODE_MUL:
      case OPCODE_SUB:
      case OPCODE_CMP:
      case OPCODE_FLR:
      case OPCODE_FRC:
      case OPCODE_LRP:
      case OPCODE_SEQ:
      case OPCODE_SGE:
      case OPCODE_SGT:
      case OPCODE_SLE:
      case OPCODE_SLT:
      case OPCODE_SNE:
      case OPCODE_SSG:
         channel_mask = inst->DstReg.WriteMask & dst_mask;
         break;
      /* Scalar opcodes: only the first swizzled component is consumed. */
      case OPCODE_RCP:
      case OPCODE_SIN:
      case OPCODE_COS:
      case OPCODE_RSQ:
      case OPCODE_POW:
      case OPCODE_EX2:
      case OPCODE_LOG:
         channel_mask = WRITEMASK_X;
         break;
      case OPCODE_DP2:
         channel_mask = WRITEMASK_XY;
         break;
      case OPCODE_DP3:
      case OPCODE_XPD:
         channel_mask = WRITEMASK_XYZ;
         break;
      /* Unknown data flow: conservatively assume everything is read. */
      default:
         channel_mask = WRITEMASK_XYZW;
         break;
      }
   }

   /* Now, given the src swizzle and the written channels, find which
    * components are actually read
    */
   read_mask = 0x0;
   for (comp = 0; comp < 4; ++comp) {
      const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);

      /* Selectors beyond SWIZZLE_W are legal here and read no register
       * channel, so they are skipped rather than asserted against.
       */
      if ((channel_mask & (1 << comp)) && coord <= SWIZZLE_W)
         read_mask |= 1 << coord;
   }

   return read_mask;
}
Ejemplo n.º 24
0
/**
 * Accumulate the constant amount by which an instruction changes a loop
 * counter.
 *
 * The function is called with a description of a register write
 * (\p file, \p index, \p mask) performed by \p inst.  Writes that do not
 * target the counter described by \p data are ignored.  For writes that do,
 * the instruction must be an ADD or SUB whose other operand is an immediate
 * constant; the immediate is then added to (ADD) or subtracted from (SUB)
 * count_inst->Amount.  Whenever the change cannot be expressed that way,
 * count_inst->Unknown is set to 1 and Amount is left untouched.
 *
 * \param data   pointer to the struct count_inst being updated
 * \param inst   instruction performing the write
 * \param file   register file of the written register
 * \param index  index of the written register
 * \param mask   write mask of the written register
 */
static void get_incr_amount(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct count_inst * count_inst = data;
	int amnt_src_index;
	const struct rc_opcode_info * opcode;
	float amount;

	/* Not a write to exactly the counter channel: nothing to record. */
	if(file != RC_FILE_TEMPORARY ||
	   count_inst->Index != index ||
	   ((1 << GET_SWZ(count_inst->Swz,0)) != mask)){
		return;
	}

	/* XXX: Give up if the counter is modified within an IF block.  We
	 * could handle this case with better analysis. */
	if (count_inst->BranchDepth > 0) {
		count_inst->Unknown = 1;
		return;
	}

	/* Find the index of the counter register. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	if(opcode->NumSrcRegs != 2){
		count_inst->Unknown = 1;
		return;
	}
	/* One operand must be the counter itself; the other one is the
	 * candidate increment. */
	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
		amnt_src_index = 1;
	} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
		amnt_src_index = 0;
	}
	else{
		count_inst->Unknown = 1;
		return;
	}
	/* The increment is only known if it is an immediate constant. */
	if(rc_src_reg_is_immediate(count_inst->C,
				inst->U.I.SrcReg[amnt_src_index].File,
				inst->U.I.SrcReg[amnt_src_index].Index)){
		amount = rc_get_constant_value(count_inst->C,
				inst->U.I.SrcReg[amnt_src_index].Index,
				inst->U.I.SrcReg[amnt_src_index].Swizzle,
				inst->U.I.SrcReg[amnt_src_index].Negate, 0);
	}
	else{
		count_inst->Unknown = 1;
		return;
	}
	switch(inst->U.I.Opcode){
	case RC_OPCODE_ADD:
		count_inst->Amount += amount;
		break;
	case RC_OPCODE_SUB:
		/* "amount - counter" is not a constant step of the counter,
		 * so the change cannot be tracked: flag it as unknown like
		 * every other unsupported case. */
		if(amnt_src_index == 0){
			count_inst->Unknown = 1;
			return;
		}
		count_inst->Amount -= amount;
		break;
	default:
		count_inst->Unknown = 1;
		return;
	}
}
/**
 * Retrieve a ureg for the given source register.  Will emit
 * constants, apply swizzling and negation as needed.
 */
static GLuint
src_vector(struct i915_fragment_program *p,
           const struct prog_src_register *source,
           const struct gl_fragment_program *program)
{
   GLuint src;

   switch (source->File) {

      /* Registers:
       */
   case PROGRAM_TEMPORARY:
      if (source->Index >= I915_MAX_TEMPORARY) {
         i915_program_error(p, "Exceeded max temporary reg");
         return 0;
      }
      src = UREG(REG_TYPE_R, source->Index);
      break;
   case PROGRAM_INPUT:
      switch (source->Index) {
      case FRAG_ATTRIB_WPOS:
         src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
         break;
      case FRAG_ATTRIB_COL0:
         src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
         break;
      case FRAG_ATTRIB_COL1:
         src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
         src = swizzle(src, X, Y, Z, ONE);
         break;
      case FRAG_ATTRIB_FOGC:
         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
         src = swizzle(src, W, ZERO, ZERO, ONE);
         break;
      case FRAG_ATTRIB_TEX0:
      case FRAG_ATTRIB_TEX1:
      case FRAG_ATTRIB_TEX2:
      case FRAG_ATTRIB_TEX3:
      case FRAG_ATTRIB_TEX4:
      case FRAG_ATTRIB_TEX5:
      case FRAG_ATTRIB_TEX6:
      case FRAG_ATTRIB_TEX7:
         src = i915_emit_decl(p, REG_TYPE_T,
                              T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
                              D0_CHANNEL_ALL);
         break;

      default:
         i915_program_error(p, "Bad source->Index");
         return 0;
      }
      break;

      /* Various paramters and env values.  All emitted to
       * hardware as program constants.
       */
   case PROGRAM_LOCAL_PARAM:
      src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
      break;

   case PROGRAM_ENV_PARAM:
      src =
         i915_emit_param4fv(p,
                            p->ctx->FragmentProgram.Parameters[source->
                                                               Index]);
      break;

   case PROGRAM_CONSTANT:
   case PROGRAM_STATE_VAR:
   case PROGRAM_NAMED_PARAM:
      src =
         i915_emit_param4fv(p,
                            program->Base.Parameters->ParameterValues[source->
                                                                      Index]);
      break;

   default:
      i915_program_error(p, "Bad source->File");
      return 0;
   }

   src = swizzle(src,
                 GET_SWZ(source->Swizzle, 0),
                 GET_SWZ(source->Swizzle, 1),
                 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));

   if (source->NegateBase)
      src = negate(src,
                   GET_BIT(source->NegateBase, 0),
                   GET_BIT(source->NegateBase, 1),
                   GET_BIT(source->NegateBase, 2),
                   GET_BIT(source->NegateBase, 3));

   return src;
}
/**
 * Replace immediate-constant source operands with inline literals where
 * possible.
 *
 * For every source operand that reads an RC_CONSTANT_IMMEDIATE constant,
 * each used swizzle channel is converted with ieee_754_to_r300_float().
 * If every used channel converts successfully to the same encoded value
 * (signs may differ and are folded into the negate mask, unless the operand
 * uses Abs), the operand is rewritten to RC_FILE_INLINE with that encoded
 * value as its index.
 *
 * \param c     compiler whose instruction list is rewritten in place
 * \param user  unused
 */
void rc_inline_literals(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * inst;

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		const struct rc_opcode_info * info =
			rc_get_opcode_info(inst->U.I.Opcode);
		unsigned src_idx;

		/* XXX: Handle presub */

		/* We aren't using rc_for_all_reads_src here, because presub
		 * sources need to be handled differently. */
		for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
			struct rc_src_register * src_reg =
				&inst->U.I.SrcReg[src_idx];
			struct rc_constant * constant;
			unsigned char r300_float = 0;
			unsigned use_literal = 0;
			unsigned negate_mask = 0;
			unsigned new_swizzle;
			unsigned chan;

			if (src_reg->File != RC_FILE_CONSTANT)
				continue;

			constant =
				&c->Program.Constants.Constants[src_reg->Index];
			if (constant->Type != RC_CONSTANT_IMMEDIATE)
				continue;

			new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);

			for (chan = 0; chan < 4; chan++) {
				const unsigned swz =
					GET_SWZ(src_reg->Swizzle, chan);
				unsigned char encoded;
				int ret;

				if (swz == RC_SWIZZLE_UNUSED)
					continue;

				ret = ieee_754_to_r300_float(
						constant->u.Immediate[swz],
						&encoded);

				/* Give up on this operand if the value cannot
				 * be encoded, if it differs from the value
				 * already chosen for another channel, or if
				 * it needs a sign flip while Abs is set. */
				if (!ret ||
				    (use_literal && r300_float != encoded) ||
				    (ret == -1 && src_reg->Abs)) {
					use_literal = 0;
					break;
				}

				/* First usable channel fixes the literal. */
				if (!use_literal) {
					r300_float = encoded;
					use_literal = 1;
				}

				/* Use RC_SWIZZLE_W for the inline constant, so
				 * it will become one of the alpha sources. */
				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W);

				if (ret == -1)
					negate_mask |= (1 << chan);
			}

			if (use_literal) {
				src_reg->File = RC_FILE_INLINE;
				src_reg->Index = r300_float;
				src_reg->Swizzle = new_swizzle;
				src_reg->Negate = src_reg->Negate ^ negate_mask;
			}
		}
	}
}
Ejemplo n.º 27
0
/**
 * Lower a SWZ instruction to a single MOV, MUL or MAD.
 *
 * Every written channel of SWZ is either a (possibly negated) source
 * component or one of the constants 0, 1, -1.  This is expressed as
 * src * mul + add, where mul and add are taken from the immediate
 * {0, 1, -1}.  For example
 *
 *   SWZ dst, src.x-y10
 *
 * is emitted as
 *
 *   MAD dst, src.xyxx, {1,-1,0,0}, {0,0,1,0}
 *
 * When no constant channel is needed a MUL suffices, when no source
 * channel is needed a MOV of the immediate suffices, and a swizzle without
 * constants whose negation is all-or-nothing is a plain MOV of the
 * translated source.
 */
static void emit_swz( struct st_translate *t,
                      struct ureg_dst dst,
                      const struct prog_src_register *SrcReg )
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
   struct ureg_src imm;

   const unsigned negate_mask = SrcReg->Negate;
   unsigned one_mask = 0;
   unsigned zero_mask = 0;

   unsigned mul_swizzle[4] = {0,0,0,0};
   unsigned add_swizzle[4] = {0,0,0,0};
   unsigned src_swizzle[4] = {0,0,0,0};
   boolean need_add = FALSE;
   boolean need_mul = FALSE;
   unsigned i;

   /* Collect which channels select the constants one and zero. */
   for (i = 0; i < 4; i++) {
      const unsigned sel = GET_SWZ(SrcReg->Swizzle, i);

      if (sel == SWIZZLE_ONE)
         one_mask |= 1u << i;
      if (sel == SWIZZLE_ZERO)
         zero_mask |= 1u << i;
   }

   if (dst.WriteMask == 0)
      return;

   /* Without constant channels and with uniform negation this is an
    * ordinary MOV.
    */
   if (zero_mask == 0 &&
       one_mask == 0 &&
       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
   {
      ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
      return;
   }

   /* Component indices into the immediate declared below. */
#define IMM_ZERO    0
#define IMM_ONE     1
#define IMM_NEG_ONE 2

   imm = ureg_imm3f( ureg, 0, 1, -1 );

   for (i = 0; i < 4; i++) {
      const unsigned bit = 1 << i;

      if (!(dst.WriteMask & bit))
         continue;

      if (one_mask & bit) {
         /* Constant +1 or -1: 0 * src + (+/-1). */
         mul_swizzle[i] = IMM_ZERO;
         add_swizzle[i] = (negate_mask & bit) ? IMM_NEG_ONE : IMM_ONE;
         need_add = TRUE;
      }
      else if (zero_mask & bit) {
         /* Constant 0: 0 * src + 0. */
         mul_swizzle[i] = IMM_ZERO;
         add_swizzle[i] = IMM_ZERO;
         need_add = TRUE;
      }
      else {
         /* Source component: (+/-1) * src + 0. */
         src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
         mul_swizzle[i] = (negate_mask & bit) ? IMM_NEG_ONE : IMM_ONE;
         add_swizzle[i] = IMM_ZERO;
         need_mul = TRUE;
      }
   }

   if (need_mul) {
      if (need_add) {
         ureg_MAD( ureg,
                   dst,
                   swizzle_4v( src, src_swizzle ),
                   swizzle_4v( imm, mul_swizzle ),
                   swizzle_4v( imm, add_swizzle ) );
      }
      else {
         ureg_MUL( ureg,
                   dst,
                   swizzle_4v( src, src_swizzle ),
                   swizzle_4v( imm, mul_swizzle ) );
      }
   }
   else if (need_add) {
      ureg_MOV( ureg,
                dst,
                swizzle_4v( imm, add_swizzle ) );
   }
   else {
      debug_assert(0);
   }

#undef IMM_ZERO
#undef IMM_ONE
#undef IMM_NEG_ONE
}
/**
 * Count which (input, temporary) register is read and written how often,
 * and scan the instruction stream to find dependencies.
 *
 * Walks s->Program->Instructions up to (not including) OPCODE_END, with
 * s->Instructions advanced in lockstep.  For each instruction:
 *  - final_rewrite() and classify_instruction() are applied,
 *  - the RefCount of every register returned by get_register() for a source
 *    or destination operand is incremented,
 *  - for temporaries, a reader record is linked to the value currently held
 *    by each channel that is read, and a new value record is created for
 *    each channel that is written,
 *  - pairinst->NumDependencies is incremented once per reader record added
 *    and once per written channel that already held a value,
 *  - instructions ending up with no dependencies are passed to
 *    instruction_ready().
 *
 * Reader and value records are taken sequentially from s->ReaderPool and
 * s->ValuePool; no capacity check is performed in this function.
 */
static void scan_instructions(struct pair_state *s)
{
	struct prog_instruction *inst;
	struct pair_state_instruction *pairinst;
	GLuint ip;

	for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
	    inst->Opcode != OPCODE_END;
	    ++inst, ++pairinst, ++ip) {
		final_rewrite(s, inst);
		classify_instruction(s, inst, pairinst);

		/* --- Source operands: reference counts and read dependencies --- */
		int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
		int j;
		for(j = 0; j < nsrc; j++) {
			struct pair_register_translation *t =
				get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
			if (!t)
				continue;

			t->RefCount++;

			if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
				int i;
				for(i = 0; i < 4; ++i) {
					GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
					if (swz >= 4)
						continue; /* constant or NIL swizzle */
					if (!t->Value[swz])
						continue; /* this is an undefined read */

					/* Do not add a dependency if this instruction
					 * also rewrites the value. The code below adds
					 * a dependency for the DstReg, which is a superset
					 * of the SrcReg dependency. */
					if (inst->DstReg.File == PROGRAM_TEMPORARY &&
					    inst->DstReg.Index == inst->SrcReg[j].Index &&
					    GET_BIT(inst->DstReg.WriteMask, swz))
						continue;

					/* Take the next reader record from the pool and
					 * push it onto the front of the value's reader
					 * list. */
					struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
					pairinst->NumDependencies++;
					t->Value[swz]->NumReaders++;
					r->IP = ip;
					r->Next = t->Value[swz]->Readers;
					t->Value[swz]->Readers = r;
				}
			}
		}

		/* --- Destination operand: reference count and new values --- */
		int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
		if (ndst) {
			struct pair_register_translation *t =
				get_register(s, inst->DstReg.File, inst->DstReg.Index);
			if (t) {
				t->RefCount++;

				if (inst->DstReg.File == PROGRAM_TEMPORARY) {
					int j;
					for(j = 0; j < 4; ++j) {
						if (!GET_BIT(inst->DstReg.WriteMask, j))
							continue;

						/* Take the next value record from the pool;
						 * if the channel already holds a value, this
						 * write depends on it and is chained after
						 * it via Next. */
						struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
						v->IP = ip;
						if (t->Value[j]) {
							pairinst->NumDependencies++;
							t->Value[j]->Next = v;
						}
						t->Value[j] = v;
						pairinst->Values[j] = v;
					}
				}
			}
		}

		if (s->Verbose)
			_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);

		/* Nothing to wait for: the instruction can be handed on
		 * immediately. */
		if (!pairinst->NumDependencies)
			instruction_ready(s, ip);
	}

	/* Clear the PROGRAM_TEMPORARY state */
	int i, j;
	for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
		for(j = 0; j < 4; ++j)
			s->Temps[i].Value[j] = 0;
	}
}
Ejemplo n.º 29
0
/**
 * Fill in the sampler-dependent part of a program key.
 *
 * For every sampler set in prog->SamplersUsed, the swizzle is reset to
 * SWIZZLE_NOOP and the scale factor to 0.  If the texture unit the sampler
 * maps to has a current texture that is not a buffer texture, the
 * per-sampler workaround state (swizzles, GL_CLAMP masks, gather4 quirks,
 * compressed multisample layout, planar YUV layout) is then derived from
 * that texture and its sampler object.
 *
 * The bitmask fields of the key are only OR'ed into, never cleared here;
 * presumably the caller hands in a zeroed key -- confirm against callers.
 *
 * \param ctx   GL context supplying the texture unit state.
 * \param prog  program whose SamplersUsed / SamplerUnits are inspected.
 * \param key   sampler key data to populate.
 */
void
brw_populate_sampler_prog_key_data(struct gl_context *ctx,
                                   const struct gl_program *prog,
                                   struct brw_sampler_prog_key_data *key)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   GLbitfield mask = prog->SamplersUsed;

   while (mask) {
      const int s = u_bit_scan(&mask);

      key->swizzles[s] = SWIZZLE_NOOP;
      key->scale_factors[s] = 0.0f;

      int unit_id = prog->SamplerUnits[s];
      const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id];

      if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) {
         const struct gl_texture_object *t = unit->_Current;
         const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
         struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id);

         const bool alpha_depth = t->DepthMode == GL_ALPHA &&
            (img->_BaseFormat == GL_DEPTH_COMPONENT ||
             img->_BaseFormat == GL_DEPTH_STENCIL);

         /* Haswell handles texture swizzling as surface format overrides
          * (except for GL_ALPHA); all other platforms need MOVs in the shader.
          */
         if (alpha_depth || (devinfo->gen < 8 && !devinfo->is_haswell))
            key->swizzles[s] = brw_get_texture_swizzle(ctx, t);

         /* GL_CLAMP is only recorded when neither filter is GL_NEAREST. */
         if (devinfo->gen < 8 &&
             sampler->MinFilter != GL_NEAREST &&
             sampler->MagFilter != GL_NEAREST) {
            if (sampler->WrapS == GL_CLAMP)
               key->gl_clamp_mask[0] |= 1 << s;
            if (sampler->WrapT == GL_CLAMP)
               key->gl_clamp_mask[1] |= 1 << s;
            if (sampler->WrapR == GL_CLAMP)
               key->gl_clamp_mask[2] |= 1 << s;
         }

         /* gather4 for RG32* is broken in multiple ways on Gen7. */
         if (devinfo->gen == 7 && prog->info.uses_texture_gather) {
            switch (img->InternalFormat) {
            case GL_RG32I:
            case GL_RG32UI: {
               /* We have to override the format to R32G32_FLOAT_LD.
                * This means that SCS_ALPHA and SCS_ONE will return
                * 0x3f800000 (1.0f) rather than integer 1.  This needs
                * shader hacks.
                *
                * On Ivybridge, we whack W (alpha) to ONE in our key's
                * swizzle.  On Haswell, we look at the original texture
                * swizzle, and use XYZW with channels overridden to ONE,
                * leaving normal texture swizzling to SCS.
                */
               unsigned src_swizzle =
                  devinfo->is_haswell ? t->_Swizzle : key->swizzles[s];
               for (int i = 0; i < 4; i++) {
                  unsigned src_comp = GET_SWZ(src_swizzle, i);
                  if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
                     /* Patch channel i of *this sampler's* swizzle; the
                      * array is indexed by sampler, the 3-bit field within
                      * the entry by channel.
                      */
                     key->swizzles[s] &= ~(0x7 << (3 * i));
                     key->swizzles[s] |= SWIZZLE_ONE << (3 * i);
                  }
               }
               /* fallthrough */
            }
            case GL_RG32F:
               /* The channel select for green doesn't work - we have to
                * request blue.  Haswell can use SCS for this, but Ivybridge
                * needs a shader workaround.
                */
               if (!devinfo->is_haswell)
                  key->gather_channel_quirk_mask |= 1 << s;
               break;
            }
         }

         /* Gen6's gather4 is broken for UINT/SINT; we treat them as
          * UNORM/FLOAT instead and fix it in the shader.
          */
         if (devinfo->gen == 6 && prog->info.uses_texture_gather) {
            key->gen6_gather_wa[s] = gen6_gather_workaround(img->InternalFormat);
         }

         /* If this is a multisample sampler, and uses the CMS MSAA layout,
          * then we need to emit slightly different code to first sample the
          * MCS surface.
          */
         struct intel_texture_object *intel_tex =
            intel_texture_object((struct gl_texture_object *)t);

         /* From gen9 onwards some single sampled buffers can also be
          * compressed. These don't need ld2dms sampling along with mcs fetch.
          */
         if (intel_tex->mt->aux_usage == ISL_AUX_USAGE_MCS) {
            assert(devinfo->gen >= 7);
            assert(intel_tex->mt->surf.samples > 1);
            assert(intel_tex->mt->aux_buf);
            assert(intel_tex->mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
            key->compressed_multisample_layout_mask |= 1 << s;

            if (intel_tex->mt->surf.samples >= 16) {
               assert(devinfo->gen >= 9);
               key->msaa_16 |= 1 << s;
            }
         }

         if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) {

            /* Setup possible scaling factor. */
            key->scale_factors[s] = intel_tex->planar_format->scaling_factor;

            /* Flag the sampler in the mask matching its planar layout. */
            switch (intel_tex->planar_format->components) {
            case __DRI_IMAGE_COMPONENTS_Y_UV:
               key->y_uv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_U_V:
               key->y_u_v_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_XUXV:
               key->yx_xuxv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_UXVX:
               key->xy_uxvx_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_AYUV:
               key->ayuv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_XYUV:
               key->xyuv_image_mask |= 1 << s;
               break;
            default:
               break;
            }
         }

      }
   }
}
/**
 * Apply the effects of the instruction at the given IP to the dependency
 * tracking state: publish the values it writes, and release the values
 * it reads.
 */
static void commit_instruction(struct pair_state *s, int ip)
{
	struct prog_instruction *inst = &s->Program->Instructions[ip];
	struct pair_state_instruction *pairinst = &s->Instructions[ip];
	const int writes_temp = (inst->DstReg.File == PROGRAM_TEMPORARY);
	int chan;
	int src;
	int num_srcs;

	if (s->Verbose)
		_mesa_printf("commit_instruction(%i)\n", ip);

	/* Destination: install the newly written values in the temporary. */
	if (writes_temp) {
		struct pair_register_translation *dst = &s->Temps[inst->DstReg.Index];

		deref_hw_reg(s, dst->HwIndex);

		for (chan = 0; chan < 4; chan++) {
			if (!GET_BIT(inst->DstReg.WriteMask, chan))
				continue;

			dst->Value[chan] = pairinst->Values[chan];

			if (dst->Value[chan]->NumReaders) {
				/* Every reader of the new value loses one dependency. */
				struct reg_value_reader *reader = dst->Value[chan]->Readers;

				while (reader) {
					decrement_dependencies(s, reader->IP);
					reader = reader->Next;
				}
			} else if (dst->Value[chan]->Next) {
				/* No readers recorded: this is the case where the
				 * only reader overwrites the register at the same
				 * time, so the next writer is what gets released. */
				decrement_dependencies(s, dst->Value[chan]->Next->IP);
			}
		}
	}

	/* Sources: drop the references this instruction held. */
	num_srcs = _mesa_num_inst_src_regs(inst->Opcode);
	for (src = 0; src < num_srcs; src++) {
		struct pair_register_translation *reg =
			get_register(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);

		if (!reg)
			continue;

		deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[src].File, inst->SrcReg[src].Index));

		/* Per-component value tracking is only done for temporaries. */
		if (inst->SrcReg[src].File != PROGRAM_TEMPORARY)
			continue;

		for (chan = 0; chan < 4; chan++) {
			GLuint comp = GET_SWZ(inst->SrcReg[src].Swizzle, chan);

			/* Skip swizzle selectors beyond the four components,
			 * and components with no tracked value. */
			if (comp >= 4 || !reg->Value[comp])
				continue;

			/* A component that this instruction also rewrites is
			 * not released here. See scan_instructions. */
			if (writes_temp &&
			    inst->DstReg.Index == inst->SrcReg[src].Index &&
			    GET_BIT(inst->DstReg.WriteMask, comp))
				continue;

			/* Once the last reader is gone, the next writer of
			 * this component (if any) loses a dependency. */
			reg->Value[comp]->NumReaders--;
			if (reg->Value[comp]->NumReaders == 0 && reg->Value[comp]->Next)
				decrement_dependencies(s, reg->Value[comp]->Next->IP);
		}
	}
}