/**
 * Scan forward in program from 'start' for the next occurances of TEMP[index].
 * We look if an instruction reads the component given by the masks and if they
 * are overwritten.
 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
 * that we can't look further.
 */
static enum inst_use
find_next_use(const struct gl_program *prog,
              GLuint start,
              GLuint index,
              GLuint mask)
{
   GLuint i;

   for (i = start; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      switch (inst->Opcode) {
      case OPCODE_BGNLOOP:
      case OPCODE_BGNSUB:
      case OPCODE_BRA:
      case OPCODE_CAL:
      case OPCODE_CONT:
      case OPCODE_IF:
      case OPCODE_ELSE:
      case OPCODE_ENDIF:
      case OPCODE_ENDLOOP:
      case OPCODE_ENDSUB:
      case OPCODE_RET:
         return FLOW;
      case OPCODE_END:
         return END;
      default:
         {
            const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
            GLuint j;
            for (j = 0; j < numSrc; j++) {
               if (inst->SrcReg[j].RelAddr ||
                   (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
                   inst->SrcReg[j].Index == index &&
                   (get_src_arg_mask(inst,j,NO_MASK) & mask)))
                  return READ;
            }
            if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
                inst->DstReg.File == PROGRAM_TEMPORARY &&
                inst->DstReg.Index == index) {
               mask &= ~inst->DstReg.WriteMask;
               if (mask == 0)
                  return WRITE;
            }
         }
      }
   }
   return END;
}
/**
 * Complements dead_code_global. Try to remove code in block of code by
 * carefully monitoring the swizzles. Both functions should be merged into one
 * with a proper control flow graph
 */
static GLboolean
_mesa_remove_dead_code_local(struct gl_program *prog)
{
   GLboolean *removeInst;
   GLuint i, arg, rem = 0;

   removeInst = (GLboolean *)
      calloc(1, prog->NumInstructions * sizeof(GLboolean));

   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      const GLuint index = inst->DstReg.Index;
      const GLuint mask = inst->DstReg.WriteMask;
      enum inst_use use;

      /* We must deactivate the pass as soon as some indirection is used */
      if (inst->DstReg.RelAddr)
         goto done;
      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
         if (inst->SrcReg[arg].RelAddr)
            goto done;

      if (_mesa_is_flow_control_opcode(inst->Opcode) ||
          _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
          inst->DstReg.File != PROGRAM_TEMPORARY ||
          inst->DstReg.RelAddr)
         continue;

      use = find_next_use(prog, i+1, index, mask);
      if (use == WRITE || use == END)
         removeInst[i] = GL_TRUE;
   }

   rem = remove_instructions(prog, removeInst);

done:
   free(removeInst);
   return rem != 0;
}
Exemple #3
0
static void
compile_instruction(
   struct gl_context *ctx,
   struct st_translate *t,
   const struct prog_instruction *inst,
   boolean clamp_dst_color_output)
{
   struct ureg_program *ureg = t->ureg;
   GLuint i;
   struct ureg_dst dst[1] = { { 0 } };
   struct ureg_src src[4];
   unsigned num_dst;
   unsigned num_src;

   num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
   num_src = _mesa_num_inst_src_regs( inst->Opcode );

   if (num_dst) 
      dst[0] = translate_dst( t, 
                              &inst->DstReg,
                              inst->Saturate,
                              clamp_dst_color_output);

   for (i = 0; i < num_src; i++) 
      src[i] = translate_src( t, &inst->SrcReg[i] );

   switch( inst->Opcode ) {
   case OPCODE_SWZ:
      emit_swz( t, dst[0], &inst->SrcReg[0] );
      return;

   case OPCODE_BGNLOOP:
   case OPCODE_CAL:
   case OPCODE_ELSE:
   case OPCODE_ENDLOOP:
      debug_assert(num_dst == 0);
      ureg_label_insn( ureg,
                       translate_opcode( inst->Opcode ),
                       src, num_src,
                       get_label( t, inst->BranchTarget ));
      return;

   case OPCODE_IF:
      debug_assert(num_dst == 0);
      ureg_label_insn( ureg,
                       ctx->Const.NativeIntegers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF,
                       src, num_src,
                       get_label( t, inst->BranchTarget ));
      return;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      src[num_src++] = t->samplers[inst->TexSrcUnit];
      ureg_tex_insn( ureg,
                     translate_opcode( inst->Opcode ),
                     dst, num_dst, 
                     st_translate_texture_target( inst->TexSrcTarget,
                                               inst->TexShadow ),
                     NULL, 0,
                     src, num_src );
      return;

   case OPCODE_SCS:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
      ureg_insn( ureg, 
                 translate_opcode( inst->Opcode ), 
                 dst, num_dst, 
                 src, num_src );
      break;

   case OPCODE_XPD:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
      ureg_insn( ureg, 
                 translate_opcode( inst->Opcode ), 
                 dst, num_dst, 
                 src, num_src );
      break;

   case OPCODE_NOISE1:
   case OPCODE_NOISE2:
   case OPCODE_NOISE3:
   case OPCODE_NOISE4:
      /* At some point, a motivated person could add a better
       * implementation of noise.  Currently not even the nvidia
       * binary drivers do anything more than this.  In any case, the
       * place to do this is in the GL state tracker, not the poor
       * driver.
       */
      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
      break;
		 
   case OPCODE_DDY:
      emit_ddy( t, dst[0], &inst->SrcReg[0] );
      break;

   case OPCODE_RSQ:
      ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
      break;

   default:
      ureg_insn( ureg, 
                 translate_opcode( inst->Opcode ), 
                 dst, num_dst, 
                 src, num_src );
      break;
   }
}
/**
 * Remove dead instructions from the given program.
 * This is very primitive for now.  Basically look for temp registers
 * that are written to but never read.  Remove any instructions that
 * write to such registers.  Be careful with condition code setters.
 */
static GLboolean
_mesa_remove_dead_code_global(struct gl_program *prog)
{
   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
   GLboolean *removeInst; /* per-instruction removal flag */
   GLuint i, rem = 0, comp;

   memset(tempRead, 0, sizeof(tempRead));

   if (dbg) {
      printf("Optimize: Begin dead code removal\n");
      /*_mesa_print_program(prog);*/
   }

   removeInst = (GLboolean *)
      calloc(1, prog->NumInstructions * sizeof(GLboolean));

   /* Determine which temps are read and written */
   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;
      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
      GLuint j;

      /* check src regs */
      for (j = 0; j < numSrc; j++) {
         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
            const GLuint index = inst->SrcReg[j].Index;
            GLuint read_mask;
            ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
	    read_mask = get_src_arg_mask(inst, j, NO_MASK);

            if (inst->SrcReg[j].RelAddr) {
               if (dbg)
                  printf("abort remove dead code (indirect temp)\n");
               goto done;
            }

	    for (comp = 0; comp < 4; comp++) {
	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
	       ASSERT(swz < 4);
               if ((read_mask & (1 << swz)) == 0)
		  continue;
               if (swz <= SWIZZLE_W)
                  tempRead[index][swz] = GL_TRUE;
	    }
         }
      }

      /* check dst reg */
      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
         const GLuint index = inst->DstReg.Index;
         ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);

         if (inst->DstReg.RelAddr) {
            if (dbg)
               printf("abort remove dead code (indirect temp)\n");
            goto done;
         }

         if (inst->CondUpdate) {
            /* If we're writing to this register and setting condition
             * codes we cannot remove the instruction.  Prevent removal
             * by setting the 'read' flag.
             */
            tempRead[index][0] = GL_TRUE;
            tempRead[index][1] = GL_TRUE;
            tempRead[index][2] = GL_TRUE;
            tempRead[index][3] = GL_TRUE;
         }
      }
   }

   /* find instructions that write to dead registers, flag for removal */
   for (i = 0; i < prog->NumInstructions; i++) {
      struct prog_instruction *inst = prog->Instructions + i;
      const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);

      if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
         GLint chan, index = inst->DstReg.Index;

	 for (chan = 0; chan < 4; chan++) {
	    if (!tempRead[index][chan] &&
		inst->DstReg.WriteMask & (1 << chan)) {
	       if (dbg) {
		  printf("Remove writemask on %u.%c\n", i,
			       chan == 3 ? 'w' : 'x' + chan);
	       }
	       inst->DstReg.WriteMask &= ~(1 << chan);
	       rem++;
	    }
	 }

	 if (inst->DstReg.WriteMask == 0) {
	    /* If we cleared all writes, the instruction can be removed. */
	    if (dbg)
	       printf("Remove instruction %u: \n", i);
	    removeInst[i] = GL_TRUE;
	 }
      }
   }

   /* now remove the instructions which aren't needed */
   rem = remove_instructions(prog, removeInst);

   if (dbg) {
      printf("Optimize: End dead code removal.\n");
      printf("  %u channel writes removed\n", rem);
      printf("  %u instructions removed\n", rem);
      /*_mesa_print_program(prog);*/
   }

done:
   free(removeInst);
   return rem != 0;
}
/**
 * Count which (input, temporary) register is read and written how often,
 * and scan the instruction stream to find dependencies.
 */
static void scan_instructions(struct pair_state *s)
{
	struct prog_instruction *inst;
	struct pair_state_instruction *pairinst;
	GLuint ip;

	for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
	    inst->Opcode != OPCODE_END;
	    ++inst, ++pairinst, ++ip) {
		final_rewrite(s, inst);
		classify_instruction(s, inst, pairinst);

		int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
		int j;
		for(j = 0; j < nsrc; j++) {
			struct pair_register_translation *t =
				get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
			if (!t)
				continue;

			t->RefCount++;

			if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
				int i;
				for(i = 0; i < 4; ++i) {
					GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
					if (swz >= 4)
						continue; /* constant or NIL swizzle */
					if (!t->Value[swz])
						continue; /* this is an undefined read */

					/* Do not add a dependency if this instruction
					 * also rewrites the value. The code below adds
					 * a dependency for the DstReg, which is a superset
					 * of the SrcReg dependency. */
					if (inst->DstReg.File == PROGRAM_TEMPORARY &&
					    inst->DstReg.Index == inst->SrcReg[j].Index &&
					    GET_BIT(inst->DstReg.WriteMask, swz))
						continue;

					struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
					pairinst->NumDependencies++;
					t->Value[swz]->NumReaders++;
					r->IP = ip;
					r->Next = t->Value[swz]->Readers;
					t->Value[swz]->Readers = r;
				}
			}
		}

		int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
		if (ndst) {
			struct pair_register_translation *t =
				get_register(s, inst->DstReg.File, inst->DstReg.Index);
			if (t) {
				t->RefCount++;

				if (inst->DstReg.File == PROGRAM_TEMPORARY) {
					int j;
					for(j = 0; j < 4; ++j) {
						if (!GET_BIT(inst->DstReg.WriteMask, j))
							continue;

						struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
						v->IP = ip;
						if (t->Value[j]) {
							pairinst->NumDependencies++;
							t->Value[j]->Next = v;
						}
						t->Value[j] = v;
						pairinst->Values[j] = v;
					}
				}
			}
		}

		if (s->Verbose)
			_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);

		if (!pairinst->NumDependencies)
			instruction_ready(s, ip);
	}

	/* Clear the PROGRAM_TEMPORARY state */
	int i, j;
	for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
		for(j = 0; j < 4; ++j)
			s->Temps[i].Value[j] = 0;
	}
}
Exemple #6
0
static void
compile_instruction(
   const struct prog_instruction *inst,
   struct tgsi_full_instruction *fullinst,
   const GLuint inputMapping[],
   const GLuint outputMapping[],
   const GLuint immediateMapping[],
   GLboolean indirectAccess,
   GLuint preamble_size,
   GLuint procType,
   GLboolean *insideSubroutine,
   GLint wposTemp)
{
   GLuint i;
   struct tgsi_full_dst_register *fulldst;
   struct tgsi_full_src_register *fullsrc;

   *fullinst = tgsi_default_full_instruction();

   fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode );
   fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode );
   fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode );

   fulldst = &fullinst->FullDstRegisters[0];
   fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE );
   fulldst->DstRegister.Index = map_register_file_index(
      fulldst->DstRegister.File,
      inst->DstReg.Index,
      inputMapping,
      outputMapping,
      NULL,
      GL_FALSE );
   fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );

   for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
      GLuint j;

      fullsrc = &fullinst->FullSrcRegisters[i];

      if (procType == TGSI_PROCESSOR_FRAGMENT &&
          inst->SrcReg[i].File == PROGRAM_INPUT &&
          inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
         /* special case of INPUT[WPOS] */
         fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
         fullsrc->SrcRegister.Index = wposTemp;
      }
      else {
         /* any other src register */
         fullsrc->SrcRegister.File = map_register_file(
            inst->SrcReg[i].File,
            inst->SrcReg[i].Index,
            immediateMapping,
            indirectAccess );
         fullsrc->SrcRegister.Index = map_register_file_index(
            fullsrc->SrcRegister.File,
            inst->SrcReg[i].Index,
            inputMapping,
            outputMapping,
            immediateMapping,
            indirectAccess );
      }

      /* swizzle (ext swizzle also depends on negation) */
      {
         GLuint swz[4];
         GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE &&
                               inst->SrcReg[i].Negate != NEGATE_XYZW);
         for( j = 0; j < 4; j++ ) {
            swz[j] = GET_SWZ( inst->SrcReg[i].Swizzle, j );
            if (swz[j] > SWIZZLE_W)
               extended = GL_TRUE;
         }
         if (extended) {
            for (j = 0; j < 4; j++) {
               tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz,
                                                     swz[j], j);
            }
         }
         else {
            for (j = 0; j < 4; j++) {
               tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister,
                                                  swz[j], j);
            }
         }
      }

      if( inst->SrcReg[i].Negate == NEGATE_XYZW ) {
         fullsrc->SrcRegister.Negate = 1;
      }
      else if( inst->SrcReg[i].Negate != NEGATE_NONE ) {
         if( inst->SrcReg[i].Negate & NEGATE_X ) {
            fullsrc->SrcRegisterExtSwz.NegateX = 1;
         }
         if( inst->SrcReg[i].Negate & NEGATE_Y ) {
            fullsrc->SrcRegisterExtSwz.NegateY = 1;
         }
         if( inst->SrcReg[i].Negate & NEGATE_Z ) {
            fullsrc->SrcRegisterExtSwz.NegateZ = 1;
         }
         if( inst->SrcReg[i].Negate & NEGATE_W ) {
            fullsrc->SrcRegisterExtSwz.NegateW = 1;
         }
      }

      if( inst->SrcReg[i].Abs ) {
         fullsrc->SrcRegisterExtMod.Absolute = 1;
      }

      if( inst->SrcReg[i].RelAddr ) {
         fullsrc->SrcRegister.Indirect = 1;

         fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS;
         fullsrc->SrcRegisterInd.Index = 0;
      }
   }

   switch( inst->Opcode ) {
   case OPCODE_ARL:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ARL;
      break;
   case OPCODE_ABS:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ABS;
      break;
   case OPCODE_ADD:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ADD;
      break;
   case OPCODE_BGNLOOP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP2;
      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
      break;
   case OPCODE_BGNSUB:
      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB;
      *insideSubroutine = GL_TRUE;
      break;
   case OPCODE_BRA:
      fullinst->Instruction.Opcode = TGSI_OPCODE_BRA;
      break;
   case OPCODE_BRK:
      fullinst->Instruction.Opcode = TGSI_OPCODE_BRK;
      break;
   case OPCODE_CAL:
      fullinst->Instruction.Opcode = TGSI_OPCODE_CAL;
      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
      break;
   case OPCODE_CMP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_CMP;
      break;
   case OPCODE_CONT:
      fullinst->Instruction.Opcode = TGSI_OPCODE_CONT;
      break;
   case OPCODE_COS:
      fullinst->Instruction.Opcode = TGSI_OPCODE_COS;
      break;
   case OPCODE_DDX:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DDX;
      break;
   case OPCODE_DDY:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DDY;
      break;
   case OPCODE_DP2:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DP2;
      break;
   case OPCODE_DP2A:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A;
      break;
   case OPCODE_DP3:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DP3;
      break;
   case OPCODE_DP4:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DP4;
      break;
   case OPCODE_DPH:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DPH;
      break;
   case OPCODE_DST:
      fullinst->Instruction.Opcode = TGSI_OPCODE_DST;
      break;
   case OPCODE_ELSE:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE;
      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
      break;
   case OPCODE_ENDIF:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF;
      break;
   case OPCODE_ENDLOOP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP2;
      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
      break;
   case OPCODE_ENDSUB:
      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB;
      *insideSubroutine = GL_FALSE;
      break;
   case OPCODE_EX2:
      fullinst->Instruction.Opcode = TGSI_OPCODE_EX2;
      break;
   case OPCODE_EXP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_EXP;
      break;
   case OPCODE_FLR:
      fullinst->Instruction.Opcode = TGSI_OPCODE_FLR;
      break;
   case OPCODE_FRC:
      fullinst->Instruction.Opcode = TGSI_OPCODE_FRC;
      break;
   case OPCODE_IF:
      fullinst->Instruction.Opcode = TGSI_OPCODE_IF;
      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
      break;
   case OPCODE_TRUNC:
      fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC;
      break;
   case OPCODE_KIL:
      /* conditional */
      fullinst->Instruction.Opcode = TGSI_OPCODE_KIL;
      break;
   case OPCODE_KIL_NV:
      /* predicated */
      assert(inst->DstReg.CondMask == COND_TR);
      fullinst->Instruction.Opcode = TGSI_OPCODE_KILP;
      break;
   case OPCODE_LG2:
      fullinst->Instruction.Opcode = TGSI_OPCODE_LG2;
      break;
   case OPCODE_LOG:
      fullinst->Instruction.Opcode = TGSI_OPCODE_LOG;
      break;
   case OPCODE_LIT:
      fullinst->Instruction.Opcode = TGSI_OPCODE_LIT;
      break;
   case OPCODE_LRP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_LRP;
      break;
   case OPCODE_MAD:
      fullinst->Instruction.Opcode = TGSI_OPCODE_MAD;
      break;
   case OPCODE_MAX:
      fullinst->Instruction.Opcode = TGSI_OPCODE_MAX;
      break;
   case OPCODE_MIN:
      fullinst->Instruction.Opcode = TGSI_OPCODE_MIN;
      break;
   case OPCODE_MOV:
      fullinst->Instruction.Opcode = TGSI_OPCODE_MOV;
      break;
   case OPCODE_MUL:
      fullinst->Instruction.Opcode = TGSI_OPCODE_MUL;
      break;
   case OPCODE_NOISE1:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1;
      break;
   case OPCODE_NOISE2:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2;
      break;
   case OPCODE_NOISE3:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3;
      break;
   case OPCODE_NOISE4:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4;
      break;
   case OPCODE_NOP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NOP;
      break;
   case OPCODE_NRM3:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NRM;
      break;
   case OPCODE_NRM4:
      fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4;
      break;
   case OPCODE_POW:
      fullinst->Instruction.Opcode = TGSI_OPCODE_POW;
      break;
   case OPCODE_RCP:
      fullinst->Instruction.Opcode = TGSI_OPCODE_RCP;
      break;
   case OPCODE_RET:
      /* If RET is used inside main (not a real subroutine) we may want
       * to execute END instead of RET.  TBD...
       */
      if (1 /*  *insideSubroutine */) {
         fullinst->Instruction.Opcode = TGSI_OPCODE_RET;
      }
      else {
         /* inside main() pseudo-function */
         fullinst->Instruction.Opcode = TGSI_OPCODE_END;
      }
      break;
   case OPCODE_RSQ:
      fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
      break;
   case OPCODE_SCS:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY;
      break;
   case OPCODE_SEQ:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ;
      break;
   case OPCODE_SGE:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SGE;
      break;
   case OPCODE_SGT:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SGT;
      break;
   case OPCODE_SIN:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SIN;
      break;
   case OPCODE_SLE:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SLE;
      break;
   case OPCODE_SLT:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SLT;
      break;
   case OPCODE_SNE:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SNE;
      break;
   case OPCODE_SSG:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SSG;
      break;
   case OPCODE_SUB:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SUB;
      break;
   case OPCODE_SWZ:
      fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ;
      break;
   case OPCODE_TEX:
      /* ordinary texture lookup */
      fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
      fullinst->Instruction.NumSrcRegs = 2;
      fullinst->InstructionExtTexture.Texture =
         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
      break;
   case OPCODE_TXB:
      /* texture lookup with LOD bias */
      fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
      fullinst->Instruction.NumSrcRegs = 2;
      fullinst->InstructionExtTexture.Texture =
         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
      break;
   case OPCODE_TXD:
      /* texture lookup with explicit partial derivatives */
      fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
      fullinst->Instruction.NumSrcRegs = 4;
      fullinst->InstructionExtTexture.Texture =
         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
      /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
      fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
      fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
      break;
   case OPCODE_TXL:
      /* texture lookup with explicit LOD */
      fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
      fullinst->Instruction.NumSrcRegs = 2;
      fullinst->InstructionExtTexture.Texture =
         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
      break;
   case OPCODE_TXP:
      /* texture lookup with divide by Q component */
      /* convert to TEX w/ special flag for division */
      fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
      fullinst->Instruction.NumSrcRegs = 2;
      fullinst->InstructionExtTexture.Texture =
         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
      break;
   case OPCODE_XPD:
      fullinst->Instruction.Opcode = TGSI_OPCODE_XPD;
      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ;
      break;
   case OPCODE_END:
      fullinst->Instruction.Opcode = TGSI_OPCODE_END;
      break;
   default:
      assert( 0 );
   }
}
static void
compile_instruction(
   struct gl_context *ctx,
   struct st_translate *t,
   const struct prog_instruction *inst)
{
   struct ureg_program *ureg = t->ureg;
   GLuint i;
   struct ureg_dst dst[1] = { { 0 } };
   struct ureg_src src[4];
   unsigned num_dst;
   unsigned num_src;

   num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
   num_src = _mesa_num_inst_src_regs( inst->Opcode );

   if (num_dst) 
      dst[0] = translate_dst( t, 
                              &inst->DstReg,
                              inst->Saturate);

   for (i = 0; i < num_src; i++) 
      src[i] = translate_src( t, &inst->SrcReg[i] );

   switch( inst->Opcode ) {
   case OPCODE_SWZ:
      emit_swz( t, dst[0], &inst->SrcReg[0] );
      return;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXP:
      src[num_src++] = t->samplers[inst->TexSrcUnit];
      ureg_tex_insn( ureg,
                     translate_opcode( inst->Opcode ),
                     dst, num_dst, 
                     st_translate_texture_target( inst->TexSrcTarget,
                                               inst->TexShadow ),
                     NULL, 0,
                     src, num_src );
      return;

   case OPCODE_SCS:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
      ureg_insn( ureg, 
                 translate_opcode( inst->Opcode ), 
                 dst, num_dst, 
                 src, num_src );
      break;

   case OPCODE_XPD:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
      ureg_insn( ureg, 
                 translate_opcode( inst->Opcode ), 
                 dst, num_dst, 
                 src, num_src );
      break;

   case OPCODE_RSQ:
      ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
      break;

   case OPCODE_ABS:
      ureg_MOV(ureg, dst[0], ureg_abs(src[0]));
      break;

   case OPCODE_SUB:
      ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1]));
      break;

   default:
      ureg_insn( ureg, 
                 translate_opcode( inst->Opcode ), 
                 dst, num_dst, 
                 src, num_src );
      break;
   }
}