Beispiel #1
0
/* Add the fog parameters to the parameter list of the original
 * program, rather than creating a new list.  Doesn't really do any
 * harm and it's not as if the parameter handling isn't a big hack
 * anyway.
 */
static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
                                                     GLint s0,
                                                     GLint s1,
                                                     GLint s2,
                                                     GLint s3,
                                                     GLint s4)
{
   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
   gl_state_index tokens[STATE_LENGTH];
   GLuint idx;
   tokens[0] = s0;
   tokens[1] = s1;
   tokens[2] = s2;
   tokens[3] = s3;
   tokens[4] = s4;
   
   for (idx = 0; idx < paramList->NumParameters; idx++) {
      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
	 return src_reg(PROGRAM_STATE_VAR, idx);
   }

   idx = _mesa_add_state_reference( paramList, tokens );

   return src_reg(PROGRAM_STATE_VAR, idx);
}
Beispiel #2
0
nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               const struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   bool progress;
   nir_shader *nir;

   /* First, lower the GLSL IR or Mesa IR to NIR */
   if (shader_prog) {
      nir = glsl_to_nir(shader_prog, stage, options);
      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_convert_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir);

   (void)progress;

   nir = brw_preprocess_nir(brw->screen->compiler, nir);

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };
      _mesa_add_state_reference(prog->Parameters,
                                (gl_state_index *) wpos_options.state_tokens);

      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
   }

   NIR_PASS(progress, nir, nir_lower_system_values);
   NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (shader_prog) {
      NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
      NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
   }

   return nir;
}
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
{
	gl_state_index fail_value_tokens[STATE_LENGTH] = {
		STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
	};
	struct prog_src_register reg = { 0, };

	fail_value_tokens[2] = tmu;
	reg.File = PROGRAM_STATE_VAR;
	reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
	reg.Swizzle = SWIZZLE_WWWW;
	return reg;
}
Beispiel #4
0
void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
{
    static const gl_state_index winstate[STATE_LENGTH]
         = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
    struct prog_instruction *newInst, *inst;
    GLint  win_size;  /* state reference */
    GLuint wpos_temp; /* temp register */
    int i, j;

    /* PARAM win_size = STATE_FB_SIZE */
    win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);

    wpos_temp = fprog->Base.NumTemporaries++;

    /* scan program where WPOS is used and replace with wpos_temp */
    inst = fprog->Base.Instructions;
    for (i = 0; i < fprog->Base.NumInstructions; i++) {
        for (j=0; j < 3; j++) {
            if(inst->SrcReg[j].File == PROGRAM_INPUT && 
               inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
                inst->SrcReg[j].File = PROGRAM_TEMPORARY;
                inst->SrcReg[j].Index = wpos_temp;
            }
        }
        inst++;
    }

    _mesa_insert_instructions(&(fprog->Base), 0, 1);

    newInst = fprog->Base.Instructions;
    /* invert wpos.y
     * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
    newInst[0].Opcode = OPCODE_ADD;
    newInst[0].DstReg.File = PROGRAM_TEMPORARY;
    newInst[0].DstReg.Index = wpos_temp;
    newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;

    newInst[0].SrcReg[0].File = PROGRAM_INPUT;
    newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
    newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
    newInst[0].SrcReg[0].Negate = NEGATE_Y;

    newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
    newInst[0].SrcReg[1].Index = win_size;
    newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);

}
Beispiel #5
0
static struct ureg register_param5(struct tnl_program *p,
				   GLint s0,
				   GLint s1,
				   GLint s2,
				   GLint s3,
                                   GLint s4)
{
   gl_state_index tokens[STATE_LENGTH];
   GLint idx;
   tokens[0] = s0;
   tokens[1] = s1;
   tokens[2] = s2;
   tokens[3] = s3;
   tokens[4] = s4;
   idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
   return make_ureg(PROGRAM_STATE_VAR, idx);
}
void
brw_add_texrect_params(struct gl_program *prog)
{
   for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
      if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
         continue;

      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         texunit,
         0,
         0
      };

      _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
   }
}
Beispiel #7
0
static void
brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
                                   const struct gl_program *prog,
                                   struct brw_stage_prog_data *stage_prog_data,
                                   bool is_scalar)
{
   const nir_state_slot *const slots = var->state_slots;
   assert(var->state_slots != NULL);

   unsigned uniform_index = var->data.driver_location / 4;
   for (unsigned int i = 0; i < var->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa, but we'll
       * get the same index back here.
       */
      int index = _mesa_add_state_reference(prog->Parameters,
					    (gl_state_index *)slots[i].tokens);

      /* Add each of the unique swizzles of the element as a parameter.
       * This'll end up matching the expected layout of the
       * array/matrix/structure we're trying to fill in.
       */
      int last_swiz = -1;
      for (unsigned j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);

         /* If we hit a pair of identical swizzles, this means we've hit the
          * end of the builtin variable.  In scalar mode, we should just quit
          * and move on to the next one.  In vec4, we need to continue and pad
          * it out to 4 components.
          */
         if (swiz == last_swiz && is_scalar)
            break;

         last_swiz = swiz;

         stage_prog_data->param[uniform_index++] =
            &prog->Parameters->ParameterValues[index][swiz];
      }
   }
}
Beispiel #8
0
/**
 * Called in ProgramStringNotify, we need to fill the metadata of the
 * gl_program attached to the ati_fragment_shader
 */
void
st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
{
   /* we know this is st_fragment_program, because of st_new_ati_fs() */
   struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
   struct ati_fragment_shader *atifs = stfp->ati_fs;

   unsigned pass, i, r, optype, arg;

   static const gl_state_index fog_params_state[STATE_LENGTH] =
      {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
   static const gl_state_index fog_color[STATE_LENGTH] =
      {STATE_FOG_COLOR, 0, 0, 0, 0};

   prog->InputsRead = 0;
   prog->OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
   prog->SamplersUsed = 0;
   prog->Parameters = _mesa_new_parameter_list();

   /* fill in InputsRead, SamplersUsed, TexturesUsed */
   for (pass = 0; pass < atifs->NumPasses; pass++) {
      for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
         GLuint pass_tex = texinst->src;

         if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
            /* mark which texcoords are used */
            prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
            /* by default there is 1:1 mapping between samplers and textures */
            prog->SamplersUsed |= (1 << r);
            /* the target is unknown here, it will be fixed in the draw call */
            prog->TexturesUsed[r] = TEXTURE_2D_BIT;
         } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
            if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
               prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
            }
         }
      }
   }
   for (pass = 0; pass < atifs->NumPasses; pass++) {
      for (i = 0; i < atifs->numArithInstr[pass]; i++) {
         struct atifs_instruction *inst = &atifs->Instructions[pass][i];

         for (optype = 0; optype < 2; optype++) { /* color, alpha */
            if (inst->Opcode[optype]) {
               for (arg = 0; arg < inst->ArgCount[optype]; arg++) {
                  GLint index = inst->SrcReg[optype][arg].Index;
                  if (index == GL_PRIMARY_COLOR_EXT) {
                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL0);
                  } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
                     /* note: ATI_fragment_shader.txt never specifies what
                      * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
                      * VARYING_SLOT_COL1 for this input */
                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL1);
                  }
               }
            }
         }
      }
   }
   /* we may need fog */
   prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_FOGC);

   /* we always have the ATI_fs constants, and the fog params */
   for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) {
      _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
                          NULL, 4, GL_FLOAT, NULL, NULL);
   }
   _mesa_add_state_reference(prog->Parameters, fog_params_state);
   _mesa_add_state_reference(prog->Parameters, fog_color);

   prog->NumInstructions = 0;
   prog->NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */
   prog->NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */
}
Beispiel #9
0
/**
 * Generate an R200 vertex program from Mesa's internal representation.
 *
 * \return  GL_TRUE for success, GL_FALSE for failure.
 */
static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
{
   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
   struct prog_instruction *vpi;
   int i;
   VERTEX_SHADER_INSTRUCTION *o_inst;
   unsigned long operands;
   int are_srcs_scalar;
   unsigned long hw_op;
   int dofogfix = 0;
   int fog_temp_i = 0;
   int free_inputs;
   int array_count = 0;
   int u_temp_used;

   vp->native = GL_FALSE;
   vp->translated = GL_TRUE;
   vp->fogmode = ctx->Fog.Mode;

   if (mesa_vp->Base.NumInstructions == 0)
      return GL_FALSE;

#if 0
   if ((mesa_vp->Base.InputsRead &
      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
	    mesa_vp->Base.InputsRead);
      }
      return GL_FALSE;
   }
#endif

   if ((mesa_vp->Base.OutputsWritten &
      ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
      (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
      (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
      (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
                 (unsigned long long) mesa_vp->Base.OutputsWritten);
      }
      return GL_FALSE;
   }

   /* Initial value should be last tmp reg that hw supports.
      Strangely enough r300 doesnt mind even though these would be out of range.
      Smart enough to realize that it doesnt need it? */
   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
   struct prog_src_register src[3];
   struct prog_dst_register dst;

/* FIXME: is changing the prog safe to do here? */
   if (mesa_vp->IsPositionInvariant &&
      /* make sure we only do this once */
       !(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
	 _mesa_insert_mvp_code(ctx, mesa_vp);
      }

   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
      base e isn't directly available neither. */
   if ((mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_FOGC)) && !vp->fogpidx) {
      struct gl_program_parameter_list *paramList;
      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
      paramList = mesa_vp->Base.Parameters;
      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
   }

   vp->pos_end = 0;
   mesa_vp->Base.NumNativeInstructions = 0;
   if (mesa_vp->Base.Parameters)
      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
   else
      mesa_vp->Base.NumNativeParameters = 0;

   for(i = 0; i < VERT_ATTRIB_MAX; i++)
      vp->inputs[i] = -1;
   for(i = 0; i < 15; i++)
      vp->inputmap_rev[i] = 255;
   free_inputs = 0x2ffd;

/* fglrx uses fixed inputs as follows for conventional attribs.
   generic attribs use non-fixed assignment, fglrx will always use the
   lowest attrib values available. We'll just do the same.
   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
   and 13 in a hw vertex prog.
   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
   Additionally, not more than 12 arrays in total are possible I think.
   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
*/

/* attr 4,5 and 13 are only used with generic attribs.
   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
   not possibe to use with vertex progs as it is lacking in vert prog specification) */
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
   if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
      vp->inputs[VERT_ATTRIB_POS] = 0;
      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
      free_inputs &= ~(1 << 0);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
      free_inputs &= ~(1 << 2);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
      free_inputs &= ~(1 << 3);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
      vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
      array_count++;
   }
   /* VERT_ATTRIB_TEX0-5 */
   for (i = 0; i <= 5; i++) {
      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
	 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
	 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
	 free_inputs &= ~(1 << (i + 6));
	 array_count++;
      }
   }
   /* using VERT_ATTRIB_TEX6/7 would be illegal */
   for (; i < VERT_ATTRIB_TEX_MAX; i++) {
      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
          if (R200_DEBUG & RADEON_FALLBACKS) {
              fprintf(stderr, "texture attribute %d in vert prog\n", i);
          }
          return GL_FALSE;
      }
   }
   /* completely ignore aliasing? */
   for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
      int j;
   /* completely ignore aliasing? */
      if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
	 array_count++;
	 if (array_count > 12) {
	    if (R200_DEBUG & RADEON_FALLBACKS) {
	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
	    }
	    return GL_FALSE;
	 }
	 for (j = 0; j < 14; j++) {
	    /* will always find one due to limited array_count */
	    if (free_inputs & (1 << j)) {
	       free_inputs &= ~(1 << j);
	       vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
	       if (j == 0) {
                  /* mapped to pos */
                  vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
	       } else if (j < 12) {
                  /* mapped to col/tex */
                  vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
	       } else {
                  /* mapped to pos1 */
                  vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
               }
	       break;
	    }
	 }
      }
   }

   if (!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog without position output\n");
      }
      return GL_FALSE;
   }
   if (free_inputs & 1) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog without position input\n");
      }
      return GL_FALSE;
   }

   o_inst = vp->instr;
   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
      operands = op_operands(vpi->Opcode);
      are_srcs_scalar = operands & SCALAR_FLAG;
      operands &= OP_MASK;

      for(i = 0; i < operands; i++) {
	 src[i] = vpi->SrcReg[i];
	 /* hack up default attrib values as per spec as swizzling.
	    normal, fog, secondary color. Crazy?
	    May need more if we don't submit vec4 elements? */
	 if (src[i].File == PROGRAM_INPUT) {
	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
		  }
	       }
	    }
	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
		  }
	       }
	    }
	    else if (src[i].Index == VERT_ATTRIB_FOG) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
		  }
		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
		  }
	       }
	    }
	 }
      }

      if(operands == 3){
	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_ALL);

	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
		  SWIZZLE_X, SWIZZLE_Y,
		  SWIZZLE_Z, SWIZZLE_W,
		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);

	    o_inst->src1 = ZERO_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    src[2].File = PROGRAM_TEMPORARY;
	    src[2].Index = u_temp_i;
	    src[2].RelAddr = 0;
	    u_temp_i--;
	 }
      }

      if(operands >= 2){
	 if( CMP_SRCS(src[1], src[0]) ){
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_ALL);

	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		  SWIZZLE_X, SWIZZLE_Y,
		  SWIZZLE_Z, SWIZZLE_W,
		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);

	    o_inst->src1 = ZERO_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    src[0].File = PROGRAM_TEMPORARY;
	    src[0].Index = u_temp_i;
	    src[0].RelAddr = 0;
	    u_temp_i--;
	 }
      }

      dst = vpi->DstReg;
      if (dst.File == PROGRAM_OUTPUT &&
	  dst.Index == VARYING_SLOT_FOGC &&
	  dst.WriteMask & WRITEMASK_X) {
	  fog_temp_i = u_temp_i;
	  dst.File = PROGRAM_TEMPORARY;
	  dst.Index = fog_temp_i;
	  dofogfix = 1;
	  u_temp_i--;
      }

      /* These ops need special handling. */
      switch(vpi->Opcode){
      case OPCODE_POW:
/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
   So may need to insert additional instruction */
	 if ((src[0].File == src[1].File) &&
	     (src[0].Index == src[1].Index)) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
		   t_dst_mask(dst.WriteMask));
	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		   SWIZZLE_ZERO,
		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		   SWIZZLE_ZERO,
		   t_src_class(src[0].File),
		   src[0].Negate) | (src[0].RelAddr << 4);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_0;
	 }
	 else {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		   VSF_FLAG_ALL);
	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
		   t_src_class(src[0].File),
		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		   SWIZZLE_ZERO, SWIZZLE_ZERO,
		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
		   t_src_class(src[1].File),
		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
		   t_dst_mask(dst.WriteMask));
	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
		   VSF_IN_COMPONENT_X,
		   VSF_IN_COMPONENT_Y,
		   VSF_IN_COMPONENT_Z,
		   VSF_IN_COMPONENT_W,
		   VSF_IN_CLASS_TMP,
		   VSF_FLAG_NONE);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_0;
	    u_temp_i--;
	 }
	 goto next;

      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} 
      case OPCODE_SWZ:
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));
	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = ZERO_SRC_0;
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_MAD:
	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
	    instead (requiring 2 clocks) if all inputs are in temp memory
	    (and, only if they actually reference 3 distinct temps) */
	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
	    src[1].File == PROGRAM_TEMPORARY &&
	    src[2].File == PROGRAM_TEMPORARY &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;

	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
	    t_dst_mask(dst.WriteMask));
	 o_inst->src0 = t_src(vp, &src[0]);
#if 0
if ((o_inst - vp->instr) == 31) {
/* fix up the broken vertex program of quake4 demo... */
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
			t_src_class(src[1].File),
			src[1].Negate) | (src[1].RelAddr << 4);
o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
			t_src_class(src[1].File),
			src[1].Negate) | (src[1].RelAddr << 4);
}
else {
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = t_src(vp, &src[2]);
}
#else
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = t_src(vp, &src[2]);
#endif
	 goto next;

      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} 
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		SWIZZLE_ZERO,
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
		SWIZZLE_ZERO,
		t_src_class(src[1].File),
		src[1].Negate) | (src[1].RelAddr << 4);

	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} 
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		VSF_IN_COMPONENT_ONE,
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
		t_src_class(src[1].File),
		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0=t_src(vp, &src[0]);
	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
		t_src_class(src[0].File),
		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_FLR:
      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} 
         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
	    t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = UNUSED_SRC_0;
	 o_inst->src2 = UNUSED_SRC_1;
	 o_inst++;

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
		VSF_IN_COMPONENT_X,
		VSF_IN_COMPONENT_Y,
		VSF_IN_COMPONENT_Z,
		VSF_IN_COMPONENT_W,
		VSF_IN_CLASS_TMP,
		/* Not 100% sure about this */
		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);

	 o_inst->src2 = UNUSED_SRC_0;
	 u_temp_i--;
	 goto next;

      case OPCODE_XPD:
	 /* mul r0, r1.yzxw, r2.zxyw
	    mad r0, -r2.yzxw, r1.zxyw, r0
	  */
	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
	    src[1].File == PROGRAM_TEMPORARY &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
	    t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
		t_src_class(src[1].File),
		src[1].Negate) | (src[1].RelAddr << 4);

	 o_inst->src2 = UNUSED_SRC_1;
	 o_inst++;
	 u_temp_i--;

	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
		t_src_class(src[1].File),
		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
		VSF_IN_COMPONENT_X,
		VSF_IN_COMPONENT_Y,
		VSF_IN_COMPONENT_Z,
		VSF_IN_COMPONENT_W,
		VSF_IN_CLASS_TMP,
		VSF_FLAG_NONE);
	 goto next;

      case OPCODE_END:
	 assert(0);
      default:
	 break;
      }

      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
	    t_dst_mask(dst.WriteMask));

      if(are_srcs_scalar){
	 switch(operands){
	    case 1:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = UNUSED_SRC_0;
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 2:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = t_src_scalar(vp, &src[1]);
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 3:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = t_src_scalar(vp, &src[1]);
		o_inst->src2 = t_src_scalar(vp, &src[2]);
	    break;

	    default:
		fprintf(stderr, "illegal number of operands %lu\n", operands);
		exit(-1);
	    break;
	 }
      } else {
	 switch(operands){
	    case 1:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = UNUSED_SRC_0;
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 2:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = t_src(vp, &src[1]);
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 3:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = t_src(vp, &src[1]);
		o_inst->src2 = t_src(vp, &src[2]);
	    break;

	    default:
		fprintf(stderr, "illegal number of operands %lu\n", operands);
		exit(-1);
	    break;
	 }
      }
      next:

      if (dofogfix) {
	 o_inst++;
	 if (vp->fogmode == GL_EXP) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	 }
	 else if (vp->fogmode == GL_EXP2) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	 }
	 else { /* fogmode == GL_LINEAR */
		/* could do that with single op (dot) if using params like
		   with fixed function pipeline fog */
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;

	 }
         dofogfix = 0;
      }

      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
      if (mesa_vp->Base.NumNativeTemporaries <
	 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
	 mesa_vp->Base.NumNativeTemporaries =
	    mesa_vp->Base.NumTemporaries + u_temp_used;
      }
      if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
	 if (R200_DEBUG & RADEON_FALLBACKS) {
	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
	 }
	 return GL_FALSE;
      }
      u_temp_i = R200_VSF_MAX_TEMPS - 1;
      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
	 mesa_vp->Base.NumNativeInstructions = 129;
	 if (R200_DEBUG & RADEON_FALLBACKS) {
	    fprintf(stderr, "more than 128 native instructions\n");
	 }
	 return GL_FALSE;
      }
      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
	 vp->pos_end = (o_inst - vp->instr);
      }
   }

   vp->native = GL_TRUE;
   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
#if 0
   fprintf(stderr, "hw program:\n");
   for(i=0; i < vp->program.length; i++)
      fprintf(stderr, "%08x\n", vp->instr[i]);
#endif
   return GL_TRUE;
}
Beispiel #10
0
/**
 * Lookup GL state given a variable name, 0, 1 or 2 indexes and a field.
 * Allocate room for the state in the given param list and return position
 * in the list.
 * Yes, this is kind of ugly, but it works.
 */
static GLint
lookup_statevar(const char *var, GLint index1, GLint index2, const char *field,
                GLuint *swizzleOut,
                struct gl_program_parameter_list *paramList)
{
   /*
    * NOTE: The ARB_vertex_program extension specified that matrices get
    * loaded in registers in row-major order.  With GLSL, we want column-
    * major order.  So, we need to transpose all matrices here...
    */
   static const struct {
      const char *name;
      gl_state_index matrix;
      gl_state_index modifier;
   } matrices[] = {
      { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
      { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
      { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
      { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },

      { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
      { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
      { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
      { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },

      { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
      { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
      { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
      { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },

      { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
      { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
      { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
      { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },

      { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },

      { NULL, 0, 0 }
   };
   gl_state_index tokens[STATE_LENGTH];
   GLuint i;
   GLboolean isMatrix = GL_FALSE;

   for (i = 0; i < STATE_LENGTH; i++) {
      tokens[i] = 0;
   }
   *swizzleOut = SWIZZLE_NOOP;

   /* first, look if var is a pre-defined matrix */
   for (i = 0; matrices[i].name; i++) {
      if (strcmp(var, matrices[i].name) == 0) {
         tokens[0] = matrices[i].matrix;
         /* tokens[1], [2] and [3] filled below */
         tokens[4] = matrices[i].modifier;
         isMatrix = GL_TRUE;
         break;
      }
   }

   if (isMatrix) {
      if (tokens[0] == STATE_TEXTURE_MATRIX) {
         if (index1 >= 0) {
            tokens[1] = index1; /* which texture matrix */
            index1 = 0; /* prevent extra addition at end of function */
         }
      }
      if (index1 < 0) {
         /* index1 is unused: prevent extra addition at end of function */
         index1 = 0;
      }
   }
   else if (strcmp(var, "gl_DepthRange") == 0) {
      tokens[0] = STATE_DEPTH_RANGE;
      if (strcmp(field, "near") == 0) {
         *swizzleOut = SWIZZLE_XXXX;
      }
      else if (strcmp(field, "far") == 0) {
         *swizzleOut = SWIZZLE_YYYY;
      }
      else if (strcmp(field, "diff") == 0) {
         *swizzleOut = SWIZZLE_ZZZZ;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_ClipPlane") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_CLIPPLANE;
      tokens[1] = index1;
   }
   else if (strcmp(var, "gl_Point") == 0) {
      if (strcmp(field, "size") == 0) {
         tokens[0] = STATE_POINT_SIZE;
         *swizzleOut = SWIZZLE_XXXX;
      }
      else if (strcmp(field, "sizeMin") == 0) {
         tokens[0] = STATE_POINT_SIZE;
         *swizzleOut = SWIZZLE_YYYY;
      }
      else if (strcmp(field, "sizeMax") == 0) {
         tokens[0] = STATE_POINT_SIZE;
         *swizzleOut = SWIZZLE_ZZZZ;
      }
      else if (strcmp(field, "fadeThresholdSize") == 0) {
         tokens[0] = STATE_POINT_SIZE;
         *swizzleOut = SWIZZLE_WWWW;
      }
      else if (strcmp(field, "distanceConstantAttenuation") == 0) {
         tokens[0] = STATE_POINT_ATTENUATION;
         *swizzleOut = SWIZZLE_XXXX;
      }
      else if (strcmp(field, "distanceLinearAttenuation") == 0) {
         tokens[0] = STATE_POINT_ATTENUATION;
         *swizzleOut = SWIZZLE_YYYY;
      }
      else if (strcmp(field, "distanceQuadraticAttenuation") == 0) {
         tokens[0] = STATE_POINT_ATTENUATION;
         *swizzleOut = SWIZZLE_ZZZZ;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_FrontMaterial") == 0 ||
            strcmp(var, "gl_BackMaterial") == 0) {
      tokens[0] = STATE_MATERIAL;
      if (strcmp(var, "gl_FrontMaterial") == 0)
         tokens[1] = 0;
      else
         tokens[1] = 1;
      if (strcmp(field, "emission") == 0) {
         tokens[2] = STATE_EMISSION;
      }
      else if (strcmp(field, "ambient") == 0) {
         tokens[2] = STATE_AMBIENT;
      }
      else if (strcmp(field, "diffuse") == 0) {
         tokens[2] = STATE_DIFFUSE;
      }
      else if (strcmp(field, "specular") == 0) {
         tokens[2] = STATE_SPECULAR;
      }
      else if (strcmp(field, "shininess") == 0) {
         tokens[2] = STATE_SHININESS;
         *swizzleOut = SWIZZLE_XXXX;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_LightSource") == 0) {
      if (!field || index1 < 0)
         return -1;

      tokens[0] = STATE_LIGHT;
      tokens[1] = index1;

      if (strcmp(field, "ambient") == 0) {
         tokens[2] = STATE_AMBIENT;
      }
      else if (strcmp(field, "diffuse") == 0) {
         tokens[2] = STATE_DIFFUSE;
      }
      else if (strcmp(field, "specular") == 0) {
         tokens[2] = STATE_SPECULAR;
      }
      else if (strcmp(field, "position") == 0) {
         tokens[2] = STATE_POSITION;
      }
      else if (strcmp(field, "halfVector") == 0) {
         tokens[2] = STATE_HALF_VECTOR;
      }
      else if (strcmp(field, "spotDirection") == 0) {
         tokens[2] = STATE_SPOT_DIRECTION;
      }
      else if (strcmp(field, "spotCosCutoff") == 0) {
         tokens[2] = STATE_SPOT_DIRECTION;
         *swizzleOut = SWIZZLE_WWWW;
      }
      else if (strcmp(field, "spotCutoff") == 0) {
         tokens[2] = STATE_SPOT_CUTOFF;
         *swizzleOut = SWIZZLE_XXXX;
      }
      else if (strcmp(field, "spotExponent") == 0) {
         tokens[2] = STATE_ATTENUATION;
         *swizzleOut = SWIZZLE_WWWW;
      }
      else if (strcmp(field, "constantAttenuation") == 0) {
         tokens[2] = STATE_ATTENUATION;
         *swizzleOut = SWIZZLE_XXXX;
      }
      else if (strcmp(field, "linearAttenuation") == 0) {
         tokens[2] = STATE_ATTENUATION;
         *swizzleOut = SWIZZLE_YYYY;
      }
      else if (strcmp(field, "quadraticAttenuation") == 0) {
         tokens[2] = STATE_ATTENUATION;
         *swizzleOut = SWIZZLE_ZZZZ;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_LightModel") == 0) {
      if (strcmp(field, "ambient") == 0) {
         tokens[0] = STATE_LIGHTMODEL_AMBIENT;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_FrontLightModelProduct") == 0) {
      if (strcmp(field, "sceneColor") == 0) {
         tokens[0] = STATE_LIGHTMODEL_SCENECOLOR;
         tokens[1] = 0;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_BackLightModelProduct") == 0) {
      if (strcmp(field, "sceneColor") == 0) {
         tokens[0] = STATE_LIGHTMODEL_SCENECOLOR;
         tokens[1] = 1;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_FrontLightProduct") == 0 ||
            strcmp(var, "gl_BackLightProduct") == 0) {
      if (index1 < 0 || !field)
         return -1;

      tokens[0] = STATE_LIGHTPROD;
      tokens[1] = index1; /* light number */
      if (strcmp(var, "gl_FrontLightProduct") == 0) {
         tokens[2] = 0; /* front */
      }
      else {
         tokens[2] = 1; /* back */
      }
      if (strcmp(field, "ambient") == 0) {
         tokens[3] = STATE_AMBIENT;
      }
      else if (strcmp(field, "diffuse") == 0) {
         tokens[3] = STATE_DIFFUSE;
      }
      else if (strcmp(field, "specular") == 0) {
         tokens[3] = STATE_SPECULAR;
      }
      else {
         return -1;
      }
   }
   else if (strcmp(var, "gl_TextureEnvColor") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXENV_COLOR;
      tokens[1] = index1;
   }
   else if (strcmp(var, "gl_EyePlaneS") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_EYE_S;
   }
   else if (strcmp(var, "gl_EyePlaneT") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_EYE_T;
   }
   else if (strcmp(var, "gl_EyePlaneR") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_EYE_R;
   }
   else if (strcmp(var, "gl_EyePlaneQ") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_EYE_Q;
   }
   else if (strcmp(var, "gl_ObjectPlaneS") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_OBJECT_S;
   }
   else if (strcmp(var, "gl_ObjectPlaneT") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_OBJECT_T;
   }
   else if (strcmp(var, "gl_ObjectPlaneR") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_OBJECT_R;
   }
   else if (strcmp(var, "gl_ObjectPlaneQ") == 0) {
      if (index1 < 0)
         return -1;
      tokens[0] = STATE_TEXGEN;
      tokens[1] = index1; /* tex unit */
      tokens[2] = STATE_TEXGEN_OBJECT_Q;
   }
   else if (strcmp(var, "gl_Fog") == 0) {
      if (strcmp(field, "color") == 0) {
         tokens[0] = STATE_FOG_COLOR;
      }
      else if (strcmp(field, "density") == 0) {
         tokens[0] = STATE_FOG_PARAMS;
         *swizzleOut = SWIZZLE_XXXX;
      }
      else if (strcmp(field, "start") == 0) {
         tokens[0] = STATE_FOG_PARAMS;
         *swizzleOut = SWIZZLE_YYYY;
      }
      else if (strcmp(field, "end") == 0) {
         tokens[0] = STATE_FOG_PARAMS;
         *swizzleOut = SWIZZLE_ZZZZ;
      }
      else if (strcmp(field, "scale") == 0) {
         tokens[0] = STATE_FOG_PARAMS;
         *swizzleOut = SWIZZLE_WWWW;
      }
      else {
         return -1;
      }
   }
   else {
      return -1;
   }

   if (isMatrix) {
      /* load all four columns of matrix */
      GLint pos[4];
      GLuint j;
      for (j = 0; j < 4; j++) {
         tokens[2] = tokens[3] = j; /* jth row of matrix */
         pos[j] = _mesa_add_state_reference(paramList, tokens);
         assert(pos[j] >= 0);
         ASSERT(pos[j] >= 0);
      }
      return pos[0] + index1;
   }
   else {
      /* allocate a single register */
      GLint pos = _mesa_add_state_reference(paramList, tokens);
      ASSERT(pos >= 0);
      return pos;
   }
}
/**
 * Transform TEX, TXP, TXB, and KIL instructions in the following way:
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 *
 * \todo If/when r5xx uses the radeon_program architecture, this can probably
 * be reused.
 */
static GLboolean transform_TEX(
	struct radeon_transform_context *t,
	struct prog_instruction* orig_inst, void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;
	struct prog_instruction inst = *orig_inst;
	struct prog_instruction* tgt;
	GLboolean destredirect = GL_FALSE;

	if (inst.Opcode != OPCODE_TEX &&
	    inst.Opcode != OPCODE_TXB &&
	    inst.Opcode != OPCODE_TXP &&
	    inst.Opcode != OPCODE_KIL)
		return GL_FALSE;

	if (inst.Opcode != OPCODE_KIL &&
	    t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
		GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;

		if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
			tgt = radeonAppendInstructions(t->Program, 1);

			tgt->Opcode = OPCODE_MOV;
			tgt->DstReg = inst.DstReg;
			if (comparefunc == GL_ALWAYS) {
				tgt->SrcReg[0].File = PROGRAM_BUILTIN;
				tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
			} else {
				tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
			}
			return GL_TRUE;
		}

		inst.DstReg.File = PROGRAM_TEMPORARY;
		inst.DstReg.Index = radeonFindFreeTemporary(t);
		inst.DstReg.WriteMask = WRITEMASK_XYZW;
	}


	/* Hardware uses [0..1]x[0..1] range for rectangle textures
	 * instead of [0..Width]x[0..Height].
	 * Add a scaling instruction.
	 */
	if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
		gl_state_index tokens[STATE_LENGTH] = {
			STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
			0
		};

		int tempreg = radeonFindFreeTemporary(t);
		int factor_index;

		tokens[2] = inst.TexSrcUnit;
		factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);

		tgt = radeonAppendInstructions(t->Program, 1);

		tgt->Opcode = OPCODE_MUL;
		tgt->DstReg.File = PROGRAM_TEMPORARY;
		tgt->DstReg.Index = tempreg;
		tgt->SrcReg[0] = inst.SrcReg[0];
		tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
		tgt->SrcReg[1].Index = factor_index;

		reset_srcreg(&inst.SrcReg[0]);
		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
		inst.SrcReg[0].Index = tempreg;
	}

	if (inst.Opcode != OPCODE_KIL) {
		if (inst.DstReg.File != PROGRAM_TEMPORARY ||
		    inst.DstReg.WriteMask != WRITEMASK_XYZW) {
			int tempreg = radeonFindFreeTemporary(t);

			inst.DstReg.File = PROGRAM_TEMPORARY;
			inst.DstReg.Index = tempreg;
			inst.DstReg.WriteMask = WRITEMASK_XYZW;
			destredirect = GL_TRUE;
		}
	}

	tgt = radeonAppendInstructions(t->Program, 1);
	_mesa_copy_instructions(tgt, &inst, 1);

	if (inst.Opcode != OPCODE_KIL &&
	    t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
		GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
		GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
		int rcptemp = radeonFindFreeTemporary(t);
		int pass, fail;

		tgt = radeonAppendInstructions(t->Program, 3);

		tgt[0].Opcode = OPCODE_RCP;
		tgt[0].DstReg.File = PROGRAM_TEMPORARY;
		tgt[0].DstReg.Index = rcptemp;
		tgt[0].DstReg.WriteMask = WRITEMASK_W;
		tgt[0].SrcReg[0] = inst.SrcReg[0];
		tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;

		tgt[1].Opcode = OPCODE_MAD;
		tgt[1].DstReg = inst.DstReg;
		tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
		tgt[1].SrcReg[0] = inst.SrcReg[0];
		tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
		tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
		tgt[1].SrcReg[1].Index = rcptemp;
		tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
		tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
		tgt[1].SrcReg[2].Index = inst.DstReg.Index;
		if (depthmode == 0) /* GL_LUMINANCE */
			tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
		else if (depthmode == 2) /* GL_ALPHA */
			tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;

		/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
		 *   r  < tex  <=>      -tex+r < 0
		 *   r >= tex  <=> not (-tex+r < 0 */
		if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
			tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
		else
			tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;

		tgt[2].Opcode = OPCODE_CMP;
		tgt[2].DstReg = orig_inst->DstReg;
		tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
		tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;

		if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
			pass = 1;
			fail = 2;
		} else {
			pass = 2;
			fail = 1;
		}

		tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
		tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
		tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
	} else if (destredirect) {
		tgt = radeonAppendInstructions(t->Program, 1);

		tgt->Opcode = OPCODE_MOV;
		tgt->DstReg = orig_inst->DstReg;
		tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
		tgt->SrcReg[0].Index = inst.DstReg.Index;
	}

	return GL_TRUE;
}
/**
 * Returns a fragment program which implements the current pixel transfer ops.
 */
static struct gl_fragment_program *
get_pixel_transfer_program(GLcontext *ctx, const struct state_key *key)
{
   struct st_context *st = ctx->st;
   struct prog_instruction inst[MAX_INST];
   struct gl_program_parameter_list *params;
   struct gl_fragment_program *fp;
   GLuint ic = 0;
   const GLuint colorTemp = 0;

   fp = (struct gl_fragment_program *)
      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
   if (!fp)
      return NULL;

   params = _mesa_new_parameter_list();

   /*
    * Get initial pixel color from the texture.
    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
    */
   _mesa_init_instructions(inst + ic, 1);
   inst[ic].Opcode = OPCODE_TEX;
   inst[ic].DstReg.File = PROGRAM_TEMPORARY;
   inst[ic].DstReg.Index = colorTemp;
   inst[ic].SrcReg[0].File = PROGRAM_INPUT;
   inst[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
   inst[ic].TexSrcUnit = 0;
   inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
   ic++;
   fp->Base.InputsRead = (1 << FRAG_ATTRIB_TEX0);
   fp->Base.OutputsWritten = (1 << FRAG_RESULT_COLR);
   fp->Base.SamplersUsed = 0x1;  /* sampler 0 (bit 0) is used */

   if (key->scaleAndBias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 };
      GLfloat scale[4], bias[4];
      GLint scale_p, bias_p;

      scale[0] = ctx->Pixel.RedScale;
      scale[1] = ctx->Pixel.GreenScale;
      scale[2] = ctx->Pixel.BlueScale;
      scale[3] = ctx->Pixel.AlphaScale;
      bias[0] = ctx->Pixel.RedBias;
      bias[1] = ctx->Pixel.GreenBias;
      bias[2] = ctx->Pixel.BlueBias;
      bias[3] = ctx->Pixel.AlphaBias;

      scale_p = _mesa_add_state_reference(params, scale_state);
      bias_p = _mesa_add_state_reference(params, bias_state);

      /* MAD colorTemp, colorTemp, scale, bias; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MAD;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = scale_p;
      inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[2].Index = bias_p;
      ic++;
   }

   if (key->pixelMaps) {
      const GLuint temp = 1;

      /* create the colormap/texture now if not already done */
      if (!st->pixel_xfer.pixelmap_texture) {
         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
      }

      /* with a little effort, we can do four pixel map look-ups with
       * two TEX instructions:
       */

      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_TEX;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_XY; /* write R,G */
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].TexSrcUnit = 1;
      inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
      ic++;

      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_TEX;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_ZW; /* write B,A */
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W,
                                                 SWIZZLE_Z, SWIZZLE_W);
      inst[ic].TexSrcUnit = 1;
      inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
      ic++;

      /* MOV colorTemp, temp; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MOV;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = temp;
      ic++;

      fp->Base.SamplersUsed |= (1 << 1);  /* sampler 1 is used */
   }

   if (key->colorMatrix) {
      static const gl_state_index row0_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 0, 0, 0 };
      static const gl_state_index row1_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 1, 1, 0 };
      static const gl_state_index row2_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 2, 2, 0 };
      static const gl_state_index row3_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 3, 3, 0 };

      GLint row0_p = _mesa_add_state_reference(params, row0_state);
      GLint row1_p = _mesa_add_state_reference(params, row1_state);
      GLint row2_p = _mesa_add_state_reference(params, row2_state);
      GLint row3_p = _mesa_add_state_reference(params, row3_state);
      const GLuint temp = 1;

      /* DP4 temp.x, colorTemp, matrow0; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_X;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row0_p;
      ic++;

      /* DP4 temp.y, colorTemp, matrow1; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_Y;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row1_p;
      ic++;

      /* DP4 temp.z, colorTemp, matrow2; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_Z;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row2_p;
      ic++;

      /* DP4 temp.w, colorTemp, matrow3; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_W;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row3_p;
      ic++;

      /* MOV colorTemp, temp; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MOV;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = temp;
      ic++;
   }

   if (key->colorMatrixPostScaleBias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 };
      GLint scale_param, bias_param;

      scale_param = _mesa_add_state_reference(params, scale_state);
      bias_param = _mesa_add_state_reference(params, bias_state);

      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MAD;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = scale_param;
      inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[2].Index = bias_param;
      ic++;
   }

   /* Modify last instruction's dst reg to write to result.color */
   {
      struct prog_instruction *last = &inst[ic - 1];
      last->DstReg.File = PROGRAM_OUTPUT;
      last->DstReg.Index = FRAG_RESULT_COLR;
   }

   /* END; */
   _mesa_init_instructions(inst + ic, 1);
   inst[ic].Opcode = OPCODE_END;
   ic++;

   assert(ic <= MAX_INST);


   fp->Base.Instructions = _mesa_alloc_instructions(ic);
   if (!fp->Base.Instructions) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY,
                  "generating pixel transfer program");
      return NULL;
   }

   _mesa_copy_instructions(fp->Base.Instructions, inst, ic);
   fp->Base.NumInstructions = ic;
   fp->Base.Parameters = params;

#if 0
   printf("========= pixel transfer prog\n");
   _mesa_print_program(&fp->Base);
   _mesa_print_parameter_list(fp->Base.Parameters);
#endif

   return fp;
}
Beispiel #13
0
/**
 * XXX description???
 * \return GL_TRUE for success, GL_FALSE for failure
 */
GLboolean
_mesa_layout_parameters(struct asm_parser_state *state)
{
   struct gl_program_parameter_list *layout;
   struct asm_instruction *inst;
   unsigned i;

   layout =
      _mesa_new_parameter_list_sized(state->prog->Parameters->NumParameters);

   /* PASS 1:  Move any parameters that are accessed indirectly from the
    * original parameter list to the new parameter list.
    */
   for (inst = state->inst_head; inst != NULL; inst = inst->next) {
      for (i = 0; i < 3; i++) {
	 if (inst->SrcReg[i].Base.RelAddr) {
	    /* Only attempt to add the to the new parameter list once.
	     */
	    if (!inst->SrcReg[i].Symbol->pass1_done) {
	       const int new_begin =
		  copy_indirect_accessed_array(state->prog->Parameters, layout,
		      inst->SrcReg[i].Symbol->param_binding_begin,
		      inst->SrcReg[i].Symbol->param_binding_length);

	       if (new_begin < 0) {
		  return GL_FALSE;
	       }

	       inst->SrcReg[i].Symbol->param_binding_begin = new_begin;
	       inst->SrcReg[i].Symbol->pass1_done = 1;
	    }

	    /* Previously the Index was just the offset from the parameter
	     * array.  Now that the base of the parameter array is known, the
	     * index can be updated to its actual value.
	     */
	    inst->Base.SrcReg[i] = inst->SrcReg[i].Base;
	    inst->Base.SrcReg[i].Index +=
	       inst->SrcReg[i].Symbol->param_binding_begin;
	 }
      }
   }

   /* PASS 2:  Move any parameters that are not accessed indirectly from the
    * original parameter list to the new parameter list.
    */
   for (inst = state->inst_head; inst != NULL; inst = inst->next) {
      for (i = 0; i < 3; i++) {
	 const struct gl_program_parameter *p;
	 const int idx = inst->SrcReg[i].Base.Index;
	 unsigned swizzle = SWIZZLE_NOOP;

	 /* All relative addressed operands were processed on the first
	  * pass.  Just skip them here.
	  */
	 if (inst->SrcReg[i].Base.RelAddr) {
	    continue;
	 }

	 if ((inst->SrcReg[i].Base.File <= PROGRAM_VARYING )
	     || (inst->SrcReg[i].Base.File >= PROGRAM_WRITE_ONLY)) {
	    continue;
	 }

	 inst->Base.SrcReg[i] = inst->SrcReg[i].Base;
	 p = & state->prog->Parameters->Parameters[idx];

	 switch (p->Type) {
	 case PROGRAM_CONSTANT: {
	    const float *const v =
	       state->prog->Parameters->ParameterValues[idx];

	    inst->Base.SrcReg[i].Index =
	       _mesa_add_unnamed_constant(layout, v, p->Size, & swizzle);

	    inst->Base.SrcReg[i].Swizzle = 
	       _mesa_combine_swizzles(swizzle, inst->Base.SrcReg[i].Swizzle);
	    break;
	 }

	 case PROGRAM_STATE_VAR:
	    inst->Base.SrcReg[i].Index =
	       _mesa_add_state_reference(layout, p->StateIndexes);
	    break;

	 default:
	    break;
	 }

	 inst->SrcReg[i].Base.File = p->Type;
	 inst->Base.SrcReg[i].File = p->Type;
      }
   }

   layout->StateFlags = state->prog->Parameters->StateFlags;
   _mesa_free_parameter_list(state->prog->Parameters);
   state->prog->Parameters = layout;

   return GL_TRUE;
}
Beispiel #14
0
/**
 * Translate Mesa program to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input

 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for each output
 * \param tokens  array to store translated tokens in
 * \param maxTokens  size of the tokens array
 *
 * \return number of tokens placed in 'tokens' buffer, or zero if error
 */
GLuint
st_translate_mesa_program(
   GLcontext *ctx,
   uint procType,
   const struct gl_program *program,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   const GLbitfield inputFlags[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[],
   const GLbitfield outputFlags[],
   struct tgsi_token *tokens,
   GLuint maxTokens )
{
   GLuint i;
   GLuint ti;  /* token index */
   struct tgsi_header *header;
   struct tgsi_processor *processor;
   GLuint preamble_size = 0;
   GLuint immediates[1000];
   GLuint numImmediates = 0;
   GLboolean insideSubroutine = GL_FALSE;
   GLboolean indirectAccess = GL_FALSE;
   GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
   GLint wposTemp = -1, winHeightConst = -1;

   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
          procType == TGSI_PROCESSOR_VERTEX);

   find_temporaries(program, tempsUsed);

   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      if (program->InputsRead & FRAG_BIT_WPOS) {
         /* Fragment program uses fragment position input.
          * Need to replace instances of INPUT[WPOS] with temp T
          * where T = INPUT[WPOS] by y is inverted.
          */
         static const gl_state_index winSizeState[STATE_LENGTH]
            = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
         winHeightConst = _mesa_add_state_reference(program->Parameters,
                                                    winSizeState);
         wposTemp = find_free_temporary(tempsUsed);
      }
   }


   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();

   header = (struct tgsi_header *) &tokens[1];
   *header = tgsi_build_header();

   processor = (struct tgsi_processor *) &tokens[2];
   *processor = tgsi_build_processor( procType, header );

   ti = 3;

   /*
    * Declare input attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numInputs; i++) {
         struct tgsi_full_declaration fulldecl;
         fulldecl = make_input_decl(i,
                                    GL_TRUE, interpMode[i],
                                    TGSI_WRITEMASK_XYZW,
                                    GL_TRUE, inputSemanticName[i],
                                    inputSemanticIndex[i],
                                    inputFlags[i]);
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }
   else {
      /* vertex prog */
      /* XXX: this could probaby be merged with the clause above.
       * the only difference is the semantic tags.
       */
      for (i = 0; i < numInputs; i++) {
         struct tgsi_full_declaration fulldecl;
         fulldecl = make_input_decl(i,
                                    GL_FALSE, 0,
                                    TGSI_WRITEMASK_XYZW,
                                    GL_FALSE, 0, 0,
                                    inputFlags[i]);
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }

   /*
    * Declare output attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numOutputs; i++) {
         struct tgsi_full_declaration fulldecl;
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            fulldecl = make_output_decl(i,
                                        TGSI_SEMANTIC_POSITION, /* Z / Depth */
                                        outputSemanticIndex[i],
                                        TGSI_WRITEMASK_Z,
                                        outputFlags[i]);
            break;
         case TGSI_SEMANTIC_COLOR:
            fulldecl = make_output_decl(i,
                                        TGSI_SEMANTIC_COLOR,
                                        outputSemanticIndex[i],
                                        TGSI_WRITEMASK_XYZW,
                                        outputFlags[i]);
            break;
         default:
            assert(0);
            return 0;
         }
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }
   else {
      /* vertex prog */
      for (i = 0; i < numOutputs; i++) {
         struct tgsi_full_declaration fulldecl;
         fulldecl = make_output_decl(i,
                                     outputSemanticName[i],
                                     outputSemanticIndex[i],
                                     TGSI_WRITEMASK_XYZW,
                                     outputFlags[i]);
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }

   /* temporary decls */
   {
      GLboolean inside_range = GL_FALSE;
      GLuint start_range = 0;

      tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
      for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
         if (tempsUsed[i] && !inside_range) {
            inside_range = GL_TRUE;
            start_range = i;
         }
         else if (!tempsUsed[i] && inside_range) {
            struct tgsi_full_declaration fulldecl;

            inside_range = GL_FALSE;
            fulldecl = make_temp_decl( start_range, i - 1 );
            ti += tgsi_build_full_declaration(
               &fulldecl,
               &tokens[ti],
               header,
               maxTokens - ti );
         }
      }
   }

   /* Declare address register.
   */
   if (program->NumAddressRegs > 0) {
      struct tgsi_full_declaration fulldecl;

      assert( program->NumAddressRegs == 1 );

      fulldecl = make_addr_decl( 0, 0 );
      ti += tgsi_build_full_declaration(
         &fulldecl,
         &tokens[ti],
         header,
         maxTokens - ti );

      indirectAccess = GL_TRUE;
   }

   /* immediates/literals */
   memset(immediates, ~0, sizeof(immediates));

   /* Emit immediates only when there is no address register in use.
    * FIXME: Be smarter and recognize param arrays -- indirect addressing is
    *        only valid within the referenced array.
    */
   if (program->Parameters && !indirectAccess) {
      for (i = 0; i < program->Parameters->NumParameters; i++) {
         if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
            struct tgsi_full_immediate fullimm;

            fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
            ti += tgsi_build_full_immediate(
               &fullimm,
               &tokens[ti],
               header,
               maxTokens - ti );
            immediates[i] = numImmediates;
            numImmediates++;
         }
      }
   }

   /* constant buffer refs */
   if (program->Parameters) {
      GLint start = -1, end = -1;

      for (i = 0; i < program->Parameters->NumParameters; i++) {
         GLboolean emit = (i == program->Parameters->NumParameters - 1);
         GLboolean matches;

         switch (program->Parameters->Parameters[i].Type) {
         case PROGRAM_ENV_PARAM:
         case PROGRAM_STATE_VAR:
         case PROGRAM_NAMED_PARAM:
         case PROGRAM_UNIFORM:
            matches = GL_TRUE;
            break;
         case PROGRAM_CONSTANT:
            matches = indirectAccess;
            break;
         default:
            matches = GL_FALSE;
         }

         if (matches) {
            if (start == -1) {
               /* begin a sequence */
               start = i;
               end = i;
            }
            else {
               /* continue sequence */
               end = i;
            }
         }
         else {
            if (start != -1) {
               /* end of sequence */
               emit = GL_TRUE;
            }
         }

         if (emit && start >= 0) {
            struct tgsi_full_declaration fulldecl;

            fulldecl = make_constant_decl( start, end );
            ti += tgsi_build_full_declaration(
               &fulldecl,
               &tokens[ti],
               header,
               maxTokens - ti );
            start = end = -1;
         }
      }
   }

   /* texture samplers */
   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
      if (program->SamplersUsed & (1 << i)) {
         struct tgsi_full_declaration fulldecl;

         fulldecl = make_sampler_decl( i );
         ti += tgsi_build_full_declaration(
            &fulldecl,
            &tokens[ti],
            header,
            maxTokens - ti );
      }
   }

   /* invert WPOS fragment input */
   if (wposTemp >= 0) {
      ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
                               inputMapping[FRAG_ATTRIB_WPOS],
                               header, maxTokens - ti);
      preamble_size = 2; /* two instructions added */
   }

   for (i = 0; i < program->NumInstructions; i++) {
      struct tgsi_full_instruction fullinst;

      compile_instruction(
         &program->Instructions[i],
         &fullinst,
         inputMapping,
         outputMapping,
         immediates,
         indirectAccess,
         preamble_size,
         procType,
         &insideSubroutine,
         wposTemp);

      ti += tgsi_build_full_instruction(
         &fullinst,
         &tokens[ti],
         header,
         maxTokens - ti );
   }

#if DEBUG
   if(!tgsi_sanity_check(tokens)) {
      debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
      debug_printf("\nOriginal program:\n%s", program->String);
      debug_printf("\nMesa program:\n");
      _mesa_print_program(program);
      debug_printf("\nTGSI program:\n");
      tgsi_dump(tokens, 0);
      assert(0);
   }
#endif

   return ti;
}
/**
 * Transform the program to support fragment.position.
 *
 * Introduce a small fragment at the start of the program that will be
 * the only code that directly reads the FRAG_ATTRIB_WPOS input.
 * All other code pieces that reference that input will be rewritten
 * to read from a newly allocated temporary.
 *
 * \todo if/when r5xx supports the radeon_program architecture, this is a
 * likely candidate for code sharing.
 */
static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
	GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;

	if (!(InputsRead & FRAG_BIT_WPOS))
		return;

	static gl_state_index tokens[STATE_LENGTH] = {
		STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
	};
	struct prog_instruction *fpi;
	GLuint window_index;
	int i = 0;
	GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);

	_mesa_insert_instructions(compiler->program, 0, 3);
	fpi = compiler->program->Instructions;

	/* perspective divide */
	fpi[i].Opcode = OPCODE_RCP;

	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
	fpi[i].DstReg.Index = tempregi;
	fpi[i].DstReg.WriteMask = WRITEMASK_W;
	fpi[i].DstReg.CondMask = COND_TR;

	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
	fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
	i++;

	fpi[i].Opcode = OPCODE_MUL;

	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
	fpi[i].DstReg.Index = tempregi;
	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
	fpi[i].DstReg.CondMask = COND_TR;

	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
	fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;

	fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
	fpi[i].SrcReg[1].Index = tempregi;
	fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
	i++;

	/* viewport transformation */
	window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);

	fpi[i].Opcode = OPCODE_MAD;

	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
	fpi[i].DstReg.Index = tempregi;
	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
	fpi[i].DstReg.CondMask = COND_TR;

	fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
	fpi[i].SrcReg[0].Index = tempregi;
	fpi[i].SrcReg[0].Swizzle =
	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);

	fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
	fpi[i].SrcReg[1].Index = window_index;
	fpi[i].SrcReg[1].Swizzle =
	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);

	fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
	fpi[i].SrcReg[2].Index = window_index;
	fpi[i].SrcReg[2].Swizzle =
	    MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
	i++;

	for (; i < compiler->program->NumInstructions; ++i) {
		int reg;
		for (reg = 0; reg < 3; reg++) {
			if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
			    fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
				fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
				fpi[i].SrcReg[reg].Index = tempregi;
			}
		}
	}
}
Beispiel #16
0
static void
_mesa_insert_mvp_mad_code(struct gl_context *ctx, struct gl_vertex_program *vprog)
{
    struct prog_instruction *newInst;
    const GLuint origLen = vprog->Base.NumInstructions;
    const GLuint newLen = origLen + 4;
    GLuint hposTemp;
    GLuint i;

    /*
     * Setup state references for the modelview/projection matrix.
     * XXX we should check if these state vars are already declared.
     */
    static const gl_state_index mvpState[4][STATE_LENGTH] = {
        { STATE_MVP_MATRIX, 0, 0, 0, STATE_MATRIX_TRANSPOSE },
        { STATE_MVP_MATRIX, 0, 1, 1, STATE_MATRIX_TRANSPOSE },
        { STATE_MVP_MATRIX, 0, 2, 2, STATE_MATRIX_TRANSPOSE },
        { STATE_MVP_MATRIX, 0, 3, 3, STATE_MATRIX_TRANSPOSE },
    };
    GLint mvpRef[4];

    for (i = 0; i < 4; i++) {
        mvpRef[i] = _mesa_add_state_reference(vprog->Base.Parameters,
                                              mvpState[i]);
    }

    /* Alloc storage for new instructions */
    newInst = _mesa_alloc_instructions(newLen);
    if (!newInst) {
        _mesa_error(ctx, GL_OUT_OF_MEMORY,
                    "glProgramString(inserting position_invariant code)");
        return;
    }

    /* TEMP hposTemp; */
    hposTemp = vprog->Base.NumTemporaries++;

    /*
     * Generated instructions:
     *    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
     *    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
     *    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
     *    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
     */
    _mesa_init_instructions(newInst, 4);

    newInst[0].Opcode = OPCODE_MUL;
    newInst[0].DstReg.File = PROGRAM_TEMPORARY;
    newInst[0].DstReg.Index = hposTemp;
    newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
    newInst[0].SrcReg[0].File = PROGRAM_INPUT;
    newInst[0].SrcReg[0].Index = VERT_ATTRIB_POS;
    newInst[0].SrcReg[0].Swizzle = SWIZZLE_XXXX;
    newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
    newInst[0].SrcReg[1].Index = mvpRef[0];
    newInst[0].SrcReg[1].Swizzle = SWIZZLE_NOOP;

    for (i = 1; i <= 2; i++) {
        newInst[i].Opcode = OPCODE_MAD;
        newInst[i].DstReg.File = PROGRAM_TEMPORARY;
        newInst[i].DstReg.Index = hposTemp;
        newInst[i].DstReg.WriteMask = WRITEMASK_XYZW;
        newInst[i].SrcReg[0].File = PROGRAM_INPUT;
        newInst[i].SrcReg[0].Index = VERT_ATTRIB_POS;
        newInst[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(i,i,i,i);
        newInst[i].SrcReg[1].File = PROGRAM_STATE_VAR;
        newInst[i].SrcReg[1].Index = mvpRef[i];
        newInst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP;
        newInst[i].SrcReg[2].File = PROGRAM_TEMPORARY;
        newInst[i].SrcReg[2].Index = hposTemp;
        newInst[1].SrcReg[2].Swizzle = SWIZZLE_NOOP;
    }

    newInst[3].Opcode = OPCODE_MAD;
    newInst[3].DstReg.File = PROGRAM_OUTPUT;
    newInst[3].DstReg.Index = VARYING_SLOT_POS;
    newInst[3].DstReg.WriteMask = WRITEMASK_XYZW;
    newInst[3].SrcReg[0].File = PROGRAM_INPUT;
    newInst[3].SrcReg[0].Index = VERT_ATTRIB_POS;
    newInst[3].SrcReg[0].Swizzle = SWIZZLE_WWWW;
    newInst[3].SrcReg[1].File = PROGRAM_STATE_VAR;
    newInst[3].SrcReg[1].Index = mvpRef[3];
    newInst[3].SrcReg[1].Swizzle = SWIZZLE_NOOP;
    newInst[3].SrcReg[2].File = PROGRAM_TEMPORARY;
    newInst[3].SrcReg[2].Index = hposTemp;
    newInst[3].SrcReg[2].Swizzle = SWIZZLE_NOOP;


    /* Append original instructions after new instructions */
    _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen);

    /* free old instructions */
    _mesa_free_instructions(vprog->Base.Instructions, origLen);

    /* install new instructions */
    vprog->Base.Instructions = newInst;
    vprog->Base.NumInstructions = newLen;
    vprog->Base.InputsRead |= VERT_BIT_POS;
    vprog->Base.OutputsWritten |= BITFIELD64_BIT(VARYING_SLOT_POS);
}
Beispiel #17
0
/**
 * Append instructions to implement fog
 *
 * The \c fragment.fogcoord input is used to compute the fog blend factor.
 *
 * \param ctx      The GL context
 * \param fprog    Fragment program that fog instructions will be appended to.
 * \param fog_mode Fog mode.  One of \c GL_EXP, \c GL_EXP2, or \c GL_LINEAR.
 * \param saturate True if writes to color outputs should be clamped to [0, 1]
 *
 * \note
 * This function sets \c VARYING_BIT_FOGC in \c fprog->Base.InputsRead.
 *
 * \todo With a little work, this function could be adapted to add fog code
 * to vertex programs too.
 */
void
_mesa_append_fog_code(struct gl_context *ctx,
                      struct gl_fragment_program *fprog, GLenum fog_mode,
                      GLboolean saturate)
{
    static const gl_state_index fogPStateOpt[STATE_LENGTH]
        = { STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0 };
    static const gl_state_index fogColorState[STATE_LENGTH]
        = { STATE_FOG_COLOR, 0, 0, 0, 0};
    struct prog_instruction *newInst, *inst;
    const GLuint origLen = fprog->Base.NumInstructions;
    const GLuint newLen = origLen + 5;
    GLuint i;
    GLint fogPRefOpt, fogColorRef; /* state references */
    GLuint colorTemp, fogFactorTemp; /* temporary registerss */

    if (fog_mode == GL_NONE) {
        _mesa_problem(ctx, "_mesa_append_fog_code() called for fragment program"
                      " with fog_mode == GL_NONE");
        return;
    }

    if (!(fprog->Base.OutputsWritten & (1 << FRAG_RESULT_COLOR))) {
        /* program doesn't output color, so nothing to do */
        return;
    }

    /* Alloc storage for new instructions */
    newInst = _mesa_alloc_instructions(newLen);
    if (!newInst) {
        _mesa_error(ctx, GL_OUT_OF_MEMORY,
                    "glProgramString(inserting fog_option code)");
        return;
    }

    /* Copy orig instructions into new instruction buffer */
    _mesa_copy_instructions(newInst, fprog->Base.Instructions, origLen);

    /* PARAM fogParamsRefOpt = internal optimized fog params; */
    fogPRefOpt
        = _mesa_add_state_reference(fprog->Base.Parameters, fogPStateOpt);
    /* PARAM fogColorRef = state.fog.color; */
    fogColorRef
        = _mesa_add_state_reference(fprog->Base.Parameters, fogColorState);

    /* TEMP colorTemp; */
    colorTemp = fprog->Base.NumTemporaries++;
    /* TEMP fogFactorTemp; */
    fogFactorTemp = fprog->Base.NumTemporaries++;

    /* Scan program to find where result.color is written */
    inst = newInst;
    for (i = 0; i < fprog->Base.NumInstructions; i++) {
        if (inst->Opcode == OPCODE_END)
            break;
        if (inst->DstReg.File == PROGRAM_OUTPUT &&
                inst->DstReg.Index == FRAG_RESULT_COLOR) {
            /* change the instruction to write to colorTemp w/ clamping */
            inst->DstReg.File = PROGRAM_TEMPORARY;
            inst->DstReg.Index = colorTemp;
            inst->SaturateMode = saturate;
            /* don't break (may be several writes to result.color) */
        }
        inst++;
    }
    assert(inst->Opcode == OPCODE_END); /* we'll overwrite this inst */

    _mesa_init_instructions(inst, 5);

    /* emit instructions to compute fog blending factor */
    /* this is always clamped to [0, 1] regardless of fragment clamping */
    if (fog_mode == GL_LINEAR) {
        /* MAD fogFactorTemp.x, fragment.fogcoord.x, fogPRefOpt.x, fogPRefOpt.y; */
        inst->Opcode = OPCODE_MAD;
        inst->DstReg.File = PROGRAM_TEMPORARY;
        inst->DstReg.Index = fogFactorTemp;
        inst->DstReg.WriteMask = WRITEMASK_X;
        inst->SrcReg[0].File = PROGRAM_INPUT;
        inst->SrcReg[0].Index = VARYING_SLOT_FOGC;
        inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
        inst->SrcReg[1].File = PROGRAM_STATE_VAR;
        inst->SrcReg[1].Index = fogPRefOpt;
        inst->SrcReg[1].Swizzle = SWIZZLE_XXXX;
        inst->SrcReg[2].File = PROGRAM_STATE_VAR;
        inst->SrcReg[2].Index = fogPRefOpt;
        inst->SrcReg[2].Swizzle = SWIZZLE_YYYY;
        inst->SaturateMode = SATURATE_ZERO_ONE;
        inst++;
    }
    else {
        ASSERT(fog_mode == GL_EXP || fog_mode == GL_EXP2);
        /* fogPRefOpt.z = d/ln(2), fogPRefOpt.w = d/sqrt(ln(2) */
        /* EXP: MUL fogFactorTemp.x, fogPRefOpt.z, fragment.fogcoord.x; */
        /* EXP2: MUL fogFactorTemp.x, fogPRefOpt.w, fragment.fogcoord.x; */
        inst->Opcode = OPCODE_MUL;
        inst->DstReg.File = PROGRAM_TEMPORARY;
        inst->DstReg.Index = fogFactorTemp;
        inst->DstReg.WriteMask = WRITEMASK_X;
        inst->SrcReg[0].File = PROGRAM_STATE_VAR;
        inst->SrcReg[0].Index = fogPRefOpt;
        inst->SrcReg[0].Swizzle
            = (fog_mode == GL_EXP) ? SWIZZLE_ZZZZ : SWIZZLE_WWWW;
        inst->SrcReg[1].File = PROGRAM_INPUT;
        inst->SrcReg[1].Index = VARYING_SLOT_FOGC;
        inst->SrcReg[1].Swizzle = SWIZZLE_XXXX;
        inst++;
        if (fog_mode == GL_EXP2) {
            /* MUL fogFactorTemp.x, fogFactorTemp.x, fogFactorTemp.x; */
            inst->Opcode = OPCODE_MUL;
            inst->DstReg.File = PROGRAM_TEMPORARY;
            inst->DstReg.Index = fogFactorTemp;
            inst->DstReg.WriteMask = WRITEMASK_X;
            inst->SrcReg[0].File = PROGRAM_TEMPORARY;
            inst->SrcReg[0].Index = fogFactorTemp;
            inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
            inst->SrcReg[1].File = PROGRAM_TEMPORARY;
            inst->SrcReg[1].Index = fogFactorTemp;
            inst->SrcReg[1].Swizzle = SWIZZLE_XXXX;
            inst++;
        }
        /* EX2_SAT fogFactorTemp.x, -fogFactorTemp.x; */
        inst->Opcode = OPCODE_EX2;
        inst->DstReg.File = PROGRAM_TEMPORARY;
        inst->DstReg.Index = fogFactorTemp;
        inst->DstReg.WriteMask = WRITEMASK_X;
        inst->SrcReg[0].File = PROGRAM_TEMPORARY;
        inst->SrcReg[0].Index = fogFactorTemp;
        inst->SrcReg[0].Negate = NEGATE_XYZW;
        inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
        inst->SaturateMode = SATURATE_ZERO_ONE;
        inst++;
    }
    /* LRP result.color.xyz, fogFactorTemp.xxxx, colorTemp, fogColorRef; */
    inst->Opcode = OPCODE_LRP;
    inst->DstReg.File = PROGRAM_OUTPUT;
    inst->DstReg.Index = FRAG_RESULT_COLOR;
    inst->DstReg.WriteMask = WRITEMASK_XYZ;
    inst->SrcReg[0].File = PROGRAM_TEMPORARY;
    inst->SrcReg[0].Index = fogFactorTemp;
    inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
    inst->SrcReg[1].File = PROGRAM_TEMPORARY;
    inst->SrcReg[1].Index = colorTemp;
    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    inst->SrcReg[2].File = PROGRAM_STATE_VAR;
    inst->SrcReg[2].Index = fogColorRef;
    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
    inst++;
    /* MOV result.color.w, colorTemp.x;  # copy alpha */
    inst->Opcode = OPCODE_MOV;
    inst->DstReg.File = PROGRAM_OUTPUT;
    inst->DstReg.Index = FRAG_RESULT_COLOR;
    inst->DstReg.WriteMask = WRITEMASK_W;
    inst->SrcReg[0].File = PROGRAM_TEMPORARY;
    inst->SrcReg[0].Index = colorTemp;
    inst->SrcReg[0].Swizzle = SWIZZLE_NOOP;
    inst++;
    /* END; */
    inst->Opcode = OPCODE_END;
    inst++;

    /* free old instructions */
    _mesa_free_instructions(fprog->Base.Instructions, origLen);

    /* install new instructions */
    fprog->Base.Instructions = newInst;
    fprog->Base.NumInstructions = inst - newInst;
    fprog->Base.InputsRead |= VARYING_BIT_FOGC;
    assert(fprog->Base.OutputsWritten & (1 << FRAG_RESULT_COLOR));
}
Beispiel #18
0
/**
 * Given a variable name and datatype, emit uniform/constant buffer
 * entries which will store that state variable.
 * For example, if name="gl_LightSource" we'll emit 64 state variable
 * vectors/references and return position where that data starts.  This will
 * allow run-time array indexing into the light source array.
 *
 * Note that this is a recursive function.
 *
 * \return -1 if error, else index of start of data in the program parameter list
 */
static GLint
emit_statevars(const char *name, int array_len,
               const slang_type_specifier *type,
               gl_state_index tokens[STATE_LENGTH],
               struct gl_program_parameter_list *paramList)
{
   if (type->type == SLANG_SPEC_ARRAY) {
      GLint i, pos;
      assert(array_len > 0);
      if (strcmp(name, "gl_ClipPlane") == 0) {
         tokens[0] = STATE_CLIPPLANE;
      }
      else if (strcmp(name, "gl_LightSource") == 0) {
         tokens[0] = STATE_LIGHT;
      }
      else if (strcmp(name, "gl_FrontLightProduct") == 0) {
         tokens[0] = STATE_LIGHTPROD;
         tokens[2] = 0; /* front */
      }
      else if (strcmp(name, "gl_BackLightProduct") == 0) {
         tokens[0] = STATE_LIGHTPROD;
         tokens[2] = 1; /* back */
      }
      else if (strcmp(name, "gl_TextureEnvColor") == 0) {
         tokens[0] = STATE_TEXENV_COLOR;
      }
      else if (strcmp(name, "gl_EyePlaneS") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_EYE_S;
      }
      else if (strcmp(name, "gl_EyePlaneT") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_EYE_T;
      }
      else if (strcmp(name, "gl_EyePlaneR") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_EYE_R;
      }
      else if (strcmp(name, "gl_EyePlaneQ") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_EYE_Q;
      }
      else if (strcmp(name, "gl_ObjectPlaneS") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_OBJECT_S;
      }
      else if (strcmp(name, "gl_ObjectPlaneT") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_OBJECT_T;
      }
      else if (strcmp(name, "gl_ObjectPlaneR") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_OBJECT_R;
      }
      else if (strcmp(name, "gl_ObjectPlaneQ") == 0) {
         tokens[0] = STATE_TEXGEN;
         tokens[2] = STATE_TEXGEN_OBJECT_Q;
      }
      else {
         return -1; /* invalid array name */
      }
      for (i = 0; i < array_len; i++) {
         GLint p;
         tokens[1] = i;
         p = emit_statevars(NULL, 0, type->_array, tokens, paramList);
         if (i == 0)
            pos = p;
      }
      return pos;
   }
   else if (type->type == SLANG_SPEC_STRUCT) {
      const slang_variable_scope *fields = type->_struct->fields;
      GLuint i, pos;
      for (i = 0; i < fields->num_variables; i++) {
         const slang_variable *var = fields->variables[i];
         GLint p = emit_statevars(var->a_name, 0, &var->type.specifier,
                                  tokens, paramList);
         if (i == 0)
            pos = p;
      }
      return pos;
   }
   else {
      GLint pos;
      assert(type->type == SLANG_SPEC_VEC4 ||
             type->type == SLANG_SPEC_VEC3 ||
             type->type == SLANG_SPEC_VEC2 ||
             type->type == SLANG_SPEC_FLOAT ||
             type->type == SLANG_SPEC_IVEC4 ||
             type->type == SLANG_SPEC_IVEC3 ||
             type->type == SLANG_SPEC_IVEC2 ||
             type->type == SLANG_SPEC_INT);
      if (name) {
         GLint t;

         if (tokens[0] == STATE_LIGHT)
            t = 2;
         else if (tokens[0] == STATE_LIGHTPROD)
            t = 3;
         else
            return -1; /* invalid array name */

         if (strcmp(name, "ambient") == 0) {
            tokens[t] = STATE_AMBIENT;
         }
         else if (strcmp(name, "diffuse") == 0) {
            tokens[t] = STATE_DIFFUSE;
         }
         else if (strcmp(name, "specular") == 0) {
            tokens[t] = STATE_SPECULAR;
         }
         else if (strcmp(name, "position") == 0) {
            tokens[t] = STATE_POSITION;
         }
         else if (strcmp(name, "halfVector") == 0) {
            tokens[t] = STATE_HALF_VECTOR;
         }
         else if (strcmp(name, "spotDirection") == 0) {
            tokens[t] = STATE_SPOT_DIRECTION; /* xyz components */
         }
         else if (strcmp(name, "spotCosCutoff") == 0) {
            tokens[t] = STATE_SPOT_DIRECTION; /* w component */
         }

         else if (strcmp(name, "constantAttenuation") == 0) {
            tokens[t] = STATE_ATTENUATION; /* x component */
         }
         else if (strcmp(name, "linearAttenuation") == 0) {
            tokens[t] = STATE_ATTENUATION; /* y component */
         }
         else if (strcmp(name, "quadraticAttenuation") == 0) {
            tokens[t] = STATE_ATTENUATION; /* z component */
         }
         else if (strcmp(name, "spotExponent") == 0) {
            tokens[t] = STATE_ATTENUATION; /* w = spot exponent */
         }

         else if (strcmp(name, "spotCutoff") == 0) {
            tokens[t] = STATE_SPOT_CUTOFF; /* x component */
         }

         else {
            return -1; /* invalid field name */
         }
      }

      pos = _mesa_add_state_reference(paramList, tokens);
      return pos;
   }

   return 1;
}
Beispiel #19
0
/**
 * This function inserts instructions for coordinate modelview * projection
 * into a vertex program.
 * May be used to implement the position_invariant option.
 */
static void
_mesa_insert_mvp_dp4_code(struct gl_context *ctx, struct gl_vertex_program *vprog)
{
    struct prog_instruction *newInst;
    const GLuint origLen = vprog->Base.NumInstructions;
    const GLuint newLen = origLen + 4;
    GLuint i;

    /*
     * Setup state references for the modelview/projection matrix.
     * XXX we should check if these state vars are already declared.
     */
    static const gl_state_index mvpState[4][STATE_LENGTH] = {
        { STATE_MVP_MATRIX, 0, 0, 0, 0 },  /* state.matrix.mvp.row[0] */
        { STATE_MVP_MATRIX, 0, 1, 1, 0 },  /* state.matrix.mvp.row[1] */
        { STATE_MVP_MATRIX, 0, 2, 2, 0 },  /* state.matrix.mvp.row[2] */
        { STATE_MVP_MATRIX, 0, 3, 3, 0 },  /* state.matrix.mvp.row[3] */
    };
    GLint mvpRef[4];

    for (i = 0; i < 4; i++) {
        mvpRef[i] = _mesa_add_state_reference(vprog->Base.Parameters,
                                              mvpState[i]);
    }

    /* Alloc storage for new instructions */
    newInst = _mesa_alloc_instructions(newLen);
    if (!newInst) {
        _mesa_error(ctx, GL_OUT_OF_MEMORY,
                    "glProgramString(inserting position_invariant code)");
        return;
    }

    /*
     * Generated instructions:
     * newInst[0] = DP4 result.position.x, mvp.row[0], vertex.position;
     * newInst[1] = DP4 result.position.y, mvp.row[1], vertex.position;
     * newInst[2] = DP4 result.position.z, mvp.row[2], vertex.position;
     * newInst[3] = DP4 result.position.w, mvp.row[3], vertex.position;
     */
    _mesa_init_instructions(newInst, 4);
    for (i = 0; i < 4; i++) {
        newInst[i].Opcode = OPCODE_DP4;
        newInst[i].DstReg.File = PROGRAM_OUTPUT;
        newInst[i].DstReg.Index = VARYING_SLOT_POS;
        newInst[i].DstReg.WriteMask = (WRITEMASK_X << i);
        newInst[i].SrcReg[0].File = PROGRAM_STATE_VAR;
        newInst[i].SrcReg[0].Index = mvpRef[i];
        newInst[i].SrcReg[0].Swizzle = SWIZZLE_NOOP;
        newInst[i].SrcReg[1].File = PROGRAM_INPUT;
        newInst[i].SrcReg[1].Index = VERT_ATTRIB_POS;
        newInst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP;
    }

    /* Append original instructions after new instructions */
    _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen);

    /* free old instructions */
    _mesa_free_instructions(vprog->Base.Instructions, origLen);

    /* install new instructions */
    vprog->Base.Instructions = newInst;
    vprog->Base.NumInstructions = newLen;
    vprog->Base.InputsRead |= VERT_BIT_POS;
    vprog->Base.OutputsWritten |= BITFIELD64_BIT(VARYING_SLOT_POS);
}
Beispiel #20
0
/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 */
static void
emit_wpos_adjustment( struct st_translate *t,
                      const struct gl_program *program,
                      boolean invert,
                      GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] by y is inverted.
    */
   static const gl_state_index wposTransformState[STATE_LENGTH]
      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
   
   /* XXX: note we are modifying the incoming shader here!  Need to
    * do this before emitting the constant decls below, or this
    * will be missed:
    */
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                       wposTransformState);

   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
}
Beispiel #21
0
GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
							     struct gl_fragment_program   *mesa_fp,
                                 GLcontext *ctx) 
{
    context_t *context = R700_CONTEXT(ctx);      
    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);

	GLuint    number_of_colors_exported;
	GLboolean z_enabled = GL_FALSE;
	GLuint    unBit, shadow_unit;
	int i;
	struct prog_instruction *inst;
	gl_state_index shadow_ambient[STATE_LENGTH]
	    = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};

    //Init_Program
	Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );

    if(GL_TRUE == r700->bShaderUseMemConstant)
    {
        fp->r700AsmCode.bUseMemConstant = GL_TRUE;
    }
    else
    {
        fp->r700AsmCode.bUseMemConstant = GL_FALSE;
    }

    fp->r700AsmCode.unAsic = 7;

    if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
    {
        insert_wpos_code(ctx, mesa_fp);
    }

    /* add/map  consts for ARB_shadow_ambient */
    if(mesa_fp->Base.ShadowSamplers)
    {
        inst = mesa_fp->Base.Instructions;
        for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
        {
            if(inst->TexShadow == 1)
            {
                shadow_unit = inst->TexSrcUnit;
                shadow_ambient[2] = shadow_unit;
                fp->r700AsmCode.shadow_regs[shadow_unit] = 
                    _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
            }
            inst++;
        }
    }

    Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); 

    if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
	{
		return GL_FALSE;
    }

    InitShaderProgram(&(fp->r700AsmCode));
	
    for(i=0; i < MAX_SAMPLERS; i++)
    {
         fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
    }

    fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;

	if( GL_FALSE == AssembleInstr(0,
                                  0,
                                  mesa_fp->Base.NumInstructions,
                                  &(mesa_fp->Base.Instructions[0]), 
                                  &(fp->r700AsmCode)) )
	{
		return GL_FALSE;
	}

    if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
    {
        return GL_FALSE;
    }

    if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
    {
        return GL_FALSE;
    }

    fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 
                         : (fp->r700AsmCode.number_used_registers - 1);

	fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;

	number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;

	unBit = 1 << FRAG_RESULT_DEPTH;
	if(mesa_fp->Base.OutputsWritten & unBit)
	{
		z_enabled = GL_TRUE;
		number_of_colors_exported--;
	}

	/* illegal to set this to 0 */
	if(number_of_colors_exported || z_enabled)
	{
	    fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
	}
	else
	{
	    fp->r700Shader.exportMode = (1 << 1);
	}

    fp->translated = GL_TRUE;

	return GL_TRUE;
}