Exemplo n.º 1
0
/**
 * Make fragment shader for glDraw/CopyPixels.  This shader is made
 * by combining the pixel transfer shader with the user-defined shader.
 * \param fpIn  the current/incoming fragment program
 * \param fpOut  returns the combined fragment program
 */
void
st_make_drawpix_fragment_program(struct st_context *st,
                                 struct gl_fragment_program *fpIn,
                                 struct gl_fragment_program **fpOut)
{
   struct gl_program *newProg;

   if (is_passthrough_program(fpIn)) {
      newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
                                             &st->pixel_xfer.program->Base);
   }
   else {
#if 0
      /* debug */
      printf("Base program:\n");
      _mesa_print_program(&fpIn->Base);
      printf("DrawPix program:\n");
      _mesa_print_program(&st->pixel_xfer.program->Base.Base);
#endif
      newProg = _mesa_combine_programs(st->ctx,
                                       &st->pixel_xfer.program->Base.Base,
                                       &fpIn->Base);
   }

#if 0
   /* debug */
   printf("Combined DrawPixels program:\n");
   _mesa_print_program(newProg);
   printf("InputsRead: 0x%x\n", newProg->InputsRead);
   printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
   _mesa_print_parameter_list(newProg->Parameters);
#endif

   *fpOut = (struct gl_fragment_program *) newProg;
}
Exemplo n.º 2
0
static void
print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
   fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
   _mesa_print_program(program);
   _mesa_print_program_parameters(ctx, program);
   fprintf(stderr, "\n\n");
}
Exemplo n.º 3
0
/* Translate program into hardware format */
GLboolean
nouveau_shader_pass2(nvsPtr nvs)
{
	struct pass2_rec *rec;
	int i;

	NVSDBG("start: nvs=%p\n", nvs);

	rec = calloc(1, sizeof(struct pass2_rec));
	for (i=0; i<NVS_MAX_TEMPS; i++)
		rec->temps[i] = -1;
	nvs->pass_rec = rec;

	/* Start off with allocating 4 uint32_t's for each inst, will be grown
	 * if necessary..
	 */
	nvs->program_alloc_size = nvs->mesa.vp.Base.NumInstructions * 4;
	nvs->program = calloc(nvs->program_alloc_size, sizeof(uint32_t));
	nvs->program_size    = 0;
	nvs->program_current = 0;

	if (!pass2_translate(nvs,
			     ((nvsSubroutine*)nvs->program_tree)->insn_head)) {
		free(nvs->program);
		nvs->program = NULL;
		return GL_FALSE;
	}

	/* Shrink allocated memory to only what we need */
	nvs->program = realloc(nvs->program,
			       nvs->program_size * sizeof(uint32_t));
	nvs->program_alloc_size = nvs->program_size;

	nvs->translated  = 1;
	nvs->on_hardware = 0;

	if (NOUVEAU_DEBUG & DEBUG_SHADERS) {
		fflush(stdout); fflush(stderr);
		fprintf(stderr, "-----------MESA PROGRAM target=%s, id=0x%x\n",
				_mesa_lookup_enum_by_nr(
					nvs->mesa.vp.Base.Target),
				nvs->mesa.vp.Base.Id);
		fflush(stdout); fflush(stderr);
		_mesa_print_program(&nvs->mesa.vp.Base);
		fflush(stdout); fflush(stderr);
		fprintf(stderr, "^^^^^^^^^^^^^^^^MESA PROGRAM\n");
		fflush(stdout); fflush(stderr);
		fprintf(stderr, "----------------NV PROGRAM\n");
		fflush(stdout); fflush(stderr);
		nvsDisasmHWShader(nvs);
		fflush(stdout); fflush(stderr);
		fprintf(stderr, "^^^^^^^^^^^^^^^^NV PROGRAM\n");
		fflush(stdout); fflush(stderr);
	}

	return GL_TRUE;
}
/**
 * Parse the vertex program string.  If success, update the given
 * vertex_program object with the new program.  Else, leave the vertex_program
 * object unchanged.
 */
void
_mesa_parse_arb_vertex_program(struct gl_context *ctx, GLenum target,
			       const GLvoid *str, GLsizei len,
			       struct gl_vertex_program *program)
{
   struct gl_program prog;
   struct asm_parser_state state;

   ASSERT(target == GL_VERTEX_PROGRAM_ARB);

   memset(&prog, 0, sizeof(prog));
   memset(&state, 0, sizeof(state));
   state.prog = &prog;

   if (!_mesa_parse_arb_program(ctx, target, (const GLubyte*) str, len,
				&state)) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glProgramString(bad program)");
      return;
   }

   if (program->Base.String != NULL)
      free(program->Base.String);

   /* Copy the relevant contents of the arb_program struct into the 
    * vertex_program struct.
    */
   program->Base.String          = prog.String;
   program->Base.NumInstructions = prog.NumInstructions;
   program->Base.NumTemporaries  = prog.NumTemporaries;
   program->Base.NumParameters   = prog.NumParameters;
   program->Base.NumAttributes   = prog.NumAttributes;
   program->Base.NumAddressRegs  = prog.NumAddressRegs;
   program->Base.NumNativeInstructions = prog.NumNativeInstructions;
   program->Base.NumNativeTemporaries = prog.NumNativeTemporaries;
   program->Base.NumNativeParameters = prog.NumNativeParameters;
   program->Base.NumNativeAttributes = prog.NumNativeAttributes;
   program->Base.NumNativeAddressRegs = prog.NumNativeAddressRegs;
   program->Base.InputsRead     = prog.InputsRead;
   program->Base.OutputsWritten = prog.OutputsWritten;
   program->Base.IndirectRegisterFiles = prog.IndirectRegisterFiles;
   program->IsPositionInvariant = (state.option.PositionInvariant)
      ? GL_TRUE : GL_FALSE;

   if (program->Base.Instructions)
      free(program->Base.Instructions);
   program->Base.Instructions = prog.Instructions;

   if (program->Base.Parameters)
      _mesa_free_parameter_list(program->Base.Parameters);
   program->Base.Parameters = prog.Parameters; 

#if DEBUG_VP
   printf("____________Vertex program %u __________\n", program->Base.Id);
   _mesa_print_program(&program->Base);
#endif
}
Exemplo n.º 5
0
/**
 * Combine basic bitmap fragment program with the user-defined program.
 * \param st  current context
 * \param fpIn  the incoming fragment program
 * \param fpOut  the new fragment program which does fragment culling
 * \param bitmap_sampler  sampler number for the bitmap texture
 */
void
st_make_bitmap_fragment_program(struct st_context *st,
                                struct gl_fragment_program *fpIn,
                                struct gl_fragment_program **fpOut,
                                GLuint *bitmap_sampler)
{
   struct st_fragment_program *bitmap_prog;
   struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
   struct gl_program *newProg;
   uint sampler;

   /*
    * Generate new program which is the user-defined program prefixed
    * with the bitmap sampler/kill instructions.
    */
   sampler = find_free_bit(fpIn->Base.SamplersUsed);
   
   if (stfpIn->glsl_to_tgsi)
      newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
   else {
      bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);

      newProg = _mesa_combine_programs(st->ctx,
                                       &bitmap_prog->Base.Base,
                                       &fpIn->Base);
      /* done with this after combining */
      st_reference_fragprog(st, &bitmap_prog, NULL);
   }

#if 0
   {
      printf("Combined bitmap program:\n");
      _mesa_print_program(newProg);
      printf("InputsRead: 0x%x\n", newProg->InputsRead);
      printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
      _mesa_print_parameter_list(newProg->Parameters);
   }
#endif

   /* return results */
   *fpOut = (struct gl_fragment_program *) newProg;
   *bitmap_sampler = sampler;
}
Exemplo n.º 6
0
/**
 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
 * instruction is the first instruction to write to register T0.  The are
 * several lowering passes done in GLSL IR (e.g. branches and
 * relative addressing) that create a large number of conditional assignments
 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
 *
 * Here is why this conversion is safe:
 * CMP T0, T1 T2 T0 can be expanded to:
 * if (T1 < 0.0)
 * 	MOV T0, T2;
 * else
 * 	MOV T0, T0;
 *
 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
 * as the original program.  If (T1 < 0.0) evaluates to false, executing
 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
 * because any instruction that was going to read from T0 after this was going
 * to read a garbage value anyway.
 */
static void
_mesa_simplify_cmp(struct gl_program * program)
{
   GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
   GLuint i;

   if (dbg) {
      printf("Optimize: Begin reads without writes\n");
      _mesa_print_program(program);
   }

   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
      tempWrites[i] = 0;
   }

   for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
      outputWrites[i] = 0;
   }

   for (i = 0; i < program->NumInstructions; i++) {
      struct prog_instruction *inst = program->Instructions + i;
      GLuint prevWriteMask;

      /* Give up if we encounter relative addressing or flow control. */
      if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
         return;
      }

      if (inst->DstReg.File == PROGRAM_OUTPUT) {
         assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
         prevWriteMask = outputWrites[inst->DstReg.Index];
         outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
      } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
         assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
         prevWriteMask = tempWrites[inst->DstReg.Index];
         tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
      } else {
         /* No other register type can be a destination register. */
         continue;
      }

      /* For a CMP to be considered a conditional write, the destination
       * register and source register two must be the same. */
      if (inst->Opcode == OPCODE_CMP
          && !(inst->DstReg.WriteMask & prevWriteMask)
          && inst->SrcReg[2].File == inst->DstReg.File
          && inst->SrcReg[2].Index == inst->DstReg.Index
          && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {

         inst->Opcode = OPCODE_MOV;
         inst->SrcReg[0] = inst->SrcReg[1];

	 /* Unused operands are expected to have the file set to
	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
	  * all of the sources.
	  */
	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
      }
   }
   if (dbg) {
      printf("Optimize: End reads without writes\n");
      _mesa_print_program(program);
   }
}
Exemplo n.º 7
0
/**
 * This function implements "Linear Scan Register Allocation" to reduce
 * the number of temporary registers used by the program.
 *
 * We compute the "live interval" for all temporary registers then
 * examine the overlap of the intervals to allocate new registers.
 * Basically, if two intervals do not overlap, they can use the same register.
 */
static void
_mesa_reallocate_registers(struct gl_program *prog)
{
   struct interval_list liveIntervals;
   GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   GLuint i;
   GLint maxTemp = -1;

   if (dbg) {
      printf("Optimize: Begin live-interval register reallocation\n");
      _mesa_print_program(prog);
   }

   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
      registerMap[i] = -1;
      usedRegs[i] = GL_FALSE;
   }

   if (!find_live_intervals(prog, &liveIntervals)) {
      if (dbg)
         printf("Aborting register reallocation\n");
      return;
   }

   {
      struct interval_list activeIntervals;
      activeIntervals.Num = 0;

      /* loop over live intervals, allocating a new register for each */
      for (i = 0; i < liveIntervals.Num; i++) {
         const struct interval *live = liveIntervals.Intervals + i;

         if (dbg)
            printf("Consider register %u\n", live->Reg);

         /* Expire old intervals.  Intervals which have ended with respect
          * to the live interval can have their remapped registers freed.
          */
         {
            GLint j;
            for (j = 0; j < (GLint) activeIntervals.Num; j++) {
               const struct interval *inv = activeIntervals.Intervals + j;
               if (inv->End >= live->Start) {
                  /* Stop now.  Since the activeInterval list is sorted
                   * we know we don't have to go further.
                   */
                  break;
               }
               else {
                  /* Interval 'inv' has expired */
                  const GLint regNew = registerMap[inv->Reg];
                  ASSERT(regNew >= 0);

                  if (dbg)
                     printf("  expire interval for reg %u\n", inv->Reg);

                  /* remove interval j from active list */
                  remove_interval(&activeIntervals, inv);
                  j--;  /* counter-act j++ in for-loop above */

                  /* return register regNew to the free pool */
                  if (dbg)
                     printf("  free reg %d\n", regNew);
                  ASSERT(usedRegs[regNew] == GL_TRUE);
                  usedRegs[regNew] = GL_FALSE;
               }
            }
         }

         /* find a free register for this live interval */
         {
            const GLint k = alloc_register(usedRegs);
            if (k < 0) {
               /* out of registers, give up */
               return;
            }
            registerMap[live->Reg] = k;
            maxTemp = MAX2(maxTemp, k);
            if (dbg)
               printf("  remap register %u -> %d\n", live->Reg, k);
         }

         /* Insert this live interval into the active list which is sorted
          * by increasing end points.
          */
         insert_interval_by_end(&activeIntervals, live);
      }
   }

   if (maxTemp + 1 < (GLint) liveIntervals.Num) {
      /* OK, we've reduced the number of registers needed.
       * Scan the program and replace all the old temporary register
       * indexes with the new indexes.
       */
      replace_regs(prog, PROGRAM_TEMPORARY, registerMap);

      prog->NumTemporaries = maxTemp + 1;
   }

   if (dbg) {
      printf("Optimize: End live-interval register reallocation\n");
      printf("Num temp regs before: %u  after: %u\n",
                   liveIntervals.Num, maxTemp + 1);
      _mesa_print_program(prog);
   }
}
Exemplo n.º 8
0
/**
 * Try to remove extraneous MOV instructions from the given program.
 */
static void
_mesa_remove_extra_moves(struct gl_program *prog)
{
   GLboolean *removeInst; /* per-instruction removal flag */
   GLuint i, rem, loopNesting = 0, subroutineNesting = 0;

   if (dbg) {
      _mesa_printf("Optimize: Begin remove extra moves\n");
      _mesa_print_program(prog);
   }

   removeInst = (GLboolean *)
      _mesa_calloc(prog->NumInstructions * sizeof(GLboolean));

   /*
    * Look for sequences such as this:
    *    FOO tmpX, arg0, arg1;
    *    MOV tmpY, tmpX;
    * and convert into:
    *    FOO tmpY, arg0, arg1;
    */

   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *inst = prog->Instructions + i;

      switch (inst->Opcode) {
      case OPCODE_BGNLOOP:
         loopNesting++;
         break;
      case OPCODE_ENDLOOP:
         loopNesting--;
         break;
      case OPCODE_BGNSUB:
         subroutineNesting++;
         break;
      case OPCODE_ENDSUB:
         subroutineNesting--;
         break;
      case OPCODE_MOV:
         if (i > 0 &&
             loopNesting == 0 &&
             subroutineNesting == 0 &&
             inst->SrcReg[0].File == PROGRAM_TEMPORARY &&
             inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) {
            /* see if this MOV can be removed */
            const GLuint tempIndex = inst->SrcReg[0].Index;
            struct prog_instruction *prevInst;
            GLuint prevI;

            /* get pointer to previous instruction */
            prevI = i - 1;
            while (removeInst[prevI] && prevI > 0)
               prevI--;
            prevInst = prog->Instructions + prevI;

            if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
                prevInst->DstReg.Index == tempIndex &&
                prevInst->DstReg.WriteMask == WRITEMASK_XYZW) {

               enum temp_use next_use =
                  find_next_temp_use(prog, i + 1, tempIndex);

               if (next_use == WRITE || next_use == END) {
                  /* OK, we can safely remove this MOV instruction.
                   * Transform:
                   *   prevI: FOO tempIndex, x, y;
                   *       i: MOV z, tempIndex;
                   * Into:
                   *   prevI: FOO z, x, y;
                   */

                  /* patch up prev inst */
                  prevInst->DstReg.File = inst->DstReg.File;
                  prevInst->DstReg.Index = inst->DstReg.Index;

                  /* flag this instruction for removal */
                  removeInst[i] = GL_TRUE;

                  if (dbg) {
                     _mesa_printf("Remove MOV at %u\n", i);
                     _mesa_printf("new prev inst %u: ", prevI);
                     _mesa_print_instruction(prevInst);
                  }
               }
            }
         }
         break;
      default:
         ; /* nothing */
      }
   }

   /* now remove the instructions which aren't needed */
   rem = remove_instructions(prog, removeInst);

   if (dbg) {
      _mesa_printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
      /*_mesa_print_program(prog);*/
   }
}
Exemplo n.º 9
0
/**
 * Translate a Mesa fragment shader into a TGSI shader using extra info in
 * the key.
 * \return  new fragment program variant
 */
static struct st_fp_variant *
st_translate_fragment_program(struct st_context *st,
                              struct st_fragment_program *stfp,
                              const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   GLboolean deleteFP = GL_FALSE;

   GLuint outputMapping[FRAG_RESULT_MAX];
   GLuint inputMapping[VARYING_SLOT_MAX];
   GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint interpLocation[PIPE_MAX_SHADER_INPUTS];
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   if (!variant)
      return NULL;

   assert(!(key->bitmap && key->drawpixels));

   if (key->bitmap) {
      /* glBitmap drawing */
      struct gl_fragment_program *fp; /* we free this temp program below */

      st_make_bitmap_fragment_program(st, &stfp->Base,
                                      &fp, &variant->bitmap_sampler);

      variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
      stfp = st_fragment_program(fp);
      deleteFP = GL_TRUE;
   }
   else if (key->drawpixels) {
      /* glDrawPixels drawing */
      struct gl_fragment_program *fp; /* we free this temp program below */

      if (key->drawpixels_z || key->drawpixels_stencil) {
         fp = st_make_drawpix_z_stencil_program(st, key->drawpixels_z,
                                                key->drawpixels_stencil);
      }
      else {
         /* RGBA */
         st_make_drawpix_fragment_program(st, &stfp->Base, &fp);
         variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
         deleteFP = GL_TRUE;
      }
      stfp = st_fragment_program(fp);
   }

   if (!stfp->glsl_to_tgsi)
      _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   inputsRead = stfp->Base.Base.InputsRead;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         if (stfp->Base.IsCentroid & BITFIELD64_BIT(attr))
            interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTROID;
         else if (stfp->Base.IsSample & BITFIELD64_BIT(attr))
            interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE;
         else
            interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER;

         if (key->persample_shading)
            interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr],
                                                   TRUE);
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr],
                                                   TRUE);
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
            /* In most cases, there is nothing special about these
             * inputs, so adopt a convention to use the generic
             * semantic name and the mesa VARYING_SLOT_ number as the
             * index.
             *
             * All that is required is that the vertex shader labels
             * its own outputs similarly, and that the vertex shader
             * generates at least every output required by the
             * fragment shader plus fixed-function hardware (such as
             * BFC).
             *
             * However, some drivers may need us to identify the PNTC and TEXi
             * varyings if, for example, their capability to replace them with
             * sprite coordinates is limited.
             */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            /* fall through */
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] =
                  st_translate_interp(stfp->Base.InterpQualifier[attr], FALSE);
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_TEX0);
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else
               interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr],
                                                      FALSE);
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   {
      uint numColors = 0;
      GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;

      /* if z is written, emit that first */
      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
         fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
         fs_output_semantic_index[fs_num_outputs] = 0;
         outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
         fs_num_outputs++;
         outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
      }

      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
         fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
         fs_output_semantic_index[fs_num_outputs] = 0;
         outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
         fs_num_outputs++;
         outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
      }

      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
         fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
         fs_output_semantic_index[fs_num_outputs] = 0;
         outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
         fs_num_outputs++;
         outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
      }

      /* handle remaining outputs (color) */
      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
         if (outputsWritten & BITFIELD64_BIT(attr)) {
            switch (attr) {
            case FRAG_RESULT_DEPTH:
            case FRAG_RESULT_STENCIL:
            case FRAG_RESULT_SAMPLE_MASK:
               /* handled above */
               assert(0);
               break;
            case FRAG_RESULT_COLOR:
               write_all = GL_TRUE; /* fallthrough */
            default:
               assert(attr == FRAG_RESULT_COLOR ||
                      (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
               fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
               fs_output_semantic_index[fs_num_outputs] = numColors;
               outputMapping[attr] = fs_num_outputs;
               numColors++;
               break;
            }

            fs_num_outputs++;
         }
      }
   }

   ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
   if (ureg == NULL) {
      free(variant);
      return NULL;
   }

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base.Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base.Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->Base.FragDepthLayout != FRAG_DEPTH_LAYOUT_NONE) {
      switch (stfp->Base.FragDepthLayout) {
      case FRAG_DEPTH_LAYOUT_ANY:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_ANY);
         break;
      case FRAG_DEPTH_LAYOUT_GREATER:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_GREATER);
         break;
      case FRAG_DEPTH_LAYOUT_LESS:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_LESS);
         break;
      case FRAG_DEPTH_LAYOUT_UNCHANGED:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
         break;
      default:
         assert(0);
      }
   }

   if (stfp->glsl_to_tgsi)
      st_translate_program(st->ctx,
                           TGSI_PROCESSOR_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base.Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           interpLocation,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index, FALSE,
                           key->clamp_color );
   else
      st_translate_mesa_program(st->ctx,
                                TGSI_PROCESSOR_FRAGMENT,
                                ureg,
                                &stfp->Base.Base,
                                /* inputs */
                                fs_num_inputs,
                                inputMapping,
                                input_semantic_name,
                                input_semantic_index,
                                interpMode,
                                /* outputs */
                                fs_num_outputs,
                                outputMapping,
                                fs_output_semantic_name,
                                fs_output_semantic_index, FALSE,
                                key->clamp_color);

   variant->tgsi.tokens = ureg_get_tokens( ureg, NULL );
   ureg_destroy( ureg );

   if (ST_DEBUG & DEBUG_TGSI) {
      tgsi_dump(variant->tgsi.tokens, 0/*TGSI_DUMP_VERBOSE*/);
      debug_printf("\n");
   }

   /* fill in variant */
   variant->driver_shader = pipe->create_fs_state(pipe, &variant->tgsi);
   variant->key = *key;

   if (deleteFP) {
      /* Free the temporary program made above */
      struct gl_fragment_program *fp = &stfp->Base;
      _mesa_reference_fragprog(st->ctx, &fp, NULL);
   }

   return variant;
}
Exemplo n.º 10
0
/**
 * Translate a geometry program to create a new variant.
 */
static struct st_gp_variant *
st_translate_geometry_program(struct st_context *st,
                              struct st_geometry_program *stgp,
                              const struct st_gp_variant_key *key)
{
   GLuint inputMapping[VARYING_SLOT_MAX];
   GLuint outputMapping[VARYING_SLOT_MAX];
   struct pipe_context *pipe = st->pipe;
   GLuint attr;

   uint gs_num_inputs = 0;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];

   ubyte gs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte gs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint gs_num_outputs = 0;

   GLint i;
   struct ureg_program *ureg;
   struct pipe_shader_state state = {0};
   struct st_gp_variant *gpv;

   gpv = CALLOC_STRUCT(st_gp_variant);
   if (!gpv)
      return NULL;

   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
   if (ureg == NULL) {
      free(gpv);
      return NULL;
   }

   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((stgp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = gs_num_inputs++;

         inputMapping[attr] = slot;

         switch (attr) {
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_VERTEX:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_PSIZ:
            input_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
            input_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            assert(attr >= VARYING_SLOT_VAR0 && attr < VARYING_SLOT_MAX);
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] =
               st_get_generic_varying_index(st, attr);
         break;
         }
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      gs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      gs_output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stgp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) {
         GLuint slot = gs_num_outputs++;

         outputMapping[attr] = slot;

         switch (attr) {
         case VARYING_SLOT_POS:
            assert(slot == 0);
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_COL0:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_COL1:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            gs_output_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_BFC0:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_BFC1:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            gs_output_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_FOGC:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_PSIZ:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_VERTEX:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            gs_output_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_LAYER:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_VIEWPORT:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            gs_output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               gs_output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               gs_output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            assert(slot < ARRAY_SIZE(gs_output_semantic_name));
            assert(attr >= VARYING_SLOT_VAR0);
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            gs_output_semantic_index[slot] =
               st_get_generic_varying_index(st, attr);
         break;
         }
      }
   }

   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
                 stgp->Base.VerticesOut);
   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);

   st_translate_program(st->ctx,
                        TGSI_PROCESSOR_GEOMETRY,
                        ureg,
                        stgp->glsl_to_tgsi,
                        &stgp->Base.Base,
                        /* inputs */
                        gs_num_inputs,
                        inputMapping,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        NULL,
                        /* outputs */
                        gs_num_outputs,
                        outputMapping,
                        gs_output_semantic_name,
                        gs_output_semantic_index,
                        FALSE,
                        FALSE);

   state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   st_translate_stream_output_info(stgp->glsl_to_tgsi,
                                   outputMapping,
                                   &state.stream_output);

   if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
      _mesa_print_program(&stgp->Base.Base);
      debug_printf("\n");
   }

   if (ST_DEBUG & DEBUG_TGSI) {
      tgsi_dump(state.tokens, 0);
      debug_printf("\n");
   }

   /* fill in new variant */
   gpv->driver_shader = pipe->create_gs_state(pipe, &state);
   gpv->key = *key;

   ureg_free_tokens(state.tokens);
   return gpv;
}
void r300TranslateFragmentShader(r300ContextPtr r300,
				 struct r300_fragment_program *fp)
{
	struct r300_fragment_program_external_state state;

	build_state(r300, fp, &state);
	if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
		/* TODO: cache compiled programs */
		fp->translated = GL_FALSE;
		_mesa_memcpy(&fp->state, &state, sizeof(state));
	}

	if (!fp->translated) {
		struct r300_fragment_program_compiler compiler;

		compiler.r300 = r300;
		compiler.fp = fp;
		compiler.code = &fp->code;
		compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);

		if (RADEON_DEBUG & DEBUG_PIXEL) {
			_mesa_printf("Fragment Program: Initial program:\n");
			_mesa_print_program(compiler.program);
		}

		insert_WPOS_trailer(&compiler);

		struct radeon_program_transformation transformations[] = {
			{ &transform_TEX, &compiler },
			{ &radeonTransformALU, 0 },
			{ &radeonTransformTrigSimple, 0 }
		};
		radeonLocalTransform(
			r300->radeon.glCtx,
			compiler.program,
			3, transformations);

		if (RADEON_DEBUG & DEBUG_PIXEL) {
			_mesa_printf("Fragment Program: After native rewrite:\n");
			_mesa_print_program(compiler.program);
		}

		struct radeon_nqssadce_descr nqssadce = {
			.Init = &nqssadce_init,
			.IsNativeSwizzle = &r300FPIsNativeSwizzle,
			.BuildSwizzle = &r300FPBuildSwizzle,
			.RewriteDepthOut = GL_TRUE
		};
		radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);

		if (RADEON_DEBUG & DEBUG_PIXEL) {
			_mesa_printf("Compiler: after NqSSA-DCE:\n");
			_mesa_print_program(compiler.program);
		}

		if (!r300FragmentProgramEmit(&compiler))
			fp->error = GL_TRUE;

		/* Subtle: Rescue any parameters that have been added during transformations */
		_mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
		fp->mesa_program.Base.Parameters = compiler.program->Parameters;
		compiler.program->Parameters = 0;

		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);

		if (!fp->error)
			fp->translated = GL_TRUE;
		if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
			r300FragmentProgramDump(fp, &fp->code);
		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
	}

	update_params(r300, fp);
}
Exemplo n.º 12
0
/**
 * Translate Mesa program to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input

 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for each output
 * \param tokens  array to store translated tokens in
 * \param maxTokens  size of the tokens array
 *
 * \return number of tokens placed in 'tokens' buffer, or zero if error
 */
GLuint
st_translate_mesa_program(
   GLcontext *ctx,
   uint procType,
   const struct gl_program *program,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   const GLbitfield inputFlags[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[],
   const GLbitfield outputFlags[],
   struct tgsi_token *tokens,
   GLuint maxTokens )
{
   GLuint i;
   GLuint ti;  /* token index */
   struct tgsi_header *header;
   struct tgsi_processor *processor;
   GLuint preamble_size = 0;
   GLuint immediates[1000];
   GLuint numImmediates = 0;
   GLboolean insideSubroutine = GL_FALSE;
   GLboolean indirectAccess = GL_FALSE;
   GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
   GLint wposTemp = -1, winHeightConst = -1;

   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
          procType == TGSI_PROCESSOR_VERTEX);

   find_temporaries(program, tempsUsed);

   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      if (program->InputsRead & FRAG_BIT_WPOS) {
         /* Fragment program uses fragment position input.
          * Need to replace instances of INPUT[WPOS] with temp T
          * where T = INPUT[WPOS] by y is inverted.
          */
         static const gl_state_index winSizeState[STATE_LENGTH]
            = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
         winHeightConst = _mesa_add_state_reference(program->Parameters,
                                                    winSizeState);
         wposTemp = find_free_temporary(tempsUsed);
      }
   }


   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();

   header = (struct tgsi_header *) &tokens[1];
   *header = tgsi_build_header();

   processor = (struct tgsi_processor *) &tokens[2];
   *processor = tgsi_build_processor( procType, header );

   ti = 3;

   /*
    * Declare input attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numInputs; i++) {
         struct tgsi_full_declaration fulldecl;
         fulldecl = make_input_decl(i,
                                    GL_TRUE, interpMode[i],
                                    TGSI_WRITEMASK_XYZW,
                                    GL_TRUE, inputSemanticName[i],
                                    inputSemanticIndex[i],
                                    inputFlags[i]);
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }
   else {
      /* vertex prog */
      /* XXX: this could probaby be merged with the clause above.
       * the only difference is the semantic tags.
       */
      for (i = 0; i < numInputs; i++) {
         struct tgsi_full_declaration fulldecl;
         fulldecl = make_input_decl(i,
                                    GL_FALSE, 0,
                                    TGSI_WRITEMASK_XYZW,
                                    GL_FALSE, 0, 0,
                                    inputFlags[i]);
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }

   /*
    * Declare output attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numOutputs; i++) {
         struct tgsi_full_declaration fulldecl;
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            fulldecl = make_output_decl(i,
                                        TGSI_SEMANTIC_POSITION, /* Z / Depth */
                                        outputSemanticIndex[i],
                                        TGSI_WRITEMASK_Z,
                                        outputFlags[i]);
            break;
         case TGSI_SEMANTIC_COLOR:
            fulldecl = make_output_decl(i,
                                        TGSI_SEMANTIC_COLOR,
                                        outputSemanticIndex[i],
                                        TGSI_WRITEMASK_XYZW,
                                        outputFlags[i]);
            break;
         default:
            assert(0);
            return 0;
         }
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }
   else {
      /* vertex prog */
      for (i = 0; i < numOutputs; i++) {
         struct tgsi_full_declaration fulldecl;
         fulldecl = make_output_decl(i,
                                     outputSemanticName[i],
                                     outputSemanticIndex[i],
                                     TGSI_WRITEMASK_XYZW,
                                     outputFlags[i]);
         ti += tgsi_build_full_declaration(&fulldecl,
                                           &tokens[ti],
                                           header,
                                           maxTokens - ti );
      }
   }

   /* temporary decls */
   {
      GLboolean inside_range = GL_FALSE;
      GLuint start_range = 0;

      tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
      for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
         if (tempsUsed[i] && !inside_range) {
            inside_range = GL_TRUE;
            start_range = i;
         }
         else if (!tempsUsed[i] && inside_range) {
            struct tgsi_full_declaration fulldecl;

            inside_range = GL_FALSE;
            fulldecl = make_temp_decl( start_range, i - 1 );
            ti += tgsi_build_full_declaration(
               &fulldecl,
               &tokens[ti],
               header,
               maxTokens - ti );
         }
      }
   }

   /* Declare address register.
   */
   if (program->NumAddressRegs > 0) {
      struct tgsi_full_declaration fulldecl;

      assert( program->NumAddressRegs == 1 );

      fulldecl = make_addr_decl( 0, 0 );
      ti += tgsi_build_full_declaration(
         &fulldecl,
         &tokens[ti],
         header,
         maxTokens - ti );

      indirectAccess = GL_TRUE;
   }

   /* immediates/literals */
   memset(immediates, ~0, sizeof(immediates));

   /* Emit immediates only when there is no address register in use.
    * FIXME: Be smarter and recognize param arrays -- indirect addressing is
    *        only valid within the referenced array.
    */
   if (program->Parameters && !indirectAccess) {
      for (i = 0; i < program->Parameters->NumParameters; i++) {
         if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
            struct tgsi_full_immediate fullimm;

            fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
            ti += tgsi_build_full_immediate(
               &fullimm,
               &tokens[ti],
               header,
               maxTokens - ti );
            immediates[i] = numImmediates;
            numImmediates++;
         }
      }
   }

   /* constant buffer refs */
   if (program->Parameters) {
      GLint start = -1, end = -1;

      for (i = 0; i < program->Parameters->NumParameters; i++) {
         GLboolean emit = (i == program->Parameters->NumParameters - 1);
         GLboolean matches;

         switch (program->Parameters->Parameters[i].Type) {
         case PROGRAM_ENV_PARAM:
         case PROGRAM_STATE_VAR:
         case PROGRAM_NAMED_PARAM:
         case PROGRAM_UNIFORM:
            matches = GL_TRUE;
            break;
         case PROGRAM_CONSTANT:
            matches = indirectAccess;
            break;
         default:
            matches = GL_FALSE;
         }

         if (matches) {
            if (start == -1) {
               /* begin a sequence */
               start = i;
               end = i;
            }
            else {
               /* continue sequence */
               end = i;
            }
         }
         else {
            if (start != -1) {
               /* end of sequence */
               emit = GL_TRUE;
            }
         }

         if (emit && start >= 0) {
            struct tgsi_full_declaration fulldecl;

            fulldecl = make_constant_decl( start, end );
            ti += tgsi_build_full_declaration(
               &fulldecl,
               &tokens[ti],
               header,
               maxTokens - ti );
            start = end = -1;
         }
      }
   }

   /* texture samplers */
   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
      if (program->SamplersUsed & (1 << i)) {
         struct tgsi_full_declaration fulldecl;

         fulldecl = make_sampler_decl( i );
         ti += tgsi_build_full_declaration(
            &fulldecl,
            &tokens[ti],
            header,
            maxTokens - ti );
      }
   }

   /* invert WPOS fragment input */
   if (wposTemp >= 0) {
      ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
                               inputMapping[FRAG_ATTRIB_WPOS],
                               header, maxTokens - ti);
      preamble_size = 2; /* two instructions added */
   }

   for (i = 0; i < program->NumInstructions; i++) {
      struct tgsi_full_instruction fullinst;

      compile_instruction(
         &program->Instructions[i],
         &fullinst,
         inputMapping,
         outputMapping,
         immediates,
         indirectAccess,
         preamble_size,
         procType,
         &insideSubroutine,
         wposTemp);

      ti += tgsi_build_full_instruction(
         &fullinst,
         &tokens[ti],
         header,
         maxTokens - ti );
   }

#if DEBUG
   if(!tgsi_sanity_check(tokens)) {
      debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
      debug_printf("\nOriginal program:\n%s", program->String);
      debug_printf("\nMesa program:\n");
      _mesa_print_program(program);
      debug_printf("\nTGSI program:\n");
      tgsi_dump(tokens, 0);
      assert(0);
   }
#endif

   return ti;
}
Exemplo n.º 13
0
/**
 * Make fragment shader for glDraw/CopyPixels.  This shader is made
 * by combining the pixel transfer shader with the user-defined shader.
 */
static struct st_fragment_program *
combined_drawpix_fragment_program(GLcontext *ctx)
{
   struct st_context *st = ctx->st;
   struct st_fragment_program *stfp;

   if (st->pixel_xfer.program->serialNo == st->pixel_xfer.xfer_prog_sn
       && st->fp->serialNo == st->pixel_xfer.user_prog_sn) {
      /* the pixel tranfer program has not changed and the user-defined
       * program has not changed, so re-use the combined program.
       */
      stfp = st->pixel_xfer.combined_prog;
   }
   else {
      /* Concatenate the pixel transfer program with the current user-
       * defined program.
       */
      if (is_passthrough_program(&st->fp->Base)) {
         stfp = (struct st_fragment_program *)
            _mesa_clone_program(ctx, &st->pixel_xfer.program->Base.Base);
      }
      else {
#if 0
         printf("Base program:\n");
         _mesa_print_program(&st->fp->Base.Base);
         printf("DrawPix program:\n");
         _mesa_print_program(&st->pixel_xfer.program->Base.Base);
#endif
         stfp = (struct st_fragment_program *)
            _mesa_combine_programs(ctx,
                                   &st->pixel_xfer.program->Base.Base,
                                   &st->fp->Base.Base);
      }

#if 0
      {
         struct gl_program *p = &stfp->Base.Base;
         printf("Combined DrawPixels program:\n");
         _mesa_print_program(p);
         printf("InputsRead: 0x%x\n", p->InputsRead);
         printf("OutputsWritten: 0x%x\n", p->OutputsWritten);
         _mesa_print_parameter_list(p->Parameters);
      }
#endif

      /* translate to TGSI tokens */
      st_translate_fragment_program(st, stfp, NULL);

      /* save new program, update serial numbers */
      st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo;
      st->pixel_xfer.user_prog_sn = st->fp->serialNo;
      st->pixel_xfer.combined_prog_sn = stfp->serialNo;
      /* can't reference new program directly, already have a reference on it */
      st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
      st->pixel_xfer.combined_prog = stfp;
   }

   /* Ideally we'd have updated the pipe constants during the normal
    * st/atom mechanism.  But we can't since this is specific to glDrawPixels.
    */
   st_upload_constants(st, stfp->Base.Base.Parameters, PIPE_SHADER_FRAGMENT);

   return stfp;
}
Exemplo n.º 14
0
/**
 * Shader linker.  Currently:
 *
 * 1. The last attached vertex shader and fragment shader are linked.
 * 2. Varying vars in the two shaders are combined so their locations
 *    agree between the vertex and fragment stages.  They're treated as
 *    vertex program output attribs and as fragment program input attribs.
 * 3. The vertex and fragment programs are cloned and modified to update
 *    src/dst register references so they use the new, linked varying
 *    storage locations.
 */
void
_slang_link(GLcontext *ctx,
            GLhandleARB programObj,
            struct gl_shader_program *shProg)
{
   const struct gl_vertex_program *vertProg = NULL;
   const struct gl_fragment_program *fragProg = NULL;
   GLboolean vertNotify = GL_TRUE, fragNotify = GL_TRUE;
   GLuint numSamplers = 0;
   GLuint i;

   _mesa_clear_shader_program_data(ctx, shProg);

   /* Initialize LinkStatus to "success".  Will be cleared if error. */
   shProg->LinkStatus = GL_TRUE;

   /* check that all programs compiled successfully */
   for (i = 0; i < shProg->NumShaders; i++) {
      if (!shProg->Shaders[i]->CompileStatus) {
         link_error(shProg, "linking with uncompiled shader\n");
         return;
      }
   }

   shProg->Uniforms = _mesa_new_uniform_list();
   shProg->Varying = _mesa_new_parameter_list();

   /*
    * Find the vertex and fragment shaders which define main()
    */
   {
      struct gl_shader *vertShader, *fragShader;
      vertShader = get_main_shader(ctx, shProg, GL_VERTEX_SHADER);
      fragShader = get_main_shader(ctx, shProg, GL_FRAGMENT_SHADER);
      if (vertShader)
         vertProg = vertex_program(vertShader->Program);
      if (fragShader)
         fragProg = fragment_program(fragShader->Program);
      if (!shProg->LinkStatus)
         return;
   }

#if FEATURE_es2_glsl
   /* must have both a vertex and fragment program for ES2 */
   if (!vertProg) {
      link_error(shProg, "missing vertex shader\n");
      return;
   }
   if (!fragProg) {
      link_error(shProg, "missing fragment shader\n");
      return;
   }
#endif

   /*
    * Make copies of the vertex/fragment programs now since we'll be
    * changing src/dst registers after merging the uniforms and varying vars.
    */
   _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL);
   if (vertProg) {
      struct gl_vertex_program *linked_vprog =
         _mesa_clone_vertex_program(ctx, vertProg);
      shProg->VertexProgram = linked_vprog; /* refcount OK */
      /* vertex program ID not significant; just set Id for debugging purposes */
      shProg->VertexProgram->Base.Id = shProg->Name;
      ASSERT(shProg->VertexProgram->Base.RefCount == 1);
   }

   _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL);
   if (fragProg) {
      struct gl_fragment_program *linked_fprog = 
         _mesa_clone_fragment_program(ctx, fragProg);
      shProg->FragmentProgram = linked_fprog; /* refcount OK */
      /* vertex program ID not significant; just set Id for debugging purposes */
      shProg->FragmentProgram->Base.Id = shProg->Name;
      ASSERT(shProg->FragmentProgram->Base.RefCount == 1);
   }

   /* link varying vars */
   if (shProg->VertexProgram) {
      if (!link_varying_vars(ctx, shProg, &shProg->VertexProgram->Base))
         return;
   }
   if (shProg->FragmentProgram) {
      if (!link_varying_vars(ctx, shProg, &shProg->FragmentProgram->Base))
         return;
   }

   /* link uniform vars */
   if (shProg->VertexProgram) {
      if (!link_uniform_vars(ctx, shProg, &shProg->VertexProgram->Base,
                             &numSamplers)) {
         return;
      }
   }
   if (shProg->FragmentProgram) {
      if (!link_uniform_vars(ctx, shProg, &shProg->FragmentProgram->Base,
                             &numSamplers)) {
         return;
      }
   }

   /*_mesa_print_uniforms(shProg->Uniforms);*/

   if (shProg->VertexProgram) {
      if (!_slang_resolve_attributes(shProg, &vertProg->Base,
                                     &shProg->VertexProgram->Base)) {
         return;
      }
   }

   if (shProg->VertexProgram) {
      _slang_update_inputs_outputs(&shProg->VertexProgram->Base);
      _slang_count_temporaries(&shProg->VertexProgram->Base);
      if (!(shProg->VertexProgram->Base.OutputsWritten
	    & BITFIELD64_BIT(VERT_RESULT_HPOS))) {
         /* the vertex program did not compute a vertex position */
         link_error(shProg,
                    "gl_Position was not written by vertex shader\n");
         return;
      }
   }
   if (shProg->FragmentProgram) {
      _slang_count_temporaries(&shProg->FragmentProgram->Base);
      _slang_update_inputs_outputs(&shProg->FragmentProgram->Base);
   }

   /* Check that all the varying vars needed by the fragment shader are
    * actually produced by the vertex shader.
    */
   if (shProg->FragmentProgram) {
      const GLbitfield varyingRead
         = shProg->FragmentProgram->Base.InputsRead >> FRAG_ATTRIB_VAR0;
      const GLbitfield64 varyingWritten = shProg->VertexProgram ?
         shProg->VertexProgram->Base.OutputsWritten >> VERT_RESULT_VAR0 : 0x0;
      if ((varyingRead & varyingWritten) != varyingRead) {
         link_error(shProg,
          "Fragment program using varying vars not written by vertex shader\n");
         return;
      }         
   }

   /* check that gl_FragColor and gl_FragData are not both written to */
   if (shProg->FragmentProgram) {
      const GLbitfield64 outputsWritten =
	 shProg->FragmentProgram->Base.OutputsWritten;
      if ((outputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) &&
          (outputsWritten >= BITFIELD64_BIT(FRAG_RESULT_DATA0))) {
         link_error(shProg, "Fragment program cannot write both gl_FragColor"
                    " and gl_FragData[].\n");
         return;
      }         
   }


   if (fragProg && shProg->FragmentProgram) {
      /* Compute initial program's TexturesUsed info */
      _mesa_update_shader_textures_used(&shProg->FragmentProgram->Base);

      /* notify driver that a new fragment program has been compiled/linked */
      vertNotify = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
                                                 &shProg->FragmentProgram->Base);
      if (ctx->Shader.Flags & GLSL_DUMP) {
         printf("Mesa pre-link fragment program:\n");
         _mesa_print_program(&fragProg->Base);
         _mesa_print_program_parameters(ctx, &fragProg->Base);

         printf("Mesa post-link fragment program:\n");
         _mesa_print_program(&shProg->FragmentProgram->Base);
         _mesa_print_program_parameters(ctx, &shProg->FragmentProgram->Base);
      }
   }

   if (vertProg && shProg->VertexProgram) {
      /* Compute initial program's TexturesUsed info */
      _mesa_update_shader_textures_used(&shProg->VertexProgram->Base);

      /* notify driver that a new vertex program has been compiled/linked */
      fragNotify = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
                                                   &shProg->VertexProgram->Base);
      if (ctx->Shader.Flags & GLSL_DUMP) {
         printf("Mesa pre-link vertex program:\n");
         _mesa_print_program(&vertProg->Base);
         _mesa_print_program_parameters(ctx, &vertProg->Base);

         printf("Mesa post-link vertex program:\n");
         _mesa_print_program(&shProg->VertexProgram->Base);
         _mesa_print_program_parameters(ctx, &shProg->VertexProgram->Base);
      }
   }

   /* Debug: */
   if (0) {
      if (shProg->VertexProgram)
         _mesa_postprocess_program(ctx, &shProg->VertexProgram->Base);
      if (shProg->FragmentProgram)
         _mesa_postprocess_program(ctx, &shProg->FragmentProgram->Base);
   }

   if (ctx->Shader.Flags & GLSL_DUMP) {
      printf("Varying vars:\n");
      _mesa_print_parameter_list(shProg->Varying);
      if (shProg->InfoLog) {
         printf("Info Log: %s\n", shProg->InfoLog);
      }
   }

   if (!vertNotify || !fragNotify) {
      /* driver rejected one/both of the vertex/fragment programs */
      if (!shProg->InfoLog) {
	 link_error(shProg,
		    "Vertex and/or fragment program rejected by driver\n");
      }
   }
   else {
      shProg->LinkStatus = (shProg->VertexProgram || shProg->FragmentProgram);
   }
}
Exemplo n.º 15
0
/**
 * Translate a Mesa fragment shader into a TGSI shader using extra info in
 * the key.
 * \return  new fragment program variant
 */
static struct st_fp_variant *
st_translate_fragment_program(struct st_context *st,
                              struct st_fragment_program *stfp,
                              const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   GLboolean deleteFP = GL_FALSE;

   if (!variant)
      return NULL;

   assert(!(key->bitmap && key->drawpixels));

#if FEATURE_drawpix
   if (key->bitmap) {
      /* glBitmap drawing */
      struct gl_fragment_program *fp; /* we free this temp program below */

      st_make_bitmap_fragment_program(st, &stfp->Base,
                                      &fp, &variant->bitmap_sampler);

      variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
      stfp = st_fragment_program(fp);
      deleteFP = GL_TRUE;
   }
   else if (key->drawpixels) {
      /* glDrawPixels drawing */
      struct gl_fragment_program *fp; /* we free this temp program below */

      if (key->drawpixels_z || key->drawpixels_stencil) {
         fp = st_make_drawpix_z_stencil_program(st, key->drawpixels_z,
                                                key->drawpixels_stencil);
      }
      else {
         /* RGBA */
         st_make_drawpix_fragment_program(st, &stfp->Base, &fp);
         variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
         deleteFP = GL_TRUE;
      }
      stfp = st_fragment_program(fp);
   }
#endif

   if (!stfp->tgsi.tokens) {
      /* need to translate Mesa instructions to TGSI now */
      GLuint outputMapping[FRAG_RESULT_MAX];
      GLuint inputMapping[FRAG_ATTRIB_MAX];
      GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
      GLuint attr;
      enum pipe_error error;
      const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
      struct ureg_program *ureg;
      GLboolean write_all = GL_FALSE;

      ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
      ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
      uint fs_num_inputs = 0;

      ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
      ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
      uint fs_num_outputs = 0;


      _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);

      /*
       * Convert Mesa program inputs to TGSI input register semantics.
       */
      for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
         if (inputsRead & (1 << attr)) {
            const GLuint slot = fs_num_inputs++;

            inputMapping[attr] = slot;

            switch (attr) {
            case FRAG_ATTRIB_WPOS:
               input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            case FRAG_ATTRIB_COL0:
               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            case FRAG_ATTRIB_COL1:
               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
               input_semantic_index[slot] = 1;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            case FRAG_ATTRIB_FOGC:
               input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            case FRAG_ATTRIB_FACE:
               input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
               break;
               /* In most cases, there is nothing special about these
                * inputs, so adopt a convention to use the generic
                * semantic name and the mesa FRAG_ATTRIB_ number as the
                * index. 
                * 
                * All that is required is that the vertex shader labels
                * its own outputs similarly, and that the vertex shader
                * generates at least every output required by the
                * fragment shader plus fixed-function hardware (such as
                * BFC).
                * 
                * There is no requirement that semantic indexes start at
                * zero or be restricted to a particular range -- nobody
                * should be building tables based on semantic index.
                */
            case FRAG_ATTRIB_PNTC:
            case FRAG_ATTRIB_TEX0:
            case FRAG_ATTRIB_TEX1:
            case FRAG_ATTRIB_TEX2:
            case FRAG_ATTRIB_TEX3:
            case FRAG_ATTRIB_TEX4:
            case FRAG_ATTRIB_TEX5:
            case FRAG_ATTRIB_TEX6:
            case FRAG_ATTRIB_TEX7:
            case FRAG_ATTRIB_VAR0:
            default:
               /* Actually, let's try and zero-base this just for
                * readability of the generated TGSI.
                */
               assert(attr >= FRAG_ATTRIB_TEX0);
               input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
               input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
               if (attr == FRAG_ATTRIB_PNTC)
                  interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               else
                  interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
         }
         else {
            inputMapping[attr] = -1;
         }
      }

      /*
       * Semantics and mapping for outputs
       */
      {
         uint numColors = 0;
         GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;

         /* if z is written, emit that first */
         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
            fs_output_semantic_index[fs_num_outputs] = 0;
            outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
            fs_num_outputs++;
            outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
         }

         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
            fs_output_semantic_index[fs_num_outputs] = 0;
            outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
            fs_num_outputs++;
            outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
         }

         /* handle remaning outputs (color) */
         for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
            if (outputsWritten & BITFIELD64_BIT(attr)) {
               switch (attr) {
               case FRAG_RESULT_DEPTH:
               case FRAG_RESULT_STENCIL:
                  /* handled above */
                  assert(0);
                  break;
               case FRAG_RESULT_COLOR:
                  write_all = GL_TRUE; /* fallthrough */
               default:
                  assert(attr == FRAG_RESULT_COLOR ||
                         (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
                  fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
                  fs_output_semantic_index[fs_num_outputs] = numColors;
                  outputMapping[attr] = fs_num_outputs;
                  numColors++;
                  break;
               }

               fs_num_outputs++;
            }
         }
      }

      ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
      if (ureg == NULL)
         return NULL;

      if (ST_DEBUG & DEBUG_MESA) {
         _mesa_print_program(&stfp->Base.Base);
         _mesa_print_program_parameters(st->ctx, &stfp->Base.Base);
         debug_printf("\n");
      }
      if (write_all == GL_TRUE)
         ureg_property_fs_color0_writes_all_cbufs(ureg, 1);

      error = st_translate_mesa_program(st->ctx,
                                        TGSI_PROCESSOR_FRAGMENT,
                                        ureg,
                                        &stfp->Base.Base,
                                        /* inputs */
                                        fs_num_inputs,
                                        inputMapping,
                                        input_semantic_name,
                                        input_semantic_index,
                                        interpMode,
                                        /* outputs */
                                        fs_num_outputs,
                                        outputMapping,
                                        fs_output_semantic_name,
                                        fs_output_semantic_index, FALSE );

      stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
      ureg_destroy( ureg );
   }

   /* fill in variant */
   variant->driver_shader = pipe->create_fs_state(pipe, &stfp->tgsi);
   variant->key = *key;

   if (ST_DEBUG & DEBUG_TGSI) {
      tgsi_dump( stfp->tgsi.tokens, 0/*TGSI_DUMP_VERBOSE*/ );
      debug_printf("\n");
   }

   if (deleteFP) {
      /* Free the temporary program made above */
      struct gl_fragment_program *fp = &stfp->Base;
      _mesa_reference_fragprog(st->ctx, &fp, NULL);
   }

   return variant;
}
Exemplo n.º 16
0
/**
 * Translate a geometry program to create a new variant.
 */
static struct st_gp_variant *
st_translate_geometry_program(struct st_context *st,
                              struct st_geometry_program *stgp,
                              const struct st_gp_variant_key *key)
{
   GLuint inputMapping[GEOM_ATTRIB_MAX];
   GLuint outputMapping[GEOM_RESULT_MAX];
   struct pipe_context *pipe = st->pipe;
   enum pipe_error error;
   GLuint attr;
   const GLbitfield inputsRead = stgp->Base.Base.InputsRead;
   GLuint vslot = 0;
   GLuint num_generic = 0;

   uint gs_num_inputs = 0;
   uint gs_builtin_inputs = 0;
   uint gs_array_offset = 0;

   ubyte gs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte gs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint gs_num_outputs = 0;

   GLint i;
   GLuint maxSlot = 0;
   struct ureg_program *ureg;

   struct st_gp_variant *gpv;

   gpv = CALLOC_STRUCT(st_gp_variant);
   if (!gpv)
      return NULL;

   _mesa_remove_output_reads(&stgp->Base.Base, PROGRAM_OUTPUT);
   _mesa_remove_output_reads(&stgp->Base.Base, PROGRAM_VARYING);

   ureg = ureg_create( TGSI_PROCESSOR_GEOMETRY );
   if (ureg == NULL) {
      FREE(gpv);
      return NULL;
   }

   /* which vertex output goes to the first geometry input */
   vslot = 0;

   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < GEOM_ATTRIB_MAX; attr++) {
      if (inputsRead & (1 << attr)) {
         const GLuint slot = gs_num_inputs;

         gs_num_inputs++;

         inputMapping[attr] = slot;

         stgp->input_map[slot + gs_array_offset] = vslot - gs_builtin_inputs;
         stgp->input_to_index[attr] = vslot;
         stgp->index_to_input[vslot] = attr;
         ++vslot;

         if (attr != GEOM_ATTRIB_PRIMITIVE_ID) {
            gs_array_offset += 2;
         } else
            ++gs_builtin_inputs;

#if 0
         debug_printf("input map at %d = %d\n",
                      slot + gs_array_offset, stgp->input_map[slot + gs_array_offset]);
#endif

         switch (attr) {
         case GEOM_ATTRIB_PRIMITIVE_ID:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            stgp->input_semantic_index[slot] = 0;
            break;
         case GEOM_ATTRIB_POSITION:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            stgp->input_semantic_index[slot] = 0;
            break;
         case GEOM_ATTRIB_COLOR0:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            stgp->input_semantic_index[slot] = 0;
            break;
         case GEOM_ATTRIB_COLOR1:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            stgp->input_semantic_index[slot] = 1;
            break;
         case GEOM_ATTRIB_FOG_FRAG_COORD:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            stgp->input_semantic_index[slot] = 0;
            break;
         case GEOM_ATTRIB_TEX_COORD:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            stgp->input_semantic_index[slot] = num_generic++;
            break;
         case GEOM_ATTRIB_VAR0:
            /* fall-through */
         default:
            stgp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            stgp->input_semantic_index[slot] = num_generic++;
         }
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      gs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      gs_output_semantic_index[i] = 0;
   }

   num_generic = 0;
   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < GEOM_RESULT_MAX; attr++) {
      if (stgp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) {
         GLuint slot;

         slot = gs_num_outputs;
         gs_num_outputs++;
         outputMapping[attr] = slot;

         switch (attr) {
         case GEOM_RESULT_POS:
            assert(slot == 0);
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            gs_output_semantic_index[slot] = 0;
            break;
         case GEOM_RESULT_COL0:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            gs_output_semantic_index[slot] = 0;
            break;
         case GEOM_RESULT_COL1:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            gs_output_semantic_index[slot] = 1;
            break;
         case GEOM_RESULT_SCOL0:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            gs_output_semantic_index[slot] = 0;
            break;
         case GEOM_RESULT_SCOL1:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            gs_output_semantic_index[slot] = 1;
            break;
         case GEOM_RESULT_FOGC:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            gs_output_semantic_index[slot] = 0;
            break;
         case GEOM_RESULT_PSIZ:
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
            gs_output_semantic_index[slot] = 0;
            break;
         case GEOM_RESULT_TEX0:
         case GEOM_RESULT_TEX1:
         case GEOM_RESULT_TEX2:
         case GEOM_RESULT_TEX3:
         case GEOM_RESULT_TEX4:
         case GEOM_RESULT_TEX5:
         case GEOM_RESULT_TEX6:
         case GEOM_RESULT_TEX7:
            /* fall-through */
         case GEOM_RESULT_VAR0:
            /* fall-through */
         default:
            assert(slot < Elements(gs_output_semantic_name));
            /* use default semantic info */
            gs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            gs_output_semantic_index[slot] = num_generic++;
         }
      }
   }

   assert(gs_output_semantic_name[0] == TGSI_SEMANTIC_POSITION);

   /* find max output slot referenced to compute gs_num_outputs */
   for (attr = 0; attr < GEOM_RESULT_MAX; attr++) {
      if (outputMapping[attr] != ~0 && outputMapping[attr] > maxSlot)
         maxSlot = outputMapping[attr];
   }
   gs_num_outputs = maxSlot + 1;

#if 0 /* debug */
   {
      GLuint i;
      printf("outputMapping? %d\n", outputMapping ? 1 : 0);
      if (outputMapping) {
         printf("attr -> slot\n");
         for (i = 0; i < 16;  i++) {
            printf(" %2d       %3d\n", i, outputMapping[i]);
         }
      }
      printf("slot    sem_name  sem_index\n");
      for (i = 0; i < gs_num_outputs; i++) {
         printf(" %2d         %d         %d\n",
                i,
                gs_output_semantic_name[i],
                gs_output_semantic_index[i]);
      }
   }
#endif

   /* free old shader state, if any */
   if (stgp->tgsi.tokens) {
      st_free_tokens(stgp->tgsi.tokens);
      stgp->tgsi.tokens = NULL;
   }

   ureg_property_gs_input_prim(ureg, stgp->Base.InputType);
   ureg_property_gs_output_prim(ureg, stgp->Base.OutputType);
   ureg_property_gs_max_vertices(ureg, stgp->Base.VerticesOut);

   error = st_translate_mesa_program(st->ctx,
                                     TGSI_PROCESSOR_GEOMETRY,
                                     ureg,
                                     &stgp->Base.Base,
                                     /* inputs */
                                     gs_num_inputs,
                                     inputMapping,
                                     stgp->input_semantic_name,
                                     stgp->input_semantic_index,
                                     NULL,
                                     /* outputs */
                                     gs_num_outputs,
                                     outputMapping,
                                     gs_output_semantic_name,
                                     gs_output_semantic_index,
                                     FALSE);

   stgp->num_inputs = gs_num_inputs;
   stgp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
   ureg_destroy( ureg );

   /* fill in new variant */
   gpv->driver_shader = pipe->create_gs_state(pipe, &stgp->tgsi);
   gpv->key = *key;

   if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
      _mesa_print_program(&stgp->Base.Base);
      debug_printf("\n");
   }

   if (ST_DEBUG & DEBUG_TGSI) {
      tgsi_dump(stgp->tgsi.tokens, 0);
      debug_printf("\n");
   }

   return gpv;
}
Exemplo n.º 17
0
void GLAPIENTRY
_mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len,
                       const GLvoid *string)
{
   struct gl_program *base;
   bool failed;
   GET_CURRENT_CONTEXT(ctx);

   FLUSH_VERTICES(ctx, _NEW_PROGRAM);

   if (!ctx->Extensions.ARB_vertex_program
       && !ctx->Extensions.ARB_fragment_program) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glProgramStringARB()");
      return;
   }

   if (format != GL_PROGRAM_FORMAT_ASCII_ARB) {
      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramStringARB(format)");
      return;
   }

   if (target == GL_VERTEX_PROGRAM_ARB && ctx->Extensions.ARB_vertex_program) {
      struct gl_vertex_program *prog = ctx->VertexProgram.Current;
      _mesa_parse_arb_vertex_program(ctx, target, string, len, prog);

      base = & prog->Base;
   }
   else if (target == GL_FRAGMENT_PROGRAM_ARB
            && ctx->Extensions.ARB_fragment_program) {
      struct gl_fragment_program *prog = ctx->FragmentProgram.Current;
      _mesa_parse_arb_fragment_program(ctx, target, string, len, prog);

      base = & prog->Base;
   }
   else {
      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramStringARB(target)");
      return;
   }

   failed = ctx->Program.ErrorPos != -1;

   if (!failed) {
      /* finally, give the program to the driver for translation/checking */
      if (!ctx->Driver.ProgramStringNotify(ctx, target, base)) {
         failed = true;
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "glProgramStringARB(rejected by driver");
      }
   }

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      const char *shader_type =
         target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex";

      fprintf(stderr, "ARB_%s_program source for program %d:\n",
              shader_type, base->Id);
      fprintf(stderr, "%s\n", (const char *) string);

      if (failed) {
         fprintf(stderr, "ARB_%s_program %d failed to compile.\n",
                 shader_type, base->Id);
      } else {
         fprintf(stderr, "Mesa IR for ARB_%s_program %d:\n",
                 shader_type, base->Id);
         _mesa_print_program(base);
         fprintf(stderr, "\n");
      }
      fflush(stderr);
   }

   /* Capture vp-*.shader_test/fp-*.shader_test files. */
   const char *capture_path = _mesa_get_shader_capture_path();
   if (capture_path != NULL) {
      FILE *file;
      const char *shader_type =
         target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex";
      char *filename =
         ralloc_asprintf(NULL, "%s/%cp-%u.shader_test",
                         capture_path, shader_type[0], base->Id);

      file = fopen(filename, "w");
      if (file) {
         fprintf(file,
                 "[require]\nGL_ARB_%s_program\n\n[%s program]\n%s\n",
                 shader_type, shader_type, (const char *) string);
         fclose(file);
      } else {
         _mesa_warning(ctx, "Failed to open %s", filename);
      }
      ralloc_free(filename);
   }
}
Exemplo n.º 18
0
/**
 * Returns a fragment program which implements the current pixel transfer ops.
 */
static struct gl_fragment_program *
get_pixel_transfer_program(GLcontext *ctx, const struct state_key *key)
{
   struct st_context *st = ctx->st;
   struct prog_instruction inst[MAX_INST];
   struct gl_program_parameter_list *params;
   struct gl_fragment_program *fp;
   GLuint ic = 0;
   const GLuint colorTemp = 0;

   fp = (struct gl_fragment_program *)
      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
   if (!fp)
      return NULL;

   params = _mesa_new_parameter_list();

   /*
    * Get initial pixel color from the texture.
    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
    */
   _mesa_init_instructions(inst + ic, 1);
   inst[ic].Opcode = OPCODE_TEX;
   inst[ic].DstReg.File = PROGRAM_TEMPORARY;
   inst[ic].DstReg.Index = colorTemp;
   inst[ic].SrcReg[0].File = PROGRAM_INPUT;
   inst[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
   inst[ic].TexSrcUnit = 0;
   inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
   ic++;
   fp->Base.InputsRead = (1 << FRAG_ATTRIB_TEX0);
   fp->Base.OutputsWritten = (1 << FRAG_RESULT_COLR);
   fp->Base.SamplersUsed = 0x1;  /* sampler 0 (bit 0) is used */

   if (key->scaleAndBias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 };
      GLfloat scale[4], bias[4];
      GLint scale_p, bias_p;

      scale[0] = ctx->Pixel.RedScale;
      scale[1] = ctx->Pixel.GreenScale;
      scale[2] = ctx->Pixel.BlueScale;
      scale[3] = ctx->Pixel.AlphaScale;
      bias[0] = ctx->Pixel.RedBias;
      bias[1] = ctx->Pixel.GreenBias;
      bias[2] = ctx->Pixel.BlueBias;
      bias[3] = ctx->Pixel.AlphaBias;

      scale_p = _mesa_add_state_reference(params, scale_state);
      bias_p = _mesa_add_state_reference(params, bias_state);

      /* MAD colorTemp, colorTemp, scale, bias; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MAD;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = scale_p;
      inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[2].Index = bias_p;
      ic++;
   }

   if (key->pixelMaps) {
      const GLuint temp = 1;

      /* create the colormap/texture now if not already done */
      if (!st->pixel_xfer.pixelmap_texture) {
         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
      }

      /* with a little effort, we can do four pixel map look-ups with
       * two TEX instructions:
       */

      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_TEX;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_XY; /* write R,G */
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].TexSrcUnit = 1;
      inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
      ic++;

      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_TEX;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_ZW; /* write B,A */
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W,
                                                 SWIZZLE_Z, SWIZZLE_W);
      inst[ic].TexSrcUnit = 1;
      inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
      ic++;

      /* MOV colorTemp, temp; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MOV;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = temp;
      ic++;

      fp->Base.SamplersUsed |= (1 << 1);  /* sampler 1 is used */
   }

   if (key->colorMatrix) {
      static const gl_state_index row0_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 0, 0, 0 };
      static const gl_state_index row1_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 1, 1, 0 };
      static const gl_state_index row2_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 2, 2, 0 };
      static const gl_state_index row3_state[STATE_LENGTH] =
         { STATE_COLOR_MATRIX, 0, 3, 3, 0 };

      GLint row0_p = _mesa_add_state_reference(params, row0_state);
      GLint row1_p = _mesa_add_state_reference(params, row1_state);
      GLint row2_p = _mesa_add_state_reference(params, row2_state);
      GLint row3_p = _mesa_add_state_reference(params, row3_state);
      const GLuint temp = 1;

      /* DP4 temp.x, colorTemp, matrow0; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_X;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row0_p;
      ic++;

      /* DP4 temp.y, colorTemp, matrow1; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_Y;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row1_p;
      ic++;

      /* DP4 temp.z, colorTemp, matrow2; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_Z;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row2_p;
      ic++;

      /* DP4 temp.w, colorTemp, matrow3; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_DP4;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = temp;
      inst[ic].DstReg.WriteMask = WRITEMASK_W;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = row3_p;
      ic++;

      /* MOV colorTemp, temp; */
      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MOV;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = temp;
      ic++;
   }

   if (key->colorMatrixPostScaleBias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 };
      GLint scale_param, bias_param;

      scale_param = _mesa_add_state_reference(params, scale_state);
      bias_param = _mesa_add_state_reference(params, bias_state);

      _mesa_init_instructions(inst + ic, 1);
      inst[ic].Opcode = OPCODE_MAD;
      inst[ic].DstReg.File = PROGRAM_TEMPORARY;
      inst[ic].DstReg.Index = colorTemp;
      inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
      inst[ic].SrcReg[0].Index = colorTemp;
      inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[1].Index = scale_param;
      inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR;
      inst[ic].SrcReg[2].Index = bias_param;
      ic++;
   }

   /* Modify last instruction's dst reg to write to result.color */
   {
      struct prog_instruction *last = &inst[ic - 1];
      last->DstReg.File = PROGRAM_OUTPUT;
      last->DstReg.Index = FRAG_RESULT_COLR;
   }

   /* END; */
   _mesa_init_instructions(inst + ic, 1);
   inst[ic].Opcode = OPCODE_END;
   ic++;

   assert(ic <= MAX_INST);


   fp->Base.Instructions = _mesa_alloc_instructions(ic);
   if (!fp->Base.Instructions) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY,
                  "generating pixel transfer program");
      return NULL;
   }

   _mesa_copy_instructions(fp->Base.Instructions, inst, ic);
   fp->Base.NumInstructions = ic;
   fp->Base.Parameters = params;

#if 0
   printf("========= pixel transfer prog\n");
   _mesa_print_program(&fp->Base);
   _mesa_print_parameter_list(fp->Base.Parameters);
#endif

   return fp;
}
Exemplo n.º 19
0
/**
 * Initial pass for fragment program code generation.
 * This function is used by both the GLSL and non-GLSL paths.
 */
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
   struct brw_fragment_program *fp = c->fp;
   GLuint insn;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("pre-fp:\n");
      _mesa_print_program(&fp->program.Base); 
      printf("\n");
   }

   c->pixel_xy = src_undef();
   c->delta_xy = src_undef();
   c->pixel_w = src_undef();
   c->nr_fp_insns = 0;
   c->fp->tex_units_used = 0x0;

   /* Emit preamble instructions.  This is where special instructions such as
    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
    * compute shader inputs from varying vars.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      validate_src_regs(c, inst);
      validate_dst_regs(c, inst);
   }

   /* Loop over all instructions doing assorted simplifications and
    * transformations.
    */
   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      struct prog_instruction *out;

      /* Check for INPUT values, emit INTERP instructions where
       * necessary:
       */

      switch (inst->Opcode) {
      case OPCODE_SWZ: 
	 out = emit_insn(c, inst);
	 out->Opcode = OPCODE_MOV;
	 break;
	 
      case OPCODE_ABS:
	 out = emit_insn(c, inst);
	 out->Opcode = OPCODE_MOV;
	 out->SrcReg[0].Negate = NEGATE_NONE;
	 out->SrcReg[0].Abs = 1;
	 break;

      case OPCODE_SUB: 
	 out = emit_insn(c, inst);
	 out->Opcode = OPCODE_ADD;
	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
	 break;

      case OPCODE_SCS: 
	 out = emit_insn(c, inst);
	 /* This should probably be done in the parser. 
	  */
	 out->DstReg.WriteMask &= WRITEMASK_XY;
	 break;
	 
      case OPCODE_DST:
	 precalc_dst(c, inst);
	 break;

      case OPCODE_LIT:
	 precalc_lit(c, inst);
	 break;

      case OPCODE_TEX:
	 precalc_tex(c, inst);
	 break;

      case OPCODE_TXP:
	 precalc_txp(c, inst);
	 break;

      case OPCODE_TXB:
	 out = emit_insn(c, inst);
	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
	 break;

      case OPCODE_XPD: 
	 out = emit_insn(c, inst);
	 /* This should probably be done in the parser. 
	  */
	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
	 break;

      case OPCODE_KIL: 
	 out = emit_insn(c, inst);
	 /* This should probably be done in the parser. 
	  */
	 out->DstReg.WriteMask = 0;
	 break;
      case OPCODE_END:
	 emit_render_target_writes(c);
	 break;
      case OPCODE_PRINT:
	 break;
      default:
	 if (brw_wm_is_scalar_result(inst->Opcode))
	    emit_scalar_insn(c, inst);
	 else
	    emit_insn(c, inst);
	 break;
      }
   }

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("pass_fp:\n");
      print_insns( c->prog_instructions, c->nr_fp_insns );
      printf("\n");
   }
}
Exemplo n.º 20
0
/**
 * Try to remove use of extraneous MOV instructions, to free them up for dead
 * code removal.
 */
static void
_mesa_remove_extra_move_use(struct gl_program *prog)
{
   GLuint i, j;

   if (dbg) {
      printf("Optimize: Begin remove extra move use\n");
      _mesa_print_program(prog);
   }

   /*
    * Look for sequences such as this:
    *    MOV tmpX, arg0;
    *    ...
    *    FOO tmpY, tmpX, arg1;
    * and convert into:
    *    MOV tmpX, arg0;
    *    ...
    *    FOO tmpY, arg0, arg1;
    */

   for (i = 0; i + 1 < prog->NumInstructions; i++) {
      const struct prog_instruction *mov = prog->Instructions + i;
      GLuint dst_mask, src_mask;
      if (can_upward_mov_be_modifed(mov) == GL_FALSE)
         continue;

      /* Scanning the code, we maintain the components which are still active in
       * these two masks
       */
      dst_mask = mov->DstReg.WriteMask;
      src_mask = get_src_arg_mask(mov, 0, NO_MASK);

      /* Walk through remaining instructions until the or src reg gets
       * rewritten or we get into some flow-control, eliminating the use of
       * this MOV.
       */
      for (j = i + 1; j < prog->NumInstructions; j++) {
	 struct prog_instruction *inst2 = prog->Instructions + j;
         GLuint arg;

	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
	     break;

	 /* First rewrite this instruction's args if appropriate. */
	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
	    GLuint comp, read_mask;

	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
		inst2->SrcReg[arg].RelAddr ||
		inst2->SrcReg[arg].Abs)
	       continue;
            read_mask = get_src_arg_mask(inst2, arg, NO_MASK);

	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
             * components read still come from the mov instructions
             */
            if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
               (read_mask & dst_mask) == read_mask) {
               for (comp = 0; comp < 4; comp++) {
                  const GLuint inst2_swz =
                     GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
                  const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
                  inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
                  inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
                  inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
                                                  inst2_swz) & 0x1) << comp);
               }
               inst2->SrcReg[arg].File = mov->SrcReg[0].File;
               inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
            }
	 }

	 /* The source of MOV is written. This potentially deactivates some
          * components from the src and dst of the MOV instruction
          */
	 if (inst2->DstReg.File == mov->DstReg.File &&
	     (inst2->DstReg.RelAddr ||
	      inst2->DstReg.Index == mov->DstReg.Index)) {
            dst_mask &= ~inst2->DstReg.WriteMask;
            src_mask = get_src_arg_mask(mov, 0, dst_mask);
         }

         /* Idem when the destination of mov is written */
	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
	     (inst2->DstReg.RelAddr ||
	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
            src_mask &= ~inst2->DstReg.WriteMask;
            dst_mask &= get_dst_mask_for_mov(mov, src_mask);
         }
         if (dst_mask == 0)
            break;
      }
   }

   if (dbg) {
      printf("Optimize: End remove extra move use.\n");
      /*_mesa_print_program(prog);*/
   }
}
Exemplo n.º 21
0
static struct r300_vertex_program *build_program(GLcontext *ctx,
						 struct r300_vertex_program_key *wanted_key,
						 const struct gl_vertex_program *mesa_vp)
{
	struct r300_vertex_program *vp;
	struct r300_vertex_program_compiler compiler;

	vp = _mesa_calloc(sizeof(*vp));
	vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
	_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));

	rc_init(&compiler.Base);
	compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;

	compiler.code = &vp->code;
	compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
	compiler.SetHwInputOutput = &t_inputs_outputs;

	if (compiler.Base.Debug) {
		fprintf(stderr, "Initial vertex program:\n");
		_mesa_print_program(&vp->Base->Base);
		fflush(stderr);
	}

	if (mesa_vp->IsPositionInvariant) {
		_mesa_insert_mvp_code(ctx, vp->Base);
	}

	radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);

	if (mesa_vp->IsNVProgram)
		initialize_NV_registers(&compiler.Base);

	rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);

	if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
		rc_copy_output(&compiler.Base,
			VERT_RESULT_HPOS,
			vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
	}

	if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
		rc_move_output(&compiler.Base,
			VERT_RESULT_FOGC,
			vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
	}

	r3xx_compile_vertex_program(&compiler);

	if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
		rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
	}

	vp->error = compiler.Base.Error;

	vp->Base->Base.InputsRead = vp->code.InputsRead;
	vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;

	rc_destroy(&compiler.Base);

	return vp;
}
Exemplo n.º 22
0
/**
 * Try to remove extraneous MOV instructions from the given program.
 */
static GLboolean
_mesa_remove_extra_moves(struct gl_program *prog)
{
   GLboolean *removeInst; /* per-instruction removal flag */
   GLuint i, rem = 0, nesting = 0;

   if (dbg) {
      printf("Optimize: Begin remove extra moves\n");
      _mesa_print_program(prog);
   }

   removeInst = (GLboolean *)
      calloc(1, prog->NumInstructions * sizeof(GLboolean));

   /*
    * Look for sequences such as this:
    *    FOO tmpX, arg0, arg1;
    *    MOV tmpY, tmpX;
    * and convert into:
    *    FOO tmpY, arg0, arg1;
    */

   for (i = 0; i < prog->NumInstructions; i++) {
      const struct prog_instruction *mov = prog->Instructions + i;

      switch (mov->Opcode) {
      case OPCODE_BGNLOOP:
      case OPCODE_BGNSUB:
      case OPCODE_IF:
         nesting++;
         break;
      case OPCODE_ENDLOOP:
      case OPCODE_ENDSUB:
      case OPCODE_ENDIF:
         nesting--;
         break;
      case OPCODE_MOV:
         if (i > 0 &&
             can_downward_mov_be_modifed(mov) &&
             mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
             nesting == 0)
         {

            /* see if this MOV can be removed */
            const GLuint id = mov->SrcReg[0].Index;
            struct prog_instruction *prevInst;
            GLuint prevI;

            /* get pointer to previous instruction */
            prevI = i - 1;
            while (prevI > 0 && removeInst[prevI])
               prevI--;
            prevInst = prog->Instructions + prevI;

            if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
                prevInst->DstReg.Index == id &&
                prevInst->DstReg.RelAddr == 0 &&
                prevInst->DstReg.CondSrc == 0 && 
                prevInst->DstReg.CondMask == COND_TR) {

               const GLuint dst_mask = prevInst->DstReg.WriteMask;
               enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);

               if (next_use == WRITE || next_use == END) {
                  /* OK, we can safely remove this MOV instruction.
                   * Transform:
                   *   prevI: FOO tempIndex, x, y;
                   *       i: MOV z, tempIndex;
                   * Into:
                   *   prevI: FOO z, x, y;
                   */
                  if (_mesa_merge_mov_into_inst(prevInst, mov)) {
                     removeInst[i] = GL_TRUE;
                     if (dbg) {
                        printf("Remove MOV at %u\n", i);
                        printf("new prev inst %u: ", prevI);
                        _mesa_print_instruction(prevInst);
                     }
                  }
               }
            }
         }
         break;
      default:
         ; /* nothing */
      }
   }

   /* now remove the instructions which aren't needed */
   rem = remove_instructions(prog, removeInst);

   free(removeInst);

   if (dbg) {
      printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
      /*_mesa_print_program(prog);*/
   }

   return rem != 0;
}
Exemplo n.º 23
0
/**
 * Translate a vertex program to create a new variant.
 */
static struct st_vp_variant *
st_translate_vertex_program(struct st_context *st,
                            struct st_vertex_program *stvp,
                            const struct st_vp_variant_key *key)
{
   struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
   struct pipe_context *pipe = st->pipe;
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs;

   st_prepare_vertex_program(st->ctx, stvp);

   if (!stvp->glsl_to_tgsi)
   {
      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
   }

   ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
   if (ureg == NULL) {
      free(vpv);
      return NULL;
   }

   vpv->key = *key;

   vpv->num_inputs = stvp->num_inputs;
   num_outputs = stvp->num_outputs;
   if (key->passthrough_edgeflags) {
      vpv->num_inputs++;
      num_outputs++;
   }

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stvp->Base.Base);
      _mesa_print_program_parameters(st->ctx, &stvp->Base.Base);
      debug_printf("\n");
   }

   if (stvp->glsl_to_tgsi)
      error = st_translate_program(st->ctx,
                                   TGSI_PROCESSOR_VERTEX,
                                   ureg,
                                   stvp->glsl_to_tgsi,
                                   &stvp->Base.Base,
                                   /* inputs */
                                   vpv->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   NULL, /* interp location */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   stvp->output_semantic_name,
                                   stvp->output_semantic_index,
                                   key->passthrough_edgeflags,
                                   key->clamp_color);
   else
      error = st_translate_mesa_program(st->ctx,
                                        TGSI_PROCESSOR_VERTEX,
                                        ureg,
                                        &stvp->Base.Base,
                                        /* inputs */
                                        vpv->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        stvp->output_semantic_name,
                                        stvp->output_semantic_index,
                                        key->passthrough_edgeflags,
                                        key->clamp_color);

   if (error)
      goto fail;

   vpv->tgsi.tokens = ureg_get_tokens( ureg, NULL );
   if (!vpv->tgsi.tokens)
      goto fail;

   ureg_destroy( ureg );

   if (stvp->glsl_to_tgsi) {
      st_translate_stream_output_info(stvp->glsl_to_tgsi,
                                      stvp->result_to_output,
                                      &vpv->tgsi.stream_output);
   }

   if (ST_DEBUG & DEBUG_TGSI) {
      tgsi_dump(vpv->tgsi.tokens, 0);
      debug_printf("\n");
   }

   vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
   return vpv;

fail:
   debug_printf("%s: failed to translate Mesa program:\n", __func__);
   _mesa_print_program(&stvp->Base.Base);
   debug_assert(0);

   ureg_destroy( ureg );
   return NULL;
}
Exemplo n.º 24
0
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
   struct brw_fragment_program *fp = c->fp;
   GLuint insn;

   if (INTEL_DEBUG & DEBUG_WM) {
      _mesa_printf("\n\n\npre-fp:\n");
      _mesa_print_program(&fp->program.Base); 
      _mesa_printf("\n");
   }

   c->pixel_xy = src_undef();
   c->delta_xy = src_undef();
   c->pixel_w = src_undef();
   c->nr_fp_insns = 0;

   /* Emit preamble instructions:
    */


   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
      struct prog_instruction *out;

      /* Check for INPUT values, emit INTERP instructions where
       * necessary:
       */
      validate_src_regs(c, inst);


      switch (inst->Opcode) {
      case OPCODE_SWZ: 
	 out = emit_insn(c, inst);
	 out->Opcode = OPCODE_MOV;
	 break;
	 
      case OPCODE_ABS:
	 out = emit_insn(c, inst);
	 out->Opcode = OPCODE_MOV;
	 out->SrcReg[0].NegateBase = 0;
	 out->SrcReg[0].Abs = 1;
	 break;

      case OPCODE_SUB: 
	 out = emit_insn(c, inst);
	 out->Opcode = OPCODE_ADD;
	 out->SrcReg[1].NegateBase ^= 0xf;
	 break;

      case OPCODE_SCS: 
	 out = emit_insn(c, inst);
	 /* This should probably be done in the parser. 
	  */
	 out->DstReg.WriteMask &= WRITEMASK_XY;
	 break;
	 
      case OPCODE_DST:
	 precalc_dst(c, inst);
	 break;

      case OPCODE_LIT:
	 precalc_lit(c, inst);
	 break;
     
      case OPCODE_TXP:
	 precalc_txp(c, inst);
	 break;

      case OPCODE_XPD: 
	 out = emit_insn(c, inst);
	 /* This should probably be done in the parser. 
	  */
	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
	 break;

      case OPCODE_KIL: 
	 out = emit_insn(c, inst);
	 /* This should probably be done in the parser. 
	  */
	 out->DstReg.WriteMask = 0;
	 break;

      case OPCODE_END:
      case OPCODE_PRINT:
	 break;
	 
      default:
	 emit_insn(c, inst);
	 break;
      }
   }
   
   emit_fog(c);
   emit_fb_write(c);


   if (INTEL_DEBUG & DEBUG_WM) {
      _mesa_printf("\n\n\npass_fp:\n");
      print_insns( c->prog_instructions, c->nr_fp_insns );
      _mesa_printf("\n");
   }
}
void
_mesa_parse_arb_fragment_program(struct gl_context* ctx, GLenum target,
                                 const GLvoid *str, GLsizei len,
                                 struct gl_fragment_program *program)
{
   struct gl_program prog;
   struct asm_parser_state state;
   GLuint i;

   ASSERT(target == GL_FRAGMENT_PROGRAM_ARB);

   memset(&prog, 0, sizeof(prog));
   memset(&state, 0, sizeof(state));
   state.prog = &prog;

   if (!_mesa_parse_arb_program(ctx, target, (const GLubyte*) str, len,
				&state)) {
      /* Error in the program. Just return. */
      return;
   }

   if (program->Base.String != NULL)
      free(program->Base.String);

   /* Copy the relevant contents of the arb_program struct into the
    * fragment_program struct.
    */
   program->Base.String          = prog.String;
   program->Base.NumInstructions = prog.NumInstructions;
   program->Base.NumTemporaries  = prog.NumTemporaries;
   program->Base.NumParameters   = prog.NumParameters;
   program->Base.NumAttributes   = prog.NumAttributes;
   program->Base.NumAddressRegs  = prog.NumAddressRegs;
   program->Base.NumNativeInstructions = prog.NumNativeInstructions;
   program->Base.NumNativeTemporaries = prog.NumNativeTemporaries;
   program->Base.NumNativeParameters = prog.NumNativeParameters;
   program->Base.NumNativeAttributes = prog.NumNativeAttributes;
   program->Base.NumNativeAddressRegs = prog.NumNativeAddressRegs;
   program->Base.NumAluInstructions   = prog.NumAluInstructions;
   program->Base.NumTexInstructions   = prog.NumTexInstructions;
   program->Base.NumTexIndirections   = prog.NumTexIndirections;
   program->Base.NumNativeAluInstructions = prog.NumAluInstructions;
   program->Base.NumNativeTexInstructions = prog.NumTexInstructions;
   program->Base.NumNativeTexIndirections = prog.NumTexIndirections;
   program->Base.InputsRead      = prog.InputsRead;
   program->Base.OutputsWritten  = prog.OutputsWritten;
   program->Base.IndirectRegisterFiles = prog.IndirectRegisterFiles;
   for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) {
      program->Base.TexturesUsed[i] = prog.TexturesUsed[i];
      if (prog.TexturesUsed[i])
         program->Base.SamplersUsed |= (1 << i);
   }
   program->Base.ShadowSamplers = prog.ShadowSamplers;
   program->OriginUpperLeft = state.option.OriginUpperLeft;
   program->PixelCenterInteger = state.option.PixelCenterInteger;

   program->UsesKill            = state.fragment.UsesKill;
   program->UsesDFdy            = state.fragment.UsesDFdy;

   if (program->Base.Instructions)
      free(program->Base.Instructions);
   program->Base.Instructions = prog.Instructions;

   if (program->Base.Parameters)
      _mesa_free_parameter_list(program->Base.Parameters);
   program->Base.Parameters    = prog.Parameters;

   /* Append fog instructions now if the program has "OPTION ARB_fog_exp"
    * or similar.  We used to leave this up to drivers, but it appears
    * there's no hardware that wants to do fog in a discrete stage separate
    * from the fragment shader.
    */
   if (state.option.Fog != OPTION_NONE) {
      static const GLenum fog_modes[4] = {
	 GL_NONE, GL_EXP, GL_EXP2, GL_LINEAR
      };

      /* XXX: we should somehow recompile this to remove clamping if disabled
       * On the ATI driver, this is unclampled if fragment clamping is disabled
       */
      _mesa_append_fog_code(ctx, program, fog_modes[state.option.Fog], GL_TRUE);
   }

#if DEBUG_FP
   printf("____________Fragment program %u ________\n", program->Base.Id);
   _mesa_print_program(&program->Base);
#endif
}