Exemplo n.º 1
0
static GLboolean
brwProgramStringNotify(struct gl_context *ctx,
		       GLenum target,
		       struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->fragment_program);

      if (newFP == curFP)
	 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->vertex_program);

      if (newVP == curVP)
	 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
	 _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}
static void brwProgramStringNotify( GLcontext *ctx,
				    GLenum target,
				    struct gl_program *prog )
{
   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      struct brw_context *brw = brw_context(ctx);
      struct brw_fragment_program *p = (struct brw_fragment_program *)prog;
      struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
      if (p == fp)
	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
      p->id = brw->program_id++;      
      p->param_state = p->program.Base.Parameters->StateFlags;
   }
   else if (target == GL_VERTEX_PROGRAM_ARB) {
      struct brw_context *brw = brw_context(ctx);
      struct brw_vertex_program *p = (struct brw_vertex_program *)prog;
      struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
      if (p == vp)
	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
      if (p->program.IsPositionInvariant) {
	 _mesa_insert_mvp_code(ctx, &p->program);
      }
      p->id = brw->program_id++;      
      p->param_state = p->program.Base.Parameters->StateFlags;

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);
   }
}
Exemplo n.º 3
0
static GLboolean brwProgramStringNotify( GLcontext *ctx,
        GLenum target,
        struct gl_program *prog )
{
    struct brw_context *brw = brw_context(ctx);
    int i;

    if (target == GL_FRAGMENT_PROGRAM_ARB) {
        struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
        struct brw_fragment_program *newFP = brw_fragment_program(fprog);
        const struct brw_fragment_program *curFP =
            brw_fragment_program_const(brw->fragment_program);

        if (fprog->FogOption) {
            _mesa_append_fog_code(ctx, fprog);
            fprog->FogOption = GL_NONE;
        }

        if (newFP == curFP)
            brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
        newFP->id = brw->program_id++;
        newFP->isGLSL = brw_wm_is_glsl(fprog);
    }
    else if (target == GL_VERTEX_PROGRAM_ARB) {
        struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
        struct brw_vertex_program *newVP = brw_vertex_program(vprog);
        const struct brw_vertex_program *curVP =
            brw_vertex_program_const(brw->vertex_program);

        if (newVP == curVP)
            brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
        if (newVP->program.IsPositionInvariant) {
            _mesa_insert_mvp_code(ctx, &newVP->program);
        }
        newVP->id = brw->program_id++;

        /* Also tell tnl about it:
         */
        _tnl_program_string(ctx, target, prog);
    }

    /* Reject programs with subroutines, which are totally broken at the moment
     * (all program flows return when any program flow returns, and
     * the VS also hangs if a function call calls a function.
     *
     * See piglit glsl-{vs,fs}-functions-[23] tests.
     */
    for (i = 0; i < prog->NumInstructions; i++) {
        if (prog->Instructions[i].Opcode == OPCODE_CAL) {
            shader_error(ctx, prog,
                         "i965 driver doesn't yet support uninlined function "
                         "calls.  Move to using a single return statement at "
                         "the end of the function to work around it.");
            return GL_FALSE;
        }
    }

    return GL_TRUE;
}
Exemplo n.º 4
0
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
        struct gl_vertex_program *mesa_vp)
{
    context_t *context = R700_CONTEXT(ctx);
    struct r700_vertex_program *vp;
    unsigned int i;

    vp = _mesa_calloc(sizeof(*vp));
    vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);

    if (mesa_vp->IsPositionInvariant)
    {
        _mesa_insert_mvp_code(ctx, vp->mesa_program);
    }

    for(i=0; i<context->nNumActiveAos; i++)
    {
        vp->aos_desc[i].size   = context->stream_desc[i].size;
        vp->aos_desc[i].stride = context->stream_desc[i].stride;
        vp->aos_desc[i].type   = context->stream_desc[i].type;
    }

    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        vp->r700AsmCode.bR6xx = 1;
    }

    //Init_Program
    Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
    Map_Vertex_Program(ctx, vp, vp->mesa_program );

    if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
    {
        return NULL;
    }

    if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
                                 &(vp->mesa_program->Base.Instructions[0]),
                                 &(vp->r700AsmCode)) )
    {
        return NULL;
    }

    if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
    {
        return NULL;
    }

    vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
                           : (vp->r700AsmCode.number_used_registers - 1);

    vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;

    vp->translated = GL_TRUE;

    return vp;
}
Exemplo n.º 5
0
/**
 * \sa _mesa_nop_fragment_program
 * Replace the given vertex program with a "no-op" program that just
 * transforms vertex position and emits color.
 */
void
_mesa_nop_vertex_program(struct gl_context *ctx, struct gl_vertex_program *prog)
{
    struct prog_instruction *inst;
    GLuint inputAttr;

    /*
     * Start with a simple vertex program that emits color.
     */
    inst = _mesa_alloc_instructions(2);
    if (!inst) {
        _mesa_error(ctx, GL_OUT_OF_MEMORY, "_mesa_nop_vertex_program");
        return;
    }

    _mesa_init_instructions(inst, 2);

    inst[0].Opcode = OPCODE_MOV;
    inst[0].DstReg.File = PROGRAM_OUTPUT;
    inst[0].DstReg.Index = VARYING_SLOT_COL0;
    inst[0].SrcReg[0].File = PROGRAM_INPUT;
    if (prog->Base.InputsRead & VERT_BIT_COLOR0)
        inputAttr = VERT_ATTRIB_COLOR0;
    else
        inputAttr = VERT_ATTRIB_TEX0;
    inst[0].SrcReg[0].Index = inputAttr;

    inst[1].Opcode = OPCODE_END;

    _mesa_free_instructions(prog->Base.Instructions,
                            prog->Base.NumInstructions);

    prog->Base.Instructions = inst;
    prog->Base.NumInstructions = 2;
    prog->Base.InputsRead = BITFIELD64_BIT(inputAttr);
    prog->Base.OutputsWritten = BITFIELD64_BIT(VARYING_SLOT_COL0);

    /*
     * Now insert code to do standard modelview/projection transformation.
     */
    _mesa_insert_mvp_code(ctx, prog);
}
Exemplo n.º 6
0
static void brwProgramStringNotify( GLcontext *ctx,
				    GLenum target,
				    struct gl_program *prog )
{
   struct brw_context *brw = brw_context(ctx);

   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
      const struct brw_fragment_program *curFP =
         brw_fragment_program_const(brw->fragment_program);

      if (fprog->FogOption) {
         _mesa_append_fog_code(ctx, fprog);
         fprog->FogOption = GL_NONE;
      }

      if (newFP == curFP)
	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
      newFP->id = brw->program_id++;      
      newFP->isGLSL = brw_wm_is_glsl(fprog);
   }
   else if (target == GL_VERTEX_PROGRAM_ARB) {
      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
      const struct brw_vertex_program *curVP =
         brw_vertex_program_const(brw->vertex_program);

      if (newVP == curVP)
	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.IsPositionInvariant) {
	 _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = brw->program_id++;      

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);
   }
}
Exemplo n.º 7
0
/**
 * Translate a Mesa vertex shader into a TGSI shader.
 * \param outputMapping  to map vertex program output registers (VARYING_SLOT_x)
 *       to TGSI output slots
 * \param tokensOut  destination for TGSI tokens
 * \return  pointer to cached pipe_shader object.
 */
void
st_prepare_vertex_program(struct gl_context *ctx,
                            struct st_vertex_program *stvp)
{
   struct st_context *st = st_context(ctx);
   GLuint attr;

   stvp->num_inputs = 0;
   stvp->num_outputs = 0;

   if (stvp->Base.IsPositionInvariant)
      _mesa_insert_mvp_code(ctx, &stvp->Base);

   /*
    * Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stvp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) {
         stvp->input_to_index[attr] = stvp->num_inputs;
         stvp->index_to_input[stvp->num_inputs] = attr;
         stvp->num_inputs++;
         if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
            /* add placeholder for second part of a double attribute */
            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
            stvp->num_inputs++;
         }
      }
   }
   /* bit of a hack, presetup potentially unused edgeflag input */
   stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
   stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

   /* Compute mapping of vertex program outputs to slots.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((stvp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) == 0) {
         stvp->result_to_output[attr] = ~0;
      }
      else {
         unsigned slot = stvp->num_outputs++;

         stvp->result_to_output[attr] = slot;

         switch (attr) {
         case VARYING_SLOT_POS:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_COL0:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_COL1:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            stvp->output_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_BFC0:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_BFC1:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            stvp->output_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_FOGC:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_PSIZ:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            stvp->output_semantic_index[slot] = 1;
            break;
         case VARYING_SLOT_EDGE:
            assert(0);
            break;
         case VARYING_SLOT_CLIP_VERTEX:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_LAYER:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VARYING_SLOT_VIEWPORT:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            stvp->output_semantic_index[slot] = 0;
            break;

         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               stvp->output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               stvp->output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            assert(attr < VARYING_SLOT_MAX);
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            stvp->output_semantic_index[slot] =
               st_get_generic_varying_index(st, attr);
            break;
         }
      }
   }
   /* similar hack to above, presetup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = stvp->num_outputs;
   stvp->output_semantic_name[stvp->num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   stvp->output_semantic_index[stvp->num_outputs] = 0;
}
Exemplo n.º 8
0
/**
 * Generate an R200 vertex program from Mesa's internal representation.
 *
 * \return  GL_TRUE for success, GL_FALSE for failure.
 */
static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
{
   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
   struct prog_instruction *vpi;
   int i;
   VERTEX_SHADER_INSTRUCTION *o_inst;
   unsigned long operands;
   int are_srcs_scalar;
   unsigned long hw_op;
   int dofogfix = 0;
   int fog_temp_i = 0;
   int free_inputs;
   int array_count = 0;
   int u_temp_used;

   vp->native = GL_FALSE;
   vp->translated = GL_TRUE;
   vp->fogmode = ctx->Fog.Mode;

   if (mesa_vp->Base.NumInstructions == 0)
      return GL_FALSE;

#if 0
   if ((mesa_vp->Base.InputsRead &
      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
	    mesa_vp->Base.InputsRead);
      }
      return GL_FALSE;
   }
#endif

   if ((mesa_vp->Base.OutputsWritten &
      ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
      (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
      (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
      (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
                 (unsigned long long) mesa_vp->Base.OutputsWritten);
      }
      return GL_FALSE;
   }

   /* Initial value should be last tmp reg that hw supports.
      Strangely enough r300 doesnt mind even though these would be out of range.
      Smart enough to realize that it doesnt need it? */
   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
   struct prog_src_register src[3];
   struct prog_dst_register dst;

/* FIXME: is changing the prog safe to do here? */
   if (mesa_vp->IsPositionInvariant &&
      /* make sure we only do this once */
       !(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
	 _mesa_insert_mvp_code(ctx, mesa_vp);
      }

   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
      base e isn't directly available neither. */
   if ((mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_FOGC)) && !vp->fogpidx) {
      struct gl_program_parameter_list *paramList;
      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
      paramList = mesa_vp->Base.Parameters;
      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
   }

   vp->pos_end = 0;
   mesa_vp->Base.NumNativeInstructions = 0;
   if (mesa_vp->Base.Parameters)
      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
   else
      mesa_vp->Base.NumNativeParameters = 0;

   for(i = 0; i < VERT_ATTRIB_MAX; i++)
      vp->inputs[i] = -1;
   for(i = 0; i < 15; i++)
      vp->inputmap_rev[i] = 255;
   free_inputs = 0x2ffd;

/* fglrx uses fixed inputs as follows for conventional attribs.
   generic attribs use non-fixed assignment, fglrx will always use the
   lowest attrib values available. We'll just do the same.
   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
   and 13 in a hw vertex prog.
   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
   Additionally, not more than 12 arrays in total are possible I think.
   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
*/

/* attr 4,5 and 13 are only used with generic attribs.
   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
   not possibe to use with vertex progs as it is lacking in vert prog specification) */
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
   if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
      vp->inputs[VERT_ATTRIB_POS] = 0;
      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
      free_inputs &= ~(1 << 0);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
      free_inputs &= ~(1 << 2);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
      free_inputs &= ~(1 << 3);
      array_count++;
   }
   if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
      vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
      array_count++;
   }
   /* VERT_ATTRIB_TEX0-5 */
   for (i = 0; i <= 5; i++) {
      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
	 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
	 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
	 free_inputs &= ~(1 << (i + 6));
	 array_count++;
      }
   }
   /* using VERT_ATTRIB_TEX6/7 would be illegal */
   for (; i < VERT_ATTRIB_TEX_MAX; i++) {
      if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
          if (R200_DEBUG & RADEON_FALLBACKS) {
              fprintf(stderr, "texture attribute %d in vert prog\n", i);
          }
          return GL_FALSE;
      }
   }
   /* completely ignore aliasing? */
   for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
      int j;
   /* completely ignore aliasing? */
      if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
	 array_count++;
	 if (array_count > 12) {
	    if (R200_DEBUG & RADEON_FALLBACKS) {
	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
	    }
	    return GL_FALSE;
	 }
	 for (j = 0; j < 14; j++) {
	    /* will always find one due to limited array_count */
	    if (free_inputs & (1 << j)) {
	       free_inputs &= ~(1 << j);
	       vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
	       if (j == 0) {
                  /* mapped to pos */
                  vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
	       } else if (j < 12) {
                  /* mapped to col/tex */
                  vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
	       } else {
                  /* mapped to pos1 */
                  vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
               }
	       break;
	    }
	 }
      }
   }

   if (!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog without position output\n");
      }
      return GL_FALSE;
   }
   if (free_inputs & 1) {
      if (R200_DEBUG & RADEON_FALLBACKS) {
	 fprintf(stderr, "can't handle vert prog without position input\n");
      }
      return GL_FALSE;
   }

   o_inst = vp->instr;
   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
      operands = op_operands(vpi->Opcode);
      are_srcs_scalar = operands & SCALAR_FLAG;
      operands &= OP_MASK;

      for(i = 0; i < operands; i++) {
	 src[i] = vpi->SrcReg[i];
	 /* hack up default attrib values as per spec as swizzling.
	    normal, fog, secondary color. Crazy?
	    May need more if we don't submit vec4 elements? */
	 if (src[i].File == PROGRAM_INPUT) {
	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
		  }
	       }
	    }
	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
		  }
	       }
	    }
	    else if (src[i].Index == VERT_ATTRIB_FOG) {
	       int j;
	       for (j = 0; j < 4; j++) {
		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
		  }
		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
		  }
	       }
	    }
	 }
      }

      if(operands == 3){
	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_ALL);

	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
		  SWIZZLE_X, SWIZZLE_Y,
		  SWIZZLE_Z, SWIZZLE_W,
		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);

	    o_inst->src1 = ZERO_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    src[2].File = PROGRAM_TEMPORARY;
	    src[2].Index = u_temp_i;
	    src[2].RelAddr = 0;
	    u_temp_i--;
	 }
      }

      if(operands >= 2){
	 if( CMP_SRCS(src[1], src[0]) ){
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_ALL);

	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		  SWIZZLE_X, SWIZZLE_Y,
		  SWIZZLE_Z, SWIZZLE_W,
		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);

	    o_inst->src1 = ZERO_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    src[0].File = PROGRAM_TEMPORARY;
	    src[0].Index = u_temp_i;
	    src[0].RelAddr = 0;
	    u_temp_i--;
	 }
      }

      dst = vpi->DstReg;
      if (dst.File == PROGRAM_OUTPUT &&
	  dst.Index == VARYING_SLOT_FOGC &&
	  dst.WriteMask & WRITEMASK_X) {
	  fog_temp_i = u_temp_i;
	  dst.File = PROGRAM_TEMPORARY;
	  dst.Index = fog_temp_i;
	  dofogfix = 1;
	  u_temp_i--;
      }

      /* These ops need special handling. */
      switch(vpi->Opcode){
      case OPCODE_POW:
/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
   So may need to insert additional instruction */
	 if ((src[0].File == src[1].File) &&
	     (src[0].Index == src[1].Index)) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
		   t_dst_mask(dst.WriteMask));
	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		   SWIZZLE_ZERO,
		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		   SWIZZLE_ZERO,
		   t_src_class(src[0].File),
		   src[0].Negate) | (src[0].RelAddr << 4);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_0;
	 }
	 else {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		   VSF_FLAG_ALL);
	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
		   t_src_class(src[0].File),
		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		   SWIZZLE_ZERO, SWIZZLE_ZERO,
		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
		   t_src_class(src[1].File),
		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;

	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
		   t_dst_mask(dst.WriteMask));
	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
		   VSF_IN_COMPONENT_X,
		   VSF_IN_COMPONENT_Y,
		   VSF_IN_COMPONENT_Z,
		   VSF_IN_COMPONENT_W,
		   VSF_IN_CLASS_TMP,
		   VSF_FLAG_NONE);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_0;
	    u_temp_i--;
	 }
	 goto next;

      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} 
      case OPCODE_SWZ:
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));
	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = ZERO_SRC_0;
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_MAD:
	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
	    instead (requiring 2 clocks) if all inputs are in temp memory
	    (and, only if they actually reference 3 distinct temps) */
	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
	    src[1].File == PROGRAM_TEMPORARY &&
	    src[2].File == PROGRAM_TEMPORARY &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;

	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
	    t_dst_mask(dst.WriteMask));
	 o_inst->src0 = t_src(vp, &src[0]);
#if 0
if ((o_inst - vp->instr) == 31) {
/* fix up the broken vertex program of quake4 demo... */
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
			t_src_class(src[1].File),
			src[1].Negate) | (src[1].RelAddr << 4);
o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
			t_src_class(src[1].File),
			src[1].Negate) | (src[1].RelAddr << 4);
}
else {
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = t_src(vp, &src[2]);
}
#else
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = t_src(vp, &src[2]);
#endif
	 goto next;

      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} 
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		SWIZZLE_ZERO,
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
		SWIZZLE_ZERO,
		t_src_class(src[1].File),
		src[1].Negate) | (src[1].RelAddr << 4);

	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} 
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		VSF_IN_COMPONENT_ONE,
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);
	 o_inst->src1 = t_src(vp, &src[1]);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
		t_src_class(src[1].File),
		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0=t_src(vp, &src[0]);
	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
		t_src_class(src[0].File),
		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
	 o_inst->src2 = UNUSED_SRC_1;
	 goto next;

      case OPCODE_FLR:
      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} 
         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
	    t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = UNUSED_SRC_0;
	 o_inst->src2 = UNUSED_SRC_1;
	 o_inst++;

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = t_src(vp, &src[0]);
	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
		VSF_IN_COMPONENT_X,
		VSF_IN_COMPONENT_Y,
		VSF_IN_COMPONENT_Z,
		VSF_IN_COMPONENT_W,
		VSF_IN_CLASS_TMP,
		/* Not 100% sure about this */
		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);

	 o_inst->src2 = UNUSED_SRC_0;
	 u_temp_i--;
	 goto next;

      case OPCODE_XPD:
	 /* mul r0, r1.yzxw, r2.zxyw
	    mad r0, -r2.yzxw, r1.zxyw, r0
	  */
	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
	    src[1].File == PROGRAM_TEMPORARY &&
	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;

	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
	    t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
		t_src_class(src[1].File),
		src[1].Negate) | (src[1].RelAddr << 4);

	 o_inst->src2 = UNUSED_SRC_1;
	 o_inst++;
	 u_temp_i--;

	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
		t_dst_mask(dst.WriteMask));

	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
		t_src_class(src[1].File),
		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);

	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
		t_src_class(src[0].File),
		src[0].Negate) | (src[0].RelAddr << 4);

	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
		VSF_IN_COMPONENT_X,
		VSF_IN_COMPONENT_Y,
		VSF_IN_COMPONENT_Z,
		VSF_IN_COMPONENT_W,
		VSF_IN_CLASS_TMP,
		VSF_FLAG_NONE);
	 goto next;

      case OPCODE_END:
	 assert(0);
      default:
	 break;
      }

      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
	    t_dst_mask(dst.WriteMask));

      if(are_srcs_scalar){
	 switch(operands){
	    case 1:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = UNUSED_SRC_0;
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 2:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = t_src_scalar(vp, &src[1]);
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 3:
		o_inst->src0 = t_src_scalar(vp, &src[0]);
		o_inst->src1 = t_src_scalar(vp, &src[1]);
		o_inst->src2 = t_src_scalar(vp, &src[2]);
	    break;

	    default:
		fprintf(stderr, "illegal number of operands %lu\n", operands);
		exit(-1);
	    break;
	 }
      } else {
	 switch(operands){
	    case 1:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = UNUSED_SRC_0;
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 2:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = t_src(vp, &src[1]);
		o_inst->src2 = UNUSED_SRC_1;
	    break;

	    case 3:
		o_inst->src0 = t_src(vp, &src[0]);
		o_inst->src1 = t_src(vp, &src[1]);
		o_inst->src2 = t_src(vp, &src[2]);
	    break;

	    default:
		fprintf(stderr, "illegal number of operands %lu\n", operands);
		exit(-1);
	    break;
	 }
      }
      next:

      if (dofogfix) {
	 o_inst++;
	 if (vp->fogmode == GL_EXP) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	 }
	 else if (vp->fogmode == GL_EXP2) {
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = UNUSED_SRC_0;
	    o_inst->src2 = UNUSED_SRC_1;
	 }
	 else { /* fogmode == GL_LINEAR */
		/* could do that with single op (dot) if using params like
		   with fixed function pipeline fog */
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;
	    o_inst++;
	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
		R200_VSF_OUT_CLASS_RESULT_FOGC,
		VSF_FLAG_X);
	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
	    o_inst->src2 = UNUSED_SRC_1;

	 }
         dofogfix = 0;
      }

      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
      if (mesa_vp->Base.NumNativeTemporaries <
	 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
	 mesa_vp->Base.NumNativeTemporaries =
	    mesa_vp->Base.NumTemporaries + u_temp_used;
      }
      if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
	 if (R200_DEBUG & RADEON_FALLBACKS) {
	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
	 }
	 return GL_FALSE;
      }
      u_temp_i = R200_VSF_MAX_TEMPS - 1;
      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
	 mesa_vp->Base.NumNativeInstructions = 129;
	 if (R200_DEBUG & RADEON_FALLBACKS) {
	    fprintf(stderr, "more than 128 native instructions\n");
	 }
	 return GL_FALSE;
      }
      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
	 vp->pos_end = (o_inst - vp->instr);
      }
   }

   vp->native = GL_TRUE;
   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
#if 0
   fprintf(stderr, "hw program:\n");
   for(i=0; i < vp->program.length; i++)
      fprintf(stderr, "%08x\n", vp->instr[i]);
#endif
   return GL_TRUE;
}
Exemplo n.º 9
0
struct r700_vertex_program* r700TranslateVertexShader(struct gl_context *ctx,
						      struct gl_vertex_program *mesa_vp)
{
	context_t *context = R700_CONTEXT(ctx);

    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);

	struct r700_vertex_program *vp;
	unsigned int i;

	vp = calloc(1, sizeof(*vp));
	vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);

    vp->constbo0 = NULL;

	if (mesa_vp->IsPositionInvariant)
	{
                _mesa_insert_mvp_code(ctx, vp->mesa_program);
        }

	for(i=0; i<context->nNumActiveAos; i++)
	{
		vp->aos_desc[i].size   = context->stream_desc[i].size;
		vp->aos_desc[i].stride = context->stream_desc[i].stride;
		vp->aos_desc[i].type   = context->stream_desc[i].type;
		vp->aos_desc[i].format = context->stream_desc[i].format;
	}

	if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
	{
		vp->r700AsmCode.bR6xx = 1;
	}

	//Init_Program
	Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );

    if(GL_TRUE == r700->bShaderUseMemConstant)
    {
        vp->r700AsmCode.bUseMemConstant = GL_TRUE;
    }
    else
    {
        vp->r700AsmCode.bUseMemConstant = GL_FALSE;
    }

    vp->r700AsmCode.unAsic = 7;

	Map_Vertex_Program(ctx, vp, vp->mesa_program );

	if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
	{
		return NULL;
	}

    InitShaderProgram(&(vp->r700AsmCode));

    for(i=0; i < MAX_SAMPLERS; i++)
    {
        vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
    }

    vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;

	if(GL_FALSE == AssembleInstr(0,
                                 0,
                                 vp->mesa_program->Base.NumInstructions,
                                 &(vp->mesa_program->Base.Instructions[0]),
                                 &(vp->r700AsmCode)) )
	{
		return NULL;
	}

    if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
    {
        return NULL;
    }

    if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
    {
        return GL_FALSE;
    }

    vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 
                         : (vp->r700AsmCode.number_used_registers - 1);

	vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;

    vp->translated = GL_TRUE;

	return vp;
}
Exemplo n.º 10
0
static struct r300_vertex_program *build_program(GLcontext *ctx,
						 struct r300_vertex_program_key *wanted_key,
						 const struct gl_vertex_program *mesa_vp)
{
	struct r300_vertex_program *vp;
	struct r300_vertex_program_compiler compiler;

	vp = _mesa_calloc(sizeof(*vp));
	vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
	_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));

	rc_init(&compiler.Base);
	compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;

	compiler.code = &vp->code;
	compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
	compiler.SetHwInputOutput = &t_inputs_outputs;

	if (compiler.Base.Debug) {
		fprintf(stderr, "Initial vertex program:\n");
		_mesa_print_program(&vp->Base->Base);
		fflush(stderr);
	}

	if (mesa_vp->IsPositionInvariant) {
		_mesa_insert_mvp_code(ctx, vp->Base);
	}

	radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);

	if (mesa_vp->IsNVProgram)
		initialize_NV_registers(&compiler.Base);

	rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);

	if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
		rc_copy_output(&compiler.Base,
			VERT_RESULT_HPOS,
			vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
	}

	if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
		rc_move_output(&compiler.Base,
			VERT_RESULT_FOGC,
			vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
	}

	r3xx_compile_vertex_program(&compiler);

	if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
		rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
	}

	vp->error = compiler.Base.Error;

	vp->Base->Base.InputsRead = vp->code.InputsRead;
	vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;

	rc_destroy(&compiler.Base);

	return vp;
}
Exemplo n.º 11
0
/**
 * Translate a Mesa vertex shader into a TGSI shader.
 * \param outputMapping  to map vertex program output registers (VERT_RESULT_x)
 *       to TGSI output slots
 * \param tokensOut  destination for TGSI tokens
 * \return  pointer to cached pipe_shader object.
 */
static void
st_prepare_vertex_program(struct st_context *st,
                            struct st_vertex_program *stvp)
{
   GLuint attr;

   stvp->num_inputs = 0;
   stvp->num_outputs = 0;

   if (stvp->Base.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stvp->Base);

   assert(stvp->Base.Base.NumInstructions > 1);

   /*
    * Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if (stvp->Base.Base.InputsRead & (1 << attr)) {
         stvp->input_to_index[attr] = stvp->num_inputs;
         stvp->index_to_input[stvp->num_inputs] = attr;
         stvp->num_inputs++;
      }
   }
   /* bit of a hack, presetup potentially unused edgeflag input */
   stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
   stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

   /* Compute mapping of vertex program outputs to slots.
    */
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if ((stvp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) == 0) {
         stvp->result_to_output[attr] = ~0;
      }
      else {
         unsigned slot = stvp->num_outputs++;

         stvp->result_to_output[attr] = slot;

         switch (attr) {
         case VERT_RESULT_HPOS:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VERT_RESULT_COL0:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VERT_RESULT_COL1:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            stvp->output_semantic_index[slot] = 1;
            break;
         case VERT_RESULT_BFC0:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VERT_RESULT_BFC1:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
            stvp->output_semantic_index[slot] = 1;
            break;
         case VERT_RESULT_FOGC:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VERT_RESULT_PSIZ:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
            stvp->output_semantic_index[slot] = 0;
            break;
         case VERT_RESULT_EDGE:
            assert(0);
            break;

         case VERT_RESULT_TEX0:
         case VERT_RESULT_TEX1:
         case VERT_RESULT_TEX2:
         case VERT_RESULT_TEX3:
         case VERT_RESULT_TEX4:
         case VERT_RESULT_TEX5:
         case VERT_RESULT_TEX6:
         case VERT_RESULT_TEX7:
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            stvp->output_semantic_index[slot] = attr - VERT_RESULT_TEX0;
            break;

         case VERT_RESULT_VAR0:
         default:
            assert(attr < VERT_RESULT_MAX);
            stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            stvp->output_semantic_index[slot] = (FRAG_ATTRIB_VAR0 - 
                                                FRAG_ATTRIB_TEX0 +
                                                attr - 
                                                VERT_RESULT_VAR0);
            break;
         }
      }
   }
   /* similar hack to above, presetup potentially unused edgeflag output */
   stvp->result_to_output[VERT_RESULT_EDGE] = stvp->num_outputs;
   stvp->output_semantic_name[stvp->num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   stvp->output_semantic_index[stvp->num_outputs] = 0;
}
Exemplo n.º 12
0
static GLboolean brwProgramStringNotify( struct gl_context *ctx,
                                         GLenum target,
                                         struct gl_program *prog )
{
   struct brw_context *brw = brw_context(ctx);
   int i;

   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
      const struct brw_fragment_program *curFP =
         brw_fragment_program_const(brw->fragment_program);
      struct gl_shader_program *shader_program;

      if (newFP == curFP)
	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
      newFP->id = brw->program_id++;      

      /* Don't reject fragment shaders for their Mesa IR state when we're
       * using the new FS backend.
       */
      shader_program = _mesa_lookup_shader_program(ctx, prog->Id);
      if (shader_program
	  && shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
	 return GL_TRUE;
      }
   }
   else if (target == GL_VERTEX_PROGRAM_ARB) {
      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
      const struct brw_vertex_program *curVP =
         brw_vertex_program_const(brw->vertex_program);

      if (newVP == curVP)
	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.IsPositionInvariant) {
	 _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = brw->program_id++;      

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);
   }

   /* Reject programs with subroutines, which are totally broken at the moment
    * (all program flows return when any program flow returns, and
    * the VS also hangs if a function call calls a function.
    *
    * See piglit glsl-{vs,fs}-functions-[23] tests.
    */
   for (i = 0; i < prog->NumInstructions; i++) {
      struct prog_instruction *inst = prog->Instructions + i;
      int r;

      if (prog->Instructions[i].Opcode == OPCODE_CAL) {
	 shader_error(ctx, prog,
		      "i965 driver doesn't yet support uninlined function "
		      "calls.  Move to using a single return statement at "
		      "the end of the function to work around it.\n");
	 return GL_FALSE;
      }

      if (prog->Instructions[i].Opcode == OPCODE_RET) {
	 shader_error(ctx, prog,
		      "i965 driver doesn't yet support \"return\" "
		      "from main().\n");
	 return GL_FALSE;
      }

      for (r = 0; r < _mesa_num_inst_src_regs(inst->Opcode); r++) {
	 if (prog->Instructions[i].SrcReg[r].RelAddr &&
	     prog->Instructions[i].SrcReg[r].File == PROGRAM_INPUT) {
	    shader_error(ctx, prog,
			 "Variable indexing of shader inputs unsupported\n");
	    return GL_FALSE;
	 }
      }

      if (target == GL_FRAGMENT_PROGRAM_ARB &&
	  prog->Instructions[i].DstReg.RelAddr &&
	  prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) {
	 shader_error(ctx, prog,
		      "Variable indexing of FS outputs unsupported\n");
	 return GL_FALSE;
      }
      if (target == GL_FRAGMENT_PROGRAM_ARB) {
	 if ((prog->Instructions[i].DstReg.RelAddr &&
	      prog->Instructions[i].DstReg.File == PROGRAM_TEMPORARY) ||
	     (prog->Instructions[i].SrcReg[0].RelAddr &&
	      prog->Instructions[i].SrcReg[0].File == PROGRAM_TEMPORARY) ||
	     (prog->Instructions[i].SrcReg[1].RelAddr &&
	      prog->Instructions[i].SrcReg[1].File == PROGRAM_TEMPORARY) ||
	     (prog->Instructions[i].SrcReg[2].RelAddr &&
	      prog->Instructions[i].SrcReg[2].File == PROGRAM_TEMPORARY)) {
	    shader_error(ctx, prog,
			 "Variable indexing of variable arrays in the FS "
			 "unsupported\n");
	    return GL_FALSE;
	 }
      }
   }

   return GL_TRUE;
}