Пример #1
0
/**
 * Return a ureg that reads the scalar constant \p c0 in its X channel.
 *
 * 0.0 and 1.0 are served through the ZERO/ONE swizzle selectors and do not
 * consume a constant slot.  Any other value is placed in the first channel
 * that is either unused or already holds the same value, skipping registers
 * whose flags equal I915_CONSTFLAG_USER.  The returned ureg is swizzled as
 * (c0, 0, 0, 1).
 *
 * On exhaustion a program error is reported and 0 is returned.
 */
uint
i915_emit_const1f(struct i915_fp_compile * p, float c0)
{
   struct i915_fragment_shader *ifs = p->shader;
   unsigned reg;

   if (c0 == 0.0)
      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
   if (c0 == 1.0)
      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);

   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      unsigned chan;

      if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
         continue;

      for (chan = 0; chan < 4; chan++) {
         const unsigned chan_bit = 1 << chan;

         /* Occupied by a different value: try the next channel. */
         if ((ifs->constant_flags[reg] & chan_bit) &&
             ifs->constants[reg][chan] != c0)
            continue;

         /* Claim (or re-use) this channel. */
         ifs->constants[reg][chan] = c0;
         ifs->constant_flags[reg] |= chan_bit;
         if (reg + 1 > ifs->num_constants)
            ifs->num_constants = reg + 1;
         return swizzle(UREG(REG_TYPE_CONST, reg), chan, ZERO, ZERO, ONE);
      }
   }

   i915_program_error(p, "i915_emit_const1f: out of constants");
   return 0;
}
Пример #2
0
/**
 * Return a ureg that reads the scalar constant \p c0 in its X channel.
 *
 * 0.0 and 1.0 come for free from the ZERO/ONE swizzle selectors.  Any other
 * value is stored in the first constant channel that is either unused or
 * already contains the same value; registers whose flags equal
 * I915_CONSTFLAG_PARAM are skipped.  The returned ureg is swizzled as
 * (c0, 0, 0, 1).
 *
 * If no channel is available, a message is printed to stderr, p->error is
 * set and 0 is returned.
 */
GLuint
i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0)
{
   GLint reg, idx;

   if (c0 == 0.0)
      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
   if (c0 == 1.0)
      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);

   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
         continue;
      for (idx = 0; idx < 4; idx++) {
         /* Take a free channel, or share one that already holds c0. */
         if (!(p->constant_flags[reg] & (1 << idx)) ||
             p->constant[reg][idx] == c0) {
            p->constant[reg][idx] = c0;
            p->constant_flags[reg] |= 1 << idx;
            if (reg + 1 > p->nr_constants)
               p->nr_constants = reg + 1;
            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
         }
      }
   }

   /* __func__ is the standard spelling; __FUNCTION__ is a compiler
    * extension.  The emitted text is unchanged.
    */
   fprintf(stderr, "%s: out of constants\n", __func__);
   p->error = 1;
   return 0;
}
 void updatePolyhedron(const Vec3& current) {
     const Grid& grid = m_tool->grid();
     
     const Math::Axis::Type axis = m_plane.normal.firstComponent();
     const Plane3 swizzledPlane(swizzle(m_plane.anchor(), axis), swizzle(m_plane.normal, axis));
     const Vec3 theMin = swizzle(grid.snapDown(min(m_initialPoint, current)), axis);
     const Vec3 theMax = swizzle(grid.snapUp  (max(m_initialPoint, current)), axis);
     
     const Vec2     topLeft2(theMin.x(), theMin.y());
     const Vec2    topRight2(theMax.x(), theMin.y());
     const Vec2  bottomLeft2(theMin.x(), theMax.y());
     const Vec2 bottomRight2(theMax.x(), theMax.y());
     
     const Vec3     topLeft3 = unswizzle(Vec3(topLeft2,     swizzledPlane.zAt(topLeft2)),     axis);
     const Vec3    topRight3 = unswizzle(Vec3(topRight2,    swizzledPlane.zAt(topRight2)),    axis);
     const Vec3  bottomLeft3 = unswizzle(Vec3(bottomLeft2,  swizzledPlane.zAt(bottomLeft2)),  axis);
     const Vec3 bottomRight3 = unswizzle(Vec3(bottomRight2, swizzledPlane.zAt(bottomRight2)), axis);
     
     Polyhedron3 polyhedron = m_oldPolyhedron;
     polyhedron.addPoint(topLeft3);
     polyhedron.addPoint(bottomLeft3);
     polyhedron.addPoint(bottomRight3);
     polyhedron.addPoint(topRight3);
     m_tool->update(polyhedron);
 }
Пример #4
0
/**
 * Replace a DST instruction by a single MUL.
 *
 * The operands are swizzled to (1, y, z, 1) and (1, y, 1, w), so the product
 * is [1, src0.y*src1.y, src0.z, src1.w].
 */
static void transform_DST(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	struct rc_instruction *prev = inst->Prev;

	emit2(c, prev, RC_OPCODE_MUL,
		inst->U.I.SaturateMode,
		inst->U.I.DstReg,
		swizzle(inst->U.I.SrcReg[0],
			RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
		swizzle(inst->U.I.SrcReg[1],
			RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));

	/* The original DST is no longer needed. */
	rc_remove_instruction(inst);
}
Пример #5
0
/**
 * Replace an XPD instruction by a MUL followed by a MAD:
 *
 *   dst    = src0.zxyw * src1.yzxw
 *   result = src0.yzxw * src1.zxyw + negate(dst)
 *
 * which is the component-wise cross product formula for x, y and z.
 * The intermediate product goes to the register returned by
 * try_to_reuse_dst(); the MAD carries the original saturate mode and
 * destination.
 *
 * NOTE(review): both emits pass a freshly read inst->Prev.  If the emit
 * helpers link the new instruction directly in front of inst, that is what
 * keeps the MAD after the MUL -- confirm before caching inst->Prev.
 */
static void transform_XPD(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	struct rc_dst_register dst = try_to_reuse_dst(c, inst);

	/* First half of the cross product, written without saturation. */
	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
	/* Second half, with the first half read back as a negated temporary. */
	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
		negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));

	rc_remove_instruction(inst);
}
Пример #6
0
void
vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
                                       unsigned base_offset,
                                       unsigned first_component,
                                       const src_reg &indirect_offset)
{
   vec4_instruction *inst;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
               brw_imm_ud(dst.writemask << first_component), indirect_offset);
   inst->force_writemask_all = true;

   vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
   read->offset = base_offset;
   read->mlen = 1;
   read->base_mrf = -1;

   if (first_component) {
      /* Read into a temporary and copy with a swizzle and writemask. */
      read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type);
      emit(MOV(dst, swizzle(src_reg(read->dst),
                            BRW_SWZ_COMP_INPUT(first_component))));
   }
}
Пример #7
0
void
vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
                                      const src_reg &vertex_index,
                                      unsigned base_offset,
                                      unsigned first_component,
                                      const src_reg &indirect_offset)
{
   vec4_instruction *inst;
   dst_reg temp(this, glsl_type::ivec4_type);
   temp.type = dst.type;

   /* Set up the message header to reference the proper parts of the URB */
   dst_reg header = dst_reg(this, glsl_type::uvec4_type);
   inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
               indirect_offset);
   inst->force_writemask_all = true;

   /* Read into a temporary, ignoring writemasking. */
   inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
   inst->offset = base_offset;
   inst->mlen = 1;
   inst->base_mrf = -1;

   /* Copy the temporary to the destination to deal with writemasking.
    *
    * Also attempt to deal with gl_PointSize being in the .w component.
    */
   if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
   } else {
      src_reg src = src_reg(temp);
      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
      emit(MOV(dst, src));
   }
}
Пример #8
0
/*
 * Smoke test: views two 12-float buffers as two VecN<float, 6> each, writes
 * in+in to the output vector, runs swizzle() on the pair with the index
 * lists {1, 0} / {0, 1}, and finally prints all twelve output floats.
 * Always returns 0; the Lua state is not used.
 */
int test(lua_State *L) {
	float data[12] = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.};
	float out_data[12];

	VecN<float, 6> *src = (VecN<float, 6> *)data;
	VecN<float, 6> *dst = (VecN<float, 6> *)out_data;

	for(int pass = 0; pass < 2; pass++, src++, dst++) {
		/* Index lists are rebuilt on every pass, as swizzle() receives
		 * them by pointer. */
		int oo[] = {1, 0};
		int ii[] = {0, 1};
		MAT(dst) = MAT(src)+MAT(src);
		swizzle(MAT(dst), MAT(src), 2, oo, ii);
	}

	for(const float *it = out_data; it != out_data + 12; ++it) {
		printf("j: %f\n", *it);
	}

	return 0;
}
Пример #9
0
/*
 * Thread entry point.
 *
 * Allocates an int counter initialised to 0 and passes it to swizzle()
 * repeatedly while it is below MAXVAL, sleeping 10..109 microseconds between
 * calls.  The thread then exits with the counter pointer as its exit value.
 * The allocation is not freed here -- presumably the joining thread owns it.
 *
 * If the allocation fails the thread exits with NULL instead of writing
 * through a null pointer.  `arg` is not used.
 */
void *accessorThread(void *arg){

  int *result = (int*)malloc(sizeof(int));
  if (result == NULL) {
    pthread_exit(NULL);
  }
  *result = 0;

  while(*result < MAXVAL){
    swizzle(result);
    usleep(10 + (rand() % 100) );
  }

  pthread_exit(result);
}
Пример #10
0
/*
 * Thread entry point (KLEE variant).
 *
 * Allocates an int counter, marks it symbolic under the name "result", then
 * resets it to 0 and passes it to swizzle() repeatedly while it is below
 * MAXVAL, sleeping 10..109 microseconds between calls.  The thread exits
 * with the counter pointer as its exit value; the allocation is not freed
 * here -- presumably the joining thread owns it.
 *
 * If the allocation fails the thread exits with NULL instead of handing a
 * null pointer to klee_make_symbolic() and dereferencing it.  `arg` is not
 * used.
 */
void *accessorThread(void *arg){

    int *result = (int*)malloc(sizeof(int));
    if (result == NULL) {
        pthread_exit(NULL);
    }
    klee_make_symbolic(result, sizeof(int), "result");
    *result = 0;

    while(*result < MAXVAL){
        swizzle(result);
        usleep(10 + (rand() % 100) );
    }

    pthread_exit(result);
}
/* Depth writes are not intercepted while the program is being emitted.
 * Instead, one MOV appended at the end of the program copies the depth
 * output's Z channel into its W channel.
 */
static void
fixup_depth_write(struct i915_fragment_program *p)
{
   const GLuint depth = UREG(REG_TYPE_OD, 0);

   /* Nothing to do for programs that never wrote depth. */
   if (!p->depth_written)
      return;

   /* oD.w = oD.z -- only W is enabled in the write mask. */
   i915_emit_arith(p, A0_MOV,
                   depth, A0_DEST_CHANNEL_W, 0,
                   swizzle(depth, X, Y, Z, Z),
                   0, 0);
}
Пример #12
0
/* Produce the ureg for one texture-combine argument.
 *
 * The raw source comes from get_source(); `operand` selects how it is
 * modified:
 *   GL_SRC_COLOR (and anything unrecognised) -- used as is
 *   GL_SRC_ALPHA                             -- alpha replicated to all
 *                                               channels, unless only W is
 *                                               being written
 *   GL_ONE_MINUS_SRC_COLOR / _ALPHA          -- an ADD into a fresh
 *                                               temporary computing 1 - arg
 */
static GLuint emit_combine_source( struct i915_fragment_program *p,
				   GLuint mask,
				   GLuint unit,
				   GLenum source,
				   GLenum operand )
{
   const GLuint src = get_source(p, source, unit);
   GLuint tmp;

   if (operand == GL_ONE_MINUS_SRC_COLOR) {
      /* tmp = 1.0 + (-src.xyzw) */
      tmp = i915_get_temp( p );
      return i915_emit_arith( p, A0_ADD, tmp, mask, 0,
			      swizzle(src, ONE, ONE, ONE, ONE ),
			      negate(src, 1,1,1,1), 0);
   }

   if (operand == GL_SRC_ALPHA) {
      /* With a W-only write mask the unswizzled source already works. */
      return (mask == A0_DEST_CHANNEL_W) ? src : swizzle( src, W, W, W, W );
   }

   if (operand == GL_ONE_MINUS_SRC_ALPHA) {
      /* tmp = 1.0 + (-src.wwww) */
      tmp = i915_get_temp( p );
      return i915_emit_arith( p, A0_ADD, tmp, mask, 0,
			      swizzle(src, ONE, ONE, ONE, ONE ),
			      negate( swizzle(src,W,W,W,W), 1,1,1,1), 0);
   }

   /* GL_SRC_COLOR and every other operand. */
   return src;
}
Пример #13
0
/**
 * Return a ureg that reads the constant pair (c0, c1) in its X and Y
 * channels.
 *
 * If either value is 0.0 or 1.0, that channel is taken from the ZERO/ONE
 * swizzle selectors and the other value is delegated to
 * i915_emit_const1f().  Otherwise the pair is stored in the first two
 * adjacent free channels of a register that is neither full (flags 0xf) nor
 * flagged I915_CONSTFLAG_USER, and the result is swizzled as (c0, c1, 0, 1).
 *
 * On exhaustion a program error is reported and 0 is returned.
 */
uint
i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
{
   struct i915_fragment_shader *ifs = p->shader;
   unsigned reg;

   if (c0 == 0.0)
      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
   if (c0 == 1.0)
      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);

   if (c1 == 0.0)
      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
   if (c1 == 1.0)
      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);

   /* XXX: 0, 1, -1 and their combinations could be produced with a
    * swizzle plus negation instead of spending constant slots.
    */
   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      unsigned first;

      if (ifs->constant_flags[reg] == 0xf ||
          ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
         continue;

      /* Candidate pairs are channels (0,1), (1,2) and (2,3). */
      for (first = 0; first < 3; first++) {
         const unsigned pair_mask = 3 << first;

         if (ifs->constant_flags[reg] & pair_mask)
            continue;

         ifs->constants[reg][first + 0] = c0;
         ifs->constants[reg][first + 1] = c1;
         ifs->constant_flags[reg] |= pair_mask;
         if (reg + 1 > ifs->num_constants)
            ifs->num_constants = reg + 1;
         return swizzle(UREG(REG_TYPE_CONST, reg), first, first + 1, ZERO, ONE);
      }
   }

   i915_program_error(p, "i915_emit_const2f: out of constants");
   return 0;
}
Пример #14
0
/**
 * Return a ureg that reads the constant pair (c0, c1) in its X and Y
 * channels.
 *
 * A value of 0.0 or 1.0 is taken from the ZERO/ONE swizzle selectors while
 * the other value is delegated to i915_emit_const1f().  Otherwise the pair
 * goes into the first two adjacent free channels of a register that is
 * neither full (flags 0xf) nor flagged I915_CONSTFLAG_PARAM, and the result
 * is swizzled as (c0, c1, 0, 1).
 *
 * If no pair of channels is free, a message is printed to stderr, p->error
 * is set and 0 is returned.
 */
GLuint
i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1)
{
   GLint reg, first;

   if (c0 == 0.0)
      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
   if (c0 == 1.0)
      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);

   if (c1 == 0.0)
      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
   if (c1 == 1.0)
      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);

   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      if (p->constant_flags[reg] == 0xf ||
          p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
         continue;

      /* Candidate pairs are channels (0,1), (1,2) and (2,3). */
      for (first = 0; first < 3; first++) {
         if (p->constant_flags[reg] & (3 << first))
            continue;

         p->constant[reg][first] = c0;
         p->constant[reg][first + 1] = c1;
         p->constant_flags[reg] |= 3 << first;
         if (reg + 1 > p->nr_constants)
            p->nr_constants = reg + 1;
         return swizzle(UREG(REG_TYPE_CONST, reg),
                        first, first + 1, ZERO, ONE);
      }
   }

   fprintf(stderr, "%s: out of constants\n", __func__);
   p->error = 1;
   return 0;
}
/**
 * Depth writes are not intercepted while the program is being emitted.
 * Instead, one MOV appended at the end of the program copies the depth
 * output's Z channel into its W channel.
 */
static void
i915_fixup_depth_write(struct i915_fp_compile *p)
{
   const uint depth = UREG(REG_TYPE_OD, 0);

   /* XXX assumes position/depth is always output[0] */
   if (p->shader->info.output_semantic_name[0] != TGSI_SEMANTIC_POSITION)
      return;

   /* oD.w = oD.z: MOV, write mask W only, no saturate, src1/src2 unused. */
   i915_emit_arith(p, A0_MOV,
                   depth, A0_DEST_CHANNEL_W, 0,
                   swizzle(depth, X, Y, Z, Z),
                   0, 0);
}
Пример #16
0
/* Emit the final instruction that delivers the combined colour to the
 * colour output register.
 *
 * With separate specular enabled, the specular input (alpha forced to zero)
 * is added on the way out.  Otherwise a plain MOV is emitted, and only when
 * the combined colour is not already in the output register.
 */
static void emit_program_fini( struct i915_fragment_program *p )
{
   int prev_color = get_source( p, GL_PREVIOUS, 0 );
   int out_color = UREG( REG_TYPE_OC, 0 );

   if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
      /* out = previous + specular.xyz0 */
      GLuint spec = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_ALL);

      i915_emit_arith( p, A0_ADD, out_color, A0_DEST_CHANNEL_ALL, 0,
		       prev_color, swizzle(spec, X,Y,Z,ZERO), 0 );
      return;
   }

   if (prev_color == out_color)
      return;

   /* Reached when no texture is enabled and in a few other situations
    * (GL_REPLACE, for instance).
    */
   i915_emit_arith( p, A0_MOV, out_color, A0_DEST_CHANNEL_ALL, 0,
		    prev_color, 0, 0 );
}
Пример #17
0
/**
 * Build the ureg for a program source register: the register file and
 * index are encoded first, then the four swizzle selectors are applied and,
 * if NegateBase is set, all four channels are negated.
 */
static GLuint src_vector( const struct fp_src_register *source )
{
   GLuint ureg;

   /* limitation of the UREG representation */
   assert(source->Index < 32);

   ureg = UREG( src_reg_file( source->File ), source->Index );
   ureg = swizzle(ureg,
		  _X + source->Swizzle[0],
		  _X + source->Swizzle[1],
		  _X + source->Swizzle[2],
		  _X + source->Swizzle[3]);

   if (!source->NegateBase)
      return ureg;

   return negate( ureg, 1,1,1,1 );
}
Пример #18
0
/* Detach block b from free list `listno` of arena a.  Apart from the list
   number range and the single-element consistency assertion, nothing is
   validated.  On return b's prev/next links are both NULL. */
static
void unlinkBlock ( Arena* a, UInt* b, Int listno )
{
   vg_assert(listno >= 0 && listno < VG_N_MALLOC_LISTS);
   if (get_prev_p(b) != b) {
      /* General case: splice b out from between its neighbours and let
         the list head point at the predecessor. */
      UInt* before = get_prev_p(b);
      UInt* after  = get_next_p(b);
      a->freelist[listno] = before;
      set_next_p(before, after);
      set_prev_p(after, before);
      swizzle ( a, listno );
   } else {
      /* b links to itself, so it is the only element: the list becomes
         empty. */
      vg_assert(get_next_p(b) == b);
      a->freelist[listno] = NULL;
   }
   set_prev_p(b, NULL);
   set_next_p(b, NULL);
}
Пример #19
0
/* Emit the instruction sequence for sphere-map texture coordinate
 * generation and write the result to `dest` under `writemask`.
 *
 * With n the transformed normal and u the normalized eye position:
 *   r    = u - 2(n.u)n
 *   m    = 2 * sqrt(rx^2 + ry^2 + (rz+1)^2)
 *   dest = r/m + 1/2
 */
static void build_sphere_texgen( struct tnl_program *p,
				 struct ureg dest,
				 GLuint writemask )
{
   struct ureg normal = get_transformed_normal(p);
   struct ureg eye_hat = get_eye_position_normalized(p);
   struct ureg scratch = get_temp(p);
   struct ureg half = register_scalar_const(p, .5);
   struct ureg refl = get_temp(p);
   struct ureg inv_m = get_temp(p);
   struct ureg identity = get_identity_param(p);

   /* The values above could be shared with the reflection texgen path,
    * but having both sphere and reflection active for different
    * components of one coordinate would be an unusual state.  If two
    * texture units enable reflect and/or sphere, separating the shared
    * part out becomes more attractive.
    */

   /* scratch = n.u */
   emit_op2(p, OPCODE_DP3, scratch, 0, normal, eye_hat);
   /* scratch = 2n.u */
   emit_op2(p, OPCODE_ADD, scratch, 0, scratch, scratch);
   /* refl = (-2n.u)n + u */
   emit_op3(p, OPCODE_MAD, refl, 0, negate(scratch), normal, eye_hat);
   /* scratch = refl + (0,0,1) */
   emit_op2(p, OPCODE_ADD, scratch, 0, refl, swizzle(identity,X,Y,W,Z));
   /* scratch = rx^2 + ry^2 + (rz+1)^2 */
   emit_op2(p, OPCODE_DP3, scratch, 0, scratch, scratch);
   /* scratch = 2/m */
   emit_op1(p, OPCODE_RSQ, scratch, 0, scratch);
   /* inv_m = 1/m */
   emit_op2(p, OPCODE_MUL, inv_m, 0, scratch, half);
   /* dest = refl/m + 1/2 */
   emit_op3(p, OPCODE_MAD, dest, writemask, refl, inv_m, half);

   release_temp(p, scratch);
   release_temp(p, refl);
   release_temp(p, inv_m);
}
Пример #20
0
/**
 * Convenience wrapper: forwards \p a to swizzle() with SWIZZLE_WWWW and a
 * trailing argument of 1, and returns the resulting ir_swizzle.
 *
 * NOTE(review): the trailing 1 is presumably the number of components in
 * the result -- confirm against swizzle()'s declaration.
 */
ir_swizzle *
swizzle_w(operand a)
{
   return swizzle(a, SWIZZLE_WWWW, 1);
}
/**
 * Build the ureg for a program source register.
 *
 * The register file decides where the value comes from: temporaries map to
 * R registers, fragment inputs are declared as T registers, and every kind
 * of parameter or constant is emitted through i915_emit_param4fv().  The
 * source's swizzle is then applied, followed by per-channel negation when
 * NegateBase is non-zero.
 *
 * Returns 0 after reporting a program error for an out-of-range temporary,
 * an unknown input index or an unknown register file.
 */
static GLuint
src_vector(struct i915_fragment_program *p,
           const struct prog_src_register *source,
           const struct gl_fragment_program *program)
{
   GLuint ureg;

   switch (source->File) {

   /* Registers. */
   case PROGRAM_TEMPORARY:
      if (source->Index >= I915_MAX_TEMPORARY) {
         i915_program_error(p, "Exceeded max temporary reg");
         return 0;
      }
      ureg = UREG(REG_TYPE_R, source->Index);
      break;

   case PROGRAM_INPUT:
      switch (source->Index) {
      case FRAG_ATTRIB_WPOS:
         ureg = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
         break;

      case FRAG_ATTRIB_COL0:
         ureg = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
         break;

      case FRAG_ATTRIB_COL1:
         /* Only XYZ is declared; W reads as 1. */
         ureg = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
         ureg = swizzle(ureg, X, Y, Z, ONE);
         break;

      case FRAG_ATTRIB_FOGC:
         /* Declared in W; presented to the program as (w, 0, 0, 1). */
         ureg = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
         ureg = swizzle(ureg, W, ZERO, ZERO, ONE);
         break;

      case FRAG_ATTRIB_TEX0:
      case FRAG_ATTRIB_TEX1:
      case FRAG_ATTRIB_TEX2:
      case FRAG_ATTRIB_TEX3:
      case FRAG_ATTRIB_TEX4:
      case FRAG_ATTRIB_TEX5:
      case FRAG_ATTRIB_TEX6:
      case FRAG_ATTRIB_TEX7:
         ureg = i915_emit_decl(p, REG_TYPE_T,
                               T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
                               D0_CHANNEL_ALL);
         break;

      default:
         i915_program_error(p, "Bad source->Index");
         return 0;
      }
      break;

   /* Parameters and environment values: all of them reach the hardware
    * as program constants.
    */
   case PROGRAM_LOCAL_PARAM:
      ureg = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
      break;

   case PROGRAM_ENV_PARAM:
      ureg = i915_emit_param4fv(p,
                                p->ctx->FragmentProgram.Parameters[source->Index]);
      break;

   case PROGRAM_CONSTANT:
   case PROGRAM_STATE_VAR:
   case PROGRAM_NAMED_PARAM:
      ureg = i915_emit_param4fv(p,
                                program->Base.Parameters->ParameterValues[source->Index]);
      break;

   default:
      i915_program_error(p, "Bad source->File");
      return 0;
   }

   /* Apply the operand's own swizzle to the register picked above. */
   ureg = swizzle(ureg,
                  GET_SWZ(source->Swizzle, 0),
                  GET_SWZ(source->Swizzle, 1),
                  GET_SWZ(source->Swizzle, 2),
                  GET_SWZ(source->Swizzle, 3));

   if (!source->NegateBase)
      return ureg;

   /* NegateBase carries one bit per channel. */
   return negate(ureg,
                 GET_BIT(source->NegateBase, 0),
                 GET_BIT(source->NegateBase, 1),
                 GET_BIT(source->NegateBase, 2),
                 GET_BIT(source->NegateBase, 3));
}
/* Possible concerns:
 *
 * SIN, COS -- could use another taylor step?
 * LIT      -- results seem a little different to sw mesa
 * LOG      -- different to mesa on negative numbers, but this is conformant.
 * 
 * Parse failures -- Mesa doesn't currently give a good indication
 * internally whether a particular program string parsed or not.  This
 * can lead to confusion -- hopefully we cope with it ok now.
 *
 */
static void
upload_program(struct i915_fragment_program *p)
{
   const struct gl_fragment_program *program =
      p->ctx->FragmentProgram._Current;
   const struct prog_instruction *inst = program->Base.Instructions;

/*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */

   /* Is this a parse-failed program?  Ensure a valid program is
    * loaded, as the flagging of an error isn't sufficient to stop
    * this being uploaded to hardware.
    */
   if (inst[0].Opcode == OPCODE_END) {
      GLuint tmp = i915_get_utemp(p);
      i915_emit_arith(p,
                      A0_MOV,
                      UREG(REG_TYPE_OC, 0),
                      A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
      return;
   }

   if (program->Base.NumInstructions > I915_MAX_INSN) {
       i915_program_error( p, "Exceeded max instructions" );
       return;
    }

   /* Not always needed:
    */
   calc_live_regs(p);

   while (1) {
      GLuint src0, src1, src2, flags;
      GLuint tmp = 0, consts0 = 0, consts1 = 0;

      switch (inst->Opcode) {
      case OPCODE_ABS:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         i915_emit_arith(p,
                         A0_MAX,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         src0, negate(src0, 1, 1, 1, 1), 0);
         break;

      case OPCODE_ADD:
         EMIT_2ARG_ARITH(A0_ADD);
         break;

      case OPCODE_CMP:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         src2 = src_vector(p, &inst->SrcReg[2], program);
         i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
         break;

      case OPCODE_COS:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);
	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);

	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
         i915_emit_arith(p,
                         A0_MAD,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         src0,
			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */

         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

	 i915_emit_arith(p,
			 A0_MAD,
			 tmp, A0_DEST_CHANNEL_X, 0,
			 tmp,
			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */

	 /* Compute COS with the same calculation used for SIN, but a
	  * different source range has been mapped to [-1,1] this time.
	  */

	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
	 i915_emit_arith(p,
                         A0_MAX,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
			 0);

	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
	 i915_emit_arith(p,
			 A0_MUL,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 tmp,
			 0);

	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
         i915_emit_arith(p,
                         A0_DP3,
                         tmp, A0_DEST_CHANNEL_X, 0,
			 tmp,
                         swizzle(consts1, X, Y, ZERO, ZERO),
			 0);

	 /* tmp.x now contains a first approximation (y).  Now, weight it
	  * against tmp.y**2 to get closer.
	  */
	 i915_emit_arith(p,
                         A0_MAX,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
			 0);

	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
	 i915_emit_arith(p,
			 A0_MAD,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));

	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
	 i915_emit_arith(p,
			 A0_MAD,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
			 swizzle(consts1, W, W, W, W),
			 swizzle(tmp, Y, Y, Y, Y),
			 swizzle(tmp, X, X, X, X));
         break;

      case OPCODE_DP3:
         EMIT_2ARG_ARITH(A0_DP3);
         break;

      case OPCODE_DP4:
         EMIT_2ARG_ARITH(A0_DP4);
         break;

      case OPCODE_DPH:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, Y, Z, ONE), src1, 0);
         break;

      case OPCODE_DST:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);

         /* result[0] = 1    * 1;
          * result[1] = a[1] * b[1];
          * result[2] = a[2] * 1;
          * result[3] = 1    * b[3];
          */
         i915_emit_arith(p,
                         A0_MUL,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, ONE, Y, Z, ONE),
                         swizzle(src1, ONE, Y, ONE, W), 0);
         break;

      case OPCODE_EX2:
         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_EXP,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_FLR:
         EMIT_1ARG_ARITH(A0_FLR);
         break;

      case OPCODE_FRC:
         EMIT_1ARG_ARITH(A0_FRC);
         break;

      case OPCODE_KIL:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         i915_emit_texld(p, get_live_regs(p, inst),
                         tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
                         0, src0, T0_TEXKILL);
         break;

      case OPCODE_LG2:
         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_LOG,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_LIT:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         /* tmp = max( a.xyzw, a.00zw )
          * XXX: Clamp tmp.w to -128..128
          * tmp.y = log(tmp.y)
          * tmp.y = tmp.w * tmp.y
          * tmp.y = exp(tmp.y)
          * result = cmp (a.11-x1, a.1x01, a.1xy1 )
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
                         src0, swizzle(src0, ZERO, ZERO, Z, W), 0);

         i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp, Y, Y, Y, Y), 0, 0);

         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp, ZERO, Y, ZERO, ZERO),
                         swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

         i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp, Y, Y, Y, Y), 0, 0);

         i915_emit_arith(p, A0_CMP,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
                         swizzle(tmp, ONE, X, ZERO, ONE),
                         swizzle(tmp, ONE, X, Y, ONE));

         break;

      case OPCODE_LRP:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         src2 = src_vector(p, &inst->SrcReg[2], program);
         flags = get_result_flags(inst);
         tmp = i915_get_utemp(p);

         /* b*a + c*(1-a)
          *
          * b*a + c - ca 
          *
          * tmp = b*a + c, 
          * result = (-c)*a + tmp 
          */
         i915_emit_arith(p, A0_MAD, tmp,
                         flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);

         i915_emit_arith(p, A0_MAD,
                         get_result_vector(p, inst),
                         flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
         break;

      case OPCODE_MAD:
         EMIT_3ARG_ARITH(A0_MAD);
         break;

      case OPCODE_MAX:
         EMIT_2ARG_ARITH(A0_MAX);
         break;

      case OPCODE_MIN:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         tmp = i915_get_utemp(p);
         flags = get_result_flags(inst);

         i915_emit_arith(p,
                         A0_MAX,
                         tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                         negate(src0, 1, 1, 1, 1),
                         negate(src1, 1, 1, 1, 1), 0);

         i915_emit_arith(p,
                         A0_MOV,
                         get_result_vector(p, inst),
                         flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
         break;

      case OPCODE_MOV:
         EMIT_1ARG_ARITH(A0_MOV);
         break;

      case OPCODE_MUL:
         EMIT_2ARG_ARITH(A0_MUL);
         break;

      case OPCODE_POW:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         tmp = i915_get_utemp(p);
         flags = get_result_flags(inst);

         /* XXX: masking on intermediate values, here and elsewhere.
          */
         i915_emit_arith(p,
                         A0_LOG,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         swizzle(src0, X, X, X, X), 0, 0);

         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);


         i915_emit_arith(p,
                         A0_EXP,
                         get_result_vector(p, inst),
                         flags, 0, swizzle(tmp, X, X, X, X), 0, 0);

         break;

      case OPCODE_RCP:
         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_RCP,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_RSQ:

         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_RSQ,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_SCS:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         /* 
          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
          * scs.x = DP4 t1, sin_constants
          * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
          * scs.y = DP4 t1, cos_constants
          */
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XY, 0,
                         swizzle(src0, X, X, ONE, ONE),
                         swizzle(src0, X, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(tmp, X, Y, X, Y),
                         swizzle(tmp, X, X, ONE, ONE), 0);

         if (inst->DstReg.WriteMask & WRITEMASK_Y) {
            GLuint tmp1;

            if (inst->DstReg.WriteMask & WRITEMASK_X)
               tmp1 = i915_get_utemp(p);
            else
               tmp1 = tmp;

            i915_emit_arith(p,
                            A0_MUL,
                            tmp1, A0_DEST_CHANNEL_ALL, 0,
                            swizzle(tmp, X, Y, Y, W),
                            swizzle(tmp, X, Z, ONE, ONE), 0);

            i915_emit_arith(p,
                            A0_DP4,
                            get_result_vector(p, inst),
                            A0_DEST_CHANNEL_Y, 0,
                            swizzle(tmp1, W, Z, Y, X),
                            i915_emit_const4fv(p, sin_constants), 0);
         }

         if (inst->DstReg.WriteMask & WRITEMASK_X) {
            i915_emit_arith(p,
                            A0_MUL,
                            tmp, A0_DEST_CHANNEL_XYZ, 0,
                            swizzle(tmp, X, X, Z, ONE),
                            swizzle(tmp, Z, ONE, ONE, ONE), 0);

            i915_emit_arith(p,
                            A0_DP4,
                            get_result_vector(p, inst),
                            A0_DEST_CHANNEL_X, 0,
                            swizzle(tmp, ONE, Z, Y, X),
                            i915_emit_const4fv(p, cos_constants), 0);
         }
         break;

      case OPCODE_SGE:
         EMIT_2ARG_ARITH(A0_SGE);
         break;

      case OPCODE_SIN:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);
	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);

	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
         i915_emit_arith(p,
                         A0_MAD,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         src0,
			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
			 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */

         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

	 i915_emit_arith(p,
			 A0_MAD,
			 tmp, A0_DEST_CHANNEL_X, 0,
			 tmp,
			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */

	 /* Compute sin using a quadratic and quartic.  It gives continuity
	  * that repeating the Taylor series lacks every 2*pi, and has
	  * reduced error.
	  *
	  * The idea was described at:
	  * http://www.devmaster.net/forums/showthread.php?t=5784
	  */

	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
	 i915_emit_arith(p,
                         A0_MAX,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
			 0);

	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
	 i915_emit_arith(p,
			 A0_MUL,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 tmp,
			 0);

	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
         i915_emit_arith(p,
                         A0_DP3,
                         tmp, A0_DEST_CHANNEL_X, 0,
			 tmp,
                         swizzle(consts1, X, Y, ZERO, ZERO),
			 0);

	 /* tmp.x now contains a first approximation (y).  Now, weight it
	  * against tmp.y**2 to get closer.
	  */
	 i915_emit_arith(p,
                         A0_MAX,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
			 0);

	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
	 i915_emit_arith(p,
			 A0_MAD,
			 tmp, A0_DEST_CHANNEL_Y, 0,
			 swizzle(tmp, ZERO, X, ZERO, ZERO),
			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));

	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
	 i915_emit_arith(p,
			 A0_MAD,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
			 swizzle(consts1, W, W, W, W),
			 swizzle(tmp, Y, Y, Y, Y),
			 swizzle(tmp, X, X, X, X));

         break;

      case OPCODE_SLT:
         EMIT_2ARG_ARITH(A0_SLT);
         break;

      case OPCODE_SUB:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);

         i915_emit_arith(p,
                         A0_ADD,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         src0, negate(src1, 1, 1, 1, 1), 0);
         break;

      case OPCODE_SWZ:
         EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
         break;

      case OPCODE_TEX:
         EMIT_TEX(T0_TEXLD);
         break;

      case OPCODE_TXB:
         EMIT_TEX(T0_TEXLDB);
         break;

      case OPCODE_TXP:
         EMIT_TEX(T0_TEXLDP);
         break;

      case OPCODE_XPD:
         /* Cross product:
          *      result.x = src0.y * src1.z - src0.z * src1.y;
          *      result.y = src0.z * src1.x - src0.x * src1.z;
          *      result.z = src0.x * src1.y - src0.y * src1.x;
          *      result.w = undef;
          */
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         tmp = i915_get_utemp(p);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(src0, Z, X, Y, ONE),
                         swizzle(src1, Y, Z, X, ONE), 0);

         i915_emit_arith(p,
                         A0_MAD,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, Y, Z, X, ONE),
                         swizzle(src1, Z, X, Y, ONE),
                         negate(tmp, 1, 1, 1, 0));
         break;

      case OPCODE_END:
         return;

      default:
         i915_program_error(p, "bad opcode");
         return;
      }

      inst++;
      i915_release_utemps(p);
   }
}
Пример #23
0
static bool
try_constant_propagate(const struct gen_device_info *devinfo,
                       vec4_instruction *inst,
                       int arg, const copy_entry *entry)
{
   /* For constant propagation, we only handle the same constant
    * across all 4 channels.  Some day, we should handle the 8-bit
    * float vector format, which would let us constant propagate
    * vectors better.
    * We could be more aggressive here -- some channels might not get used
    * based on the destination writemask.
    */
   src_reg value =
      get_copy_value(*entry,
                     brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
                                                   WRITEMASK_XYZW));

   if (value.file != IMM)
      return false;

   if (value.type == BRW_REGISTER_TYPE_VF) {
      /* The result of bit-casting the component values of a vector float
       * cannot in general be represented as an immediate.
       */
      if (inst->src[arg].type != BRW_REGISTER_TYPE_F)
         return false;
   } else {
      value.type = inst->src[arg].type;
   }

   if (inst->src[arg].abs) {
      if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
          !brw_abs_immediate(value.type, &value.as_brw_reg())) {
         return false;
      }
   }

   if (inst->src[arg].negate) {
      if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
          !brw_negate_immediate(value.type, &value.as_brw_reg())) {
         return false;
      }
   }

   value = swizzle(value, inst->src[arg].swizzle);

   switch (inst->opcode) {
   case BRW_OPCODE_MOV:
   case SHADER_OPCODE_BROADCAST:
      inst->src[arg] = value;
      return true;

   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_INT_QUOTIENT:
   case SHADER_OPCODE_INT_REMAINDER:
      if (devinfo->gen < 8)
         break;
      /* fallthrough */
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_BFI1:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_SUBB:
      if (arg == 1) {
         inst->src[arg] = value;
         return true;
      }
      break;

   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MUL:
   case SHADER_OPCODE_MULH:
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_XOR:
   case BRW_OPCODE_ADDC:
      if (arg == 1) {
	 inst->src[arg] = value;
	 return true;
      } else if (arg == 0 && inst->src[1].file != IMM) {
	 /* Fit this constant in by commuting the operands.  Exception: we
	  * can't do this for 32-bit integer MUL/MACH because it's asymmetric.
	  */
	 if ((inst->opcode == BRW_OPCODE_MUL ||
              inst->opcode == BRW_OPCODE_MACH) &&
	     (inst->src[1].type == BRW_REGISTER_TYPE_D ||
	      inst->src[1].type == BRW_REGISTER_TYPE_UD))
	    break;
	 inst->src[0] = inst->src[1];
	 inst->src[1] = value;
	 return true;
      }
      break;
   case GS_OPCODE_SET_WRITE_OFFSET:
      /* This is just a multiply by a constant with special strides.
       * The generator will handle immediates in both arguments (generating
       * a single MOV of the product).  So feel free to propagate in src0.
       */
      inst->src[arg] = value;
      return true;

   case BRW_OPCODE_CMP:
      if (arg == 1) {
	 inst->src[arg] = value;
	 return true;
      } else if (arg == 0 && inst->src[1].file != IMM) {
	 enum brw_conditional_mod new_cmod;

	 new_cmod = brw_swap_cmod(inst->conditional_mod);
	 if (new_cmod != BRW_CONDITIONAL_NONE) {
	    /* Fit this constant in by swapping the operands and
	     * flipping the test.
	     */
	    inst->src[0] = inst->src[1];
	    inst->src[1] = value;
	    inst->conditional_mod = new_cmod;
	    return true;
	 }
      }
      break;

   case BRW_OPCODE_SEL:
      if (arg == 1) {
	 inst->src[arg] = value;
	 return true;
      } else if (arg == 0 && inst->src[1].file != IMM) {
	 inst->src[0] = inst->src[1];
	 inst->src[1] = value;

	 /* If this was predicated, flipping operands means
	  * we also need to flip the predicate.
	  */
	 if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
	    inst->predicate_inverse = !inst->predicate_inverse;
	 }
	 return true;
      }
      break;

   default:
      break;
   }

   return false;
}
Пример #24
0
/* Build a swizzle of reg that selects component x for all four channels. */
static struct ureg swizzle1( struct ureg reg, int x )
{
   const int chan = x;

   return swizzle(reg, chan, chan, chan, chan);
}
Пример #25
0
/* Emit the program instructions that implement fixed-function lighting.
 *
 * The front (and, for two-sided lighting, back) colour accumulators are
 * seeded from the scene colour and written to the colour outputs.  Then,
 * for every enabled light, the ambient/diffuse/specular contributions are
 * accumulated; on the last enabled light the final MADs target the
 * COL0/COL1 (and BFC0/BFC1) output registers directly instead of the
 * temporaries.
 *
 * Need to add some additional parameters to allow lighting in object
 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 * space lighting.
 */
static void build_lighting( struct tnl_program *p )
{
   const GLboolean twoside = p->state->light_twoside;
   const GLboolean separate = p->state->separate_specular;
   /* nr_lights: number of enabled lights; count: enabled lights processed
    * so far in the main loop (used to detect the last one).
    */
   GLuint nr_lights = 0, count = 0;
   struct ureg normal = get_transformed_normal(p);
   struct ureg lit = get_temp(p);
   struct ureg dots = get_temp(p);
   /* Front-face (_col*) and back-face (_bfc*) colour accumulators. */
   struct ureg _col0 = undef, _col1 = undef;
   struct ureg _bfc0 = undef, _bfc1 = undef;
   GLuint i;

   /*
    * NOTE:
    * dots.x = dot(normal, VPpli)
    * dots.y = dot(normal, halfAngle)
    * dots.z = back.shininess
    * dots.w = front.shininess
    */

   /* Count the enabled lights up front so the last one can be recognised. */
   for (i = 0; i < MAX_LIGHTS; i++)
      if (p->state->unit[i].light_enabled)
	 nr_lights++;

   set_material_flags(p);

   /* Front face setup: shininess into dots.w, accumulators from the
    * scene colour.
    */
   {
      if (!p->state->material_shininess_is_zero) {
         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
         release_temp(p, shininess);
      }

      _col0 = make_temp(p, get_scenecolor(p, 0));
      /* With separate specular the specular sum gets its own accumulator;
       * otherwise it shares the primary one.
       */
      if (separate)
	 _col1 = make_temp(p, get_identity_param(p));
      else
	 _col1 = _col0;
   }

   /* Back face setup, mirroring the front face but using dots.z. */
   if (twoside) {
      if (!p->state->material_shininess_is_zero) {
         /* Note that we negate the back-face specular exponent here.
          * The negation will be un-done later in the back-face code below.
          */
         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
                  negate(swizzle1(shininess,X)));
         release_temp(p, shininess);
      }

      _bfc0 = make_temp(p, get_scenecolor(p, 1));
      if (separate)
	 _bfc1 = make_temp(p, get_identity_param(p));
      else
	 _bfc1 = _bfc0;
   }

   /* If no lights, still need to emit the scenecolor.
    * NOTE(review): a write mask of 0 is passed throughout this function
    * where a full write appears intended - presumably emit_op* treats 0 as
    * "all channels"; confirm against emit_op1/2/3.
    */
   {
      struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
   }

   if (separate) {
      struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
   }

   if (twoside) {
      struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
   }

   if (twoside && separate) {
      struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
   }

   /* Nothing more to do without lights: the outputs already hold the
    * scene colour.
    */
   if (nr_lights == 0) {
      release_temps(p);
      return;
   }

   for (i = 0; i < MAX_LIGHTS; i++) {
      if (p->state->unit[i].light_enabled) {
	 struct ureg half = undef;
	 struct ureg att = undef, VPpli = undef;

	 count++;

	 if (p->state->unit[i].light_eyepos3_is_zero) {
	    /* Can use precomputed constants in this case.
	     * Attenuation never applies to infinite lights.
	     */
	    VPpli = register_param3(p, STATE_INTERNAL,
				    STATE_LIGHT_POSITION_NORMALIZED, i);

            /* The half vector is only needed when there is a specular
             * term.
             */
            if (!p->state->material_shininess_is_zero) {
               if (p->state->light_local_viewer) {
                  struct ureg eye_hat = get_eye_position_normalized(p);
                  half = get_temp(p);
                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
                  emit_normalize_vec3(p, half, half);
               }
               else {
                  half = register_param3(p, STATE_INTERNAL,
                                         STATE_LIGHT_HALF_VECTOR, i);
               }
            }
	 }
	 else {
	    struct ureg Ppli = register_param3(p, STATE_INTERNAL,
					       STATE_LIGHT_POSITION, i);
	    struct ureg V = get_eye_position(p);
	    struct ureg dist = get_temp(p);

	    VPpli = get_temp(p);

	    /* Calculate VPpli vector
	     */
	    emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);

	    /* Normalize VPpli.  The dist value also used in
	     * attenuation below.
	     */
	    emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
	    emit_op1(p, OPCODE_RSQ, dist, 0, dist);
	    emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);

	    /* Calculate attenuation (needed for spot lights and for
	     * distance-attenuated lights):
	     */
	    if (!p->state->unit[i].light_spotcutoff_is_180 ||
		p->state->unit[i].light_attenuated) {
	       att = calculate_light_attenuation(p, i, VPpli, dist);
	    }

	    /* Calculate viewer direction, or use infinite viewer:
	     */
            if (!p->state->material_shininess_is_zero) {
               half = get_temp(p);

               if (p->state->light_local_viewer) {
                  struct ureg eye_hat = get_eye_position_normalized(p);
                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
               }
               else {
                  struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
                  emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
               }

               emit_normalize_vec3(p, half, half);
            }

	    release_temp(p, dist);
	 }

	 /* Calculate dot products.  Without a specular term only
	  * dot(normal, VPpli) is computed, and "half" stays undefined.
	  */
         if (p->state->material_shininess_is_zero) {
            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
         }
         else {
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
         }

	 /* Front face lighting:
	  */
	 {
	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
	    struct ureg res0, res1;
	    GLuint mask0, mask1;

	    /* On the last enabled light, aim the final MADs at the output
	     * registers (XYZ only); otherwise keep accumulating into the
	     * temporaries.
	     */
	    if (count == nr_lights) {
	       if (separate) {
		  mask0 = WRITEMASK_XYZ;
		  mask1 = WRITEMASK_XYZ;
		  res0 = register_output( p, VERT_RESULT_COL0 );
		  res1 = register_output( p, VERT_RESULT_COL1 );
	       }
	       else {
		  mask0 = 0;
		  mask1 = WRITEMASK_XYZ;
		  res0 = _col0;
		  res1 = register_output( p, VERT_RESULT_COL0 );
	       }
	    }
            else {
	       mask0 = 0;
	       mask1 = 0;
	       res0 = _col0;
	       res1 = _col1;
	    }

	    if (!is_undef(att)) {
               /* light is attenuated by distance */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
            }
            else if (!p->state->material_shininess_is_zero) {
               /* there's a non-zero specular term */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
            }
            else {
               /* no attenuation, no specular */
               emit_degenerate_lit(p, lit, dots);
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
            }

	    /* Add the diffuse (lit.y) and specular (lit.z) terms. */
	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);

	    release_temp(p, ambient);
	    release_temp(p, diffuse);
	    release_temp(p, specular);
	 }

	 /* Back face lighting:
	  */
	 if (twoside) {
	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
	    struct ureg res0, res1;
	    GLuint mask0, mask1;

	    /* Same last-light output selection as the front face, using the
	     * BFC outputs.
	     */
	    if (count == nr_lights) {
	       if (separate) {
		  mask0 = WRITEMASK_XYZ;
		  mask1 = WRITEMASK_XYZ;
		  res0 = register_output( p, VERT_RESULT_BFC0 );
		  res1 = register_output( p, VERT_RESULT_BFC1 );
	       }
	       else {
		  mask0 = 0;
		  mask1 = WRITEMASK_XYZ;
		  res0 = _bfc0;
		  res1 = register_output( p, VERT_RESULT_BFC0 );
	       }
	    }
            else {
	       res0 = _bfc0;
	       res1 = _bfc1;
	       mask0 = 0;
	       mask1 = 0;
	    }

            /* For the back face we need to negate the X and Y component
             * dot products.  dots.Z has the negated back-face specular
             * exponent.  We swizzle that into the W position.  This
             * negation makes the back-face specular term positive again.
             */
            dots = negate(swizzle(dots,X,Y,W,Z));

	    if (!is_undef(att)) {
               /* light is attenuated by distance */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
            }
            else if (!p->state->material_shininess_is_zero) {
               /* there's a non-zero specular term */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
            }
            else {
               /* no attenuation, no specular */
               emit_degenerate_lit(p, lit, dots);
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
            }

	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
            /* restore dots to its original state for subsequent lights
             * by negating and swizzling again.
             */
            dots = negate(swizzle(dots,X,Y,W,Z));

	    release_temp(p, ambient);
	    release_temp(p, diffuse);
	    release_temp(p, specular);
	 }

	 /* NOTE(review): half, VPpli and att may be undef or state
	  * parameters rather than temporaries here - presumably
	  * release_temp() ignores those; confirm.
	  */
	 release_temp(p, half);
	 release_temp(p, VPpli);
	 release_temp(p, att);
      }
   }

   release_temps( p );
}
Пример #26
0
/**
 * Convenience wrapper: swizzle \p a with the SWIZZLE_YYYY pattern.
 * The literal 4 is forwarded as the last argument of swizzle()
 * (presumably the number of result components - confirm against swizzle()).
 */
ir_swizzle *
swizzle_yyyy(operand a)
{
   ir_swizzle *const swizzled = swizzle(a, SWIZZLE_YYYY, 4);

   return swizzled;
}
Пример #27
0
/**
 * Convenience wrapper: swizzle \p a with the SWIZZLE_XXXX pattern.
 * The literal 4 is forwarded as the last argument of swizzle()
 * (presumably the number of result components - confirm against swizzle()).
 */
ir_swizzle *
swizzle_xxxx(operand a)
{
   ir_swizzle *const swizzled = swizzle(a, SWIZZLE_XXXX, 4);

   return swizzled;
}
Пример #28
0
/* Occlusion query for a single ray against a BVH4i.
 *
 * Walks the tree with an explicit node stack, testing the ray against the
 * four child boxes of each inner node and against four triangles at each
 * leaf.  As soon as a triangle hit survives all enabled filters,
 * ray.geomID is set to 0 and the function returns.  If the stack runs out
 * first, the function returns without writing ray.geomID.
 */
void BVH4iIntersector1::occluded(BVH4i* bvh, Ray& ray)
{
    /* node stack */
    __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

    /* setup */
    const mic3f rdir16      = rcp_safe(mic3f(ray.dir.x,ray.dir.y,ray.dir.z));
    const mic_f inf         = mic_f(pos_inf);
    const mic_f zero        = mic_f::zero();

    const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
    const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

    /* slot 0 holds the invalidNode sentinel that ends the traversal once
     * everything above it has been popped; the root sits on top of it */
    stack_node[0] = BVH4i::invalidNode;
    stack_node[1] = bvh->root;
    size_t sindex = 2;

    /* ray origin, direction and reciprocal direction laid out for the
     * 4-box test; org*rdir is precomputed so each slab distance is a single
     * multiply-subtract */
    const mic_f org_xyz      = loadAOS4to16f(ray.org.x,ray.org.y,ray.org.z);
    const mic_f dir_xyz      = loadAOS4to16f(ray.dir.x,ray.dir.y,ray.dir.z);
    const mic_f rdir_xyz     = loadAOS4to16f(rdir16.x[0],rdir16.y[0],rdir16.z[0]);
    const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
    const mic_f min_dist_xyz = broadcast1to16f(&ray.tnear);
    const mic_f max_dist_xyz = broadcast1to16f(&ray.tfar);

    const unsigned int leaf_mask = BVH4I_LEAF_MASK;

    while (1)
    {
        /* pop the next node to process */
        NodeRef curNode = stack_node[sindex-1];
        sindex--;

        /* descend through inner nodes until a leaf (or the sentinel) is
         * reached */
        while (1)
        {
            /* test if this is a leaf node */
            if (unlikely(curNode.isLeaf(leaf_mask))) break;

            const Node* __restrict__ const node = curNode.node(nodes);
            const float* __restrict const plower = (float*)node->lower;
            const float* __restrict const pupper = (float*)node->upper;

            prefetch<PFHINT_L1>((char*)node + 0);
            prefetch<PFHINT_L1>((char*)node + 64);

            /* intersect single ray with 4 bounding boxes */
            const mic_f tLowerXYZ = load16f(plower) * rdir_xyz - org_rdir_xyz;
            const mic_f tUpperXYZ = load16f(pupper) * rdir_xyz - org_rdir_xyz;
            const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
            const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

            /* speculatively pop the next stack entry; it is used if no
             * child is hit, otherwise the pop is undone below */
            sindex--;
            curNode = stack_node[sindex];

            const Node* __restrict__ const next = curNode.node(nodes);
            prefetch<PFHINT_L2>((char*)next + 0);
            prefetch<PFHINT_L2>((char*)next + 64);

            /* reduce over each group of 4 lanes; the per-box hit flag is
             * evaluated in the lanes selected by mask 0x8888 */
            const mic_f tNear = vreduce_max4(tLower);
            const mic_f tFar  = vreduce_min4(tUpper);
            const mic_m hitm = le(0x8888,tNear,tFar);
            const mic_f tNear_pos = select(hitm,tNear,inf);


            /* if no child is hit, continue with early popped child */
            if (unlikely(none(hitm))) continue;
            /* at least one child is hit: undo the speculative pop */
            sindex++;

            const unsigned long hiti = toInt(hitm);
            const unsigned long pos_first = bitscan64(hiti);
            const unsigned long num_hitm = countbits(hiti);

            /* if a single child is hit, continue with that child */
            curNode = ((unsigned int *)plower)[pos_first];
            if (likely(num_hitm == 1)) continue;

            /* if two children are hit, push in correct order */
            const unsigned long pos_second = bitscan64(pos_first,hiti);
            if (likely(num_hitm == 2))
            {
                /* the two distances are compared through their raw bit
                 * patterns reinterpreted as unsigned ints
                 * (NOTE(review): this orders like the float values only
                 * for non-negative distances - confirm tNear >= 0 here) */
                const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
                const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
                const unsigned int node_first  = curNode;
                const unsigned int node_second = ((unsigned int*)plower)[pos_second];

                if (dist_first <= dist_second)
                {
                    /* first child is at least as near: visit it now, push
                     * the second */
                    stack_node[sindex] = node_second;
                    sindex++;
                    assert(sindex < 3*BVH4i::maxDepth+1);
                    continue;
                }
                else
                {
                    /* second child is nearer: visit it now, push the
                     * first */
                    stack_node[sindex] = curNode;
                    curNode = node_second;
                    sindex++;
                    assert(sindex < 3*BVH4i::maxDepth+1);
                    continue;
                }
            }

            /* continue with closest child and push all others */
            const mic_f min_dist = set_min_lanes(tNear_pos);
            const unsigned old_sindex = sindex;
            sindex += countbits(hiti) - 1;
            assert(sindex < 3*BVH4i::maxDepth+1);

            const mic_m closest_child = eq(hitm,min_dist,tNear);
            const unsigned long closest_child_pos = bitscan64(closest_child);
            /* m_pos: lanes of the children to push.  Presumably every hit
             * child except the lowest-indexed closest one - confirm against
             * the operand order of andn(). */
            const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
            const mic_i plower_node = load16i((int*)plower);
            curNode = ((unsigned int*)plower)[closest_child_pos];
            /* store the selected child references contiguously starting at
             * the old top of stack */
            compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
        }



        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;


        /* intersect one ray against four triangles */

        //////////////////////////////////////////////////////////////////////////////////////////////////

        const Triangle1* tptr  = (Triangle1*) curNode.leaf(accel);
        prefetch<PFHINT_L1>(tptr + 3);
        prefetch<PFHINT_L1>(tptr + 2);
        prefetch<PFHINT_L1>(tptr + 1);
        prefetch<PFHINT_L1>(tptr + 0);

        const mic_i and_mask = broadcast4to16i(zlc4);

        /* gather the three vertices of tptr[0..3], one triangle per group
         * of 4 lanes */
        const mic_f v0 = gather_4f_zlc(and_mask,
                                       (float*)&tptr[0].v0,
                                       (float*)&tptr[1].v0,
                                       (float*)&tptr[2].v0,
                                       (float*)&tptr[3].v0);

        const mic_f v1 = gather_4f_zlc(and_mask,
                                       (float*)&tptr[0].v1,
                                       (float*)&tptr[1].v1,
                                       (float*)&tptr[2].v1,
                                       (float*)&tptr[3].v1);

        const mic_f v2 = gather_4f_zlc(and_mask,
                                       (float*)&tptr[0].v2,
                                       (float*)&tptr[1].v2,
                                       (float*)&tptr[2].v2,
                                       (float*)&tptr[3].v2);

        /* edge vectors, triangle normal, and the u/v terms scaled by the
         * reciprocal of dot(dir, normal) */
        const mic_f e1 = v1 - v0;
        const mic_f e2 = v0 - v2;
        const mic_f normal = lcross_zxy(e1,e2);
        const mic_f org = v0 - org_xyz;
        const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
        const mic_f den = ldot3_zxy(dir_xyz,normal);
        const mic_f rcp_den = rcp(den);
        const mic_f uu = ldot3_zxy(e2,odzxy);
        const mic_f vv = ldot3_zxy(e1,odzxy);
        const mic_f u = uu * rcp_den;
        const mic_f v = vv * rcp_den;

        /* start with one lane per triangle (mask 0x1111); with backface
         * culling only triangles with den > 0 stay active */
#if defined(__BACKFACE_CULLING__)
        const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
        const mic_m m_init = 0x1111;
#endif
        /* inside-triangle test: u >= 0, v >= 0, u + v <= 1 */
        const mic_m valid_u = ge(m_init,u,zero);
        const mic_m valid_v = ge(valid_u,v,zero);
        const mic_m m_aperture = le(valid_v,u+v,mic_f::one());

        const mic_f nom = ldot3_zxy(org,normal);
        const mic_f t = rcp_den*nom;

        /* no triangle passed the u/v test: go on with the next stack entry */
        if (unlikely(none(m_aperture))) continue;

        /* keep only hits with tnear < t < tfar */
        mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
        /* drop triangles whose mask has no bit in common with ray.mask */
        const mic_i rayMask(ray.mask);
        const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].v2,&tptr[1].v2,&tptr[2].v2,&tptr[3].v2));
        const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
        m_final &= m_ray_mask;
#endif

#if defined(__INTERSECTION_FILTER__)

        /* did the ray hit one of the four triangles? */
        /* examine the remaining hits nearest-first until one is accepted
         * or none are left */
        while (any(m_final))
        {
            const mic_f temp_t  = select(m_final,t,max_dist_xyz);
            const mic_f min_dist = vreduce_min(temp_t);
            const mic_m m_dist = eq(min_dist,temp_t);
            const size_t vecIndex = bitscan(toInt(m_dist));
            /* 4 lanes per triangle */
            const size_t triIndex = vecIndex >> 2;
            const Triangle1  *__restrict__ tri_ptr = tptr + triIndex;
            /* isolate the lowest set bit of m_dist */
            const mic_m m_tri = m_dist^(m_dist & (mic_m)((unsigned int)m_dist - 1));
            const mic_f gnormalx = mic_f(tri_ptr->Ng.x);
            const mic_f gnormaly = mic_f(tri_ptr->Ng.y);
            const mic_f gnormalz = mic_f(tri_ptr->Ng.z);
            const int geomID = tri_ptr->geomID();
            const int primID = tri_ptr->primID();
            Geometry* geom = ((Scene*)bvh->geometry)->get(geomID);

            /* geometry without an occlusion filter: the hit stands */
            if (likely(!geom->hasOcclusionFilter1())) break;

            /* a true return from the filter also leaves the hit in
             * m_final */
            if (runOcclusionFilter1(geom,ray,u,v,min_dist,gnormalx,gnormaly,gnormalz,m_tri,geomID,primID))
                break;

            m_final ^= m_tri; /* clear bit */
        }
#endif

        /* any surviving hit means the ray is occluded */
        if (unlikely(any(m_final)))
        {
            ray.geomID = 0;
            return;
        }
        //////////////////////////////////////////////////////////////////////////////////////////////////

    }
}
Пример #29
0
static struct pipe_sampler_view *
nv30_sampler_view_create(struct pipe_context *pipe, struct pipe_resource *pt,
                         const struct pipe_sampler_view *tmpl)
{
   const struct nv30_texfmt *fmt = nv30_texfmt(pipe->screen, tmpl->format);
   struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d;
   struct nv30_miptree *mt = nv30_miptree(pt);
   struct nv30_sampler_view *so;

   so = MALLOC_STRUCT(nv30_sampler_view);
   if (!so)
      return NULL;
   so->pipe = *tmpl;
   so->pipe.reference.count = 1;
   so->pipe.texture = NULL;
   so->pipe.context = pipe;
   pipe_resource_reference(&so->pipe.texture, pt);

   so->fmt = NV30_3D_TEX_FORMAT_NO_BORDER;
   switch (pt->target) {
   case PIPE_TEXTURE_1D:
      so->fmt |= NV30_3D_TEX_FORMAT_DIMS_1D;
      break;
   case PIPE_TEXTURE_CUBE:
      so->fmt |= NV30_3D_TEX_FORMAT_CUBIC;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      so->fmt |= NV30_3D_TEX_FORMAT_DIMS_2D;
      break;
   case PIPE_TEXTURE_3D:
      so->fmt |= NV30_3D_TEX_FORMAT_DIMS_3D;
      break;
   default:
      assert(0);
      so->fmt |= NV30_3D_TEX_FORMAT_DIMS_1D;
      break;
   }

   so->filt = fmt->filter;
   so->wrap = fmt->wrap;
   so->swz  = fmt->swizzle;
   so->swz |= swizzle(fmt, 3, tmpl->swizzle_a);
   so->swz |= swizzle(fmt, 0, tmpl->swizzle_r) << 2;
   so->swz |= swizzle(fmt, 1, tmpl->swizzle_g) << 4;
   so->swz |= swizzle(fmt, 2, tmpl->swizzle_b) << 6;

   /* apparently, we need to ignore the t coordinate for 1D textures to
    * fix piglit tex1d-2dborder
    */
   so->wrap_mask = ~0;
   if (pt->target == PIPE_TEXTURE_1D) {
      so->wrap_mask &= ~NV30_3D_TEX_WRAP_T__MASK;
      so->wrap      |=  NV30_3D_TEX_WRAP_T_REPEAT;
   }

   /* yet more hardware suckage, can't filter 32-bit float formats */
   switch (tmpl->format) {
   case PIPE_FORMAT_R32_FLOAT:
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      so->filt_mask = ~(NV30_3D_TEX_FILTER_MIN__MASK |
                        NV30_3D_TEX_FILTER_MAG__MASK);
      so->filt     |= NV30_3D_TEX_FILTER_MIN_NEAREST |
                      NV30_3D_TEX_FILTER_MAG_NEAREST;
      break;
   default:
      so->filt_mask = ~0;
      break;
   }

   so->npot_size0 = (pt->width0 << 16) | pt->height0;
   if (eng3d->oclass >= NV40_3D_CLASS) {
      so->npot_size1 = (pt->depth0 << 20) | mt->uniform_pitch;
      if (!mt->swizzled)
         so->fmt |= NV40_3D_TEX_FORMAT_LINEAR;
      so->fmt |= 0x00008000;
      so->fmt |= (pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT;
   } else {
      so->swz |= mt->uniform_pitch << NV30_3D_TEX_SWIZZLE_RECT_PITCH__SHIFT;
      if (pt->last_level)
         so->fmt |= NV30_3D_TEX_FORMAT_MIPMAP;
      so->fmt |= util_logbase2(pt->width0)  << 20;
      so->fmt |= util_logbase2(pt->height0) << 24;
      so->fmt |= util_logbase2(pt->depth0)  << 28;
      so->fmt |= 0x00010000;
   }

   so->base_lod = so->pipe.u.tex.first_level << 8;
   so->high_lod = MIN2(pt->last_level, so->pipe.u.tex.last_level) << 8;
   return &so->pipe;
}
Пример #30
0
/**
 * Convenience wrapper: swizzle \p a with the SWIZZLE_XYZW pattern.
 * The literal 4 is forwarded as the last argument of swizzle()
 * (presumably the number of result components - confirm against swizzle()).
 */
ir_swizzle *
swizzle_xyzw(operand a)
{
   ir_swizzle *const swizzled = swizzle(a, SWIZZLE_XYZW, 4);

   return swizzled;
}