Exemple #1
0
static void brw_wm_xy(struct brw_compile *p, int dw)
{
	struct brw_reg r1 = brw_vec1_grf(1, 0);
	struct brw_reg r1_uw = __retype_uw(r1);
	struct brw_reg x_uw, y_uw;

	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	if (dw == 16) {
		x_uw = brw_uw16_grf(30, 0);
		y_uw = brw_uw16_grf(28, 0);
	} else {
		x_uw = brw_uw8_grf(30, 0);
		y_uw = brw_uw8_grf(28, 0);
	}

	brw_ADD(p,
		x_uw,
		__stride(__suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
	brw_ADD(p,
		y_uw,
		__stride(__suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));

	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
static void emit_pixel_xy(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_reg r1 = brw_vec1_grf(1, 0);
    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);

    struct brw_reg dst0, dst1;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    dst0 = get_dst_reg(c, inst, 0, 1);
    dst1 = get_dst_reg(c, inst, 1, 1);
    /* Calculate pixel centers by adding 1 or 0 to each of the
     * micro-tile coordinates passed in r1.
     */
    if (mask & WRITEMASK_X) {
	brw_ADD(p,
		vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 4), 2, 4, 0),
		brw_imm_v(0x10101010));
    }

    if (mask & WRITEMASK_Y) {
	brw_ADD(p,
		vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
		stride(suboffset(r1_uw, 5), 2, 4, 0),
		brw_imm_v(0x11001100));
    }

}
Exemple #3
0
void brw_copy8(struct brw_compile *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       GLuint count)
{
   GLuint i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++)
   {
      GLuint delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
   }
}
Exemple #4
0
void brw_copy8(struct brw_codegen *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       unsigned count)
{
   unsigned i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++)
   {
      unsigned delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
   }
}
static void fire_fb_write( struct brw_wm_compile *c,
                           GLuint base_reg,
                           GLuint nr,
                           GLuint target,
                           GLuint eot)
{
    struct brw_compile *p = &c->func;
    /* Pass through control information:
     */
    /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
    {
	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
	brw_MOV(p,
		brw_message_reg(base_reg + 1),
		brw_vec8_grf(1, 0));
	brw_pop_insn_state(p);
    }
    /* Send framebuffer write message: */
    brw_fb_WRITE(p,
	    retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
	    base_reg,
	    retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
	    target,              
	    nr,
	    0,
	    eot);
}
Exemple #6
0
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes..
 *
 * Payload R0:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 */
void emit_pixel_xy(struct brw_wm_compile *c,
		   const struct brw_reg *dst,
		   GLuint mask)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r1 = brw_vec1_grf(1, 0);
   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
   struct brw_reg dst0_uw, dst1_uw;

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (c->dispatch_width == 16) {
      dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
   } else {
      dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
      dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
   }

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      dst0_uw,
	      stride(suboffset(r1_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      dst1_uw,
	      stride(suboffset(r1_uw,5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }
   brw_pop_insn_state(p);
}
Exemple #7
0
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   GLuint msg_reg_nr = 1;
   {
      struct brw_reg b;
      brw_push_insn_state(p);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Setup MRF[1] with location/offset into const buffer */
      b = brw_message_reg(msg_reg_nr);
      b = retype(b, BRW_REGISTER_TYPE_UD);
      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
       */
      brw_MOV(p, b, brw_imm_ud(location));
      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE; 
      insn->header.destreg__conditionalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;
  
      /* cast dest to a uword[8] vector */
      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      bind_table_index,
			      0,  /* msg_control (0 means 1 Oword) */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      0, /* source cache = data cache */
			      1, /* msg_length */
			      1, /* response_length (1 Oword) */
			      0); /* eot */
   }
}
VectorXd DampedNumericalFilteredController::getNewJointVelocities( const DQ reference, const VectorXd thetas)
{
    thetas_ = thetas;
    ///--Controller Step

    //Calculate jacobian
    task_jacobian_  = robot_.analyticalJacobian(thetas_);

    // Recalculation of measured data.
    // End effectors pose
    end_effector_pose_ = robot_.fkm(thetas_);

    //Error
    last_error_      = error_;
    error_           = vec8(reference - end_effector_pose_);
    integral_error_ += error_;
    //error_ = vec8(dq_one_ - conj(end_effector_pose_)*reference);

    svd_.compute(task_jacobian_, ComputeFullU);
    singular_values_ = svd_.singularValues();

    //Damping Calculation
    double sigma_min = singular_values_(5);
    VectorXd u_min = svd_.matrixU().col(5);
    double lambda = lambda_max_;
    if (sigma_min < epsilon_)
    {
        lambda = (1-(sigma_min/epsilon_)*(sigma_min/epsilon_))*lambda_max_*lambda_max_;
    }

    //We want to solve the equation J+ = J^T(JJ^T+aI)^-1, in which the matrix
    //being inverted is obviously positive definite if a > 0.
    //The solver gives us the solution to X = A^-1.B
    //Therefore I chose to find X^T = B^T(A^T)^-1 then take the transpose of X.
    task_jacobian_pseudoinverse_ = ((task_jacobian_*task_jacobian_.transpose() + (beta_*beta_)*identity_ + (lambda*lambda)*u_min*u_min.transpose()).transpose()).ldlt().solve(task_jacobian_);
    task_jacobian_pseudoinverse_.transposeInPlace();

    if( at_least_one_error_ )
        delta_thetas_ = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ + kd_*(error_ - last_error_) );
    else
    {
        at_least_one_error_ = true;
        delta_thetas_ = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ );
    }

    return delta_thetas_;

}
/* TODO
   BIAS on SIMD8 not workind yet...
 */	
static void emit_txb(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    struct brw_reg dst[4], src[4], payload_reg;
    GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

    GLuint i;
    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
    for (i = 0; i < 4; i++) 
	dst[i] = get_dst_reg(c, inst, i, 1);
    for (i = 0; i < 4; i++)
	src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);

    switch (inst->TexSrcTarget) {
	case TEXTURE_1D_INDEX:
	    brw_MOV(p, brw_message_reg(2), src[0]);
	    brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
	    break;
	case TEXTURE_2D_INDEX:
	case TEXTURE_RECT_INDEX:
	    brw_MOV(p, brw_message_reg(2), src[0]);
	    brw_MOV(p, brw_message_reg(3), src[1]);
	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
	    break;
	default:
	    brw_MOV(p, brw_message_reg(2), src[0]);
	    brw_MOV(p, brw_message_reg(3), src[1]);
	    brw_MOV(p, brw_message_reg(4), src[2]);
	    break;
    }
    brw_MOV(p, brw_message_reg(5), src[3]);
    brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
    brw_SAMPLE(p,
	    retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
	    1,
	    retype(payload_reg, BRW_REGISTER_TYPE_UW),
	    unit + MAX_DRAW_BUFFERS, /* surface */
	    unit,     /* sampler */
	    inst->DstReg.WriteMask,
	    BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
	    4,
	    4,
	    0);
}
void
brw_blorp_eu_emitter::emit_scattered_read(const struct brw_reg &dst,
                                          enum opcode opcode,
                                          const struct brw_reg &src0,
                                          unsigned msg_reg_nr,
                                          unsigned msg_length,
                                          int dispatch_width,
                                          bool use_header)
{
   assert(opcode == SHADER_OPCODE_DWORD_SCATTERED_READ ||
          (brw_ctx->gen >= 7 && opcode == SHADER_OPCODE_BYTE_SCATTERED_READ));

   fs_inst *inst = new (mem_ctx) fs_inst(opcode);

   switch (dispatch_width) {
   case 1:
   default:
       inst->dst = vec1(dst);
       break;
   case 2:
       inst->dst = vec2(dst);
       break;
   case 4:
       inst->dst = vec4(dst);
       break;
   case 8:
       inst->dst = vec8(dst);
       break;
   case 16:
       inst->dst = vec16(dst);
       break;
   }

   inst->src[0] = src0;
   inst->base_mrf = msg_reg_nr;
   inst->mlen = msg_length;
   inst->header_present = use_header;
   inst->target = BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX;

   insts.push_tail(inst);
}
VectorXd HInfinityRobustController::getNewJointVelocities( const DQ reference, const VectorXd thetas)
{

    ///--Remapping arguments
    thetas_ = thetas;
    reference_state_variables_ = reference.vec8();

    ///--Controller Step
           
    //Calculate jacobian
    task_jacobian_  = robot_.jacobian(thetas_);
    
    // Recalculation of measured data.
    // End effectors pose
    end_effector_pose_ = robot_.fkm(thetas_);
    measured_state_variables_ = vec8(end_effector_pose_);

    //Error
    error_ = Hminus8(reference)*(C8_)*(reference_state_variables_ - measured_state_variables_);

    N_ = Hminus8(reference)*(C8_)*task_jacobian_;

    N_pseudoinverse_ = pseudoInverse(N_);
    
    //Recalculation of K (if reference changed)
    if(old_reference_ != reference)
    {
      Bw_ = Hminus8(reference)*(C8_)*B_;
      //std::cout << std::endl << (Bw_.transpose()*Bw_*sqrt(2.0)) << std::endl;
      double bwtbwsqrt2 = (Bw_.transpose()*Bw_*sqrt(2.0)).coeff(0);
      kp_ = (1.0/gamma_) * ( Bw_*Bw_.transpose() + (bwtbwsqrt2/4.0)*identity8_ )*(alpha_/sqrt(bwtbwsqrt2));
      std::cout << std::endl << kp_ << std::endl;
      old_reference_ = reference;
    }
           
    delta_thetas_ = N_pseudoinverse_*kp_*error_;

    return delta_thetas_;

}
Exemple #12
0
void emit_txb(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   GLuint msgLength;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint response_length;
   struct brw_reg dst_retyped;

   /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
    * samples, so we'll use the 16-wide instruction, leave the second halves
    * undefined, and trust the execution mask to keep the undefined pixels
    * from mattering.
    */
   if (c->dispatch_width == 16 || intel->gen < 5) {
      if (intel->gen >= 5)
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      else
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
      mrf_per_channel = 2;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      response_length = 8;
   } else {
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      mrf_per_channel = 1;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      response_length = 4;
   }

   /* Shadow ignored for txb. */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
      brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
      brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
      brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
      brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_CUBE_INDEX:
      brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
      brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
      brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
      break;
   default:
      /* unexpected target */
      abort();
   }

   brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
   msgLength = 2 + 4 * mrf_per_channel - 1;

   brw_SAMPLE(p, 
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      msgLength,
	      1,
	      BRW_SAMPLER_SIMD_MODE_SIMD16,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
Exemple #13
0
void emit_tex(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler,
	      bool shadow)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg dst_retyped;
   GLuint cur_mrf = 2, response_length;
   GLuint i, nr_texcoords;
   GLuint emit;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint simd_mode;

   if (c->dispatch_width == 16) {
      mrf_per_channel = 2;
      response_length = 8;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mrf_per_channel = 1;
      response_length = 4;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   /* How many input regs are there?
    */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      emit = WRITEMASK_X;
      nr_texcoords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_1D_ARRAY_INDEX:
   case TEXTURE_RECT_INDEX:
      emit = WRITEMASK_XY;
      nr_texcoords = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_2D_ARRAY_INDEX:
   case TEXTURE_CUBE_INDEX:
      emit = WRITEMASK_XYZ;
      nr_texcoords = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5 && c->dispatch_width == 8)
      nr_texcoords = 3;

   if (shadow) {
      if (intel->gen < 7) {
	 /* For shadow comparisons, we have to supply u,v,r. */
	 nr_texcoords = 3;
      } else {
	 /* On Ivybridge, the shadow comparitor comes first. Just load it. */
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
	 cur_mrf += mrf_per_channel;
      }
   }

   /* Emit the texcoords. */
   for (i = 0; i < nr_texcoords; i++) {
      if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
	 brw_set_saturate(p, true);

      if (emit & (1<<i))
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
      else
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
      cur_mrf += mrf_per_channel;

      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
   if (shadow && intel->gen < 7) {
      if (intel->gen >= 5) {
	 /* Fill in the cube map array index value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      } else if (c->dispatch_width == 8) {
	 /* Fill in the LOD bias value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      }
      brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
      cur_mrf += mrf_per_channel;
   }

   if (intel->gen >= 5) {
      if (shadow)
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
      else
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
   } else {
      /* Note that G45 and older determines shadow compare and dispatch width
       * from message length for most messages.
       */
      if (c->dispatch_width == 16 && shadow)
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
      else
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
   }

   brw_SAMPLE(p,
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      cur_mrf - 1,
	      1,
	      simd_mode,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
Exemple #14
0
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   GLboolean need_stall = 0;
   
   if (writemask == 0) {
      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }
   
   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 need_stall = 1;
         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
	 
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 brw_MOV(p, m1, brw_vec8_grf(0,0));	 
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
	 dest = offset(dest, dst_offset);
	 response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length, 
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);	      
      brw_pop_insn_state(p);
   }

}
VectorXd Pid_SRIvar_TrackingController::getNewJointVelocities( const DQ reference, const VectorXd thetas)
{

    thetas_ = thetas; //This is necessary for the getNewJointPositions to work

    DQ robot_base = robot_.base();
    DQ robot_effector = robot_.effector();

    VectorXd pseudo_dummy_joint_marker = VectorXd::Zero(robot_dofs_);
    VectorXd pseudo_thetas = thetas_;
    VectorXd pseudo_delta_thetas = VectorXd::Zero(robot_dofs_);
    VectorXd possible_new_thetas = VectorXd::Zero(robot_dofs_);

    MatrixXd original_dh_matrix = robot_.getDHMatrix();
    MatrixXd step_dh_matrix = original_dh_matrix;
    DQ_kinematics pseudo_robot(original_dh_matrix);
    pseudo_robot.set_base(robot_base);
    pseudo_robot.set_effector(robot_effector);

    bool should_break_loop = false;
    while(not should_break_loop) {

        //Calculate jacobian
        task_jacobian_  = pseudo_robot.analyticalJacobian(pseudo_thetas);

        // Recalculation of measured data.
        end_effector_pose_ = pseudo_robot.fkm(pseudo_thetas);

        //Error
        last_error_      = error_;
        error_           = vec8(reference - end_effector_pose_);
        integral_error_ = error_ + ki_memory_*integral_error_;

        //error_ = vec8(dq_one_ - conj(end_effector_pose_)*reference);


        ///Inverse calculation
        svd_.compute(task_jacobian_, ComputeFullU);
        singular_values_ = svd_.singularValues();

        //Damping Calculation
        int current_step_relevant_dof = ( pseudo_robot.links() - pseudo_robot.n_dummy() );
        double sigma_min = singular_values_( current_step_relevant_dof - 1);
        VectorXd u_min = svd_.matrixU().col( current_step_relevant_dof - 1);
        double lambda = lambda_max_;
        if (sigma_min < epsilon_)
        {
            lambda = (1-(sigma_min/epsilon_)*(sigma_min/epsilon_))*lambda_max_*lambda_max_;
        }

        task_jacobian_pseudoinverse_ =    (task_jacobian_.transpose())*((task_jacobian_*task_jacobian_.transpose()
                                          + (beta_*beta_)*identity_ + (lambda*lambda)*u_min*u_min.transpose()).inverse());


        // pseudo_delta_thetas = task_jacobian_pseudoinverse_*kp_*error_;
        if( at_least_one_error_ )
            pseudo_delta_thetas = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ + kd_*(error_ - last_error_) );
        else
        {
            at_least_one_error_ = true;
            pseudo_delta_thetas = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ );
        }


        //Update delta_thetas for possiblenewthetas calculation
        for(int i=0,j=0; i < robot_dofs_; i++)
        {
            if(pseudo_dummy_joint_marker(i) == 0)
            {
                delta_thetas_(i) = pseudo_delta_thetas(j);
                ++j;
            }
            //Do NOTHING if it should be ignored.
        }


        //Possible new thetas
        possible_new_thetas = thetas_ + delta_thetas_;

        //Verify if loop should end
        should_break_loop = true;
        int j=0;
        //For all joints
        for(int i = 0; i < robot_dofs_; i++) {

            //If joint is not yet marked for not being considered in the minimization
            if(pseudo_dummy_joint_marker(i) == 0) {

                //If the controller is trying to put a joint further than any of its limits
                if(    possible_new_thetas(i) > upper_joint_limits_(i)
                        || possible_new_thetas(i) < lower_joint_limits_(i) )
                {

                    //If the joint was already saturated sometime ago
                    double ep = 1.e-05;
                    if (    thetas_(i) > upper_joint_limits_(i) - ep
                            || thetas_(i) < lower_joint_limits_(i) + ep) {

                        pseudo_dummy_joint_marker(i) = 1; //Mark it to be ignored in the minization
                        //std::cout << std::endl << "Joint " << i << " will be ignored in the next controller step.";
                        step_dh_matrix(4,i) = 1;          //Set matrix as dummy.
                        step_dh_matrix(0,i) = thetas_(i); //Set matrix theta as a fixed value.
                        should_break_loop = false;
                    }
                    //If the joint was not yet saturated and the controller wants to saturate it
                    else {

                        // Saturate the joint in this step.
                        if   ( possible_new_thetas(i) > upper_joint_limits_(i) ) {
                            delta_thetas_(i) = upper_joint_limits_(i) - thetas_(i);
                            //std::cout << std::endl << "Joint = " << i << " was saturated in its upper_limit";
                        }
                        else if ( possible_new_thetas(i) < lower_joint_limits_(i) ) {
                            delta_thetas_(i) = lower_joint_limits_(i) - thetas_(i);
                            //std::cout << std::endl << "Joint = " << i << " was saturated in its lower_limit";
                        }
                        else {
                            std::cout << std::endl << "Something is really wrong";
                        }
                        //The joint should still be considered in the minimizations.
                        pseudo_thetas(j) = thetas_(i);
                        ++j;
                    }


                }
                //If the controller is not trying to put this joint further than any of its limits, we consider the velocity given normally
                else {
                    delta_thetas_(i) = pseudo_delta_thetas(j);
                    pseudo_thetas(j) = thetas_(i);
                    ++j;
                }
            }
            //If joint was marked to be ignored, it shall be ignored.
            else {
                delta_thetas_(i) = 0;
            }

        }
        if( j == 0 ) {
            std::cout << std::endl << "Robot will be unable to get out of this configuration using this controller.";
            delta_thetas_ = VectorXd::Zero(robot_dofs_);
            break;
        }

        if(not should_break_loop) {
            pseudo_robot = DQ_kinematics(step_dh_matrix);  //Change DH
            pseudo_robot.set_base(robot_base);
            pseudo_robot.set_effector(robot_effector);
            pseudo_thetas.conservativeResize(j);           //Resize pseudothetas
        }



    }//While not should_break_loop

    return delta_thetas_;

}
Exemple #16
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   brw_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
static void emit_tex(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    struct brw_reg dst[4], src[4], payload_reg;
    GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

    GLuint msg_len;
    GLuint i, nr;
    GLuint emit;
    GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;

    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);

    for (i = 0; i < 4; i++) 
	dst[i] = get_dst_reg(c, inst, i, 1);
    for (i = 0; i < 4; i++)
	src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);


    switch (inst->TexSrcTarget) {
	case TEXTURE_1D_INDEX:
	    emit = WRITEMASK_X;
	    nr = 1;
	    break;
	case TEXTURE_2D_INDEX:
	case TEXTURE_RECT_INDEX:
	    emit = WRITEMASK_XY;
	    nr = 2;
	    break;
	default:
	    emit = WRITEMASK_XYZ;
	    nr = 3;
	    break;
    }
    msg_len = 1;

    for (i = 0; i < nr; i++) {
	static const GLuint swz[4] = {0,1,2,2};
	if (emit & (1<<i))
	    brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
	else
	    brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
	msg_len += 1;
    }

    if (shadow) {
	brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
	brw_MOV(p, brw_message_reg(6), src[2]);
    }

    brw_SAMPLE(p,
	    retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
	    1,
	    retype(payload_reg, BRW_REGISTER_TYPE_UW),
	    unit + MAX_DRAW_BUFFERS, /* surface */
	    unit,     /* sampler */
	    inst->DstReg.WriteMask,
	    BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
	    4,
	    shadow ? 6 : 4,
	    0);

    if (shadow)
	brw_MOV(p, dst[3], brw_imm_f(1.0));
}