static void brw_wm_xy(struct brw_compile *p, int dw) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = __retype_uw(r1); struct brw_reg x_uw, y_uw; brw_set_compression_control(p, BRW_COMPRESSION_NONE); if (dw == 16) { x_uw = brw_uw16_grf(30, 0); y_uw = brw_uw16_grf(28, 0); } else { x_uw = brw_uw8_grf(30, 0); y_uw = brw_uw8_grf(28, 0); } brw_ADD(p, x_uw, __stride(__suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); brw_ADD(p, y_uw, __stride(__suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); }
static void emit_pixel_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); struct brw_reg dst0, dst1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; dst0 = get_dst_reg(c, inst, 0, 1); dst1 = get_dst_reg(c, inst, 1, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } }
/**
 * Emit `count` 8-wide MOV instructions copying from src to dst.
 *
 * Both registers are first converted with vec8(); the n-th MOV (counting
 * from zero) addresses each of them at a byte offset of n * 32.
 */
void brw_copy8(struct brw_compile *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       GLuint count)
{
   struct brw_reg dst8 = vec8(dst);
   struct brw_reg src8 = vec8(src);
   GLuint n;

   for (n = 0; n < count; n++) {
      const GLuint byte_off = n * 32;

      brw_MOV(p,
	      byte_offset(dst8, byte_off),
	      byte_offset(src8, byte_off));
   }
}
/**
 * Emit `count` 8-wide MOV instructions copying from src to dst.
 *
 * Both registers are first converted with vec8(); every successive MOV
 * addresses them 32 bytes further along than the previous one, starting at
 * byte offset 0.
 */
void
brw_copy8(struct brw_codegen *p,
          struct brw_reg dst,
          struct brw_reg src,
          unsigned count)
{
   unsigned copied;
   unsigned byte_off = 0;

   dst = vec8(dst);
   src = vec8(src);

   for (copied = 0; copied < count; copied++) {
      brw_MOV(p, byte_offset(dst, byte_off), byte_offset(src, byte_off));
      byte_off += 32;
   }
}
/*
 * Emit a framebuffer write.
 *
 * First copies r1 into message register base_reg + 1 with the execution
 * mask disabled (the control information is passed through unchanged),
 * then emits the framebuffer write message itself.
 *
 * base_reg: first message register of the payload.
 * nr:       forwarded to brw_fb_WRITE after `target`.
 * target:   forwarded to brw_fb_WRITE as the write target.
 * eot:      forwarded to brw_fb_WRITE as its last argument.
 */
static void fire_fb_write(struct brw_wm_compile *c,
			  GLuint base_reg,
			  GLuint nr,
			  GLuint target,
			  GLuint eot)
{
   struct brw_compile *p = &c->func;
   struct brw_reg null_dst_uw =
      retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   struct brw_reg r0_uw =
      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

   /* mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
   brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* The write message proper. */
   brw_fb_WRITE(p,
		null_dst_uw,
		base_reg,
		r0_uw,
		target,
		nr,
		0,
		eot);
}
/**
 * Compute the screen-space x,y position of the pixels.
 *
 * The result is meant to be consumed by emit_delta_xy() or emit_wpos_xy()
 * for attribute interpolation.
 *
 * dst[0] receives X when WRITEMASK_X is set in mask, dst[1] receives Y when
 * WRITEMASK_Y is set.  Destinations are treated as 16-wide unsigned-word
 * registers when the dispatch width is 16 and as 8-wide ones otherwise.
 * The instructions are emitted uncompressed; the surrounding instruction
 * state is saved and restored.
 *
 * Payload notes kept from the earlier documentation (titled "Payload R0"
 * there, although it also lists R1 entries):
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 */
void emit_pixel_xy(struct brw_wm_compile *c,
		   const struct brw_reg *dst,
		   GLuint mask)
{
   struct brw_compile *p = &c->func;
   struct brw_reg tiles_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   struct brw_reg x_uw = retype(dst[0], BRW_REGISTER_TYPE_UW);
   struct brw_reg y_uw = retype(dst[1], BRW_REGISTER_TYPE_UW);

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* Widen the destinations to match the dispatch width. */
   if (c->dispatch_width == 16) {
      x_uw = vec16(x_uw);
      y_uw = vec16(y_uw);
   } else {
      x_uw = vec8(x_uw);
      y_uw = vec8(y_uw);
   }

   /* Pixel centers: add 1 or 0 to each of the micro-tile coordinates
    * passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
	      x_uw,
	      stride(suboffset(tiles_uw, 4), 2, 4, 0),
	      brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
	      y_uw,
	      stride(suboffset(tiles_uw, 5), 2, 4, 0),
	      brw_imm_v(0x11001100));
   }

   brw_pop_insn_state(p);
}
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Used for fetching shader constants.
 *
 * \param p                 compile state the instructions are appended to
 * \param dest              destination register; it is recast to an
 *                          8-wide unsigned-word vector for the SEND
 * \param relAddr           meant to select an indirect fetch through the
 *                          address register; currently ignored
 * \param location          offset into the buffer, should be a multiple
 *                          of 16
 * \param bind_table_index  binding table entry of the constant buffer
 *
 * Two instructions are emitted: a MOV that loads `location` into message
 * register 1, followed by a SEND carrying an OWORD block read message.
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   GLuint msg_reg_nr = 1;
   struct brw_reg location_mrf;
   struct brw_instruction *send;

   /* Step 1: set up MRF[1] with location/offset into const buffer. */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   location_mrf = retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD);

   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
    */
   brw_MOV(p, location_mrf, brw_imm_ud(location));
   brw_pop_insn_state(p);

   /* Step 2: the SEND that performs the read. */
   send = next_insn(p, BRW_OPCODE_SEND);

   send->header.predicate_control = BRW_PREDICATE_NONE;
   send->header.compression_control = BRW_COMPRESSION_NONE;
   send->header.destreg__conditionalmod = msg_reg_nr;
   send->header.mask_control = BRW_MASK_DISABLE;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(send, dest);
   brw_set_src0(send, brw_null_reg());

   brw_set_dp_read_message(p->brw,
                           send,
                           bind_table_index,
                           0,  /* msg_control (0 means 1 Oword) */
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           0,  /* source cache = data cache */
                           1,  /* msg_length */
                           1,  /* response_length (1 Oword) */
                           0); /* eot */
}
// Computes one controller step and returns the resulting joint update.
//
// reference: desired end-effector pose.
// thetas:    current joint configuration (stored in thetas_).
//
// Updates task_jacobian_, end_effector_pose_, last_error_, error_,
// integral_error_, svd_, singular_values_, task_jacobian_pseudoinverse_,
// at_least_one_error_ and delta_thetas_ as side effects.
VectorXd DampedNumericalFilteredController::getNewJointVelocities( const DQ reference, const VectorXd thetas)
{
    thetas_ = thetas;

    // Jacobian and end-effector pose for the current configuration.
    task_jacobian_     = robot_.analyticalJacobian(thetas_);
    end_effector_pose_ = robot_.fkm(thetas_);

    // Error bookkeeping: keep the previous error, compute the new one and
    // accumulate it.
    last_error_ = error_;
    error_      = vec8(reference - end_effector_pose_);
    integral_error_ += error_;

    // Singular value decomposition of the Jacobian.
    svd_.compute(task_jacobian_, ComputeFullU);
    singular_values_ = svd_.singularValues();

    // Damping. Index 5 is treated as the minimum singular value and its
    // column of U as the matching direction.
    // NOTE(review): this assumes exactly six singular values - confirm.
    const double   sigma_min = singular_values_(5);
    const VectorXd u_min     = svd_.matrixU().col(5);

    double lambda = lambda_max_;
    if (sigma_min < epsilon_)
    {
        // NOTE(review): lambda is squared again below - confirm intended.
        lambda = (1-(sigma_min/epsilon_)*(sigma_min/epsilon_))*lambda_max_*lambda_max_;
    }

    // We want J+ = J^T (J J^T + aI)^-1. The solver yields X = A^-1 B, so
    // X^T = B^T (A^T)^-1 is solved for and the result transposed in place.
    task_jacobian_pseudoinverse_ =
        ((  task_jacobian_*task_jacobian_.transpose()
          + (beta_*beta_)*identity_
          + (lambda*lambda)*u_min*u_min.transpose()
         ).transpose()).ldlt().solve(task_jacobian_);
    task_jacobian_pseudoinverse_.transposeInPlace();

    // The derivative term needs a previous error, so it is skipped on the
    // very first step.
    if( !at_least_one_error_ )
    {
        at_least_one_error_ = true;
        delta_thetas_ = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ );
    }
    else
    {
        delta_thetas_ = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ + kd_*(error_ - last_error_) );
    }

    return delta_thetas_;
}
/* TODO BIAS on SIMD8 not workind yet... */ static void emit_txb(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; GLuint i; payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i, 1); for (i = 0; i < 4; i++) src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); break; default: brw_MOV(p, brw_message_reg(2), src[0]); brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), src[2]); break; } brw_MOV(p, brw_message_reg(5), src[3]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); brw_SAMPLE(p, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(payload_reg, BRW_REGISTER_TYPE_UW), unit + MAX_DRAW_BUFFERS, /* surface */ unit, /* sampler */ inst->DstReg.WriteMask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, 4, 4, 0); }
void brw_blorp_eu_emitter::emit_scattered_read(const struct brw_reg &dst, enum opcode opcode, const struct brw_reg &src0, unsigned msg_reg_nr, unsigned msg_length, int dispatch_width, bool use_header) { assert(opcode == SHADER_OPCODE_DWORD_SCATTERED_READ || (brw_ctx->gen >= 7 && opcode == SHADER_OPCODE_BYTE_SCATTERED_READ)); fs_inst *inst = new (mem_ctx) fs_inst(opcode); switch (dispatch_width) { case 1: default: inst->dst = vec1(dst); break; case 2: inst->dst = vec2(dst); break; case 4: inst->dst = vec4(dst); break; case 8: inst->dst = vec8(dst); break; case 16: inst->dst = vec16(dst); break; } inst->src[0] = src0; inst->base_mrf = msg_reg_nr; inst->mlen = msg_length; inst->header_present = use_header; inst->target = BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX; insts.push_tail(inst); }
// Computes one controller step and returns the resulting joint update.
//
// reference: desired end-effector pose.
// thetas:    current joint configuration (stored in thetas_).
//
// The gain kp_ is only recomputed when the reference differs from the one
// seen on the previous call; the new gain is printed to std::cout whenever
// that happens.
VectorXd HInfinityRobustController::getNewJointVelocities( const DQ reference, const VectorXd thetas)
{
    // Remap the arguments onto the controller state.
    thetas_ = thetas;
    reference_state_variables_ = reference.vec8();

    // Jacobian and measured end-effector pose.
    task_jacobian_ = robot_.jacobian(thetas_);
    end_effector_pose_ = robot_.fkm(thetas_);
    measured_state_variables_ = vec8(end_effector_pose_);

    // Error and task matrix, both mapped through Hminus8(reference)*C8_.
    error_ = Hminus8(reference)*(C8_)*(reference_state_variables_ - measured_state_variables_);
    N_ = Hminus8(reference)*(C8_)*task_jacobian_;
    N_pseudoinverse_ = pseudoInverse(N_);

    // Recalculation of K (if reference changed).
    if(old_reference_ != reference)
    {
        Bw_ = Hminus8(reference)*(C8_)*B_;

        const double bw_sq_sqrt2  = (Bw_.transpose()*Bw_*sqrt(2.0)).coeff(0);
        const double inv_gamma    = 1.0/gamma_;
        const double alpha_scaled = alpha_/sqrt(bw_sq_sqrt2);

        kp_ = inv_gamma * ( Bw_*Bw_.transpose() + (bw_sq_sqrt2/4.0)*identity8_ )*alpha_scaled;

        std::cout << std::endl << kp_ << std::endl;

        old_reference_ = reference;
    }

    delta_thetas_ = N_pseudoinverse_*kp_*error_;

    return delta_thetas_;
}
/**
 * Emit a LOD-biased texture sample.
 *
 * \param c              WM compile context
 * \param dst            destination registers; only dst[0] is used, as the
 *                       base of the response
 * \param dst_flags      write mask (masked with WRITEMASK_XYZW)
 * \param arg            arg[0..2] are texture coordinates, arg[3] is
 *                       loaded into the fourth payload slot
 * \param depth_payload  register passed to the sampler as src0
 * \param tex_idx        texture target; anything other than 1D, 2D, RECT,
 *                       3D or CUBE aborts
 * \param sampler        sampler index, also used to derive the surface
 *
 * The payload starts at m2 and uses `mrf_per_channel` message registers
 * per component.  Coordinates the target does not use are sent as 0.0.
 * Shadow comparison is ignored for txb.
 */
void emit_txb(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   /* Defaults describe the 8-wide case; overridden below. */
   GLuint msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
   GLuint mrf_per_channel = 1;
   GLuint response_length = 4;
   GLuint msgLength;
   GLuint slot;
   struct brw_reg dst_retyped;
   struct brw_reg payload[4];

   /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
    * samples, so we'll use the 16-wide instruction, leave the second halves
    * undefined, and trust the execution mask to keep the undefined pixels
    * from mattering.
    */
   if (c->dispatch_width == 16 || intel->gen < 5) {
      if (intel->gen < 5)
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
      mrf_per_channel = 2;
      response_length = 8;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
   } else {
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
   }

   /* Select the three coordinate operands for this texture target. */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      payload[0] = arg[0];
      payload[1] = brw_imm_f(0);
      payload[2] = brw_imm_f(0);
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      payload[0] = arg[0];
      payload[1] = arg[1];
      payload[2] = brw_imm_f(0);
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_CUBE_INDEX:
      payload[0] = arg[0];
      payload[1] = arg[1];
      payload[2] = arg[2];
      break;
   default:
      /* unexpected target */
      abort();
   }
   payload[3] = arg[3];

   /* Load the payload, one component per `mrf_per_channel` registers. */
   for (slot = 0; slot < 4; slot++)
      brw_MOV(p, brw_message_reg(2 + slot * mrf_per_channel), payload[slot]);

   msgLength = 2 + 4 * mrf_per_channel - 1;

   /* NOTE(review): the SIMD mode below is SIMD16 on every path, including
    * the 8-wide Gen5+ one - confirm this is intended.
    */
   brw_SAMPLE(p,
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
	      SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      msgLength,
	      1,
	      BRW_SAMPLER_SIMD_MODE_SIMD16,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
/**
 * Emit a texture sample, optionally with a shadow comparison.
 *
 * \param c              WM compile context
 * \param dst            destination registers; only dst[0] is used, as the
 *                       base of the response
 * \param dst_flags      write mask (masked with WRITEMASK_XYZW)
 * \param arg            texture coordinates; arg[2] doubles as the shadow
 *                       reference value when `shadow` is set
 * \param depth_payload  register passed to the sampler as src0
 * \param tex_idx        texture target; unknown targets abort
 * \param sampler        sampler index, also used to derive the surface and
 *                       to look up the GL_CLAMP mask
 * \param shadow         emit a comparison sample instead of a plain one
 *
 * The payload starts at m2 and each component occupies one message
 * register in 8-wide mode or two in 16-wide mode.
 */
void emit_tex(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler,
	      bool shadow)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   const bool simd16 = (c->dispatch_width == 16);
   const GLuint mrf_per_channel = simd16 ? 2 : 1;
   const GLuint response_length = simd16 ? 8 : 4;
   const GLuint simd_mode = simd16 ? BRW_SAMPLER_SIMD_MODE_SIMD16
				   : BRW_SAMPLER_SIMD_MODE_SIMD8;
   struct brw_reg dst_retyped;
   GLuint next_mrf = 2;
   GLuint coord_mask, coord_count;
   GLuint msg_type;
   GLuint i;

   if (simd16)
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
   else
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);

   /* How many input regs are there? */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      coord_mask = WRITEMASK_X;
      coord_count = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_1D_ARRAY_INDEX:
   case TEXTURE_RECT_INDEX:
      coord_mask = WRITEMASK_XY;
      coord_count = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_2D_ARRAY_INDEX:
   case TEXTURE_CUBE_INDEX:
      coord_mask = WRITEMASK_XYZ;
      coord_count = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5 && c->dispatch_width == 8)
      coord_count = 3;

   if (shadow) {
      if (intel->gen >= 7) {
	 /* On Ivybridge, the shadow comparator comes first. Just load it. */
	 brw_MOV(p, brw_message_reg(next_mrf), arg[2]);
	 next_mrf += mrf_per_channel;
      } else {
	 /* For shadow comparisons, we have to supply u,v,r. */
	 coord_count = 3;
      }
   }

   /* Emit the texcoords; slots beyond what the target uses get 0.0.
    * Saturation is enabled for a coordinate when its GL_CLAMP mask has
    * this sampler's bit set, and always switched off again afterwards.
    */
   for (i = 0; i < coord_count; i++) {
      struct brw_reg value =
	 (coord_mask & (1<<i)) ? arg[i] : brw_imm_f(0);

      if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
	 brw_set_saturate(p, true);

      brw_MOV(p, brw_message_reg(next_mrf), value);
      next_mrf += mrf_per_channel;

      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
   if (shadow && intel->gen < 7) {
      /* One zero-filled slot precedes the reference value on Gen5+
       * (cube map array index) and on older 8-wide dispatch (LOD bias).
       */
      if (intel->gen >= 5 || c->dispatch_width == 8) {
	 brw_MOV(p, brw_message_reg(next_mrf), brw_imm_f(0));
	 next_mrf += mrf_per_channel;
      }

      brw_MOV(p, brw_message_reg(next_mrf), arg[2]);
      next_mrf += mrf_per_channel;
   }

   if (intel->gen >= 5) {
      msg_type = shadow ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
			: GEN5_SAMPLER_MESSAGE_SAMPLE;
   } else {
      /* Note that G45 and older determines shadow compare and dispatch width
       * from message length for most messages.
       */
      msg_type = (c->dispatch_width == 16 && shadow)
	 ? BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
	 : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
   }

   brw_SAMPLE(p,
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
	      SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      next_mrf - 1,
	      1,
	      simd_mode,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
/** * Texture sample instruction. * Note: the msg_type plus msg_length values determine exactly what kind * of sampling operation is performed. See volume 4, page 161 of docs. */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint sampler, GLuint writemask, GLuint msg_type, GLuint response_length, GLuint msg_length, GLboolean eot, GLuint header_present, GLuint simd_mode) { GLboolean need_stall = 0; if (writemask == 0) { /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } /* Hardware doesn't do destination dependency checking on send * instructions properly. Add a workaround which generates the * dependency by other means. In practice it seems like this bug * only crops up for texture samples, and only where registers are * written by the send and then written again later without being * read in between. Luckily for us, we already track that * information and use it to modify the writemask for the * instruction, so that is a guide for whether a workaround is * needed. 
*/ if (writemask != WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; for (i = 0; i < 4; i++) { if (writemask & (1<<i)) break; dst_offset += 2; } for (; i < 4; i++) { if (!(writemask & (1<<i))) break; newmask |= 1<<i; len++; } if (newmask != writemask) { need_stall = 1; /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, m1, brw_vec8_grf(0,0)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); response_length = len * 2; } } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, sampler, msg_type, response_length, msg_length, eot, header_present, simd_mode); } if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } }
// Computes one controller step that takes the joint limits into account and
// returns the resulting joint update (delta_thetas_).
//
// reference: desired end-effector pose.
// thetas:    current joint configuration (stored in thetas_).
//
// The step is computed iteratively on a working copy of the robot
// ("pseudo_robot"). Whenever a joint that already sits within `ep` of one
// of its limits would be pushed further beyond a limit, it is marked, its
// column in the DH matrix is turned into a dummy joint fixed at its current
// value, and the step is recomputed without it. Joints that are not at a
// limit yet but would cross one are clamped so that they land exactly on
// the limit. The loop ends when no new joint had to be marked, or when no
// joint is left to work with (in which case a zero update is returned and a
// message is printed).
//
// NOTE(review): last_error_, error_ and integral_error_ are updated on
// every loop iteration, not once per call - confirm this is intended.
VectorXd Pid_SRIvar_TrackingController::getNewJointVelocities( const DQ reference, const VectorXd thetas)
{

    thetas_ = thetas; //This is necessary for the getNewJointPositions to work

    // Base and effector of the real robot, re-applied to every pseudo robot.
    DQ robot_base = robot_.base();
    DQ robot_effector = robot_.effector();

    // Per-joint marker: 0 = still considered, 1 = ignored (turned dummy).
    VectorXd pseudo_dummy_joint_marker = VectorXd::Zero(robot_dofs_);
    // Joint values / updates of the joints that are still considered.
    VectorXd pseudo_thetas = thetas_;
    VectorXd pseudo_delta_thetas = VectorXd::Zero(robot_dofs_);
    VectorXd possible_new_thetas = VectorXd::Zero(robot_dofs_);

    MatrixXd original_dh_matrix = robot_.getDHMatrix();
    MatrixXd step_dh_matrix = original_dh_matrix;
    DQ_kinematics pseudo_robot(original_dh_matrix);
    pseudo_robot.set_base(robot_base);
    pseudo_robot.set_effector(robot_effector);

    bool should_break_loop = false;
    while(not should_break_loop)
    {

        //Calculate jacobian
        task_jacobian_ = pseudo_robot.analyticalJacobian(pseudo_thetas);

        // Recalculation of measured data.
        end_effector_pose_ = pseudo_robot.fkm(pseudo_thetas);

        //Error
        last_error_ = error_;
        error_ = vec8(reference - end_effector_pose_);
        // Integral with memory: the previous integral is scaled by ki_memory_.
        integral_error_ = error_ + ki_memory_*integral_error_;
        //error_ = vec8(dq_one_ - conj(end_effector_pose_)*reference);

        ///Inverse calculation
        svd_.compute(task_jacobian_, ComputeFullU);
        singular_values_ = svd_.singularValues();

        //Damping Calculation
        // The singular value at index (links - dummy joints - 1) is used as
        // the minimum one for this step.
        int current_step_relevant_dof = ( pseudo_robot.links() - pseudo_robot.n_dummy() );
        double sigma_min = singular_values_( current_step_relevant_dof - 1);
        VectorXd u_min = svd_.matrixU().col( current_step_relevant_dof - 1);
        double lambda = lambda_max_;
        if (sigma_min < epsilon_)
        {
            lambda = (1-(sigma_min/epsilon_)*(sigma_min/epsilon_))*lambda_max_*lambda_max_;
        }

        // Damped pseudoinverse: J^T (J J^T + beta^2 I + lambda^2 u u^T)^-1
        task_jacobian_pseudoinverse_ = (task_jacobian_.transpose())*((task_jacobian_*task_jacobian_.transpose() + (beta_*beta_)*identity_ + (lambda*lambda)*u_min*u_min.transpose()).inverse());

        // pseudo_delta_thetas = task_jacobian_pseudoinverse_*kp_*error_;
        // The derivative term is skipped until one error has been recorded.
        if( at_least_one_error_ )
            pseudo_delta_thetas = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ + kd_*(error_ - last_error_) );
        else
        {
            at_least_one_error_ = true;
            pseudo_delta_thetas = task_jacobian_pseudoinverse_*( kp_*error_ + ki_*integral_error_ );
        }

        //Update delta_thetas for possiblenewthetas calculation
        // i walks all joints, j walks only the joints still considered.
        for(int i=0,j=0; i < robot_dofs_; i++)
        {
            if(pseudo_dummy_joint_marker(i) == 0)
            {
                delta_thetas_(i) = pseudo_delta_thetas(j);
                ++j;
            }
            //Do NOTHING if it should be ignored.
        }

        //Possible new thetas
        possible_new_thetas = thetas_ + delta_thetas_;

        //Verify if loop should end
        should_break_loop = true;
        // j counts the joints that stay considered after this pass.
        int j=0;
        //For all joints
        for(int i = 0; i < robot_dofs_; i++)
        {

            //If joint is not yet marked for not being considered in the minimization
            if(pseudo_dummy_joint_marker(i) == 0)
            {
                //If the controller is trying to put a joint further than any of its limits
                if(    possible_new_thetas(i) > upper_joint_limits_(i)
                    || possible_new_thetas(i) < lower_joint_limits_(i) )
                {

                    //If the joint was already saturated sometime ago
                    double ep = 1.e-05;
                    if (    thetas_(i) > upper_joint_limits_(i) - ep
                         || thetas_(i) < lower_joint_limits_(i) + ep)
                    {

                        pseudo_dummy_joint_marker(i) = 1; //Mark it to be ignored in the minimization
                        //std::cout << std::endl << "Joint " << i << " will be ignored in the next controller step.";
                        step_dh_matrix(4,i) = 1;          //Set matrix as dummy.
                        step_dh_matrix(0,i) = thetas_(i); //Set matrix theta as a fixed value.
                        should_break_loop = false;
                    }
                    //If the joint was not yet saturated and the controller wants to saturate it
                    else
                    {
                        // Saturate the joint in this step.
                        if   ( possible_new_thetas(i) > upper_joint_limits_(i) )
                        {
                            delta_thetas_(i) = upper_joint_limits_(i) - thetas_(i);
                            //std::cout << std::endl << "Joint = " << i << " was saturated in its upper_limit";
                        }
                        else if ( possible_new_thetas(i) < lower_joint_limits_(i) )
                        {
                            delta_thetas_(i) = lower_joint_limits_(i) - thetas_(i);
                            //std::cout << std::endl << "Joint = " << i << " was saturated in its lower_limit";
                        }
                        else
                        {
                            std::cout << std::endl << "Something is really wrong";
                        }
                        //The joint should still be considered in the minimizations.
                        pseudo_thetas(j) = thetas_(i);
                        ++j;
                    }

                }
                //If the controller is not trying to put this joint further than any of its limits, we consider the velocity given normally
                else
                {
                    delta_thetas_(i) = pseudo_delta_thetas(j);
                    pseudo_thetas(j) = thetas_(i);
                    ++j;
                }
            }
            //If joint was marked to be ignored, it shall be ignored.
            else
            {
                delta_thetas_(i) = 0;
            }

        }
        // No joint left to move: give up with a zero update.
        if( j == 0 )
        {
            std::cout << std::endl << "Robot will be unable to get out of this configuration using this controller.";
            delta_thetas_ = VectorXd::Zero(robot_dofs_);
            break;
        }

        // A joint was newly marked: rebuild the pseudo robot from the updated
        // DH matrix and shrink pseudo_thetas to the joints that remain.
        if(not should_break_loop)
        {
            pseudo_robot = DQ_kinematics(step_dh_matrix);  //Change DH
            pseudo_robot.set_base(robot_base);
            pseudo_robot.set_effector(robot_effector);
            pseudo_thetas.conservativeResize(j);           //Resize pseudothetas
        }

    }//While not should_break_loop

    return delta_thetas_;

}
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 *
 * \param c                 GS compile context; instructions are appended to
 *                          c->func and c->prog_data.svbi_postincrement_value
 *                          is set to num_verts
 * \param key               program key; the fields read here are
 *                          num_transform_feedback_bindings, pv_first,
 *                          transform_feedback_bindings,
 *                          transform_feedback_swizzles and
 *                          rasterizer_discard
 * \param num_verts         number of vertices per primitive; the final
 *                          vertex emission only handles 1, 2 and 3
 * \param check_edge_flags  for num_verts == 3, make the emission of
 *                          vertices 0/1 and the PRIM_END flag on vertex 2
 *                          depend on the edge indicator bits in R0.2
 *
 * The generated program has two parts: an optional transform feedback
 * section (only when the key has at least one binding) and the regular
 * vertex emission, which is skipped when rasterizer discard is enabled.
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      /* Unsigned-word view of destination_indices, needed for the packed
       * vector immediates used below.
       */
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         /* Stop predicating the instructions that follow. */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            /* Locate the slot inside the vertex: two slots per register,
             * 16 bytes per slot.
             */
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            /* The swizzled copy into the header is done in Align16 mode;
             * Align1 is restored right after.
             */
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp
                          : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple "mov" instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   brw_gs_overwrite_header_dw2_from_r0(c);
   /* Emit the vertices; the PRIM_START / PRIM_END flags are applied as
    * successive offsets to header DWord 2, and only the last vertex is
    * emitted with the final argument of brw_gs_emit_vue set to true.
    */
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      /* Predication is switched off again before vertex 2 is emitted. */
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
static void emit_tex(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; GLuint msg_len; GLuint i, nr; GLuint emit; GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i, 1); for (i = 0; i < 4; i++) src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; nr = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: emit = WRITEMASK_XY; nr = 2; break; default: emit = WRITEMASK_XYZ; nr = 3; break; } msg_len = 1; for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<<i)) brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); else brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); msg_len += 1; } if (shadow) { brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); brw_MOV(p, brw_message_reg(6), src[2]); } brw_SAMPLE(p, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(payload_reg, BRW_REGISTER_TYPE_UW), unit + MAX_DRAW_BUFFERS, /* surface */ unit, /* sampler */ inst->DstReg.WriteMask, BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, 4, shadow ? 6 : 4, 0); if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); }