void vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset) { assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; if (intel->gen == 7) { gen6_resolve_implied_move(p, &offset, inst->base_mrf); brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ 1, /* mlen */ false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); return; } struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D), offset); uint32_t msg_type; if (intel->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (intel->gen == 5 || intel->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; /* Each of the 8 channel enables is considered for whether each * dword is written. */ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (intel->gen < 6) send->header.destreg__conditionalmod = inst->base_mrf; brw_set_dp_read_message(p, send, surf_index, BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* mlen */ 1 /* rlen */); }
/** * Extended math function, float[16]. * Use 2 send instructions. */ void brw_math_16( struct brw_compile *p, struct brw_reg dest, GLuint function, GLuint saturate, GLuint msg_reg_nr, struct brw_reg src, GLuint precision ) { struct brw_instruction *insn; GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; /* First instruction: */ brw_push_insn_state(p); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, precision, saturate, BRW_MATH_DATA_VECTOR); /* Second instruction: */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, precision, saturate, BRW_MATH_DATA_VECTOR); brw_pop_insn_state(p); }
/* EU takes the value from the flag register and pushes it onto some * sort of a stack (presumably merging with any flag value already on * the stack). Within an if block, the flags at the top of the stack * control execution on each channel of the unit, eg. on each of the * 16 pixel values in our wm programs. * * When the matching 'else' instruction is reached (presumably by * countdown of the instruction count patched in by our ELSE/ENDIF * functions), the relevent flags are inverted. * * When the matching 'endif' instruction is reached, the flags are * popped off. If the stack is now empty, normal execution resumes. * * No attempt is made to deal with stack overflow (14 elements?). */ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) { struct brw_instruction *insn; if (p->single_program_flow) { assert(execute_size == BRW_EXECUTE_1); insn = next_insn(p, BRW_OPCODE_ADD); insn->header.predicate_inverse = 1; } else { insn = next_insn(p, BRW_OPCODE_IF); } /* Override the defaults for this instruction: */ brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.execution_size = execute_size; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.predicate_control = BRW_PREDICATE_NORMAL; insn->header.mask_control = BRW_MASK_ENABLE; if (!p->single_program_flow) insn->header.thread_control = BRW_THREAD_SWITCH; p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; }
void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_ud(0x0)); }
void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, GLboolean used, GLuint msg_length, GLuint response_length, GLboolean eot, GLboolean writes_complete, GLuint offset, GLuint swizzle) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); assert(msg_length < 16); brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, insn, allocate, used, msg_length, response_length, eot, writes_complete, offset, swizzle); }
void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint msg_length, GLuint response_length, GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, binding_table_index, BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ msg_length, 1, /* pixel scoreboard */ response_length, eot); }
/* To integrate with the above, it makes sense that the comparison * instruction should populate the flag register. It might be simpler * just to use the flag reg for most WM tasks? */ void brw_CMP(struct brw_compile *p, struct brw_reg dest, GLuint conditional, struct brw_reg src0, struct brw_reg src1) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); /* guess_execution_size(insn, src0); */ /* Make it so that future instructions will use the computed flag * value until brw_set_predicate_control_flag_value() is called * again. */ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && dest.nr == 0) { p->current->header.predicate_control = BRW_PREDICATE_NORMAL; p->flag_value = 0xff; } }
/** Extended math function, float[8]. */ void brw_math( struct brw_compile *p, struct brw_reg dest, GLuint function, GLuint saturate, GLuint msg_reg_nr, struct brw_reg src, GLuint data_type, GLuint precision ) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; /* Example code doesn't set predicate_control for send * instructions. */ insn->header.predicate_control = 0; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, precision, saturate, data_type); }
struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; if (p->single_program_flow) insn = next_insn(p, BRW_OPCODE_ADD); else insn = next_insn(p, BRW_OPCODE_WHILE); brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; if (p->single_program_flow) { insn->header.execution_size = BRW_EXECUTE_1; insn->bits3.d = (do_insn - insn) * 16; } else { insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); insn->bits3.if_else.jump_count = do_insn - insn + 1; insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } /* insn->header.mask_control = BRW_MASK_ENABLE; */ /* insn->header.mask_control = BRW_MASK_DISABLE; */ p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; }
void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { GLuint br = 1; if (BRW_IS_IGDNG(p->brw)) br = 2; if (p->single_program_flow) { /* In single program flow mode, there's no need to execute an ENDIF, * since we don't need to do any stack operations, and if we're executing * currently, we want to just continue executing. */ struct brw_instruction *next = &p->store[p->nr_insn]; assert(patch_insn->header.opcode == BRW_OPCODE_ADD); patch_insn->bits3.ud = (next - patch_insn) * 16; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = patch_insn->header.execution_size; insn->header.mask_control = BRW_MASK_ENABLE; insn->header.thread_control = BRW_THREAD_SWITCH; assert(patch_insn->bits3.if_else.jump_count == 0); /* Patch the if or else instructions to point at this or the next * instruction respectively. */ if (patch_insn->header.opcode == BRW_OPCODE_IF) { /* Automagically turn it into an IFF: */ patch_insn->header.opcode = BRW_OPCODE_IFF; patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 0; patch_insn->bits3.if_else.pad0 = 0; } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 1; patch_insn->bits3.if_else.pad0 = 0; } else { assert(0); } /* Also pop item off the stack in the endif instruction: */ insn->bits3.if_else.jump_count = 0; insn->bits3.if_else.pop_count = 1; insn->bits3.if_else.pad0 = 0; } }
static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(insn, dest); brw_set_src0(insn, src); return insn; }
struct brw_instruction *brw_CONT(struct brw_compile *p) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ insn->bits3.if_else.pad0 = 0; return insn; }
/** * Read a float[4] vector from the data port Data Cache (const buffer). * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. * If relAddr is true, we'll do an indirect fetch using the address register. */ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLboolean relAddr, GLuint location, GLuint bind_table_index ) { /* XXX: relAddr not implemented */ GLuint msg_reg_nr = 1; { struct brw_reg b; brw_push_insn_state(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); /* Setup MRF[1] with location/offset into const buffer */ b = brw_message_reg(msg_reg_nr); b = retype(b, BRW_REGISTER_TYPE_UD); /* XXX I think we're setting all the dwords of MRF[1] to 'location'. * when the docs say only dword[2] should be set. Hmmm. But it works. */ brw_MOV(p, b, brw_imm_ud(location)); brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); brw_set_dest(insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, insn, bind_table_index, 0, /* msg_control (0 means 1 Oword) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ 1, /* msg_length */ 1, /* response_length (1 Oword) */ 0); /* eot */ } }
static void brw_fb_write(struct brw_compile *p, int dw) { struct brw_instruction *insn; unsigned msg_control, msg_type, msg_len; struct brw_reg src0; bool header; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; msg_len = 8; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; msg_len = 4; } if (p->gen < 060) { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); brw_pop_insn_state(p); msg_len += 2; } /* The execution mask is ignored for render target writes. */ insn = brw_next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; if (p->gen >= 060) { msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = brw_message_reg(2); header = false; } else { insn->header.destreg__conditionalmod = 0; msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = __retype_uw(brw_vec8_grf(0, 0)); header = true; } brw_set_dest(p, insn, null_result(dw)); brw_set_src0(p, insn, src0); brw_set_dp_write_message(p, insn, 0, msg_control, msg_type, msg_len, header, true, 0, true, false); }
void vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset) { assert(brw->gen <= 7); assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D), offset); uint32_t msg_type; if (brw->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (brw->gen == 5 || brw->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; /* Each of the 8 channel enables is considered for whether each * dword is written. */ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) send->header.destreg__conditionalmod = inst->base_mrf; brw_set_dp_read_message(p, send, surf_index, BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* mlen */ true, /* header_present */ 1 /* rlen */); brw_mark_surface_used(&prog_data->base, surf_index); }
/** * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. * Scratch offset should be a multiple of 64. * Used for register spilling. */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint scratch_offset ) { GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); brw_pop_insn_state(p); } { GLuint msg_length = 3; struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); brw_set_dp_write_message(p->brw, insn, 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, 0, /* pixel scoreboard */ 0, /* response_length */ 0); /* eot */ } }
struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { struct brw_instruction *insn; GLuint br = 1; if (BRW_IS_IGDNG(p->brw)) br = 2; if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); } else { insn = next_insn(p, BRW_OPCODE_ELSE); } brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = if_insn->header.execution_size; insn->header.mask_control = BRW_MASK_ENABLE; if (!p->single_program_flow) insn->header.thread_control = BRW_THREAD_SWITCH; /* Patch the if instruction to point at this instruction. */ if (p->single_program_flow) { assert(if_insn->header.opcode == BRW_OPCODE_ADD); if_insn->bits3.ud = (insn - if_insn + 1) * 16; } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); if_insn->bits3.if_else.jump_count = br * (insn - if_insn); if_insn->bits3.if_else.pop_count = 0; if_insn->bits3.if_else.pad0 = 0; } return insn; }
/** * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. * Scratch offset should be a multiple of 64. * Used for register spilling. */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, GLuint scratch_offset ) { GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(p->brw, insn, 255, /* binding table index (255=stateless) */ 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1, /* target cache (render/scratch) */ 1, /* msg_length */ 2, /* response_length */ 0); /* eot */ } }
/* DO/WHILE loop: */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { if (p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); /* Override the defaults for this instruction: */ brw_set_dest(insn, brw_null_reg()); brw_set_src0(insn, brw_null_reg()); brw_set_src1(insn, brw_null_reg()); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = execute_size; insn->header.predicate_control = BRW_PREDICATE_NONE; /* insn->header.mask_control = BRW_MASK_ENABLE; */ /* insn->header.mask_control = BRW_MASK_DISABLE; */ return insn; } }
void vec4_generator::generate_scratch_read(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index) { struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), index); uint32_t msg_type; if (brw->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (brw->gen == 5 || brw->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; /* Each of the 8 channel enables is considered for whether each * dword is written. */ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) send->header.destreg__conditionalmod = inst->base_mrf; brw_set_dp_read_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 2, /* mlen */ true, /* header_present */ 1 /* rlen */); }
void vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset) { assert(surf_index.file == BRW_IMMEDIATE_VALUE && surf_index.type == BRW_REGISTER_TYPE_UD); brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ 1, /* mlen */ false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); }
void vec4_generator::generate_scratch_write(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg index) { struct brw_reg header = brw_vec8_grf(0, 0); bool write_commit; /* If the instruction is predicated, we'll predicate the send, not * the header setup. */ brw_set_predicate_control(p, false); gen6_resolve_implied_move(p, &header, inst->base_mrf); generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), index); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), retype(src, BRW_REGISTER_TYPE_D)); uint32_t msg_type; if (brw->gen >= 7) msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else if (brw->gen == 6) msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; brw_set_predicate_control(p, inst->predicate); /* Pre-gen6, we have to specify write commits to ensure ordering * between reads and writes within a thread. Afterwards, that's * guaranteed and write commits only matter for inter-thread * synchronization. */ if (brw->gen >= 6) { write_commit = false; } else { /* The visitor set up our destination register to be g0. This * means that when the next read comes along, we will end up * reading from g0 and causing a block on the write commit. For * write-after-read, we are relying on the value of the previous * read being used (and thus blocking on completion) before our * write is executed. This means we have to be careful in * instruction scheduling to not violate this assumption. */ write_commit = true; } /* Each of the 8 channel enables is considered for whether each * dword is written. */ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) send->header.destreg__conditionalmod = inst->base_mrf; brw_set_dp_write_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, 3, /* mlen */ true, /* header present */ false, /* not a render target write */ write_commit, /* rlen */ false, /* eot */ write_commit); }
void vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset) { assert(surf_index.type == BRW_REGISTER_TYPE_UD); if (surf_index.file == BRW_IMMEDIATE_VALUE) { brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ 1, /* mlen */ false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); /* a0.0 = surf_index & 0xff */ brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1); brw_set_dest(p, insn_and, addr); brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD))); brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); /* a0.0 |= <descriptor> */ brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR); brw_set_sampler_message(p, insn_or, 0 /* surface */, 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, 1 /* mlen */, false /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD); brw_set_src0(p, insn_or, addr); brw_set_dest(p, insn_or, addr); /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); brw_set_src0(p, insn_send, offset); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); /* visitor knows more than we do about the surface limit required, * so has already done marking. */ } }
void vec4_generator::generate_tex(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg sampler_index) { int msg_type = -1; if (brw->gen >= 5) { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (inst->shadow_compare) { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; case SHADER_OPCODE_TXD: if (inst->shadow_compare) { /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */ assert(brw->gen >= 8 || brw->is_haswell); msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; } break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_CMS: if (brw->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; else msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_MCS: assert(brw->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; case SHADER_OPCODE_TXS: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TG4: if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; } break; case SHADER_OPCODE_TG4_OFFSET: if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; } break; default: unreachable("should not get here: invalid vec4 texture opcode"); } } else { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (inst->shadow_compare) { msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE; assert(inst->mlen == 3); } else { msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD; assert(inst->mlen == 2); } break; case SHADER_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually. */ msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS; assert(inst->mlen == 4); break; case SHADER_OPCODE_TXF: msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD; assert(inst->mlen == 2); break; case SHADER_OPCODE_TXS: msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO; assert(inst->mlen == 2); break; default: unreachable("should not get here: invalid vec4 texture opcode"); } } assert(msg_type != -1); assert(sampler_index.type == BRW_REGISTER_TYPE_UD); /* Load the message header if present. If there's a texture offset, we need * to set it up explicitly and load the offset bitfield. Otherwise, we can * use an implied move from g0 to the first message register. */ if (inst->header_present) { if (brw->gen < 6 && !inst->texture_offset) { /* Set up an implied move from g0 to the MRF. */ src = brw_vec8_grf(0, 0); } else { struct brw_reg header = retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); /* Explicitly set up the message header by copying g0 to the MRF. */ brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_set_default_access_mode(p, BRW_ALIGN_1); if (inst->texture_offset) { /* Set the texel offset bits in DWord 2. */ brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(inst->texture_offset)); } brw_adjust_sampler_state_pointer(p, header, sampler_index, dst); brw_pop_insn_state(p); } } uint32_t return_format; switch (dst.type) { case BRW_REGISTER_TYPE_D: return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32; break; case BRW_REGISTER_TYPE_UD: return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; break; default: return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; break; } uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 || inst->opcode == SHADER_OPCODE_TG4_OFFSET) ? prog_data->base.binding_table.gather_texture_start : prog_data->base.binding_table.texture_start; if (sampler_index.file == BRW_IMMEDIATE_VALUE) { uint32_t sampler = sampler_index.dw1.ud; brw_SAMPLE(p, dst, inst->base_mrf, src, sampler + base_binding_table_index, sampler % 16, msg_type, 1, /* response length */ inst->mlen, inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); /* Some care required: `sampler` and `temp` may alias: * addr = sampler & 0xff * temp = (sampler << 8) & 0xf00 * addr = addr | temp */ brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); brw_OR(p, addr, addr, temp); /* a0.0 |= <descriptor> */ brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR); brw_set_sampler_message(p, insn_or, 0 /* surface */, 0 /* sampler */, msg_type, 1 /* rlen */, inst->mlen /* mlen */, inst->header_present /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD); brw_set_src0(p, insn_or, addr); brw_set_dest(p, insn_or, addr); /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); brw_set_src0(p, insn_send, src); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); /* visitor knows more than we do about the surface limit required, * so has already done marking. */ } }
/** * Read float[4] constant(s) from VS constant buffer. * For relative addressing, two float[4] constants will be read into 'dest'. * Otherwise, one float[4] constant will be read into the lower half of 'dest'. */ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest, GLuint oword, GLboolean relAddr, struct brw_reg addrReg, GLuint location, GLuint bind_table_index) { GLuint msg_reg_nr = 1; assert(oword < 2); /* printf("vs const read msg, location %u, msg_reg_nr %d\n", location, msg_reg_nr); */ /* Setup MRF[1] with location/offset into const buffer */ { struct brw_reg b; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /*brw_set_access_mode(p, BRW_ALIGN_16);*/ /* XXX I think we're setting all the dwords of MRF[1] to 'location'. * when the docs say only dword[2] should be set. Hmmm. But it works. */ b = brw_message_reg(msg_reg_nr); b = retype(b, BRW_REGISTER_TYPE_UD); /*b = get_element_ud(b, 2);*/ if (relAddr) { brw_ADD(p, b, addrReg, brw_imm_ud(location)); } else { brw_MOV(p, b, brw_imm_ud(location)); } brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ brw_set_dest(insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, insn, bind_table_index, oword, /* 0 = lower Oword, 1 = upper Oword */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ 1, /* msg_length */ 1, /* response_length (1 Oword) */ 0); /* eot */ } }
/** * Texture sample instruction. * Note: the msg_type plus msg_length values determine exactly what kind * of sampling operation is performed. See volume 4, page 161 of docs. */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint sampler, GLuint writemask, GLuint msg_type, GLuint response_length, GLuint msg_length, GLboolean eot, GLuint header_present, GLuint simd_mode) { GLboolean need_stall = 0; if (writemask == 0) { /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } /* Hardware doesn't do destination dependency checking on send * instructions properly. Add a workaround which generates the * dependency by other means. In practice it seems like this bug * only crops up for texture samples, and only where registers are * written by the send and then written again later without being * read in between. Luckily for us, we already track that * information and use it to modify the writemask for the * instruction, so that is a guide for whether a workaround is * needed. */ if (writemask != WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; for (i = 0; i < 4; i++) { if (writemask & (1<<i)) break; dst_offset += 2; } for (; i < 4; i++) { if (!(writemask & (1<<i))) break; newmask |= 1<<i; len++; } if (newmask != writemask) { need_stall = 1; /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, m1, brw_vec8_grf(0,0)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); response_length = len * 2; } } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, sampler, msg_type, response_length, msg_length, eot, header_present, simd_mode); } if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } }