void vec4_generator::generate_math2_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 * "Message Payload": * * "Operand0[7]. For the INT DIV functions, this operand is the * denominator." * ... * "Operand1[7]. For the INT DIV functions, this operand is the * numerator." */ bool is_int_div = inst->opcode != SHADER_OPCODE_POW; struct brw_reg &op0 = is_int_div ? src1 : src0; struct brw_reg &op1 = is_int_div ? src0 : src1; brw_push_insn_state(p); brw_set_saturate(p, false); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1); brw_pop_insn_state(p); brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, op0, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
void vec4_generator::generate_math2_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); }
void vec4_generator::generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
void vec4_generator::generate_math_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* Can't do writemask because math can't be align16. */ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); /* Source swizzles are ignored. */ check_gen6_math_src_arg(src0); if (src1.file == BRW_GENERAL_REGISTER_FILE) check_gen6_math_src_arg(src1); brw_set_default_access_mode(p, BRW_ALIGN_1); gen6_math(p, dst, brw_math_function(inst->opcode), src0, src1); brw_set_default_access_mode(p, BRW_ALIGN_16); }
void vec4_generator::generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { /* Can't do writemask because math can't be align16. */ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); check_gen6_math_src_arg(src); brw_set_access_mode(p, BRW_ALIGN_1); brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, src, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); brw_set_access_mode(p, BRW_ALIGN_16); }
/** * Generate assembly for a Vec4 IR instruction. * * \param instruction The Vec4 IR instruction to generate code for. * \param dst The destination register. * \param src An array of up to three source registers. */ void vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) { vec4_instruction *inst = (vec4_instruction *) instruction; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry * shaders, since they use attributes that are vec4's. Since the exec * width is only 4, it's essential that the caller set * force_writemask_all in order to make sure the instruction is executed * regardless of which channels are enabled. */ assert(inst->force_writemask_all); /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy * the following register region restrictions (from Graphics BSpec: * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions * > Register Region Restrictions) * * 1. ExecSize must be greater than or equal to Width. * * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set * to Width * HorzStride." */ for (int i = 0; i < 3; i++) { if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } } switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_MACH(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAD: assert(brw->gen >= 6); brw_MAD(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: assert(brw->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: assert(brw->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_LRP: assert(brw->gen >= 6); brw_LRP(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: assert(brw->gen >= 7); /* BFREV only supports UD type for src and dst. */ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: assert(brw->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: assert(brw->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: assert(brw->gen >= 7); /* CBIT only supports UD type for dst. */ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: brw_MAC(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFE: assert(brw->gen >= 7); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: assert(brw->gen >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: assert(brw->gen >= 7); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(brw->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst_set_pred_control(brw, if_inst, inst->predicate); } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: brw_CONT(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SQRT: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], brw_null_reg()); } else { generate_math1_gen4(inst, dst, src[0]); } break; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); } break; case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: generate_tex(inst, dst, src[0], src[1]); break; case VS_OPCODE_URB_WRITE: generate_vs_urb_write(inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: generate_scratch_write(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: generate_pull_constant_load(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; case GS_OPCODE_THREAD_END: generate_gs_thread_end(inst); break; case GS_OPCODE_SET_WRITE_OFFSET: generate_gs_set_write_offset(dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: generate_gs_set_vertex_count(dst, src[0]); break; case GS_OPCODE_SET_DWORD_2_IMMED: generate_gs_set_dword_2_immed(dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: generate_gs_prepare_channel_masks(dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: generate_gs_set_channel_masks(dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: generate_gs_get_instance_id(dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: generate_untyped_atomic(inst, dst, src[0], src[1]); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: generate_untyped_surface_read(inst, dst, src[0]); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(inst, dst); break; default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", opcode_descs[inst->opcode].name); } else { _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); } abort(); } }