/**
 * Emit the cube-map coordinate intrinsic(s) for the three coordinates in
 * \p in and return the result as one 4-element vector.
 *
 * When HAVE_LLVM >= 0x0309 four scalar intrinsics are emitted, in this
 * order, and gathered into elements 0..3 of the result:
 * llvm.amdgcn.cubetc, llvm.amdgcn.cubesc, llvm.amdgcn.cubema,
 * llvm.amdgcn.cubeid.  Otherwise the inputs are packed as
 * (in[0], in[1], in[2], undef) and passed to the single vector intrinsic
 * llvm.AMDGPU.cube, whose result is returned directly.
 */
static LLVMValueRef build_cube_intrinsic(struct gallivm_state *gallivm,
					 LLVMValueRef in[3])
{
	static const char *const amdgcn_cube_ops[4] = {
		"llvm.amdgcn.cubetc",
		"llvm.amdgcn.cubesc",
		"llvm.amdgcn.cubema",
		"llvm.amdgcn.cubeid",
	};
	LLVMValueRef lanes[4];
	LLVMValueRef packed;
	LLVMTypeRef scalar_type;
	unsigned i;

	if (HAVE_LLVM < 0x0309) {
		/* Legacy path: one vector intrinsic on a padded vec4. */
		lanes[0] = in[0];
		lanes[1] = in[1];
		lanes[2] = in[2];
		lanes[3] = LLVMGetUndef(LLVMTypeOf(in[0]));
		packed = lp_build_gather_values(gallivm, lanes, 4);

		return lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.cube",
					  LLVMTypeOf(packed), &packed, 1,
					  LLVMReadNoneAttribute);
	}

	/* The scalar intrinsics return the same type as the inputs. */
	scalar_type = LLVMTypeOf(in[0]);
	for (i = 0; i < 4; i++) {
		lanes[i] = lp_build_intrinsic(gallivm->builder,
					      amdgcn_cube_ops[i], scalar_type,
					      in, 3, LLVMReadNoneAttribute);
	}

	return lp_build_gather_values(gallivm, lanes, 4);
}
static void dp_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { struct lp_build_context * base = &bld_base->base; unsigned chan; LLVMValueRef elements[2][4]; unsigned opcode = emit_data->inst->Instruction.Opcode; unsigned dp_components = (opcode == TGSI_OPCODE_DP2 ? 2 : (opcode == TGSI_OPCODE_DP3 ? 3 : 4)); for (chan = 0 ; chan < dp_components; chan++) { elements[0][chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); elements[1][chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 1, chan); } for ( ; chan < 4; chan++) { elements[0][chan] = base->zero; elements[1][chan] = base->zero; } /* Fix up for DPH */ if (opcode == TGSI_OPCODE_DPH) { elements[0][TGSI_CHAN_W] = base->one; } emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, elements[0], 4); emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm, elements[1], 4); emit_data->arg_count = 2; emit_data->dst_type = base->elem_type; }
static void txp_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { const struct tgsi_full_instruction * inst = emit_data->inst; LLVMValueRef src_w; unsigned chan; LLVMValueRef coords[5]; emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); for (chan = 0; chan < 3; chan++ ) { LLVMValueRef arg = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); coords[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV, arg, src_w); } coords[3] = bld_base->base.one; if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && inst->Instruction.Opcode != TGSI_OPCODE_TXQ && inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL); } emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, coords, 4); emit_data->arg_count = 1; }
static LLVMValueRef emit_fetch_temporary( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS] = {}; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = emit_fetch_temporary(bld_base, reg, type, chan); } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } if (reg->Register.Indirect) { LLVMValueRef array_index = emit_array_index(bld, reg, swizzle); LLVMValueRef ptr = LLVMBuildGEP(builder, bld->temps_array, &array_index, 1, ""); return LLVMBuildLoad(builder, ptr, ""); } else { LLVMValueRef temp_ptr; temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, "")); } }
static LLVMValueRef emit_fetch( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef result, ptr; if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = emit_fetch(bld_base, reg, type, chan); } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } if (reg->Register.Indirect) { struct tgsi_declaration_range range = get_array_range(bld_base, reg->Register.File, ®->Indirect); return LLVMBuildExtractElement(builder, emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle), emit_array_index(bld, ®->Indirect, reg->Register.Index - range.First), ""); } switch(reg->Register.File) { case TGSI_FILE_IMMEDIATE: { LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } case TGSI_FILE_INPUT: result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; break; case TGSI_FILE_TEMPORARY: ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); result = LLVMBuildLoad(builder, ptr, ""); break; case TGSI_FILE_OUTPUT: ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); result = LLVMBuildLoad(builder, ptr, ""); break; default: return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); } return bitcast(bld_base, type, result); }
/**
 * Fetch the operands of a TXD instruction.
 *
 * Each of the first three source operands is read as four channels and
 * gathered into a 4-element vector stored in emit_data->args[0..2].
 * arg_count is set to 3 and dst_type to a 4-element vector of the element
 * type.
 */
static void txd_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	unsigned operand;

	for (operand = 0; operand < 3; ++operand) {
		LLVMValueRef lanes[4];
		unsigned lane = 0;

		while (lane < 4) {
			lanes[lane] = lp_build_emit_fetch(bld_base, inst,
							  operand, lane);
			++lane;
		}

		emit_data->args[operand] =
			lp_build_gather_values(bld_base->base.gallivm, lanes, 4);
	}

	emit_data->arg_count = 3;
	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
}
static LLVMValueRef emit_fetch_input( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS] = {}; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = ctx->inputs[radeon_llvm_reg_index_soa( reg->Register.Index, chan)]; } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } else { return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]); } }
static LLVMValueRef fetch_constant( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct lp_build_context * base = &bld_base->base; const struct tgsi_ind_register *ireg = ®->Indirect; unsigned idx; LLVMValueRef args[2]; LLVMValueRef addr; LLVMValueRef result; if (swizzle == LP_CHAN_ALL) { unsigned chan; LLVMValueRef values[4]; for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) values[chan] = fetch_constant(bld_base, reg, type, chan); return lp_build_gather_values(bld_base->base.gallivm, values, 4); } idx = reg->Register.Index * 4 + swizzle; if (!reg->Register.Indirect) return bitcast(bld_base, type, si_shader_ctx->constants[idx]); args[0] = si_shader_ctx->const_resource; args[1] = lp_build_const_int32(base->gallivm, idx * 4); addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]); result = build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type, args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); return bitcast(bld_base, type, result); }
static void txp_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { LLVMValueRef src_w; unsigned chan; LLVMValueRef coords[4]; emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); for (chan = 0; chan < 3; chan++ ) { LLVMValueRef arg = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); coords[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV, arg, src_w); } coords[3] = bld_base->base.one; emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, coords, 4); emit_data->arg_count = 1; }
static void tex_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { const struct tgsi_full_instruction * inst = emit_data->inst; LLVMValueRef coords[5]; unsigned chan; for (chan = 0; chan < 4; chan++) { coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); } if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { /* These instructions have additional operand that should be packed * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords. * That operand should be passed as a float value in the args array * right after the coord vector. After packing it's not used anymore, * that's why arg_count is not increased */ coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0); } if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && inst->Instruction.Opcode != TGSI_OPCODE_TXQ && inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL); } emit_data->arg_count = 1; emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, coords, 4); emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); }
static LLVMValueRef emit_fetch_immediate( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { LLVMTypeRef ctype; LLVMContextRef ctx = bld_base->base.gallivm->context; switch (type) { case TGSI_TYPE_UNSIGNED: case TGSI_TYPE_SIGNED: ctype = LLVMInt32TypeInContext(ctx); break; case TGSI_TYPE_UNTYPED: case TGSI_TYPE_FLOAT: ctype = LLVMFloatTypeInContext(ctx); break; default: ctype = 0; break; } struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS] = {}; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = LLVMConstBitCast(bld->immediates[reg->Register.Index][chan], ctype); } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } else { return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } }
static void llvm_emit_tex( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef args[7]; unsigned c, sampler_src; struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { switch (emit_data->inst->Instruction.Opcode) { case TGSI_OPCODE_TXQ: { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->uses_tex_buffers = true; bool isEgPlus = (ctx->chip_class >= EVERGREEN); LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, isEgPlus ? 0 : 1); LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER); if (!isEgPlus) { LLVMValueRef maskval[4] = { lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 3), lp_build_const_int32(gallivm, 0), }; LLVMValueRef mask = LLVMConstVector(maskval, 4); cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval, mask, ""); } emit_data->output[0] = cvecval; return; } case TGSI_OPCODE_TXF: { args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); emit_data->output[0] = build_intrinsic(gallivm->builder, "llvm.R600.load.texbuf", emit_data->dst_type, args, 2, LLVMReadNoneAttribute); if (ctx->chip_class >= EVERGREEN) return; ctx->uses_tex_buffers = true; LLVMDumpValue(emit_data->output[0]); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4), ""); LLVMValueRef Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 0), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildBitCast(gallivm->builder, Mask, LLVMVectorType(bld_base->base.int_elem_type, 4), ""); emit_data->output[0] = 
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND, emit_data->output[0], Mask); LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder, emit_data->output[0], lp_build_const_int32(gallivm, 3), ""); Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildExtractElement(gallivm->builder, Mask, lp_build_const_int32(gallivm, 0), ""); Mask = LLVMBuildBitCast(gallivm->builder, Mask, bld_base->base.int_elem_type, ""); WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR, WComponent, Mask); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), ""); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), ""); } return; default: break; } } if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX || emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) { LLVMValueRef Vector[4] = { LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 1), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 3), ""), }; switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; case TGSI_TEXTURE_1D: Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; default: break; } args[0] = lp_build_gather_values(gallivm, Vector, 4); } else { args[0] = emit_data->args[0]; } assert(emit_data->arg_count + 2 <= Elements(args)); for (c = 1; c < emit_data->arg_count; ++c) args[c] = emit_data->args[c]; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 
args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), ""); args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), ""); args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), ""); } sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF && (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D); break; case TGSI_TEXTURE_2D_ARRAY_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY); break; default: break; } if (ctx->has_compressed_msaa_texturing) { LLVMValueRef ldptr_args[10] = { args[0], // Coord args[1], // Offset X args[2], // Offset Y args[3], // Offset Z args[4], args[5], lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1) }; LLVMValueRef ptr = build_intrinsic(gallivm->builder, "llvm.R600.ldptr", emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], lp_build_const_int32(gallivm, 3), ""); Tmp = LLVMBuildMul(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr, lp_build_const_int32(gallivm, 0), ""); ResX = LLVMBuildBitCast(gallivm->builder, ResX, bld_base->base.int_elem_type, ""); Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, ""); Tmp = LLVMBuildAnd(gallivm->builder, Tmp, 
lp_build_const_int32(gallivm, 0xF), ""); args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp, lp_build_const_int32(gallivm, 3), ""); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); } } emit_data->output[0] = build_intrinsic(gallivm->builder, action->intr_name, emit_data->dst_type, args, c, LLVMReadNoneAttribute); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) if (emit_data->inst->Dst[0].Register.WriteMask & 4) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER), lp_build_const_int32(gallivm, 0), ""); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->has_txq_cube_array_z_comp = true; } }
static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_context * base = &bld_base->base; struct pipe_stream_output_info * so = ctx->stream_outputs; unsigned i; unsigned next_pos = 60; unsigned next_param = 0; unsigned color_count = 0; boolean has_color = false; if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) { for (i = 0; i < so->num_outputs; i++) { unsigned register_index = so->output[i].register_index; unsigned start_component = so->output[i].start_component; unsigned num_components = so->output[i].num_components; unsigned dst_offset = so->output[i].dst_offset; unsigned chan; LLVMValueRef elements[4]; if (dst_offset < start_component) { for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { elements[chan] = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], ""); } start_component = 0; } else { for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { elements[chan] = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[register_index][chan], ""); } } LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4); LLVMValueRef args[4]; args[0] = output; args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component); args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer); args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component); lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output", LLVMVoidTypeInContext(base->gallivm->context), args, 4); } } /* Add the necessary export instructions */ for (i = 0; i < ctx->output_reg_count; i++) { unsigned chan; LLVMValueRef elements[4]; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { elements[chan] = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][chan], ""); } if (ctx->alpha_to_one && ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->r600_outputs[i].name 
== TGSI_SEMANTIC_COLOR) elements[3] = lp_build_const_float(base->gallivm, 1.0f); LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4); if (ctx->type == TGSI_PROCESSOR_VERTEX) { switch (ctx->r600_outputs[i].name) { case TGSI_SEMANTIC_POSITION: case TGSI_SEMANTIC_PSIZE: { LLVMValueRef args[3]; args[0] = output; args[1] = lp_build_const_int32(base->gallivm, next_pos++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); break; } case TGSI_SEMANTIC_CLIPVERTEX: { LLVMValueRef args[3]; unsigned reg_index; unsigned base_vector_chan; LLVMValueRef adjusted_elements[4]; for (reg_index = 0; reg_index < 2; reg_index ++) { for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan); LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE); args[0] = output; args[1] = base_vector; adjusted_elements[chan] = build_intrinsic(base->gallivm->builder, "llvm.AMDGPU.dp4", bld_base->base.elem_type, args, 2, LLVMReadNoneAttribute); } args[0] = lp_build_gather_values(base->gallivm, adjusted_elements, 4); args[1] = lp_build_const_int32(base->gallivm, next_pos++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); } break; } case TGSI_SEMANTIC_CLIPDIST : { LLVMValueRef args[3]; args[0] = output; args[1] = lp_build_const_int32(base->gallivm, next_pos++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); args[1] = lp_build_const_int32(base->gallivm, next_param++); 
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); break; } case TGSI_SEMANTIC_FOG: { elements[0] = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][0], ""); elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f); elements[3] = lp_build_const_float(base->gallivm, 1.0f); LLVMValueRef args[3]; args[0] = lp_build_gather_values(base->gallivm, elements, 4); args[1] = lp_build_const_int32(base->gallivm, next_param++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); break; } default: { LLVMValueRef args[3]; args[0] = output; args[1] = lp_build_const_int32(base->gallivm, next_param++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); break; } } } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { switch (ctx->r600_outputs[i].name) { case TGSI_SEMANTIC_COLOR: has_color = true; if ( color_count < ctx->color_buffer_count) { LLVMValueRef args[3]; args[0] = output; if (ctx->fs_color_all) { for (unsigned j = 0; j < ctx->color_buffer_count; j++) { args[1] = lp_build_const_int32(base->gallivm, j); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); } } else { args[1] = lp_build_const_int32(base->gallivm, color_count++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", 
LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); } } break; case TGSI_SEMANTIC_POSITION: lp_build_intrinsic_unary( base->gallivm->builder, "llvm.R600.store.pixel.depth", LLVMVoidTypeInContext(base->gallivm->context), LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], "")); break; case TGSI_SEMANTIC_STENCIL: lp_build_intrinsic_unary( base->gallivm->builder, "llvm.R600.store.pixel.stencil", LLVMVoidTypeInContext(base->gallivm->context), LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], "")); break; } } } // Add dummy exports if (ctx->type == TGSI_PROCESSOR_VERTEX) { if (!next_param) { lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy", LLVMVoidTypeInContext(base->gallivm->context), lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)); } if (!(next_pos-60)) { lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy", LLVMVoidTypeInContext(base->gallivm->context), lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS)); } } if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { if (!has_color) { lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy", LLVMVoidTypeInContext(base->gallivm->context), lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL)); } } }